diff --git a/network.py b/network.py
index 6358761..6ceefbe 100644
--- a/network.py
+++ b/network.py
@@ -152,8 +152,8 @@ class Network:
         # print("Found the best state, being:", np.array(move_scores).argmax())
         return best_move_pair
 
-    def eval(self, trained_eps=0):
-        def do_eval(sess, method, episodes=1000, trained_eps=trained_eps):
+    def eval(self, episode_count, trained_eps = 0, tf_session = None):
+        def do_eval(sess, method, episodes = 1000, trained_eps = 0):
             start_time = time.time()
 
             def print_time_estimate(eps_completed):
@@ -290,17 +290,26 @@ class Network:
             else:
                 sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method))
                 return [0]
-
-        with tf.Session() as session:
-            session.run(tf.global_variables_initializer())
-            self.restore_model(session)
-            outcomes = [(method, do_eval(session,
-                                         method,
-                                         self.config['episode_count'],
-                                         trained_eps=trained_eps))
-                        for method
-                        in self.config['eval_methods']]
-            return outcomes
+
+        if tf_session == None:
+            with tf.Session() as session:
+                session.run(tf.global_variables_initializer())
+                self.restore_model(session)
+                outcomes = [ (method, do_eval(session,
+                                              method,
+                                              episode_count,
+                                              trained_eps = trained_eps))
+                             for method
+                             in self.config['eval_methods'] ]
+                return outcomes
+        else:
+            outcomes = [ (method, do_eval(tf_session,
+                                          method,
+                                          episode_count,
+                                          trained_eps = trained_eps))
+                         for method
+                         in self.config['eval_methods'] ]
+            return outcomes
 
     def train_model(self, episodes=1000, save_step_size=100, trained_eps=0):
         with tf.Session() as sess:
@@ -401,43 +410,3 @@ class Network:
         # save the current state again, so we can continue running backprop based on the "previous" turn.
 
         # NOTE: We need to make a method so that we can take a single turn or at least just pick the next best move, so we know how to evaluate according to TD-learning. Right now, our game just continues in a while loop without nothing to stop it!
-
-
-
-    def eval(self, episode_count, trained_eps = 0, tf_session = None):
-        def do_eval(sess, method, episodes = 1000, trained_eps = 0):
-            start_time = time.time()
-
-            writer.close()
-
-            return outcomes
-
-        # take turn, which finds the best state and picks it, based on the current network
-        # save current state
-        # run training operation (session.run(self.training_op, {x:x, value_next, value_next})),
-        # (something which does the backprop, based on the state after having taken a turn,
-        # found before, and the state we saved in the beginning and from now we'll
-        # save it at the end of the turn
-
-        # save the current state again, so we can continue running backprop based on the "previous" turn.
-
-
-        if tf_session == None:
-            with tf.Session():
-                session.run(tf.global_variables_initializer())
-                self.restore_model(session)
-                outcomes = [ (method, do_eval(session,
-                                              method,
-                                              episode_count,
-                                              trained_eps = trained_eps))
-                             for method
-                             in self.config['eval_methods'] ]
-                return outcomes
-        else:
-            outcomes = [ (method, do_eval(tf_session,
-                                          method,
-                                          episode_count,
-                                          trained_eps = trained_eps))
-                         for method
-                         in self.config['eval_methods'] ]
-            return outcomes
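
Usage sketch for the refactored eval signature above. This is a minimal, hypothetical example and not part of the patch: it assumes an already-constructed Network instance named `network` whose config defines 'eval_methods', and the TensorFlow 1.x session API used elsewhere in network.py.

    # Hypothetical usage sketch (not part of the patch). Assumes `network` is an
    # already-constructed Network instance and TensorFlow 1.x is in use.
    import tensorflow as tf

    # Standalone call: eval() opens its own tf.Session, restores the saved model,
    # and runs every method listed in network.config['eval_methods'].
    outcomes = network.eval(episode_count=500)

    # Reusing an open session (as train_model could do): the caller initialises
    # and restores once, then hands the live session in through tf_session.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        network.restore_model(sess)
        outcomes = network.eval(episode_count=500, trained_eps=1000, tf_session=sess)

Passing tf_session skips the second tf.Session/restore_model inside eval, which is the purpose of the new branch introduced in the @@ -290,17 +290,26 @@ hunk.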