import argparse
import sys
import os
import time

model_storage_path = 'models'

# Create models folder
if not os.path.exists(model_storage_path):
    os.makedirs(model_storage_path)


# Define helper functions
def log_train_outcome(outcome, trained_eps = 0):
    # Append one semicolon-separated line per training session to logs/train.log
    format_vars = { 'trained_eps': trained_eps,
                    'count': len(outcome),
                    'sum': sum(outcome),
                    'mean': sum(outcome) / len(outcome),
                    'time': int(time.time())
    }
    with open(os.path.join(config['model_path'], 'logs', "train.log"), 'a+') as f:
        f.write("{time};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")


def log_eval_outcomes(outcomes, trained_eps = 0):
    # Append one line per evaluation method to logs/eval.log
    for outcome in outcomes:
        scores = outcome[1]
        format_vars = { 'trained_eps': trained_eps,
                        'method': outcome[0],
                        'count': len(scores),
                        'sum': sum(scores),
                        'mean': sum(scores) / len(scores),
                        'time': int(time.time())
        }
        with open(os.path.join(config['model_path'], 'logs', "eval.log"), 'a+') as f:
            f.write("{time};{method};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")


# Parse command line arguments
parser = argparse.ArgumentParser(description="Backgammon games")
parser.add_argument('--episodes', action='store', dest='episode_count',
                    type=int, default=1000,
                    help='number of episodes to train')
parser.add_argument('--model', action='store', dest='model',
                    default='default',
                    help='name of Tensorflow model to use')
parser.add_argument('--eval-methods', action='store',
                    default=['random'], nargs='*',
                    help='specifies evaluation methods')
parser.add_argument('--eval', action='store_true',
                    help='whether to evaluate the neural network with a random choice bot')
parser.add_argument('--train', action='store_true',
                    help='whether to train the neural network')
parser.add_argument('--eval-after-train', action='store_true', dest='eval_after_train',
                    help='whether to evaluate after each training session')
parser.add_argument('--play', action='store_true',
                    help='whether to play with the neural network')
parser.add_argument('--start-episode', action='store', dest='start_episode',
                    type=int, default=0,
                    help='episode count to start at; purely for display purposes')
parser.add_argument('--train-perpetually', action='store_true',
                    help='start new training session as soon as the previous is finished')
parser.add_argument('--list-models', action='store_true',
                    help='list all known models')

args = parser.parse_args()

config = {
    'model': args.model,
    'model_path': os.path.join(model_storage_path, args.model),
    'episode_count': args.episode_count,
    'eval_methods': args.eval_methods,
    'train': args.train,
    'play': args.play,
    'eval': args.eval,
    'eval_after_train': args.eval_after_train,
    'start_episode': args.start_episode,
    'train_perpetually': args.train_perpetually,
    'model_storage_path': model_storage_path
}

# Make sure directories exist
model_path = config['model_path']
log_path = os.path.join(model_path, 'logs')
if not os.path.isdir(model_path):
    os.mkdir(model_path)
if not os.path.isdir(log_path):
    os.mkdir(log_path)

# Set up network
from network import Network

# Set up variables
episode_count = config['episode_count']

# Do actions specified by command-line
if args.list_models:
    def get_eps_trained(folder):
        with open(os.path.join(folder, 'episodes_trained'), 'r') as f:
            return int(f.read())

    model_folders = [ f.path
                      for f in os.scandir(model_storage_path)
                      if f.is_dir() ]
    models = [ (folder, get_eps_trained(folder)) for folder in model_folders ]
    sys.stderr.write("Found {} model(s)\n".format(len(models)))
    for model in models:
        sys.stderr.write("  {name}: {eps_trained}\n".format(name = model[0],
                                                            eps_trained = model[1]))

elif args.train:
    network = Network(config, config['model'])
    eps = config['start_episode']
    while True:
        train_outcome = network.train_model(episodes = episode_count, trained_eps = eps)
        eps += episode_count
        log_train_outcome(train_outcome, trained_eps = eps)
        if config['eval_after_train']:
            eval_outcomes = network.eval(trained_eps = eps)
            log_eval_outcomes(eval_outcomes, trained_eps = eps)
        if not config['train_perpetually']:
            break

elif args.eval:
    network = Network(config, config['model'])
    eps = config['start_episode']
    outcomes = network.eval()
    log_eval_outcomes(outcomes, trained_eps = eps)

#elif args.play:
#    network.play(episodes = episode_count)
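
# A minimal usage sketch based on the flags defined above; the filename main.py
# and the model name my-model are assumptions, not part of this script:
#
#   python main.py --list-models
#   python main.py --train --model my-model --episodes 1000 --eval-after-train
#   python main.py --eval --model my-model --eval-methods random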