backgammon/main.py

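"""Command-line entry point for training, evaluating, and benchmarking a
backgammon-playing neural network.

Example invocations (illustrative only; the flags are defined below):

    python main.py --train --model default --episodes 1000
    python main.py --eval --model default --eval-methods random
    python main.py --list-models
"""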
import argparse
import sys
import os
import time
# Parse command line arguments
parser = argparse.ArgumentParser(description="Backgammon games")
parser.add_argument('--episodes', action='store', dest='episode_count',
                    type=int, default=1000,
                    help='number of episodes to train')
parser.add_argument('--model', action='store', dest='model',
                    default='default',
                    help='name of Tensorflow model to use')
parser.add_argument('--eval-methods', action='store',
                    default=['random'], nargs='*',
                    help='specifies evaluation methods')
parser.add_argument('--eval', action='store_true',
                    help='evaluate the neural network with a random choice bot')
parser.add_argument('--bench-eval-scores', action='store_true',
                    help='benchmark scores of evaluation measures; episode counts and model specified as options are ignored')
parser.add_argument('--train', action='store_true',
                    help='train the neural network')
parser.add_argument('--eval-after-train', action='store_true', dest='eval_after_train',
                    help='evaluate after each training session')
parser.add_argument('--play', action='store_true',
                    help='play with the neural network')
parser.add_argument('--start-episode', action='store', dest='start_episode',
                    type=int, default=0,
                    help='episode count to start at; purely for display purposes')
parser.add_argument('--train-perpetually', action='store_true',
                    help='start a new training session as soon as the previous one is finished')
parser.add_argument('--list-models', action='store_true',
                    help='list all known models')
parser.add_argument('--force-creation', action='store_true',
                    help='force model creation if the model does not exist')

args = parser.parse_args()

if args.model == "baseline_model":
    print("Model name 'baseline_model' not allowed")
    sys.exit()

config = {
    'model': args.model,
    'episode_count': args.episode_count,
    'eval_methods': args.eval_methods,
    'train': args.train,
    'play': args.play,
    'eval': args.eval,
    'bench_eval_scores': args.bench_eval_scores,
    'eval_after_train': args.eval_after_train,
    'start_episode': args.start_episode,
    'train_perpetually': args.train_perpetually,
    'model_storage_path': 'models',
    'bench_storage_path': 'bench',
    'board_representation': 'quack',
    'force_creation': args.force_creation
}

# Create models folder
if not os.path.exists(config['model_storage_path']):
    os.makedirs(config['model_storage_path'])

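# model_path is a function rather than a precomputed string because
# config['model'] may be reassigned later (the --bench-eval-scores path
# switches it to 'bench').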
def model_path():
    return os.path.join(config['model_storage_path'], config['model'])

# Make sure directories exist
log_path = os.path.join(model_path(), 'logs')
if not os.path.isdir(model_path()):
    os.mkdir(model_path())
if not os.path.isdir(log_path):
    os.mkdir(log_path)


# Define helper functions
def log_train_outcome(outcome, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "train.log")):
    format_vars = { 'trained_eps': trained_eps,
                    'count': len(outcome),
                    'sum': sum(outcome),
                    'mean': sum(outcome) / len(outcome),
                    'time': int(time.time())
                    }
    with open(log_path, 'a+') as f:
        f.write("{time};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")

def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "eval.log")):
    for outcome in outcomes:
        scores = outcome[1]
        format_vars = { 'trained_eps': trained_eps,
                        'method': outcome[0],
                        'count': len(scores),
                        'sum': sum(scores),
                        'mean': sum(scores) / len(scores),
                        'time': int(time.time())
                        }
        with open(log_path, 'a+') as f:
            f.write("{time};{method};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")

def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0):
    for outcome in outcomes:
        scores = outcome[1]
        format_vars = { 'trained_eps': trained_eps,
                        'method': outcome[0],
                        'count': len(scores),
                        'sum': sum(scores),
                        'mean': sum(scores) / len(scores),
                        'time': time,
                        'index': index,
                        }
        with open(log_path, 'a+') as f:
            f.write("{method};{count};{index};{time};{sum};{mean}".format(**format_vars) + "\n")

# Do actions specified by command-line
if args.list_models:
    def get_eps_trained(folder):
        with open(os.path.join(folder, 'episodes_trained'), 'r') as f:
            return int(f.read())

    model_folders = [ f.path
                      for f
                      in os.scandir(config['model_storage_path'])
                      if f.is_dir() ]
    models = [ (folder, get_eps_trained(folder)) for folder in model_folders ]
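    # Example output on stderr (illustrative):
    #   Found 2 model(s)
    #    models/default: 5000
    #    models/bench: 0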
sys.stderr.write("Found {} model(s)\n".format(len(models)))
for model in models:
sys.stderr.write(" {name}: {eps_trained}\n".format(name = model[0], eps_trained = model[1]))
2018-03-20 12:17:38 +00:00
exit()

if __name__ == "__main__":
    # Set up network
    from network import Network

    # Set up variables
    episode_count = config['episode_count']

    if args.train:
        network = Network(config, config['model'])
        start_episode = network.episodes_trained

        while True:
            train_outcome = network.train_model(episodes = episode_count, trained_eps = start_episode)
            start_episode += episode_count
            log_train_outcome(train_outcome, trained_eps = start_episode)
            if config['eval_after_train']:
                eval_outcomes = network.eval(trained_eps = start_episode)
                log_eval_outcomes(eval_outcomes, trained_eps = start_episode)
            if not config['train_perpetually']:
                break

    elif args.eval:
        network = Network(config, config['model'])
        start_episode = network.episodes_trained
        # Evaluation measures are described in `config`
        outcomes = network.eval(config['episode_count'])
        log_eval_outcomes(outcomes, trained_eps = start_episode)

    # elif args.play:
    #     g.play(episodes = episode_count)

    elif args.bench_eval_scores:
        # Make sure benchmark directory exists
        if not os.path.isdir(config['bench_storage_path']):
            os.mkdir(config['bench_storage_path'])

        config = config.copy()
        config['model'] = 'bench'
        network = Network(config, config['model'])
        start_episode = network.episodes_trained
        if start_episode == 0:
            print("Model not trained! Beware of using non-existent models!")
            sys.exit()

        sample_count = 20
        episode_counts = [25, 50, 100, 250, 500, 1000, 2500, 5000,
                          10000, 20000]
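
        # For each evaluation method, run every episode count `sample_count`
        # times, timing each run and appending one line per run to that
        # method's log file.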
        def do_eval(sess):
            for eval_method in config['eval_methods']:
                result_path = os.path.join(config['bench_storage_path'],
                                           eval_method) + "-{}.log".format(int(time.time()))
                for n in episode_counts:
                    for i in range(sample_count):
                        start_time = time.time()
                        # Evaluation measures to be benchmarked are described in `config`
                        outcomes = network.eval(episode_count = n,
                                                tf_session = sess)
                        time_diff = time.time() - start_time
                        log_bench_eval_outcomes(outcomes,
                                                time = time_diff,
                                                index = i,
                                                trained_eps = start_episode,
                                                log_path = result_path)
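
        # TensorFlow is imported only at this point, presumably so that the
        # lighter code paths above (e.g. --list-models) do not pay the
        # import cost.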
        # CMM: oh no
        import tensorflow as tf

        with tf.Session() as session:
            network.restore_model(session)
            do_eval(session)