Merge branch 'rework-1' into 'fuck_git'

Rework 1

See merge request Pownie/backgammon!2
Commit d4e699bc49
.gitignore (vendored), 3 changes
@@ -169,3 +169,6 @@ venv.bak/
 README.*
 !README.org
 models/
+.DS_Store
+bench/
+
main.py, 159 changes
@@ -3,38 +3,6 @@ import sys
 import os
 import time
-
-model_storage_path = 'models'
-
-# Create models folder
-if not os.path.exists(model_storage_path):
-    os.makedirs(model_storage_path)
-
-# Define helper functions
-def log_train_outcome(outcome, trained_eps = 0):
-    format_vars = { 'trained_eps': trained_eps,
-                    'count': len(train_outcome),
-                    'sum': sum(train_outcome),
-                    'mean': sum(train_outcome) / len(train_outcome),
-                    'time': int(time.time())
-    }
-    with open(os.path.join(config['model_path'], 'logs', "train.log"), 'a+') as f:
-        f.write("{time};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")
-
-
-def log_eval_outcomes(outcomes, trained_eps = 0):
-    for outcome in outcomes:
-        scores = outcome[1]
-        format_vars = { 'trained_eps': trained_eps,
-                        'method': outcome[0],
-                        'count': len(scores),
-                        'sum': sum(scores),
-                        'mean': sum(scores) / len(scores),
-                        'time': int(time.time())
-        }
-        with open(os.path.join(config['model_path'], 'logs', "eval.log"), 'a+') as f:
-            f.write("{time};{method};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")
-
-
 # Parse command line arguments
 parser = argparse.ArgumentParser(description="Backgammon games")
 parser.add_argument('--episodes', action='store', dest='episode_count',
@@ -47,13 +15,15 @@ parser.add_argument('--eval-methods', action='store',
                     default=['random'], nargs='*',
                     help='specifies evaluation methods')
 parser.add_argument('--eval', action='store_true',
-                    help='whether to evaluate the neural network with a random choice bot')
+                    help='evaluate the neural network with a random choice bot')
+parser.add_argument('--bench-eval-scores', action='store_true',
+                    help='benchmark scores of evaluation measures. episode counts and model specified as options are ignored.')
 parser.add_argument('--train', action='store_true',
-                    help='whether to train the neural network')
+                    help='train the neural network')
 parser.add_argument('--eval-after-train', action='store_true', dest='eval_after_train',
-                    help='whether to evaluate after each training session')
+                    help='evaluate after each training session')
 parser.add_argument('--play', action='store_true',
-                    help='whether to play with the neural network')
+                    help='play with the neural network')
 parser.add_argument('--start-episode', action='store', dest='start_episode',
                     type=int, default=0,
                     help='episode count to start at; purely for display purposes')
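Note (not part of the commit): a minimal, self-contained sketch of how the new --bench-eval-scores flag parses alongside --eval-methods; only the two arguments shown above are reconstructed, and the sample argv is invented.

import argparse

# Illustrative only: minimal reconstruction of the two relevant arguments.
parser = argparse.ArgumentParser(description="Backgammon games")
parser.add_argument('--eval-methods', action='store',
                    default=['random'], nargs='*',
                    help='specifies evaluation methods')
parser.add_argument('--bench-eval-scores', action='store_true',
                    help='benchmark scores of evaluation measures.')

args = parser.parse_args(['--bench-eval-scores', '--eval-methods', 'random'])
assert args.bench_eval_scores is True
assert args.eval_methods == ['random']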
@@ -66,27 +36,73 @@ args = parser.parse_args()
 
 config = {
     'model': args.model,
-    'model_path': os.path.join(model_storage_path, args.model),
     'episode_count': args.episode_count,
     'eval_methods': args.eval_methods,
     'train': args.train,
     'play': args.play,
     'eval': args.eval,
+    'bench_eval_scores': args.bench_eval_scores,
     'eval_after_train': args.eval_after_train,
     'start_episode': args.start_episode,
     'train_perpetually': args.train_perpetually,
-    'model_storage_path': model_storage_path
+    'model_storage_path': 'models',
+    'bench_storage_path': 'bench'
 }
 
+# Create models folder
+if not os.path.exists(config['model_storage_path']):
+    os.makedirs(config['model_storage_path'])
+
+model_path = lambda: os.path.join(config['model_storage_path'], config['model'])
+
 # Make sure directories exist
-model_path = os.path.join(config['model_path'])
-log_path = os.path.join(model_path, 'logs')
-if not os.path.isdir(model_path):
-    os.mkdir(model_path)
+log_path = os.path.join(model_path(), 'logs')
+if not os.path.isdir(model_path()):
+    os.mkdir(model_path())
 
 if not os.path.isdir(log_path):
     os.mkdir(log_path)
 
+
+# Define helper functions
+def log_train_outcome(outcome, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "train.log")):
+    format_vars = { 'trained_eps': trained_eps,
+                    'count': len(train_outcome),
+                    'sum': sum(train_outcome),
+                    'mean': sum(train_outcome) / len(train_outcome),
+                    'time': int(time.time())
+    }
+    with open(log_path, 'a+') as f:
+        f.write("{time};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")
+
+
+def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "eval.log")):
+    for outcome in outcomes:
+        scores = outcome[1]
+        format_vars = { 'trained_eps': trained_eps,
+                        'method': outcome[0],
+                        'count': len(scores),
+                        'sum': sum(scores),
+                        'mean': sum(scores) / len(scores),
+                        'time': int(time.time())
+        }
+        with open(log_path, 'a+') as f:
+            f.write("{time};{method};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")
+
+
+def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0):
+    for outcome in outcomes:
+        scores = outcome[1]
+        format_vars = { 'trained_eps': trained_eps,
+                        'method': outcome[0],
+                        'count': len(scores),
+                        'sum': sum(scores),
+                        'mean': sum(scores) / len(scores),
+                        'time': time,
+                        'index': index,
+        }
+        with open(log_path, 'a+') as f:
+            f.write("{method};{count};{index};{time};{sum};{mean}".format(**format_vars) + "\n")
+
 # Do actions specified by command-line
 if args.list_models:
     def get_eps_trained(folder):
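Note (not part of the commit): for reference, the record shapes the relocated helpers append to train.log and eval.log, following the format strings above; the numbers are invented.

# Illustrative only: one train.log record and one eval.log record.
train_line = "{time};{trained_eps};{count};{sum};{mean}".format(
    time=1522849200, trained_eps=1000, count=500, sum=37, mean=0.074)
eval_line = "{time};{method};{trained_eps};{count};{sum};{mean}".format(
    time=1522849200, method="random", trained_eps=1000, count=250, sum=190, mean=0.76)
print(train_line)  # 1522849200;1000;500;37;0.074
print(eval_line)   # 1522849200;random;1000;250;190;0.76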
@@ -94,7 +110,7 @@ if args.list_models:
             return int(f.read())
     model_folders = [ f.path
                       for f
-                      in os.scandir(model_storage_path)
+                      in os.scandir(config['model_storage_path'])
                       if f.is_dir() ]
     models = [ (folder, get_eps_trained(folder)) for folder in model_folders ]
     sys.stderr.write("Found {} model(s)\n".format(len(models)))
@@ -106,13 +122,13 @@ if args.list_models:
 if __name__ == "__main__":
     # Set up network
     from network import Network
-    network = Network(config, config['model'])
-    start_episode = network.episodes_trained
 
     # Set up variables
     episode_count = config['episode_count']
 
     if args.train:
+        network = Network(config, config['model'])
+        start_episode = network.episodes_trained
         while True:
             train_outcome = network.train_model(episodes = episode_count, trained_eps = start_episode)
             start_episode += episode_count
@@ -122,9 +138,58 @@ if __name__ == "__main__":
             log_eval_outcomes(eval_outcomes, trained_eps = start_episode)
             if not config['train_perpetually']:
                 break
 
     elif args.eval:
-        outcomes = network.eval()
+        network = Network(config, config['model'])
+        start_episode = network.episodes_trained
+        # Evaluation measures are described in `config`
+        outcomes = network.eval(config['episode_count'])
         log_eval_outcomes(outcomes, trained_eps = start_episode)
     # elif args.play:
     #     g.play(episodes = episode_count)
 
+
+    elif args.bench_eval_scores:
+        # Make sure benchmark directory exists
+        if not os.path.isdir(config['bench_storage_path']):
+            os.mkdir(config['bench_storage_path'])
+
+        config = config.copy()
+        config['model'] = 'bench'
+
+        network = Network(config, config['model'])
+        start_episode = network.episodes_trained
+
+        if start_episode == 0:
+            print("Model not trained! Beware of using non-existing models!")
+            exit()
+
+        sample_count = 20
+        episode_counts = [25, 50, 100, 250, 500, 1000, 2500, 5000,
+                          10000, 20000]
+
+        def do_eval(sess):
+            for eval_method in config['eval_methods']:
+                result_path = os.path.join(config['bench_storage_path'],
+                                           eval_method) + "-{}.log".format(int(time.time()))
+                for n in episode_counts:
+                    for i in range(sample_count):
+                        start_time = time.time()
+                        # Evaluation measure to be benchmarked are described in `config`
+                        outcomes = network.eval(episode_count = n,
+                                                tf_session = sess)
+                        time_diff = time.time() - start_time
+                        log_bench_eval_outcomes(outcomes,
+                                                time = time_diff,
+                                                index = i,
+                                                trained_eps = start_episode,
+                                                log_path = result_path)
+
+        # CMM: oh no
+        import tensorflow as tf
+        with tf.Session() as session:
+            network.restore_model(session)
+            do_eval(session)
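Note (not part of the commit): for reference, one line of the bench log that do_eval() appends via log_bench_eval_outcomes(), following the "{method};{count};{index};{time};{sum};{mean}" format string added earlier; the numbers are invented.

# Illustrative only: reconstructing a single bench-log record.
record = "{method};{count};{index};{time};{sum};{mean}".format(
    method="random",  # evaluation method being benchmarked
    count=25,         # episodes per sample (n in the loop above)
    index=3,          # sample index i, out of sample_count = 20
    time=41.7,        # wall-clock seconds measured around network.eval()
    sum=5,            # summed outcome over the sample
    mean=0.2)         # sum / count
print(record)  # random;25;3;41.7;5;0.2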
network.py, 42 changes
@@ -22,7 +22,7 @@ class Network:
 
     def __init__(self, config, name):
         self.config = config
-        self.checkpoint_path = config['model_path']
+        self.checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
 
         self.name = name
 
@@ -388,7 +388,25 @@ class Network:
             print_time_estimate(episode)
 
         sys.stderr.write("[TRAIN] Saving model for final episode...\n")
-        self.save_model(sess, episode + trained_eps)
+        self.save_model(sess, episode+trained_eps)
 
+        writer.close()
+
+        return outcomes
+
+
+    # take turn, which finds the best state and picks it, based on the current network
+    # save current state
+    # run training operation (session.run(self.training_op, {x:x, value_next, value_next})), (something which does the backprop, based on the state after having taken a turn, found before, and the state we saved in the beginning and from now we'll save it at the end of the turn
+    # save the current state again, so we can continue running backprop based on the "previous" turn.
+
+    # NOTE: We need to make a method so that we can take a single turn or at least just pick the next best move, so we know how to evaluate according to TD-learning. Right now, our game just continues in a while loop without nothing to stop it!
+
+
+    def eval(self, episode_count, trained_eps = 0, tf_session = None):
+        def do_eval(sess, method, episodes = 1000, trained_eps = 0):
+            start_time = time.time()
 
         writer.close()
 
@@ -403,3 +421,23 @@ class Network:
 
         # save the current state again, so we can continue running backprop based on the "previous" turn.
 
+
+        if tf_session == None:
+            with tf.Session():
+                session.run(tf.global_variables_initializer())
+                self.restore_model(session)
+                outcomes = [ (method, do_eval(session,
+                                              method,
+                                              episode_count,
+                                              trained_eps = trained_eps))
+                             for method
+                             in self.config['eval_methods'] ]
+                return outcomes
+        else:
+            outcomes = [ (method, do_eval(tf_session,
+                                          method,
+                                          episode_count,
+                                          trained_eps = trained_eps))
+                         for method
+                         in self.config['eval_methods'] ]
+            return outcomes
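Note (not part of the commit): a sketch of the two call patterns main.py now uses for the reworked eval(), assuming main.py's `config` dict and the project's Network class; the episode count is invented.

# Illustrative only: both paths appear in the main.py diff above.
import tensorflow as tf
from network import Network

network = Network(config, config['model'])   # `config` as built in main.py

# Standalone evaluation: eval() opens its own session and restores the model itself.
outcomes = network.eval(episode_count=500)

# Benchmarking: reuse a single session across repeated eval() calls.
with tf.Session() as session:
    network.restore_model(session)
    outcomes = network.eval(episode_count=500, tf_session=session)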
plot.py, 17 changes
@@ -9,9 +9,26 @@ import matplotlib.dates as mdates
 
 train_headers = ['timestamp', 'eps_train', 'eps_trained_session', 'sum', 'mean']
 eval_headers = ['timestamp', 'method', 'eps_train', 'eval_eps_used', 'sum', 'mean']
+bench_headers = ['method', 'sample_count', 'i', 'time', 'sum', 'mean']
 
 model_path = 'models'
 
+def plot_bench(data_path):
+    df = pd.read_csv(data_path, sep=";",
+                     names=bench_headers, index_col=[0,1,2])
+    for method_label in df.index.levels[0]:
+        df_prime = df[['mean']].loc[method_label].unstack().T
+        plot = df_prime.plot.box()
+        plot.set_title("Evaluation variance, {}".format(method_label))
+        plot.set_xlabel("Sample count")
+        plot.set_ylabel("Mean score")
+        plt.show(plot.figure)
+
+        # for later use:
+        variances = df_prime.var()
+        print(variances)
+
+        del df_prime, plot, variances
+
 def dataframes(model_name):
     def df_timestamp_to_datetime(df):
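Note (not part of the commit): a hypothetical call to the new plot_bench(); the timestamp in the filename is invented, but the "bench/<method>-<unix time>.log" pattern matches what main.py's benchmark branch writes.

# Illustrative only: plotting one benchmark log produced by --bench-eval-scores.
from plot import plot_bench

plot_bench("bench/random-1522849200.log")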