Compare commits


59 Commits

Author SHA1 Message Date
ea4efc5a2b Updated server code. 2018-06-07 21:36:06 +02:00
26c0b469eb restore restore_model 2018-05-22 20:49:10 +02:00
f170bad9b1 tesauro fat and diffs in values 2018-05-22 15:39:14 +02:00
6e061171da rm TODO 2018-05-22 15:38:04 +02:00
40c228ef01 pubeval tests 2018-05-22 15:36:23 +02:00
c2c6c89e9f Merge branch 'experimentation' into 'master'
Experimentation

See merge request Pownie/backgammon!8
2018-05-22 13:16:10 +00:00
b7708b3675 train-evaluate-save 2018-05-22 15:15:36 +02:00
bad870c27a update 0-ply-tests 2018-05-22 15:15:15 +02:00
653d6e30a8 add missing comma 2018-05-22 15:12:47 +02:00
7e51b44e33 Merge branch 'experimentation' into 'master'
tesauro fat and diffs in values

See merge request Pownie/backgammon!7
2018-05-22 13:12:10 +00:00
1fd6c35baa Merge branch 'master' into 'experimentation'
# Conflicts:
#   main.py
2018-05-22 13:11:43 +00:00
d426c1c3b5 tesauro fat and diffs in values 2018-05-22 15:10:41 +02:00
5ab144cffc add git commit status to all logs 2018-05-22 14:44:13 +02:00
cef8e54709 Merge branch 'master' of gitfub.space:Pownie/backgammon 2018-05-22 14:37:46 +02:00
2efbc446f2 log git commit status in evaluation logs 2018-05-22 14:37:27 +02:00
c54f7aca24 Merge branch 'experimentation' into 'master'
Experimentation

See merge request Pownie/backgammon!6
2018-05-22 12:36:37 +00:00
c31bc39780 More server 2018-05-22 00:26:32 +02:00
6133cb439f Merge remote-tracking branch 'origin/experimentation' into experimentation 2018-05-20 20:15:57 +02:00
5acd79b6da Slight modification to move calculation 2018-05-20 19:43:28 +02:00
b11e783b30 add 0-ply-tests 2018-05-20 18:50:28 +02:00
f834b10e02 remove unnecessary print 2018-05-20 16:52:05 +02:00
72f01a2a2d remove dependency on yaml 2018-05-20 16:03:58 +02:00
d14e6c5994 Everything might work, except for quad, that might be bugged. 2018-05-20 00:38:13 +02:00
a266293ecd Stuff is happening, moving is better! 2018-05-19 22:01:55 +02:00
e9a46c79df server and stuff 2018-05-19 14:12:13 +02:00
816cdfae00 fix and clean 2018-05-18 14:55:10 +02:00
ff9664eb38 Merge branch 'eager_eval' into 'master'
Eager eval

See merge request Pownie/backgammon!5
2018-05-18 12:06:12 +00:00
3e379b40c4 Accidentally added a '5' in the middle of a variable. 2018-05-16 00:20:54 +02:00
90fad334b9 More optimizations. 2018-05-15 23:37:35 +02:00
a77c13a0a4 1-ply runs even faster. 2018-05-15 19:29:27 +02:00
260c32d909 oiuhhiu 2018-05-15 18:16:44 +02:00
00974b0f11 Added '--play' flag, so you can now play against the ai. 2018-05-14 13:07:48 +02:00
2c02689577 Merge remote-tracking branch 'origin/eager_eval' into eager_eval 2018-05-13 23:55:02 +02:00
926a331df0 Some flags from main.py is gone, rolls now allow a face_value of 0 yet
again and it is possible to play against the ai. There is no flag
for this yet, so this has to be added.
2018-05-13 23:54:13 +02:00
d932663519 add explanation of ply speedup 2018-05-13 22:26:24 +02:00
2312c9cb2a Merge branch 'eager_eval' of gitfub.space:Pownie/backgammon into eager_eval 2018-05-12 15:19:12 +02:00
9f1bd56c0a fix bear_off bug; addtional tests and additional fixes 2018-05-12 15:18:52 +02:00
ba4ef86bb5 Board rep can now be inferred from file after being given once.
We can also evaluate multiple times by using the flag "--repeat-eval".
The flag defaults to 1, if not provided.
2018-05-12 12:14:47 +02:00
c3f5e909d6 flip is back 2018-05-11 21:47:48 +02:00
1aa9cf705f quack without leaks 2018-05-11 21:24:10 +02:00
383dd7aa4b code works again; quack gave ~3 times improvement for calc_moves 2018-05-11 20:13:43 +02:00
93188fe06b more quack for board 2018-05-11 20:07:27 +02:00
ffbc98e1a2 quack kind of works 2018-05-11 19:00:39 +02:00
03e61a59cf quack 2018-05-11 17:29:22 +02:00
93224864a4 More comments, backprop have been somewhat tested in the eager_main.py
and normal_main.py.
2018-05-11 13:35:01 +02:00
504308a9af Yet another input argument, "--ply", 0 for no look-ahead, 1 for a single
look-ahead.
2018-05-10 23:22:41 +02:00
3b57c10b5a Saves calling tf.reduce_mean on all values once. 2018-05-10 22:57:27 +02:00
4fa10861bb update TF dependency to 1.8.0 2018-05-10 19:27:51 +02:00
6131d5b5f4 Added comments for Christoffer! 2018-05-10 19:25:28 +02:00
1aedc23de1 1-ply now works again. 2018-05-10 19:13:18 +02:00
2d84cd5a0b 1-ply now works again. 2018-05-10 19:06:53 +02:00
396d5b036d All values for boards and all rolls can now be calculated 2018-05-10 18:41:21 +02:00
4efb229d34 Added a lot of comments 2018-05-10 15:28:33 +02:00
f2a67ca92e All board reps should now work as input. 2018-05-10 10:49:25 +02:00
9cfdd7e2b2 Added a verbosity flag, --verbose, which allows for printing of
variables and such.
2018-05-10 10:39:22 +02:00
6429e0732c We should now be able to both train and eval as per usual.
I've added a file "global_step", which works as the new global_step
counter, so we can use it for exp_decay.
2018-05-09 23:15:35 +02:00
cb7e7b519c Getting closer to functionality. We're capable of evaluating moves
and a rework of global_step has begun, such that we now use
episode_count as a way of calculating exp_decay, which have been
implemented as a function.
2018-05-09 22:22:12 +02:00
9a2d87516e Ongoing rewrite of network to use an eager model. We're now capable of
evaluating a list of states with network.py. We can also save and
restore models.
2018-05-09 00:33:05 +02:00
7b308be4e2 Different implementations of different speed 2018-05-07 22:24:47 +02:00
14 changed files with 1452 additions and 501 deletions

app.py Normal file (141 lines added)

@@ -0,0 +1,141 @@
from flask import Flask, request, jsonify
from flask_json import FlaskJSON, as_json_p
from flask_cors import CORS
from board import Board
from eval import Eval
import main
import random
from network import Network

app = Flask(__name__)

app.config['JSON_ADD_STATUS'] = False
app.config['JSON_JSONP_OPTIONAL'] = False

json = FlaskJSON(app)
CORS(app)

config = main.config.copy()
config['model'] = "player_testings"
config['ply'] = "0"
config['board_representation'] = 'tesauro'

network = Network(config, config['model'])
network.restore_model()

def calc_move_sets(from_board, roll, player):
    board = from_board
    sets = []
    total = 0
    for r in roll:
        # print("Value of r:", r)
        sets.append([Board.calculate_legal_states(board, player, [r, 0]), r])
        total += r
    sets.append([Board.calculate_legal_states(board, player, roll), total])
    return sets


def tmp_name(from_board, to_board, roll, player, total_moves, is_quad=False):
    sets = calc_move_sets(from_board, roll, player)
    return_board = from_board
    print("To board:\n", to_board)
    print("All sets:\n", sets)
    for idx, board_set in enumerate(sets):
        board_set[0] = list(board_set[0])
        # print(to_board)
        # print(board_set)
        if to_board in board_set[0]:
            # print("To board:", to_board)
            # print(board_set[0])
            # print(board_set[1])
            total_moves -= board_set[1]
            # if it's not the sum of the moves
            if idx < (4 if is_quad else 2):
                roll[idx] = 0
            else:
                roll = [0, 0]
            return_board = to_board
            break
    # print("Return board!:\n", return_board)
    return total_moves, roll, return_board


def calc_move_stuff(from_board, to_board, roll, player, total_roll, is_quad):
    total_moves, roll, board = tmp_name(from_board, to_board, list(roll), player, total_roll, is_quad)
    return board, total_moves, roll


@app.route('/get_board', methods=['GET'])
@as_json_p
def get_board():
    return {'board': '0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0'}


def check_move(prev, curr):
    # TODO: Decide on player system and implement roll properly
    legal_states = Board.calculate_legal_states(tuple(prev), -1, [1, 2])
    truth_list = [list(curr) == list(ele) for ele in legal_states]
    return any(truth_list)


@app.route('/bot_move', methods=['POST'])
def bot_move():
    data = request.get_json(force=True)

    board = [int(x) for x in data['board'].split(',')]
    use_pubeval = bool(data['pubeval'])

    roll = (random.randrange(1, 7), random.randrange(1, 7))

    if use_pubeval:
        board, value = Eval.make_pubeval_move(tuple(board), 1, roll)
    else:
        board, _ = network.make_move(tuple(board), roll, 1)

    # print("Board!:", board)

    return ",".join([str(x) for x in list(board)])


@app.route('/post_board', methods=['POST'])
def post_board():
    data = request.get_json(force=True)

    # TODO: Fix hardcoded player
    player = -1

    board = [int(x) for x in data['board'].split(',')]
    prev_board = [int(x) for x in data['prevBoard'].split(',')]

    print(data['roll'])
    roll = [int(x) for x in data['roll'].split(',')]
    print(roll)

    quad = data['quad'] == "true"

    # print(board)
    total_roll = int(data['totalRoll'])
    print("total roll is:", total_roll)

    return_board, total_moves, roll = calc_move_stuff(tuple(prev_board), tuple(board), tuple(roll), player, total_roll, quad)

    str_board = ",".join([str(x) for x in return_board])
    str_roll = ",".join([str(x) for x in roll])

    return_string = str_board + "#" + str(total_moves) + "#" + str_roll
    print(return_string)

    return return_string


if __name__ == '__main__':
    app.run(host = '0.0.0.0', port=35270)
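Note: a quick way to exercise the new server is a POST against a locally running instance. A minimal sketch (it assumes the app above is running on port 35270 and that the third-party requests package is installed; the example board string mirrors the one get_board returns):

import requests

BASE = "http://localhost:35270"  # port used by app.run() above

# Starting board, in the same comma-separated encoding that get_board() returns.
board = "0,2,0,0,0,0,-5,0,-3,0,0,0,5,-5,0,0,0,3,0,5,0,0,0,0,-2,0"

# Ask the network player (pubeval=False) to roll and move on this board.
resp = requests.post(BASE + "/bot_move", json={'board': board, 'pubeval': False})
print(resp.text)  # comma-separated board after the bot's move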

bin/0-ply-tests.rb Normal file (78 lines added)

@@ -0,0 +1,78 @@
def run_stuff(board_rep, model_name, ply)
  epi_count = 0
  system("python3 main.py --train --model #{model_name} --board-rep #{board_rep} --episodes 1 --ply #{ply}")
  while epi_count < 200000 do
    system("python3 main.py --eval --model #{model_name} --eval-methods dumbeval --episodes 250 --ply #{ply} --repeat-eval 3")
    system("python3 main.py --eval --model #{model_name} --eval-methods pubeval --episodes 250 --ply #{ply} --repeat-eval 3")
    system("python3 main.py --train --model #{model_name} --episodes 2000 --ply #{ply}")
    epi_count += 2000
  end
end

### ///////////////////////////////////////////////////////////////
# QUACK TESTINGS
### ///////////////////////////////////////////////////////////////

board_rep = "quack"
model_name = "quack_test_0_ply"
ply = 0

run_stuff(board_rep, model_name, ply)

# board_rep = "quack"
# model_name = "quack_test_1_ply"
# ply = 1

# run_stuff(board_rep, model_name, ply)

### ///////////////////////////////////////////////////////////////
# QUACK-FAT TESTING
### ///////////////////////////////////////////////////////////////

board_rep = "quack-fat"
model_name = "quack-fat_test_0_ply"
ply = 0

run_stuff(board_rep, model_name, ply)

# board_rep = "quack-fat"
# model_name = "quack-fat_test_1_ply"
# ply = 1

# run_stuff(board_rep, model_name, ply)

### ///////////////////////////////////////////////////////////////
# QUACK-NORM TESTING
### ///////////////////////////////////////////////////////////////

board_rep = "quack-norm"
model_name = "quack-norm_test_0_ply"
ply = 0

run_stuff(board_rep, model_name, ply)

# board_rep = "quack-norm"
# model_name = "quack-norm_test_1_ply"
# ply = 1

# run_stuff(board_rep, model_name, ply)

### ///////////////////////////////////////////////////////////////
# TESAURO TESTING
### ///////////////////////////////////////////////////////////////

board_rep = "tesauro"
model_name = "tesauro_test_0_ply"
ply = 0

run_stuff(board_rep, model_name, ply)

# board_rep = "tesauro"
# model_name = "tesauro_test_1_ply"
# ply = 1

# run_stuff(board_rep, model_name, ply)

@@ -1,30 +1,30 @@
 #!/usr/bin/env ruby
+MODELS_DIR = 'models'
+
 def save(model_name)
   require 'date'
-  models_dir = 'models'
-  model_path = File.join(models_dir, model_name)
+  model_path = File.join(MODELS_DIR, model_name)
+
+  if not File.exists? model_path then
+    return false
+  end
+
   episode_count = (File.read File.join(model_path, 'episodes_trained')).to_i
   puts "Found model #{model_name} with episodes #{episode_count} trained!"
   file_name = "model-#{model_name}-#{episode_count}-#{Time.now.strftime('%Y%m%d-%H%M%S')}.tar.gz"
-  save_path = File.join(models_dir, 'saves', file_name)
+  save_path = File.join(MODELS_DIR, 'saves', file_name)
   puts "Saving to #{save_path}"
-  system("tar", "-cvzf", save_path, "-C", models_dir, model_name)
+  system("tar", "-cvzf", save_path, "-C", MODELS_DIR, model_name)
+  return true
 end
 
 def train(model, episodes)
   system("python3", "main.py", "--train", "--model", model, "--episodes", episodes.to_s)
 end
 
+def force_train(model, episodes)
+  system("python3", "main.py", "--train", "--force-creation", "--model", model, "--episodes", episodes.to_s)
+end
+
 def evaluate(model, episodes, method)
   system("python3", "main.py", "--eval" , "--model", model, "--episodes", episodes.to_s, "--eval-methods", method)
 end
@@ -33,11 +33,9 @@ model = ARGV[0]
 if model.nil? then raise "no model specified" end
 
-while true do
+if not File.exists? File.join(MODELS_DIR, model) then
+  force_train model, 10
   save model
-  train model, 1000
-  save model
-  train model, 1000
   3.times do
     evaluate model, 250, "pubeval"
   end
@@ -45,3 +43,27 @@ while true do
     evaluate model, 250, "dumbeval"
   end
 end
+
+# while true do
+#   save model
+#   train model, 1000
+#   save model
+#   train model, 1000
+#   3.times do
+#     evaluate model, 250, "pubeval"
+#   end
+#   3.times do
+#     evaluate model, 250, "dumbeval"
+#   end
+# end
+
+while true do
+  save model
+  train model, 500
+  5.times do
+    evaluate model, 250, "pubeval"
+  end
+  5.times do
+    evaluate model, 250, "dumbeval"
+  end
+end

bot.py (84 changed lines)

@@ -1,24 +1,8 @@
-from cup import Cup
-from network import Network
 from board import Board
-import tensorflow as tf
-import numpy as np
-import random
 
 class Bot:
-    def __init__(self, sym, config = None, name = "unnamed"):
-        self.config = config
-        self.cup = Cup()
+    def __init__(self, sym):
         self.sym = sym
-        self.graph = tf.Graph()
-        self.network = Network(config, name)
-        self.network.restore_model()
-
-    def restore_model(self):
-        with self.graph.as_default():
-            self.network.restore_model()
 
     def get_session(self):
         return self.session
@@ -26,16 +10,60 @@ class Bot:
     def get_sym(self):
         return self.sym
 
-    def get_network(self):
-        return self.network
-
-    # TODO: DEPRECATE
-    def make_move(self, board, sym, roll):
-        # print(Board.pretty(board))
-        legal_moves = Board.calculate_legal_states(board, sym, roll)
-        moves_and_scores = [ (move, self.network.eval_state(np.array(move).reshape(1,26))) for move in legal_moves ]
-        scores = [ x[1] for x in moves_and_scores ]
-        best_move_pair = moves_and_scores[np.array(scores).argmax()]
-        #print("Found the best state, being:", np.array(move_scores).argmax())
-        return best_move_pair
+    def calc_move_sets(self, from_board, roll, player):
+        board = from_board
+        sets = []
+        total = 0
+        print("board!:",board)
+        for r in roll:
+            # print("Value of r:",r)
+            sets.append([Board.calculate_legal_states(board, player, [r,0]), r])
+            total += r
+        sets.append([Board.calculate_legal_states(board, player, [total,0]), total])
+        return sets
+
+    def handle_move(self, from_board, to_board, roll, player):
+        # print("Cur board:",board)
+        sets = self.calc_move_sets(from_board, roll, player)
+        for idx, board_set in enumerate(sets):
+            board_set[0] = list(board_set[0])
+            # print("My board_set:",board_set)
+            if to_board in [list(c) for c in board_set[0]]:
+                self.total_moves -= board_set[1]
+                if idx < 2:
+                    # print("Roll object:",self.roll)
+                    self.roll[idx] = 0
+                else:
+                    self.roll = [0,0]
+                break
+        print("Total moves left:",self.total_moves)
+
+    def tmp_name(self, from_board, to_board, roll, player, total_moves):
+        sets = self.calc_move_sets(from_board, roll, player)
+        return_board = from_board
+        for idx, board_set in enumerate(sets):
+            board_set = list(board_set[0])
+            if to_board in [list(board) for board in board_set]:
+                total_moves -= board_set[1]
+                # if it's not the sum of the moves
+                if idx < 2:
+                    roll[idx] = 0
+                else:
+                    roll = [0,0]
+                return_board = to_board
+                break
+        return total_moves, roll, return_board
+
+    def make_human_move(self, board, player, roll):
+        total_moves = roll[0] + roll[1]
+        previous_board = board
+        while total_moves != 0:
+            move = input("Pick a move!\n")
+            to_board = Board.apply_moves_to_board(previous_board, player, move)
+            total_moves, roll, board = self.tmp_name(board, to_board, roll, player, total_moves)

main.py (91 changed lines)

@@ -2,6 +2,7 @@ import argparse
 import sys
 import os
 import time
+import subprocess
 
 # Parse command line arguments
 parser = argparse.ArgumentParser(description="Backgammon games")
@@ -31,19 +32,17 @@ parser.add_argument('--train-perpetually', action='store_true',
                     help='start new training session as soon as the previous is finished')
 parser.add_argument('--list-models', action='store_true',
                     help='list all known models')
-parser.add_argument('--force-creation', action='store_true',
-                    help='force model creation if model does not exist')
 parser.add_argument('--board-rep', action='store', dest='board_rep',
-                    default='tesauro',
                     help='name of board representation to use as input to neural network')
-parser.add_argument('--use-baseline', action='store_true',
-                    help='use the baseline model, note, has size 28')
+parser.add_argument('--verbose', action='store_true',
+                    help='If set, a lot of stuff will be printed')
+parser.add_argument('--ply', action='store', dest='ply', default='0',
+                    help='defines the amount of ply used when deciding what move to make')
+parser.add_argument('--repeat-eval', action='store', dest='repeat_eval', default='1',
+                    help='the amount of times the evaluation method should be repeated')
 
 args = parser.parse_args()
 
-if args.model == "baseline_model":
-    print("Model name 'baseline_model' not allowed")
-    exit()
-
 config = {
     'model': args.model,
@@ -59,10 +58,13 @@ config = {
     'model_storage_path': 'models',
     'bench_storage_path': 'bench',
     'board_representation': args.board_rep,
-    'force_creation': args.force_creation,
-    'use_baseline': args.use_baseline
+    'global_step': 0,
+    'verbose': args.verbose,
+    'ply': args.ply,
+    'repeat_eval': args.repeat_eval
 }
 
 # Create models folder
 if not os.path.exists(config['model_storage_path']):
     os.makedirs(config['model_storage_path'])
@@ -76,19 +78,20 @@ if not os.path.isdir(model_path()):
 if not os.path.isdir(log_path):
     os.mkdir(log_path)
 
 
 # Define helper functions
 def log_train_outcome(outcome, diff_in_values, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "train.log")):
+    commit = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE).stdout.decode('utf-8').rstrip()
     format_vars = { 'trained_eps': trained_eps,
                     'count': len(outcome),
                     'sum': sum(outcome),
                     'mean': sum(outcome) / len(outcome),
                     'time': int(time.time()),
-                    'average_diff_in_vals': diff_in_values/len(outcome)
+                    'average_diff_in_vals': diff_in_values,
+                    'commit': commit
     }
     with open(log_path, 'a+') as f:
-        f.write("{time};{trained_eps};{count};{sum};{mean};{average_diff_in_vals}".format(**format_vars) + "\n")
+        f.write("{time};{trained_eps};{count};{sum};{mean};{average_diff_in_vals};{commit}".format(**format_vars) + "\n")
 
 
 def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "eval.log")):
@@ -99,9 +102,12 @@ def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_p
     :param log_path:
     :return:
     """
+    commit = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE).stdout.decode('utf-8').rstrip()
     for outcome in outcomes:
         scores = outcome[1]
-        format_vars = { 'trained_eps': trained_eps,
+        format_vars = { 'commit': commit,
+                        'trained_eps': trained_eps,
                         'method': outcome[0],
                         'count': len(scores),
                         'sum': sum(scores),
@@ -109,9 +115,10 @@ def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_p
                         'time': int(time.time())
         }
         with open(log_path, 'a+') as f:
-            f.write("{time};{method};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")
+            f.write("{time};{method};{trained_eps};{count};{sum};{mean};{commit}".format(**format_vars) + "\n")
 
 def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0):
+    commit = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE).stdout.decode('utf-8').rstrip()
     for outcome in outcomes:
         scores = outcome[1]
         format_vars = { 'trained_eps': trained_eps,
@@ -121,9 +128,28 @@ def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0):
                         'mean': sum(scores) / len(scores),
                         'time': time,
                         'index': index,
+                        'commit': commit
         }
         with open(log_path, 'a+') as f:
-            f.write("{method};{count};{index};{time};{sum};{mean}".format(**format_vars) + "\n")
+            f.write("{method};{count};{index};{time};{sum};{mean};{commit}".format(**format_vars) + "\n")
+
+def find_board_rep():
+    checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
+    board_rep_path = os.path.join(checkpoint_path, "board_representation")
+    with open(board_rep_path, 'r') as f:
+        return f.read()
+
+def board_rep_file_exists():
+    checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
+    board_rep_path = os.path.join(checkpoint_path, "board_representation")
+    return os.path.isfile(board_rep_path)
+
+def create_board_rep():
+    checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
+    board_rep_path = os.path.join(checkpoint_path, "board_representation")
+    with open(board_rep_path, 'a+') as f:
+        f.write(config['board_representation'])
 
 # Do actions specified by command-line
 if args.list_models:
@@ -148,6 +174,22 @@ if __name__ == "__main__":
     # Set up variables
     episode_count = config['episode_count']
 
+    if config['board_representation'] is None:
+        if board_rep_file_exists():
+            config['board_representation'] = find_board_rep()
+        else:
+            sys.stderr.write("Was not given a board_rep and was unable to find a board_rep file\n")
+            exit()
+    else:
+        if not board_rep_file_exists():
+            create_board_rep()
+        else:
+            if config['board_representation'] != find_board_rep():
+                sys.stderr.write("Board representation \"{given}\", does not match one in board_rep file, \"{board_rep}\"\n".
+                                 format(given = config['board_representation'], board_rep = find_board_rep()))
+                exit()
+
     if args.train:
         network = Network(config, config['model'])
         start_episode = network.episodes_trained
@@ -161,9 +203,15 @@ if __name__ == "__main__":
             if not config['train_perpetually']:
                 break
 
+    elif args.play:
+        network = Network(config, config['model'])
+        network.play_against_network()
+
     elif args.eval:
         network = Network(config, config['model'])
-        start_episode = network.episodes_trained
-        # Evaluation measures are described in `config`
-        outcomes = network.eval(config['episode_count'])
+        network.restore_model()
+        for i in range(int(config['repeat_eval'])):
+            start_episode = network.episodes_trained
+            # Evaluation measures are described in `config`
+            outcomes = network.eval(config['episode_count'])
@@ -191,7 +239,7 @@ if __name__ == "__main__":
         episode_counts = [25, 50, 100, 250, 500, 1000, 2500, 5000,
                           10000, 20000]
 
-        def do_eval(sess):
+        def do_eval():
             for eval_method in config['eval_methods']:
                 result_path = os.path.join(config['bench_storage_path'],
                                            eval_method) + "-{}.log".format(int(time.time()))
@@ -199,8 +247,7 @@ if __name__ == "__main__":
                 for i in range(sample_count):
                     start_time = time.time()
                     # Evaluation measure to be benchmarked are described in `config`
-                    outcomes = network.eval(episode_count = n,
-                                            tf_session = sess)
+                    outcomes = network.eval(episode_count = n)
                     time_diff = time.time() - start_time
                     log_bench_eval_outcomes(outcomes,
                                             time = time_diff,
@@ -210,8 +257,8 @@ if __name__ == "__main__":
         # CMM: oh no
         import tensorflow as tf
 
-        with tf.Session() as session:
-            network.restore_model(session)
-            do_eval(session)
+        network.restore_model()
+        do_eval()
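Note: with the commit hash appended, each train.log line now carries seven semicolon-separated fields. A minimal parsing sketch (the field order follows the format string in log_train_outcome above; the example line and function name are made up):

def parse_train_log_line(line):
    # Order matches log_train_outcome's format string:
    # time;trained_eps;count;sum;mean;average_diff_in_vals;commit
    time_, eps, count, total, mean, avg_diff, commit = line.rstrip("\n").split(";")
    return {'time': int(time_), 'trained_eps': int(eps), 'count': int(count),
            'sum': float(total), 'mean': float(mean),
            'average_diff_in_vals': float(avg_diff), 'commit': commit}

print(parse_train_log_line("1526989047;2000;250;-37;-0.148;0.052;c31bc39"))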


@@ -8,6 +8,8 @@ import random
 from eval import Eval
 import glob
 from operator import itemgetter
+import tensorflow.contrib.eager as tfe
+from player import Player
 
 class Network:
     # board_features_quack has size 28
@@ -18,18 +20,43 @@ class Network:
         'quack-fat' : (30, Board.board_features_quack_fat),
         'quack' : (28, Board.board_features_quack),
         'tesauro' : (198, Board.board_features_tesauro),
-        'quack-norm': (30, Board.board_features_quack_norm)
+        'quack-norm' : (30, Board.board_features_quack_norm),
+        'tesauro-fat' : (726, Board.board_features_tesauro_fat),
+        'tesauro-poop': (198, Board.board_features_tesauro_wrong)
     }
 
     def custom_tanh(self, x, name=None):
         return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name))
 
     def __init__(self, config, name):
+        """
+        :param config:
+        :param name:
+        """
+        move_options = {
+            '1': self.make_move_1_ply,
+            '0': self.make_move_0_ply
+        }
+
+        self.max_or_min = {
+            1: np.argmax,
+            -1: np.argmin
+        }
+
+        tf.enable_eager_execution()
+
+        xavier_init = tf.contrib.layers.xavier_initializer()
+
         self.config = config
         self.checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
 
         self.name = name
 
+        self.make_move = move_options[
+            self.config['ply']
+        ]
+
         # Set board representation from config
         self.input_size, self.board_trans_func = Network.board_reps[
             self.config['board_representation']
@@ -39,16 +66,6 @@ class Network:
         self.max_learning_rate = 0.1
         self.min_learning_rate = 0.001
 
-        self.global_step = tf.Variable(0, trainable=False, name="global_step")
-        self.learning_rate = tf.maximum(self.min_learning_rate,
-                                        tf.train.exponential_decay(self.max_learning_rate,
-                                                                   self.global_step, 50000,
-                                                                   0.96,
-                                                                   staircase=True),
-                                        name="learning_rate")
-
         # Restore trained episode count for model
         episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
         if os.path.isfile(episode_count_path):
@@ -57,336 +74,261 @@ class Network:
         else:
             self.episodes_trained = 0
 
-        self.x = tf.placeholder('float', [1, self.input_size], name='input')
-        self.value_next = tf.placeholder('float', [1, self.output_size], name="value_next")
-
-        xavier_init = tf.contrib.layers.xavier_initializer()
-
-        W_1 = tf.get_variable("w_1", (self.input_size, self.hidden_size),
-                              initializer=xavier_init)
-        W_2 = tf.get_variable("w_2", (self.hidden_size, self.output_size),
-                              initializer=xavier_init)
-
-        b_1 = tf.get_variable("b_1", (self.hidden_size,),
-                              initializer=tf.zeros_initializer)
-        b_2 = tf.get_variable("b_2", (self.output_size,),
-                              initializer=tf.zeros_initializer)
-
-        value_after_input = tf.sigmoid(tf.matmul(self.x, W_1) + b_1, name='hidden_layer')
-
-        self.value = tf.sigmoid(tf.matmul(value_after_input, W_2) + b_2, name='output_layer')
-
-        # TODO: Alexander thinks that self.value will be computed twice (instead of once)
-        difference_in_values = tf.reshape(tf.subtract(self.value_next, self.value, name='difference_in_values'), [])
-
-        tf.summary.scalar("difference_in_values", tf.abs(difference_in_values))
-
-        trainable_vars = tf.trainable_variables()
-        gradients = tf.gradients(self.value, trainable_vars)
-
-        apply_gradients = []
-
-        global_step_op = self.global_step.assign_add(1)
-
-        with tf.variable_scope('apply_gradients'):
-            for gradient, trainable_var in zip(gradients, trainable_vars):
-                backprop_calc = self.learning_rate * difference_in_values * gradient
-                grad_apply = trainable_var.assign_add(backprop_calc)
-                apply_gradients.append(grad_apply)
-
-            with tf.control_dependencies([global_step_op]):
-                self.training_op = tf.group(*apply_gradients, name='training_op')
-
-        self.saver = tf.train.Saver(max_to_keep=1)
-
-    def eval_state(self, sess, state):
-        return sess.run(self.value, feed_dict={self.x: state})
-
-    def save_model(self, sess, episode_count, global_step):
-        self.saver.save(sess, os.path.join(self.checkpoint_path, 'model.ckpt'), global_step=global_step)
+        global_step_path = os.path.join(self.checkpoint_path, "global_step")
+        if os.path.isfile(global_step_path):
+            with open(global_step_path, 'r') as f:
+                self.global_step = int(f.read())
+        else:
+            self.global_step = 0
+
+        self.model = tf.keras.Sequential([
+            tf.keras.layers.Dense(40, activation="sigmoid", kernel_initializer=xavier_init,
+                                  input_shape=(1,self.input_size)),
+            tf.keras.layers.Dense(1, activation="sigmoid", kernel_initializer=xavier_init)
+        ])
+
+    def exp_decay(self, max_lr, global_step, decay_rate, decay_steps):
+        """
+        Calculates the exponential decay on a learning rate
+        :param max_lr: The learning rate that the network starts at
+        :param global_step: The global step
+        :param decay_rate: The rate at which the learning rate should decay
+        :param decay_steps: The amount of steps between each decay
+        :return: The result of the exponential decay performed on the learning rate
+        """
+        res = max_lr * decay_rate ** (global_step // decay_steps)
+        return res
+
+    def do_backprop(self, prev_state, value_next):
+        """
+        Performs the Temporal-difference backpropagation step on the model
+        :param prev_state: The previous state of the game, this has its value recalculated
+        :param value_next: The value of the current move
+        :return: Nothing, the calculation is performed on the model of the network
+        """
+        self.learning_rate = tf.maximum(self.min_learning_rate,
+                                        self.exp_decay(self.max_learning_rate, self.global_step, 0.96, 50000),
+                                        name="learning_rate")
+
+        with tf.GradientTape() as tape:
+            value = self.model(prev_state.reshape(1,-1))
+        grads = tape.gradient(value, self.model.variables)
+
+        difference_in_values = tf.reshape(tf.subtract(value_next, value, name='difference_in_values'), [])
+
+        for grad, train_var in zip(grads, self.model.variables):
+            backprop_calc = self.learning_rate * difference_in_values * grad
+            train_var.assign_add(backprop_calc)
+
+    def print_variables(self):
+        """
+        Prints all the variables of the model
+        :return:
+        """
+        variables = self.model.variables
+        for k in variables:
+            print(k)
+
+    def eval_state(self, state):
+        """
+        Evaluates a single state
+        :param state:
+        :return:
+        """
+        return self.model(state.reshape(1,-1))
+
+    def save_model(self, episode_count):
+        """
+        Saves the model of the network, it references global_step as self.global_step
+        :param episode_count:
+        :return:
+        """
+        tfe.Saver(self.model.variables).save(os.path.join(self.checkpoint_path, 'model.ckpt'))
         with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f:
             print("[NETWK] ({name}) Saving model to:".format(name=self.name),
                   os.path.join(self.checkpoint_path, 'model.ckpt'))
             f.write(str(episode_count) + "\n")
 
-    def restore_model(self, sess):
+        with open(os.path.join(self.checkpoint_path, "global_step"), 'w+') as f:
+            print("[NETWK] ({name}) Saving global step to:".format(name=self.name),
+                  os.path.join(self.checkpoint_path, 'model.ckpt'))
+            f.write(str(self.global_step) + "\n")
+
+        if self.config['verbose']:
+            self.print_variables()
+
+    def calc_vals(self, states):
+        """
+        Calculate a score of each state in states
+        :param states: A number of states. The states have to be transformed before being given to this function.
+        :return:
+        """
+        return self.model.predict_on_batch(states)
+
+    def restore_model(self):
         """
         Restore a model for a session, such that a trained model and either be further trained or
         used for evaluation
 
-        :param sess: Current session
         :return: Nothing. It's a side-effect that a model gets restored for the network.
         """
         if glob.glob(os.path.join(self.checkpoint_path, 'model.ckpt*.index')):
             latest_checkpoint = tf.train.latest_checkpoint(self.checkpoint_path)
             print("[NETWK] ({name}) Restoring model from:".format(name=self.name),
                   str(latest_checkpoint))
-            self.saver.restore(sess, latest_checkpoint)
-
-            variables_names = [v.name for v in tf.trainable_variables()]
-            values = sess.run(variables_names)
-            for k, v in zip(variables_names, values):
-                print("Variable: ", k)
-                print("Shape: ", v.shape)
-                print(v)
+            tfe.Saver(self.model.variables).restore(latest_checkpoint)
 
             # Restore trained episode count for model
             episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
             if os.path.isfile(episode_count_path):
                 with open(episode_count_path, 'r') as f:
                     self.config['start_episode'] = int(f.read())
-        elif self.config['use_baseline'] and glob.glob(os.path.join(os.path.join(self.config['model_storage_path'], "baseline_model"), 'model.ckpt*.index')):
-            checkpoint_path = os.path.join(self.config['model_storage_path'], "baseline_model")
-            latest_checkpoint = tf.train.latest_checkpoint(checkpoint_path)
-            print("[NETWK] ({name}) Restoring model from:".format(name=self.name),
-                  str(latest_checkpoint))
-            self.saver.restore(sess, latest_checkpoint)
-
-            variables_names = [v.name for v in tf.trainable_variables()]
-            values = sess.run(variables_names)
-            for k, v in zip(variables_names, values):
-                print("Variable: ", k)
-                print("Shape: ", v.shape)
-                print(v)
-        elif not self.config['force_creation']:
-            print("You need to have baseline_model inside models")
-            exit()
-
-    def make_move(self, sess, board, roll, player):
+
+            global_step_path = os.path.join(self.checkpoint_path, "global_step")
+            if os.path.isfile(global_step_path):
+                with open(global_step_path, 'r') as f:
+                    self.config['global_step'] = int(f.read())
+
+        if self.config['verbose']:
+            self.print_variables()
+
+    def make_move_0_ply(self, board, roll, player):
         """
         Find the best move given a board, roll and a player, by finding all possible states one can go to
-        and then picking the best, by using the network to evaluate each state. The highest score is picked
-        for the 1-player and the max(1-score) is picked for the -1-player.
+        and then picking the best, by using the network to evaluate each state. This is 0-ply, ie. no look-ahead.
+        The highest score is picked for the 1-player and the max(1-score) is picked for the -1-player.
 
-        :param sess:
         :param board: Current board
         :param roll: Current roll
         :param player: Current player
         :return: A pair of the best state to go to, together with the score of that state
         """
-        legal_moves = Board.calculate_legal_states(board, player, roll)
-        moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in legal_moves]
-        scores = [x[1] if np.sign(player) > 0 else 1-x[1] for x in moves_and_scores]
-        best_score_index = np.array(scores).argmax()
-        best_move_pair = moves_and_scores[best_score_index]
-        return best_move_pair
-
-    def make_move_n_ply(self, sess, board, roll, player, n = 1):
-        best_pair = self.calc_n_ply(n, sess, board, player, roll)
+        legal_moves = list(Board.calculate_legal_states(board, player, roll))
+        legal_states = np.array([self.board_trans_func(move, player)[0] for move in legal_moves])
+
+        scores = self.model.predict_on_batch(legal_states)
+
+        best_score_idx = self.max_or_min[player](scores)
+        best_move, best_score = legal_moves[best_score_idx], scores[best_score_idx]
+
+        return (best_move, best_score)
+
+    def make_move_1_ply(self, board, roll, player):
+        """
+        Return the best board and best score based on a 1-ply look-ahead.
+        :param board:
+        :param roll:
+        :param player:
+        :return:
+        """
+        start = time.time()
+        best_pair = self.calculate_1_ply(board, roll, player)
+        #print(time.time() - start)
         return best_pair
 
-    def calculate_1_ply(self, sess, board, roll, player):
+    def calculate_1_ply(self, board, roll, player):
         """
-        Find the best move based on a 1-ply look-ahead. First the best move is found for a single ply and then an
-        exhaustive search is performed on the best 15 moves from the single ply.
+        Find the best move based on a 1-ply look-ahead. First the x best moves are picked from a 0-ply and then
+        all moves and scores are found for them. The expected score is then calculated for each of the boards from the
+        0-ply.
 
-        :param sess:
         :param board:
         :param roll: The original roll
         :param player: The current player
         :return: Best possible move based on 1-ply look-ahead
         """
 
         # find all legal states from the given board and the given roll
         init_legal_states = Board.calculate_legal_states(board, player, roll)
 
-        # find all values for the above boards
-        zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
-
-        # pythons reverse is in place and I can't call [:15] on it, without applying it to an object like so. Fuck.
-        best_fifteen = sorted(zero_ply_moves_and_scores, key=itemgetter(1), reverse=player==1)
-
-        best_fifteen_boards = [x[0] for x in best_fifteen[:10]]
-
-        all_rolls_scores = self.do_ply(sess, best_fifteen_boards, player)
-
-        best_score_index = np.array(all_rolls_scores).argmax()
-        best_board = best_fifteen_boards[best_score_index]
-
-        return [best_board, max(all_rolls_scores)]
-
-    def calc_n_ply(self, n_init, sess, board, player, roll):
-
-        # find all legal states from the given board and the given roll
-        init_legal_states = Board.calculate_legal_states(board, player, roll)
-
-        # find all values for the above boards
-        zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
-
-        # pythons reverse is in place and I can't call [:15] on it, without applying it to an object like so. Fuck.
-        sorted_moves_and_scores = sorted(zero_ply_moves_and_scores, key=itemgetter(1), reverse=player==1)
+        legal_states = np.array([self.board_trans_func(state, player)[0] for state in init_legal_states])
+
+        scores = [ score.numpy()
+                   for score
+                   in self.calc_vals(legal_states) ]
+
+        moves_and_scores = list(zip(init_legal_states, scores))
+
+        sorted_moves_and_scores = sorted(moves_and_scores, key=itemgetter(1), reverse=(player == 1))
 
         best_boards = [ x[0] for x in sorted_moves_and_scores[:10] ]
 
-        best_move_score_pair = self.n_ply(n_init, sess, best_boards, player)
-
-        return best_move_score_pair
-
-    def n_ply(self, n_init, sess, boards_init, player_init):
-        def ply(n, boards, player):
-            def calculate_possible_states(board):
-                possible_rolls = [ (1, 1), (1, 2), (1, 3), (1, 4), (1, 5),
-                                   (1, 6), (2, 2), (2, 3), (2, 4), (2, 5),
-                                   (2, 6), (3, 3), (3, 4), (3, 5), (3, 6),
-                                   (4, 4), (4, 5), (4, 6), (5, 5), (5, 6),
-                                   (6, 6) ]
-
-                # for roll in possible_rolls:
-                #     print(len(Board.calculate_legal_states(board, player, roll)))
-
-                return [ Board.calculate_legal_states(board, player, roll)
-                         for roll
-                         in possible_rolls ]
-
-            def find_best_state_score(boards):
-                score_pairs = [ (board, self.eval_state(sess, self.board_trans_func(board, player)))
-                                for board
-                                in boards ]
-                scores = [ pair[1]
-                           for pair
-                           in score_pairs ]
-                best_score_pair = score_pairs[np.array(scores).argmax()]
-
-                return best_score_pair
-
-            def average_score(boards):
-                return sum(boards)/len(boards)
-
-            def average_ply_score(board):
-                states_for_rolls = calculate_possible_states(board)
-
-                best_state_score_for_each_roll = [
-                    find_best_state_score(states)
-                    for states
-                    in states_for_rolls ]
-
-                best_score_for_each_roll = [ x[1]
-                                             for x
-                                             in best_state_score_for_each_roll ]
-
-                average_score_var = average_score(best_score_for_each_roll)
-                return average_score_var
-
-            if n == 1:
-                average_score_pairs = [ (board, average_ply_score(board))
-                                        for board
-                                        in boards ]
-                return average_score_pairs
-            elif n > 1: # n != 1
-                def average_for_score_pairs(score_pairs):
-                    scores = [ pair[1]
-                               for pair
-                               in score_pairs ]
-                    return sum(scores)/len(scores)
-
-                def average_plain(scores):
-                    return sum(scores)/len(scores)
-
-                print("+"*20)
-                print(n)
-                print(type(boards))
-                print(boards)
-                possible_states_for_boards = [
-                    (board, calculate_possible_states(board))
-                    for board
-                    in boards ]
-
-                average_score_pairs = [
-                    (inner_boards[0], average_plain([ average_for_score_pairs(ply(n - 1, inner_board, player * -1 if n == 1 else player))
-                                                      for inner_board
-                                                      in inner_boards[1] ]))
-                    for inner_boards
-                    in possible_states_for_boards ]
-
-                return average_score_pairs
-            else:
-                assert False
-
-        if n_init < 1: print("Unexpected argument n = {}".format(n_init)); exit()
-
-        boards_with_scores = ply(n_init, boards_init, -1 * player_init)
-        #print("Boards with scores:",boards_with_scores)
-        scores = [ ( pair[1] if player_init == 1 else (1 - pair[1]) )
-                   for pair
-                   in boards_with_scores ]
-        #print("All the scores:",scores)
-        best_score_pair = boards_with_scores[np.array(scores).argmax()]
-        return best_score_pair
-
-    def do_ply(self, sess, boards, player):
+        scores = self.do_ply(best_boards, player)
+
+        best_score_idx = self.max_or_min[player](scores)
+        # best_score_idx = np.array(trans_scores).argmax()
+        return (best_boards[best_score_idx], scores[best_score_idx])
+
+    def do_ply(self, boards, player):
         """
         Calculates a single extra ply, resulting in a larger search space for our best move.
         This is somewhat hardcoded to only do a single ply, seeing that it calls max on all scores, rather than
         allowing the function to search deeper, which could result in an even larger search space. If we wish
         to have more than 2-ply, this should be fixed, so we could extend this method to allow for 3-ply.
 
-        :param sess:
         :param boards: The boards to try all rolls on
         :param player: The player of the previous ply
         :return: An array of scores where each index describes one of the boards which was given as param
                  to this function.
         """
 
-        def gen_21_rolls():
-            """
-            Calculate all possible rolls, [[1,1], [1,2] ..]
-            :return: All possible rolls
-            """
-            a = []
-            for x in range(1, 7):
-                for y in range(1, 7):
-                    if not [x, y] in a and not [y, x] in a:
-                        a.append([x, y])
-
-            return a
-
-        all_rolls = gen_21_rolls()
-
-        all_rolls_scores = []
-        count = 0
-        # loop over boards
-        for a_board in boards:
-            a_board_scores = []
-
-            # loop over all rolls, for each board
-            for roll in all_rolls:
-
-                # find all states we can get to, given the board and roll and the opposite player
-                all_rolls_boards = Board.calculate_legal_states(a_board, player*-1, roll)
-                count += len(all_rolls_boards)
-
-                # find scores for each board found above
-                spec_roll_scores = [self.eval_state(sess, self.board_trans_func(new_board, player*-1))
-                                    for new_board in all_rolls_boards]
-
-                # if the original player is the -1 player, then we need to find (1-value)
-                spec_roll_scores = [x if player == 1 else (1-x) for x in spec_roll_scores]
-
-                # find the best score
-                best_score = max(spec_roll_scores)
-
-                # append the best score to a_board_scores, where we keep track of the best score for each board
-                a_board_scores.append(best_score)
-
-            # save the expected average of board scores
-            all_rolls_scores.append(sum(a_board_scores)/len(a_board_scores))
-
-        # return all the average scores
-        print(count)
-        return all_rolls_scores
-
-    def eval(self, episode_count, trained_eps = 0, tf_session = None):
+        all_rolls = [ (1, 1), (1, 2), (1, 3), (1, 4), (1, 5),
+                      (1, 6), (2, 2), (2, 3), (2, 4), (2, 5),
+                      (2, 6), (3, 3), (3, 4), (3, 5), (3, 6),
+                      (4, 4), (4, 5), (4, 6), (5, 5), (5, 6),
+                      (6, 6) ]
+
+        # start = time.time()
+
+        # print("/"*50)
+        length_list = []
+        test_list = []
+        # Prepping of data
+        # start = time.time()
+        for board in boards:
+            length = 0
+            for roll in all_rolls:
+                all_states = Board.calculate_legal_states(board, player*-1, roll)
+                for state in all_states:
+                    state = np.array(self.board_trans_func(state, player*-1)[0])
+                    test_list.append(state)
+                    length += 1
+            length_list.append(length)
+
+        # print(time.time() - start)
+
+        start = time.time()
+
+        all_scores = self.model.predict_on_batch(np.array(test_list))
+
+        split_scores = []
+        from_idx = 0
+        for length in length_list:
+            split_scores.append(all_scores[from_idx:from_idx+length])
+            from_idx += length
+
+        means_splits = [tf.reduce_mean(scores) for scores in split_scores]
+
+        # print(time.time() - start)
+        # print("/"*50)
+        return means_splits
+
+    def eval(self, episode_count, trained_eps = 0):
         """
         Used to evaluate a model. Can either use pubeval, a model playing at an intermediate level, or dumbeval
         a model which has been given random weights, so it acts deterministically random.
@@ -397,11 +339,10 @@
         :return: outcomes: The outcomes of the evaluation session
         """
 
-        def do_eval(sess, method, episodes = 1000, trained_eps = 0):
+        def do_eval(method, episodes = 1000, trained_eps = 0):
             """
             Do the actual evaluation
 
-            :param sess:
             :param method: Either pubeval or dumbeval
             :param episodes: Amount of episodes to use in the evaluation
             :param trained_eps:
@@ -425,7 +366,6 @@
             sys.stderr.write(
                 "[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method))
-
             if method == 'pubeval':
                 outcomes = []
                 for i in range(1, episodes + 1):
@@ -433,11 +373,9 @@
                     board = Board.initial_state
                     while Board.outcome(board) is None:
                         roll = (random.randrange(1, 7), random.randrange(1, 7))
-                        board = (self.make_move(sess, board, roll, 1))[0]
+                        board = (self.make_move(board, roll, 1))[0]
 
                         roll = (random.randrange(1, 7), random.randrange(1, 7))
                         board = Eval.make_pubeval_move(board, -1, roll)[0][0:26]
 
                     sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
@@ -456,11 +394,9 @@
                     board = Board.initial_state
                     while Board.outcome(board) is None:
                         roll = (random.randrange(1, 7), random.randrange(1, 7))
-                        board = (self.make_move(sess, board, roll, 1))[0]
+                        board = (self.make_move(board, roll, 1))[0]
 
                         roll = (random.randrange(1, 7), random.randrange(1, 7))
                         board = Eval.make_dumbeval_move(board, -1, roll)[0][0:26]
 
                     sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
@@ -476,41 +412,52 @@
                 sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method))
                 return [0]
 
-        if tf_session == None:
-            with tf.Session() as session:
-                session.run(tf.global_variables_initializer())
-                self.restore_model(session)
-                outcomes = [ (method, do_eval(session,
-                                              method,
-                                              episode_count,
-                                              trained_eps = trained_eps))
-                             for method
-                             in self.config['eval_methods'] ]
-                return outcomes
-        else:
-            outcomes = [ (method, do_eval(tf_session,
-                                          method,
-                                          episode_count,
-                                          trained_eps = trained_eps))
-                         for method
-                         in self.config['eval_methods'] ]
-            return outcomes
+        outcomes = [ (method, do_eval(method,
+                                      episode_count,
+                                      trained_eps = trained_eps))
+                     for method
+                     in self.config['eval_methods'] ]
+        return outcomes
+
+    def play_against_network(self):
+        """
+        Allows you to play against a supplied model.
+        :return:
+        """
+        self.restore_model()
+        human_player = Player(-1)
+        cur_player = 1
+        player = 1
+        board = Board.initial_state
+        i = 0
+        while Board.outcome(board) is None:
+            print(Board.pretty(board))
+            roll = (random.randrange(1, 7), random.randrange(1, 7))
+            print("Bot rolled:", roll)
+
+            board, _ = self.make_move(board, roll, player)
+
+            print(Board.pretty(board))
+            roll = (random.randrange(1, 7), random.randrange(1, 7))
+            print("You rolled:", roll)
+            board = human_player.make_human_move(board, roll)
+
+        print("DONE "*10)
+        print(Board.pretty(board))
 
     def train_model(self, episodes=1000, save_step_size=100, trained_eps=0):
-        with tf.Session() as sess:
-            difference_in_vals = 0
-            writer = tf.summary.FileWriter("/tmp/log/tf", sess.graph)
-
-            sess.run(tf.global_variables_initializer())
-            self.restore_model(sess)
-
-            variables_names = [v.name for v in tf.trainable_variables()]
-            values = sess.run(variables_names)
-            for k, v in zip(variables_names, values):
-                print("Variable: ", k)
-                print("Shape: ", v.shape)
-                print(v)
+        """
+        Train a model to by self-learning.
+        :param episodes:
+        :param save_step_size:
+        :param trained_eps:
+        :return:
+        """
+
+        self.restore_model()
+        average_diffs = 0
         start_time = time.time()
 
         def print_time_estimate(eps_completed):
@@ -530,27 +477,30 @@
         for episode in range(1, episodes + 1):
             sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
-            # TODO decide which player should be here
-            player = 1
+            # player = 1
+            player = random.choice([-1,1])
+
             prev_board = Board.initial_state
             i = 0
+            difference_in_values = 0
             while Board.outcome(prev_board) is None:
                 i += 1
+                self.global_step += 1
 
-                cur_board, cur_board_value = self.make_move(sess,
-                                                            prev_board,
+                cur_board, cur_board_value = self.make_move(prev_board,
                                                             (random.randrange(1, 7), random.randrange(1, 7)),
                                                             player)
 
-                difference_in_vals += abs((cur_board_value - self.eval_state(sess, self.board_trans_func(prev_board, player))))
+                difference_in_values += abs((cur_board_value - self.eval_state(self.board_trans_func(prev_board, player))))
+
+                if self.config['verbose']:
+                    print("Difference in values:", difference_in_vals)
+                    print("Current board value :", cur_board_value)
+                    print("Current board is :\n",cur_board)
 
                 # adjust weights
-                sess.run(self.training_op,
-                         feed_dict={self.x: self.board_trans_func(prev_board, player),
-                                    self.value_next: cur_board_value})
+                if Board.outcome(cur_board) is None:
+                    self.do_backprop(self.board_trans_func(prev_board, player), cur_board_value)
 
                 player *= -1
                 prev_board = cur_board
@@ -561,27 +511,25 @@
             final_score = np.array([Board.outcome(final_board)[1]])
             scaled_final_score = ((final_score + 2) / 4)
 
-            with tf.name_scope("final"):
-                merged = tf.summary.merge_all()
-                global_step, summary, _ = sess.run([self.global_step, merged, self.training_op],
-                                                   feed_dict={self.x: self.board_trans_func(prev_board, player),
-                                                              self.value_next: scaled_final_score.reshape((1, 1))})
-                writer.add_summary(summary, episode + trained_eps)
+            difference_in_values += abs(scaled_final_score-cur_board_value)
+
+            average_diffs += (difference_in_values[0][0] / (i+1))
+
+            self.do_backprop(self.board_trans_func(prev_board, player), scaled_final_score.reshape(1,1))
 
             sys.stderr.write("\n")
 
             if episode % min(save_step_size, episodes) == 0:
                 sys.stderr.write("[TRAIN] Saving model...\n")
-                self.save_model(sess, episode + trained_eps, global_step)
+                self.save_model(episode + trained_eps)
 
             if episode % 50 == 0:
                 print_time_estimate(episode)
 
         sys.stderr.write("[TRAIN] Saving model for final episode...\n")
-        self.save_model(sess, episode+trained_eps, global_step)
-        writer.close()
+        self.save_model(episode+trained_eps)
 
-        return outcomes, difference_in_vals[0][0]
+        return outcomes, average_diffs/len(outcomes)
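Note: do_backprop is the heart of the eager rewrite. Instead of running a pre-built training_op, the TD update w <- w + a * (V(s') - V(s)) * grad_w V(s) is applied directly to the model's variables, with the rate a given by exp_decay (staircase decay: with max_lr = 0.1, decay_rate = 0.96 and decay_steps = 50000, the rate stays at 0.1 for the first 50000 global steps, then drops to 0.1 * 0.96 = 0.096, and so on, floored at min_learning_rate). A standalone sketch of the same update rule, written against current TF 2.x eager APIs rather than the contrib/tfe calls used above (the model shape mirrors the Sequential above; all names are illustrative):

import tensorflow as tf

# Toy value network: 30 inputs -> 40 sigmoid -> 1 sigmoid, mirroring the model above.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(40, activation="sigmoid", input_shape=(30,)),
    tf.keras.layers.Dense(1, activation="sigmoid"),
])

def td_backprop(prev_state, value_next, learning_rate=0.1):
    # Compute V(s) and its gradient with respect to every trainable variable.
    with tf.GradientTape() as tape:
        value = model(prev_state)
    grads = tape.gradient(value, model.trainable_variables)
    td_error = value_next - tf.reshape(value, [])  # V(s') - V(s)
    # Nudge each weight along its own gradient, scaled by the TD error.
    for grad, var in zip(grads, model.trainable_variables):
        var.assign_add(learning_rate * td_error * grad)

td_backprop(tf.random.uniform((1, 30)), tf.constant(0.9))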


@@ -9,14 +9,12 @@ from board import Board
 import main
 
 config = main.config.copy()
-config['model'] = "tesauro_blah"
-config['force_creation'] = True
+config['model'] = "player_testings"
+config['ply'] = "1"
+config['board_representation'] = 'quack-fat'
 
 network = Network(config, config['model'])
 
-session = tf.Session()
-
-session.run(tf.global_variables_initializer())
-network.restore_model(session)
+network.restore_model()
 
 initial_state = Board.initial_state
 
 initial_state_1 = ( 0,
@@ -38,65 +36,32 @@ boards = {initial_state,
           initial_state_2 }
 
-def gen_21_rolls():
-    """
-    Calculate all possible rolls, [[1,1], [1,2] ..]
-    :return: All possible rolls
-    """
-    a = []
-    for x in range(1, 7):
-        for y in range(1, 7):
-            if not [x, y] in a and not [y, x] in a:
-                a.append([x, y])
-
-    return a
-
-def calc_all_scores(board, player):
-    scores = []
-    trans_board = network.board_trans_func(board, player)
-    rolls = gen_21_rolls()
-    for roll in rolls:
-        score = network.eval_state(session, trans_board)
-        scores.append(score)
-    return scores
-
-def calculate_possible_states(board):
-    possible_rolls = [(1, 1), (1, 2), (1, 3), (1, 4), (1, 5),
-                      (1, 6), (2, 2), (2, 3), (2, 4), (2, 5),
-                      (2, 6), (3, 3), (3, 4), (3, 5), (3, 6),
-                      (4, 4), (4, 5), (4, 6), (5, 5), (5, 6),
-                      (6, 6)]
-    for roll in possible_rolls:
-        meh = Board.calculate_legal_states(board, -1, roll)
-        print(len(meh))
-    return [Board.calculate_legal_states(board, -1, roll)
-            for roll
-            in possible_rolls]
-
-#for board in boards:
-#    calculate_possible_states(board)
-#print("-"*30)
-#print(network.calculate_1_ply(session, Board.initial_state, [2,4], 1))
-#print(" "*10 + "network_test")
-print(" "*20 + "Depth 1")
-print(network.calc_n_ply(2, session, Board.initial_state, 1, [2, 4]))
-#print(scores)
-#print(" "*20 + "Depth 2")
-#print(network.n_ply(2, session, boards, 1))
-# #print(x.shape)
-# with graph_lol.as_default():
-#     session_2 = tf.Session(graph = graph_lol)
-#     network_2 = Network(session_2)
-#     network_2.restore_model()
-#     print(network_2.eval_state(initial_state))
-# print(network.eval_state(initial_state))
+# board = network.board_trans_func(Board.initial_state, 1)
+
+# pair = network.make_move(Board.initial_state, [3,2], 1)
+# print(pair[1])
+
+# network.do_backprop(board, 0.9)
+
+# network.print_variables()
+# network.save_model(2)
+
+# print(network.calculate_1_ply(Board.initial_state, [3,2], 1))
+
+diff = [0, 0]
+val = network.eval_state(Board.board_features_quack_fat(initial_state, 1))
+print(val)
+diff[0] += abs(-1-val)
+diff[1] += 1
+print(diff[1])


@@ -11,19 +11,59 @@ class Player:
     def get_sym(self):
         return self.sym
 
-    def make_move(self, board, sym, roll):
-        print(Board.pretty(board))
-        legal_moves = Board.calculate_legal_states(board, sym, roll)
-        if roll[0] == roll[1]:
-            print("Example of move: 4/6,6/8,12/14,13/15")
-        else:
-            print("Example of move: 4/6,13/17")
-
-        user_moves = input("Enter your move: ").strip().split(",")
-        board = Board.apply_moves_to_board(board, sym, user_moves)
-        while board not in legal_moves:
-            print("Move is invalid, please enter a new move")
-            user_moves = input("Enter your move: ").strip().split(",")
-            board = Board.apply_moves_to_board(board, sym, user_moves)
+    def calc_move_sets(self, from_board, roll, player):
+        board = from_board
+        sets = []
+        total = 0
+        for r in roll:
+            # print("Value of r:",r)
+            sets.append([Board.calculate_legal_states(board, player, [r,0]), r])
+            total += r
+        sets.append([Board.calculate_legal_states(board, player, [total,0]), total])
+        print(sets)
+        return sets
+
+    def tmp_name(self, from_board, to_board, roll, player, total_moves, is_quad = False):
+        sets = self.calc_move_sets(from_board, roll, player)
+        return_board = from_board
+        for idx, board_set in enumerate(sets):
+            board_set[0] = list(board_set[0])
+            # print(to_board)
+            # print(board_set)
+            if to_board in board_set[0]:
+                total_moves -= board_set[1]
+                # if it's not the sum of the moves
+                if idx < (4 if is_quad else 2):
+                    roll[idx] = 0
+                else:
+                    roll = [0,0]
+                return_board = to_board
+                break
+        return total_moves, roll, return_board
+
+    def make_human_move(self, board, roll):
+        is_quad = roll[0] == roll[1]
+        total_moves = roll[0] + roll[1] if not is_quad else int(roll[0])*4
+        if is_quad:
+            roll = [roll[0]]*4
+
+        while total_moves != 0:
+            while True:
+                print("You have {roll} left!".format(roll=total_moves))
+                move = input("Pick a move!\n")
+                pot_move = move.split("/")
+                if len(pot_move) == 2:
+                    try:
+                        pot_move[0] = int(pot_move[0])
+                        pot_move[1] = int(pot_move[1])
+                        move = pot_move
+                        break
+                    except ValueError:
+                        print("The correct syntax is: 2/5 for a move from index 2 to 5.")
+
+            to_board = Board.apply_moves_to_board(board, self.get_sym(), move)
+            total_moves, roll, board = self.tmp_name(board, to_board, list(roll), self.get_sym(), total_moves, is_quad)
+            print(Board.pretty(board))
         return board
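
The from/to parsing in make_human_move is easy to see in isolation; a toy illustration, not repository code:

pot_move = "2/5".split("/")         # -> ["2", "5"]
move = [int(p) for p in pot_move]   # -> [2, 5]
# int("five") raises ValueError, which is why the input loop above
# catches it and re-prompts instead of crashing.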

quack/quack.c Normal file

@@ -0,0 +1,484 @@
#include <Python.h>
static PyObject* QuackError;
typedef struct board_list board_list;
struct board_list {
int size;
PyObject* list[16];
};
/* Utility functions */
int sign(int x) {
return (x > 0) - (x < 0);
}
int abs(int x) {
if (x >= 0) return x;
else return -x;
}
/* end utility functions */
/* Helper functions */
int *idxs_with_checkers_of_player(int board[], int player) {
int idxs_tmp[26];
int ctr = 0;
for (int i = 0; i < 26; i++) {
if (board[i] * player >= 1) {
idxs_tmp[ctr] = i;
ctr++;
}
}
int *idxs = malloc((1 + ctr) * sizeof(int));
if (idxs == NULL) {
PyErr_NoMemory();
abort();
}
idxs[0] = ctr;
for (int i = 0; i < ctr; i++) {
idxs[i+1] = idxs_tmp[i];
}
return idxs;
}
int is_forward_move(int direction, int player) {
return direction == player;
}
int face_value_match_move_length(int delta, int face_value) {
return abs(delta) == face_value;
}
int bear_in_if_checker_on_bar(int board[], int player, int from_idx) {
int bar;
if (player == 1) bar = 0;
else bar = 25;
if (board[bar] != 0) return from_idx == bar;
else return 1;
}
int checkers_at_from_idx(int from_state, int player) {
return sign(from_state) == player;
}
int no_block_at_to_idx(int to_state, int player) {
if (-sign(to_state) == player) return abs(to_state) == 1;
else return 1;
}
int can_bear_off(int board[], int player, int from_idx, int to_idx) {
int* checker_idxs = idxs_with_checkers_of_player(board, player);
int moving_backmost_checker = 1;
int bearing_directly_off = 0;
int all_checkers_in_last_quadrant = 1;
/* Check if bearing directly off */
if (player == 1 && to_idx == 25) bearing_directly_off = 1;
else if (player == -1 && to_idx == 0) bearing_directly_off = 1;
for (int i = 1; i <= checker_idxs[0]; i++) {
if (player == 1 ) {
/* Check if all checkers are in last quadrant */
if (checker_idxs[i] < 19) {
all_checkers_in_last_quadrant = 0;
break;
}
/* Check if moving backmost checker */
if (checker_idxs[i] < from_idx) {
moving_backmost_checker = 0;
if (!bearing_directly_off) break;
}
} else {
if (checker_idxs[i] > 6) {
all_checkers_in_last_quadrant = 0;
break;
}
if (checker_idxs[i] > from_idx) {
moving_backmost_checker = 0;
if (!bearing_directly_off) break;
}
}
}
free(checker_idxs);
if (all_checkers_in_last_quadrant &&
(bearing_directly_off || moving_backmost_checker)) return 1;
else return 0;
}
/* end helper functions */
int is_move_valid(int board[], int player, int face_value, int move[]) {
int from_idx = move[0];
int to_idx = move[1];
int to_state;
int from_state = board[from_idx];
int delta = to_idx - from_idx;
int direction = sign(delta);
int bearing_off;
if (to_idx >= 1 && to_idx <= 24) {
to_state = board[to_idx];
bearing_off = 0;
} else {
to_state = 0;
bearing_off = 1;
}
return is_forward_move(direction, player)
&& face_value_match_move_length(delta, face_value)
&& bear_in_if_checker_on_bar(board, player, from_idx)
&& checkers_at_from_idx(from_state, player)
&& no_block_at_to_idx(to_state, player)
&& (!bearing_off || can_bear_off(board, player, from_idx, to_idx))
;
}
void do_move(int board[], int player, int move[]) {
int from_idx = move[0];
int to_idx = move[1];
/* "lift" checker */
board[from_idx] -= player;
/* Return early if bearing off */
if (to_idx < 1 || to_idx > 24) return;
/* Hit opponent checker */
if (board[to_idx] * player == -1) {
/* Move checker to bar */
if (player == 1) board[25] -= player;
else board[0] -= player;
board[to_idx] = 0;
}
/* Put down checker */
board[to_idx] += player;
return;
}
int* do_move_clone(int board[], int player, int move[]) {
int* new_board = malloc(sizeof(int) * 26);
if (new_board == NULL) {
PyErr_NoMemory();
abort();
}
for (int i = 0; i < 26; i++) {
new_board[i] = board[i];
}
do_move(new_board, player, move);
return new_board;
}
PyObject* store_board_to_pytuple(int board[], int size) {
PyObject* board_tuple = PyTuple_New(size);
for (int i = 0; i < size; i++) {
PyTuple_SetItem(board_tuple, i, Py_BuildValue("i", board[i]));
}
return board_tuple;
}
board_list calc_moves(int board[], int player, int face_value) {
int* checker_idxs = idxs_with_checkers_of_player(board, player);
board_list boards = { .size = 0 };
if (checker_idxs[0] == 0) {
boards.size = 1;
PyObject* board_tuple = store_board_to_pytuple(board, 26);
boards.list[0] = board_tuple;
free(checker_idxs);
return boards;
}
int ctr = 0;
for (int i = 1; i <= checker_idxs[0]; i++) {
int move[2];
move[0] = checker_idxs[i];
move[1] = checker_idxs[i] + (face_value * player);
if (is_move_valid(board, player, face_value, move)) {
int* new_board = do_move_clone(board, player, move);
PyObject* board_tuple = store_board_to_pytuple(new_board, 26);
// safe to free: store_board_to_pytuple copied the values out of new_board
free(new_board);
boards.list[ctr] = board_tuple;
ctr++;
}
}
free(checker_idxs);
boards.size = ctr;
return boards;
}
int* board_features_quack_fat(int board[], int player) {
int* new_board = malloc(sizeof(int) * 30);
if (new_board == NULL) {
PyErr_NoMemory();
abort();
}
int pos_sum = 0;
int neg_sum = 0;
for (int i = 0; i < 26; i++) {
new_board[i] = board[i];
if (new_board[i] > 0) pos_sum += new_board[i];
else neg_sum += new_board[i];
}
new_board[26] = 15 - pos_sum;
new_board[27] = -15 - neg_sum;
if (player == 1) {
new_board[28] = 1;
new_board[29] = 0;
} else {
new_board[28] = 0;
new_board[29] = 1;
}
return new_board;
}
/* Meta definitions */
int extract_board(int *board, PyObject* board_tuple_obj) {
long numValuesBoard;
numValuesBoard = PyTuple_Size(board_tuple_obj);
if (numValuesBoard != 26) {
PyErr_SetString(QuackError, "Board tuple must have 26 entries");
return 1;
}
PyObject* board_val_obj;
// Iterate over tuple to retrieve positions
for (int i=0; i<numValuesBoard; i++) {
board_val_obj = PyTuple_GetItem(board_tuple_obj, i);
board[i] = PyLong_AsLong(board_val_obj);
}
return 0;
}
int extract_move(int *move, PyObject* move_tuple_obj) {
long numValuesMove;
numValuesMove = PyTuple_Size(move_tuple_obj);
if (numValuesMove != 2) {
PyErr_SetString(QuackError, "Move tuple must have exactly 2 entries");
return 1;
}
PyObject* move_val_obj;
for (int i=0; i<numValuesMove; i++) {
move_val_obj = PyTuple_GetItem(move_tuple_obj, i);
move[i] = PyLong_AsLong(move_val_obj);
}
return 0;
}
static PyObject*
quack_is_move_valid(PyObject *self, PyObject *args) {
int board[26];
int player;
int face_value;
int move[2];
PyObject* board_tuple_obj;
PyObject* move_tuple_obj;
if (! PyArg_ParseTuple(args, "O!iiO!",
&PyTuple_Type, &board_tuple_obj,
&player,
&face_value,
&PyTuple_Type, &move_tuple_obj))
return NULL;
if (extract_board(board, board_tuple_obj)) return NULL;
if (extract_move(move, move_tuple_obj)) return NULL;
if (is_move_valid(board, player, face_value, move)) Py_RETURN_TRUE;
else Py_RETURN_FALSE;
}
static PyObject*
quack_idxs_with_checkers_of_player(PyObject *self, PyObject *args) {
int board[26];
int player;
int* idxs;
PyObject* board_tuple_obj;
if (! PyArg_ParseTuple(args, "O!i",
&PyTuple_Type, &board_tuple_obj,
&player))
return NULL;
if (extract_board(board, board_tuple_obj)) return NULL;
idxs = idxs_with_checkers_of_player(board, player);
PyObject* idxs_list = PyList_New(idxs[0]);
for (int i = 0; i < idxs[0]; i++) {
PyList_SetItem(idxs_list, i, Py_BuildValue("i", idxs[i+1]));
}
free(idxs);
PyObject *result = Py_BuildValue("O", idxs_list);
Py_DECREF(idxs_list);
return result;
}
static PyObject*
quack_do_move(PyObject *self, PyObject *args) {
int board[26];
int player;
int move[2];
PyObject* board_tuple_obj;
PyObject* move_tuple_obj;
if (! PyArg_ParseTuple(args, "O!iO!",
&PyTuple_Type, &board_tuple_obj,
&player,
&PyTuple_Type, &move_tuple_obj))
return NULL;
if (extract_board(board, board_tuple_obj)) return NULL;
if (extract_move(move, move_tuple_obj)) return NULL;
do_move(board, player, move);
PyObject* board_tuple = store_board_to_pytuple(board, 26);
// board is a plain C array, not a PyObject; no refcounting is needed here
PyObject *result = Py_BuildValue("O", board_tuple);
Py_DECREF(board_tuple);
return result;
}
static PyObject*
quack_calc_moves(PyObject *self, PyObject *args) {
int board[26];
int player;
int face_value;
PyObject* board_tuple_obj;
if (! PyArg_ParseTuple(args, "O!ii",
&PyTuple_Type, &board_tuple_obj,
&player,
&face_value))
return NULL;
if (extract_board(board, board_tuple_obj)) return NULL;
board_list boards = calc_moves(board, player, face_value);
PyObject* boards_list = PyList_New(boards.size);
for (int i = 0; i < boards.size; i++) {
if (PyList_SetItem(boards_list, i, boards.list[i])) {
printf("list insertion failed at index %i\n",i);
abort();
}
}
PyObject *result = Py_BuildValue("O", boards_list);
Py_DECREF(boards_list);
return result;
}
static PyObject*
quack_board_features_quack_fat(PyObject *self, PyObject *args) {
int board[26];
int player;
PyObject* board_tuple_obj;
if (! PyArg_ParseTuple(args, "O!i",
&PyTuple_Type, &board_tuple_obj,
&player))
return NULL;
if (extract_board(board, board_tuple_obj)) return NULL;
int* new_board = board_features_quack_fat(board, player);
PyObject* board_tuple = store_board_to_pytuple(new_board, 30);
free(new_board);
PyObject *result = Py_BuildValue("O", board_tuple);
Py_DECREF(board_tuple);
return result;
}
static PyMethodDef quack_methods[] = {
{
"is_move_valid", quack_is_move_valid, METH_VARARGS,
"Evaluates the validity of the proposed move."
},
{
"idxs_with_checkers_of_player", quack_idxs_with_checkers_of_player, METH_VARARGS,
"Returns a list of indexes with checkers of the specified player"
},
{
"do_move", quack_do_move, METH_VARARGS,
"Returns the board after doing the specified move"
},
{
"calc_moves", quack_calc_moves, METH_VARARGS,
"Calculates all legal moves from board with specified face value"
},
{
"board_features_quack_fat", quack_board_features_quack_fat, METH_VARARGS,
"Transforms a board to the quack-fat board representation"
},
{NULL, NULL, 0, NULL}
};
static struct PyModuleDef quack_definition = {
PyModuleDef_HEAD_INIT,
"quack",
"A Python module that provides various useful Backgammon-related functions.",
-1,
quack_methods
};
PyMODINIT_FUNC PyInit_quack(void) {
PyObject* module;
module = PyModule_Create(&quack_definition);
if (module == NULL)
return NULL;
QuackError = PyErr_NewException("quack.error", NULL, NULL);
Py_INCREF(QuackError);
PyModule_AddObject(module, "error", QuackError);
return module;
}
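
The board_features_quack_fat transformation above packs the 26 raw board positions, two borne-off counts and a one-hot player marker into 30 entries. A rough Python equivalent, a sketch for illustration rather than repository code:

def board_features_quack_fat(board, player):
    positions = list(board)                      # 26 raw positions
    pos_sum = sum(v for v in positions if v > 0)
    neg_sum = sum(v for v in positions if v < 0)
    borne_off = [15 - pos_sum, -15 - neg_sum]    # borne-off checkers, signed per player
    on_turn = [1, 0] if player == 1 else [0, 1]  # one-hot marker for the player to move
    return tuple(positions + borne_off + on_turn)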

quack/setup.py Normal file

@@ -0,0 +1,9 @@
from distutils.core import setup, Extension
quack = Extension('quack',
sources = ['quack.c'])
setup (name = 'quack',
version = '0.1',
description = 'Quack Backgammon Tools',
ext_modules = [quack])
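
Assuming the extension builds cleanly, for example with python setup.py build_ext --inplace, the module can be exercised directly from Python. A small usage sketch; the board tuple is the standard starting position that also appears in the experimentation files below:

import quack

board = (0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5,
         -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0)

print(quack.is_move_valid(board, 1, 3, (1, 4)))   # move one checker from 1 to 4
print(len(quack.calc_moves(board, 1, 3)))         # number of boards reachable with a 3
print(quack.board_features_quack_fat(board, 1))   # the 30-entry quack-fat tuple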

report_docs.txt Normal file

@@ -0,0 +1,28 @@
<christoffer> Alexander and I rewrote part of our bachelor's project in C this past Friday.
<christoffer> You really have to watch your memory allocations.
<Jmaa> Yeah, absolutely.
<christoffer> I found a memory leak that was leaking 100 MiB of memory per second.
<Jmaa> Which part got C-ified?
<Jmaa> Damned
<christoffer> The cause was that we handed an object back to Python without decrementing its ref-count, so the interpreter still thought somebody needed it.
<christoffer> The part of the game logic that checks whether moves are valid.
<christoffer> It gets called many thousands of times per game, so we figured there might be a bit of optimization to gain from rewriting it in C.
<Jmaa> OK, so you haven't used alloc and free yourselves. That's still something.
<christoffer> The method itself got 7 times faster!
<Jmaa> Wow!
<christoffer> Oh, we did. We ended up doing that too.
<christoffer> We needed lists of variable size. That ended up as a struct with a "size" field and a "list" field.
<Jmaa> Does that speedup include the back-and-forth between C and Python?
<christoffer> It should, yes!
<Jmaa> Did it have any big effect on how fast you can evaluate?
<christoffer> I don't think there's much "back and forth" business. It looks like the code you write gets thrown pretty directly into the interpreter.
<christoffer> It made a big difference for when we do 1-ply.
<christoffer> "ply" is how many moves you look ahead.
<christoffer> So only looking at the immediately next move is 0-ply, which is what we have done until now.
<christoffer> 1-ply was too slow. It took about 6-7 seconds to evaluate a single move.
<christoffer> Alexander did some rewrites so TensorFlow computed it faster, and got it down to about 3-4 seconds *per game*.
<christoffer> Then we rewrote some of it in C, and now we're at about 2 seconds per game with 1-ply, which is pretty wild.
<christoffer> It's so nice that the Python interpreter can be extended with C!
<christoffer> caspervk, can you optimize your bachelor's project with a couple of C modules?
<Jmaa> That right there is a whole little section for the report.
<christoffer> Yeah. Just copying this in verbatim.
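
The leak described above is plain CPython reference counting. A tiny stand-alone illustration (CPython-specific, not project code) of how one extra reference keeps an object alive:

import sys

obj = []
print(sys.getrefcount(obj))  # typically 2: obj itself plus getrefcount's argument
extra = obj                  # a lasting extra reference, like the forgotten one in C
print(sys.getrefcount(obj))  # one higher; the object can no longer be collected
del extra                    # releasing it is the Python-level analogue of Py_DECREF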


@@ -16,8 +16,8 @@ pyparsing==2.2.0
 python-dateutil==2.7.2
 pytz==2018.3
 six==1.11.0
-tensorboard==1.6.0
-tensorflow==1.6.0
+tensorboard==1.8.0
+tensorflow==1.8.0
 termcolor==1.1.0
 Werkzeug==0.14.1
 pygame==1.9.3


@@ -0,0 +1,94 @@
import time
import numpy as np
import tensorflow as tf
from board import Board
import tensorflow.contrib.eager as tfe
tf.enable_eager_execution()
xavier_init = tf.contrib.layers.xavier_initializer()
opt = tf.train.MomentumOptimizer(learning_rate=0.1, momentum=1)
output_size = 1
hidden_size = 40
input_size = 30
model = tf.keras.Sequential([
tf.keras.layers.Dense(40, activation="sigmoid", kernel_initializer=tf.constant_initializer(-2), input_shape=(1,input_size)),
tf.keras.layers.Dense(1, activation="sigmoid", kernel_initializer=tf.constant_initializer(0.2))
])
# tfe.Saver(model.variables).restore(tf.train.latest_checkpoint("./"))
input = [0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0]
all_input = np.array([Board.board_features_quack_fat(input, 1) for _ in range(20)])
single_in = Board.board_features_quack_fat(input, 1)
start = time.time()
all_predictions = model.predict_on_batch(all_input)
learning_rate = 0.1
with tf.GradientTape() as tape:
value = model(single_in)
print("Before:", value)
grads = tape.gradient(value, model.variables)
print("/"*40,"model_variables","/"*40)
print(model.variables)
print("/"*40,"grads","/"*40)
print(grads)
difference_in_values = tf.reshape(tf.subtract(0.9, value, name='difference_in_values'), [])
for grad, train_var in zip(grads, model.variables):
backprop_calc = 0.1 * difference_in_values * grad
train_var.assign_add(backprop_calc)
value = model(single_in)
print("/"*40,"model_variables","/"*40)
print(model.variables)
print("After:", value)
# # grads = [0.1*val-np.random.uniform(-1,1)+grad for grad, trainable_var in zip(grads, model.variables)]
#
# # print(model.variables[0][0])
# weights_before = model.weights[0]
#
# start = time.time()
# #[trainable_var.assign_add(0.1*val-0.3+grad) for grad, trainable_var in zip(grads, model.variables)]
#
# start = time.time()
# for gradient, trainable_var in zip(grads, model.variables):
# backprop_calc = 0.1 * (0.9 - val) * gradient
# trainable_var.assign_add(backprop_calc)
#
# # opt.apply_gradients(zip(grads, model.variables))
#
# print(time.time() - start)
#
# print(model(single_in))
#
# vals = model.predict_on_batch(all_input)
# vals = list(vals)
# vals[3] = 4
# print(vals)
# print(np.argmax(np.array(vals)))
# tfe.Saver(model.variables).save("./tmp_ckpt")


@@ -0,0 +1,67 @@
import tensorflow as tf
import numpy as np
import time
class Everything:
def __init__(self):
self.output_size = 1
self.hidden_size = 40
self.input_size = 30
self.input = tf.placeholder('float', [1, self.input_size])
xavier_init = tf.contrib.layers.xavier_initializer()
W_1 = tf.get_variable("w_1", (self.input_size, self.hidden_size),
initializer=tf.constant_initializer(-2))
W_2 = tf.get_variable("w_2", (self.hidden_size, self.output_size),
initializer=tf.constant_initializer(0.2))
b_1 = tf.get_variable("b_1", (self.hidden_size,),
initializer=tf.zeros_initializer)
b_2 = tf.get_variable("b_2", (self.output_size,),
initializer=tf.zeros_initializer)
value_after_input = tf.sigmoid(tf.matmul(self.input, W_1) + b_1, name='hidden_layer')
self.value = tf.sigmoid(tf.matmul(value_after_input, W_2) + b_2, name='output_layer')
apply_gradients = []
trainable_vars = tf.trainable_variables()
gradients = tf.gradients(self.value, trainable_vars)
difference_in_values = tf.reshape(tf.subtract(0.9, self.value, name='difference_in_values'), [])
with tf.variable_scope('apply_gradients'):
for gradient, trainable_var in zip(gradients, trainable_vars):
backprop_calc = 0.1 * difference_in_values * gradient
grad_apply = trainable_var.assign_add(backprop_calc)
apply_gradients.append(grad_apply)
self.training_op = tf.group(*apply_gradients, name='training_op')
def eval(self):
input = np.array([0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0, 0, 0, 1, 0])
start = time.time()
sess = tf.Session()
sess.run(tf.global_variables_initializer())
for i in range(20):
val = sess.run(self.value, feed_dict={self.input: input.reshape(1,-1)})
print(time.time() - start)
print(val)
sess.run(self.training_op, feed_dict={self.input: input.reshape(1,-1)})
val = sess.run(self.value, feed_dict={self.input: input.reshape(1, -1)})
print(val)
everything = Everything()
everything.eval()