spring cleaning 1

This commit is contained in:
Christoffer Müller Madsen 2018-03-14 14:02:19 +01:00
parent fcc373c0d8
commit 5caae5b935
5 changed files with 36 additions and 53 deletions

37
bot.py
View File

@ -1,15 +1,12 @@
from cup import Cup from cup import Cup
import tensorflow as tf
from network import Network from network import Network
import numpy as np
from board import Board from board import Board
import subprocess
import tensorflow as tf
import numpy as np
import random import random
import sys
import pubeval
class Bot: class Bot:
def __init__(self, sym, config = None): def __init__(self, sym, config = None):
self.config = config self.config = config
self.cup = Cup() self.cup = Cup()
@ -20,17 +17,6 @@ class Bot:
self.network = Network(self.session, config) self.network = Network(self.session, config)
self.network.restore_model() self.network.restore_model()
def roll(self):
print("{} rolled: ".format(self.sym))
roll = self.cup.roll()
# print(roll)
return roll
def switch(self,cur):
return -1 if cur == 1 else 1
def restore_model(self): def restore_model(self):
with self.graph.as_default(): with self.graph.as_default():
self.network.restore_model() self.network.restore_model()
@ -44,22 +30,6 @@ class Bot:
def get_network(self): def get_network(self):
return self.network return self.network
def make_random_move(self, board, sym, roll):
legal_moves = Board.calculate_legal_states(board, sym, roll)
return random.choice(list(legal_moves))
# TODO: Test this, the score results should be deterministic
def make_pubeval_move(self, board, sym, roll):
legal_moves = Board.calculate_legal_states(tuple(board), sym, roll)
moves_and_scores = [(board, pubeval.eval(False, Board.board_features_to_pubeval(board, sym))) for board in legal_moves]
scores = [ x[1] for x in moves_and_scores ]
best_move_pair = moves_and_scores[np.array(scores).argmax()]
return best_move_pair
def make_move(self, board, sym, roll): def make_move(self, board, sym, roll):
# print(Board.pretty(board)) # print(Board.pretty(board))
legal_moves = Board.calculate_legal_states(board, sym, roll) legal_moves = Board.calculate_legal_states(board, sym, roll)
@ -69,4 +39,3 @@ class Bot:
#print("Found the best state, being:", np.array(move_scores).argmax()) #print("Found the best state, being:", np.array(move_scores).argmax())
return best_move_pair return best_move_pair

11
cup.py
View File

@ -1,10 +1,5 @@
from dice import Dice # on Christoffer's kill list
class Cup: class Cup:
def __init__(self):
self.dice_1 = Dice
self.dice_2 = Dice
def roll(self): def roll(self):
return [self.dice_1.roll(), self.dice_2.roll()] return ( random.randrange(1,7),
random.randrange(1,7) )

View File

@ -1,5 +0,0 @@
import random
class Dice:
    """A six-sided die."""

    @staticmethod
    def roll():
        """Return a random integer between 1 and 6 (both inclusive).

        Declared as a static method so the call works both on the class
        itself (``Dice.roll()``) and on an instance (``Dice().roll()``);
        without the decorator the instance form raises ``TypeError``
        because the function accepts no ``self`` argument.
        """
        # randrange excludes the stop value, hence 7 for a 1..6 result.
        return random.randrange(1, 7)

21
eval.py Normal file
View File

@ -0,0 +1,21 @@
from board import Board
import numpy as np
import pubeval
class Eval:
    """Move choosers that do not use the bot's own network.

    Both methods are static: they take the board, the player symbol and
    the dice roll explicitly and keep no state of their own.
    """

    @staticmethod
    def make_random_move(board, sym, roll):
        """Return one of the legal successor states, picked at random.

        Args:
            board: current board, passed unchanged to
                ``Board.calculate_legal_states``.
            sym: symbol of the player to move.
            roll: the dice roll to play.

        Returns:
            A single element of the legal states reported by ``Board``.

        Raises:
            IndexError: if ``Board`` reports no legal states
                (raised by ``random.choice`` on an empty sequence).
        """
        # Imported here because this module's import header does not bring
        # `random` into scope; without it the call below is a NameError.
        import random

        legal_moves = Board.calculate_legal_states(board, sym, roll)
        # Materialise as a list: random.choice needs an indexable sequence.
        return random.choice(list(legal_moves))

    # TODO: Test this, the score results should be deterministic
    @staticmethod
    def make_pubeval_move(board, sym, roll):
        """Return the legal successor state that pubeval scores highest.

        Args:
            board: current board; converted to a tuple before being
                handed to ``Board.calculate_legal_states``.
            sym: symbol of the player to move.
            roll: the dice roll to play.

        Returns:
            A ``(state, score)`` pair, where ``score`` is the value of
            ``pubeval.eval`` for that state. On ties the first maximum in
            iteration order wins (``argmax`` semantics).
        """
        legal_moves = Board.calculate_legal_states(tuple(board), sym, roll)
        # `state` (not `board`) so the loop variable does not shadow the
        # parameter of the same name.
        moves_and_scores = [
            (state, pubeval.eval(False, Board.board_features_to_pubeval(state, sym)))
            for state in legal_moves
        ]
        scores = [pair[1] for pair in moves_and_scores]
        best_move_pair = moves_and_scores[np.array(scores).argmax()]
        return best_move_pair

15
game.py
View File

@ -3,6 +3,7 @@ from player import Player
from bot import Bot from bot import Bot
from restore_bot import RestoreBot from restore_bot import RestoreBot
from cup import Cup from cup import Cup
from eval import Eval
import numpy as np import numpy as np
import sys import sys
@ -16,6 +17,7 @@ class Game:
self.p1 = None self.p1 = None
self.p2 = None self.p2 = None
# TODO remove this
self.cup = Cup() self.cup = Cup()
def set_up_bots(self): def set_up_bots(self):
@ -25,7 +27,7 @@ class Game:
def roll(self): def roll(self):
return self.cup.roll() return self.cup.roll()
def roll_and_find_best_for_bot(self): def best_move_and_score(self):
roll = self.roll() roll = self.roll()
move_and_val = self.p1.make_move(self.board, self.p1.get_sym(), roll) move_and_val = self.p1.make_move(self.board, self.p1.get_sym(), roll)
self.board = move_and_val[0] self.board = move_and_val[0]
@ -82,6 +84,7 @@ class Game:
def train_model(self, episodes=1000, save_step_size = 100, trained_eps = 0): def train_model(self, episodes=1000, save_step_size = 100, trained_eps = 0):
start_time = time.time() start_time = time.time()
def print_time_estimate(eps_completed): def print_time_estimate(eps_completed):
cur_time = time.time() cur_time = time.time()
time_diff = cur_time - start_time time_diff = cur_time - start_time
@ -98,14 +101,14 @@ class Game:
sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps)) sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
self.board = Board.initial_state self.board = Board.initial_state
prev_board, prev_board_value = self.roll_and_find_best_for_bot() prev_board, prev_board_value = self.best_move_and_score()
# find the best move here, make this move, then change turn as the # find the best move here, make this move, then change turn as the
# first thing inside of the while loop and then call # first thing inside of the while loop and then call
# roll_and_find_best_for_bot to get V_t+1 # best_move_and_score to get V_t+1
while Board.outcome(self.board) is None: while Board.outcome(self.board) is None:
self.next_round() self.next_round()
cur_board, cur_board_value = self.roll_and_find_best_for_bot() cur_board, cur_board_value = self.best_move_and_score()
self.p1.get_network().train(prev_board, cur_board_value) self.p1.get_network().train(prev_board, cur_board_value)
prev_board = cur_board prev_board = cur_board
@ -166,7 +169,7 @@ class Game:
roll = self.roll() roll = self.roll()
self.board = (self.p1.make_move(self.board, self.p1.get_sym(), roll))[0] self.board = (self.p1.make_move(self.board, self.p1.get_sym(), roll))[0]
roll = self.roll() roll = self.roll()
self.board = Board.flip(self.p2.make_random_move(Board.flip(self.board), self.p2.get_sym(), roll)) self.board = Board.flip(Eval.make_random_move(Board.flip(self.board), self.p2.get_sym(), roll))
sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1])) sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1]))
outcomes.append(Board.outcome(self.board)[1]) outcomes.append(Board.outcome(self.board)[1])
sys.stderr.write("\n") sys.stderr.write("\n")
@ -184,7 +187,7 @@ class Game:
roll = self.roll() roll = self.roll()
self.board = (self.p1.make_move(self.board, self.p1.get_sym(), roll))[0] self.board = (self.p1.make_move(self.board, self.p1.get_sym(), roll))[0]
roll = self.roll() roll = self.roll()
self.board = Board.flip(self.p2.make_pubeval_move(self.board, self.p2.get_sym(), roll)[0][0:26]) self.board = Board.flip(Eval.make_pubeval_move(self.board, self.p2.get_sym(), roll)[0][0:26])
sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1])) sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1]))
outcomes.append(Board.outcome(self.board)[1]) outcomes.append(Board.outcome(self.board)[1])
sys.stderr.write("\n") sys.stderr.write("\n")