spring cleaning 1

This commit is contained in:
Christoffer Müller Madsen 2018-03-14 14:02:19 +01:00
parent fcc373c0d8
commit 5caae5b935
5 changed files with 36 additions and 53 deletions

37
bot.py
View File

@ -1,15 +1,12 @@
from cup import Cup
import tensorflow as tf
from network import Network
import numpy as np
from board import Board
import subprocess
import tensorflow as tf
import numpy as np
import random
import sys
import pubeval
class Bot:
def __init__(self, sym, config = None):
self.config = config
self.cup = Cup()
@ -19,17 +16,6 @@ class Bot:
self.session = tf.Session()
self.network = Network(self.session, config)
self.network.restore_model()
def roll(self):
print("{} rolled: ".format(self.sym))
roll = self.cup.roll()
# print(roll)
return roll
def switch(self,cur):
return -1 if cur == 1 else 1
def restore_model(self):
with self.graph.as_default():
@ -44,22 +30,6 @@ class Bot:
def get_network(self):
return self.network
def make_random_move(self, board, sym, roll):
legal_moves = Board.calculate_legal_states(board, sym, roll)
return random.choice(list(legal_moves))
# TODO: Test this, the score results should be deterministic
def make_pubeval_move(self, board, sym, roll):
legal_moves = Board.calculate_legal_states(tuple(board), sym, roll)
moves_and_scores = [(board, pubeval.eval(False, Board.board_features_to_pubeval(board, sym))) for board in legal_moves]
scores = [ x[1] for x in moves_and_scores ]
best_move_pair = moves_and_scores[np.array(scores).argmax()]
return best_move_pair
def make_move(self, board, sym, roll):
# print(Board.pretty(board))
legal_moves = Board.calculate_legal_states(board, sym, roll)
@ -69,4 +39,3 @@ class Bot:
#print("Found the best state, being:", np.array(move_scores).argmax())
return best_move_pair

11
cup.py
View File

@ -1,10 +1,5 @@
from dice import Dice
# on Christoffer's kill list
class Cup:
def __init__(self):
self.dice_1 = Dice
self.dice_2 = Dice
def roll(self):
return [self.dice_1.roll(), self.dice_2.roll()]
return ( random.randrange(1,7),
random.randrange(1,7) )

View File

@ -1,5 +0,0 @@
import random
class Dice:
    """A single six-sided die."""

    @staticmethod
    def roll():
        """Return a random integer from 1 to 6 inclusive.

        Declared as a static method so that it can be called both on the
        class itself (``Dice.roll()``) and on an instance
        (``Dice().roll()``); without the decorator the instance call would
        fail because the function takes no ``self`` argument.
        """
        # randrange excludes the upper bound, hence 7 for a six-sided die.
        return random.randrange(1, 7)

21
eval.py Normal file
View File

@ -0,0 +1,21 @@
from board import Board
import numpy as np
import pubeval
class Eval:
    """Baseline move selectors used as opponents when evaluating a bot."""

    @staticmethod
    def make_random_move(board, sym, roll):
        """Pick one legal successor state at random.

        The candidates are whatever ``Board.calculate_legal_states`` returns
        for ``board``, ``sym`` and ``roll``; one of them is returned as-is.
        """
        # Imported locally: this module does not import ``random`` at the
        # top, so the call below would otherwise raise NameError.
        import random

        legal_moves = Board.calculate_legal_states(board, sym, roll)
        return random.choice(list(legal_moves))

    # TODO: Test this, the score results should be deterministic
    @staticmethod
    def make_pubeval_move(board, sym, roll):
        """Pick the legal successor state that pubeval scores highest.

        Returns a ``(state, score)`` pair, where ``score`` is the value of
        ``pubeval.eval`` for that state's pubeval feature encoding.
        """
        legal_moves = Board.calculate_legal_states(tuple(board), sym, roll)
        # NOTE(review): the first argument to pubeval.eval is presumably the
        # "race" flag -- confirm against the pubeval module.
        moves_and_scores = [
            (
                candidate,
                pubeval.eval(
                    False, Board.board_features_to_pubeval(candidate, sym)
                ),
            )
            for candidate in legal_moves
        ]
        scores = [pair[1] for pair in moves_and_scores]
        # argmax returns the first index of the maximum score.
        best_move_pair = moves_and_scores[np.array(scores).argmax()]
        return best_move_pair

15
game.py
View File

@ -3,6 +3,7 @@ from player import Player
from bot import Bot
from restore_bot import RestoreBot
from cup import Cup
from eval import Eval
import numpy as np
import sys
@ -16,6 +17,7 @@ class Game:
self.p1 = None
self.p2 = None
# TODO remove this
self.cup = Cup()
def set_up_bots(self):
@ -25,7 +27,7 @@ class Game:
def roll(self):
return self.cup.roll()
def roll_and_find_best_for_bot(self):
def best_move_and_score(self):
roll = self.roll()
move_and_val = self.p1.make_move(self.board, self.p1.get_sym(), roll)
self.board = move_and_val[0]
@ -82,6 +84,7 @@ class Game:
def train_model(self, episodes=1000, save_step_size = 100, trained_eps = 0):
start_time = time.time()
def print_time_estimate(eps_completed):
cur_time = time.time()
time_diff = cur_time - start_time
@ -98,14 +101,14 @@ class Game:
sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
self.board = Board.initial_state
prev_board, prev_board_value = self.roll_and_find_best_for_bot()
prev_board, prev_board_value = self.best_move_and_score()
# find the best move here, make this move, then change turn as the
# first thing inside of the while loop and then call
# roll_and_find_best_for_bot to get V_t+1
# best_move_and_score to get V_t+1
while Board.outcome(self.board) is None:
self.next_round()
cur_board, cur_board_value = self.roll_and_find_best_for_bot()
cur_board, cur_board_value = self.best_move_and_score()
self.p1.get_network().train(prev_board, cur_board_value)
prev_board = cur_board
@ -166,7 +169,7 @@ class Game:
roll = self.roll()
self.board = (self.p1.make_move(self.board, self.p1.get_sym(), roll))[0]
roll = self.roll()
self.board = Board.flip(self.p2.make_random_move(Board.flip(self.board), self.p2.get_sym(), roll))
self.board = Board.flip(Eval.make_random_move(Board.flip(self.board), self.p2.get_sym(), roll))
sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1]))
outcomes.append(Board.outcome(self.board)[1])
sys.stderr.write("\n")
@ -184,7 +187,7 @@ class Game:
roll = self.roll()
self.board = (self.p1.make_move(self.board, self.p1.get_sym(), roll))[0]
roll = self.roll()
self.board = Board.flip(self.p2.make_pubeval_move(self.board, self.p2.get_sym(), roll)[0][0:26])
self.board = Board.flip(Eval.make_pubeval_move(self.board, self.p2.get_sym(), roll)[0][0:26])
sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1]))
outcomes.append(Board.outcome(self.board)[1])
sys.stderr.write("\n")