This commit is contained in:
Alexander Munch-Hansen 2018-03-06 13:04:47 +01:00
parent 73f10158e5
commit d2b1f5d523
3 changed files with 19 additions and 25 deletions

11
bot.py
View File

@ -38,11 +38,10 @@ class Bot:
def make_move(self, board, sym, roll): def make_move(self, board, sym, roll):
# print(Board.pretty(board)) # print(Board.pretty(board))
legal_moves = Board.calculate_legal_states(board, sym, roll) legal_moves = Board.calculate_legal_states(board, sym, roll)
legal_list = list(legal_moves) moves_and_scores = [ (move, self.network.eval_state(np.array(move).reshape(1,26))) for move in legal_moves ]
lol = [] scores = [ x[1] for x in moves_and_scores ]
for move in legal_list: best_move = moves_and_scores[np.array(scores).argmax()][0]
lol.append(self.network.eval_state(np.array(move).reshape(1,26))) #print("Found the best state, being:", np.array(move_scores).argmax())
print("Found the best state, being:", np.array(lol).argmax()) return best_move
return [legal_list[np.array(lol).argmax()], max(lol)]
# return random.choice(list(legal_moves)) # return random.choice(list(legal_moves))

12
game.py
View File

@ -9,9 +9,9 @@ from cup import Cup
class Game: class Game:
def __init__(self): def __init__(self):
self.board = Board.initial_state self.board = Board.initial_state
self.p1 = Bot(1) self.p1 = Bot(1)
self.p2 = RestoreBot(-1) self.p2 = RestoreBot(1)
self.cup = Cup() self.cup = Cup()
def roll(self): def roll(self):
@ -22,11 +22,11 @@ class Game:
move_and_val = self.p1.make_move(self.board, self.p1.get_sym(), roll) move_and_val = self.p1.make_move(self.board, self.p1.get_sym(), roll)
self.board = move_and_val[0] self.board = move_and_val[0]
return move_and_val return move_and_val
def next_round(self): def next_round(self):
roll = self.roll() roll = self.roll()
print(roll) print(roll)
self.board = self.p2.make_move(self.board, self.p2.get_sym(),roll) self.board = Board.flip(self.p2.make_move(Board.flip(self.board), self.p2.get_sym(), roll))
return self.board return self.board
def board_state(self): def board_state(self):
@ -43,6 +43,9 @@ class Game:
self.p1.get_network().train(prev_board, cur_board_value) self.p1.get_network().train(prev_board, cur_board_value)
prev_board = cur_board prev_board = cur_board
self.next_round() self.next_round()
# print("-"*30)
# print(Board.pretty(self.board))
# print("/"*30)
print("Outcome:", Board.outcome(self.board)[1]) print("Outcome:", Board.outcome(self.board)[1])
outcomes.append(Board.outcome(self.board)[1]) outcomes.append(Board.outcome(self.board)[1])
final_score = np.array([ Board.outcome(self.board)[1] ]).reshape((1, 1)) final_score = np.array([ Board.outcome(self.board)[1] ]).reshape((1, 1))
@ -51,6 +54,7 @@ class Game:
if episode % 10 == 0: if episode % 10 == 0:
print("Saving...") print("Saving...")
self.p1.get_network().save_model() self.p1.get_network().save_model()
print(outcomes) print(outcomes)
def next_round_test(self): def next_round_test(self):

View File

@ -15,17 +15,7 @@ class RestoreBot:
with self.graph.as_default(): with self.graph.as_default():
self.session = tf.Session(graph = self.graph) self.session = tf.Session(graph = self.graph)
self.network = Network(self.session) self.network = Network(self.session)
self.network.restore_model() self.network.restore_model()
def roll(self):
print("{} rolled: ".format(self.sym))
roll = self.cup.roll()
print(roll)
return roll
def switch(self,cur):
return -1 if cur == 1 else 1
def get_sym(self): def get_sym(self):
return self.sym return self.sym
@ -33,9 +23,10 @@ class RestoreBot:
def make_move(self, board, sym, roll): def make_move(self, board, sym, roll):
# print(Board.pretty(board)) # print(Board.pretty(board))
legal_moves = Board.calculate_legal_states(board, sym, roll) legal_moves = Board.calculate_legal_states(board, sym, roll)
legal_list = list(legal_moves) moves_and_scores = [ (move, self.network.eval_state(np.array(move).reshape(1,26))) for move in legal_moves ]
move_scores = [ self.network.eval_state(np.array(move).reshape(1,26)) for move in legal_list ] scores = [ x[1] for x in moves_and_scores ]
print("Found the best state, being:", np.array(move_scores).argmax()) best_move = moves_and_scores[np.array(scores).argmax()][0]
return legal_list[np.array(move_scores).argmax()] #print("Found the best state, being:", np.array(move_scores).argmax())
return best_move
# return random.choice(list(legal_moves)) # return random.choice(list(legal_moves))