flips
This commit is contained in:
parent
73f10158e5
commit
d2b1f5d523
11
bot.py
11
bot.py
|
@ -38,11 +38,10 @@ class Bot:
|
||||||
def make_move(self, board, sym, roll):
|
def make_move(self, board, sym, roll):
|
||||||
# print(Board.pretty(board))
|
# print(Board.pretty(board))
|
||||||
legal_moves = Board.calculate_legal_states(board, sym, roll)
|
legal_moves = Board.calculate_legal_states(board, sym, roll)
|
||||||
legal_list = list(legal_moves)
|
moves_and_scores = [ (move, self.network.eval_state(np.array(move).reshape(1,26))) for move in legal_moves ]
|
||||||
lol = []
|
scores = [ x[1] for x in moves_and_scores ]
|
||||||
for move in legal_list:
|
best_move = moves_and_scores[np.array(scores).argmax()][0]
|
||||||
lol.append(self.network.eval_state(np.array(move).reshape(1,26)))
|
#print("Found the best state, being:", np.array(move_scores).argmax())
|
||||||
print("Found the best state, being:", np.array(lol).argmax())
|
return best_move
|
||||||
return [legal_list[np.array(lol).argmax()], max(lol)]
|
|
||||||
|
|
||||||
# return random.choice(list(legal_moves))
|
# return random.choice(list(legal_moves))
|
||||||
|
|
8
game.py
8
game.py
|
@ -11,7 +11,7 @@ class Game:
|
||||||
self.board = Board.initial_state
|
self.board = Board.initial_state
|
||||||
|
|
||||||
self.p1 = Bot(1)
|
self.p1 = Bot(1)
|
||||||
self.p2 = RestoreBot(-1)
|
self.p2 = RestoreBot(1)
|
||||||
self.cup = Cup()
|
self.cup = Cup()
|
||||||
|
|
||||||
def roll(self):
|
def roll(self):
|
||||||
|
@ -26,7 +26,7 @@ class Game:
|
||||||
def next_round(self):
|
def next_round(self):
|
||||||
roll = self.roll()
|
roll = self.roll()
|
||||||
print(roll)
|
print(roll)
|
||||||
self.board = self.p2.make_move(self.board, self.p2.get_sym(),roll)
|
self.board = Board.flip(self.p2.make_move(Board.flip(self.board), self.p2.get_sym(), roll))
|
||||||
return self.board
|
return self.board
|
||||||
|
|
||||||
def board_state(self):
|
def board_state(self):
|
||||||
|
@ -43,6 +43,9 @@ class Game:
|
||||||
self.p1.get_network().train(prev_board, cur_board_value)
|
self.p1.get_network().train(prev_board, cur_board_value)
|
||||||
prev_board = cur_board
|
prev_board = cur_board
|
||||||
self.next_round()
|
self.next_round()
|
||||||
|
# print("-"*30)
|
||||||
|
# print(Board.pretty(self.board))
|
||||||
|
# print("/"*30)
|
||||||
print("Outcome:", Board.outcome(self.board)[1])
|
print("Outcome:", Board.outcome(self.board)[1])
|
||||||
outcomes.append(Board.outcome(self.board)[1])
|
outcomes.append(Board.outcome(self.board)[1])
|
||||||
final_score = np.array([ Board.outcome(self.board)[1] ]).reshape((1, 1))
|
final_score = np.array([ Board.outcome(self.board)[1] ]).reshape((1, 1))
|
||||||
|
@ -51,6 +54,7 @@ class Game:
|
||||||
if episode % 10 == 0:
|
if episode % 10 == 0:
|
||||||
print("Saving...")
|
print("Saving...")
|
||||||
self.p1.get_network().save_model()
|
self.p1.get_network().save_model()
|
||||||
|
|
||||||
print(outcomes)
|
print(outcomes)
|
||||||
|
|
||||||
def next_round_test(self):
|
def next_round_test(self):
|
||||||
|
|
|
@ -17,25 +17,16 @@ class RestoreBot:
|
||||||
self.network = Network(self.session)
|
self.network = Network(self.session)
|
||||||
self.network.restore_model()
|
self.network.restore_model()
|
||||||
|
|
||||||
def roll(self):
|
|
||||||
print("{} rolled: ".format(self.sym))
|
|
||||||
roll = self.cup.roll()
|
|
||||||
print(roll)
|
|
||||||
return roll
|
|
||||||
|
|
||||||
|
|
||||||
def switch(self,cur):
|
|
||||||
return -1 if cur == 1 else 1
|
|
||||||
|
|
||||||
def get_sym(self):
|
def get_sym(self):
|
||||||
return self.sym
|
return self.sym
|
||||||
|
|
||||||
def make_move(self, board, sym, roll):
|
def make_move(self, board, sym, roll):
|
||||||
# print(Board.pretty(board))
|
# print(Board.pretty(board))
|
||||||
legal_moves = Board.calculate_legal_states(board, sym, roll)
|
legal_moves = Board.calculate_legal_states(board, sym, roll)
|
||||||
legal_list = list(legal_moves)
|
moves_and_scores = [ (move, self.network.eval_state(np.array(move).reshape(1,26))) for move in legal_moves ]
|
||||||
move_scores = [ self.network.eval_state(np.array(move).reshape(1,26)) for move in legal_list ]
|
scores = [ x[1] for x in moves_and_scores ]
|
||||||
print("Found the best state, being:", np.array(move_scores).argmax())
|
best_move = moves_and_scores[np.array(scores).argmax()][0]
|
||||||
return legal_list[np.array(move_scores).argmax()]
|
#print("Found the best state, being:", np.array(move_scores).argmax())
|
||||||
|
return best_move
|
||||||
|
|
||||||
# return random.choice(list(legal_moves))
|
# return random.choice(list(legal_moves))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user