Use a single bot class again: player 2 is now a plain Bot instead of a RestoreBot. Also lowered the learning rate from 0.3 to 0.1.

This commit is contained in:
Alexander Munch-Hansen 2018-03-07 14:44:17 +01:00
parent 11d25603cf
commit bae1e73692
3 changed files with 11 additions and 6 deletions

4
bot.py
View File

@ -27,6 +27,10 @@ class Bot:
def switch(self,cur): def switch(self,cur):
return -1 if cur == 1 else 1 return -1 if cur == 1 else 1
def restore_model(self):
with self.graph.as_default():
self.network.restore_model()
def get_session(self): def get_session(self):
return self.session return self.session

11
game.py
View File

@ -11,7 +11,7 @@ class Game:
self.board = Board.initial_state self.board = Board.initial_state
self.p1 = Bot(1) self.p1 = Bot(1)
self.p2 = RestoreBot(1) self.p2 = Bot(1)
self.cup = Cup() self.cup = Cup()
def roll(self): def roll(self):
@ -26,14 +26,14 @@ class Game:
def next_round(self): def next_round(self):
roll = self.roll() roll = self.roll()
#print(roll) #print(roll)
self.board = Board.flip(self.p2.make_move(Board.flip(self.board), self.p2.get_sym(), roll)) self.board = Board.flip(self.p2.make_move(Board.flip(self.board), self.p2.get_sym(), roll)[0])
return self.board return self.board
def board_state(self): def board_state(self):
return self.board return self.board
def train_model(self): def train_model(self):
episodes = 8000 episodes = 100
outcomes = [] outcomes = []
for episode in range(episodes): for episode in range(episodes):
self.board = Board.initial_state self.board = Board.initial_state
@ -57,10 +57,11 @@ class Game:
final_score = np.array([ Board.outcome(self.board)[1] ]).reshape((1, 1)) final_score = np.array([ Board.outcome(self.board)[1] ]).reshape((1, 1))
self.p1.get_network().train(prev_board, final_score) self.p1.get_network().train(prev_board, final_score)
print("trained episode {}".format(episode)) print("trained episode {}".format(episode))
if episode % 100 == 0: if episode % 10 == 0:
print("Saving...") print("Saving...")
self.p1.get_network().save_model() self.p1.get_network().save_model()
self.p2.restore_model() self.p2.restore_model()
print(sum(outcomes))
print(outcomes) print(outcomes)
print(sum(outcomes)) print(sum(outcomes))
@ -95,7 +96,7 @@ class Game:
roll = self.roll() roll = self.roll()
print("{} rolled: {}".format(self.p2.get_sym(), roll)) print("{} rolled: {}".format(self.p2.get_sym(), roll))
self.board = self.p2.make_move(self.board, self.p2.get_sym(), roll) self.board = self.p2.make_move(self.board, self.p2.get_sym(), roll)[0]
if Board.outcome(self.board)[1] > 0: if Board.outcome(self.board)[1] > 0:

View File

@ -10,7 +10,7 @@ class Config():
input_size = 26 input_size = 26
output_size = 1 output_size = 1
# Can't remember the best learning_rate, look this up # Can't remember the best learning_rate, look this up
learning_rate = 0.3 learning_rate = 0.1
checkpoint_path = "/tmp/" checkpoint_path = "/tmp/"