Now using only one bot again. Also changed the learning rate to 0.1.
This commit is contained in:
parent
11d25603cf
commit
bae1e73692
4
bot.py
4
bot.py
|
@ -27,6 +27,10 @@ class Bot:
|
||||||
def switch(self,cur):
|
def switch(self,cur):
|
||||||
return -1 if cur == 1 else 1
|
return -1 if cur == 1 else 1
|
||||||
|
|
||||||
|
def restore_model(self):
|
||||||
|
with self.graph.as_default():
|
||||||
|
self.network.restore_model()
|
||||||
|
|
||||||
def get_session(self):
|
def get_session(self):
|
||||||
return self.session
|
return self.session
|
||||||
|
|
||||||
|
|
11
game.py
11
game.py
|
@ -11,7 +11,7 @@ class Game:
|
||||||
self.board = Board.initial_state
|
self.board = Board.initial_state
|
||||||
|
|
||||||
self.p1 = Bot(1)
|
self.p1 = Bot(1)
|
||||||
self.p2 = RestoreBot(1)
|
self.p2 = Bot(1)
|
||||||
self.cup = Cup()
|
self.cup = Cup()
|
||||||
|
|
||||||
def roll(self):
|
def roll(self):
|
||||||
|
@ -26,14 +26,14 @@ class Game:
|
||||||
def next_round(self):
|
def next_round(self):
|
||||||
roll = self.roll()
|
roll = self.roll()
|
||||||
#print(roll)
|
#print(roll)
|
||||||
self.board = Board.flip(self.p2.make_move(Board.flip(self.board), self.p2.get_sym(), roll))
|
self.board = Board.flip(self.p2.make_move(Board.flip(self.board), self.p2.get_sym(), roll)[0])
|
||||||
return self.board
|
return self.board
|
||||||
|
|
||||||
def board_state(self):
|
def board_state(self):
|
||||||
return self.board
|
return self.board
|
||||||
|
|
||||||
def train_model(self):
|
def train_model(self):
|
||||||
episodes = 8000
|
episodes = 100
|
||||||
outcomes = []
|
outcomes = []
|
||||||
for episode in range(episodes):
|
for episode in range(episodes):
|
||||||
self.board = Board.initial_state
|
self.board = Board.initial_state
|
||||||
|
@ -57,10 +57,11 @@ class Game:
|
||||||
final_score = np.array([ Board.outcome(self.board)[1] ]).reshape((1, 1))
|
final_score = np.array([ Board.outcome(self.board)[1] ]).reshape((1, 1))
|
||||||
self.p1.get_network().train(prev_board, final_score)
|
self.p1.get_network().train(prev_board, final_score)
|
||||||
print("trained episode {}".format(episode))
|
print("trained episode {}".format(episode))
|
||||||
if episode % 100 == 0:
|
if episode % 10 == 0:
|
||||||
print("Saving...")
|
print("Saving...")
|
||||||
self.p1.get_network().save_model()
|
self.p1.get_network().save_model()
|
||||||
self.p2.restore_model()
|
self.p2.restore_model()
|
||||||
|
print(sum(outcomes))
|
||||||
|
|
||||||
print(outcomes)
|
print(outcomes)
|
||||||
print(sum(outcomes))
|
print(sum(outcomes))
|
||||||
|
@ -95,7 +96,7 @@ class Game:
|
||||||
|
|
||||||
roll = self.roll()
|
roll = self.roll()
|
||||||
print("{} rolled: {}".format(self.p2.get_sym(), roll))
|
print("{} rolled: {}".format(self.p2.get_sym(), roll))
|
||||||
self.board = self.p2.make_move(self.board, self.p2.get_sym(), roll)
|
self.board = self.p2.make_move(self.board, self.p2.get_sym(), roll)[0]
|
||||||
|
|
||||||
|
|
||||||
if Board.outcome(self.board)[1] > 0:
|
if Board.outcome(self.board)[1] > 0:
|
||||||
|
|
|
@ -10,7 +10,7 @@ class Config():
|
||||||
input_size = 26
|
input_size = 26
|
||||||
output_size = 1
|
output_size = 1
|
||||||
# Can't remember the best learning_rate, look this up
|
# Can't remember the best learning_rate, look this up
|
||||||
learning_rate = 0.3
|
learning_rate = 0.1
|
||||||
checkpoint_path = "/tmp/"
|
checkpoint_path = "/tmp/"
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user