Compare commits
2 Commits

master ... Tesaurofie

Author | SHA1 | Date
---|---|---
 | 20788292a4 |
 | 85ec8d8e4e |
board.py (32 changes)
@@ -35,7 +35,39 @@ class Board:
         board.append(-15 - sum(negatives))

         return tuple(board)

+    # The original Tesauro encoding also takes in the player, so [1,0] for one of them and [0,1] for the other
+    # Not sure if this should be included
+    @staticmethod
+    def map_to_tesauro(board):
+        features = []
+        for i in range(1, 25):
+            idx = board[i]
+            place = [0] * 8
+            if idx != 0:
+                if idx > 0:
+                    # First three checkers get a unit each; the rest pile onto slot 3.
+                    for j in range(min(int(idx), 3)):
+                        place[j] = 1.
+                    if idx > 3:
+                        place[3] += (idx - 3) / 2
+                else:
+                    # Mirror of the positive case, in slots 4-7.
+                    for j in range(min(abs(int(idx)), 3)):
+                        place[j + 4] = 1.
+                    if abs(idx) > 3:
+                        place[3 + 4] += (abs(idx) - 3) / 2
+            features += place
+
+        nega_hits = board[0] / 2
+        posi_hits = board[25] / 2
+        positives = [x if x > 0 else 0 for x in board]
+        negatives = [x if x < 0 else 0 for x in board]
+        posi_home = (15 - sum(positives)) / 15
+        nega_home = (-15 - sum(negatives)) / 15
+        features.append(nega_hits)
+        features.append(posi_hits)
+        features.append(posi_home)
+        features.append(nega_home)
+        return features
+
+
     @staticmethod
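The encoding yields 24 points x 8 indicators plus the two bar and two borne-off features, 196 inputs in total, which is exactly what input_size in network.py changes to below. A quick sanity check, assuming board.py from this branch is on the import path (a sketch, not part of the commit):

    from board import Board

    # 24 points x 8 indicators + 2 bar + 2 borne-off features == 196 (sketch)
    features = Board.map_to_tesauro(Board.initial_state)
    assert len(features) == 24 * 8 + 4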
eval.py (2 changes)
@@ -15,6 +15,8 @@ class Eval:
     @staticmethod
     def make_pubeval_move(board, sym, roll):
         legal_moves = Board.calculate_legal_states(board, sym, roll)
+        # print("Board:", board)
+        # print("Length:", len(board))
         moves_and_scores = [ ( board,
                                pubeval.eval(False, Board.board_features_to_pubeval(board, sym)))
                              for board
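The diff view truncates the comprehension at `for board`; it presumably closes over legal_moves and is followed by an argmax over the scores, mirroring make_move in network.py below. A hedged sketch of that completion, assuming the repo's pubeval and Board modules and numpy imported as np (the completion is a guess, not shown in the diff):

    # Assumed completion of the truncated hunk above; best_move is an
    # illustrative name, not from the diff.
    moves_and_scores = [ ( board,
                           pubeval.eval(False, Board.board_features_to_pubeval(board, sym)))
                         for board in legal_moves ]
    scores = [ x[1] for x in moves_and_scores ]
    best_move = moves_and_scores[np.array(scores).argmax()][0]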
network.py (33 changes)
@@ -10,7 +10,7 @@ from eval import Eval


 class Network:
     hidden_size = 40
-    input_size = 26
+    input_size = 196
     output_size = 1
     # Can't remember the best learning_rate, look this up
     learning_rate = 0.1
@@ -43,16 +43,19 @@
         b_2 = tf.get_variable("b_2", (Network.output_size,),
                               initializer=tf.zeros_initializer)

-        value_after_input = self.custom_tanh(tf.matmul(self.x, W_1) + b_1, name='hidden_layer')
+        value_after_input = tf.sigmoid(tf.matmul(self.x, W_1) + b_1, name='hidden_layer')

-        self.value = self.custom_tanh(tf.matmul(value_after_input, W_2) + b_2, name='output_layer')
+        self.value = tf.sigmoid(tf.matmul(value_after_input, W_2) + b_2, name='output_layer')

         # tf.reduce_sum basically finds the sum of its input, so this gives the
         # difference between the two values, in case they should be lists, which
         # they might be if our input changes

         # TODO: Alexander thinks that self.value will be computed twice (instead of once)
-        difference_in_values = tf.reduce_sum(self.value_next - self.value, name='difference')
+        difference_in_values = tf.reduce_sum(tf.subtract(self.value_next, self.value, name='difference'))

         trainable_vars = tf.trainable_variables()
         gradients = tf.gradients(self.value, trainable_vars)
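This hunk sets up difference_in_values and the per-variable gradients of self.value; the op that actually applies them falls outside the hunk, so the exact update is not shown. For intuition, a self-contained numpy sketch of the TD(0) rule these pieces typically implement, w += alpha * (V_t+1 - V_t) * dV_t/dw, here for a single sigmoid unit (all names illustrative, not from the repo):

    import numpy as np

    def sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    alpha = 0.1                   # matches Network.learning_rate
    w = np.zeros(196)             # one weight per Tesauro feature
    x = np.random.rand(196)       # features of the current position

    v = sigmoid(w @ x)            # V_t
    v_next = 0.8                  # V_t+1, from evaluating the successor
    grad = v * (1.0 - v) * x      # dV_t/dw for a sigmoid output
    w += alpha * (v_next - v) * grad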
@@ -140,7 +143,7 @@ class Network:
     # Have a circular dependency, #fuck, need to rewrite something
     def adjust_weights(self, board, v_next):
         # print("lol")
-        board = np.array(board).reshape((1,26))
+        board = np.array(board).reshape((1,-1))
         self.session.run(self.training_op, feed_dict = { self.x: board,
                                                          self.value_next: v_next })
@@ -156,7 +159,7 @@ class Network:
     def make_move(self, board, roll):
         # print(Board.pretty(board))
         legal_moves = Board.calculate_legal_states(board, 1, roll)
-        moves_and_scores = [ (move, self.eval_state(np.array(move).reshape(1,26))) for move in legal_moves ]
+        moves_and_scores = [ (move, self.eval_state(np.array(Board.map_to_tesauro(move)).reshape(1,-1))) for move in legal_moves ]
         scores = [ x[1] for x in moves_and_scores ]
         best_score_index = np.array(scores).argmax()
         best_move_pair = moves_and_scores[best_score_index]
@@ -181,20 +184,31 @@ class Network:
         outcomes = []
         for episode in range(1, episodes + 1):
             sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
             # TODO decide which player should be here
             player = 1

             roll = (random.randrange(1,7), random.randrange(1,7))

+            def tesaurofi(board):
+                return Board.map_to_tesauro(board)
+
             prev_board, _ = self.make_move(Board.flip(Board.initial_state) if player == -1 else Board.initial_state, roll)

             if player == -1:
                 prev_board = Board.flip(prev_board)

+            # print("board:", prev_board)
+            # print(len(prev_board))
+
             # find the best move here, make this move, then change turn as the
             # first thing inside of the while loop and then call
             # best_move_and_score to get V_t+1

             # i = 0
             while Board.outcome(prev_board) is None:
+                # print(prev_board)
                 # print("-"*30)
                 # print(i)
                 # print(roll)
@@ -206,10 +220,11 @@ class Network:
                 roll = (random.randrange(1,7), random.randrange(1,7))

                 cur_board, cur_board_value = self.make_move(Board.flip(prev_board) if player == -1 else prev_board, roll)
+                # print("pls", cur_board_value)
                 if player == -1:
                     cur_board = Board.flip(cur_board)

-                self.adjust_weights(prev_board, cur_board_value)
+                self.adjust_weights(tesaurofi(prev_board), cur_board_value)

                 prev_board = cur_board
@@ -217,7 +232,7 @@ class Network:
             sys.stderr.write("\t outcome {}".format(Board.outcome(final_board)[1]))
             outcomes.append(Board.outcome(final_board)[1])
             final_score = np.array([ Board.outcome(final_board)[1] ])
-            self.adjust_weights(prev_board, final_score.reshape((1, 1)))
+            self.adjust_weights(tesaurofi(prev_board), final_score.reshape((1, 1)))

             sys.stderr.write("\n")
@@ -265,7 +280,7 @@ class Network:
         board = Board.initial_state
         while Board.outcome(board) is None:
             roll = (random.randrange(1,7), random.randrange(1,7))
-            board = (self.p1.make_move(board, self.p1.get_sym(), roll))[0]
+            board = (self.p1.make_move(Board.map_to_tesauro(board), self.p1.get_sym(), roll))[0]
             roll = (random.randrange(1,7), random.randrange(1,7))
             board = Board.flip(Eval.make_random_move(Board.flip(board), 1, roll))
         sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))