Able to eval

Added tesauro + sigmoid
2018-03-20 21:51:58 +01:00 · 2018-03-20 17:29:29 +01:00
25 changed files with 604 additions and 3036 deletions
--- a/.gitignore
+++ b/.gitignore
@ -169,6 +169,3 @@ venv.bak/
 README.*
 !README.org
 models/
 .DS_Store
 bench/
--- a/actual_board.py
+++ b/actual_board.py
@ -1,427 +0,0 @@
 # TODO: The bar is only drawn for now; it is not functional. Bearing off (home) does not work either.
 # TODO: Snapping a checker back after an illegal drop is buggy: the move appears to be
 #       applied first, and the buckets are then not properly restored to their previous state.
 import random
 import pygame
 import threading
 from board import Board
 import numpy as np
 import time
 # --- constants --- (UPPER_CASE names)
 class Board_painter:
    def __init__(self):
        """Create the window, the checker rectangles and the initial game state.

        Side effects: initialises pygame, opens the display window and prints
        the initial dice roll.
        """
        # --- geometry (pixels) ---
        self.SCREEN_WIDTH = 1050
        self.SCREEN_HEIGHT = 400
        self.SPACING = 83.333
        # --- colours (RGB) ---
        self.SAND = (242, 209, 107)
        self.GREEN_FILT = (0, 102, 0)
        self.WHITE = (255, 255, 255)
        self.RED = (255, 0, 0)
        self.SALMON = (250, 128, 114)
        self.BLACK = (0, 0, 0)
        self.BROWN = (160, 82, 45)
        self.LIGHT_GREY = (220, 220, 220)
        self.num_pieces = 15
        self.FPS = 999

        # x coordinates at which the 22px checker squares start out.
        cen = self.SPACING/2 - 11
        t = 5*self.SPACING - cen-22
        m = 7*self.SPACING+50 - cen-22
        right = self.SCREEN_WIDTH-cen-22
        self.STARTING_IDX_P1 = [
            [cen, 0], [cen, 30], [cen, 60], [cen, 90], [cen, 120],
            [right, 0], [right, 30],
            [t, 378], [t, 348], [t, 318],
            [m, 378], [m, 348], [m, 318], [m, 288], [m, 258],
        ]
        self.STARTING_IDX_P2 = [
            [cen, 378], [cen, 348], [cen, 318], [cen, 288], [cen, 258],
            [right, 378], [right, 348],
            [t, 0], [t, 30], [t, 60],
            [m, 0], [m, 30], [m, 60], [m, 90], [m, 120],
        ]

        pygame.init()
        self.screen = pygame.display.set_mode((self.SCREEN_WIDTH, self.SCREEN_HEIGHT))
        pygame.display.set_caption("Backgammon")

        # One 22x22 rectangle per checker, keyed by player (-1 / 1).
        self.all_rects = {
            -1: [pygame.rect.Rect(x, y, 22, 22) for x, y in self.STARTING_IDX_P1],
            1: [pygame.rect.Rect(x, y, 22, 22) for x, y in self.STARTING_IDX_P2],
        }

        # Per-checker "is being dragged" flags.
        self.all_drag = {
            -1: [False] * self.num_pieces,
            1: [False] * self.num_pieces,
        }
        # Per-checker mouse offsets. Each entry must be its own list:
        # `[[0, 0]] * n` would make all n entries alias one list, so writing
        # one checker's offset would silently overwrite every other one.
        self.all_off = {
            -1: [[0, 0] for _ in range(self.num_pieces)],
            1: [[0, 0] for _ in range(self.num_pieces)],
        }

        self.is_true = False
        self.clock = pygame.time.Clock()
        # 26 [count, owner] pairs describing the checkers on each drawn slot.
        self.buckets = [[0,0],[5,-1],[0,0],[0,0],[0,0],[3,1],[0,0],[5,1],[0,0],[0,0],[0,0],[0,0],[2,-1],[5,1],[0,0],[0,0],[0,0],[3,-1],[0,0],[5,-1],[0,0],[0,0],[0,0],[0,0],[2,1],[0,0]]
        self.running = True
        self.player = -1
        self.roll = [random.randrange(1, 7), random.randrange(1, 7)]
        print("initial_roll:", self.roll)
        self.from_board = None
        # Copy each bucket so the snapshot does not share the inner lists
        # with the live `self.buckets`.
        self.from_buckets = [list(bucket) for bucket in self.buckets]
        self.from_locat = None
        self.total_moves = 0
    def switch_player(self):
        self.player *= -1
        print("CHANGED PLAYER!")
    def gen_buckets_from_board(self, board):
        meh = []
        for i in range(13,25):
            pin = board[i]
            # print(pin)
            meh.append([abs(pin), np.sign(pin)])
        for i in range(1,13):
            pin = board[i]
            meh.append([abs(pin), np.sign(pin)])
        return meh
    def gen_board_from_buckets(self, buckets):
        board = []
        board.append(buckets[0])
        for i in range(-2,-14,-1):
            board.append(buckets[i])
        for i in range(1,13):
            board.append(buckets[i])
        board.append(buckets[25])
        board = [x*y for x,y in board]
        return board
    def move_legal(self, from_board, buckets, roll):
        """Return True if `buckets` is a legal successor of `from_board`.

        The buckets are converted to a board and compared against every state
        that ``Board.calculate_legal_states`` reports for the current player
        and `roll`.
        """
        candidate = self.gen_board_from_buckets(buckets)
        reachable = Board.calculate_legal_states(from_board, self.player, roll)
        return candidate in [list(state) for state in list(reachable)]
    def find_pin(self, pos):
        SPACING = self.SPACING
        x,y = pos
        if 500 < x < 550:
            if y > 225:
                pin = 0
                idx = 0
            else:
                pin = 25
                idx = 25
        else:
            x -= 50 if x > 550 else 0
            if y < 175:
                pin = (13 + int(x / SPACING))
                idx = 1+int(x / SPACING)
            elif y > 225:
                pin = (12 - int(x / SPACING))
                idx = 13+ int(x / SPACING)
        return pin, idx
    # Find the y position based on the chosen pin
    def calc_pos(self, buckets, chosen):
        amount = buckets[chosen][0]
        print(chosen)
        SPACING = self.SPACING
        if chosen == 0:
            x = 525
            y = 350
        elif chosen == 25:
            x = 525
            y = 50
        else:
            if chosen > 12:
                # print("Amount at pin:", amount)
                y = 378 - (30 * amount)
                chosen -= 12
                x = (SPACING*(chosen-1))+(SPACING/2)
                x += 50 if x > 500 else 0
            else:
                y = 30 * amount
                x = (SPACING*(chosen-1))+(SPACING/2)
                x += 50 if x > 500 else 0
        return x,y
    def calc_move_sets(self, from_board, roll, player):
        """Return ``[legal_states, cost]`` pairs for each die and for their sum.

        One pair is produced per entry of `roll` (queried as ``[die, 0]``),
        followed by one pair for the summed roll (queried as ``[total, 0]``).
        Also prints `from_board`.
        """
        print("board!:",from_board)
        move_sets = []
        running_total = 0
        for die in roll:
            states = Board.calculate_legal_states(from_board, player, [die,0])
            move_sets.append([states, die])
            running_total += die
        combined = Board.calculate_legal_states(from_board, player, [running_total,0])
        move_sets.append([combined, running_total])
        return move_sets
    def calc_turn(self):
        player = self.player
        if self.total_moves == 0:
            return player * -1
        return player
    def handle_move(self, from_board, buckets, roll, player):
        """Consume the die (or dice) that produced the board in `buckets`.

        The first move set containing the new board decides the cost that is
        subtracted from ``self.total_moves``. A match on one of the first two
        sets zeroes that entry of ``self.roll``; a match on a later set resets
        ``self.roll`` to ``[0, 0]``. Prints the remaining move total.
        """
        target = self.gen_board_from_buckets(buckets)
        candidates = self.calc_move_sets(from_board, roll, player)
        for position, candidate in enumerate(candidates):
            candidate[0] = list(candidate[0])
            reachable = [list(state) for state in candidate[0]]
            if target not in reachable:
                continue
            self.total_moves -= candidate[1]
            if position < 2:
                self.roll[position] = 0
            else:
                self.roll = [0,0]
            break
        print("Total moves left:",self.total_moves)
    # while running:
    def paint_board(self):
        """Process pending pygame events for the current player and redraw.

        Steps, in order:
        1. If ``calc_turn()`` disagrees with ``self.player``, switch player
           and roll two new dice.
        2. For every queued event: start a drag (mouse down), resolve a drop
           (mouse up), or move the dragged checker (mouse motion).
        3. Redraw the board and tick the clock. Note that this happens inside
           the event loop, i.e. once per event, not once per call.
        """
        # - events -
        if self.player != self.calc_turn():
            self.switch_player()
            self.roll = [random.randrange(1, 7), random.randrange(1, 7)]
            self.total_moves = self.roll[0] + self.roll[1]
            print("Player:",self.player,"rolled:",self.roll)
        # Work on the current player's rectangles, drag flags and offsets.
        player = self.player
        rectangles_drag = self.all_drag[player]
        rectangles = self.all_rects[player]
        offsets = self.all_off[player]
        buckets = self.buckets
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                # NOTE(review): this assigns a local variable, not
                # self.running, so it has no effect outside this call.
                running = False
            elif event.type == pygame.MOUSEBUTTONDOWN:
                if event.button == 1:
                    # Which of the current player's checkers are under the cursor?
                    meh = [rect.collidepoint(event.pos) for rect in rectangles]
                    if any(meh):
                        # Index of the first checker that was hit.
                        is_true = np.where(meh)[0][0]
                        # NOTE(review): this inner check repeats the outer one.
                        if any(meh):
                            # print("GETTING CALLED")
                            rectangles_drag[is_true] = True
                            mouse_x, mouse_y = event.pos
                            # Need this to be a deepcopy :<
                            # Snapshot the buckets so an illegal drop can be undone.
                            self.from_buckets = []
                            for x in buckets:
                                tmp = []
                                for y in x:
                                    tmp.append(y)
                                self.from_buckets.append(tmp)
                            self.from_board = [x for x in self.gen_board_from_buckets(buckets)]
                            # print("From board in mousedown:", from_board)
                            pin, idx = self.find_pin(event.pos)
                            # NOTE(review): from_pin is never read afterwards.
                            from_pin = pin
                            # Lift one checker off the source bucket; clear
                            # the owner once the bucket is empty.
                            buckets[idx][0] -= 1
                            if buckets[idx][0] == 0:
                                buckets[idx][1] = 0
                            print("Location for mouse_down:", self.from_board)
                            # Remember where inside the rectangle it was grabbed
                            # and where the rectangle started, for snap-back.
                            offsets[is_true][0] = rectangles[is_true].x - mouse_x
                            offsets[is_true][1] = rectangles[is_true].y - mouse_y
                            self.from_locat = [rectangles[is_true].x, rectangles[is_true].y]
            elif event.type == pygame.MOUSEBUTTONUP:
                if event.button == 1:
                    meh = [rect.collidepoint(event.pos) for rect in rectangles]
                    if any(meh):
                        is_true = np.where(meh)[0][0]
                        pin, idx = self.find_pin(event.pos)
                        x, y = self.calc_pos(buckets,idx)
                        # Need to take care of bar stuff :<
                        # Dropping on a single enemy checker: move that
                        # checker to the bar bucket (0 or 25).
                        if (buckets[idx][1] == player*-1) and buckets[idx][0] == 1:
                            to_idx = 0 if buckets[idx][1] == 1 else 25
                            enemy_rects = self.all_rects[player*-1]
                            # Have some check if we're looking for either rects in the bottom or top,
                            # instead of having both here
                            neg_tester = [rect.collidepoint(x,y-30) for rect in enemy_rects]
                            pos_tester = [rect.collidepoint(x,y+30) for rect in enemy_rects]
                            print("Neg tester:",neg_tester)
                            print("Pos tester:",pos_tester)
                            # NOTE(review): if neither tester hits, `enemy` is
                            # unbound and the assignments below raise.
                            if any(neg_tester):
                                enemy = np.where(neg_tester)[0][0]
                            elif any(pos_tester):
                                enemy = np.where(pos_tester)[0][0]
                            buckets[to_idx][0] += 1
                            buckets[to_idx][1] = buckets[idx][1]
                            bar_x, bar_y = self.calc_pos(buckets, to_idx)
                            enemy_rects[enemy].x = bar_x
                            enemy_rects[enemy].y = bar_y
                            buckets[idx][0] = 0
                            print("In here"*20)
                        # Recompute the drop position (the bucket may have
                        # just been emptied) and place the checker there.
                        pin, idx = self.find_pin(event.pos)
                        x, y = self.calc_pos(buckets,idx)
                        buckets[idx][0] += 1
                        buckets[idx][1] = player
                        # print(self.from_board)
                        # print("To  :",self.gen_board_from_buckets(buckets))
                        # print(move_legal(from_board, buckets, [1,2]))
                        # if self.move_legal(self.from_board, buckets, self.roll):
                        pot_board = self.gen_board_from_buckets(buckets)
                        sets = self.calc_move_sets(self.from_board, self.roll, player)
                        print("potential board:",pot_board)
                        # print("board:",pot_board)
                        # Is the resulting board in any of the legal move sets?
                        truth_values = []
                        for t in sets:
                            b = [list(c) for c in list(t)[0]]
                            if pot_board in list(b):
                                truth_values.append(pot_board in list(b))
                        print("Truth values:",truth_values)
                        if any(truth_values):
                            # Legal: consume the dice and snap to the target.
                            self.handle_move(self.from_board, buckets, self.roll, player)
                            # print("From:",self.gen_board_from_buckets(self.from_buckets))
                            # print("WOHO!"*10)
                            rectangles_drag[is_true] = False
                            rectangles[is_true].x = x
                            rectangles[is_true].y = y
                        else:
                            # Illegal: restore the bucket snapshot and put
                            # the checker back where it was picked up.
                            # NOTE(review): this rebinds self.buckets, so the
                            # local `buckets` is stale for the rest of this
                            # call; a captured enemy rectangle moved above is
                            # not moved back.
                            # print("From:",self.gen_board_from_buckets(self.from_buckets))
                            self.buckets = []
                            for x in self.from_buckets:
                                tmp = []
                                for y in x:
                                    tmp.append(y)
                                self.buckets.append(tmp)
                            rectangles_drag[is_true] = False
                            rectangles[is_true].x = self.from_locat[0]
                            rectangles[is_true].y = self.from_locat[1]
                        # print("End :",self.gen_board_from_buckets(buckets))
            elif event.type == pygame.MOUSEMOTION:
                # Move the first checker flagged as dragged with the cursor.
                if any(rectangles_drag):
                    is_true = np.where(rectangles_drag)[0][0]
                    mouse_x, mouse_y = event.pos
                    rectangles[is_true].x = mouse_x + offsets[is_true][0]
                    rectangles[is_true].y = mouse_y + offsets[is_true][1]
            # NOTE(review): everything below is still inside the event loop,
            # so nothing is drawn and the clock does not tick when no events
            # are queued. Confirm whether that is intended.
            self.screen.fill(self.GREEN_FILT)
            # pygame.draw.polygon(screen, (RED), [[0, 0], [50,0],[25,100]], 2)
            color = self.LIGHT_GREY
            x = 0
            y = 150
            # for _ in range(2):
            # Upper row of 12 triangles, skipping the bar at x = 500..550.
            for i in range(12):
                if x < 500 and x+self.SPACING > 500:
                    x = 550
                color = self.SALMON if color == self.LIGHT_GREY else self.LIGHT_GREY
                pygame.draw.polygon(self.screen, color, [[x, 0], [x+self.SPACING, 0], [(2*x+self.SPACING)/2, y]])
                x += self.SPACING
                # y += 50
            x = 0
            y = 250
            # for _ in range(2):
            # Flip once more so the lower row starts on the other colour.
            color = self.SALMON if color == self.LIGHT_GREY else self.LIGHT_GREY
            # Lower row of 12 triangles.
            for i in range(12):
                if x < 500 and x+self.SPACING > 500:
                    x = 550
                color = self.SALMON if color == self.LIGHT_GREY else self.LIGHT_GREY
                pygame.draw.polygon(self.screen, color, [[x, 400], [x+self.SPACING, 400], [(2*x+self.SPACING)/2, y]])
                x += self.SPACING
            # print(gen_board_from_buckets(buckets))
            # The bar, then all checkers (player -1 in red, player 1 in black).
            pygame.draw.rect(self.screen, self.BROWN, pygame.rect.Rect((500, 0, 50, 400)))
            for p in [-1,1]:
                for rect in self.all_rects[p]:
                    pygame.draw.rect(self.screen, self.RED if p == -1 else self.BLACK, rect)
            pygame.display.flip()
            # - constant game speed / FPS -
            self.clock.tick(self.FPS)
    def test(self):
        """Run the paint loop while ``self.running`` is truthy, then quit pygame.

        The original looped on ``while True``, which made the trailing
        ``pygame.quit()`` unreachable. Looping on ``self.running`` (set to
        True in ``__init__``) lets the loop stop once that flag is cleared,
        and ``try/finally`` shuts pygame down even when the loop is left by
        an exception or Ctrl+C.
        """
        try:
            while self.running:
                self.paint_board()
        finally:
            pygame.quit()
 # Module-level entry point: build the painter (this opens the window) and
 # start its loop. This runs on import as well as on direct execution.
 b = Board_painter()
 b.test()
--- a/app.py
+++ b/app.py
@ -1,141 +0,0 @@
 from flask import Flask, request, jsonify
 from flask_json import FlaskJSON, as_json_p
 from flask_cors import CORS
 from board import Board
 from eval import Eval
 import main
 import random
 from network import Network
 # Flask application with JSON/JSONP helpers and CORS enabled.
 app = Flask(__name__)
 app.config['JSON_ADD_STATUS'] = False
 app.config['JSON_JSONP_OPTIONAL'] = False
 json = FlaskJSON(app)
 CORS(app)
 # Start from main's default config and override the entries used for serving.
 config = main.config.copy()
 config['model'] = "player_testings"
 # NOTE(review): ply is set as the string "0", not an int; confirm Network expects that.
 config['ply'] = "0"
 config['board_representation'] = 'tesauro'
 # Build the network and load its saved model at import time.
 network = Network(config, config['model'])
 network.restore_model()
 def calc_move_sets(from_board, roll, player):
    """Return ``[legal_states, cost]`` pairs for each die and for the full roll.

    One pair is produced per entry of `roll` (queried as ``[die, 0]``),
    followed by one pair for the whole `roll` whose cost is the sum of the dice.
    """
    move_sets = []
    running_total = 0
    for die in roll:
        single = Board.calculate_legal_states(from_board, player, [die, 0])
        move_sets.append([single, die])
        running_total += die
    combined = Board.calculate_legal_states(from_board, player, roll)
    move_sets.append([combined, running_total])
    return move_sets
 def tmp_name(from_board, to_board, roll, player, total_moves, is_quad=False):
    """Validate `to_board` against the legal move sets and consume dice.

    Returns ``(total_moves, roll, board)``. If `to_board` is found in a move
    set, that set's cost is subtracted from `total_moves`, the matching die
    is zeroed (or `roll` is replaced by ``[0, 0]`` when the match is beyond
    the single-die sets) and `to_board` is returned. Otherwise the inputs
    come back unchanged together with `from_board`.
    """
    move_sets = calc_move_sets(from_board, roll, player)
    print("To board:\n",to_board)
    print("All sets:\n",move_sets)
    # Number of leading sets that correspond to a single die.
    single_die_sets = 4 if is_quad else 2
    result_board = from_board
    for position, move_set in enumerate(move_sets):
        move_set[0] = list(move_set[0])
        states, cost = move_set
        if to_board not in states:
            continue
        total_moves -= cost
        if position < single_die_sets:
            roll[position] = 0
        else:
            roll = [0, 0]
        result_board = to_board
        break
    return total_moves, roll, result_board
 def calc_move_stuff(from_board, to_board, roll, player, total_roll, is_quad):
    """Wrap ``tmp_name``: pass a list copy of `roll`, return ``(board, total_moves, roll)``."""
    mutable_roll = list(roll)
    remaining, updated_roll, result_board = tmp_name(
        from_board, to_board, mutable_roll, player, total_roll, is_quad)
    return result_board, remaining, updated_roll
@app.route('/get_board', methods=['GET'])
@as_json_p
 def get_board():
    return {'board':'0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0'}
 def check_move(prev, curr):
    """Return True if `curr` is a legal successor of `prev`.

    The check is currently hard-wired to player -1 and the roll ``[1, 2]``.
    """
    # TODO: Decide on player system and implement roll properly
    reachable = Board.calculate_legal_states(tuple(prev), -1, [1,2])
    wanted = list(curr)
    matches = [wanted == list(state) for state in reachable]
    return any(matches)
@app.route('/bot_move', methods=['POST'])
 def bot_move():
    data = request.get_json(force=True)
    board = [int(x) for x in data['board'].split(',')]
    use_pubeval = bool(data['pubeval'])
    roll = (random.randrange(1, 7), random.randrange(1, 7))
    if use_pubeval:
        board, value = Eval.make_pubeval_move(tuple(board), 1, roll)
    else:
        board, _ = network.make_move(tuple(board), roll, 1)
    # print("Board!:",board)
    return ",".join([str(x) for x in list(board)])
@app.route('/post_board', methods=['POST'])
 def post_board():
    data = request.get_json(force=True)
    # TODO: Fix hardcoded player
    player = -1
    board = [int(x) for x in data['board'].split(',')]
    prev_board = [int(x) for x in data['prevBoard'].split(',')]
    print(data['roll'])
    roll = [int(x) for x in data['roll'].split(',')]
    print(roll)
    quad = data['quad'] == "true"
    # print(board)
    total_roll = int(data['totalRoll'])
    print("total roll is:", total_roll)
    return_board, total_moves, roll = calc_move_stuff(tuple(prev_board), tuple(board), tuple(roll), player, total_roll, quad)
    str_board = ",".join([str(x) for x in return_board])
    str_roll = ",".join([str(x) for x in roll])
    return_string = str_board + "#" + str(total_moves) + "#" + str_roll
    print(return_string)
    return return_string
 if __name__ == '__main__':
    # Serve on every network interface (0.0.0.0), port 35270, when run directly.
    app.run(host = '0.0.0.0', port=35270)
--- a/bin/0-ply-tests.rb
+++ b/bin/0-ply-tests.rb
@ -1,78 +0,0 @@
 def run_stuff(board_rep, model_name, ply)
  # Create a model with the given board representation (one training episode),
  # then alternate evaluation (dumbeval, then pubeval) with 2000-episode
  # training rounds until 200000 episodes have been scheduled.
  #
  # Commands are passed to `system` as argument lists rather than one
  # interpolated string, so no shell is involved and a model name containing
  # spaces or shell metacharacters reaches main.py as a single argument.
  epi_count = 0
  ply_arg = ply.to_s
  system("python3", "main.py", "--train", "--model", model_name, "--board-rep", board_rep, "--episodes", "1", "--ply", ply_arg)
  while epi_count < 200000 do
    %w[dumbeval pubeval].each do |eval_method|
      system("python3", "main.py", "--eval", "--model", model_name, "--eval-methods", eval_method, "--episodes", "250", "--ply", ply_arg, "--repeat-eval", "3")
    end
    system("python3", "main.py", "--train", "--model", model_name, "--episodes", "2000", "--ply", ply_arg)
    epi_count += 2000
  end
 end
 ### ///////////////////////////////////////////////////////////////
 # 0-PLY TESTING: QUACK, QUACK-FAT, QUACK-NORM, TESAURO
 ### ///////////////////////////////////////////////////////////////
 # Runs sequentially, one per board representation, with the model named
 # "<board_rep>_test_0_ply". The 1-ply variants ("<board_rep>_test_1_ply",
 # ply = 1) are currently disabled.
 %w[quack quack-fat quack-norm tesauro].each do |board_rep|
   run_stuff(board_rep, "#{board_rep}_test_0_ply", 0)
 end
--- a/bin/train-evaluate-save
+++ b/bin/train-evaluate-save
@ -1,69 +0,0 @@
 #!/usr/bin/env ruby
 MODELS_DIR = 'models'
 def save(model_name)
  # Archive models/<model_name> as a timestamped .tar.gz under models/saves,
  # with the trained-episode count (read from `episodes_trained`) in the name.
  require 'date'
  trained_file = File.join(MODELS_DIR, model_name, 'episodes_trained')
  episode_count = File.read(trained_file).to_i
  puts "Found model #{model_name} with episodes #{episode_count} trained!"
  timestamp = Time.now.strftime('%Y%m%d-%H%M%S')
  archive_name = "model-#{model_name}-#{episode_count}-#{timestamp}.tar.gz"
  save_path = File.join(MODELS_DIR, 'saves', archive_name)
  puts "Saving to #{save_path}"
  tar_command = ["tar", "-cvzf", save_path, "-C", MODELS_DIR, model_name]
  system(*tar_command)
 end
 def train(model, episodes)
  # Train `model` for `episodes` more episodes via main.py.
  command = ["python3", "main.py", "--train", "--model", model, "--episodes", episodes.to_s]
  system(*command)
 end
 def force_train(model, episodes)
  # Like `train`, but passes --force-creation to main.py.
  command = ["python3", "main.py", "--train", "--force-creation", "--model", model, "--episodes", episodes.to_s]
  system(*command)
 end
 def evaluate(model, episodes, method)
  # Evaluate `model` over `episodes` games with the given evaluation method.
  command = ["python3", "main.py", "--eval", "--model", model, "--episodes", episodes.to_s, "--eval-methods", method]
  system(*command)
 end
 # Usage: train-evaluate-save MODEL
 model = ARGV[0]
 raise "no model specified" if model.nil?

 # Bootstrap a model that does not exist yet: create it with a short training
 # run, save it, and take baseline evaluations.
 # `File.exist?` replaces `File.exists?`, which is deprecated and was removed
 # in Ruby 3.2.
 unless File.exist?(File.join(MODELS_DIR, model))
  force_train model, 10
  save model
  3.times do
    evaluate model, 250, "pubeval"
  end
  3.times do
    evaluate model, 250, "dumbeval"
  end
 end

 # Previous schedule, kept for reference: save, train 1000, save, train 1000,
 # then 3x pubeval and 3x dumbeval per iteration.

 # Main loop, runs until interrupted: save, train 500 episodes, then
 # evaluate five times against each benchmark.
 loop do
  save model
  train model, 500
  5.times do
    evaluate model, 250, "pubeval"
  end
  5.times do
    evaluate model, 250, "dumbeval"
  end
 end
--- a/board.py
+++ b/board.py
@ -1,4 +1,3 @@
 import quack
 import numpy as np
 import itertools
@ -13,9 +12,15 @@ class Board:
    @staticmethod
    def idxs_with_checkers_of_player(board, player):
-        return quack.idxs_with_checkers_of_player(board, player)
+        idxs = []
        for idx, checker_count in enumerate(board):
            if checker_count * player >= 1:
                idxs.append(idx)
        return idxs
    # TODO: Write a test for this
    # TODO: Make sure that the bars fit, 0 represents the -1 player and 25 represents the 1 player
    # index 26 is player 1 home, index 27 is player -1 home
    @staticmethod
    def board_features_to_pubeval(board, player):
@ -26,161 +31,124 @@ class Board:
        board = list(board)
        positives = [x if x > 0 else 0 for x in board]
        negatives = [x if x < 0 else 0 for x in board]
-        board.append( 15 - sum(positives))
+        board.append(15 - sum(positives))
        board.append(-15 - sum(negatives))
        return tuple(board)
-    # quack
+    # The original tesauro also takes in the player, so [1,0] for one of them and [0,1] for the other
    # Not sure if this should be included
    @staticmethod
-    def board_features_quack(board, player):
+    def map_to_tesauro(board):
-        board = list(board)
+        features = []
-        board += ([1, 0] if np.sign(player) > 0 else [0, 1])
+        for i in range(1,25):
-        return np.array(board).reshape(1,28)
+            idx = list(board)[i]
            place = [0]*8
            if (idx != 0):
                if idx > 0:
                    for i in range(min(int(idx),3)):
                        place[i]=1.
                    if idx>3:
                        place[3]+=(idx-3)/2
                else:
                    for i in range(min(abs(int(idx)),3)):
                        place[i+4]=1.
                    if idx>3:
                        place[3+4]+=(idx-3)/2    
            features+=place
-    # quack-fat
+        nega_hits = list(board)[0]/2 
-    @staticmethod
+        posi_hits = list(board)[25]/2 
    def board_features_quack_fat(board, player):
        return np.array(quack.board_features_quack_fat(board,player)).reshape(1,30)
        # board = list(board)
        # positives = [x if x > 0 else 0 for x in board]
        # negatives = [x if x < 0 else 0 for x in board]
        # board.append( 15 - sum(positives))
        # board.append(-15 - sum(negatives))
        # board += ([1, 0] if np.sign(player) > 0 else [0, 1])
        # return np.array(board).reshape(1,30)
    # quack-fatter
    @staticmethod
    def board_features_quack_norm(board, player):
        board = list(board)
        positives = [x if x > 0 else 0 for x in board]
        negatives = [x if x < 0 else 0 for x in board]
-        board[0] = board[0] / 2
+        posi_home = ((15 - sum(positives))/15)
-        board[25] = board[25] / 2
+        nega_home = ((-15 - sum(negatives))/15)
-
+        features.append(nega_hits)
-        board = [board[x] if x == 0 or 25 else board[x] / 15 for x in range(0, 26)]
+        features.append(posi_hits)
-
+        features.append(posi_home)
-        board.append(15 - sum(positives))
+        features.append(nega_home)
-        board.append(-15 - sum(negatives))
+#        print(features)
-        board += ([1, 0] if np.sign(player) > 0 else [0, 1])
+        return features
        return np.array(board).reshape(1, 30)
    # tesauro
    @staticmethod
    def board_features_tesauro(board, cur_player):
        """Encode `board` as a (1, 198) array.

        For each player (1 first, then -1): four values per position 1..24,
        one bar value and one value derived from
        ``Board.num_of_checkers_for_player``. Two final values mark whose
        turn it is (``[1, 0]`` for player 1, ``[0, 1]`` otherwise).
        """
        few_checkers = {1: (1,0,0,0), 2: (1,1,0,0), 3: (1,1,1,0)}

        def ordinary_trans(val, player):
            # Unary code for up to three checkers; the fourth slot carries
            # half of whatever exceeds three.
            abs_val = val * player
            if abs_val <= 0:
                return (0,0,0,0)
            if abs_val in few_checkers:
                return few_checkers[abs_val]
            return (1,1,1, (abs_val - 3) / 2)

        def bar_trans(board, player):
            if player == 1:
                return (abs(board[0]/2),)
            if player == -1:
                return (abs(board[25]/2),)

        encoded = []
        for side in [1,-1]:
            for checkers in board[1:25]:
                encoded += ordinary_trans(checkers, side)
            encoded += bar_trans(board, side)
            encoded += (15 - Board.num_of_checkers_for_player(board, side),)
        encoded += ([1, 0] if cur_player == 1 else [0, 1])
        return np.array(encoded).reshape(1, 198)
    @staticmethod
    def board_features_tesauro_fat(board, cur_player):
        """Encode `board` with a 15-slot unary code per position.

        For each player (1 first, then -1): fifteen values per position
        1..24 (as many leading ones as the player has checkers there), one
        bar value and one value derived from
        ``Board.num_of_checkers_for_player``. Two final values mark whose
        turn it is. The result has shape ``(1, number_of_features)``.
        """
        def ordinary_trans(val, player):
            abs_val = val*player
            if abs_val <= 0:
                return (0,) * 15
            if abs_val in range(1, 16):
                ones = int(abs_val)
                return (1,) * ones + (0,) * (15 - ones)
            # Any other value falls through and yields None.

        def bar_trans(board, player):
            if player == 1:
                return (abs(board[0]/2),)
            if player == -1:
                return (abs(board[25]/2),)

        encoded = []
        for side in [1, -1]:
            for checkers in board[1:25]:
                encoded += ordinary_trans(checkers, side)
            encoded += bar_trans(board, side)
            encoded += (15 - Board.num_of_checkers_for_player(board, side),)
        encoded += ([1, 0] if cur_player == 1 else [0, 1])
        return np.array(encoded).reshape(1, len(encoded))
    @staticmethod
    def board_features_tesauro_wrong(board, cur_player):
        features = []
        for player in [-1,1]:
            sum = 0.0
            for board_range in range(1,25):
                pin = board[board_range]
                #print("PIIIN:",pin)
                feature = [0.0]*4
                if np.sign(pin) == np.sign(player):
                    sum += abs(pin)
                    for i in range(min(abs(pin), 3)):
                        feature[i] = 1
                        if (abs(pin) > 3):
                            feature[3] = (abs(pin)-3)/2
                features += feature
            #print("SUUUM:",sum)
            # Append the amount of men on the bar of the current player divided by 2
            features.append((board[0] if np.sign(player) < 0 else board[25]) / 2.0)
            # Calculate how many pieces there must be in the home state and divide it by 15
            features.append((15 - sum) / 15)
        features += ([1,0] if np.sign(cur_player) > 0 else [0,1])
        test = np.array(features)
        #print("TEST:",test)
        return test.reshape(1,198)
    @staticmethod
    def is_move_valid(board, player, face_value, move):
-        return quack.is_move_valid(board, player, face_value, move)
+        def sign(a):
            return (a > 0) - (a < 0)
        from_idx   = move[0]
        to_idx     = move[1]
        to_state   = None
        from_state = board[from_idx]
        delta      = to_idx - from_idx
        direction  = sign(delta)
        bearing_off = None
        # FIXME: Use get instead of array-like indexing
        if to_idx >= 1 and to_idx <= 24:
            to_state   = board[to_idx]
            bearing_off = False
        else:  # Bearing off
            to_state   = 0
            bearing_off = True
        # print("_"*20)
        # print("board:", board)
        # print("to_idx:", to_idx, "board[to_idx]:", board[to_idx], "to_state:", to_state)
        # print("+"*20)
        def is_forward_move():
            return direction == player
        def face_value_match_move_length():
            return abs(delta) == face_value
        def bear_in_if_checker_on_bar():
            if player == 1:
                bar = 0
            else:
                bar = 25
            bar_state = board[bar]
            if bar_state != 0:
                return from_idx == bar
            else:
                return True
        def checkers_at_from_idx():
            return sign(from_state) == player
        def no_block_at_to_idx():
            if -sign(to_state) == player:
                return abs(to_state) == 1
            else:
                return True
        def can_bear_off():
            checker_idxs = Board.idxs_with_checkers_of_player(board, player)
            def is_moving_backmost_checker():
                if player == 1:
                    return all([(idx >= from_idx) for idx in checker_idxs])
                else:
                    return all([(idx <= from_idx) for idx in checker_idxs])
            def all_checkers_in_last_quadrant():
                if player == 1:
                    return all([(idx >= 19) for idx in checker_idxs])
                else:
                    return all([(idx <= 6) for idx in checker_idxs])
            return all([ is_moving_backmost_checker(),
                         all_checkers_in_last_quadrant() ])
            # TODO: add switch here instead of wonky ternary in all        
        return all([ is_forward_move(),
                     face_value_match_move_length(),
                     bear_in_if_checker_on_bar(),
                     checkers_at_from_idx(),
                     no_block_at_to_idx(),
                     can_bear_off() if bearing_off else True ])
    @staticmethod
    def any_move_valid(board, player, roll):
@ -220,37 +188,40 @@ class Board:
    @staticmethod
-    def apply_moves_to_board(board, player, move):
+    def apply_moves_to_board(board, player, moves):
-        from_idx = move[0]
+        for move in moves:
-        to_idx = move[1]
+            from_idx, to_idx = move.split("/")
-        board = list(board)
+            board[int(from_idx)] -= int(player)
-        board[from_idx] -= player
+            board[int(to_idx)] += int(player)
-
+        return board
        if (to_idx < 1 or to_idx > 24):
            return
        if (board[to_idx] * player == -1):
            if (player == 1):
                board[25] -= player
            else:
                board[0] -= player
            board[to_idx] = 0
        board[to_idx] += player
        return tuple(board)
    @staticmethod
    def calculate_legal_states(board, player, roll):
        # Find all points with checkers on them belonging to the player
        # Iterate through each index and check if it's a possible move given the roll
        # TODO: make sure that it is not possible to do nothing on first part of
        #       turn and then do something with the second die
        def calc_moves(board, face_value):
-            if face_value == 0:
+            idxs_with_checkers = Board.idxs_with_checkers_of_player(board, player)
            if len(idxs_with_checkers) == 0:
                return [board]
-            return quack.calc_moves(board, player, face_value)
+            boards = [(Board.do_move(board,
                               player,
                               (idx, idx + (face_value * player)))
                       if Board.is_move_valid(board,
                                              player,
                                              face_value,
                                              (idx, idx + (face_value * player)))
                       else None)
                      for idx in idxs_with_checkers]
            board_list = list(filter(None, boards))  # Remove None-values
            # if len(board_list) == 0:
            #     return [board]
            return board_list
        # Problem with calc_moves: the method can return an empty list (it should always contain at least the unchanged board).
        #               *Update*: Seems to be fixed.
@ -263,18 +234,26 @@ class Board:
        if not Board.any_move_valid(board, player, roll):
            return { board }
        dice_permutations = list(itertools.permutations(roll)) if roll[0] != roll[1] else [[roll[0]]*4]
-        #print("Permuts:",dice_permutations)
+        
        # print("Dice permuts:",dice_permutations)
        for roll in dice_permutations:
            # Calculate boards resulting from first move
            #print("initial board: ", board)
            #print("roll:", roll)
            boards = calc_moves(board, roll[0])
            #print("boards after first die: ", boards)
            for die in roll[1:]:
                # Calculate boards resulting from second move
                nested_boards = [calc_moves(board, die) for board in boards]
                #print("nested boards: ", nested_boards)
                boards = [board for boards in nested_boards for board in boards]
                # FIXME: the flattening above reuses the names `boards`/`board` and is hard to follow
                #for board in boards:
                #    print(board)
                #    print("type__:",type(board))
                # Add resulting unique boards to set of legal boards resulting from roll
                #print("printing boards from calculate_legal_states: ", boards)
@ -303,9 +282,9 @@ class Board:
        return """
  13  14  15  16  17  18               19  20  21  22  23  24
 +--------------------------------------------------------------------------+
-| {13}| {14}| {15}| {16}| {17}| {18}| bar -1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end  1: TODO|
+| {12}| {11}| {10}| {9}| {8}| {7}| bar -1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end -1: TODO|
 |---|---|---|---|---|---|------------|---|---|---|---|---|---|             |
-| {12}| {11}| {10}| {9}| {8}| {7}| bar  1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end -1: TODO|
+| {13}| {14}| {15}| {16}| {17}| {18}| bar  1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end  1: TODO|
 +--------------------------------------------------------------------------+
  12  11  10   9   8   7                6   5   4   3   2   1 
 """.format(*temp)
@ -313,8 +292,42 @@ class Board:
    @staticmethod
    def do_move(board, player, move):
        """Return the board produced by `player` making `move` on `board`.

        The move is assumed to be valid; callers must check validity first.

        NOTE(review): as written, the first statement delegates to
        quack.do_move(...) and returns, so every line after it is
        unreachable. The pure-Python implementation below reads `move` as
        (from_idx, to_idx) and returns a tuple. TODO confirm which of the
        two implementations is meant to be live.
        """
        # Implies that move is valid; make sure to check move validity before calling do_move(...)
        return quack.do_move(board, player, move)
        def move_to_bar(board, to_idx):
            # Send the single opposing checker on to_idx to the opponent's bar:
            # index 25 is adjusted when player == 1, index 0 otherwise.
            board = list(board)
            if player == 1:
                board[25] -= player
            else:
                board[0] -= player
            # Clear the point so the moving checker can be put down on it
            board[to_idx] = 0
            return board
        # TODO: Moving in from bar is handled by the representation
        # TODONE: Handle bearing off
        from_idx = move[0]
        #print("from_idx: ", from_idx)
        to_idx = move[1]
        #print("to_idx: ", to_idx)
        # pdb.set_trace()
        board = list(board) # Make mutable copy of board
        # 'Lift' checker
        board[from_idx] -= player
        # Handle bearing off: a target outside 1..24 means the checker leaves the board
        if to_idx < 1 or to_idx > 24:
            return tuple(board)
        # Handle hitting checkers: exactly one opposing checker sits on the target
        if board[to_idx] * player == -1:
            board = move_to_bar(board, to_idx)
        # Put down checker
        board[to_idx] += player
        return tuple(board)
    @staticmethod
    def flip(board):
--- a/bot.py
+++ b/bot.py
@ -1,8 +1,24 @@
 from cup import Cup
 from network import Network
 from board import Board
 import tensorflow as tf
 import numpy as np
 import random
 class Bot:
-    def __init__(self, sym):
+    def __init__(self, sym, config = None, name = "unnamed"):
        self.config = config
        self.cup = Cup()
        self.sym = sym
        self.graph = tf.Graph()
        self.network = Network(config, name)
        self.network.restore_model()
    def restore_model(self):
        """Restore the network's model while this bot's graph is the default graph."""
        # self.graph is installed as the default graph for the duration of the call
        with self.graph.as_default():
            self.network.restore_model()
    def get_session(self):
        """Return the session stored on this bot.

        NOTE(review): no assignment to self.session is visible in this
        class's __init__ -- confirm it is set elsewhere before relying on this.
        """
        return self.session
@ -10,60 +26,16 @@ class Bot:
    def get_sym(self):
        """Return the symbol this bot was constructed with."""
        return self.sym
    def get_network(self):
        """Return the Network instance owned by this bot."""
        return self.network
-    def calc_move_sets(self, from_board, roll, player):
+    # TODO: DEPRECATE
-        board = from_board
+    def make_move(self, board, sym, roll):
-        sets = []
+        # print(Board.pretty(board))
-        total = 0
+        legal_moves = Board.calculate_legal_states(board, sym, roll)
-        print("board!:",board)
+        moves_and_scores = [ (move, self.network.eval_state(np.array(move).reshape(1,26))) for move in legal_moves ]
-        for r in roll:
+        scores = [ x[1] for x in moves_and_scores ]
-            # print("Value of r:",r)
+        best_move_pair = moves_and_scores[np.array(scores).argmax()]
-            sets.append([Board.calculate_legal_states(board, player, [r,0]), r])
+        #print("Found the best state, being:", np.array(move_scores).argmax())
-            total += r
+        return best_move_pair
        sets.append([Board.calculate_legal_states(board, player, [total,0]), total])
        return sets
    def handle_move(self, from_board, to_board, roll, player):
        """Account for the dice used up by going from from_board to to_board.

        The candidate sets come from self.calc_move_sets(from_board, roll,
        player); each entry is indexed as [boards, pip_value]. The first
        entry whose boards contain to_board is charged against
        self.total_moves. Entries 0 and 1 zero the matching slot of
        self.roll; any later entry resets self.roll to [0,0]. The remaining
        total is printed in every case.
        """
        candidates = self.calc_move_sets(from_board, roll, player)
        for position, entry in enumerate(candidates):
            # The entry itself is updated to hold a list, exactly as before
            entry[0] = list(entry[0])
            reachable = [list(candidate) for candidate in entry[0]]
            if to_board not in reachable:
                continue
            self.total_moves -= entry[1]
            if position >= 2:
                self.roll = [0,0]
            else:
                self.roll[position] = 0
            break
        print("Total moves left:",self.total_moves)
    def tmp_name(self, from_board, to_board, roll, player, total_moves):
        """Check a proposed board against the legal move sets and charge the dice.

        self.calc_move_sets(from_board, roll, player) yields entries indexed
        as [boards, pip_value]. If to_board (a list) equals one of the boards
        of an entry, that entry's pip_value is subtracted from total_moves and
        the dice are consumed: entries 0 and 1 zero the matching slot of
        `roll` in place, any later entry replaces `roll` with [0,0].

        Returns a tuple (total_moves, roll, board) where board is to_board
        when a match was found and from_board otherwise.
        """
        sets = self.calc_move_sets(from_board, roll, player)
        return_board = from_board
        for idx, board_set in enumerate(sets):
            # Keep the entry intact: rebinding board_set to its boards (as the
            # old code did) made board_set[1] index a board instead of the
            # pip value.
            boards = list(board_set[0])
            pip_value = board_set[1]
            if to_board in [list(board) for board in boards]:
                total_moves -= pip_value
                # if it's not the sum of the moves
                if idx < 2:
                    roll[idx] = 0
                else:
                    roll = [0,0]
                return_board = to_board
                break
        return total_moves, roll, return_board
    def make_human_move(self, board, player, roll):
        # Prompt on stdin for moves until the pip total of the roll is used up.
        # NOTE(review): in the visible lines previous_board is never reassigned
        # and nothing is returned -- confirm whether the method continues
        # beyond this point.
        total_moves = roll[0] + roll[1]
        previous_board = board
        while total_moves != 0:
            move = input("Pick a move!\n")
            # The raw input string is handed to Board.apply_moves_to_board as the move
            to_board = Board.apply_moves_to_board(previous_board, player, move)
            # tmp_name checks to_board against the legal sets and returns the updated state
            total_moves, roll, board = self.tmp_name(board, to_board, roll, player, total_moves)
--- a/dumbeval/.gitignore
+++ b/dumbeval/.gitignore
@ -1 +0,0 @@
 build/
--- a/dumbeval/dumbeval.c
+++ b/dumbeval/dumbeval.c
@ -1,194 +0,0 @@
 #include <Python.h>
 static PyObject* DumbevalError;
 static float x[122];
 /* With apologies to Gerry Tesauro */
 /* Weights generated by weights.py */
 static const float wc[122] = {
 -1.91222,  1.45979,  0.40657, -1.39159,  3.64558, -0.45381, -0.03157,
  0.14539,  0.80232,  0.87558,  2.36202, -2.01887, -0.88918,  2.65871,
 -1.31587,  1.07476,  0.30491, -1.32892,  0.38018, -0.30714, -1.16178,
  0.71481, -1.01334, -0.44373,  0.51255, -0.17171, -0.88886,  0.02071,
 -0.53279, -0.22139, -1.02436,  0.17948,  0.95697,  0.49272,  0.31848,
 -0.58293,  0.14484,  0.22063,  1.0336 , -1.90554,  1.10291, -2.05589,
 -0.16964, -0.82442,  1.27217, -1.24968, -0.90372,  0.05546,  0.2535 ,
 -0.03533, -0.31773,  0.43704,  0.21699,  0.10519,  2.12775, -0.48196,
 -0.08445, -0.13156, -0.68362,  0.64765,  0.32537,  0.79493,  1.94577,
 -0.63827,  0.97057, -0.46039,  1.51801, -0.62955, -0.43632,  0.25876,
 -0.46623, -0.46963,  1.3532 , -0.07362, -1.53211,  0.69676, -0.92407,
  0.07153,  0.67173,  0.27661, -0.51579, -0.49019,  1.06603, -0.97673,
 -1.21231, -1.54966, -0.07795,  0.32697,  0.02873,  1.38703,  0.41725,
  0.78326, -0.7257 ,  0.54165,  1.38882,  0.27304,  1.0739 ,  0.74654,
  1.35561,  1.18697,  1.09146,  0.17552, -0.30773,  0.27812, -1.674  ,
 -0.31073, -0.40745,  0.51546, -1.10875,  2.0081 , -1.27931, -1.16321,
  0.95652,  0.7487 , -0.2347 ,  0.20324, -0.41417,  0.05929,  0.72632,
 -1.15223,  1.2745 , -0.15947 };
 static const float wr[122] = {
 0.13119, -0.13164, -1.2736 ,  1.06352, -1.34749, -1.03086, -0.27417,
 -0.27762,  0.79454, -1.12623,  2.1134 , -0.7003 ,  0.26056, -1.13518,
 -1.64548, -1.30828, -0.96589, -0.36258, -1.14323, -0.2006 , -1.00307,
  0.57739, -0.62693,  0.29721, -0.36996, -0.17462,  0.96704,  0.08902,
  1.4337 , -0.47107,  0.82156,  0.14988,  1.74034,  1.13313, -0.32083,
 -0.00048, -0.86622,  1.12808,  0.99875,  0.8049 , -0.16841, -0.42677,
 -1.9409 , -0.53565, -0.83708,  0.69603,  0.32079,  0.56942,  0.67965,
  1.49328, -1.65885,  0.96284,  0.63196, -0.27504,  0.39174,  0.71225,
 -0.3614 ,  0.88761,  1.12882,  0.77764,  1.02618, -0.20245, -0.39245,
 -1.56799,  1.04888, -1.20858, -0.24361, -1.85157, -0.16912,  0.50512,
 -2.93122,  0.70477, -0.93066,  1.74867,  0.23963, -0.00699, -1.27183,
 -0.30604,  1.71039,  0.82202, -1.36734, -1.08352, -1.25054,  0.49436,
 -1.5037 , -0.73143,  0.74189,  0.32365,  0.30539, -0.72169,  0.41088,
 -1.56632, -0.63526,  0.58779, -0.05653,  0.76713, -1.40898, -0.33683,
  1.86802,  0.59773,  1.28668, -0.65817,  2.46829, -0.09331,  2.9034 ,
  1.04809,  0.73222, -0.44372,  0.53044, -1.9274 , -1.57183, -1.14068,
  1.26036, -0.9296 ,  0.06662, -0.26572, -0.30862,  0.72915,  0.98977,
  0.63513, -1.43917, -0.12523 };
 void setx(int pos[])
 {
        /* Fill the input vector x[] from the board position pos[]. */
        extern float x[];
        int point, base, count;

        /* start from an all-zero input vector */
        for (point = 0; point < 122; ++point)
            x[point] = 0.0;

        /* board locations 24 down to 1, five inputs per location */
        for (point = 1; point <= 24; ++point) {
            count = pos[25 - point];
            if (count == 0)
                continue;
            base = 5 * (point - 1);
            if (count == -1) x[base + 0] = 1.0;
            if (count == 1)  x[base + 1] = 1.0;
            if (count >= 2)  x[base + 2] = 1.0;
            if (count == 3)  x[base + 3] = 1.0;
            if (count >= 4)  x[base + 4] = (float)(count - 3)/2.0;
        }

        /* opponent's men on the bar */
        x[120] = -(float)(pos[0])/2.0;

        /* computer's men off the board */
        x[121] = (float)(pos[26])/15.0;
 }
 float dumbeval(int race, int pos[])
 {
        /* Backgammon move-selection evaluation function for benchmark
           comparisons.  Computes a linear evaluation Score = W * X, where
           X is an input vector encoding the board state (a raw encoding of
           the number of men at each location) and W is a weight vector.
           Separate weight vectors are used for racing positions and
           contact positions.  Makes lots of obvious mistakes, but provides
           a decent level of play for benchmarking purposes. */

        /* Provided as a public service to the backgammon programming
           community by Gerry Tesauro, IBM Research.
           (e-mail: tesauro@watson.ibm.com) */

        /* Inputs:
           race   must be chosen from the INITIAL position BEFORE the move:
                  1 if the position is a race (i.e. no contact), 0 if it is
                  a contact position.
           pos[]  integer array of dimension 28 holding a legal final board
                  state after the move.  Elements 1-24 are board locations
                  1-24 from the computer's point of view: the computer's men
                  move in the negative direction from 24 to 1 and the
                  opponent's men move in the positive direction from 1 to
                  24.  Computer's men are positive integers, opponent's men
                  are negative integers.  Element 25 is the computer's men
                  on the bar (positive), element 0 the opponent's men on the
                  bar (negative), element 26 the computer's men off the
                  board (positive) and element 27 the opponent's men off the
                  board (negative). */

        /* The weights are the compiled-in tables wr[] (race) and wc[]
           (contact); there is no rdwts() step in this file. */
        const float *weights;
        float score;
        int i;

        /* all men off, best possible move */
        if (pos[26] == 15)
            return(99999999.);

        setx(pos); /* fills the input array x[] */

        /* race positions use wr[], contact positions use wc[] */
        weights = race ? wr : wc;

        score = 0.0;
        for (i = 0; i < 122; ++i)
            score += weights[i]*x[i];
        return(score);
 }
 static PyObject*
 dumbeval_eval(PyObject *self, PyObject *args) {
  /* Python entry point: eval(race, board) -> float.
     race  is interpreted as a boolean ("p" format).
     board must be a tuple of exactly 28 integers, otherwise
           dumbeval.error is raised; a non-integer element propagates the
           conversion error instead of being evaluated as -1. */
  int race;
  Py_ssize_t numValues;
  long value;
  int board[28];
  float eval_score;
  PyObject* tuple_obj;
  PyObject* val_obj;
  if (! PyArg_ParseTuple(args, "pO!", &race, &PyTuple_Type, &tuple_obj))
    return NULL;
  numValues = PyTuple_Size(tuple_obj);
  if (numValues < 0) return NULL;
  if (numValues != 28) {
    PyErr_SetString(DumbevalError, "Tuple must have 28 entries");
    return NULL;
  }
  // Iterate over tuple to retrieve positions
  for (Py_ssize_t i=0; i<numValues; i++) {
    val_obj = PyTuple_GetItem(tuple_obj, i);  /* borrowed reference */
    if (val_obj == NULL) return NULL;
    value = PyLong_AsLong(val_obj);
    /* -1 is also a legal value, so the error indicator decides */
    if (value == -1 && PyErr_Occurred()) return NULL;
    board[i] = (int)value;
  }
  eval_score = dumbeval(race, board);
  return Py_BuildValue("f", eval_score);
 }
 /* Method table of the module: a single function, eval, taking positional
    arguments only; the all-NULL entry terminates the table. */
 static PyMethodDef dumbeval_methods[] = {
  {
    "eval", dumbeval_eval, METH_VARARGS,
    "Returns evaluation results for the given board position."
  },
  {NULL, NULL, 0, NULL}
 };
 /* Module definition: name, docstring, per-module state size (-1, i.e. the
    module keeps its state in globals) and the method table. */
 static struct PyModuleDef dumbeval_definition = {
  PyModuleDef_HEAD_INIT,
  "dumbeval",
  "A Python module that implements Gerald Tesauro's pubeval function for evaluation backgammon positions with badly initialized weights.",
  -1,
  dumbeval_methods
 };
 PyMODINIT_FUNC PyInit_dumbeval(void) {
  PyObject* module;
  module = PyModule_Create(&dumbeval_definition);
  if (module == NULL)
    return NULL;
  DumbevalError = PyErr_NewException("dumbeval.error", NULL, NULL);
  Py_INCREF(DumbevalError);
  PyModule_AddObject(module, "error", DumbevalError);
  return module;
 }
--- a/dumbeval/setup.py
+++ b/dumbeval/setup.py
@ -1,9 +0,0 @@
 from distutils.core import setup, Extension
 # Describe the C extension: one module named 'dumbeval' built from dumbeval.c.
 dumbeval = Extension('dumbeval',
                    sources = ['dumbeval.c'])
 # Register the extension with the build.
 # NOTE(review): distutils is deprecated (PEP 632) and absent from Python 3.12+;
 # confirm the target interpreter before relying on this script.
 setup (name = 'dumbeval',
       version = '0.1',
       description = 'Dumbeval for Python',
       ext_modules = [dumbeval])
--- a/dumbeval/weights.py
+++ b/dumbeval/weights.py
@ -1,14 +0,0 @@
 #!/usr/bin/env python3
 import re
 import sys

 import numpy as np
 # Print every element with five decimals and without scientific notation.
 # sys.maxsize switches summarisation off; NumPy rejects NaN as a threshold.
 # (The former `re.DOTALL = True` only overwrote a module constant and has
 # been dropped; the pattern below carries its own flag.)
 np.set_printoptions(precision=5, suppress=True, threshold=sys.maxsize)
 def random_array_string():
    """Return 122 standard-normal samples formatted as a C array initializer."""
    formatted = np.array2string(np.random.normal(0,1,122), separator=', ')
    # (?s) lets `.` span the line breaks NumPy inserts; inline global flags
    # must come first in the pattern (an error otherwise on Python 3.11+).
    return re.sub(r'(?s)^\[(.*)\]$', r'{\n\1 };', formatted)
 print("/* Weights generated by weights.py */")
 print("static const float wc[122] =", random_array_string())
 print()
 print("static const float wr[122] =", random_array_string())
--- a/eval.py
+++ b/eval.py
@ -2,7 +2,6 @@ from board import Board
 import numpy as np
 import pubeval
 import dumbeval
 class Eval:
@ -16,6 +15,8 @@ class Eval:
    @staticmethod
    def make_pubeval_move(board, sym, roll):
        legal_moves = Board.calculate_legal_states(board, sym, roll)
       # print("Board:", board)
       # print("Length:",len(board))
        moves_and_scores = [ ( board,
                               pubeval.eval(False, Board.board_features_to_pubeval(board, sym)))
                             for board
@ -25,16 +26,4 @@ class Eval:
        return best_move_pair
    @staticmethod
    def make_dumbeval_move(board, sym, roll):
        """Pick the legal successor state that dumbeval scores highest.

        Returns the (state, score) pair with the largest score among the
        states produced by Board.calculate_legal_states(board, sym, roll).
        """
        candidates = Board.calculate_legal_states(board, sym, roll)
        scored = []
        for candidate in candidates:
            features = Board.board_features_to_pubeval(candidate, sym)
            # The race flag is always passed as False
            scored.append((candidate, dumbeval.eval(False, features)))
        values = np.array([pair[1] for pair in scored])
        return scored[values.argmax()]
--- a/game.py
+++ b/game.py
@ -23,21 +23,18 @@ class Game:
    def roll(self):
        """Roll the game's cup and return whatever self.cup.roll() produces."""
        return self.cup.roll()
-    '''
+
    def best_move_and_score(self):
        """Roll, let player 1 choose a move, and adopt the resulting board.

        Returns whatever p1.make_move(...) returned; its first element
        becomes the new self.board.
        """
        dice = self.roll()
        player = self.p1
        outcome = player.make_move(self.board, player.get_sym(), dice)
        self.board = outcome[0]
        return outcome
    '''
    '''
    def next_round(self):
        roll = self.roll()
        #print(roll)
        self.board = Board.flip(self.p2.make_move(Board.flip(self.board), self.p2.get_sym(), roll)[0])
        return self.board
    '''
    def board_state(self):
        """Return the board currently held by this game."""
        return self.board
--- a/main.py
+++ b/main.py
@ -2,7 +2,38 @@ import argparse
 import sys
 import os
 import time
-import subprocess
+
 model_storage_path = 'models'
 # Create models folder
 if not os.path.exists(model_storage_path):
    os.makedirs(model_storage_path)
 # Define helper functions
 def log_train_outcome(outcome, trained_eps = 0):
    """Append a summary of one training session to <model_path>/logs/train.log.

    outcome     -- sequence of per-episode results; must be non-empty.
    trained_eps -- number of episodes trained so far.

    The line written is "time;trained_eps;count;sum;mean". The statistics
    are computed from the `outcome` argument (the old code read the global
    `train_outcome` and ignored its parameter).
    """
    format_vars = { 'trained_eps': trained_eps,
                    'count': len(outcome),
                    'sum': sum(outcome),
                    'mean': sum(outcome) / len(outcome),
                    'time': int(time.time())
    }
    with open(os.path.join(config['model_path'], 'logs', "train.log"), 'a+') as f:
        f.write("{time};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")
 def log_eval_outcomes(outcomes, trained_eps = 0):
    """Append one line per evaluation method to <model_path>/logs/eval.log.

    Each entry of `outcomes` is indexed as (method, scores); the line
    written is "time;method;trained_eps;count;sum;mean".
    """
    for outcome in outcomes:
        method, scores = outcome[0], outcome[1]
        count = len(scores)
        total = sum(scores)
        line = "{time};{method};{trained_eps};{count};{sum};{mean}".format(
            trained_eps = trained_eps,
            method = method,
            count = count,
            sum = total,
            mean = total / count,
            time = int(time.time()))
        # The log is reopened in append mode for every entry
        target = os.path.join(config['model_path'], 'logs', "eval.log")
        with open(target, 'a+') as f:
            f.write(line + "\n")
 # Parse command line arguments
 parser = argparse.ArgumentParser(description="Backgammon games")
@ -16,15 +47,13 @@ parser.add_argument('--eval-methods', action='store',
                    default=['random'], nargs='*',
                    help='specifies evaluation methods')
 parser.add_argument('--eval', action='store_true',
-                    help='evaluate the neural network with a random choice bot')
+                    help='whether to evaluate the neural network with a random choice bot')
 parser.add_argument('--bench-eval-scores', action='store_true',
                    help='benchmark scores of evaluation measures. episode counts and model specified as options are ignored.')
 parser.add_argument('--train', action='store_true',
-                    help='train the neural network')
+                    help='whether to train the neural network')
 parser.add_argument('--eval-after-train', action='store_true', dest='eval_after_train',
-                    help='evaluate after each training session')
+                    help='whether to evaluate after each training session')
 parser.add_argument('--play', action='store_true',
-                    help='play with the neural network')
+                    help='whether to play with the neural network')
 parser.add_argument('--start-episode', action='store', dest='start_episode',
                    type=int, default=0,
                    help='episode count to start at; purely for display purposes')
@ -32,124 +61,31 @@ parser.add_argument('--train-perpetually', action='store_true',
                    help='start new training session as soon as the previous is finished')
 parser.add_argument('--list-models', action='store_true',
                    help='list all known models')
 parser.add_argument('--board-rep', action='store', dest='board_rep',
                    help='name of board representation to use as input to neural network')
 parser.add_argument('--verbose', action='store_true',
                    help='If set, a lot of stuff will be printed')
 parser.add_argument('--ply', action='store', dest='ply', default='0',
                    help='defines the amount of ply used when deciding what move to make')
 parser.add_argument('--repeat-eval', action='store', dest='repeat_eval', default='1',
                    help='the amount of times the evaluation method should be repeated')
 args = parser.parse_args()
 config = {
    'model': args.model,
    'model_path': os.path.join(model_storage_path, args.model),
    'episode_count': args.episode_count,
    'eval_methods': args.eval_methods,
    'train': args.train,
    'play': args.play,
    'eval': args.eval,
    'bench_eval_scores': args.bench_eval_scores,
    'eval_after_train': args.eval_after_train,
    'start_episode': args.start_episode,
    'train_perpetually': args.train_perpetually,
-    'model_storage_path': 'models',
+    'model_storage_path': model_storage_path
    'bench_storage_path': 'bench',
    'board_representation': args.board_rep,
    'global_step': 0,
    'verbose': args.verbose,
    'ply': args.ply,
    'repeat_eval': args.repeat_eval
 }
 # Create models folder
 if not os.path.exists(config['model_storage_path']):
    os.makedirs(config['model_storage_path'])
 model_path = lambda: os.path.join(config['model_storage_path'], config['model'])
 # Make sure directories exist
-log_path = os.path.join(model_path(), 'logs')
+model_path = os.path.join(config['model_path'])
-if not os.path.isdir(model_path()):
+log_path   = os.path.join(model_path, 'logs')
-    os.mkdir(model_path())
+if not os.path.isdir(model_path):
    os.mkdir(model_path)
 if not os.path.isdir(log_path):
    os.mkdir(log_path)
 # Define helper functions
 def log_train_outcome(outcome, diff_in_values, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "train.log")):
    """Append a summary of one training session to log_path.

    The line written is
    "time;trained_eps;count;sum;mean;average_diff_in_vals;commit", where
    commit is the output of `git describe --first-parent --always` and
    average_diff_in_vals is `diff_in_values` as given.
    """
    git_describe = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE)
    commit = git_describe.stdout.decode('utf-8').rstrip()
    count = len(outcome)
    total = sum(outcome)
    line = "{time};{trained_eps};{count};{sum};{mean};{average_diff_in_vals};{commit}".format(
        trained_eps = trained_eps,
        count = count,
        sum = total,
        mean = total / count,
        time = int(time.time()),
        average_diff_in_vals = diff_in_values,
        commit = commit)
    with open(log_path, 'a+') as f:
        f.write(line + "\n")
 def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "eval.log")):
    """
    Append one line per evaluation method to the evaluation log.

    :param outcomes: entries indexed as (method, scores)
    :param trained_eps: number of episodes trained so far
    :param log_path: file the lines are appended to
    :return: None
    """
    git_describe = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE)
    commit = git_describe.stdout.decode('utf-8').rstrip()
    for outcome in outcomes:
        method, scores = outcome[0], outcome[1]
        count = len(scores)
        total = sum(scores)
        line = "{time};{method};{trained_eps};{count};{sum};{mean};{commit}".format(
            commit = commit,
            trained_eps = trained_eps,
            method = method,
            count = count,
            sum = total,
            mean = total / count,
            time = int(time.time()))
        # The log is reopened in append mode for every entry
        with open(log_path, 'a+') as f:
            f.write(line + "\n")
 def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0):
    """Append one benchmark line per evaluation method to log_path.

    Each entry of `outcomes` is indexed as (method, scores). `time` is the
    value written to the time column (it shadows the time module inside
    this function) and `index` is the sample index. The line written is
    "method;count;index;time;sum;mean;commit".
    """
    git_describe = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE)
    commit = git_describe.stdout.decode('utf-8').rstrip()
    for outcome in outcomes:
        method, scores = outcome[0], outcome[1]
        count = len(scores)
        total = sum(scores)
        line = "{method};{count};{index};{time};{sum};{mean};{commit}".format(
            trained_eps = trained_eps,
            method = method,
            count = count,
            sum = total,
            mean = total / count,
            time = time,
            index = index,
            commit = commit)
        # The log is reopened in append mode for every entry
        with open(log_path, 'a+') as f:
            f.write(line + "\n")
 def find_board_rep():
    """Return the contents of the model's board_representation file."""
    model_dir = os.path.join(config['model_storage_path'], config['model'])
    with open(os.path.join(model_dir, "board_representation"), 'r') as rep_file:
        contents = rep_file.read()
    return contents
 def board_rep_file_exists():
    """Return True when the model directory holds a board_representation file."""
    model_dir = os.path.join(config['model_storage_path'], config['model'])
    return os.path.isfile(os.path.join(model_dir, "board_representation"))
 def create_board_rep():
    """Append the configured board representation name to the model's board_representation file."""
    model_dir = os.path.join(config['model_storage_path'], config['model'])
    target = os.path.join(model_dir, "board_representation")
    with open(target, 'a+') as rep_file:
        rep_file.write(config['board_representation'])
 # Do actions specified by command-line
 if args.list_models:
@ -158,7 +94,7 @@ if args.list_models:
            return int(f.read())
    model_folders = [ f.path
                      for f
-                      in os.scandir(config['model_storage_path'])
+                      in os.scandir(model_storage_path)
                      if f.is_dir() ]
    models = [ (folder, get_eps_trained(folder)) for folder in model_folders ]
    sys.stderr.write("Found {} model(s)\n".format(len(models)))
@ -167,98 +103,28 @@ if args.list_models:
    exit()
-if __name__ == "__main__":
+# Set up network
-    # Set up network
+from network import Network
-    from network import Network
+network = Network(config, config['model'])
 eps = config['start_episode']
-    # Set up variables
+# Set up variables
-    episode_count = config['episode_count']
+episode_count = config['episode_count']
    if config['board_representation'] is None:
        if board_rep_file_exists():
            config['board_representation'] = find_board_rep()
        else:
            sys.stderr.write("Was not given a board_rep and was unable to find a board_rep file\n")
            exit()
    else:
        if not board_rep_file_exists():
            create_board_rep()
        else:
            if config['board_representation'] != find_board_rep():
                sys.stderr.write("Board representation \"{given}\", does not match one in board_rep file, \"{board_rep}\"\n".
                                 format(given = config['board_representation'], board_rep = find_board_rep()))
                exit()
    if args.train:
        network = Network(config, config['model'])
        start_episode = network.episodes_trained
        while True:
            train_outcome, diff_in_values = network.train_model(episodes = episode_count, trained_eps = start_episode)
            start_episode += episode_count
            log_train_outcome(train_outcome, diff_in_values, trained_eps = start_episode)
            if config['eval_after_train']:
                eval_outcomes = network.eval(trained_eps = start_episode)
                log_eval_outcomes(eval_outcomes, trained_eps = start_episode)
            if not config['train_perpetually']:
                break
    elif args.play:
        network = Network(config, config['model'])
        network.play_against_network()
    elif args.eval:
        network = Network(config, config['model'])
        network.restore_model()
        for i in range(int(config['repeat_eval'])):
            start_episode = network.episodes_trained
            # Evaluation measures are described in `config`
            outcomes = network.eval(config['episode_count'])
            log_eval_outcomes(outcomes, trained_eps = start_episode)
            # elif args.play:
            # g.play(episodes = episode_count)
    elif args.bench_eval_scores:
        # Make sure benchmark directory exists
        if not os.path.isdir(config['bench_storage_path']):
            os.mkdir(config['bench_storage_path'])
        config = config.copy()
        config['model'] = 'bench'
        network = Network(config, config['model'])
        start_episode = network.episodes_trained
        if start_episode == 0:
            print("Model not trained! Beware of using non-existing models!")
            exit()
        sample_count = 20
        episode_counts = [25, 50, 100, 250, 500, 1000, 2500, 5000,
                          10000, 20000]
        def do_eval():
            for eval_method in config['eval_methods']:
                result_path = os.path.join(config['bench_storage_path'],
                                           eval_method) + "-{}.log".format(int(time.time()))
                for n in episode_counts:
                    for i in range(sample_count):
                        start_time = time.time()
                        # Evaluation measure to be benchmarked are described in `config`
                        outcomes = network.eval(episode_count = n)
                        time_diff = time.time() - start_time
                        log_bench_eval_outcomes(outcomes,
                                                time = time_diff,
                                                index = i,
                                                trained_eps = start_episode,
                                                log_path = result_path)
        # CMM: oh no
        import tensorflow as tf
        network.restore_model()
        do_eval()
 if args.train:
    while True:
        train_outcome = network.train_model(episodes = episode_count, trained_eps = eps)
        eps += episode_count
        log_train_outcome(train_outcome, trained_eps = eps)
        if config['eval_after_train']:
            eval_outcomes = network.eval(trained_eps = eps)
            log_eval_outcomes(eval_outcomes, trained_eps = eps)
        if not config['train_perpetually']:
            break
 elif args.eval:
    eps = config['start_episode']
    outcomes = network.eval()
    log_eval_outcomes(outcomes, trained_eps = eps)
 #elif args.play:
 #    g.play(episodes = episode_count)
--- a/network.py
+++ b/network.py
@ -1,4 +1,5 @@
 import tensorflow as tf
 from cup import Cup
 import numpy as np
 from board import Board
 import os
@ -6,188 +7,132 @@ import time
 import sys
 import random
 from eval import Eval
 import glob
 from operator import itemgetter
 import tensorflow.contrib.eager as tfe
 from player import Player
 class Network:
-    # board_features_quack has size 28
+    hidden_size = 40
-    # board_features_quack_fat has size 30
+    input_size = 196
-    # board_features_tesauro has size 198
+    output_size = 1
    # Can't remember the best learning_rate, look this up
    learning_rate = 0.1
-    board_reps = {
+    # TODO: Actually compile tensorflow properly
-        'quack-fat'   : (30, Board.board_features_quack_fat),
+    #os.environ["TF_CPP_MIN_LOG_LEVEL"]="2"
        'quack'       : (28, Board.board_features_quack),
        'tesauro'     : (198, Board.board_features_tesauro),
        'quack-norm'  : (30, Board.board_features_quack_norm),
        'tesauro-fat' : (726, Board.board_features_tesauro_fat),
        'tesauro-poop': (198, Board.board_features_tesauro_wrong)
    }
    def custom_tanh(self, x, name=None):
        """
        Apply tanh to `x` and scale the result by 2.

        :param x: Input passed to `tf.tanh`
        :param name: Optional name forwarded to `tf.tanh`
        :return: The result of `tf.scalar_mul(2.0, tf.tanh(x, name))`
        """
        scale = tf.constant(2.00)
        squashed = tf.tanh(x, name)
        return tf.scalar_mul(scale, squashed)
    def __init__(self, config, name):
-        """
+        self.config = config
-        :param config:
+        self.session = tf.Session()
-        :param name:
+        self.checkpoint_path = config['model_path']
-        """
+        self.name = name
-        move_options = {
+        # input = x
-            '1': self.make_move_1_ply,
+        self.x = tf.placeholder('float', [1, Network.input_size], name='x')
-            '0': self.make_move_0_ply
+        self.value_next = tf.placeholder('float', [1, Network.output_size], name="value_next")
        }
        self.max_or_min = {
            1: np.argmax,
            -1: np.argmin
        }
        tf.enable_eager_execution()
        xavier_init = tf.contrib.layers.xavier_initializer()
-        self.config = config
+        W_1 = tf.get_variable("w_1", (Network.input_size, Network.hidden_size),
-        self.checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
+                              initializer=xavier_init)
        W_2 = tf.get_variable("w_2", (Network.hidden_size, Network.output_size),
                              initializer=xavier_init)
-        self.name = name
+        b_1 = tf.get_variable("b_1", (Network.hidden_size,),
                              initializer=tf.zeros_initializer)
        b_2 = tf.get_variable("b_2", (Network.output_size,),
                              initializer=tf.zeros_initializer)
-        self.make_move = move_options[
+        value_after_input = tf.sigmoid(tf.matmul(self.x, W_1) + b_1, name='hidden_layer')
            self.config['ply']
        ]
        # Set board representation from config
        self.input_size, self.board_trans_func = Network.board_reps[
            self.config['board_representation']
        ]
        self.output_size = 1
        self.hidden_size = 40
        self.max_learning_rate = 0.1
        self.min_learning_rate = 0.001
        # Restore trained episode count for model
        episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
        if os.path.isfile(episode_count_path):
            with open(episode_count_path, 'r') as f:
                self.episodes_trained = int(f.read())
        else:
            self.episodes_trained = 0
        global_step_path = os.path.join(self.checkpoint_path, "global_step")
        if os.path.isfile(global_step_path):
            with open(global_step_path, 'r') as f:
                self.global_step = int(f.read())
        else:
            self.global_step = 0
        self.model = tf.keras.Sequential([
            tf.keras.layers.Dense(40, activation="sigmoid", kernel_initializer=xavier_init,
                                  input_shape=(1,self.input_size)),
            tf.keras.layers.Dense(1, activation="sigmoid", kernel_initializer=xavier_init)
        ])
    def exp_decay(self, max_lr, global_step, decay_rate, decay_steps):
        """
        Staircase exponential decay of a learning rate.

        The rate only drops once per completed block of `decay_steps` steps,
        because the exponent uses integer (floor) division.

        :param max_lr: The learning rate the network starts at
        :param global_step: The number of steps taken so far
        :param decay_rate: Factor applied once per completed decay period
        :param decay_steps: Length of one decay period, in steps
        :return: max_lr * decay_rate ** (global_step // decay_steps)
        """
        completed_periods = global_step // decay_steps
        return max_lr * decay_rate ** completed_periods
    def do_backprop(self, prev_state, value_next):
        """
        Apply one temporal-difference weight update to `self.model`.

        The learning rate is first refreshed (exponentially decayed from
        `self.max_learning_rate`, but never below `self.min_learning_rate`) and
        stored on `self.learning_rate`. The model is then evaluated on
        `prev_state`, and every model variable is incremented by
        learning_rate * (value_next - value) * d(value)/d(variable).

        :param prev_state: The previous state; it is reshaped to a single row and re-evaluated
        :param value_next: The value of the state that followed `prev_state`
        :return: Nothing, the model's variables are updated in place
        """
        decayed_rate = self.exp_decay(self.max_learning_rate, self.global_step, 0.96, 50000)
        self.learning_rate = tf.maximum(self.min_learning_rate,
                                        decayed_rate,
                                        name="learning_rate")

        # Record the forward pass so the gradient of the output can be taken
        model_input = prev_state.reshape(1,-1)
        with tf.GradientTape() as tape:
            value = self.model(model_input)
        weight_grads = tape.gradient(value, self.model.variables)

        # Scalar TD error: V(next) - V(prev)
        td_error = tf.subtract(value_next, value, name='difference_in_values')
        difference_in_values = tf.reshape(td_error, [])

        for weight_grad, weight in zip(weight_grads, self.model.variables):
            weight.assign_add(self.learning_rate * difference_in_values * weight_grad)
-    def print_variables(self):
+        
-        """
+        self.value = tf.sigmoid(tf.matmul(value_after_input, W_2) + b_2, name='output_layer')
-        Prints all the variables of the model
+
-        :return:
+        # tf.reduce_sum basically finds the sum of its input, so this gives the
-        """
+        # difference between the two values, in case they should be lists, which
-        variables = self.model.variables
+        # they might be if our input changes
-        for k in variables:
+
-            print(k)
+        # TODO: Alexander thinks that self.value will be computed twice (instead of once)
        difference_in_values = tf.reduce_sum(tf.subtract(self.value_next, self.value, name='difference'))
        trainable_vars = tf.trainable_variables()
        gradients = tf.gradients(self.value, trainable_vars)
        apply_gradients = []
        with tf.variable_scope('apply_gradients'):
            for gradient, trainable_var in zip(gradients, trainable_vars):
                # Hopefully this is Δw_t = α(V_t+1 - V_t)▿_wV_t.
                backprop_calc = Network.learning_rate * difference_in_values * gradient
                grad_apply = trainable_var.assign_add(backprop_calc)
                apply_gradients.append(grad_apply)
            self.training_op = tf.group(*apply_gradients, name='training_op')
        self.saver = tf.train.Saver(max_to_keep=1)
        self.session.run(tf.global_variables_initializer())
        self.restore_model()
    def eval_state(self, state):
-        """
+        # Run state through a network
-        Evaluates a single state
+
-        :param state:
+        # Remember to create placeholders for everything because wtf tensorflow
-        :return:
+        # and graphs
-        """
+
-        return self.model(state.reshape(1,-1))
+        # Remember to create the dense layers
        # Figure out a way of giving a layer a custom activation function (we
        # want something which gives [-2,2]). Naively tanh*2, however I feel this
        # is wrong.
        # tf.group, groups a bunch of actions, so calculate the different
        # gradients for the different weights, by using tf.trainable_variables()
        # to find all variables and tf.gradients(current_value,
        # trainable_variables) to find all the gradients. We can then loop
        # through this and calculate the trace for each gradient and variable
        # pair (note, zip can be used to combine the two lists found before),
        # and then we can calculate the overall change in weights, based on the
        # formula listed in tesauro (learning_rate * difference_in_values *
        # trace), this calculation can be assigned to a tf variable and put in a
        # list and then this can be grouped into a single operation, essentially
        # building our own backprop function.
        # Grouping them is done by
        # tf.group(*the_gradients_from_before_we_want_to_apply,
        # name="training_op")
        # If we remove the eligibility trace to begin with, we only have to
        # implement learning_rate * (difference_in_values) * gradients (the
        # aforementioned calculation).
        # print("Network is evaluating")
        val = self.session.run(self.value, feed_dict={self.x: state})
        #print("eval ({})".format(self.name), state, val, sep="\n")
        return val
    def save_model(self, episode_count):
-        """
+        self.saver.save(self.session, os.path.join(self.checkpoint_path, 'model.ckpt'))
        Saves the model of the network, it references global_step as self.global_step
        :param episode_count:
        :return:
        """
        tfe.Saver(self.model.variables).save(os.path.join(self.checkpoint_path, 'model.ckpt'))
        with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f:
-            print("[NETWK] ({name}) Saving model to:".format(name=self.name),
+            print("[NETWK] ({name}) Saving model to:".format(name = self.name),
                  os.path.join(self.checkpoint_path, 'model.ckpt'))
            f.write(str(episode_count) + "\n")
        with open(os.path.join(self.checkpoint_path, "global_step"), 'w+') as f:
            print("[NETWK] ({name}) Saving global step to:".format(name=self.name),
                  os.path.join(self.checkpoint_path, 'model.ckpt'))
            f.write(str(self.global_step) + "\n")
        if self.config['verbose']:
            self.print_variables()
    def calc_vals(self, states):
        """
        Calculate a score of each state in states
        :param states: A number of states. The states have to be transformed before being given to this function.
        :return:
        """
        return self.model.predict_on_batch(states)
    def restore_model(self):
-        """
+        if os.path.isfile(os.path.join(self.checkpoint_path, 'model.ckpt.index')):
        Restore a model for a session, such that a trained model and either be further trained or
        used for evaluation
        :return: Nothing. It's a side-effect that a model gets restored for the network.
        """
        if glob.glob(os.path.join(self.checkpoint_path, 'model.ckpt*.index')):
            latest_checkpoint = tf.train.latest_checkpoint(self.checkpoint_path)
-            print("[NETWK] ({name}) Restoring model from:".format(name=self.name),
+            print("[NETWK] ({name}) Restoring model from:".format(name = self.name),
                  str(latest_checkpoint))
-            tfe.Saver(self.model.variables).restore(latest_checkpoint)
+            self.saver.restore(self.session, latest_checkpoint)
            variables_names = [v.name for v in tf.trainable_variables()]
            values = self.session.run(variables_names)
            for k, v in zip(variables_names, values):
                print("Variable: ", k)
                print("Shape: ", v.shape)
                print(v)
            # Restore trained episode count for model
            episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
@ -195,341 +140,220 @@ class Network:
                with open(episode_count_path, 'r') as f:
                    self.config['start_episode'] = int(f.read())
-            global_step_path = os.path.join(self.checkpoint_path, "global_step")
+    # Have a circular dependency, #fuck, need to rewrite something
-            if os.path.isfile(global_step_path):
+    def adjust_weights(self, board, v_next):
-                with open(global_step_path, 'r') as f:
+#        print("lol")
-                    self.config['global_step'] = int(f.read())
+        board = np.array(board).reshape((1,-1))
        self.session.run(self.training_op, feed_dict = { self.x: board,
                                                         self.value_next: v_next })
-            if self.config['verbose']:
+
-                self.print_variables()
+            # while game isn't done:
                #x_next = g.next_move()
                #value_next = network.eval_state(x_next)
                #self.session.run(self.training_op, feed_dict={self.x: x, self.value_next: value_next})
                #x = x_next
-    def make_move_0_ply(self, board, roll, player):
+    def make_move(self, board, roll):
-        """
+        # print(Board.pretty(board))
-        Find the best move given a board, roll and a player, by finding all possible states one can go to
+        legal_moves = Board.calculate_legal_states(board, 1, roll)
-        and then picking the best, by using the network to evaluate each state. This is 0-ply, ie. no look-ahead.
+        moves_and_scores = [ (move, self.eval_state(np.array(Board.map_to_tesauro(move)).reshape(1,-1))) for move in legal_moves ]
-        The highest score is picked for the 1-player and the max(1-score) is picked for the -1-player.
+        scores = [ x[1] for x in moves_and_scores ]
-
+        best_score_index = np.array(scores).argmax()
-        :param board: Current board
+        best_move_pair = moves_and_scores[best_score_index]
-        :param roll:  Current roll
+        #print("Found the best state, being:", np.array(move_scores).argmax())
-        :param player: Current player
+        return best_move_pair
        :return: A pair of the best state to go to, together with the score of that state
        """
        legal_moves = list(Board.calculate_legal_states(board, player, roll))
        legal_states = np.array([self.board_trans_func(move, player)[0] for move in legal_moves])
        scores = self.model.predict_on_batch(legal_states)
        best_score_idx = self.max_or_min[player](scores)
        best_move, best_score = legal_moves[best_score_idx], scores[best_score_idx]
        return (best_move, best_score)
    def make_move_1_ply(self, board, roll, player):
        """
        Return the best board and best score based on a 1-ply look-ahead.
        :param board:
        :param roll:
        :param player:
        :return:
        """
        start = time.time()
        best_pair = self.calculate_1_ply(board, roll, player)
        #print(time.time() - start)
        return best_pair
-    def calculate_1_ply(self, board, roll, player):
+    def train_model(self, episodes=1000, save_step_size = 100, trained_eps = 0):
        """
        Find the best move based on a 1-ply look-ahead. First the x best moves are picked from a 0-ply and then
        all moves and scores are found for them. The expected score is then calculated for each of the boards from the
        0-ply.
        :param board:
        :param roll: The original roll
        :param player: The current player
        :return: Best possible move based on 1-ply look-ahead
        """
        # find all legal states from the given board and the given roll
        init_legal_states = Board.calculate_legal_states(board, player, roll)
        legal_states = np.array([self.board_trans_func(state, player)[0] for state in init_legal_states])
        scores = [ score.numpy()
                   for score
                   in  self.calc_vals(legal_states) ]
        moves_and_scores = list(zip(init_legal_states, scores))
        sorted_moves_and_scores = sorted(moves_and_scores, key=itemgetter(1), reverse=(player == 1))
        best_boards = [ x[0] for x in sorted_moves_and_scores[:10] ]
        scores = self.do_ply(best_boards, player)
        best_score_idx = self.max_or_min[player](scores)
        # best_score_idx = np.array(trans_scores).argmax()
        return (best_boards[best_score_idx], scores[best_score_idx])
    def do_ply(self, boards, player):
        """
        Score each board by looking one extra ply ahead.

        For every board, all legal states the opponent (`player * -1`) can reach
        with each of the 21 distinct dice rolls are generated, transformed with
        `self.board_trans_func` and scored in one batched model call. The score
        of a board is the plain mean over all of its generated states.

        This is hardcoded to a single extra ply. Searching deeper would require
        restructuring this method.

        :param boards: The boards to try all rolls on
        :param player: The player of the previous ply
        :return: A list with one mean score per board, in the order of `boards`
        """
        all_rolls = [ (1, 1), (1, 2), (1, 3), (1, 4), (1, 5),
                      (1, 6), (2, 2), (2, 3), (2, 4), (2, 5),
                      (2, 6), (3, 3), (3, 4), (3, 5), (3, 6),
                      (4, 4), (4, 5), (4, 6), (5, 5), (5, 6),
                      (6, 6) ]

        # The next ply is always played by the other side.
        opponent = player * -1

        # Collect every reachable state in one flat list so the model is called
        # once, and remember how many states belong to each board.
        flat_states = []
        states_per_board = []
        for board in boards:
            count_before = len(flat_states)
            for roll in all_rolls:
                for state in Board.calculate_legal_states(board, opponent, roll):
                    flat_states.append(np.array(self.board_trans_func(state, opponent)[0]))
            states_per_board.append(len(flat_states) - count_before)

        all_scores = self.model.predict_on_batch(np.array(flat_states))

        # Cut the flat score array back into per-board slices and average each.
        # NOTE: every state counts equally; rolls are not weighted by their
        # probability (doubles vs. non-doubles).
        mean_scores = []
        offset = 0
        for count in states_per_board:
            mean_scores.append(tf.reduce_mean(all_scores[offset:offset + count]))
            offset += count

        return mean_scores
    def eval(self, episode_count, trained_eps = 0):
        """
        Used to evaluate a model. Can either use pubeval, a model playing at an intermediate level, or dumbeval
        a model which has been given random weights, so it acts deterministically random.
        :param episode_count: The amount of episodes to run
        :param trained_eps:   The amount of episodes the model we want to evaluate, has trained
        :param tf_session:
        :return: outcomes:    The outcomes of the evaluation session
        """
        def do_eval(method, episodes = 1000, trained_eps = 0):
            """
            Do the actual evaluation
            :param method:     Either pubeval or dumbeval
            :param episodes:   Amount of episodes to use in the evaluation
            :param trained_eps:
            :return: outcomes : Described above
            """
            start_time = time.time()
            def print_time_estimate(eps_completed):
                cur_time = time.time()
                time_diff = cur_time - start_time
                eps_per_sec = eps_completed / time_diff
                secs_per_ep = time_diff / eps_completed
                eps_remaining = (episodes - eps_completed)
                sys.stderr.write(
                    "[EVAL ] Averaging {per_sec} episodes per second\n".format(per_sec=round(eps_per_sec, 2)))
                sys.stderr.write(
                    "[EVAL ] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(
                        eps_remaining=eps_remaining, time_remaining=int(eps_remaining * secs_per_ep)))
            sys.stderr.write(
                "[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method))
            if method == 'pubeval':
                outcomes = []
                for i in range(1, episodes + 1):
                    sys.stderr.write("[EVAL ] Episode {}".format(i))
                    board = Board.initial_state
                    while Board.outcome(board) is None:
                        roll = (random.randrange(1, 7), random.randrange(1, 7))
                        board = (self.make_move(board, roll, 1))[0]
                        roll = (random.randrange(1, 7), random.randrange(1, 7))
                        board = Eval.make_pubeval_move(board, -1, roll)[0][0:26]
                    sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
                    outcomes.append(Board.outcome(board)[1])
                    sys.stderr.write("\n")
                    if i % 10 == 0:
                        print_time_estimate(i)
                return outcomes
            elif method == 'dumbeval':
                outcomes = []
                for i in range(1, episodes + 1):
                    sys.stderr.write("[EVAL ] Episode {}".format(i))
                    board = Board.initial_state
                    while Board.outcome(board) is None:
                        roll = (random.randrange(1, 7), random.randrange(1, 7))
                        board = (self.make_move(board, roll, 1))[0]
                        roll = (random.randrange(1, 7), random.randrange(1, 7))
                        board = Eval.make_dumbeval_move(board, -1, roll)[0][0:26]
                    sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
                    outcomes.append(Board.outcome(board)[1])
                    sys.stderr.write("\n")
                    if i % 10 == 0:
                        print_time_estimate(i)
                return outcomes
            else:
                sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method))
                return [0]
        outcomes = [ (method, do_eval(method,
                                      episode_count,
                                      trained_eps = trained_eps))
                     for method
                     in self.config['eval_methods'] ]
        return outcomes
    def play_against_network(self):
        """
        Play an interactive game against the restored model.

        The network plays as player 1 and moves first each round; the human
        plays as player -1 through `Player.make_human_move`. Boards and rolls
        are printed to stdout as the game progresses.

        :return: Nothing
        """
        self.restore_model()
        human_player = Player(-1)
        player = 1
        board = Board.initial_state
        while Board.outcome(board) is None:
            print(Board.pretty(board))
            roll = (random.randrange(1, 7), random.randrange(1, 7))
            print("Bot rolled:", roll)

            board, _ = self.make_move(board, roll, player)
            # If the bot's move ended the game, do not ask the human for a
            # move on a finished board; the final board is printed below.
            if Board.outcome(board) is not None:
                break
            print(Board.pretty(board))
            roll = (random.randrange(1, 7), random.randrange(1, 7))
            print("You rolled:", roll)
            board = human_player.make_human_move(board, roll)
        print("DONE "*10)
        print(Board.pretty(board))
        print(Board.pretty(board))
    def train_model(self, episodes=1000, save_step_size=100, trained_eps=0):
        """
        Train a model to by self-learning.
        :param episodes:
        :param save_step_size:
        :param trained_eps:
        :return:
        """
        self.restore_model()
        average_diffs = 0
        start_time = time.time()
        def print_time_estimate(eps_completed):
-            cur_time = time.time()
+            cur_time      = time.time()
-            time_diff = cur_time - start_time
+            time_diff     = cur_time - start_time
-            eps_per_sec = eps_completed / time_diff
+            eps_per_sec   = eps_completed / time_diff
-            secs_per_ep = time_diff / eps_completed
+            secs_per_ep   = time_diff / eps_completed
            eps_remaining = (episodes - eps_completed)
-            sys.stderr.write(
+            sys.stderr.write("[TRAIN] Averaging {per_sec} episodes per second\n".format(per_sec = round(eps_per_sec, 2)))
-                "[TRAIN] Averaging {per_sec} episodes per second\n".format(per_sec=round(eps_per_sec, 2)))
+            sys.stderr.write("[TRAIN] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(eps_remaining = eps_remaining, time_remaining = int(eps_remaining * secs_per_ep)))
-            sys.stderr.write(
+
                "[TRAIN] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(
                    eps_remaining=eps_remaining, time_remaining=int(eps_remaining * secs_per_ep)))
        sys.stderr.write("[TRAIN] Training {} episodes and save_step_size {}\n".format(episodes, save_step_size))
        outcomes = []
        for episode in range(1, episodes + 1):
            sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
 #            print("greerggeregr"*10000)
            # TODO decide which player should be here
            player = 1
-            # player = 1
+            roll = (random.randrange(1,7), random.randrange(1,7))
-            player = random.choice([-1,1])
+
-            prev_board = Board.initial_state
+            def tesaurofi(board):
-            i = 0
+                return Board.map_to_tesauro(board)
-            difference_in_values = 0
+
            prev_board, _ = self.make_move(Board.flip(Board.initial_state) if player == -1 else Board.initial_state, roll)
            if player == -1:
                prev_board = Board.flip(prev_board)
            # print("board:",prev_board)
            # print(len(prev_board))
            # find the best move here, make this move, then change turn as the
            # first thing inside of the while loop and then call
            # best_move_and_score to get V_t+1
            # i = 0
            while Board.outcome(prev_board) is None:
-                i += 1
+                #print(prev_board)
                self.global_step += 1
-                cur_board, cur_board_value = self.make_move(prev_board,
+                # print("-"*30)
-                                                            (random.randrange(1, 7), random.randrange(1, 7)),
+                # print(i)
-                                                            player)
+                # print(roll)
                # print(Board.pretty(prev_board))
                # print("/"*30)
                # i += 1
-                difference_in_values += abs((cur_board_value - self.eval_state(self.board_trans_func(prev_board, player))))
+                player *= -1
                roll = (random.randrange(1,7), random.randrange(1,7))
-                if self.config['verbose']:
+                cur_board, cur_board_value = self.make_move(Board.flip(prev_board) if player == -1 else prev_board, roll)
-                    print("Difference in values:", difference_in_vals)
+                #print("pls",cur_board_value)
-                    print("Current board value :", cur_board_value)
+                if player == -1:
-                    print("Current board is    :\n",cur_board)
+                    cur_board  = Board.flip(cur_board)
-                # adjust weights
+                self.adjust_weights(tesaurofi(prev_board), cur_board_value)
                if Board.outcome(cur_board) is None:
                    self.do_backprop(self.board_trans_func(prev_board, player), cur_board_value)
                    player *= -1
                prev_board = cur_board
            final_board = prev_board
-            sys.stderr.write("\t outcome {}\t turns {}".format(Board.outcome(final_board)[1], i))
+            sys.stderr.write("\t outcome {}".format(Board.outcome(final_board)[1]))
            outcomes.append(Board.outcome(final_board)[1])
-            final_score = np.array([Board.outcome(final_board)[1]])
+            final_score = np.array([ Board.outcome(final_board)[1] ])
-            scaled_final_score = ((final_score + 2) / 4)
+            self.adjust_weights(tesaurofi(prev_board), final_score.reshape((1, 1)))
            difference_in_values += abs(scaled_final_score-cur_board_value)
            average_diffs += (difference_in_values[0][0] / (i+1))
            self.do_backprop(self.board_trans_func(prev_board, player), scaled_final_score.reshape(1,1))
            sys.stderr.write("\n")
            if episode % min(save_step_size, episodes) == 0:
                sys.stderr.write("[TRAIN] Saving model...\n")
-                self.save_model(episode + trained_eps)
+                self.save_model(episode+trained_eps)
            if episode % 50 == 0:
                print_time_estimate(episode)
        sys.stderr.write("[TRAIN] Saving model for final episode...\n")
        self.save_model(episode+trained_eps)
-        return outcomes, average_diffs/len(outcomes)
+        return outcomes
                # take turn, which finds the best state and picks it, based on the current network
                # save current state
                # run training operation (session.run(self.training_op, {x:x, value_next, value_next})), (something which does the backprop, based on the state after having taken a turn, found before, and the state we saved in the beginning and from now we'll save it at the end of the turn
                # save the current state again, so we can continue running backprop based on the "previous" turn.
        # NOTE: We need to make a method so that we can take a single turn or at least just pick the next best move, so we know how to evaluate according to TD-learning. Right now, our game just continues in a while loop with nothing to stop it!
    def eval(self, trained_eps = 0):
        def do_eval(method, episodes = 1000, trained_eps = 0):
            start_time = time.time()
            def print_time_estimate(eps_completed):
                cur_time      = time.time()
                time_diff     = cur_time - start_time
                eps_per_sec   = eps_completed / time_diff
                secs_per_ep   = time_diff / eps_completed
                eps_remaining = (episodes - eps_completed)
                sys.stderr.write("[EVAL ] Averaging {per_sec} episodes per second\n".format(per_sec = round(eps_per_sec, 2)))
                sys.stderr.write("[EVAL ] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(eps_remaining = eps_remaining, time_remaining = int(eps_remaining * secs_per_ep)))
            sys.stderr.write("[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method))
            if method == 'random':
                outcomes = []
                for i in range(1, episodes + 1):
                    sys.stderr.write("[EVAL ] Episode {}".format(i))
                    board = Board.initial_state
                    while Board.outcome(board) is None:
                        roll = (random.randrange(1,7), random.randrange(1,7))
                        board = (self.p1.make_move(Board.map_to_tesauro(board), self.p1.get_sym(), roll))[0]
                        roll = (random.randrange(1,7), random.randrange(1,7))
                        board = Board.flip(Eval.make_random_move(Board.flip(board), 1, roll))
                    sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
                    outcomes.append(Board.outcome(board)[1])
                    sys.stderr.write("\n")
                    if i % 50 == 0:
                        print_time_estimate(i)
                return outcomes
            elif method == 'pubeval':
                outcomes = []
                # Add the evaluation code for pubeval, the bot has a method make_pubeval_move(board, sym, roll), which can be used to get the best move according to pubeval
                for i in range(1, episodes + 1):
                    sys.stderr.write("[EVAL ] Episode {}".format(i))
                    board = Board.initial_state
                    #print("init:", board, sep="\n")
                    while Board.outcome(board) is None:
                        #print("-"*30)
                        roll = (random.randrange(1,7), random.randrange(1,7))
                        #print(roll)
                        prev_board = tuple(board)
                        board = (self.make_move(board, roll))[0]
                        #print("post p1:", board, sep="\n")
                        #print("."*30)
                        roll = (random.randrange(1,7), random.randrange(1,7))
                        #print(roll)
                        prev_board = tuple(board)
                        board = Eval.make_pubeval_move(board, -1, roll)[0][0:26]
                        #print("post pubeval:", board, sep="\n")
                    #print("*"*30)
                    #print(board)
                    #print("+"*30)
                    sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
                    outcomes.append(Board.outcome(board)[1])
                    sys.stderr.write("\n")
                    if i % 10 == 0:
                        print_time_estimate(i)
                return outcomes
            # elif method == 'dumbmodel':
            #     config_prime = self.config.copy()
            #     config_prime['model_path'] = os.path.join(config_prime['model_storage_path'], 'dumbmodel')
            #     eval_bot = Bot(1, config = config_prime, name = "dumbmodel")
            #     #print(self.config, "\n", config_prime)
            #     outcomes = []
            #     for i in range(1, episodes + 1):
            #         sys.stderr.write("[EVAL ] Episode {}".format(i))
            #         board = Board.initial_state
            #         while Board.outcome(board) is None:
            #             roll = (random.randrange(1,7), random.randrange(1,7))
            #             board = (self.make_move(board, self.p1.get_sym(), roll))[0]
            #             roll = (random.randrange(1,7), random.randrange(1,7))
            #             board = Board.flip(eval_bot.make_move(Board.flip(board), self.p1.get_sym(), roll)[0])
            #         sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
            #         outcomes.append(Board.outcome(board)[1])
            #         sys.stderr.write("\n")
            #         if i % 50 == 0:
            #             print_time_estimate(i)
            #     return outcomes
            else:
                sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method))
                return [0]
        return [ (method, do_eval(method,
                                  self.config['episode_count'],
                                  trained_eps = trained_eps))
                 for method
                 in self.config['eval_methods'] ]
--- a/network_test.py
+++ b/network_test.py
@ -3,65 +3,30 @@ import tensorflow as tf
 import random
 import numpy as np
 session = tf.Session()
 graph_lol = tf.Graph()
 from board import Board
 import main
-config = main.config.copy()
+network = Network(session)
 config['model'] = "player_testings"
 config['ply'] = "1"
 config['board_representation'] = 'quack-fat'
 network = Network(config, config['model'])
-network.restore_model()
+initial_state = np.array(( 0,
-initial_state = Board.initial_state
+                  2, 0, 0, 0, 0, -5,
-
+                  0, -3, 0, 0, 0, 5,
-initial_state_1 = ( 0,
+                  -5, 0, 0, 0, 3, 0,
-                    0, 0, 0, 2, 0, -5,
+                  5, 0, 0, 0, 0, -2,
-                    0, -3, 0, 0, 0, 0,
+                  0 )).reshape((1,26))
                    -5, 0, 0, 0, 3, 5,
                    0, 0, 0, 0, 5, -2,
                    0 )
 initial_state_2 = ( 0,
                    -5, -5, -3, -2, 0, 0,
                    0, 0, 0, 0, 0, 0,
                    0, 0, 0, 15, 0, 0,
                    0, 0, 0, 0, 0, 0,
                    0 )
 boards = {initial_state,
          initial_state_1,
          initial_state_2 }
 #print(x.shape)
 with graph_lol.as_default():
    session_2 = tf.Session(graph = graph_lol)
    network_2 = Network(session_2)
    network_2.restore_model()
    print(network_2.eval_state(initial_state))
 print(network.eval_state(initial_state))
 # board = network.board_trans_func(Board.initial_state, 1)
 # pair = network.make_move(Board.initial_state, [3,2], 1)
 # print(pair[1])
 # network.do_backprop(board, 0.9)
 # network.print_variables()
 # network.save_model(2)
 # print(network.calculate_1_ply(Board.initial_state, [3,2], 1))
 diff = [0, 0]
 val = network.eval_state(Board.board_features_quack_fat(initial_state, 1))
 print(val)
 diff[0] += abs(-1-val)
 diff[1] += 1
 print(diff[1])
--- a/player.py
+++ b/player.py
@ -11,59 +11,19 @@ class Player:
    def get_sym(self):
        # Return the symbol stored on this player as self.sym.
        return self.sym
-    def calc_move_sets(self, from_board, roll, player):
+    def make_move(self, board, sym, roll):
-        board = from_board
+        print(Board.pretty(board))
-        sets = []
+        legal_moves = Board.calculate_legal_states(board, sym, roll)
-        total = 0
+        if roll[0] == roll[1]:
-        for r in roll:
+            print("Example of move: 4/6,6/8,12/14,13/15")
-            # print("Value of r:",r)
+        else:
-            sets.append([Board.calculate_legal_states(board, player, [r,0]), r])
+            print("Example of move: 4/6,13/17")
            total += r
        sets.append([Board.calculate_legal_states(board, player, [total,0]), total])
        print(sets)
        return sets
        user_moves = input("Enter your move: ").strip().split(",")
        board = Board.apply_moves_to_board(board, sym, user_moves)
        while board not in legal_moves:
            print("Move is invalid, please enter a new move")
            user_moves = input("Enter your move: ").strip().split(",")
            board = Board.apply_moves_to_board(board, sym, user_moves)
    def tmp_name(self, from_board, to_board, roll, player, total_moves, is_quad = False):
        """Try to account for the step from ``from_board`` to ``to_board``.

        Asks ``self.calc_move_sets`` for the candidate sets; each entry is
        used as ``[boards, distance]``. The first entry whose boards contain
        ``to_board`` is consumed: its distance is subtracted from
        ``total_moves`` and the matching die in ``roll`` is zeroed (or the
        whole roll is replaced by ``[0,0]`` when the entry lies past the
        single-die entries).

        Returns ``(total_moves, roll, board)`` where ``board`` is ``to_board``
        on a match and ``from_board`` otherwise.
        """
        single_die_entries = 4 if is_quad else 2
        candidates = self.calc_move_sets(from_board, roll, player)
        for position, candidate in enumerate(candidates):
            candidate[0] = list(candidate[0])
            if to_board not in candidate[0]:
                continue
            total_moves -= candidate[1]
            if position < single_die_entries:
                # A single die was used up.
                roll[position] = 0
            else:
                # The entry for the summed dice was used.
                roll = [0,0]
            return total_moves, roll, to_board
        return total_moves, roll, from_board
    def make_human_move(self, board, roll):
        """Interactively play a human turn, one checker move at a time.

        Moves are read from stdin in the form ``from/to`` (e.g. ``2/5``).
        After each entry the board is advanced through ``self.tmp_name`` and
        printed; the loop ends once the remaining distance reaches zero.
        A double roll is expanded to four dice.

        Returns the board after the last accepted move.
        """
        is_quad = roll[0] == roll[1]
        total_moves = roll[0] + roll[1] if not is_quad else int(roll[0])*4
        if is_quad:
            roll = [roll[0]]*4
        while total_moves != 0:
            while True:
                print("You have {roll} left!".format(roll=total_moves))
                move = input("Pick a move!\n")
                pot_move = move.split("/")
                if len(pot_move) == 2:
                    try:
                        # int() signals non-numeric text with ValueError, so
                        # that is the exception a typo actually produces.
                        move = [int(pot_move[0]), int(pot_move[1])]
                        break
                    except ValueError:
                        pass
                # Reached for any malformed entry: wrong part count or non-numbers.
                print("The correct syntax is: 2/5 for a move from index 2 to 5.")
            to_board = Board.apply_moves_to_board(board, self.get_sym(), move)
            total_moves, roll, board = self.tmp_name(board, to_board, list(roll), self.get_sym(), total_moves, is_quad)
            print(Board.pretty(board))
        return board
--- a/plot.py
+++ b/plot.py
@ -9,26 +9,9 @@ import matplotlib.dates as mdates
 train_headers = ['timestamp', 'eps_train', 'eps_trained_session', 'sum', 'mean']
 eval_headers  = ['timestamp', 'method', 'eps_train', 'eval_eps_used', 'sum', 'mean']
 bench_headers = ['method', 'sample_count', 'i', 'time', 'sum', 'mean']
 model_path = 'models'
 def plot_bench(data_path):
    """Show one box plot of the 'mean' column per benchmarked method.

    The file at ``data_path`` is read as ';'-separated CSV with the columns
    named by ``bench_headers``; the first three columns form the index.
    For every label in the first index level the function shows a box plot
    and prints the variance of the plotted table.
    """
    bench = pd.read_csv(data_path,
                        sep=";",
                        names=bench_headers,
                        index_col=[0,1,2])
    means = bench[['mean']]
    for method_label in bench.index.levels[0]:
        per_method = means.loc[method_label].unstack().T
        axes = per_method.plot.box()
        axes.set_title("Evaluation variance, {}".format(method_label))
        axes.set_xlabel("Sample count")
        axes.set_ylabel("Mean score")
        plt.show(axes.figure)
        # for later use:
        print(per_method.var())
 def dataframes(model_name):
    def df_timestamp_to_datetime(df):
@ -61,7 +44,7 @@ if __name__ == '__main__':
    plt.show()
    while True:
-        df = dataframes('a')['eval']
+        df = dataframes('default')['eval']
        print(df)
--- a/quack/quack.c
+++ b/quack/quack.c
@ -1,484 +0,0 @@
 #include <Python.h>
 static PyObject* QuackError;
 /* Fixed-capacity list of Python board tuples, returned by value from
  * calc_moves. */
 typedef struct board_list board_list;
 struct board_list {
   /* Number of entries of `list` that are filled in. */
  int size;
   /* Board tuples; at most 16 can be stored. */
  PyObject* list[16];
 };
 /* Utility functions */
 /* Return 1 for positive x, -1 for negative x and 0 for zero. */
 int sign(int x) {
   if (x > 0) return 1;
   if (x < 0) return -1;
   return 0;
 }
 /* Return the absolute value of x. */
 int abs(int x) {
   return (x < 0) ? -x : x;
 }
 /* end utility functions */
 /* Helper functions */
 /* Find the board indexes (0..25) holding at least one checker of `player`.
  * Returns a malloc'ed array whose element 0 is the number of indexes found
  * and whose elements 1..n are those indexes in ascending order. The caller
  * must free() the array. Aborts the process if the allocation fails. */
 int *idxs_with_checkers_of_player(int board[], int player) {
   int count = 0;
   for (int pos = 0; pos < 26; pos++) {
     if (board[pos] * player >= 1) count++;
   }
   int *result = malloc((1 + count) * sizeof(int));
   if (result == NULL) {
     PyErr_NoMemory();
     abort();
   }
   result[0] = count;
   int *out = result + 1;
   for (int pos = 0; pos < 26; pos++) {
     if (board[pos] * player >= 1) *out++ = pos;
   }
   return result;
 }
 /* A move is forward when its direction equals the player's sign. */
 int is_forward_move(int direction, int player) {
   if (direction != player) return 0;
   return 1;
 }
 /* True when the distance covered by the move equals the die face. */
 int face_value_match_move_length(int delta, int face_value) {
   const int distance = abs(delta);
   return distance == face_value;
 }
 /* While the player's bar slot (index 0 for player 1, index 25 otherwise)
  * is non-empty, only moves starting from that slot are accepted. */
 int bear_in_if_checker_on_bar(int board[], int player, int from_idx) {
   const int bar = (player == 1) ? 0 : 25;
   if (board[bar] == 0) return 1;
   return from_idx == bar;
 }
 /* True when the source point holds checkers belonging to `player`. */
 int checkers_at_from_idx(int from_state, int player) {
   const int owner = sign(from_state);
   return owner == player;
 }
 /* The target point is open unless the opponent holds it with more than
  * one checker; a single opposing checker does not block. */
 int no_block_at_to_idx(int to_state, int player) {
   const int held_by_opponent = (-sign(to_state) == player);
   if (!held_by_opponent) return 1;
   return abs(to_state) == 1;
 }
 /* Decide whether `player` may bear off with the move from_idx -> to_idx.
  * Returns 1 when every index holding the player's checkers lies in the last
  * quadrant (19 and above for player 1, 6 and below for player -1) and the
  * move either lands exactly on the off-board index (25 for player 1, 0 for
  * player -1) or starts from the player's backmost checker; 0 otherwise. */
 int can_bear_off(int board[], int player, int from_idx, int to_idx) {
   /* checker_idxs[0] is the count; entries 1..count are the indexes. */
  int* checker_idxs = idxs_with_checkers_of_player(board, player);
  int moving_backmost_checker = 1;
  int bearing_directly_off = 0;
  int all_checkers_in_last_quadrant = 1;
  /* Check if bearing directly off */
  if      (player ==  1 && to_idx == 25) bearing_directly_off = 1;
  else if (player == -1 && to_idx == 0)  bearing_directly_off = 1;
  for (int i = 1; i <= checker_idxs[0]; i++) {
    if (player == 1 ) {
      /* Check if all checkers are in the last quadrant */
      if (checker_idxs[i] < 19) {
 	all_checkers_in_last_quadrant = 0;
 	break;
      }
      /* Check if moving backmost checker */
      if (checker_idxs[i] < from_idx) {
 	moving_backmost_checker = 0;
       /* Without an exact bear-off the result is already 0; stop scanning. */
 	if (!bearing_directly_off) break;
      }
    } else {
       /* Mirror of the checks above for player -1. */
      if (checker_idxs[i] > 6) {
 	all_checkers_in_last_quadrant = 0;
 	break;
      }
      if (checker_idxs[i] > from_idx) {
 	moving_backmost_checker = 0;
 	if (!bearing_directly_off) break;
      }
    }
  }
  free(checker_idxs);
  if (all_checkers_in_last_quadrant &&
      (bearing_directly_off || moving_backmost_checker))  return 1;
  else                                                    return 0;
 }
 /* end helper functions */
 /* Check whether moving one checker of `player` along move[0] -> move[1]
  * is legal for a die showing `face_value`.
  * board is the 26-entry position, move is {from_idx, to_idx}.
  * A target outside 1..24 is treated as bearing off.
  * Returns non-zero when the move is legal, 0 otherwise. */
 int is_move_valid(int board[], int player, int face_value, int move[]) {
  int from_idx = move[0];
  int to_idx = move[1];
  /* The move comes from the caller unvalidated: reject a source outside the
   * board before it is used as an array index. */
  if (from_idx < 0 || from_idx > 25) return 0;
  int to_state;
  int from_state = board[from_idx];
  int delta = to_idx - from_idx;
  int direction = sign(delta);
  int bearing_off;
  if (to_idx >= 1 && to_idx <= 24) {
    to_state = board[to_idx];
    bearing_off = 0;
  } else {
    /* Off-board target: nothing can block it. */
    to_state = 0;
    bearing_off = 1;
  }
  return is_forward_move(direction, player)
    && face_value_match_move_length(delta, face_value)
    && bear_in_if_checker_on_bar(board, player, from_idx)
    && checkers_at_from_idx(from_state, player)
    && no_block_at_to_idx(to_state, player)
    && (!bearing_off || can_bear_off(board, player, from_idx, to_idx))
    ;
 }
 /* Apply move {from, to} for `player` to `board` in place.
  * A target outside 1..24 only removes the checker from its source.
  * Landing on a single opposing checker sends that checker to the slot
  * used for hit checkers (index 25 when player is 1, index 0 otherwise). */
 void do_move(int board[], int player, int move[]) {
   const int src = move[0];
   const int dst = move[1];
   /* Pick the checker up. */
   board[src] -= player;
   if (dst >= 1 && dst <= 24) {
     const int is_hit = (board[dst] * player == -1);
     if (is_hit) {
       const int hit_slot = (player == 1) ? 25 : 0;
       board[hit_slot] -= player;
       board[dst] = 0;
     }
     /* Put the checker down. */
     board[dst] += player;
   }
 }
 /* Return a malloc'ed 26-entry copy of `board` with `move` applied for
  * `player`; the input board is left untouched. The caller must free() the
  * result. Aborts the process if the allocation fails. */
 int* do_move_clone(int board[], int player, int move[]) {
   int* copy = malloc(26 * sizeof(int));
   if (copy == NULL) {
     PyErr_NoMemory();
     abort();
   }
   int pos = 0;
   while (pos < 26) {
     copy[pos] = board[pos];
     pos++;
   }
   do_move(copy, player, move);
   return copy;
 }
 /* Build a new Python tuple holding the first `size` ints of `board`. */
 PyObject* store_board_to_pytuple(int board[], int size) {
   PyObject* result = PyTuple_New(size);
   int pos = 0;
   while (pos < size) {
     /* PyTuple_SetItem takes over the reference to the new int object. */
     PyObject* value = PyLong_FromLong((long) board[pos]);
     PyTuple_SetItem(result, pos, value);
     pos++;
   }
   return result;
 }
 /* Compute every board reachable by moving one checker of `player` by
  * `face_value` points.
  * Returns a board_list of new Python tuples (26 ints each). When the player
  * has no checkers on the board the list holds the unchanged board only.
  * At most as many boards as board_list can hold are stored. */
 board_list calc_moves(int board[], int player, int face_value) {
   int* checker_idxs = idxs_with_checkers_of_player(board, player);
   board_list boards = { .size = 0 };
   const int capacity = (int) (sizeof(boards.list) / sizeof(boards.list[0]));
   if (checker_idxs[0] == 0) {
     boards.list[0] = store_board_to_pytuple(board, 26);
     boards.size = 1;
     free(checker_idxs);
     return boards;
   }
   int ctr = 0;
   /* The board comes from the caller unvalidated and may have more occupied
    * points than the list has slots, so stop before writing past the end. */
   for (int i = 1; i <= checker_idxs[0] && ctr < capacity; i++) {
     int move[2];
     move[0] = checker_idxs[i];
     move[1] = checker_idxs[i] + (face_value * player);
     if (is_move_valid(board, player, face_value, move)) {
       int* new_board = do_move_clone(board, player, move);
       boards.list[ctr] = store_board_to_pytuple(new_board, 26);
       /* The tuple owns copies of the values; the scratch board can go. */
       free(new_board);
       ctr++;
     }
   }
   free(checker_idxs);
   boards.size = ctr;
   return boards;
 }
 /* Build the 30-entry "quack-fat" feature vector for `board`.
  * Entries 0..25 copy the board, entry 26 is 15 minus the sum of positive
  * entries, entry 27 is -15 minus the sum of the remaining entries, and
  * entries 28/29 are 1/0 for player 1 and 0/1 otherwise.
  * Returns a malloc'ed array the caller must free(). Aborts the process if
  * the allocation fails. */
 int* board_features_quack_fat(int board[], int player) {
   int* features = malloc(30 * sizeof(int));
   if (features == NULL) {
     PyErr_NoMemory();
     abort();
   }
   int pos_sum = 0;
   int neg_sum = 0;
   for (int pos = 0; pos < 26; pos++) {
     const int value = board[pos];
     features[pos] = value;
     if (value > 0) pos_sum += value;
     else           neg_sum += value;
   }
   features[26] = 15 - pos_sum;
   features[27] = -15 - neg_sum;
   features[28] = (player == 1) ? 1 : 0;
   features[29] = (player == 1) ? 0 : 1;
   return features;
 }
 /* Meta definitions */
 /* Copy the 26 integers of a Python tuple into `board`.
  * Returns 0 on success. Returns 1 with a Python exception set when the
  * tuple does not have 26 entries or an entry cannot be converted to an
  * integer. */
 int extract_board(int *board, PyObject* board_tuple_obj) {
   long numValuesBoard;
   numValuesBoard = PyTuple_Size(board_tuple_obj);
   if (numValuesBoard != 26) {
     PyErr_SetString(QuackError, "Board tuple must have 26 entries");
     return 1;
   }
   // Iterate over tuple to retrieve positions
   for (int i=0; i<numValuesBoard; i++) {
     /* PyTuple_GetItem returns a borrowed reference; nothing to release. */
     PyObject* board_val_obj = PyTuple_GetItem(board_tuple_obj, i);
     long value = PyLong_AsLong(board_val_obj);
     /* -1 is also a legal value, so the error indicator has to be checked.
      * The conversion has already set the exception to report. */
     if (value == -1 && PyErr_Occurred()) return 1;
     board[i] = (int) value;
   }
   return 0;
 }
 /* Copy the 2 integers (from, to) of a Python tuple into `move`.
  * Returns 0 on success. Returns 1 with a Python exception set when the
  * tuple does not have exactly 2 entries or an entry cannot be converted to
  * an integer. */
 int extract_move(int *move, PyObject* move_tuple_obj) {
   long numValuesMove;
   numValuesMove = PyTuple_Size(move_tuple_obj);
   if (numValuesMove != 2) {
     PyErr_SetString(QuackError, "Move tuple must have exactly 2 entries");
     return 1;
   }
   for (int i=0; i<numValuesMove; i++) {
     /* PyTuple_GetItem returns a borrowed reference; nothing to release. */
     PyObject* move_val_obj = PyTuple_GetItem(move_tuple_obj, i);
     long value = PyLong_AsLong(move_val_obj);
     /* -1 is also a legal value, so the error indicator has to be checked.
      * The conversion has already set the exception to report. */
     if (value == -1 && PyErr_Occurred()) return 1;
     move[i] = (int) value;
   }
   return 0;
 }
 /* Python entry point: is_move_valid(board, player, face_value, move).
  * `board` must be a tuple of 26 ints and `move` a tuple of 2 ints.
  * Returns True or False, or NULL with an exception set on bad arguments. */
 static PyObject*
 quack_is_move_valid(PyObject *self, PyObject *args) {
   PyObject* board_tuple_obj;
   PyObject* move_tuple_obj;
   int player;
   int face_value;
   if (! PyArg_ParseTuple(args, "O!iiO!",
                          &PyTuple_Type, &board_tuple_obj,
                          &player,
                          &face_value,
                          &PyTuple_Type, &move_tuple_obj))
     return NULL;
   int board[26];
   int move[2];
   if (extract_board(board, board_tuple_obj)) return NULL;
   if (extract_move(move, move_tuple_obj))    return NULL;
   /* PyBool_FromLong hands back a new reference to True or False. */
   return PyBool_FromLong(is_move_valid(board, player, face_value, move) != 0);
 }
 /* Python entry point: idxs_with_checkers_of_player(board, player).
  * `board` must be a tuple of 26 ints. Returns a new list with the indexes
  * on which `player` has checkers, or NULL with an exception set on bad
  * arguments. */
 static PyObject*
 quack_idxs_with_checkers_of_player(PyObject *self, PyObject *args) {
   PyObject* board_tuple_obj;
   int player;
   if (! PyArg_ParseTuple(args, "O!i",
                          &PyTuple_Type, &board_tuple_obj,
                          &player))
     return NULL;
   int board[26];
   if (extract_board(board, board_tuple_obj)) return NULL;
   /* found[0] is the count; the indexes follow from found[1] on. */
   int* found = idxs_with_checkers_of_player(board, player);
   const int count = found[0];
   PyObject* idxs_list = PyList_New(count);
   for (int pos = 0; pos < count; pos++) {
     /* PyList_SetItem takes over the reference to the new int object. */
     PyList_SetItem(idxs_list, pos, PyLong_FromLong((long) found[pos + 1]));
   }
   free(found);
   /* The list reference created above is handed to the caller as-is. */
   return idxs_list;
 }
 /* Python entry point: do_move(board, player, move).
  * `board` must be a tuple of 26 ints and `move` a tuple of 2 ints.
  * Returns a new 26-tuple with the move applied, or NULL with an exception
  * set on bad arguments. */
 static PyObject*
 quack_do_move(PyObject *self, PyObject *args) {
  int board[26];
  int player;
  int move[2];
  PyObject* board_tuple_obj;
  PyObject* move_tuple_obj;
  if (! PyArg_ParseTuple(args, "O!iO!",
 			 &PyTuple_Type, &board_tuple_obj,
 			 &player,
 			 &PyTuple_Type, &move_tuple_obj))
    return NULL;
  if (extract_board(board, board_tuple_obj)) return NULL;
  if (extract_move(move, move_tuple_obj))    return NULL;
  do_move(board, player, move);
  /* `board` is a plain int array on the stack: it has no reference count
   * and needs no release. The new tuple's single reference goes straight
   * to the caller. */
  return store_board_to_pytuple(board, 26);
 }
 /* Python entry point: calc_moves(board, player, face_value).
  * `board` must be a tuple of 26 ints. Returns a new list of board tuples
  * as produced by calc_moves, or NULL with an exception set on bad
  * arguments. Aborts the process if a list slot cannot be filled. */
 static PyObject*
 quack_calc_moves(PyObject *self, PyObject *args) {
   PyObject* board_tuple_obj;
   int player;
   int face_value;
   if (! PyArg_ParseTuple(args, "O!ii",
                          &PyTuple_Type, &board_tuple_obj,
                          &player,
                          &face_value))
     return NULL;
   int board[26];
   if (extract_board(board, board_tuple_obj)) return NULL;
   board_list found = calc_moves(board, player, face_value);
   PyObject* boards_list = PyList_New(found.size);
   int pos = 0;
   while (pos < found.size) {
     /* PyList_SetItem takes over the reference to each board tuple. */
     if (PyList_SetItem(boards_list, pos, found.list[pos]) != 0) {
       printf("list insertion failed at index %i\n",pos);
       abort();
     }
     pos++;
   }
   /* The list reference created above is handed to the caller as-is. */
   return boards_list;
 }
 /* Python entry point: board_features_quack_fat(board, player).
  * `board` must be a tuple of 26 ints. Returns a new 30-tuple with the
  * quack-fat features, or NULL with an exception set on bad arguments. */
 static PyObject*
 quack_board_features_quack_fat(PyObject *self, PyObject *args) {
   PyObject* board_tuple_obj;
   int player;
   if (! PyArg_ParseTuple(args, "O!i",
                          &PyTuple_Type, &board_tuple_obj,
                          &player))
     return NULL;
   int board[26];
   if (extract_board(board, board_tuple_obj)) return NULL;
   int* features = board_features_quack_fat(board, player);
   PyObject* features_tuple = store_board_to_pytuple(features, 30);
   /* The tuple holds its own int objects; the C buffer is done. */
   free(features);
   return features_tuple;
 }
 /* Method table of the module: Python-visible name, C implementation,
  * calling convention and docstring for each exported function. */
 static PyMethodDef quack_methods[] = {
  {
    "is_move_valid", quack_is_move_valid, METH_VARARGS,
    "Evaluates the validity of the proposed move."
  },
  {
    "idxs_with_checkers_of_player", quack_idxs_with_checkers_of_player, METH_VARARGS,
    "Returns a list of indexes with checkers of the specified player"
  },
  {
    "do_move", quack_do_move, METH_VARARGS,
    "Returns the board after doing the specified move"
  },
  {
    "calc_moves", quack_calc_moves, METH_VARARGS,
    "Calculates all legal moves from board with specified face value"
  },
  {
    "board_features_quack_fat", quack_board_features_quack_fat, METH_VARARGS,
    "Transforms a board to the quack-fat board representation"
  },
   /* Sentinel entry terminating the table. */
  {NULL, NULL, 0, NULL}
 };
 /* Module definition handed to PyModule_Create in PyInit_quack: module
  * name, module docstring, per-module state size and the method table. */
 static struct PyModuleDef quack_definition = {
  PyModuleDef_HEAD_INIT,
  "quack",
  "A Python module that provides various useful Backgammon-related functions.",
   /* -1: no per-module state is allocated (see the PyModuleDef m_size docs). */
  -1,
  quack_methods
 };
 PyMODINIT_FUNC PyInit_quack(void) {
  PyObject* module;
  module = PyModule_Create(&quack_definition);
  if (module == NULL)
    return NULL;
  QuackError = PyErr_NewException("quack.error", NULL, NULL);
  Py_INCREF(QuackError);
  PyModule_AddObject(module, "error", QuackError);
  return module;
 }
--- a/quack/setup.py
+++ b/quack/setup.py
@ -1,9 +0,0 @@
 from distutils.core import setup, Extension
 # Build description for the `quack` C extension, compiled from quack.c.
 quack = Extension('quack',
                  sources = ['quack.c'])
 # Package metadata; the extension above is the only module built.
 setup (name = 'quack',
       version = '0.1',
       description = 'Quack Backgammon Tools',
       ext_modules = [quack])
--- a/report_docs.txt
+++ b/report_docs.txt
@ -1,28 +0,0 @@
 <christoffer> Alexander og jeg skrev noget af vores bachelorprojekt om til C her i fredags.
 <christoffer> Man skal virkelig passe på sine hukommelsesallokeringer.
 <Jmaa> Ja, helt klart.
 <christoffer> Jeg fandt et memory leak, der lækkede 100 MiB hukommelse i sekundet.
 <Jmaa> Hvilken del blev C-ificeret?
 <Jmaa> Damned
 <christoffer> Årsagen var at vi gav et objekt med tilbage til Python uden at dekrementere dets ref-count, så fortolkeren stadig troede at nogen havde brug for det.
 <christoffer> Den del af spillogikken, der tjekker om træk er gyldige.
 <christoffer> Det bliver kaldt ret mange tusinde gange pr. spil, så vi tænkte at der måske kunne være lidt optimering at hente i at omskrive det til C.
 <Jmaa> Ok, så I har ikke selv brugt alloc og free. Det er alligevel noget.
 <christoffer> Metoden selv blev 7 gange hurtigere!
 <Jmaa> Wow!
 <christoffer> Jo. Det endte vi også med at gøre.
 <christoffer> Vi havde brug for lister af variabel størrelse. Det endte med en struct med et "size" felt og et "list" felt.
 <Jmaa> Inkluderer det speedup, frem og tilbagen mellem C og python?
 <christoffer> Det burde det gøre, ja!
 <Jmaa> Gjorde det nogen stor effekt for hvor hurtigt I kan evaluere?
 <christoffer> Jeg tror ikke at der er særligt meget "frem og tilbage"-stads. Det ser ud til at det kode man skriver bliver kastet ret direkte ind i fortolkeren.
 <christoffer> Det gjorde en stor forskel for når vi laver 1-ply.
 <christoffer> "ply" er hvor mange træk man kigger fremad.
 <christoffer> Så kun at kigge på det umiddelbart næste træk er 0-ply, hvilket er det vi har gjort indtil nu
 <christoffer> 1-ply var for langsomt. Det tog ca. 6-7 sekunder at evaluere ét træk.
 <christoffer> Alexander lavede lidt omskrivninger, så TensorFlow udregnede det hurtigere og fik det ned på ca. 3-4 sekunder *pr. spil*.
 <christoffer> Så skrev vi noget af det om til C, og nu er vi så på ca. 2 sekunder pr. spil med 1-ply, hvilket er ret vildt.
 <christoffer> Det er så godt at Python-fortolkeren kan udvides med C!
 <christoffer> caspervk, kan I optimere jeres bachelorprojekt med et par C-moduler?
 <Jmaa> Det er en hel lille sektion til rapporten det der.
 <christoffer> Yeah. Kopierer bare det her verbatim ind.
--- a/requirements.txt
+++ b/requirements.txt
@ -1,24 +1,14 @@
 absl-py==0.1.10
 astor==0.6.2
 bleach==1.5.0
 cycler==0.10.0
 gast==0.2.0
 grpcio==1.10.0
 html5lib==0.9999999
 kiwisolver==1.0.1
 Markdown==2.6.11
 matplotlib==2.2.2
 numpy==1.14.1
 pandas==0.22.0
 protobuf==3.5.1
 pubeval==0.3
 pyparsing==2.2.0
 python-dateutil==2.7.2
 pytz==2018.3
 six==1.11.0
-tensorboard==1.8.0
+tensorboard==1.6.0
-tensorflow==1.8.0
+tensorflow==1.6.0
 termcolor==1.1.0
 Werkzeug==0.14.1
 pygame==1.9.3
--- a/tensorflow_impl_tests/eager_main.py
+++ b/tensorflow_impl_tests/eager_main.py
@ -1,94 +0,0 @@
 import time
 import numpy as np
 import tensorflow as tf
 from board import Board
 import tensorflow.contrib.eager as tfe
 tf.enable_eager_execution()
 # Scratch experiment: evaluate a small Keras model in eager mode and apply one
 # hand-written weight update towards the target value 0.9.
 # NOTE(review): xavier_init and opt are created but not used by the active
 # code below (opt only appears in the commented-out section at the end).
 xavier_init = tf.contrib.layers.xavier_initializer()
 opt = tf.train.MomentumOptimizer(learning_rate=0.1, momentum=1)
 # NOTE(review): the sizes below are only partly used; the layer widths are
 # repeated as literals in the model definition.
 output_size = 1
 hidden_size = 40
 input_size = 30
 # Two sigmoid Dense layers (40 units, then 1) with constant kernel
 # initialisers, so every run starts from the same weights.
 model = tf.keras.Sequential([
    tf.keras.layers.Dense(40, activation="sigmoid", kernel_initializer=tf.constant_initializer(-2), input_shape=(1,input_size)),
    tf.keras.layers.Dense(1, activation="sigmoid", kernel_initializer=tf.constant_initializer(0.2))
 ])
 # tfe.Saver(model.variables).restore(tf.train.latest_checkpoint("./"))
 # 26-entry board; note that the name `input` shadows the builtin.
 input = [0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0]
 # The same feature vector repeated 20 times, used as one batch.
 all_input = np.array([Board.board_features_quack_fat(input, 1) for _ in range(20)])
 single_in = Board.board_features_quack_fat(input, 1)
 # NOTE(review): start, all_predictions and learning_rate are assigned but
 # not read by the active code that follows.
 start = time.time()
 all_predictions = model.predict_on_batch(all_input)
 learning_rate = 0.1
 # Record the forward pass so the gradient of the output can be taken.
 with tf.GradientTape() as tape:
    value = model(single_in)
 print("Before:", value)
 grads = tape.gradient(value, model.variables)
 print("/"*40,"model_variables","/"*40)
 print(model.variables)
 print("/"*40,"grads","/"*40)
 print(grads)
 # Scalar error between the target 0.9 and the current output.
 difference_in_values = tf.reshape(tf.subtract(0.9, value, name='difference_in_values'), [])
 # Manual update: variable += 0.1 * error * d(output)/d(variable).
 for grad, train_var in zip(grads, model.variables):
    backprop_calc = 0.1 * difference_in_values * grad
    train_var.assign_add(backprop_calc)
 # Re-evaluate to see the effect of the update.
 value = model(single_in)
 print("/"*40,"model_variables","/"*40)
 print(model.variables)
 print("After:", value)
 # # grads = [0.1*val-np.random.uniform(-1,1)+grad for grad, trainable_var in zip(grads, model.variables)]
 #
 # # print(model.variables[0][0])
 # weights_before = model.weights[0]
 #
 # start = time.time()
 # #[trainable_var.assign_add(0.1*val-0.3+grad) for grad, trainable_var in zip(grads, model.variables)]
 #
 # start = time.time()
 # for gradient, trainable_var in zip(grads, model.variables):
 #     backprop_calc = 0.1 * (0.9 - val) * gradient
 #     trainable_var.assign_add(backprop_calc)
 #
 # # opt.apply_gradients(zip(grads, model.variables))
 #
 # print(time.time() - start)
 #
 # print(model(single_in))
 #
 # vals = model.predict_on_batch(all_input)
 # vals = list(vals)
 # vals[3] = 4
 # print(vals)
 # print(np.argmax(np.array(vals)))
 # tfe.Saver(model.variables).save("./tmp_ckpt")
--- a/tensorflow_impl_tests/normal_main.py
+++ b/tensorflow_impl_tests/normal_main.py
@ -1,67 +0,0 @@
 import tensorflow as tf
 import numpy as np
 import time
 class Everything:
    """Graph-mode TensorFlow experiment: a 30-40-1 sigmoid network with a
    hand-written weight update towards the target value 0.9."""
    def __init__(self):
        """Build the graph: placeholder, weights, forward pass and the
        grouped update op stored as ``self.training_op``."""
        self.output_size = 1
        self.hidden_size = 40
        self.input_size = 30
        # A single input row of input_size features.
        self.input = tf.placeholder('float', [1, self.input_size])
        # NOTE(review): xavier_init is created but not used below; the
        # weights use constant initialisers instead.
        xavier_init = tf.contrib.layers.xavier_initializer()
        W_1 = tf.get_variable("w_1", (self.input_size, self.hidden_size),
                              initializer=tf.constant_initializer(-2))
        W_2 = tf.get_variable("w_2", (self.hidden_size, self.output_size),
                              initializer=tf.constant_initializer(0.2))
        b_1 = tf.get_variable("b_1", (self.hidden_size,),
                              initializer=tf.zeros_initializer)
        b_2 = tf.get_variable("b_2", (self.output_size,),
                              initializer=tf.zeros_initializer)
        # Forward pass: sigmoid hidden layer followed by a sigmoid output.
        value_after_input = tf.sigmoid(tf.matmul(self.input, W_1) + b_1, name='hidden_layer')
        self.value = tf.sigmoid(tf.matmul(value_after_input, W_2) + b_2, name='output_layer')
        apply_gradients = []
        # Gradients of the output with respect to every trainable variable;
        # the two lists are zipped below, so their order must match.
        trainable_vars = tf.trainable_variables()
        gradients = tf.gradients(self.value, trainable_vars)
        # Scalar error between the target 0.9 and the current output.
        difference_in_values = tf.reshape(tf.subtract(0.9, self.value, name='difference_in_values'), [])
        with tf.variable_scope('apply_gradients'):
            for gradient, trainable_var in zip(gradients, trainable_vars):
                # variable += 0.1 * error * d(output)/d(variable)
                backprop_calc = 0.1 * difference_in_values * gradient
                grad_apply = trainable_var.assign_add(backprop_calc)
                apply_gradients.append(grad_apply)
        # One op that runs all the per-variable updates.
        self.training_op = tf.group(*apply_gradients, name='training_op')
    def eval(self):
        """Time 20 forward passes on a fixed input, run one update step and
        print the output before and after it."""
        # Fixed 30-entry input row; note that the name `input` shadows the builtin.
        input = np.array([0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0, 0, 0, 1, 0])
        start = time.time()
        # NOTE(review): the session is never closed; acceptable for a
        # one-shot script, but it holds its resources until exit.
        sess = tf.Session()
        sess.run(tf.global_variables_initializer())
        for i in range(20):
            val = sess.run(self.value, feed_dict={self.input: input.reshape(1,-1)})
        # Elapsed time also includes session creation and initialisation.
        print(time.time() - start)
        print(val)
        sess.run(self.training_op, feed_dict={self.input: input.reshape(1,-1)})
        val = sess.run(self.value, feed_dict={self.input: input.reshape(1, -1)})
        print(val)
 everything = Everything()
 everything.eval()
--- a/test.py
+++ b/test.py
@ -141,56 +141,6 @@ class TestIsMoveValid(unittest.TestCase):
    # TODO: More tests for bearing off are needed
    def test_bear_off_non_backmost(self):
        # Player 1 has one checker on each of points 23 and 24.
        board = ( 0,
                  0, 0, 0, 0, 0, 0,
                  0, 0, 0, 0, 0, 0,
                  0, 0, 0, 0, 0, 0,
                  0, 0, 0, 0, 1, 1,
                  0 )
        # Landing exactly on 25 is accepted from either point.
        self.assertEqual(Board.is_move_valid(board, 1, 2, (23, 25)), True)
        self.assertEqual(Board.is_move_valid(board, 1, 1, (24, 25)), True)
        # Overshooting from 24 is rejected while a checker remains on 23.
        self.assertEqual(Board.is_move_valid(board, 1, 2, (24, 26)), False)
    def test_bear_off_quadrant_limits_white(self):
        # Player 1 has checkers on points 18 through 24; the one on 18 lies
        # outside the last quadrant.
        board = ( 0,
                  0, 0, 0, 0, 0, 0,
                  0, 0, 0, 0, 0, 0,
                  0, 0, 0, 0, 0, 1,
                  1, 1, 1, 1, 1, 1,
                  0 )
        # Bearing off is expected to be rejected in that situation.
        self.assertEqual(Board.is_move_valid(board, 1, 2, (23, 25)), False)
        self.assertEqual(Board.is_move_valid(board, 1, 1, (24, 25)), False)
    def test_bear_off_quadrant_limits_black(self):
        # Player -1 has checkers on points 1 through 7; the one on 7 lies
        # outside the last quadrant.
        board = ( 0,
                  -1, -1, -1, -1, -1, -1,
                  -1, 0, 0, 0, 0, 0,
                  0, 0, 0, 0, 0, 0,
                  0, 0, 0, 0, 0, 0,
                  0 )
        # Bearing off is expected to be rejected in that situation.
        self.assertEqual(Board.is_move_valid(board, -1, 2, (2, 0)), False)
        self.assertEqual(Board.is_move_valid(board, -1, 1, (1, 0)), False)
    def test_bear_off_quadrant_limits_white_2(self):
        # Player 1 has checkers on points 19 and 24 only, both inside the
        # last quadrant.
        board = ( 0,
                  0, 0, 0, 0, 0, 0,
                  0, 0, 0, 0, 0, 0,
                  0, 0, 0, 0, 0, 0,
                  1, 0, 0, 0, 0, 1,
                  0 )
        # An exact bear-off from 24 is accepted.
        self.assertEqual(Board.is_move_valid(board, 1, 1, (24, 25)), True)
    def test_bear_off_quadrant_limits_black_2(self):
        # Player -1 has checkers on points 1 and 6 only, both inside the
        # last quadrant.
        board = ( 0,
                  -1, 0, 0, 0, 0, -1,
                  0, 0, 0, 0, 0, 0,
                  0, 0, 0, 0, 0, 0,
                  0, 0, 0, 0, 0, 0,
                  0 )
        # An exact bear-off from 1 is accepted.
        self.assertEqual(Board.is_move_valid(board, -1, 1, (1, 0)), True)
 class TestNumOfChecker(unittest.TestCase):
    def test_simple_1(self):
        board = ( 0,
@ -664,328 +614,5 @@ class TestBoardFlip(unittest.TestCase):
        self.assertEqual(Board.flip(Board.flip(board)), board)
    def test_tesauro_initial(self):
        # Checks the 198-value feature vector produced for the initial board
        # when it is encoded for player 1.
        def spread(groups):
            # Lay out 24 four-value groups in point order (1..24); any point
            # missing from `groups` contributes four zeros.
            flat = ()
            for point in range(1, 25):
                flat += groups.get(point, (0, 0, 0, 0))
            return flat

        board = Board.initial_state

        # The three non-zero group patterns that occur in this vector.
        two_set, three_set, four_set = (1, 1, 0, 0), (1, 1, 1, 0), (1, 1, 1, 1)
        first_side = spread({1: two_set, 12: four_set, 17: three_set, 19: four_set})
        second_side = spread({6: four_set, 8: three_set, 13: four_set, 24: two_set})

        # Each side's 96 point values are followed by two extra values
        # (presumably bar and borne-off counts -- confirm against Board);
        # the final pair is (1, 0) for player 1.
        expected = first_side + (0.0, 0) + second_side + (0.0, 0) + (1, 0)

        import numpy as np
        target = np.array(expected).reshape(1, 198)
        self.assertTrue((Board.board_features_tesauro(board, 1) == target).all())
    def test_pubeval_features(self):
        # The 28-value pubeval feature vector of the initial board is expected
        # to be the same whether it is requested for player 1 or player -1.
        board = Board.initial_state
        expected = ((0,)
                    + (2, 0, 0, 0, 0, -5)
                    + (0, -3, 0, 0, 0, 5)
                    + (-5, 0, 0, 0, 3, 0)
                    + (5, 0, 0, 0, 0, -2)
                    + (0,)
                    + (0, 0))

        import numpy as np
        for player in (1, -1):
            features = Board.board_features_to_pubeval(board, player)
            reference = np.array(expected).reshape(1, 28)
            self.assertTrue((features == reference).all())
    def test_tesauro_bars(self):
        # Starts from the initial board, empties slots 1 and 24 and places
        # 2 in slot 0 and -2 in slot 25, then checks the 198-value encoding
        # for player 1.
        def spread(groups):
            # Lay out 24 four-value groups in point order (1..24); any point
            # missing from `groups` contributes four zeros.
            flat = ()
            for point in range(1, 25):
                flat += groups.get(point, (0, 0, 0, 0))
            return flat

        cells = list(Board.initial_state)
        cells[1] = 0
        cells[0] = 2
        cells[24] = 0
        cells[25] = -2
        board = tuple(cells)

        # The two non-zero group patterns that occur in this vector.
        three_set, four_set = (1, 1, 1, 0), (1, 1, 1, 1)
        first_side = spread({12: four_set, 17: three_set, 19: four_set})
        second_side = spread({6: four_set, 8: three_set, 13: four_set})

        # After each side's 96 point values come two extra values; the first
        # of them is 1.0 for both sides here (presumably the bar feature --
        # confirm against Board). The final pair is (1, 0) for player 1.
        expected = first_side + (1.0, 0) + second_side + (1.0, 0) + (1, 0)

        import numpy as np
        target = np.array(expected).reshape(1, 198)
        self.assertTrue((Board.board_features_tesauro(board, 1) == target).all())
    def test_tesauro_home(self):
        # Starts from the initial board with slots 1 and 24 emptied, then
        # checks the 198-value encoding for player 1.
        def spread(groups):
            # Lay out 24 four-value groups in point order (1..24); any point
            # missing from `groups` contributes four zeros.
            flat = ()
            for point in range(1, 25):
                flat += groups.get(point, (0, 0, 0, 0))
            return flat

        cells = list(Board.initial_state)
        for emptied in (1, 24):
            cells[emptied] = 0
        board = tuple(cells)

        # The two non-zero group patterns that occur in this vector.
        three_set, four_set = (1, 1, 1, 0), (1, 1, 1, 1)
        first_side = spread({12: four_set, 17: three_set, 19: four_set})
        second_side = spread({6: four_set, 8: three_set, 13: four_set})

        # After each side's 96 point values come two extra values; the second
        # of them is 2 for both sides here (presumably the count of checkers
        # no longer on the board -- confirm against Board). The final pair is
        # (1, 0) for player 1.
        expected = first_side + (0.0, 2) + second_side + (0.0, 2) + (1, 0)

        import numpy as np
        target = np.array(expected).reshape(1, 198)
        self.assertTrue((Board.board_features_tesauro(board, 1) == target).all())
    def test_tesauro_black_player(self):
        # Checks the 198-value feature vector produced for the initial board
        # when it is encoded for player -1.
        def spread(groups):
            # Lay out 24 four-value groups in point order (1..24); any point
            # missing from `groups` contributes four zeros.
            flat = ()
            for point in range(1, 25):
                flat += groups.get(point, (0, 0, 0, 0))
            return flat

        board = Board.initial_state

        # The three non-zero group patterns that occur in this vector.
        two_set, three_set, four_set = (1, 1, 0, 0), (1, 1, 1, 0), (1, 1, 1, 1)
        first_side = spread({1: two_set, 12: four_set, 17: three_set, 19: four_set})
        second_side = spread({6: four_set, 8: three_set, 13: four_set, 24: two_set})

        # Each side's 96 point values are followed by two extra values
        # (presumably bar and borne-off counts -- confirm against Board);
        # the final pair is (0, 1) for player -1.
        expected = first_side + (0.0, 0) + second_side + (0.0, 0) + (0, 1)

        import numpy as np
        target = np.array(expected).reshape(1, 198)
        self.assertTrue((Board.board_features_tesauro(board, -1) == target).all())
 if __name__ == '__main__':
    # Run the test cases defined in this module when the file is executed
    # directly as a script.
    unittest.main()
Author	SHA1	Message	Date
alex	20788292a4	Able to eval	2018-03-20 21:51:58 +01:00
Alexander Munch-Hansen	85ec8d8e4e	Added tesauro + sigmoid	2018-03-20 17:29:29 +01:00