Updated server code.

restore restore_model
tesauro fat and diffs in values
2018-06-07 21:36:06 +02:00 · 2018-05-22 20:49:10 +02:00 · 2018-05-22 15:39:14 +02:00 · 2018-05-22 15:38:04 +02:00 · 2018-05-22 15:36:23 +02:00 · 2018-05-22 13:16:10 +00:00
25 changed files with 3035 additions and 603 deletions
--- a/.gitignore
+++ b/.gitignore
@ -169,3 +169,6 @@ venv.bak/
 README.*
 !README.org
 models/
+.DS_Store
+bench/
+
--- a/actual_board.py
+++ b/actual_board.py
@ -0,0 +1,427 @@
+# TODO: The bar is only drawn for show at the moment. Home doesn't work either.
+# TODO: There is an issue when a piece bounces back: the move appears to be applied
+#       and the buckets are then not properly restored to where they should be.
+
+import random
+import pygame
+import threading
+from board import Board
+import numpy as np
+import time
+
+# --- constants --- (UPPER_CASE names)
+
+
+
+class Board_painter:
+
+    def __init__(self):
+        """Open the pygame window and set up colours, pieces and game state.
+
+        Side effects: initialises pygame, creates the display surface, rolls
+        the first pair of dice and prints that roll.
+        """
+        self.SCREEN_WIDTH = 1050
+        self.SCREEN_HEIGHT = 400
+
+        # Horizontal pixel spacing between neighbouring points.
+        self.SPACING = 83.333
+
+        self.SAND  = (242, 209, 107)
+        self.GREEN_FILT = (0,102,0)
+        self.WHITE = (255, 255, 255)
+        self.RED   = (255,   0,   0)
+        self.SALMON = (250,128,114)
+        self.BLACK = (0,0,0)
+        self.BROWN = (160,82,45)
+        self.LIGHT_GREY = (220,220,220)
+        self.num_pieces = 15
+        self.FPS = 999
+
+        # x coordinates of the four columns that hold pieces at the start.
+        cen = self.SPACING/2 - 11
+        t = 5*self.SPACING - cen-22
+        m = 7*self.SPACING+50 - cen-22
+        right = self.SCREEN_WIDTH-cen-22
+        self.STARTING_IDX_P1 = [[cen,0], [cen, 30], [cen, 60], [cen, 90], [cen,120], [right, 0], [right, 30], [t, 378],[t,348],[t,318],[m, 378], [m,348],[m,318],[m,288],[m,258]]
+
+        self.STARTING_IDX_P2 = [[cen, 378], [cen, 348], [cen, 318], [cen, 288], [cen, 258], [right, 378], [right, 348], [t, 0], [t, 30], [t, 60], [m, 0], [m,30],[m,60],[m,90],[m,120]]
+
+        pygame.init()
+
+        self.screen = pygame.display.set_mode((self.SCREEN_WIDTH, self.SCREEN_HEIGHT))
+
+        pygame.display.set_caption("Backgammon")
+
+        # One 22x22 rectangle per piece, keyed by player (-1 or 1).
+        self.all_rects = {
+            -1: [pygame.rect.Rect(x, y, 22, 22) for x, y in self.STARTING_IDX_P1],
+            1: [pygame.rect.Rect(x, y, 22, 22) for x, y in self.STARTING_IDX_P2],
+        }
+
+        # Per-piece "is currently being dragged" flags.
+        self.all_drag = {p: [False] * self.num_pieces for p in (-1, 1)}
+
+        # Per-piece mouse offsets. Each piece gets its OWN [dx, dy] list;
+        # `[[0,0]] * n` would alias a single inner list across all pieces.
+        self.all_off = {p: [[0, 0] for _ in range(self.num_pieces)] for p in (-1, 1)}
+
+        self.is_true = False
+
+        self.clock = pygame.time.Clock()
+
+        # Each bucket is [piece_count, owner_sign]; index 0 and 25 are the bar slots.
+        self.buckets = [[0,0],[5,-1],[0,0],[0,0],[0,0],[3,1],[0,0],[5,1],[0,0],[0,0],[0,0],[0,0],[2,-1],[5,1],[0,0],[0,0],[0,0],[3,-1],[0,0],[5,-1],[0,0],[0,0],[0,0],[0,0],[2,1],[0,0]]
+        self.running = True
+        self.player = -1
+
+        self.roll = [random.randrange(1, 7), random.randrange(1, 7)]
+        print("initial_roll:", self.roll)
+
+        self.from_board = None
+        # Copy the inner lists too, so the snapshot does not change when
+        # self.buckets is mutated in place.
+        self.from_buckets = [list(bucket) for bucket in self.buckets]
+        self.from_locat = None
+        self.total_moves = 0
+    
+    def switch_player(self):
+        """Give the turn to the other player (-1 <-> 1) and log the change."""
+        self.player = self.player * -1
+        print("CHANGED PLAYER!")
+
+
+    def gen_buckets_from_board(self, board):
+        """Turn a signed board into a list of ``[count, owner_sign]`` buckets.
+
+        Board indices 13..24 are emitted first, followed by indices 1..12.
+        """
+        point_order = list(range(13, 25)) + list(range(1, 13))
+        return [[abs(board[point]), np.sign(board[point])] for point in point_order]
+
+
+    def gen_board_from_buckets(self, buckets):
+        """Flatten ``[count, owner_sign]`` buckets into a signed board list.
+
+        Order of the result: bucket 0, buckets -2 down to -13, buckets 1..12,
+        then bucket 25. Every entry becomes ``count * owner_sign``.
+        """
+        ordered = [buckets[0]]
+        ordered.extend(buckets[offset] for offset in range(-2, -14, -1))
+        ordered.extend(buckets[position] for position in range(1, 13))
+        ordered.append(buckets[25])
+        return [count * owner for count, owner in ordered]
+    
+    def move_legal(self, from_board, buckets, roll):
+        """Return True if ``buckets`` matches a legal state from ``from_board``.
+
+        Legal states come from ``Board.calculate_legal_states`` for the
+        current player and the given roll.
+        """
+        candidate = self.gen_board_from_buckets(buckets)
+        reachable = Board.calculate_legal_states(from_board, self.player, roll)
+        return any(candidate == list(state) for state in list(reachable))
+            
+    def find_pin(self, pos):
+        """Map a pixel position to ``(pin, idx)``.
+
+        ``pin`` is the board index and ``idx`` the index into ``self.buckets``.
+        Inside the bar strip (500 < x < 550) both values are 0 for the lower
+        part (y > 225) and 25 otherwise.
+
+        Positions in the empty band between the two rows (175 <= y <= 225)
+        used to leave both values unassigned and raised UnboundLocalError;
+        they now snap to the nearer row, split at y == 200.
+        """
+        x, y = pos
+
+        if 500 < x < 550:
+            bar = 0 if y > 225 else 25
+            return bar, bar
+
+        # Right of the bar: remove the 50px bar width before computing the column.
+        if x > 550:
+            x -= 50
+        column = int(x / self.SPACING)
+
+        if y < 200:
+            return 13 + column, 1 + column
+        return 12 - column, 13 + column
+
+    # Find the y position based on the chosen pin
+    def calc_pos(self, buckets, chosen):
+        """Return the ``(x, y)`` pixel position for a piece on bucket ``chosen``.
+
+        Buckets 0 and 25 map to fixed bar positions. For every other bucket
+        the y coordinate depends on how many pieces the bucket currently holds.
+        """
+        amount = buckets[chosen][0]
+        print(chosen)
+
+        if chosen == 0:
+            return 525, 350
+        if chosen == 25:
+            return 525, 50
+
+        lower_row = chosen > 12
+        column = chosen - 12 if lower_row else chosen
+        x = (self.SPACING*(column-1))+(self.SPACING/2)
+        # Skip over the 50px wide bar for columns on the right half.
+        if x > 500:
+            x += 50
+        y = 378 - (30 * amount) if lower_row else 30 * amount
+        return x, y
+
+    def calc_move_sets(self, from_board, roll, player):
+        """Return ``[legal_states, pips]`` pairs for each die and for their sum.
+
+        One entry is produced per value in ``roll`` (used as ``[value, 0]``),
+        followed by a final entry for the summed value (``[total, 0]``).
+        """
+        print("board!:",from_board)
+        move_sets = [
+            [Board.calculate_legal_states(from_board, player, [die, 0]), die]
+            for die in roll
+        ]
+        combined = sum(roll)
+        move_sets.append([Board.calculate_legal_states(from_board, player, [combined, 0]), combined])
+        return move_sets
+        
+    def calc_turn(self):
+        """Return the player to move: the opponent once ``total_moves`` is 0."""
+        return self.player * -1 if self.total_moves == 0 else self.player
+
+    def handle_move(self, from_board, buckets, roll, player):
+        """Consume the die (or both dice) that produced the board in ``buckets``.
+
+        The first move set containing the current board is used: its pip count
+        is subtracted from ``self.total_moves``, and either the matching entry
+        of ``self.roll`` is zeroed (single die) or the whole roll is reset.
+        """
+        target = self.gen_board_from_buckets(buckets)
+        move_sets = self.calc_move_sets(from_board, roll, player)
+        for position, (states, pips) in enumerate(move_sets):
+            if not any(target == list(state) for state in list(states)):
+                continue
+            self.total_moves -= pips
+            if position < 2:
+                self.roll[position] = 0
+            else:
+                self.roll = [0,0]
+            break
+        print("Total moves left:",self.total_moves)
+            
+                    
+    # while running:
+
+    def paint_board(self):
+        """Process all pending pygame events, then redraw the board once.
+
+        A new turn (player switch plus a fresh roll of two dice) is started
+        when ``calc_turn`` reports a different player than ``self.player``.
+        A QUIT event clears ``self.running`` so that ``test`` can stop.
+        """
+        if self.player != self.calc_turn():
+            self.switch_player()
+            self.roll = [random.randrange(1, 7), random.randrange(1, 7)]
+            self.total_moves = self.roll[0] + self.roll[1]
+            print("Player:",self.player,"rolled:",self.roll)
+
+        for event in pygame.event.get():
+            if event.type == pygame.QUIT:
+                # Must be the attribute: a local `running` is thrown away.
+                self.running = False
+            elif event.type == pygame.MOUSEBUTTONDOWN and event.button == 1:
+                self._pick_up_piece(event.pos)
+            elif event.type == pygame.MOUSEBUTTONUP and event.button == 1:
+                self._drop_piece(event.pos)
+            elif event.type == pygame.MOUSEMOTION:
+                self._drag_piece(event.pos)
+
+        # Draw and tick once per call instead of once per event, so the window
+        # is painted (and the frame rate capped) even when no events arrive.
+        self._draw_board()
+        self.clock.tick(self.FPS)
+
+    @staticmethod
+    def _first_true(flags):
+        """Return the index of the first truthy entry of ``flags``, or None."""
+        return next((i for i, flag in enumerate(flags) if flag), None)
+
+    def _pick_up_piece(self, pos):
+        """Start dragging the current player's piece under ``pos``, if any."""
+        player = self.player
+        rectangles = self.all_rects[player]
+        piece = self._first_true([rect.collidepoint(pos) for rect in rectangles])
+        if piece is None:
+            return
+
+        self.all_drag[player][piece] = True
+        mouse_x, mouse_y = pos
+
+        # Snapshot the state so an illegal move can be undone on release.
+        self.from_buckets = [list(bucket) for bucket in self.buckets]
+        self.from_board = list(self.gen_board_from_buckets(self.buckets))
+
+        _, idx = self.find_pin(pos)
+        self.buckets[idx][0] -= 1
+        if self.buckets[idx][0] == 0:
+            self.buckets[idx][1] = 0
+
+        print("Location for mouse_down:", self.from_board)
+
+        rect = rectangles[piece]
+        self.all_off[player][piece] = [rect.x - mouse_x, rect.y - mouse_y]
+        self.from_locat = [rect.x, rect.y]
+
+    def _send_to_bar(self, idx, x, y):
+        """Move the single enemy piece on bucket ``idx`` to its bar slot."""
+        buckets = self.buckets
+        to_idx = 0 if buckets[idx][1] == 1 else 25
+        enemy_rects = self.all_rects[self.player*-1]
+
+        # The enemy rectangle is searched one slot above and below the
+        # computed drop position.
+        neg_tester = [rect.collidepoint(x,y-30) for rect in enemy_rects]
+        pos_tester = [rect.collidepoint(x,y+30) for rect in enemy_rects]
+        print("Neg tester:",neg_tester)
+        print("Pos tester:",pos_tester)
+        enemy = self._first_true(neg_tester)
+        if enemy is None:
+            enemy = self._first_true(pos_tester)
+
+        buckets[to_idx][0] += 1
+        buckets[to_idx][1] = buckets[idx][1]
+
+        bar_x, bar_y = self.calc_pos(buckets, to_idx)
+        # Previously `enemy` was unbound (UnboundLocalError) when neither
+        # probe found a rectangle; in that case only the buckets are updated.
+        if enemy is not None:
+            enemy_rects[enemy].x = bar_x
+            enemy_rects[enemy].y = bar_y
+
+        buckets[idx][0] = 0
+        print("In here"*20)
+
+    def _drop_piece(self, pos):
+        """Drop the piece under ``pos``; keep the move if legal, else revert."""
+        player = self.player
+        rectangles = self.all_rects[player]
+        piece = self._first_true([rect.collidepoint(pos) for rect in rectangles])
+        if piece is None:
+            return
+
+        buckets = self.buckets
+        _, idx = self.find_pin(pos)
+        x, y = self.calc_pos(buckets, idx)
+
+        # Need to take care of bar stuff: a lone enemy piece gets hit.
+        if buckets[idx][1] == player*-1 and buckets[idx][0] == 1:
+            self._send_to_bar(idx, x, y)
+            # The bucket is empty now, so the landing position changes.
+            x, y = self.calc_pos(buckets, idx)
+
+        buckets[idx][0] += 1
+        buckets[idx][1] = player
+
+        pot_board = self.gen_board_from_buckets(buckets)
+        sets = self.calc_move_sets(self.from_board, self.roll, player)
+
+        print("potential board:",pot_board)
+        truth_values = [
+            True
+            for states, _ in sets
+            if pot_board in [list(state) for state in states]
+        ]
+        print("Truth values:",truth_values)
+
+        self.all_drag[player][piece] = False
+        rect = rectangles[piece]
+        if truth_values:
+            self.handle_move(self.from_board, buckets, self.roll, player)
+            rect.x = x
+            rect.y = y
+        else:
+            # Illegal move: restore the snapshot taken on mouse-down.
+            self.buckets = [list(bucket) for bucket in self.from_buckets]
+            rect.x = self.from_locat[0]
+            rect.y = self.from_locat[1]
+
+    def _drag_piece(self, pos):
+        """Make the piece that is being dragged follow the mouse."""
+        player = self.player
+        piece = self._first_true(self.all_drag[player])
+        if piece is None:
+            return
+        mouse_x, mouse_y = pos
+        offset = self.all_off[player][piece]
+        rect = self.all_rects[player][piece]
+        rect.x = mouse_x + offset[0]
+        rect.y = mouse_y + offset[1]
+
+    def _draw_points(self, base_y, tip_y, first_color):
+        """Draw one row of 12 triangles with alternating colours."""
+        second_color = self.LIGHT_GREY if first_color == self.SALMON else self.SALMON
+        x = 0
+        for i in range(12):
+            # Jump over the bar once the left half is filled.
+            if x < 500 and x+self.SPACING > 500:
+                x = 550
+            color = first_color if i % 2 == 0 else second_color
+            pygame.draw.polygon(self.screen, color, [[x, base_y], [x+self.SPACING, base_y], [(2*x+self.SPACING)/2, tip_y]])
+            x += self.SPACING
+
+    def _draw_board(self):
+        """Paint background, points, bar and all pieces, then flip the display."""
+        self.screen.fill(self.GREEN_FILT)
+
+        self._draw_points(0, 150, self.SALMON)
+        self._draw_points(400, 250, self.LIGHT_GREY)
+
+        pygame.draw.rect(self.screen, self.BROWN, pygame.rect.Rect((500, 0, 50, 400)))
+        for p in [-1,1]:
+            piece_color = self.RED if p == -1 else self.BLACK
+            for rect in self.all_rects[p]:
+                pygame.draw.rect(self.screen, piece_color, rect)
+
+        pygame.display.flip()
+
+    def test(self):
+        """Run the paint loop until the window is closed, then quit pygame."""
+        while self.running:
+            self.paint_board()
+        pygame.quit()
+    
+
+
+b = Board_painter()
+b.test()
+
+
+
--- a/app.py
+++ b/app.py
@ -0,0 +1,141 @@
+from flask import Flask, request, jsonify
+from flask_json import FlaskJSON, as_json_p
+from flask_cors import CORS
+from board import Board
+from eval import Eval
+import main
+import random
+from network import Network
+
+app = Flask(__name__)
+
+
+app.config['JSON_ADD_STATUS'] = False
+app.config['JSON_JSONP_OPTIONAL'] = False
+
+json = FlaskJSON(app)
+CORS(app)
+
+config = main.config.copy()
+config['model'] = "player_testings"
+config['ply'] = "0"
+config['board_representation'] = 'tesauro'
+network = Network(config, config['model'])
+
+network.restore_model()
+
+
+def calc_move_sets(from_board, roll, player):
+    """Return ``[legal_states, pips]`` pairs for each die and for the full roll.
+
+    One entry per value in ``roll`` (played as ``[value, 0]``), then a final
+    entry computed from the complete ``roll`` and tagged with the pip total.
+    """
+    move_sets = [
+        [Board.calculate_legal_states(from_board, player, [die, 0]), die]
+        for die in roll
+    ]
+    move_sets.append([Board.calculate_legal_states(from_board, player, roll), sum(roll)])
+    return move_sets
+
+
+def tmp_name(from_board, to_board, roll, player, total_moves, is_quad=False):
+    """Check ``to_board`` against the move sets and consume the dice used.
+
+    Returns ``(total_moves, roll, board)``. When ``to_board`` is found in a
+    move set, its pip count is subtracted from ``total_moves``, the matching
+    die is zeroed (or the roll is reset to ``[0, 0]`` for the combined set)
+    and ``to_board`` is returned; otherwise ``from_board`` is returned and
+    nothing else changes. ``roll`` is modified in place for single-die moves.
+    """
+    sets = calc_move_sets(from_board, roll, player)
+    print("To board:\n",to_board)
+    print("All sets:\n",sets)
+
+    # Entries below this position belong to individual dice.
+    single_die_entries = 4 if is_quad else 2
+    for position, (states, pips) in enumerate(sets):
+        if to_board not in list(states):
+            continue
+        total_moves -= pips
+        if position < single_die_entries:
+            roll[position] = 0
+        else:
+            roll = [0, 0]
+        return total_moves, roll, to_board
+
+    return total_moves, roll, from_board
+
+def calc_move_stuff(from_board, to_board, roll, player, total_roll, is_quad):
+    """Run ``tmp_name`` on a list copy of ``roll``; return ``(board, total_moves, roll)``."""
+    remaining, updated_roll, result_board = tmp_name(
+        from_board, to_board, list(roll), player, total_roll, is_quad
+    )
+    return result_board, remaining, updated_roll
+
+
+@app.route('/get_board', methods=['GET'])
+@as_json_p
+def get_board():
+    """Return a fixed board as a comma-separated string under the key 'board'."""
+    return {'board':'0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0'}
+
+
+
+def check_move(prev, curr):
+    """Return True if ``curr`` is a legal state reachable from ``prev``.
+
+    Player (-1) and roll ([1,2]) are currently hard-coded.
+    """
+    # TODO: Decide on player system and implement roll properly
+    reachable = Board.calculate_legal_states(tuple(prev), -1, [1,2])
+    matches = [list(curr) == list(state) for state in reachable]
+    return True in matches
+
+
+
+@app.route('/bot_move', methods=['POST'])
+def bot_move():
+    """Let the bot (player 1) move on the posted board with a fresh random roll.
+
+    Expects a JSON body with 'board' (comma-separated integers) and 'pubeval'
+    (JSON boolean, or the string "true"/"false"). Returns the resulting board
+    as a comma-separated string.
+    """
+    data = request.get_json(force=True)
+
+    board = tuple(int(x) for x in data['board'].split(','))
+
+    # bool("false") is True, so a string flag must be compared explicitly.
+    # post_board parses its 'quad' flag the same way (== "true").
+    use_pubeval = str(data['pubeval']).strip().lower() in ("true", "1")
+
+    roll = (random.randrange(1, 7), random.randrange(1, 7))
+
+    if use_pubeval:
+        board, _ = Eval.make_pubeval_move(board, 1, roll)
+    else:
+        board, _ = network.make_move(board, roll, 1)
+
+    return ",".join(str(x) for x in board)
+
+
+
+@app.route('/post_board', methods=['POST'])
+def post_board():
+    """Validate a posted move and answer with ``board#total_moves#roll``.
+
+    Reads 'board', 'prevBoard', 'roll' (comma-separated integers), 'quad'
+    (the string "true" enables quad handling) and 'totalRoll' from the JSON
+    body. Each part of the reply is a comma-separated string.
+    """
+    def to_ints(csv):
+        return [int(x) for x in csv.split(',')]
+
+    def to_csv(values):
+        return ",".join([str(x) for x in values])
+
+    data = request.get_json(force=True)
+
+    # TODO: Fix hardcoded player
+    player = -1
+
+    board = to_ints(data['board'])
+    prev_board = to_ints(data['prevBoard'])
+    print(data['roll'])
+    roll = to_ints(data['roll'])
+    print(roll)
+    quad = data['quad'] == "true"
+
+    total_roll = int(data['totalRoll'])
+    print("total roll is:", total_roll)
+    return_board, total_moves, roll = calc_move_stuff(tuple(prev_board), tuple(board), tuple(roll), player, total_roll, quad)
+
+    return_string = to_csv(return_board) + "#" + str(total_moves) + "#" + to_csv(roll)
+
+    print(return_string)
+
+    return return_string
+
+if __name__ == '__main__':
+    app.run(host = '0.0.0.0', port=35270)
--- a/bin/0-ply-tests.rb
+++ b/bin/0-ply-tests.rb
@ -0,0 +1,78 @@
+# Creates/trains a model for one episode with the given board representation,
+# then alternates evaluation (dumbeval and pubeval, 250 episodes, repeated 3
+# times each) with 2000 training episodes until 200000 episodes are reached.
+#
+# The argument-list form of `system` is used (as in bin/train-evaluate-save)
+# so that model names are never interpreted by a shell.
+def run_stuff(board_rep, model_name, ply)
+  ply = ply.to_s
+  system("python3", "main.py", "--train", "--model", model_name,
+         "--board-rep", board_rep, "--episodes", "1", "--ply", ply)
+
+  epi_count = 0
+  while epi_count < 200000 do
+    ["dumbeval", "pubeval"].each do |eval_method|
+      system("python3", "main.py", "--eval", "--model", model_name,
+             "--eval-methods", eval_method, "--episodes", "250",
+             "--ply", ply, "--repeat-eval", "3")
+    end
+    system("python3", "main.py", "--train", "--model", model_name,
+           "--episodes", "2000", "--ply", ply)
+    epi_count += 2000
+  end
+end
+
+
+### ///////////////////////////////////////////////////////////////
+# QUACK TESTINGS
+### ///////////////////////////////////////////////////////////////
+
+board_rep = "quack"
+model_name = "quack_test_0_ply"
+ply = 0
+
+run_stuff(board_rep, model_name, ply)
+
+
+# board_rep = "quack"
+# model_name = "quack_test_1_ply"
+# ply = 1
+
+# run_stuff(board_rep, model_name, ply)
+
+### ///////////////////////////////////////////////////////////////
+# QUACK-FAT TESTING
+### ///////////////////////////////////////////////////////////////
+
+board_rep = "quack-fat"
+model_name = "quack-fat_test_0_ply"
+ply = 0
+
+run_stuff(board_rep, model_name, ply)
+
+# board_rep = "quack-fat"
+# model_name = "quack-fat_test_1_ply"
+# ply = 1
+
+# run_stuff(board_rep, model_name, ply)
+
+### ///////////////////////////////////////////////////////////////
+# QUACK-NORM TESTING
+### ///////////////////////////////////////////////////////////////
+
+
+board_rep = "quack-norm"
+model_name = "quack-norm_test_0_ply"
+ply = 0
+
+run_stuff(board_rep, model_name, ply)
+
+# board_rep = "quack-norm"
+# model_name = "quack-norm_test_1_ply"
+# ply = 1
+
+# run_stuff(board_rep, model_name, ply)
+
+### ///////////////////////////////////////////////////////////////
+# TESAURO TESTING
+### ///////////////////////////////////////////////////////////////
+
+
+board_rep = "tesauro"
+model_name = "tesauro_test_0_ply"
+ply = 0
+
+run_stuff(board_rep, model_name, ply)
+
+# board_rep = "tesauro"
+# model_name = "tesauro_test_1_ply"
+# ply = 1
+
+# run_stuff(board_rep, model_name, ply)
--- a/bin/train-evaluate-save
+++ b/bin/train-evaluate-save
@ -0,0 +1,69 @@
+#!/usr/bin/env ruby
+MODELS_DIR = 'models'
+
+# Archives models/<model_name> into models/saves/ as a gzipped tarball whose
+# name contains the model name, its trained episode count and a timestamp.
+def save(model_name)
+  require 'date'
+  require 'fileutils'
+
+  model_path = File.join(MODELS_DIR, model_name)
+
+  episode_count = File.read(File.join(model_path, 'episodes_trained')).to_i
+
+  puts "Found model #{model_name} with episodes #{episode_count} trained!"
+
+  # tar does not create missing parent directories for the archive file,
+  # so make sure the saves directory exists first.
+  saves_dir = File.join(MODELS_DIR, 'saves')
+  FileUtils.mkdir_p(saves_dir)
+
+  file_name = "model-#{model_name}-#{episode_count}-#{Time.now.strftime('%Y%m%d-%H%M%S')}.tar.gz"
+  save_path = File.join(saves_dir, file_name)
+  puts "Saving to #{save_path}"
+
+  system("tar", "-cvzf", save_path, "-C", MODELS_DIR, model_name)
+end
+
+# Trains an existing model for the given number of episodes.
+def train(model, episodes)
+  command = ["python3", "main.py", "--train", "--model", model, "--episodes", episodes.to_s]
+  system(*command)
+end
+
+# Like train, but passes --force-creation to main.py.
+def force_train(model, episodes)
+  command = ["python3", "main.py", "--train", "--force-creation", "--model", model, "--episodes", episodes.to_s]
+  system(*command)
+end
+
+# Evaluates a model for the given number of episodes with one eval method.
+def evaluate(model, episodes, method)
+  command = ["python3", "main.py", "--eval" , "--model", model, "--episodes", episodes.to_s, "--eval-methods", method]
+  system(*command)
+end
+
+model = ARGV[0]
+
+raise "no model specified" if model.nil?
+
+# First run for this model: create it, save it and take baseline evaluations.
+# File.exists? was removed in Ruby 3.2; File.exist? works on all versions.
+unless File.exist? File.join(MODELS_DIR, model)
+  force_train model, 10
+  save model
+  3.times do
+    evaluate model, 250, "pubeval"
+  end
+  3.times do
+    evaluate model, 250, "dumbeval"
+  end
+end
+
+# while true do
+#   save model
+#   train model, 1000
+#   save model
+#   train model, 1000
+#   3.times do
+#     evaluate model, 250, "pubeval"
+#   end
+#   3.times do
+#     evaluate model, 250, "dumbeval"
+#   end
+# end
+
+while true do
+  save model
+  train model, 500
+  5.times do
+    evaluate model, 250, "pubeval"
+  end
+  5.times do
+    evaluate model, 250, "dumbeval"
+  end
+end
--- a/board.py
+++ b/board.py
@ -1,3 +1,4 @@
+import quack
 import numpy as np
 import itertools

@ -12,15 +13,9 @@ class Board:
    
    @staticmethod
    def idxs_with_checkers_of_player(board, player):
-        idxs = []
-        for idx, checker_count in enumerate(board):
-            if checker_count * player >= 1:
-                idxs.append(idx)
-        return idxs
+        return quack.idxs_with_checkers_of_player(board, player)

    
-    # TODO: Write a test for this
-    # TODO: Make sure that the bars fit, 0 represents the -1 player and 25 represents the 1 player
    # index 26 is player 1 home, index 27 is player -1 home
    @staticmethod
    def board_features_to_pubeval(board, player):
@ -35,120 +30,157 @@ class Board:
        board.append(-15 - sum(negatives))
        return tuple(board)

-    # The original tesauro also takes in the player, so [1,0] for one of them and [0,1] for the other
-    # Not sure if this should be included
+    # quack
    @staticmethod
-    def map_to_tesauro(board):
-        features = []
-        for i in range(1,25):
-            idx = list(board)[i]
-            place = [0]*8
-            if (idx != 0):
-                if idx > 0:
-                    for i in range(min(int(idx),3)):
-                        place[i]=1.
-                    if idx>3:
-                        place[3]+=(idx-3)/2
-                else:
-                    for i in range(min(abs(int(idx)),3)):
-                        place[i+4]=1.
-                    if idx>3:
-                        place[3+4]+=(idx-3)/2    
-            features+=place
+    def board_features_quack(board, player):
+        """Return the board followed by a two-value turn flag, shaped (1, 28).
+
+        The flag is ``[1, 0]`` when ``player`` is positive, else ``[0, 1]``.
+        """
+        turn_flag = [1, 0] if np.sign(player) > 0 else [0, 1]
+        features = list(board) + turn_flag
+        return np.array(features).reshape(1,28)

-        nega_hits = list(board)[0]/2 
-        posi_hits = list(board)[25]/2 
+    # quack-fat
+    @staticmethod
+    def board_features_quack_fat(board, player):
+        """Return the quack-fat features as an array of shape (1, 30).
+
+        The features themselves are computed by
+        ``quack.board_features_quack_fat``; this wrapper only converts the
+        result to a numpy array and reshapes it.
+        """
+        return np.array(quack.board_features_quack_fat(board,player)).reshape(1,30)
+        # Earlier pure-Python version, kept for reference (unreachable).
+        # NOTE(review): presumably what the quack module implements now -- confirm.
+        # board = list(board)
+        # positives = [x if x > 0 else 0 for x in board]
+        # negatives = [x if x < 0 else 0 for x in board]
+        # board.append( 15 - sum(positives))
+        # board.append(-15 - sum(negatives))
+        # board += ([1, 0] if np.sign(player) > 0 else [0, 1])
+        # return np.array(board).reshape(1,30)
+
+    # quack-norm
+    @staticmethod
+    def board_features_quack_norm(board, player):
+        """Return normalised quack-fat style features, shaped (1, 30).
+
+        The two bar slots (indices 0 and 25) are divided by 2 and every other
+        point by 15. Two home counts (computed from the unscaled board) and a
+        two-value turn flag are appended.
+        """
+        board = list(board)
         positives = [x if x > 0 else 0 for x in board]
         negatives = [x if x < 0 else 0 for x in board]
-        posi_home = ((15 - sum(positives))/15)
-        nega_home = ((-15 - sum(negatives))/15)
-        features.append(nega_hits)
-        features.append(posi_hits)
-        features.append(posi_home)
-        features.append(nega_home)
-#        print(features)
-        return features
+        # `x == 0 or 25` is always truthy (25 is a non-zero constant), which
+        # meant no point was ever divided by 15; test membership instead.
+        board = [board[x] / 2 if x in (0, 25) else board[x] / 15 for x in range(0, 26)]
+
+        board.append(15 - sum(positives))
+        board.append(-15 - sum(negatives))
+        board += ([1, 0] if np.sign(player) > 0 else [0, 1])
+        return np.array(board).reshape(1, 30)
+
+    # tesauro
+    @staticmethod
+    def board_features_tesauro(board, cur_player):
+        """Return the 198-value tesauro encoding of ``board``, shaped (1, 198).
+
+        For player 1 and then player -1: four values per point (indices
+        1..24), one bar value and one value derived from
+        ``Board.num_of_checkers_for_player``. A two-value flag for
+        ``cur_player`` is appended last.
+        """
+        small_stacks = {
+            1: (1,0,0,0),
+            2: (1,1,0,0),
+            3: (1,1,1,0),
+        }
+
+        def point_encoding(val, player):
+            # Positive count means the point belongs to `player`.
+            count = val * player
+            if count <= 0:
+                return (0,0,0,0)
+            return small_stacks.get(count, (1,1,1, (count - 3) / 2))
+
+        board_rep = []
+        for player in [1,-1]:
+            for x in board[1:25]:
+                board_rep += point_encoding(x, player)
+            # Bar slot read for this player: index 0 for 1, index 25 for -1.
+            bar_idx = 0 if player == 1 else 25
+            board_rep += (abs(board[bar_idx]/2),)
+            board_rep += (15 - Board.num_of_checkers_for_player(board, player),)
+
+        board_rep += ([1, 0] if cur_player == 1 else [0, 1])
+
+        return np.array(board_rep).reshape(1, 198)
+
+
+    @staticmethod
+    def board_features_tesauro_fat(board, cur_player):
+        """Return a "fat" tesauro encoding with 15 unary values per point.
+
+        For player 1 and then player -1: 15 values per point (indices 1..24,
+        the first ``count`` of them set to 1), one bar value and one value
+        derived from ``Board.num_of_checkers_for_player``. A two-value flag
+        for ``cur_player`` is appended last. The result has shape
+        ``(1, len(features))``.
+        """
+        def ordinary_trans(val, player):
+            # Unary encoding of the player's checker count on one point.
+            # Replaces a 16-branch elif ladder that implicitly returned None
+            # for counts above 15.
+            abs_val = val*player
+            return tuple(1 if slot < abs_val else 0 for slot in range(15))
+
+        def bar_trans(board, player):
+            if   player == 1: return (abs(board[0]/2),)
+            elif player == -1: return (abs(board[25]/2),)
+
+        board_rep = []
+        for player in [1, -1]:
+            for x in board[1:25]:
+                board_rep += ordinary_trans(x, player)
+            board_rep += bar_trans(board, player)
+            board_rep += (15 - Board.num_of_checkers_for_player(board, player),)
+
+        board_rep += ([1, 0] if cur_player == 1 else [0, 1])
+
+        return np.array(board_rep).reshape(1, len(board_rep))
+
+
+    @staticmethod
+    def board_features_tesauro_wrong(board, cur_player):
+        """Return a 198-value tesauro-style encoding, shaped (1, 198).
+
+        For player -1 and then player 1: four values per point (indices
+        1..24), the bar value divided by 2 (index 0 for player -1, index 25
+        for player 1) and ``(15 - checkers_on_points) / 15``. A two-value flag
+        for ``cur_player`` is appended last.
+        """
+        features = []
+        for player in (-1, 1):
+            on_points = 0.0
+            for point in range(1, 25):
+                pin = board[point]
+                encoded = [0.0, 0.0, 0.0, 0.0]
+                if np.sign(pin) == np.sign(player):
+                    stack = abs(pin)
+                    on_points += stack
+                    for slot in range(min(stack, 3)):
+                        encoded[slot] = 1
+                    if stack > 3:
+                        encoded[3] = (stack - 3)/2
+                features.extend(encoded)
+            # Append the bar value for this player divided by 2
+            bar = board[0] if np.sign(player) < 0 else board[25]
+            features.append(bar / 2.0)
+            # Checkers not on any point, as a fraction of 15
+            features.append((15 - on_points) / 15)
+        features.extend([1,0] if np.sign(cur_player) > 0 else [0,1])
+        return np.array(features).reshape(1,198)
+


    @staticmethod
    def is_move_valid(board, player, face_value, move):
-        def sign(a):
-            return (a > 0) - (a < 0)
-
-        from_idx   = move[0]
-        to_idx     = move[1]
-        to_state   = None
-        from_state = board[from_idx]
-        delta      = to_idx - from_idx
-        direction  = sign(delta)
-        bearing_off = None
-
-        # FIXME: Use get instead of array-like indexing
-        if to_idx >= 1 and to_idx <= 24:
-            to_state   = board[to_idx]
-            bearing_off = False
-        else:  # Bearing off
-            to_state   = 0
-            bearing_off = True
-
-        # print("_"*20)
-        # print("board:", board)
-        # print("to_idx:", to_idx, "board[to_idx]:", board[to_idx], "to_state:", to_state)
-        # print("+"*20)
-        
-        def is_forward_move():
-            return direction == player
-
-        def face_value_match_move_length():
-            return abs(delta) == face_value
-
-        def bear_in_if_checker_on_bar():
-            if player == 1:
-                bar = 0
-            else:
-                bar = 25
-
-            bar_state = board[bar]
-
-            if bar_state != 0:
-                return from_idx == bar
-            else:
-                return True
-
-        def checkers_at_from_idx():
-            return sign(from_state) == player
-
-        def no_block_at_to_idx():
-            if -sign(to_state) == player:
-                return abs(to_state) == 1
-            else:
-                return True
-
-        def can_bear_off():
-            checker_idxs = Board.idxs_with_checkers_of_player(board, player)
-            def is_moving_backmost_checker():
-                if player == 1:
-                    return all([(idx >= from_idx) for idx in checker_idxs])
-                else:
-                    return all([(idx <= from_idx) for idx in checker_idxs])
-
-            def all_checkers_in_last_quadrant():
-                if player == 1:
-                    return all([(idx >= 19) for idx in checker_idxs])
-                else:
-                    return all([(idx <= 6) for idx in checker_idxs])
-
-            return all([ is_moving_backmost_checker(),
-                         all_checkers_in_last_quadrant() ])
-                
-            # TODO: add switch here instead of wonky ternary in all        
-            
-        return all([ is_forward_move(),
-                     face_value_match_move_length(),
-                     bear_in_if_checker_on_bar(),
-                     checkers_at_from_idx(),
-                     no_block_at_to_idx(),
-                     can_bear_off() if bearing_off else True ])
+        return quack.is_move_valid(board, player, face_value, move)

    @staticmethod
    def any_move_valid(board, player, roll):
@ -188,40 +220,37 @@ class Board:


    @staticmethod
-    def apply_moves_to_board(board, player, moves):
-        for move in moves:
-            from_idx, to_idx = move.split("/")
-            board[int(from_idx)] -= int(player)
-            board[int(to_idx)] += int(player)
-        return board
+    def apply_moves_to_board(board, player, move):
+        """Return the board that results from `player` moving one checker.
+
+        The move is applied without any legality check; callers must make
+        sure it is valid first.
+
+        :param board: sequence of signed checker counts indexed by point;
+                      indices 0 and 25 are the bars that hit checkers go to
+        :param player: 1 or -1, also the amount added to / removed from a point
+        :param move: pair ``(from_idx, to_idx)``
+        :return: a new tuple; the board passed in is not modified
+        """
+        from_idx, to_idx = move[0], move[1]
+        board = list(board)  # mutable copy
+
+        # Lift the checker from its starting point.
+        board[from_idx] -= player
+
+        # Bearing off: the checker leaves the board. The updated board must
+        # still be returned (a bare `return` here used to yield None).
+        if to_idx < 1 or to_idx > 24:
+            return tuple(board)
+
+        # A single opposing checker on the target point is hit and goes to
+        # the opponent's bar.
+        if board[to_idx] * player == -1:
+            if player == 1:
+                board[25] -= player
+            else:
+                board[0] -= player
+            board[to_idx] = 0
+
+        # Put the checker down.
+        board[to_idx] += player
+
+        return tuple(board)

    @staticmethod
    def calculate_legal_states(board, player, roll):
        # Find all points with checkers on them belonging to the player
        # Iterate through each index and check if it's a possible move given the roll

-        # TODO: make sure that it is not possible to do nothing on first part of
-        #       turn and then do something with the second die
-        
        def calc_moves(board, face_value):
-            idxs_with_checkers = Board.idxs_with_checkers_of_player(board, player)
-            if len(idxs_with_checkers) == 0:
+            if face_value == 0:
                return [board]
-            boards = [(Board.do_move(board,
-                               player,
-                               (idx, idx + (face_value * player)))
-                       if Board.is_move_valid(board,
-                                              player,
-                                              face_value,
-                                              (idx, idx + (face_value * player)))
-                       else None)
-                      for idx in idxs_with_checkers]
-
-            board_list = list(filter(None, boards))  # Remove None-values
-            # if len(board_list) == 0:
-            #     return [board]
-            
-            return board_list
+            return quack.calc_moves(board, player, face_value)

        # Problem with cal_moves: Method can return empty list (should always contain at least same board).
        #               *Update*: Seems to be fixed.
@ -234,26 +263,18 @@ class Board:
        
        if not Board.any_move_valid(board, player, roll):
            return { board }
-        
        dice_permutations = list(itertools.permutations(roll)) if roll[0] != roll[1] else [[roll[0]]*4]
-        
+        #print("Permuts:",dice_permutations)
+        # print("Dice permuts:",dice_permutations)
        for roll in dice_permutations:
            # Calculate boards resulting from first move
-            #print("initial board: ", board)
-            #print("roll:", roll)
            boards = calc_moves(board, roll[0])
-            #print("boards after first die: ", boards)

            for die in roll[1:]:
                # Calculate boards resulting from second move
                nested_boards = [calc_moves(board, die) for board in boards]
-                #print("nested boards: ", nested_boards)
                boards = [board for boards in nested_boards for board in boards]

-                # What the fuck
-                #for board in boards:
-                #    print(board)
-                #    print("type__:",type(board))
                # Add resulting unique boards to set of legal boards resulting from roll

                #print("printing boards from calculate_legal_states: ", boards)
@ -282,9 +303,9 @@ class Board:
        return """
  13  14  15  16  17  18               19  20  21  22  23  24
 +--------------------------------------------------------------------------+
-| {12}| {11}| {10}| {9}| {8}| {7}| bar -1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end -1: TODO|
+| {13}| {14}| {15}| {16}| {17}| {18}| bar -1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end  1: TODO|
 |---|---|---|---|---|---|------------|---|---|---|---|---|---|             |
-| {13}| {14}| {15}| {16}| {17}| {18}| bar  1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end  1: TODO|
+| {12}| {11}| {10}| {9}| {8}| {7}| bar  1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end -1: TODO|
 +--------------------------------------------------------------------------+
  12  11  10   9   8   7                6   5   4   3   2   1 
 """.format(*temp)
@ -292,42 +313,8 @@ class Board:
    @staticmethod
    def do_move(board, player, move):
        # Implies that move is valid; make sure to check move validity before calling do_move(...)
+        return quack.do_move(board, player, move)

-        def move_to_bar(board, to_idx):
-            board = list(board)
-            if player == 1:
-                board[25] -= player
-            else:
-                board[0] -= player
-                
-            board[to_idx] = 0
-            return board
-
-        # TODO: Moving in from bar is handled by the representation
-        # TODONE: Handle bearing off
-
-        from_idx = move[0]
-        #print("from_idx: ", from_idx)
-        to_idx = move[1]
-        #print("to_idx: ", to_idx)
-        # pdb.set_trace()
-        board = list(board) # Make mutable copy of board
-
-        # 'Lift' checker
-        board[from_idx] -= player
-
-        # Handle bearing off
-        if to_idx < 1 or to_idx > 24:
-            return tuple(board)
-        
-        # Handle hitting checkers
-        if board[to_idx] * player == -1:
-            board = move_to_bar(board, to_idx)
-
-        # Put down checker
-        board[to_idx] += player
-
-        return tuple(board)

    @staticmethod
    def flip(board):
--- a/bot.py
+++ b/bot.py
@ -1,24 +1,8 @@
-from cup import Cup
-from network import Network
 from board import Board

-import tensorflow as tf
-import numpy as np
-import random
-
 class Bot:
-    def __init__(self, sym, config = None, name = "unnamed"):
-        self.config = config
-        self.cup = Cup()
+    def __init__(self, sym):
        self.sym = sym
-        self.graph = tf.Graph()
-
-        self.network = Network(config, name)
-        self.network.restore_model()
-
-    def restore_model(self):
-        with self.graph.as_default():
-            self.network.restore_model()

    def get_session(self):
        return self.session
@ -26,16 +10,60 @@ class Bot:
    def get_sym(self):
        return self.sym

-    def get_network(self):
-        return self.network

-    # TODO: DEPRECATE
-    def make_move(self, board, sym, roll):
-        # print(Board.pretty(board))
-        legal_moves = Board.calculate_legal_states(board, sym, roll)
-        moves_and_scores = [ (move, self.network.eval_state(np.array(move).reshape(1,26))) for move in legal_moves ]
-        scores = [ x[1] for x in moves_and_scores ]
-        best_move_pair = moves_and_scores[np.array(scores).argmax()]
-        #print("Found the best state, being:", np.array(move_scores).argmax())
-        return best_move_pair
+    def calc_move_sets(self, from_board, roll, player):
+        """Collect the legal states reachable with each die and with their sum.
+
+        :param from_board: board the moves start from
+        :param roll: iterable of die values
+        :param player: player the states are computed for
+        :return: list of ``[legal_states, pips]`` pairs, one per die in roll
+                 order followed by one for the sum of all dice
+        """
+        print("board!:",from_board)
+
+        # One entry per die, each computed as a single-die roll [die, 0].
+        move_sets = [[Board.calculate_legal_states(from_board, player, [die, 0]), die]
+                     for die in roll]
+
+        # Final entry: all pips spent as one combined move.
+        combined = sum(pips for _, pips in move_sets)
+        move_sets.append([Board.calculate_legal_states(from_board, player, [combined, 0]), combined])
+        return move_sets
+
+
+    def handle_move(self, from_board, to_board, roll, player):
+        """Account for a move from `from_board` to `to_board` on this bot.
+
+        Looks for `to_board` among the states produced by calc_move_sets and,
+        on the first match, subtracts the pips used from ``self.total_moves``
+        and zeroes the matching die in ``self.roll`` (both dice when the
+        match is the combined entry). Always prints the remaining total.
+
+        NOTE(review): ``self.total_moves`` and ``self.roll`` are not set by
+        the ``__init__`` visible in this change; presumably the caller
+        assigns them before calling this - confirm.
+
+        :param to_board: board compared as a list against each legal state
+        """
+        candidates = self.calc_move_sets(from_board, roll, player)
+        for position, (states, pips) in enumerate(candidates):
+            reachable = [list(state) for state in states]
+            if to_board not in reachable:
+                continue
+
+            self.total_moves -= pips
+            if position >= 2:
+                # Matched the combined entry: both dice are spent.
+                self.roll = [0,0]
+            else:
+                self.roll[position] = 0
+            break
+        print("Total moves left:",self.total_moves)
+
+
+    def tmp_name(self, from_board, to_board, roll, player, total_moves):
+        """Check a proposed move and update the remaining dice accordingly.
+
+        `to_board` is searched for among the states produced by
+        calc_move_sets. On the first match the pips of that entry are
+        subtracted from `total_moves` and the used die is zeroed (both dice
+        when the match is the combined entry).
+
+        :param from_board: board before the move
+        :param to_board: proposed board after the move (list, tuple or None)
+        :param roll: list of die values; the used die is zeroed in place
+        :param player: player making the move
+        :param total_moves: pips still to be played
+        :return: ``(total_moves, roll, board)`` where board is `to_board` if
+                 the move was accepted and `from_board` otherwise
+        """
+        sets = self.calc_move_sets(from_board, roll, player)
+        # Compare as lists so that a tuple and a list describing the same
+        # position are considered equal.
+        target = None if to_board is None else list(to_board)
+        return_board = from_board
+        # Unpack states and pips separately; rebinding one name to the state
+        # list used to make the pip lookup index into the states instead.
+        for idx, (states, pips) in enumerate(sets):
+            if target in [list(state) for state in states]:
+                total_moves -= pips
+                # if it's not the sum of the moves
+                if idx < 2:
+                    roll[idx] = 0
+                else:
+                    roll = [0,0]
+                return_board = to_board
+                break
+        return total_moves, roll, return_board
+
+    def make_human_move(self, board, player, roll):
+        """Prompt on stdin for moves until all pips of `roll` are played.
+
+        Each move is entered as ``from/to`` (two integers). The move is
+        applied to the current board and accepted only if tmp_name finds the
+        result among the legal states; otherwise the prompt is repeated.
+
+        NOTE(review): the loop only ends when the pips reach zero, so it does
+        not terminate if the player has no legal move left - confirm how the
+        caller wants that handled.
+
+        :param board: board at the start of the turn
+        :param player: player making the moves
+        :param roll: the two die values
+        :return: the board after the accepted moves
+        """
+        total_moves = roll[0] + roll[1]
+        while total_moves != 0:
+            raw_move = input("Pick a move!\n")
+
+            # input() returns text; apply_moves_to_board indexes the board
+            # with the move entries, so they have to be integers.
+            try:
+                from_idx, to_idx = (int(part) for part in raw_move.split("/"))
+            except ValueError:
+                print("Could not read move, expected the form from/to")
+                continue
+
+            if not 0 <= from_idx < len(board):
+                print("Starting point is not on the board")
+                continue
+
+            # Apply to the current board, not the board the turn started on.
+            to_board = Board.apply_moves_to_board(board, player, (from_idx, to_idx))
+            total_moves, roll, board = self.tmp_name(board, to_board, roll, player, total_moves)
+
+        return board
+
        
--- a/dumbeval/.gitignore
+++ b/dumbeval/.gitignore
@ -0,0 +1 @@
+build/
--- a/dumbeval/dumbeval.c
+++ b/dumbeval/dumbeval.c
@ -0,0 +1,194 @@
+#include <Python.h>
+
+static PyObject* DumbevalError;
+
+static float x[122];
+
+
+/* With apologies to Gerry Tesauro */
+
+/* Weights generated by weights.py */
+static const float wc[122] = {
+-1.91222,  1.45979,  0.40657, -1.39159,  3.64558, -0.45381, -0.03157,
+  0.14539,  0.80232,  0.87558,  2.36202, -2.01887, -0.88918,  2.65871,
+ -1.31587,  1.07476,  0.30491, -1.32892,  0.38018, -0.30714, -1.16178,
+  0.71481, -1.01334, -0.44373,  0.51255, -0.17171, -0.88886,  0.02071,
+ -0.53279, -0.22139, -1.02436,  0.17948,  0.95697,  0.49272,  0.31848,
+ -0.58293,  0.14484,  0.22063,  1.0336 , -1.90554,  1.10291, -2.05589,
+ -0.16964, -0.82442,  1.27217, -1.24968, -0.90372,  0.05546,  0.2535 ,
+ -0.03533, -0.31773,  0.43704,  0.21699,  0.10519,  2.12775, -0.48196,
+ -0.08445, -0.13156, -0.68362,  0.64765,  0.32537,  0.79493,  1.94577,
+ -0.63827,  0.97057, -0.46039,  1.51801, -0.62955, -0.43632,  0.25876,
+ -0.46623, -0.46963,  1.3532 , -0.07362, -1.53211,  0.69676, -0.92407,
+  0.07153,  0.67173,  0.27661, -0.51579, -0.49019,  1.06603, -0.97673,
+ -1.21231, -1.54966, -0.07795,  0.32697,  0.02873,  1.38703,  0.41725,
+  0.78326, -0.7257 ,  0.54165,  1.38882,  0.27304,  1.0739 ,  0.74654,
+  1.35561,  1.18697,  1.09146,  0.17552, -0.30773,  0.27812, -1.674  ,
+ -0.31073, -0.40745,  0.51546, -1.10875,  2.0081 , -1.27931, -1.16321,
+  0.95652,  0.7487 , -0.2347 ,  0.20324, -0.41417,  0.05929,  0.72632,
+ -1.15223,  1.2745 , -0.15947 };
+
+static const float wr[122] = {
+ 0.13119, -0.13164, -1.2736 ,  1.06352, -1.34749, -1.03086, -0.27417,
+ -0.27762,  0.79454, -1.12623,  2.1134 , -0.7003 ,  0.26056, -1.13518,
+ -1.64548, -1.30828, -0.96589, -0.36258, -1.14323, -0.2006 , -1.00307,
+  0.57739, -0.62693,  0.29721, -0.36996, -0.17462,  0.96704,  0.08902,
+  1.4337 , -0.47107,  0.82156,  0.14988,  1.74034,  1.13313, -0.32083,
+ -0.00048, -0.86622,  1.12808,  0.99875,  0.8049 , -0.16841, -0.42677,
+ -1.9409 , -0.53565, -0.83708,  0.69603,  0.32079,  0.56942,  0.67965,
+  1.49328, -1.65885,  0.96284,  0.63196, -0.27504,  0.39174,  0.71225,
+ -0.3614 ,  0.88761,  1.12882,  0.77764,  1.02618, -0.20245, -0.39245,
+ -1.56799,  1.04888, -1.20858, -0.24361, -1.85157, -0.16912,  0.50512,
+ -2.93122,  0.70477, -0.93066,  1.74867,  0.23963, -0.00699, -1.27183,
+ -0.30604,  1.71039,  0.82202, -1.36734, -1.08352, -1.25054,  0.49436,
+ -1.5037 , -0.73143,  0.74189,  0.32365,  0.30539, -0.72169,  0.41088,
+ -1.56632, -0.63526,  0.58779, -0.05653,  0.76713, -1.40898, -0.33683,
+  1.86802,  0.59773,  1.28668, -0.65817,  2.46829, -0.09331,  2.9034 ,
+  1.04809,  0.73222, -0.44372,  0.53044, -1.9274 , -1.57183, -1.14068,
+  1.26036, -0.9296 ,  0.06662, -0.26572, -0.30862,  0.72915,  0.98977,
+  0.63513, -1.43917, -0.12523 };
+
+void setx(int pos[])
+{
+        /* Fill the file-level input vector x[] from board position pos[].
+           Points 24 down to 1 each get five consecutive features, followed
+           by one feature for pos[0] and one for pos[26]. */
+        int slot, i, n;
+        float *f;
+
+        /* clear all 122 inputs */
+        for(i=0;i<122;++i) x[i] = 0.0;
+
+        /* slot 0 describes point 24, slot 23 describes point 1 */
+        for(slot=0;slot<24;++slot) {
+            n = pos[24-slot];
+            f = &x[5*slot];
+            if(n==-1) {
+                f[0] = 1.0;
+            }
+            else if(n==1) {
+                f[1] = 1.0;
+            }
+            else if(n>=2) {
+                f[2] = 1.0;
+                if(n==3) f[3] = 1.0;
+                else if(n>=4) f[4] = (float)(n-3)/2.0;
+            }
+            /* n == 0 and n < -1 leave all five features at zero */
+        }
+
+        /* opponent barmen: pos[0], negated and scaled */
+        x[120] = -(float)(pos[0])/2.0;
+        /* computer's menoff: pos[26] as a fraction of 15 */
+        x[121] = (float)(pos[26])/15.0;
+}
+
+float dumbeval(int race, int pos[])
+{
+        /* Backgammon move-selection evaluation function for benchmark
+           comparisons, structured after the public evaluation routine
+           provided by Gerry Tesauro, IBM Research
+           (e-mail: tesauro@watson.ibm.com).
+
+           Computes a linear score  Score = W * X,  where X is the input
+           vector built by setx() from the board and W is one of the two
+           weight vectors compiled into this file: wr[] for racing
+           positions, wc[] for contact positions. No weight file has to be
+           read at start-up.
+
+           race   should be set based on the INITIAL position BEFORE the
+                  move: 1 if the position is a race (no contact), 0 if it
+                  is a contact position.
+
+           pos[]  is an integer array of dimension 28 describing a legal
+                  final board state after the move:
+                    1-24  board locations from the computer's point of
+                          view; the computer's men (positive) move from 24
+                          towards 1, the opponent's men (negative) move
+                          from 1 towards 24
+                    25    computer's men on the bar (positive)
+                    0     opponent's men on the bar (negative)
+                    26    computer's men off the board (positive)
+                    27    opponent's men off the board (negative)      */
+
+        const float *w;
+        float score = 0.0;
+        int i;
+
+        /* all men off, best possible move */
+        if(pos[26]==15) return(99999999.);
+
+        setx(pos); /* sets input array x[] */
+
+        /* race weights for races, contact weights otherwise */
+        w = race ? wr : wc;
+        for(i=0;i<122;++i) score += w[i]*x[i];
+
+        return(score);
+}
+
+/* Python entry point: dumbeval.eval(race, board).
+ *
+ * race  is interpreted as a boolean.
+ * board must be a tuple of exactly 28 integers (layout as documented on
+ *       dumbeval()).
+ *
+ * Returns the score as a Python float, or NULL with an exception set when
+ * the arguments cannot be parsed, the tuple has the wrong length, or an
+ * entry cannot be converted to an integer. */
+static PyObject*
+dumbeval_eval(PyObject *self, PyObject *args) {
+  int race;
+  Py_ssize_t numValues;
+  int board[28];
+  long value;
+  float eval_score;
+
+  PyObject* tuple_obj;
+  PyObject* val_obj;
+
+  if (! PyArg_ParseTuple(args, "pO!", &race, &PyTuple_Type, &tuple_obj))
+    return NULL;
+
+  numValues = PyTuple_Size(tuple_obj);
+
+  if (numValues < 0) return NULL;
+  if (numValues != 28) {
+    PyErr_SetString(DumbevalError, "Tuple must have 28 entries");
+    return NULL;
+  }
+
+  // Iterate over tuple to retrieve positions
+  for (Py_ssize_t i=0; i<numValues; i++) {
+    // Borrowed reference; NULL means an exception is already set
+    val_obj = PyTuple_GetItem(tuple_obj, i);
+    if (val_obj == NULL) return NULL;
+
+    // -1 is both a valid value and the error marker, so the pending
+    // exception has to be checked explicitly
+    value = PyLong_AsLong(val_obj);
+    if (value == -1 && PyErr_Occurred()) return NULL;
+
+    board[i] = (int)value;
+  }
+
+  eval_score = dumbeval(race, board);
+  return Py_BuildValue("f", eval_score);
+}
+
+/* Method table of the module: a single function, eval, implemented by
+   dumbeval_eval and called with positional arguments. */
+static PyMethodDef dumbeval_methods[] = {
+  {
+    "eval", dumbeval_eval, METH_VARARGS,
+    "Returns evaluation results for the given board position."
+  },
+  {NULL, NULL, 0, NULL}  /* sentinel entry terminating the table */
+};
+
+/* Module definition handed to PyModule_Create() in PyInit_dumbeval(). */
+static struct PyModuleDef dumbeval_definition = {
+  PyModuleDef_HEAD_INIT,
+  "dumbeval",  /* module name */
+  /* module docstring */
+  "A Python module that implements Gerald Tesauro's pubeval function for evaluation backgammon positions with badly initialized weights.",
+  -1,  /* no per-module state is allocated; state lives in file-level statics such as x[] */
+  dumbeval_methods
+};
+
+/* Module initialisation: creates the module and registers the
+   dumbeval.error exception on it. Returns NULL with an exception set if
+   any step fails. */
+PyMODINIT_FUNC PyInit_dumbeval(void) {
+  PyObject* module;
+
+  module = PyModule_Create(&dumbeval_definition);
+  if (module == NULL)
+    return NULL;
+
+  DumbevalError = PyErr_NewException("dumbeval.error", NULL, NULL);
+  if (DumbevalError == NULL) {
+    Py_DECREF(module);
+    return NULL;
+  }
+
+  /* PyModule_AddObject steals a reference only on success; keep one for
+     the file-level DumbevalError pointer. */
+  Py_INCREF(DumbevalError);
+  if (PyModule_AddObject(module, "error", DumbevalError) < 0) {
+    Py_DECREF(DumbevalError);  /* the reference that was not stolen */
+    Py_CLEAR(DumbevalError);   /* the reference held by the global */
+    Py_DECREF(module);
+    return NULL;
+  }
+
+  return module;
+}
--- a/dumbeval/setup.py
+++ b/dumbeval/setup.py
@ -0,0 +1,9 @@
+"""Build script for the dumbeval C extension module."""
+from distutils.core import Extension, setup
+
+# The extension is compiled from a single C source file.
+dumbeval = Extension(
+    'dumbeval',
+    sources=['dumbeval.c'],
+)
+
+setup(
+    name='dumbeval',
+    version='0.1',
+    description='Dumbeval for Python',
+    ext_modules=[dumbeval],
+)
--- a/dumbeval/weights.py
+++ b/dumbeval/weights.py
@ -0,0 +1,14 @@
+#!/usr/bin/env python3
+"""Print two random 122-element weight tables as C array definitions."""
+import re
+import sys
+
+import numpy as np
+
+# Never summarise the array with "..."; NumPy requires an integer threshold
+# here (NaN is rejected), and sys.maxsize means "always print in full".
+np.set_printoptions(precision=5, suppress=True, threshold=sys.maxsize)
+
+
+def random_array_string():
+    """Return 122 standard-normal samples formatted as a C initialiser.
+
+    The result has the form ``{\\n<values> };`` so it can follow
+    ``static const float name[122] =`` directly.
+    """
+    text = np.array2string(np.random.normal(0, 1, 122), separator=', ')
+    # The printed array spans several lines, so "." has to match newlines.
+    # The flag is passed explicitly: an inline (?s) is only accepted at the
+    # start of a pattern, and assigning to re.DOTALL does not enable it.
+    return re.sub(r'^\[(.*)\]$', r'{\n\1 };', text, flags=re.DOTALL)
+
+
+def main():
+    """Write both weight tables to stdout."""
+    print("/* Weights generated by weights.py */")
+    print("static const float wc[122] =", random_array_string())
+    print()
+    print("static const float wr[122] =", random_array_string())
+
+
+if __name__ == "__main__":
+    main()
--- a/eval.py
+++ b/eval.py
@ -2,6 +2,7 @@ from board import Board

 import numpy as np
 import pubeval
+import dumbeval


 class Eval:
@ -15,8 +16,6 @@ class Eval:
    @staticmethod
    def make_pubeval_move(board, sym, roll):
        legal_moves = Board.calculate_legal_states(board, sym, roll)
-       # print("Board:", board)
-       # print("Length:",len(board))
        moves_and_scores = [ ( board,
                               pubeval.eval(False, Board.board_features_to_pubeval(board, sym)))
                             for board
@ -26,4 +25,16 @@ class Eval:

        return best_move_pair

+    @staticmethod
+    def make_dumbeval_move(board, sym, roll):
+        """Pick the legal state that dumbeval scores highest.
+
+        Every state returned by Board.calculate_legal_states is converted
+        with Board.board_features_to_pubeval and scored by dumbeval.eval,
+        always with the race flag set to False.
+
+        :return: ``(state, score)`` for the highest-scoring state
+        """
+        legal_moves = Board.calculate_legal_states(board, sym, roll)
+
+        moves_and_scores = []
+        for state in legal_moves:
+            features = Board.board_features_to_pubeval(state, sym)
+            moves_and_scores.append((state, dumbeval.eval(False, features)))
+
+        best_idx = np.array([score for _, score in moves_and_scores]).argmax()
+        return moves_and_scores[best_idx]
+
    
--- a/game.py
+++ b/game.py
@ -23,18 +23,21 @@ class Game:
        
    def roll(self):
        return self.cup.roll()
-
+    '''
    def best_move_and_score(self):
        roll = self.roll()
        move_and_val = self.p1.make_move(self.board, self.p1.get_sym(), roll)
        self.board = move_and_val[0]
        return move_and_val
+    '''

+    '''
    def next_round(self):
        roll = self.roll()
        #print(roll)
        self.board = Board.flip(self.p2.make_move(Board.flip(self.board), self.p2.get_sym(), roll)[0])
        return self.board
+    '''

    def board_state(self):
        return self.board
--- a/main.py
+++ b/main.py
@ -2,38 +2,7 @@ import argparse
 import sys
 import os
 import time
-
-model_storage_path = 'models'
-
-# Create models folder
-if not os.path.exists(model_storage_path):
-    os.makedirs(model_storage_path)
-
-# Define helper functions
-def log_train_outcome(outcome, trained_eps = 0):
-    format_vars = { 'trained_eps': trained_eps,
-                    'count': len(train_outcome),
-                    'sum': sum(train_outcome),
-                    'mean': sum(train_outcome) / len(train_outcome),
-                    'time': int(time.time())
-    }
-    with open(os.path.join(config['model_path'], 'logs', "train.log"), 'a+') as f:
-        f.write("{time};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")
-    
-
-def log_eval_outcomes(outcomes, trained_eps = 0):
-    for outcome in outcomes:
-        scores = outcome[1]
-        format_vars = { 'trained_eps': trained_eps,
-                        'method': outcome[0],
-                        'count': len(scores),
-                        'sum': sum(scores),
-                        'mean': sum(scores) / len(scores),
-                        'time': int(time.time())
-        }
-        with open(os.path.join(config['model_path'], 'logs', "eval.log"), 'a+') as f:
-            f.write("{time};{method};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")
-
+import subprocess

 # Parse command line arguments
 parser = argparse.ArgumentParser(description="Backgammon games")
@ -47,13 +16,15 @@ parser.add_argument('--eval-methods', action='store',
                    default=['random'], nargs='*',
                    help='specifies evaluation methods')
 parser.add_argument('--eval', action='store_true',
-                    help='whether to evaluate the neural network with a random choice bot')
+                    help='evaluate the neural network with a random choice bot')
+parser.add_argument('--bench-eval-scores', action='store_true',
+                    help='benchmark scores of evaluation measures. episode counts and model specified as options are ignored.')
 parser.add_argument('--train', action='store_true',
-                    help='whether to train the neural network')
+                    help='train the neural network')
 parser.add_argument('--eval-after-train', action='store_true', dest='eval_after_train',
-                    help='whether to evaluate after each training session')
+                    help='evaluate after each training session')
 parser.add_argument('--play', action='store_true',
-                    help='whether to play with the neural network')
+                    help='play with the neural network')
 parser.add_argument('--start-episode', action='store', dest='start_episode',
                    type=int, default=0,
                    help='episode count to start at; purely for display purposes')
@ -61,31 +32,124 @@ parser.add_argument('--train-perpetually', action='store_true',
                    help='start new training session as soon as the previous is finished')
 parser.add_argument('--list-models', action='store_true',
                    help='list all known models')
+parser.add_argument('--board-rep', action='store', dest='board_rep',
+                    help='name of board representation to use as input to neural network')
+parser.add_argument('--verbose', action='store_true',
+                    help='If set, a lot of stuff will be printed')
+parser.add_argument('--ply', action='store', dest='ply', default='0',
+                    help='defines the amount of ply used when deciding what move to make')
+parser.add_argument('--repeat-eval', action='store', dest='repeat_eval', default='1',
+                    help='the amount of times the evaluation method should be repeated')

 args = parser.parse_args()

+
 config = {
    'model': args.model,
-    'model_path': os.path.join(model_storage_path, args.model),
    'episode_count': args.episode_count,
    'eval_methods': args.eval_methods,
    'train': args.train,
    'play': args.play,
    'eval': args.eval,
+    'bench_eval_scores': args.bench_eval_scores,
    'eval_after_train': args.eval_after_train,
    'start_episode': args.start_episode,
    'train_perpetually': args.train_perpetually,
-    'model_storage_path': model_storage_path
+    'model_storage_path': 'models',
+    'bench_storage_path': 'bench',
+    'board_representation': args.board_rep,
+    'global_step': 0,
+    'verbose': args.verbose,
+    'ply': args.ply,
+    'repeat_eval': args.repeat_eval
 }

+
+# Create models folder
+if not os.path.exists(config['model_storage_path']):
+    os.makedirs(config['model_storage_path'])
+
+model_path = lambda: os.path.join(config['model_storage_path'], config['model'])
+
 # Make sure directories exist
-model_path = os.path.join(config['model_path'])
-log_path   = os.path.join(model_path, 'logs')
-if not os.path.isdir(model_path):
-    os.mkdir(model_path)
+log_path = os.path.join(model_path(), 'logs')
+if not os.path.isdir(model_path()):
+    os.mkdir(model_path())
 if not os.path.isdir(log_path):
    os.mkdir(log_path)

+# Define helper functions
+def log_train_outcome(outcome, diff_in_values, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "train.log")):
+    """Append one line summarising a training session to the train log.
+
+    The line holds, separated by ``;``: current unix time, trained episode
+    count, number of outcomes, their sum, their mean, `diff_in_values` and
+    the output of ``git describe --first-parent --always``.
+
+    :param outcome: sequence of numeric results; must not be empty
+    :param diff_in_values: value written as the average difference in values
+    :param trained_eps: episode count to record
+    :param log_path: file to append to; the default is computed once, when
+                     this function is defined
+    """
+    git_result = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE)
+    commit = git_result.stdout.decode('utf-8').rstrip()
+
+    line = "{time};{trained_eps};{count};{sum};{mean};{average_diff_in_vals};{commit}".format(
+        trained_eps = trained_eps,
+        count = len(outcome),
+        sum = sum(outcome),
+        mean = sum(outcome) / len(outcome),
+        time = int(time.time()),
+        average_diff_in_vals = diff_in_values,
+        commit = commit)
+
+    with open(log_path, 'a+') as f:
+        f.write(line + "\n")
+    
+
+def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "eval.log")):
+    """Append one line per evaluation outcome to the eval log.
+
+    Each line holds, separated by ``;``: current unix time, method name,
+    trained episode count, number of scores, their sum, their mean and the
+    output of ``git describe --first-parent --always``.
+
+    :param outcomes: iterable of items whose element 0 is the method name
+                     and element 1 a non-empty sequence of scores
+    :param trained_eps: episode count to record
+    :param log_path: file to append to; the default is computed once, when
+                     this function is defined
+    :return: None
+    """
+    git_result = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE)
+    commit = git_result.stdout.decode('utf-8').rstrip()
+
+    for outcome in outcomes:
+        method, scores = outcome[0], outcome[1]
+        line = "{time};{method};{trained_eps};{count};{sum};{mean};{commit}".format(
+            commit = commit,
+            trained_eps = trained_eps,
+            method = method,
+            count = len(scores),
+            sum = sum(scores),
+            mean = sum(scores) / len(scores),
+            time = int(time.time()))
+        with open(log_path, 'a+') as f:
+            f.write(line + "\n")
+
+def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0):
+    """Append one line per benchmarked evaluation outcome to `log_path`.
+
+    Each line holds, separated by ``;``: method name, number of scores,
+    `index`, `time`, sum of the scores, their mean and the output of
+    ``git describe --first-parent --always``.
+
+    :param outcomes: iterable of items whose element 0 is the method name
+                     and element 1 a non-empty sequence of scores
+    :param log_path: file to append to
+    :param index: sample index to record
+    :param time: value written in the time column (this parameter hides the
+                 ``time`` module inside the function)
+    :param trained_eps: accepted but not part of the written line
+    """
+    git_result = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE)
+    commit = git_result.stdout.decode('utf-8').rstrip()
+
+    for outcome in outcomes:
+        method, scores = outcome[0], outcome[1]
+        line = "{method};{count};{index};{time};{sum};{mean};{commit}".format(
+            trained_eps = trained_eps,
+            method = method,
+            count = len(scores),
+            sum = sum(scores),
+            mean = sum(scores) / len(scores),
+            time = time,
+            index = index,
+            commit = commit)
+        with open(log_path, 'a+') as f:
+            f.write(line + "\n")
+
+def find_board_rep():
+    """Return the contents of the model's ``board_representation`` file.
+
+    The file is looked up in the directory of the model named in `config`.
+    """
+    board_rep_path = os.path.join(config['model_storage_path'],
+                                  config['model'],
+                                  "board_representation")
+    with open(board_rep_path, 'r') as f:
+        contents = f.read()
+    return contents
+
+
+def board_rep_file_exists():
+    """Tell whether the model named in `config` has a ``board_representation`` file."""
+    return os.path.isfile(os.path.join(config['model_storage_path'],
+                                       config['model'],
+                                       "board_representation"))
+
+def create_board_rep():
+    """Append the configured board representation name to the model's
+    ``board_representation`` file, creating the file if needed."""
+    model_dir = os.path.join(config['model_storage_path'], config['model'])
+    target = os.path.join(model_dir, "board_representation")
+    with open(target, 'a+') as rep_file:
+        rep_file.write(config['board_representation'])

 # Do actions specified by command-line
 if args.list_models:
@ -94,7 +158,7 @@ if args.list_models:
            return int(f.read())
    model_folders = [ f.path
                      for f
-                      in os.scandir(model_storage_path)
+                      in os.scandir(config['model_storage_path'])
                      if f.is_dir() ]
    models = [ (folder, get_eps_trained(folder)) for folder in model_folders ]
    sys.stderr.write("Found {} model(s)\n".format(len(models)))
@ -103,28 +167,98 @@ if args.list_models:

    exit()

+if __name__ == "__main__":
    # Set up network
    from network import Network
-network = Network(config, config['model'])
-eps = config['start_episode']

    # Set up variables
    episode_count = config['episode_count']

+    if config['board_representation'] is None:
+        if board_rep_file_exists():
+            config['board_representation'] = find_board_rep()
+        else:
+            sys.stderr.write("Was not given a board_rep and was unable to find a board_rep file\n")
+            exit()
+    else:
+        if not board_rep_file_exists():
+            create_board_rep()
+        else:
+            if config['board_representation'] != find_board_rep():
+                sys.stderr.write("Board representation \"{given}\", does not match one in board_rep file, \"{board_rep}\"\n".
+                                 format(given = config['board_representation'], board_rep = find_board_rep()))
+                exit()
+
+                  
    if args.train:
+        network = Network(config, config['model'])
+        start_episode = network.episodes_trained
        while True:
-        train_outcome = network.train_model(episodes = episode_count, trained_eps = eps)
-        eps += episode_count
-        log_train_outcome(train_outcome, trained_eps = eps)
+            train_outcome, diff_in_values = network.train_model(episodes = episode_count, trained_eps = start_episode)
+            start_episode += episode_count
+            log_train_outcome(train_outcome, diff_in_values, trained_eps = start_episode)
            if config['eval_after_train']:
-            eval_outcomes = network.eval(trained_eps = eps)
-            log_eval_outcomes(eval_outcomes, trained_eps = eps)
+                eval_outcomes = network.eval(trained_eps = start_episode)
+                log_eval_outcomes(eval_outcomes, trained_eps = start_episode)
            if not config['train_perpetually']:
                break
+
+    elif args.play:
+        network = Network(config, config['model'])
+        network.play_against_network()
+
    elif args.eval:
-    eps = config['start_episode']
-    outcomes = network.eval()
-    log_eval_outcomes(outcomes, trained_eps = eps)
+        network = Network(config, config['model'])
+        network.restore_model()
+
+        for i in range(int(config['repeat_eval'])):
+            start_episode = network.episodes_trained
+            # Evaluation measures are described in `config`
+            outcomes = network.eval(config['episode_count'])
+            log_eval_outcomes(outcomes, trained_eps = start_episode)
            # elif args.play:
            # g.play(episodes = episode_count)

+        
+    elif args.bench_eval_scores:
+        # Make sure benchmark directory exists
+        if not os.path.isdir(config['bench_storage_path']):
+            os.mkdir(config['bench_storage_path'])
+
+        config = config.copy()
+        config['model'] = 'bench'
+        
+        network = Network(config, config['model'])
+        start_episode = network.episodes_trained
+
+        if start_episode == 0:
+            print("Model not trained! Beware of using non-existing models!")
+            exit()
+        
+        sample_count = 20
+        episode_counts = [25, 50, 100, 250, 500, 1000, 2500, 5000,
+                          10000, 20000]
+
+        def do_eval():
+            for eval_method in config['eval_methods']:
+                result_path = os.path.join(config['bench_storage_path'],
+                                           eval_method) + "-{}.log".format(int(time.time()))
+                for n in episode_counts:
+                    for i in range(sample_count):
+                        start_time = time.time()
+                        # Evaluation measure to be benchmarked are described in `config`
+                        outcomes = network.eval(episode_count = n)
+                        time_diff = time.time() - start_time
+                        log_bench_eval_outcomes(outcomes,
+                                                time = time_diff,
+                                                index = i,
+                                                trained_eps = start_episode,
+                                                log_path = result_path)
+
+        # CMM: oh no
+        import tensorflow as tf
+
+        network.restore_model()
+        do_eval()
+        
+        
--- a/network.py
+++ b/network.py
@ -1,5 +1,4 @@
 import tensorflow as tf
-from cup import Cup
 import numpy as np
 from board import Board
 import os
@ -7,132 +6,188 @@ import time
 import sys
 import random
 from eval import Eval
+import glob
+from operator import itemgetter
+import tensorflow.contrib.eager as tfe
+from player import Player

 class Network:
-    hidden_size = 40
-    input_size = 196
-    output_size = 1
-    # Can't remember the best learning_rate, look this up
-    learning_rate = 0.1
+    # board_features_quack has size 28
+    # board_features_quack_fat has size 30
+    # board_features_tesauro has size 198

-    # TODO: Actually compile tensorflow properly
-    #os.environ["TF_CPP_MIN_LOG_LEVEL"]="2"
+    board_reps = {
+        'quack-fat'   : (30, Board.board_features_quack_fat),
+        'quack'       : (28, Board.board_features_quack),
+        'tesauro'     : (198, Board.board_features_tesauro),
+        'quack-norm'  : (30, Board.board_features_quack_norm),
+        'tesauro-fat' : (726, Board.board_features_tesauro_fat),
+        'tesauro-poop': (198, Board.board_features_tesauro_wrong)
+    }

    def custom_tanh(self, x, name=None):
        return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name))

    def __init__(self, config, name):
-        self.config = config
-        self.session = tf.Session()
-        self.checkpoint_path = config['model_path']
-        self.name = name
+        """
+        Build a value network configured from `config`.
+
+        Enables TensorFlow eager execution, selects the move function and the
+        board representation named in `config`, reads the stored episode and
+        global step counters (0 when their files do not exist) and builds the
+        two-layer Keras model. Model weights are not restored here.
+
+        :param config: Mapping read here for the keys 'model_storage_path', 'model',
+                       'ply' ('0' or '1') and 'board_representation' (a key of
+                       Network.board_reps)
+        :param name: Stored as self.name
+        """

-        # input = x
-        self.x = tf.placeholder('float', [1, Network.input_size], name='x')
-        self.value_next = tf.placeholder('float', [1, Network.output_size], name="value_next")
+        move_options = {
+            '1': self.make_move_1_ply,
+            '0': self.make_move_0_ply
+        }
+
+        # Player 1 picks the highest score, player -1 the lowest
+        self.max_or_min = {
+            1: np.argmax,
+            -1: np.argmin
+        }
+
+        tf.enable_eager_execution()

        xavier_init = tf.contrib.layers.xavier_initializer()

-        W_1 = tf.get_variable("w_1", (Network.input_size, Network.hidden_size),
-                              initializer=xavier_init)
-        W_2 = tf.get_variable("w_2", (Network.hidden_size, Network.output_size),
-                              initializer=xavier_init)
+        self.config = config
+        self.checkpoint_path = os.path.join(config['model_storage_path'], config['model'])

-        b_1 = tf.get_variable("b_1", (Network.hidden_size,),
-                              initializer=tf.zeros_initializer)
-        b_2 = tf.get_variable("b_2", (Network.output_size,),
-                              initializer=tf.zeros_initializer)
+        self.name = name

-        value_after_input = tf.sigmoid(tf.matmul(self.x, W_1) + b_1, name='hidden_layer')
+        self.make_move = move_options[
+            self.config['ply']
+        ]
+
+        # Set board representation from config
+        self.input_size, self.board_trans_func = Network.board_reps[
+            self.config['board_representation']
+        ]
+        self.output_size = 1
+        self.hidden_size = 40
+        self.max_learning_rate = 0.1
+        self.min_learning_rate = 0.001
+
+        # Restore trained episode count for model
+        episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
+        if os.path.isfile(episode_count_path):
+            with open(episode_count_path, 'r') as f:
+                self.episodes_trained = int(f.read())
+        else:
+            self.episodes_trained = 0
+
+        # Restore the global step used for the learning rate decay
+        global_step_path = os.path.join(self.checkpoint_path, "global_step")
+        if os.path.isfile(global_step_path):
+            with open(global_step_path, 'r') as f:
+                self.global_step = int(f.read())
+        else:
+            self.global_step = 0
+
+
+        # Layer widths come from the attributes above instead of repeated literals
+        self.model = tf.keras.Sequential([
+            tf.keras.layers.Dense(self.hidden_size, activation="sigmoid", kernel_initializer=xavier_init,
+                                  input_shape=(1,self.input_size)),
+            tf.keras.layers.Dense(self.output_size, activation="sigmoid", kernel_initializer=xavier_init)
+        ])
+
+
+    def exp_decay(self, max_lr, global_step, decay_rate, decay_steps):
+        """
+        Compute a stepwise exponentially decayed learning rate.
+
+        The result is max_lr * decay_rate ** (global_step // decay_steps), so the
+        rate only changes once every `decay_steps` steps.
+
+        :param max_lr: The learning rate before any decay has been applied
+        :param global_step: The number of steps taken so far
+        :param decay_rate: The factor the learning rate is multiplied by at each decay
+        :param decay_steps: The number of steps between two decays
+        :return: The decayed learning rate
+        """
+        completed_decays = global_step // decay_steps
+        return max_lr * decay_rate ** completed_decays
+
+    def do_backprop(self, prev_state, value_next):
+        """
+        Apply one temporal-difference update to the variables of the model.
+
+        Every model variable is increased by
+        learning_rate * (value_next - value) * gradient, where `value` is the model
+        output for `prev_state` and `gradient` is the gradient of that output with
+        respect to the variable. The learning rate is the decayed rate, bounded
+        below by self.min_learning_rate, and is stored in self.learning_rate.
+
+        :param prev_state: The previous state of the game; it is reshaped to a single row
+        :param value_next: The value of the current move
+        :return: Nothing, the update is performed in place on the model variables
+        """
+        decayed_rate = self.exp_decay(self.max_learning_rate, self.global_step, 0.96, 50000)
+        self.learning_rate = tf.maximum(self.min_learning_rate,
+                                        decayed_rate,
+                                        name="learning_rate")
+
+        with tf.GradientTape() as tape:
+            value = self.model(prev_state.reshape(1,-1))
+
+        gradients = tape.gradient(value, self.model.variables)
+
+        # Scalar difference between the value of the next state and of the previous state
+        td_error = tf.reshape(tf.subtract(value_next, value, name='difference_in_values'), [])
+
+        for gradient, variable in zip(gradients, self.model.variables):
+            variable.assign_add(self.learning_rate * td_error * gradient)



-        
-        self.value = tf.sigmoid(tf.matmul(value_after_input, W_2) + b_2, name='output_layer')
-
-        # tf.reduce_sum basically finds the sum of its input, so this gives the
-        # difference between the two values, in case they should be lists, which
-        # they might be if our input changes
-
-        # TODO: Alexander thinks that self.value will be computed twice (instead of once)
-        difference_in_values = tf.reduce_sum(tf.subtract(self.value_next, self.value, name='difference'))
-
-        trainable_vars = tf.trainable_variables()
-        gradients = tf.gradients(self.value, trainable_vars)
-
-        apply_gradients = []
-        
-        with tf.variable_scope('apply_gradients'):
-            for gradient, trainable_var in zip(gradients, trainable_vars):
-                # Hopefully this is Δw_t = α(V_t+1 - V_t)▿_wV_t.
-                backprop_calc = Network.learning_rate * difference_in_values * gradient
-                grad_apply = trainable_var.assign_add(backprop_calc)
-                apply_gradients.append(grad_apply)
-            
-            self.training_op = tf.group(*apply_gradients, name='training_op')
-            
-        self.saver = tf.train.Saver(max_to_keep=1)
-        self.session.run(tf.global_variables_initializer())
-
-        self.restore_model()
+    def print_variables(self):
+        """
+        Print each variable of the model with its own call to `print`.
+        :return: Nothing
+        """
+        for variable in self.model.variables:
+            print(variable)

    def eval_state(self, state):
-        # Run state through a network
-
-        # Remember to create placeholders for everything because wtf tensorflow
-        # and graphs
-
-        # Remember to create the dense layers
-
-        # Figure out a way of giving a layer a custom activiation function (we
-        # want something which gives [-2,2]. Naively tahn*2, however I fell this
-        # is wrong.
-
-        # tf.group, groups a bunch of actions, so calculate the different
-        # gradients for the different weights, by using tf.trainable_variables()
-        # to find all variables and tf.gradients(current_value,
-        # trainable_variables) to find all the gradients. We can then loop
-        # through this and calculate the trace for each gradient and variable
-        # pair (note, zip can be used to combine the two lists found before),
-        # and then we can calculate the overall change in weights, based on the
-        # formula listed in tesauro (learning_rate * difference_in_values *
-        # trace), this calculation can be assigned to a tf variable and put in a
-        # list and then this can be grouped into a single operation, essentially
-        # building our own backprop function.
-
-        # Grouping them is done by
-        # tf.group(*the_gradients_from_before_we_want_to_apply,
-        # name="training_op")
-
-        # If we remove the eligibily trace to begin with, we only have to
-        # implement learning_rate * (difference_in_values) * gradients (the
-        # before-mentioned calculation.
-
-        
-        # print("Network is evaluating")
-        val = self.session.run(self.value, feed_dict={self.x: state})
-        #print("eval ({})".format(self.name), state, val, sep="\n")
-        return val
+        """
+        Evaluates a single state by running it through the model
+        :param state: The state to evaluate; it must offer `reshape`, since it is
+                      reshaped to a single row before it is given to the model
+        :return: The output of the model for that row
+        """
+        return self.model(state.reshape(1,-1))

    def save_model(self, episode_count):
-        self.saver.save(self.session, os.path.join(self.checkpoint_path, 'model.ckpt'))
+        """
+        Save the model variables together with the training counters.
+
+        The variables are written to 'model.ckpt' in self.checkpoint_path, the
+        given episode count to the file 'episodes_trained' and self.global_step to
+        the file 'global_step' in the same directory. The variables are printed
+        afterwards when config['verbose'] is set.
+
+        :param episode_count: The number of trained episodes to store with the model
+        :return: Nothing
+        """
+
+        tfe.Saver(self.model.variables).save(os.path.join(self.checkpoint_path, 'model.ckpt'))
+
        with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f:
            print("[NETWK] ({name}) Saving model to:".format(name=self.name),
                  os.path.join(self.checkpoint_path, 'model.ckpt'))
            f.write(str(episode_count) + "\n")

+        with open(os.path.join(self.checkpoint_path, "global_step"), 'w+') as f:
+            # Report the file that is actually written, not the checkpoint path
+            print("[NETWK] ({name}) Saving global step to:".format(name=self.name),
+                  os.path.join(self.checkpoint_path, "global_step"))
+            f.write(str(self.global_step) + "\n")
+        if self.config['verbose']:
+            self.print_variables()
+
+
+    def calc_vals(self, states):
+        """
+        Score a batch of states with the model
+        :param states: A number of states. They must already be transformed into the
+                       representation the model takes before being given to this function.
+        :return: The result of `predict_on_batch` of the model for `states`
+        """
+        state_values = self.model.predict_on_batch(states)
+        return state_values
+
+
    def restore_model(self):
-        if os.path.isfile(os.path.join(self.checkpoint_path, 'model.ckpt.index')):
+        """
+        Restore a saved model, such that a trained model can either be further trained or
+        used for evaluation.
+
+        Nothing happens unless a file matching 'model.ckpt*.index' exists in
+        self.checkpoint_path. Otherwise the model variables are restored from the latest
+        checkpoint and the stored counters are read into self.config['start_episode'] and
+        self.config['global_step'] (the check guarding the episode count lies in lines
+        this diff does not show).
+
+        :return: Nothing. It's a side-effect that a model gets restored for the network.
+        """
+
+
+        if glob.glob(os.path.join(self.checkpoint_path, 'model.ckpt*.index')):
+        
            latest_checkpoint = tf.train.latest_checkpoint(self.checkpoint_path)
            print("[NETWK] ({name}) Restoring model from:".format(name=self.name),
                  str(latest_checkpoint))
-            self.saver.restore(self.session, latest_checkpoint)
-            variables_names = [v.name for v in tf.trainable_variables()]
-            values = self.session.run(variables_names)
-            for k, v in zip(variables_names, values):
-                print("Variable: ", k)
-                print("Shape: ", v.shape)
-                print(v)
+            tfe.Saver(self.model.variables).restore(latest_checkpoint)

            # Restore trained episode count for model
            episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
@ -140,34 +195,160 @@ class Network:
                with open(episode_count_path, 'r') as f:
                    self.config['start_episode'] = int(f.read())

-    # Have a circular dependency, #fuck, need to rewrite something
-    def adjust_weights(self, board, v_next):
-#        print("lol")
-        board = np.array(board).reshape((1,-1))
-        self.session.run(self.training_op, feed_dict = { self.x: board,
-                                                         self.value_next: v_next })
+            # Restore the global step stored with the model
+            global_step_path = os.path.join(self.checkpoint_path, "global_step")
+            if os.path.isfile(global_step_path):
+                with open(global_step_path, 'r') as f:
+                    self.config['global_step'] = int(f.read())

-
-            # while game isn't done:
-                #x_next = g.next_move()
-                #value_next = network.eval_state(x_next)
-                #self.session.run(self.training_op, feed_dict={self.x: x, self.value_next: value_next})
-                #x = x_next
+            if self.config['verbose']:
+                self.print_variables()



-    def make_move(self, board, roll):
-        # print(Board.pretty(board))
-        legal_moves = Board.calculate_legal_states(board, 1, roll)
-        moves_and_scores = [ (move, self.eval_state(np.array(Board.map_to_tesauro(move)).reshape(1,-1))) for move in legal_moves ]
-        scores = [ x[1] for x in moves_and_scores ]
-        best_score_index = np.array(scores).argmax()
-        best_move_pair = moves_and_scores[best_score_index]
-        #print("Found the best state, being:", np.array(move_scores).argmax())
-        return best_move_pair
+    def make_move_0_ply(self, board, roll, player):
+        """
+        Pick the best move without any look-ahead (0-ply).
+
+        All states reachable from the board with the given roll are transformed with the
+        configured board representation and scored by the network in a single batch.
+        The state with the highest score is picked for the 1-player and the state with the
+        lowest score is picked for the -1-player.
+
+        :param board: Current board
+        :param roll:  Current roll
+        :param player: Current player
+        :return: A pair of the best state to go to, together with the score of that state
+        """
+        candidate_boards = list(Board.calculate_legal_states(board, player, roll))
+        features = np.array([self.board_trans_func(candidate, player)[0] for candidate in candidate_boards])
+
+        values = self.model.predict_on_batch(features)
+
+        # argmax for player 1, argmin for player -1
+        select_index = self.max_or_min[player]
+        chosen = select_index(values)
+
+        return (candidate_boards[chosen], values[chosen])
+
+    def make_move_1_ply(self, board, roll, player):
+        """
+        Return the best board and best score based on a 1-ply look-ahead.
+
+        This only forwards to `calculate_1_ply`; it exists so that it can be selected
+        as `self.make_move` with the same signature as `make_move_0_ply`.
+
+        :param board: Current board
+        :param roll: Current roll
+        :param player: Current player
+        :return: The pair returned by `calculate_1_ply` for the same arguments
+        """
+        return self.calculate_1_ply(board, roll, player)


-    def train_model(self, episodes=1000, save_step_size = 100, trained_eps = 0):
+    def calculate_1_ply(self, board, roll, player):
+        """
+        Find the best move based on a 1-ply look-ahead. First the 10 best moves are picked from a 0-ply
+        evaluation, then `do_ply` calculates a score for each of these boards based on the states
+        reachable from them. The board with the best such score is picked.
+
+        :param board: Current board
+        :param roll: The original roll
+        :param player: The current player
+        :return: Best possible move based on 1-ply look-ahead, together with its score from `do_ply`
+        """
+
+        # Find all legal states from the given board and the given roll. They are
+        # materialised as a list, as in make_move_0_ply, since they are iterated twice below.
+        init_legal_states = list(Board.calculate_legal_states(board, player, roll))
+        legal_states = np.array([self.board_trans_func(state, player)[0] for state in init_legal_states])
+
+        scores = [ score.numpy()
+                   for score
+                   in  self.calc_vals(legal_states) ]
+
+        # Best boards first: descending scores for player 1, ascending for player -1
+        moves_and_scores = list(zip(init_legal_states, scores))
+        sorted_moves_and_scores = sorted(moves_and_scores, key=itemgetter(1), reverse=(player == 1))
+        best_boards = [ x[0] for x in sorted_moves_and_scores[:10] ]
+
+        scores = self.do_ply(best_boards, player)
+
+        best_score_idx = self.max_or_min[player](scores)
+
+        return (best_boards[best_score_idx], scores[best_score_idx])
+
+    def do_ply(self, boards, player):
+        """
+        Calculates a single extra ply, resulting in a larger search space for our best move.
+        This is somewhat hardcoded to only do a single ply, so it would have to be extended
+        if a deeper search is wanted.
+
+        For each board, all states the other player can reach with any roll are scored by the
+        model in one batch, and the score of the board is the mean of the scores of its states.
+        Every reachable state counts equally in that mean; rolls are not weighted by how
+        likely they are.
+
+        :param boards: The boards to try all rolls on
+        :param player: The player of the previous ply
+        :return: A list of scores where each index describes one of the boards which was given as param
+        to this function.
+        """
+
+        all_rolls = [ (1, 1), (1, 2), (1, 3), (1, 4), (1, 5),
+                      (1, 6), (2, 2), (2, 3), (2, 4), (2, 5),
+                      (2, 6), (3, 3), (3, 4), (3, 5), (3, 6),
+                      (4, 4), (4, 5), (4, 6), (5, 5), (5, 6),
+                      (6, 6) ]
+
+        # The look-ahead is played by the other player
+        opponent = player*-1
+
+        # Collect the states of all boards in one list, and remember how many
+        # states belong to each board, so that everything is scored in one batch
+        length_list = []
+        test_list = []
+        for board in boards:
+            length = 0
+            for roll in all_rolls:
+                all_states = Board.calculate_legal_states(board, opponent, roll)
+                for state in all_states:
+                    state = np.array(self.board_trans_func(state, opponent)[0])
+                    test_list.append(state)
+                    length += 1
+            length_list.append(length)
+
+        all_scores = self.model.predict_on_batch(np.array(test_list))
+
+        # Split the scores back into one slice per board
+        split_scores = []
+        from_idx = 0
+        for length in length_list:
+            split_scores.append(all_scores[from_idx:from_idx+length])
+            from_idx += length
+
+        means_splits = [tf.reduce_mean(scores) for scores in split_scores]
+
+        return means_splits
+
+
+    def eval(self, episode_count, trained_eps = 0):
+        """
+        Used to evaluate a model. Every method listed in config['eval_methods'] is run for
+        `episode_count` episodes; the methods handled here are 'pubeval' and 'dumbeval'.
+
+        :param episode_count: The amount of episodes to run for each method
+        :param trained_eps:   The amount of episodes the model we want to evaluate, has trained
+        :return: outcomes:    A list with one pair (method, outcomes of that method) per
+                              configured method
+        """
+
+        def do_eval(method, episodes = 1000, trained_eps = 0):
+            """
+            Do the actual evaluation. The network plays as player 1 using self.make_move and
+            the opponent plays as player -1 using the move function of `Eval` for the method.
+
+            :param method:     Either pubeval or dumbeval
+            :param episodes:   Amount of episodes to use in the evaluation
+            :param trained_eps: Not used in the lines of this function visible here
+            :return: outcomes : A list with the second element of Board.outcome for each
+                                finished episode, or [0] if the method is not defined
+            """
+
            start_time = time.time()

            def print_time_estimate(eps_completed):
@ -176,63 +357,165 @@ class Network:
                eps_per_sec = eps_completed / time_diff
                secs_per_ep = time_diff / eps_completed
                eps_remaining = (episodes - eps_completed)
-            sys.stderr.write("[TRAIN] Averaging {per_sec} episodes per second\n".format(per_sec = round(eps_per_sec, 2)))
-            sys.stderr.write("[TRAIN] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(eps_remaining = eps_remaining, time_remaining = int(eps_remaining * secs_per_ep)))
+                sys.stderr.write(
+                    "[EVAL ] Averaging {per_sec} episodes per second\n".format(per_sec=round(eps_per_sec, 2)))
+                sys.stderr.write(
+                    "[EVAL ] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(
+                        eps_remaining=eps_remaining, time_remaining=int(eps_remaining * secs_per_ep)))

+            sys.stderr.write(
+                "[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method))
+
+            # NOTE(review): in both loops below the opponent moves right after the network
+            # without a check of whether the network's move ended the game - confirm that
+            # the Eval move functions handle a finished board.
+            if method == 'pubeval':
+                outcomes = []
+                for i in range(1, episodes + 1):
+                    sys.stderr.write("[EVAL ] Episode {}".format(i))
+                    board = Board.initial_state
+                    while Board.outcome(board) is None:
+                        roll = (random.randrange(1, 7), random.randrange(1, 7))
+                        board = (self.make_move(board, roll, 1))[0]
+
+                        roll = (random.randrange(1, 7), random.randrange(1, 7))
+                        board = Eval.make_pubeval_move(board, -1, roll)[0][0:26]
+
+                    sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
+                    outcomes.append(Board.outcome(board)[1])
+                    sys.stderr.write("\n")
+
+                    if i % 10 == 0:
+                        print_time_estimate(i)
+
+                return outcomes
+
+            elif method == 'dumbeval':
+                outcomes = []
+                for i in range(1, episodes + 1):
+                    sys.stderr.write("[EVAL ] Episode {}".format(i))
+                    board = Board.initial_state
+                    while Board.outcome(board) is None:
+                        roll = (random.randrange(1, 7), random.randrange(1, 7))
+                        board = (self.make_move(board, roll, 1))[0]
+
+                        roll = (random.randrange(1, 7), random.randrange(1, 7))
+                        board = Eval.make_dumbeval_move(board, -1, roll)[0][0:26]
+
+                    sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
+                    outcomes.append(Board.outcome(board)[1])
+                    sys.stderr.write("\n")
+
+                    if i % 10 == 0:
+                        print_time_estimate(i)
+
+                return outcomes
+
+            else:
+                sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method))
+                return [0]
+            
+
+        outcomes = [ (method, do_eval(method,
+                                      episode_count,
+                                      trained_eps = trained_eps))
+                     for method
+                     in self.config['eval_methods'] ]
+        return outcomes
+
+
+    def play_against_network(self):
+        """
+        Allows you to play against a supplied model.
+
+        The model is restored and plays as player 1, the human plays as player -1.
+        Until the game has an outcome, the bot and the human take turns: a roll is
+        made for each of them, and the board is printed before every move and once
+        more when the game is done.
+
+        :return: Nothing
+        """
+        self.restore_model()
+        human_player = Player(-1)
+        player = 1
+        board = Board.initial_state
+        while Board.outcome(board) is None:
+            print(Board.pretty(board))
+            roll = (random.randrange(1, 7), random.randrange(1, 7))
+            print("Bot rolled:", roll)
+
+            board, _ = self.make_move(board, roll, player)
+            print(Board.pretty(board))
+            roll = (random.randrange(1, 7), random.randrange(1, 7))
+            print("You rolled:", roll)
+            board = human_player.make_human_move(board, roll)
+        print("DONE "*10)
+        print(Board.pretty(board))
+
+
+
+    def train_model(self, episodes=1000, save_step_size=100, trained_eps=0):
+        """
+        Train a model by self-play.
+
+        The model is restored first. In every episode the network plays both sides, starting
+        with a randomly chosen player, and the weights are adjusted with `do_backprop` after
+        each move. The last update of an episode uses the outcome of the game, scaled by
+        (outcome + 2) / 4, as the target. The model is saved after the final episode.
+
+        :param episodes: The number of episodes to train
+        :param save_step_size: Reported in the log here; the code using it for saving lies
+                               in lines this diff does not show
+        :param trained_eps: The number of episodes trained before this call; it is added to
+                            the episode number in the log and when the model is saved
+        :return: A pair of the list of outcomes, one per episode, and the mean over the
+                 episodes of the average absolute difference in values per turn
+        """
+
+        self.restore_model()
+        average_diffs = 0
+        start_time = time.time()
+
+        def print_time_estimate(eps_completed):
+            cur_time = time.time()
+            time_diff = cur_time - start_time
+            eps_per_sec = eps_completed / time_diff
+            secs_per_ep = time_diff / eps_completed
+            eps_remaining = (episodes - eps_completed)
+            sys.stderr.write(
+                "[TRAIN] Averaging {per_sec} episodes per second\n".format(per_sec=round(eps_per_sec, 2)))
+            sys.stderr.write(
+                "[TRAIN] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(
+                    eps_remaining=eps_remaining, time_remaining=int(eps_remaining * secs_per_ep)))

        sys.stderr.write("[TRAIN] Training {} episodes and save_step_size {}\n".format(episodes, save_step_size))
        outcomes = []
        for episode in range(1, episodes + 1):
+
            sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
-#            print("greerggeregr"*10000)
-            # TODO decide which player should be here
-            player = 1

-            roll = (random.randrange(1,7), random.randrange(1,7))
-
-            def tesaurofi(board):
-                return Board.map_to_tesauro(board)
-
-            prev_board, _ = self.make_move(Board.flip(Board.initial_state) if player == -1 else Board.initial_state, roll)
-
-            if player == -1:
-                prev_board = Board.flip(prev_board)
-
-            # print("board:",prev_board)
-            # print(len(prev_board))
-                
-            # find the best move here, make this move, then change turn as the
-            # first thing inside of the while loop and then call
-            # best_move_and_score to get V_t+1
-
-            # i = 0
+            # player = 1
+            player = random.choice([-1,1])
+            prev_board = Board.initial_state
+            i = 0
+            difference_in_values = 0
            while Board.outcome(prev_board) is None:
-                #print(prev_board)
+                i += 1
+                self.global_step += 1

-                # print("-"*30)
-                # print(i)
-                # print(roll)
-                # print(Board.pretty(prev_board))
-                # print("/"*30)
-                # i += 1
+                cur_board, cur_board_value = self.make_move(prev_board,
+                                                            (random.randrange(1, 7), random.randrange(1, 7)),
+                                                            player)

+                difference_in_values += abs((cur_board_value - self.eval_state(self.board_trans_func(prev_board, player))))
+
+                if self.config['verbose']:
+                    # Uses the accumulator defined above; `difference_in_vals` was never defined
+                    print("Difference in values:", difference_in_values)
+                    print("Current board value :", cur_board_value)
+                    print("Current board is    :\n",cur_board)
+
+                # adjust weights
+                if Board.outcome(cur_board) is None:
+                    self.do_backprop(self.board_trans_func(prev_board, player), cur_board_value)
                    player *= -1
-                roll = (random.randrange(1,7), random.randrange(1,7))
-
-                cur_board, cur_board_value = self.make_move(Board.flip(prev_board) if player == -1 else prev_board, roll)
-                #print("pls",cur_board_value)
-                if player == -1:
-                    cur_board  = Board.flip(cur_board)
-
-                self.adjust_weights(tesaurofi(prev_board), cur_board_value)

                prev_board = cur_board

            final_board = prev_board
-            sys.stderr.write("\t outcome {}".format(Board.outcome(final_board)[1]))
+            sys.stderr.write("\t outcome {}\t turns {}".format(Board.outcome(final_board)[1], i))
            outcomes.append(Board.outcome(final_board)[1])
            final_score = np.array([Board.outcome(final_board)[1]])
-            self.adjust_weights(tesaurofi(prev_board), final_score.reshape((1, 1)))
+            # The target of the last update is the outcome scaled by (outcome + 2) / 4
+            scaled_final_score = ((final_score + 2) / 4)
+
+            difference_in_values += abs(scaled_final_score-cur_board_value)
+
+            average_diffs += (difference_in_values[0][0] / (i+1))
+
+            self.do_backprop(self.board_trans_func(prev_board, player), scaled_final_score.reshape(1,1))

            sys.stderr.write("\n")

@ -244,116 +527,9 @@ class Network:
                print_time_estimate(episode)

        sys.stderr.write("[TRAIN] Saving model for final episode...\n")
+
        self.save_model(episode+trained_eps)

-        return outcomes
+        return outcomes, average_diffs/len(outcomes)


-                # take turn, which finds the best state and picks it, based on the current network
-                # save current state
-                # run training operation (session.run(self.training_op, {x:x, value_next, value_next})), (something which does the backprop, based on the state after having taken a turn, found before, and the state we saved in the beginning and from now we'll save it at the end of the turn
-                # save the current state again, so we can continue running backprop based on the "previous" turn.
-
-        # NOTE: We need to make a method so that we can take a single turn or at least just pick the next best move, so we know how to evaluate according to TD-learning. Right now, our game just continues in a while loop without nothing to stop it!
-        
-
-
-    def eval(self, trained_eps = 0):
-        def do_eval(method, episodes = 1000, trained_eps = 0):
-            start_time = time.time()
-
-            def print_time_estimate(eps_completed):
-                cur_time      = time.time()
-                time_diff     = cur_time - start_time
-                eps_per_sec   = eps_completed / time_diff
-                secs_per_ep   = time_diff / eps_completed
-                eps_remaining = (episodes - eps_completed)
-                sys.stderr.write("[EVAL ] Averaging {per_sec} episodes per second\n".format(per_sec = round(eps_per_sec, 2)))
-                sys.stderr.write("[EVAL ] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(eps_remaining = eps_remaining, time_remaining = int(eps_remaining * secs_per_ep)))
-
-            sys.stderr.write("[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method))
-            
-            if method == 'random':
-                outcomes = []
-                for i in range(1, episodes + 1):
-                    sys.stderr.write("[EVAL ] Episode {}".format(i))
-                    board = Board.initial_state
-                    while Board.outcome(board) is None:
-                        roll = (random.randrange(1,7), random.randrange(1,7))
-                        board = (self.p1.make_move(Board.map_to_tesauro(board), self.p1.get_sym(), roll))[0]
-                        roll = (random.randrange(1,7), random.randrange(1,7))
-                        board = Board.flip(Eval.make_random_move(Board.flip(board), 1, roll))
-                    sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
-                    outcomes.append(Board.outcome(board)[1])
-                    sys.stderr.write("\n")
-
-                    if i % 50 == 0:
-                        print_time_estimate(i)
-                return outcomes
-            elif method == 'pubeval':
-                outcomes = []
-                # Add the evaluation code for pubeval, the bot has a method make_pubeval_move(board, sym, roll), which can be used to get the best move according to pubeval
-                for i in range(1, episodes + 1):
-                    sys.stderr.write("[EVAL ] Episode {}".format(i))
-                    board = Board.initial_state
-                    #print("init:", board, sep="\n")
-                    while Board.outcome(board) is None:
-                        #print("-"*30)
-                        roll = (random.randrange(1,7), random.randrange(1,7))
-                        #print(roll)
-
-                        prev_board = tuple(board)
-                        board = (self.make_move(board, roll))[0]
-                        #print("post p1:", board, sep="\n")
-
-                        #print("."*30)
-                        roll = (random.randrange(1,7), random.randrange(1,7))
-                        #print(roll)
-                        
-                        prev_board = tuple(board)
-                        board = Eval.make_pubeval_move(board, -1, roll)[0][0:26]
-                        #print("post pubeval:", board, sep="\n")
-
-                        
-                    #print("*"*30)
-                    #print(board)
-                    #print("+"*30)
-                    sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
-                    outcomes.append(Board.outcome(board)[1])
-                    sys.stderr.write("\n")
-
-                    if i % 10 == 0:
-                        print_time_estimate(i)
-                    
-                return outcomes
-            # elif method == 'dumbmodel':
-            #     config_prime = self.config.copy()
-            #     config_prime['model_path'] = os.path.join(config_prime['model_storage_path'], 'dumbmodel')
-            #     eval_bot = Bot(1, config = config_prime, name = "dumbmodel")
-            #     #print(self.config, "\n", config_prime)
-            #     outcomes = []
-            #     for i in range(1, episodes + 1):
-            #         sys.stderr.write("[EVAL ] Episode {}".format(i))
-            #         board = Board.initial_state
-            #         while Board.outcome(board) is None:
-            #             roll = (random.randrange(1,7), random.randrange(1,7))
-            #             board = (self.make_move(board, self.p1.get_sym(), roll))[0]
-                        
-            #             roll = (random.randrange(1,7), random.randrange(1,7))
-            #             board = Board.flip(eval_bot.make_move(Board.flip(board), self.p1.get_sym(), roll)[0])
-            #         sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
-            #         outcomes.append(Board.outcome(board)[1])
-            #         sys.stderr.write("\n")
-
-            #         if i % 50 == 0:
-            #             print_time_estimate(i)
-            #     return outcomes
-            else:
-                sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method))
-                return [0]
-            
-        return [ (method, do_eval(method,
-                                  self.config['episode_count'],
-                                  trained_eps = trained_eps))
-                 for method
-                 in self.config['eval_methods'] ]
--- a/network_test.py
+++ b/network_test.py
@ -3,30 +3,65 @@ import tensorflow as tf
 import random
 import numpy as np

-session = tf.Session()
-graph_lol = tf.Graph()

+from board import Board

+import main

-network = Network(session)
+config = main.config.copy()
+config['model'] = "player_testings"
+config['ply'] = "1"
+config['board_representation'] = 'quack-fat'
+network = Network(config, config['model'])

-initial_state = np.array(( 0,
-                  2, 0, 0, 0, 0, -5,
-                  0, -3, 0, 0, 0, 5,
-                  -5, 0, 0, 0, 3, 0,
-                  5, 0, 0, 0, 0, -2,
-                  0 )).reshape((1,26))
+network.restore_model()
+initial_state = Board.initial_state
+
+initial_state_1 = ( 0,
+                    0, 0, 0, 2, 0, -5,
+                    0, -3, 0, 0, 0, 0,
+                    -5, 0, 0, 0, 3, 5,
+                    0, 0, 0, 0, 5, -2,
+                    0 )
+
+initial_state_2 = ( 0,
+                    -5, -5, -3, -2, 0, 0,
+                    0, 0, 0, 0, 0, 0,
+                    0, 0, 0, 15, 0, 0,
+                    0, 0, 0, 0, 0, 0,
+                    0 )
+
+boards = {initial_state,
+          initial_state_1,
+          initial_state_2 }




-#print(x.shape)
-with graph_lol.as_default():
-    session_2 = tf.Session(graph = graph_lol)
-    network_2 = Network(session_2)
-    network_2.restore_model()
-    print(network_2.eval_state(initial_state))
-    
-print(network.eval_state(initial_state))


+# board = network.board_trans_func(Board.initial_state, 1)
+
+
+# pair = network.make_move(Board.initial_state, [3,2], 1)
+
+# print(pair[1])
+
+# network.do_backprop(board, 0.9)
+
+
+# network.print_variables()
+
+
+# network.save_model(2)
+
+# print(network.calculate_1_ply(Board.initial_state, [3,2], 1))
+
+
+diff = [0, 0]
+val = network.eval_state(Board.board_features_quack_fat(initial_state, 1))
+print(val)
+diff[0] += abs(-1-val)
+diff[1] += 1
+
+print(diff[1])
--- a/player.py
+++ b/player.py
@ -11,19 +11,59 @@ class Player:
    def get_sym(self):
        return self.sym
    
-    def make_move(self, board, sym, roll):
-        print(Board.pretty(board))
-        legal_moves = Board.calculate_legal_states(board, sym, roll)
-        if roll[0] == roll[1]:
-            print("Example of move: 4/6,6/8,12/14,13/15")
+    def calc_move_sets(self, from_board, roll, player):
+        board = from_board
+        sets = []
+        total = 0
+        for r in roll:
+            # print("Value of r:",r)
+            sets.append([Board.calculate_legal_states(board, player, [r,0]), r])
+            total += r
+        sets.append([Board.calculate_legal_states(board, player, [total,0]), total])
+        print(sets)
+        return sets
+
+
+    def tmp_name(self, from_board, to_board, roll, player, total_moves, is_quad = False):
+        """Try to account for the step ``from_board`` -> ``to_board`` with the dice.
+
+        The move sets from ``calc_move_sets`` are scanned in order (each single
+        die first, the sum of all dice last).  For the first set containing
+        ``to_board``, its distance is subtracted from ``total_moves`` and the
+        die that was used is zeroed in ``roll`` (``roll`` is mutated in place);
+        if the match was the summed entry, ``roll`` is replaced by ``[0, 0]``.
+
+        Returns ``(total_moves, roll, board)``.  ``board`` is ``to_board`` on
+        a match; otherwise ``from_board`` is returned and ``total_moves`` is
+        left unchanged.
+        """
+        sets = self.calc_move_sets(from_board, roll, player)
+        return_board = from_board
+        for idx, board_set in enumerate(sets):
+
+            # Materialise the legal states so membership can be tested below.
+            board_set[0] = list(board_set[0])
+            # print(to_board)
+            # print(board_set)
+            if to_board in board_set[0]:
+                total_moves -= board_set[1]
+                # if it's not the sum of the moves
+                # (single-die entries occupy the first 4 slots for doubles, else the first 2)
+                if idx < (4 if is_quad else 2):
+                    roll[idx] = 0
                 else:
-            print("Example of move: 4/6,13/17")
+                    roll = [0,0]
+                return_board = to_board
+                break
+        return total_moves, roll, return_board

-        user_moves = input("Enter your move: ").strip().split(",")
-        board = Board.apply_moves_to_board(board, sym, user_moves)
-        while board not in legal_moves:
-            print("Move is invalid, please enter a new move")
-            user_moves = input("Enter your move: ").strip().split(",")
-            board = Board.apply_moves_to_board(board, sym, user_moves)
+    def make_human_move(self, board, roll):
+        is_quad = roll[0] == roll[1]
+        total_moves = roll[0] + roll[1] if not is_quad else int(roll[0])*4
+        if is_quad:
+            roll = [roll[0]]*4
        
+        while total_moves != 0:
+            while True:
+                print("You have {roll} left!".format(roll=total_moves))
+                move = input("Pick a move!\n")
+                pot_move = move.split("/")
+                if len(pot_move) == 2:
+                    try:
+                        pot_move[0] = int(pot_move[0])
+                        pot_move[1] = int(pot_move[1])
+                        move = pot_move
+                        break;
+                    except TypeError:
+                        print("The correct syntax is: 2/5 for a move from index 2 to 5.")
+
+            to_board = Board.apply_moves_to_board(board, self.get_sym(), move)
+            total_moves, roll, board = self.tmp_name(board, to_board, list(roll), self.get_sym(), total_moves, is_quad)
+            print(Board.pretty(board))
        return board
--- a/plot.py
+++ b/plot.py
@ -9,9 +9,26 @@ import matplotlib.dates as mdates

 train_headers = ['timestamp', 'eps_train', 'eps_trained_session', 'sum', 'mean']
 eval_headers  = ['timestamp', 'method', 'eps_train', 'eval_eps_used', 'sum', 'mean']
+bench_headers = ['method', 'sample_count', 'i', 'time', 'sum', 'mean']

 model_path = 'models'

+def plot_bench(data_path):
+    df = pd.read_csv(data_path, sep=";",
+                     names=bench_headers, index_col=[0,1,2])
+    for method_label in df.index.levels[0]:
+        df_prime = df[['mean']].loc[method_label].unstack().T
+        plot = df_prime.plot.box()
+        plot.set_title("Evaluation variance, {}".format(method_label))
+        plot.set_xlabel("Sample count")
+        plot.set_ylabel("Mean score")
+        plt.show(plot.figure)
+
+        # for later use:
+        variances = df_prime.var()
+        print(variances)
+
+        del df_prime, plot, variances

 def dataframes(model_name):
    def df_timestamp_to_datetime(df):
@ -44,7 +61,7 @@ if __name__ == '__main__':
    plt.show()

    while True:
-        df = dataframes('default')['eval']
+        df = dataframes('a')['eval']

        print(df)
        
--- a/quack/quack.c
+++ b/quack/quack.c
@ -0,0 +1,484 @@
+#include <Python.h>
+
+static PyObject* QuackError;
+
+typedef struct board_list board_list;
+struct board_list {
+  int size;
+  PyObject* list[16];
+};
+
+/* Utility functions */
+/* Return 1 for positive x, -1 for negative x and 0 for zero. */
+int sign(int x) {
+  if (x > 0) return 1;
+  if (x < 0) return -1;
+  return 0;
+}
+
+/* Return the absolute value of x. */
+int abs(int x) {
+  return (x < 0) ? -x : x;
+}
+/* end utility functions */
+
+/* Helper functions */
+
+/* Return a malloc'ed array describing where `player` has checkers.
+ *
+ * Element 0 holds the number of occupied indexes; elements 1..count hold
+ * those indexes (0..25) in ascending order.  The caller must free() the
+ * result.  Aborts the process if the allocation fails. */
+int *idxs_with_checkers_of_player(int board[], int player) {
+  int count = 0;
+
+  /* First pass: count the occupied indexes so the result can be sized. */
+  for (int pos = 0; pos < 26; pos++) {
+    if (board[pos] * player >= 1) count++;
+  }
+
+  int *result = malloc((1 + count) * sizeof(int));
+  if (result == NULL) {
+    PyErr_NoMemory();
+    abort();
+  }
+
+  /* Second pass: record the indexes after the leading count. */
+  result[0] = count;
+  int next = 1;
+  for (int pos = 0; pos < 26; pos++) {
+    if (board[pos] * player >= 1) result[next++] = pos;
+  }
+
+  return result;
+}
+
+/* A move is forward when its direction (sign of the index delta) equals
+ * the player's own sign. */
+int is_forward_move(int direction, int player) {
+  int forward = (direction == player);
+  return forward;
+}
+
+/* Non-zero when the distance moved equals the die's face value. */
+int face_value_match_move_length(int delta, int face_value) {
+  int distance = abs(delta);
+  return distance == face_value;
+}
+
+/* If the player's bar slot (index 0 for player 1, index 25 otherwise) is
+ * non-empty, only a move starting from that slot is accepted; with an empty
+ * bar slot every starting index is accepted. */
+int bear_in_if_checker_on_bar(int board[], int player, int from_idx) {
+  int bar = (player == 1) ? 0 : 25;
+
+  if (board[bar] == 0) return 1;
+  return from_idx == bar;
+}
+
+/* Non-zero when the starting point holds at least one of the player's own
+ * checkers, i.e. the sign of its state equals the player's sign. */
+int checkers_at_from_idx(int from_state, int player) {
+  int owner = sign(from_state);
+  return owner == player;
+}
+
+/* Non-zero unless the destination is held by two or more opposing checkers.
+ * A destination that is empty or owned by the player is always open; an
+ * opposing destination is open only when it holds exactly one checker. */
+int no_block_at_to_idx(int to_state, int player) {
+  if (sign(to_state) != -player) return 1;
+  return abs(to_state) == 1;
+}
+
+
+/* Decide whether `player` may bear a checker off from `from_idx`.
+ *
+ * Bearing off requires every checker of the player to be in the last
+ * quadrant (index >= 19 for player 1, index <= 6 otherwise).  In addition
+ * the move must either land exactly on the exit index (25 for player 1,
+ * 0 for player -1) or be made with the player's backmost checker.
+ * Returns 1 when allowed, 0 otherwise. */
+int can_bear_off(int board[], int player, int from_idx, int to_idx) {
+  int *occupied = idxs_with_checkers_of_player(board, player);
+  int count = occupied[0];
+
+  int exact_exit = (player == 1) ? (to_idx == 25)
+                                 : (player == -1 && to_idx == 0);
+  int all_home = 1;
+  int checker_behind = 0;
+
+  /* Stop scanning as soon as a checker outside the last quadrant is found:
+   * the answer is then 0 regardless of the remaining checkers. */
+  for (int k = 1; k <= count && all_home; k++) {
+    int idx = occupied[k];
+
+    if (player == 1) {
+      if (idx < 19)            all_home = 0;
+      else if (idx < from_idx) checker_behind = 1;
+    } else {
+      if (idx > 6)             all_home = 0;
+      else if (idx > from_idx) checker_behind = 1;
+    }
+  }
+
+  free(occupied);
+
+  return all_home && (exact_exit || !checker_behind);
+}
+
+
+
+/* end helper functions */
+
+/* Check whether moving one checker of `player` from move[0] to move[1] is
+ * legal on `board` (26 slots) for a die showing `face_value`.
+ *
+ * The move must go forward, cover exactly `face_value` points, start from
+ * the bar when the player's bar slot is non-empty, start on one of the
+ * player's own checkers and not land on a blocked point.  A destination
+ * outside 1..24 is treated as bearing off and must also satisfy
+ * can_bear_off().  Returns non-zero when the move is valid. */
+int is_move_valid(int board[], int player, int face_value, int move[]) {
+  int from_idx = move[0];
+  int to_idx = move[1];
+
+  /* The move comes from the caller unchecked; never index outside the
+   * 26-slot board. */
+  if (from_idx < 0 || from_idx > 25) return 0;
+
+  int to_state;
+  int from_state = board[from_idx];
+  int delta = to_idx - from_idx;
+  int direction = sign(delta);
+  int bearing_off;
+
+  if (to_idx >= 1 && to_idx <= 24) {
+    to_state = board[to_idx];
+    bearing_off = 0;
+  } else {
+    /* Off-board destination: nothing can block it, but the bear-off
+     * conditions below apply. */
+    to_state = 0;
+    bearing_off = 1;
+  }
+
+  return is_forward_move(direction, player)
+    && face_value_match_move_length(delta, face_value)
+    && bear_in_if_checker_on_bar(board, player, from_idx)
+    && checkers_at_from_idx(from_state, player)
+    && no_block_at_to_idx(to_state, player)
+    && (!bearing_off || can_bear_off(board, player, from_idx, to_idx))
+    ;
+}
+
+/* Apply `move` (from move[0] to move[1]) for `player` to `board` in place.
+ *
+ * The checker is removed from the source.  A destination outside 1..24
+ * means bearing off, so nothing is put down.  Otherwise a single opposing
+ * checker on the destination is hit: it is cleared from the point and added
+ * to index 25 (when player is 1) or index 0 (otherwise) before the moving
+ * checker is put down.  No validity checking is done here. */
+void do_move(int board[], int player, int move[]) {
+  const int src = move[0];
+  const int dst = move[1];
+
+  board[src] -= player;
+
+  if (dst >= 1 && dst <= 24) {
+    if (board[dst] * player == -1) {
+      int hit_slot = (player == 1) ? 25 : 0;
+      board[hit_slot] -= player;
+      board[dst] = 0;
+    }
+
+    board[dst] += player;
+  }
+}
+
+/* Return a malloc'ed copy of the 26-slot `board` with `move` applied for
+ * `player`; the input board is left untouched.  The caller must free() the
+ * result.  Aborts the process if the allocation fails. */
+int* do_move_clone(int board[], int player, int move[]) {
+  int* copy = malloc(sizeof(int) * 26);
+  if (copy == NULL) {
+    PyErr_NoMemory();
+    abort();
+  }
+
+  int pos = 0;
+  while (pos < 26) {
+    copy[pos] = board[pos];
+    pos++;
+  }
+
+  do_move(copy, player, move);
+  return copy;
+}
+
+/* Build a new Python tuple of `size` ints from the C array `board`.
+ * Returns a new reference owned by the caller. */
+PyObject* store_board_to_pytuple(int board[], int size) {
+  PyObject* result = PyTuple_New(size);
+  int pos = 0;
+  while (pos < size) {
+    /* PyTuple_SetItem takes over the reference to the new int object. */
+    PyObject* value = PyLong_FromLong((long) board[pos]);
+    PyTuple_SetItem(result, pos, value);
+    pos++;
+  }
+  return result;
+}
+
+/* Compute every board reachable by moving one checker of `player` exactly
+ * `face_value` points.
+ *
+ * The result holds new references to Python tuples (26 ints each).  If the
+ * player has no checkers on the board at all, the result contains the
+ * unchanged board as its only entry.  If the player has checkers but none
+ * of them can move, the result is empty (size 0). */
+board_list calc_moves(int board[], int player, int face_value) {
+  board_list result = { .size = 0 };
+  int* occupied = idxs_with_checkers_of_player(board, player);
+  const int n_occupied = occupied[0];
+
+  if (n_occupied == 0) {
+    result.list[0] = store_board_to_pytuple(board, 26);
+    result.size = 1;
+  } else {
+    for (int k = 1; k <= n_occupied; k++) {
+      int candidate[2] = { occupied[k],
+                           occupied[k] + (face_value * player) };
+
+      if (!is_move_valid(board, player, face_value, candidate)) continue;
+
+      /* The C copy is only needed until it has been converted to a tuple. */
+      int* after = do_move_clone(board, player, candidate);
+      result.list[result.size] = store_board_to_pytuple(after, 26);
+      result.size++;
+      free(after);
+    }
+  }
+
+  free(occupied);
+  return result;
+}
+
+/* Build the 30-element "quack-fat" representation of a 26-slot board.
+ *
+ * Slots 0..25 are copied from `board`.  Slot 26 is 15 minus the sum of all
+ * positive entries, slot 27 is -15 minus the sum of all non-positive
+ * entries.  Slots 28/29 are a one-hot flag: (1, 0) when `player` is 1 and
+ * (0, 1) otherwise.  The result is malloc'ed and must be free()d by the
+ * caller.  Aborts the process if the allocation fails. */
+int* board_features_quack_fat(int board[], int player) {
+  int* new_board = malloc(sizeof(int) * 30);
+  if (new_board == NULL) {
+    PyErr_NoMemory();
+    abort();
+  }
+
+  int pos_sum = 0;
+  int neg_sum = 0;
+  for (int i = 0; i < 26; i++) {
+    new_board[i] = board[i];
+    /* Test the value itself; the previous form took the sign of the
+     * boolean `value > 0`, which only worked by accident. */
+    if (new_board[i] > 0) pos_sum += new_board[i];
+    else                  neg_sum += new_board[i];
+  }
+
+  new_board[26] = 15 - pos_sum;
+  new_board[27] = -15 - neg_sum;
+  if (player == 1) {
+    new_board[28] = 1;
+    new_board[29] = 0;
+  } else {
+    new_board[28] = 0;
+    new_board[29] = 1;
+  }
+
+  return new_board;
+}
+
+/* Meta definitions */
+/* Copy a Python tuple of 26 ints into the C array `board`.
+ *
+ * Returns 0 on success.  Returns 1 with a Python exception set when the
+ * tuple does not have exactly 26 entries (QuackError) or when an entry
+ * cannot be converted to an integer (the exception raised by
+ * PyLong_AsLong). */
+int extract_board(int *board, PyObject* board_tuple_obj) {
+  long numValuesBoard;
+  numValuesBoard = PyTuple_Size(board_tuple_obj);
+  if (numValuesBoard != 26) {
+    PyErr_SetString(QuackError, "Board tuple must have 26 entries");
+    return 1;
+  }
+
+  PyObject* board_val_obj;
+  // Iterate over tuple to retrieve positions
+  for (int i=0; i<numValuesBoard; i++) {
+    board_val_obj = PyTuple_GetItem(board_tuple_obj, i);
+    long value = PyLong_AsLong(board_val_obj);
+    /* -1 is also a legitimate value, so the error indicator decides. */
+    if (value == -1 && PyErr_Occurred()) return 1;
+    board[i] = (int) value;
+  }
+
+  return 0;
+}
+
+/* Copy a Python tuple of 2 ints (from index, to index) into `move`.
+ *
+ * Returns 0 on success.  Returns 1 with a Python exception set when the
+ * tuple does not have exactly 2 entries (QuackError) or when an entry
+ * cannot be converted to an integer (the exception raised by
+ * PyLong_AsLong). */
+int extract_move(int *move, PyObject* move_tuple_obj) {
+  long numValuesMove;
+  numValuesMove = PyTuple_Size(move_tuple_obj);
+  if (numValuesMove != 2) {
+    PyErr_SetString(QuackError, "Move tuple must have exactly 2 entries");
+    return 1;
+  }
+  PyObject* move_val_obj;
+  for (int i=0; i<numValuesMove; i++) {
+    move_val_obj = PyTuple_GetItem(move_tuple_obj, i);
+    long value = PyLong_AsLong(move_val_obj);
+    /* -1 is also a legitimate value, so the error indicator decides. */
+    if (value == -1 && PyErr_Occurred()) return 1;
+    move[i] = (int) value;
+  }
+
+  return 0;
+}
+
+/* Python binding: is_move_valid(board, player, face_value, move) -> bool.
+ * `board` must be a tuple of 26 ints and `move` a tuple of 2 ints. */
+static PyObject*
+quack_is_move_valid(PyObject *self, PyObject *args) {
+  PyObject* board_tuple_obj;
+  PyObject* move_tuple_obj;
+  int player;
+  int face_value;
+  int board[26];
+  int move[2];
+
+  if (! PyArg_ParseTuple(args, "O!iiO!",
+                         &PyTuple_Type, &board_tuple_obj,
+                         &player,
+                         &face_value,
+                         &PyTuple_Type, &move_tuple_obj)) {
+    return NULL;
+  }
+
+  /* Both extractors leave a Python exception set when they fail. */
+  if (extract_board(board, board_tuple_obj) ||
+      extract_move(move, move_tuple_obj)) {
+    return NULL;
+  }
+
+  if (is_move_valid(board, player, face_value, move)) {
+    Py_RETURN_TRUE;
+  }
+  Py_RETURN_FALSE;
+}
+
+/* Python binding: idxs_with_checkers_of_player(board, player) -> list[int].
+ * `board` must be a tuple of 26 ints.  Returns a new list of the indexes at
+ * which `player` has checkers, or NULL with an exception set on failure. */
+static PyObject*
+quack_idxs_with_checkers_of_player(PyObject *self, PyObject *args) {
+
+  int board[26];
+  int player;
+
+  int* idxs;
+
+  PyObject* board_tuple_obj;
+
+  if (! PyArg_ParseTuple(args, "O!i",
+                         &PyTuple_Type, &board_tuple_obj,
+                         &player))
+    return NULL;
+
+  if (extract_board(board, board_tuple_obj)) return NULL;
+
+  idxs = idxs_with_checkers_of_player(board, player);
+  PyObject* idxs_list = PyList_New(idxs[0]);
+  if (idxs_list == NULL) {
+    free(idxs);
+    return NULL;
+  }
+
+  for (int i = 0; i < idxs[0]; i++) {
+    PyObject* item = PyLong_FromLong(idxs[i+1]);
+    if (item == NULL) {
+      free(idxs);
+      Py_DECREF(idxs_list);
+      return NULL;
+    }
+    /* PyList_SetItem takes over the reference to item. */
+    PyList_SetItem(idxs_list, i, item);
+  }
+  free(idxs);
+
+  /* The list is already a new reference owned by us; hand it straight to
+   * the caller instead of round-tripping through Py_BuildValue. */
+  return idxs_list;
+}
+
+/* Python binding: do_move(board, player, move) -> tuple.
+ * `board` must be a tuple of 26 ints and `move` a tuple of 2 ints.  Returns
+ * a new 26-tuple with the move applied; the move is not validated beyond
+ * checking that its source index lies on the board. */
+static PyObject*
+quack_do_move(PyObject *self, PyObject *args) {
+  int board[26];
+  int player;
+  int move[2];
+
+  PyObject* board_tuple_obj;
+  PyObject* move_tuple_obj;
+
+  if (! PyArg_ParseTuple(args, "O!iO!",
+                         &PyTuple_Type, &board_tuple_obj,
+                         &player,
+                         &PyTuple_Type, &move_tuple_obj))
+    return NULL;
+
+  if (extract_board(board, board_tuple_obj)) return NULL;
+  if (extract_move(move, move_tuple_obj))    return NULL;
+
+  /* do_move() writes board[move[0]] unconditionally, so reject a source
+   * index outside the 26-slot board instead of corrupting the stack. */
+  if (move[0] < 0 || move[0] > 25) {
+    PyErr_SetString(QuackError, "Move source index must be in range 0..25");
+    return NULL;
+  }
+
+  do_move(board, player, move);
+
+  /* `board` is a plain C array on the stack, not a Python object, so it
+   * must never be passed to Py_DECREF.  The tuple is a new reference that
+   * is handed directly to the caller. */
+  return store_board_to_pytuple(board, 26);
+}
+
+/* Python binding: calc_moves(board, player, face_value) -> list[tuple].
+ * `board` must be a tuple of 26 ints.  Returns a new list with the boards
+ * produced by calc_moves(), or NULL with an exception set on failure. */
+static PyObject*
+quack_calc_moves(PyObject *self, PyObject *args) {
+  int board[26];
+  int player;
+  int face_value;
+
+  PyObject* board_tuple_obj;
+
+  if (! PyArg_ParseTuple(args, "O!ii",
+                         &PyTuple_Type, &board_tuple_obj,
+                         &player,
+                         &face_value))
+    return NULL;
+
+  if (extract_board(board, board_tuple_obj)) return NULL;
+
+  board_list boards = calc_moves(board, player, face_value);
+  PyObject* boards_list = PyList_New(boards.size);
+  if (boards_list == NULL) {
+    /* We still own every tuple; release them before reporting the error. */
+    for (int i = 0; i < boards.size; i++) Py_XDECREF(boards.list[i]);
+    return NULL;
+  }
+
+  for (int i = 0; i < boards.size; i++) {
+    /* PyList_SetItem takes over the reference to the tuple, also when it
+     * fails, so only the tuples not yet inserted need releasing. */
+    if (PyList_SetItem(boards_list, i, boards.list[i])) {
+      for (int j = i + 1; j < boards.size; j++) Py_XDECREF(boards.list[j]);
+      Py_DECREF(boards_list);
+      return NULL;
+    }
+  }
+
+  /* The list is already a new reference owned by us; return it directly. */
+  return boards_list;
+}
+
+/* Python binding: board_features_quack_fat(board, player) -> tuple.
+ * `board` must be a tuple of 26 ints.  Returns a new 30-tuple holding the
+ * quack-fat representation produced by board_features_quack_fat(). */
+static PyObject*
+quack_board_features_quack_fat(PyObject *self, PyObject *args) {
+  PyObject* board_tuple_obj;
+  int player;
+  int board[26];
+
+  if (! PyArg_ParseTuple(args, "O!i",
+                         &PyTuple_Type, &board_tuple_obj,
+                         &player)) {
+    return NULL;
+  }
+
+  if (extract_board(board, board_tuple_obj)) {
+    return NULL;
+  }
+
+  /* The C feature array is only needed until it has been converted. */
+  int* features = board_features_quack_fat(board, player);
+  PyObject* features_tuple = store_board_to_pytuple(features, 30);
+  free(features);
+
+  /* The tuple is a new reference; ownership passes to the caller. */
+  return features_tuple;
+}
+
+
+/* Method table of the module: maps each Python-visible function name to its
+ * C wrapper.  All wrappers take positional arguments (METH_VARARGS).  The
+ * all-NULL entry at the end terminates the table. */
+static PyMethodDef quack_methods[] = {
+  {
+    "is_move_valid", quack_is_move_valid, METH_VARARGS,
+    "Evaluates the validity of the proposed move."
+  },
+  {
+    "idxs_with_checkers_of_player", quack_idxs_with_checkers_of_player, METH_VARARGS,
+    "Returns a list of indexes with checkers of the specified player"
+  },
+  {
+    "do_move", quack_do_move, METH_VARARGS,
+    "Returns the board after doing the specified move"
+  },
+  {
+    "calc_moves", quack_calc_moves, METH_VARARGS,
+    "Calculates all legal moves from board with specified face value"
+  },
+  {
+    "board_features_quack_fat", quack_board_features_quack_fat, METH_VARARGS,
+    "Transforms a board to the quack-fat board representation"
+  },
+  {NULL, NULL, 0, NULL}
+};
+
+/* Module definition passed to PyModule_Create(): module name, docstring,
+ * per-module state size (-1) and the method table above. */
+static struct PyModuleDef quack_definition = {
+  PyModuleDef_HEAD_INIT,
+  "quack",
+  "A Python module that provides various useful Backgammon-related functions.",
+  -1,
+  quack_methods
+};
+
+/* Module initialisation: creates the `quack` module and registers the
+ * `quack.error` exception type under the attribute name "error".
+ * Returns the new module, or NULL with an exception set on failure. */
+PyMODINIT_FUNC PyInit_quack(void) {
+  PyObject* module;
+
+  module = PyModule_Create(&quack_definition);
+  if (module == NULL)
+    return NULL;
+
+  QuackError = PyErr_NewException("quack.error", NULL, NULL);
+  if (QuackError == NULL) {
+    Py_DECREF(module);
+    return NULL;
+  }
+
+  /* PyModule_AddObject takes over a reference only when it succeeds, so
+   * take an extra one for the module and give it back on failure. */
+  Py_INCREF(QuackError);
+  if (PyModule_AddObject(module, "error", QuackError) < 0) {
+    Py_DECREF(QuackError);
+    Py_CLEAR(QuackError);
+    Py_DECREF(module);
+    return NULL;
+  }
+
+  return module;
+}
--- a/quack/setup.py
+++ b/quack/setup.py
@ -0,0 +1,9 @@
+from distutils.core import setup, Extension
+
+quack = Extension('quack',
+                  sources = ['quack.c'])
+
+setup (name = 'quack',
+       version = '0.1',
+       description = 'Quack Backgammon Tools',
+       ext_modules = [quack])
--- a/report_docs.txt
+++ b/report_docs.txt
@ -0,0 +1,28 @@
+<christoffer> Alexander og jeg skrev noget af vores bachelorprojekt om til C her i fredags.
+<christoffer> Man skal virkelig passe på sine hukommelsesallokeringer.
+<Jmaa> Ja, helt klart.
+<christoffer> Jeg fandt et memory leak, der lækkede 100 MiB hukommelse i sekundet.
+<Jmaa> Hvilken del blev C-ificeret?
+<Jmaa> Damned
+<christoffer> Årsagen var at vi gav et objekt med tilbage til Python uden at dekrementere dets ref-count, så fortolkeren stadig troede at nogen havde brug for det.
+<christoffer> Den del af spillogikken, der tjekker om træk er gyldige.
+<christoffer> Det bliver kaldt ret mange tusinde gange pr. spil, så vi tænkte at der måske kunne være lidt optimering at hente i at omskrive det til C.
+<Jmaa> Ok, så I har ikke selv brugt alloc og free. Det er alligevel noget.
+<christoffer> Metoden selv blev 7 gange hurtigere!
+<Jmaa> Wow!
+<christoffer> Jo. Det endte vi også med at gøre.
+<christoffer> Vi havde brug for lister af variabel størrelse. Det endte med en struct med et "size" felt og et "list" felt.
+<Jmaa> Inkluderer det speedup, frem og tilbagen mellem C og python?
+<christoffer> Det burde det gøre, ja!
+<Jmaa> Gjorde det nogen stor effekt for hvor hurtigt I kan evaluere?
+<christoffer> Jeg tror ikke at der er særligt meget "frem og tilbage"-stads. Det ser ud til at det kode man skriver bliver kastet ret direkte ind i fortolkeren.
+<christoffer> Det gjorde en stor forskel for når vi laver 1-ply.
+<christoffer> "ply" er hvor mange træk man kigger fremad.
+<christoffer> Så kun at kigge på det umiddelbart næste træk er 0-ply, hvilket er det vi har gjort indtil nu
+<christoffer> 1-ply var for langsomt. Det tog ca. 6-7 sekunder at evaluere ét træk.
+<christoffer> Alexander lavede lidt omskrivninger, så TensorFlow udregnede det hurtigere og fik det ned på ca. 3-4 sekunder *pr. spil*.
+<christoffer> Så skrev vi noget af det om til C, og nu er vi så på ca. 2 sekunder pr. spil med 1-ply, hvilket er ret vildt.
+<christoffer> Det er så godt at Python-fortolkeren kan udvides med C!
+<christoffer> caspervk, kan I optimere jeres bachelorprojekt med et par C-moduler?
+<Jmaa> Det er en hel lille sektion til rapporten det der.
+<christoffer> Yeah. Kopierer bare det her verbatim ind.
--- a/requirements.txt
+++ b/requirements.txt
@ -1,14 +1,24 @@
 absl-py==0.1.10
 astor==0.6.2
 bleach==1.5.0
+cycler==0.10.0
 gast==0.2.0
 grpcio==1.10.0
 html5lib==0.9999999
+kiwisolver==1.0.1
 Markdown==2.6.11
+matplotlib==2.2.2
 numpy==1.14.1
+pandas==0.22.0
 protobuf==3.5.1
+pubeval==0.3
+pyparsing==2.2.0
+python-dateutil==2.7.2
+pytz==2018.3
 six==1.11.0
-tensorboard==1.6.0
-tensorflow==1.6.0
+tensorboard==1.8.0
+tensorflow==1.8.0
 termcolor==1.1.0
 Werkzeug==0.14.1
+pygame==1.9.3
+
--- a/tensorflow_impl_tests/eager_main.py
+++ b/tensorflow_impl_tests/eager_main.py
@ -0,0 +1,94 @@
+import time
+import numpy as np
+import tensorflow as tf
+from board import Board
+import tensorflow.contrib.eager as tfe
+
+
+tf.enable_eager_execution()
+xavier_init = tf.contrib.layers.xavier_initializer()
+
+
+
+opt = tf.train.MomentumOptimizer(learning_rate=0.1, momentum=1)
+
+output_size = 1
+hidden_size = 40
+input_size = 30
+
+
+model = tf.keras.Sequential([
+    tf.keras.layers.Dense(40, activation="sigmoid", kernel_initializer=tf.constant_initializer(-2), input_shape=(1,input_size)),
+    tf.keras.layers.Dense(1, activation="sigmoid", kernel_initializer=tf.constant_initializer(0.2))
+])
+
+
+# tfe.Saver(model.variables).restore(tf.train.latest_checkpoint("./"))
+
+input = [0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0]
+
+
+
+all_input = np.array([Board.board_features_quack_fat(input, 1) for _ in range(20)])
+
+
+single_in = Board.board_features_quack_fat(input, 1)
+
+
+start = time.time()
+
+all_predictions = model.predict_on_batch(all_input)
+
+
+learning_rate = 0.1
+
+with tf.GradientTape() as tape:
+    value = model(single_in)
+
+
+print("Before:", value)
+
+grads = tape.gradient(value, model.variables)
+print("/"*40,"model_variables","/"*40)
+print(model.variables)
+print("/"*40,"grads","/"*40)
+print(grads)
+
+difference_in_values = tf.reshape(tf.subtract(0.9, value, name='difference_in_values'), [])
+
+for grad, train_var in zip(grads, model.variables):
+    backprop_calc = 0.1 * difference_in_values * grad
+    train_var.assign_add(backprop_calc)
+
+value = model(single_in)
+print("/"*40,"model_variables","/"*40)
+print(model.variables)
+print("After:", value)
+
+
+# # grads = [0.1*val-np.random.uniform(-1,1)+grad for grad, trainable_var in zip(grads, model.variables)]
+#
+# # print(model.variables[0][0])
+# weights_before = model.weights[0]
+#
+# start = time.time()
+# #[trainable_var.assign_add(0.1*val-0.3+grad) for grad, trainable_var in zip(grads, model.variables)]
+#
+# start = time.time()
+# for gradient, trainable_var in zip(grads, model.variables):
+#     backprop_calc = 0.1 * (0.9 - val) * gradient
+#     trainable_var.assign_add(backprop_calc)
+#
+# # opt.apply_gradients(zip(grads, model.variables))
+#
+# print(time.time() - start)
+#
+# print(model(single_in))
+#
+# vals = model.predict_on_batch(all_input)
+# vals = list(vals)
+# vals[3] = 4
+# print(vals)
+# print(np.argmax(np.array(vals)))
+
+# tfe.Saver(model.variables).save("./tmp_ckpt")
--- a/tensorflow_impl_tests/normal_main.py
+++ b/tensorflow_impl_tests/normal_main.py
@ -0,0 +1,67 @@
+import tensorflow as tf
+import numpy as np
+import time
+
+class Everything:
+    """Stand-alone graph-mode TensorFlow experiment.
+
+    Builds a network with one sigmoid hidden layer and one sigmoid output,
+    plus a hand-written update op, and provides ``eval`` to time repeated
+    forward passes and show the output before and after one update.
+    """
+
+    def __init__(self):
+        """Build the forward graph and the manual training op."""
+
+        self.output_size = 1
+        self.hidden_size = 40
+        self.input_size = 30
+
+        # A single input row of input_size floats.
+        self.input = tf.placeholder('float', [1, self.input_size])
+
+        # NOTE(review): xavier_init is created but not used below; the
+        # weights use constant initializers instead.
+        xavier_init = tf.contrib.layers.xavier_initializer()
+
+
+        # Weights start at fixed constants and biases at zero.
+        W_1 = tf.get_variable("w_1", (self.input_size, self.hidden_size),
+                              initializer=tf.constant_initializer(-2))
+        W_2 = tf.get_variable("w_2", (self.hidden_size, self.output_size),
+                              initializer=tf.constant_initializer(0.2))
+
+        b_1 = tf.get_variable("b_1", (self.hidden_size,),
+                              initializer=tf.zeros_initializer)
+        b_2 = tf.get_variable("b_2", (self.output_size,),
+                              initializer=tf.zeros_initializer)
+
+        value_after_input = tf.sigmoid(tf.matmul(self.input, W_1) + b_1, name='hidden_layer')
+
+        self.value = tf.sigmoid(tf.matmul(value_after_input, W_2) + b_2, name='output_layer')
+
+        apply_gradients = []
+
+
+        # Gradients of the network output with respect to every trainable variable.
+        trainable_vars = tf.trainable_variables()
+        gradients = tf.gradients(self.value, trainable_vars)
+
+        # Scalar error against the hard-coded target value 0.9.
+        difference_in_values = tf.reshape(tf.subtract(0.9, self.value, name='difference_in_values'), [])
+
+        # Manual update: each variable gets 0.1 * error * gradient added to it.
+        with tf.variable_scope('apply_gradients'):
+            for gradient, trainable_var in zip(gradients, trainable_vars):
+                backprop_calc = 0.1 * difference_in_values * gradient
+                grad_apply = trainable_var.assign_add(backprop_calc)
+                apply_gradients.append(grad_apply)
+
+
+        # Running training_op applies all variable updates together.
+        self.training_op = tf.group(*apply_gradients, name='training_op')
+
+
+
+    def eval(self):
+        """Time 20 forward passes, then print the output before and after one update.
+
+        Prints the elapsed time for the forward passes (session creation and
+        variable initialisation are inside the timed region), the network
+        output, and the output again after running ``training_op`` once.
+        """
+        input = np.array([0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0, 0, 0, 1, 0])
+        start = time.time()
+        # NOTE(review): the session is never closed.
+        sess = tf.Session()
+        sess.run(tf.global_variables_initializer())
+        for i in range(20):
+            val = sess.run(self.value, feed_dict={self.input: input.reshape(1,-1)})
+        print(time.time() - start)
+        print(val)
+        sess.run(self.training_op, feed_dict={self.input: input.reshape(1,-1)})
+        val = sess.run(self.value, feed_dict={self.input: input.reshape(1, -1)})
+        print(val)
+
+everything = Everything()
+everything.eval()
+
+
--- a/test.py
+++ b/test.py
@ -141,6 +141,56 @@ class TestIsMoveValid(unittest.TestCase):
    # TODO: More tests for bearing off are needed


+    def test_bear_off_non_backmost(self):
+        # The only checkers on the board are single positive checkers at
+        # indices 23 and 24. Moving 23 -> 25 with a 2 and 24 -> 25 with a 1
+        # must be accepted, while 24 -> 26 with a 2 must be rejected.
+        # NOTE(review): the arguments are presumably
+        # (board, player, face value, (from, to)) -- confirm against
+        # Board.is_move_valid.
+        board = ( 0,
+                  0, 0, 0, 0, 0, 0,
+                  0, 0, 0, 0, 0, 0,
+                  0, 0, 0, 0, 0, 0,
+                  0, 0, 0, 0, 1, 1,
+                  0 )
+        self.assertEqual(Board.is_move_valid(board, 1, 2, (23, 25)), True)
+        self.assertEqual(Board.is_move_valid(board, 1, 1, (24, 25)), True)
+        self.assertEqual(Board.is_move_valid(board, 1, 2, (24, 26)), False)
+
+    def test_bear_off_quadrant_limits_white(self):
+        # Positive checkers occupy indices 19-24 plus one extra checker at
+        # index 18. Both moves onto index 25 must be rejected.
+        # NOTE(review): presumably rejected because the checker at 18 lies
+        # outside the last six points, so bearing off is not yet allowed.
+        board = ( 0,
+                  0, 0, 0, 0, 0, 0,
+                  0, 0, 0, 0, 0, 0,
+                  0, 0, 0, 0, 0, 1,
+                  1, 1, 1, 1, 1, 1,
+                  0 )
+        self.assertEqual(Board.is_move_valid(board, 1, 2, (23, 25)), False)
+        self.assertEqual(Board.is_move_valid(board, 1, 1, (24, 25)), False)
+
+    def test_bear_off_quadrant_limits_black(self):
+        # Negative checkers occupy indices 1-6 plus one extra checker at
+        # index 7. Both moves onto index 0 must be rejected.
+        # NOTE(review): presumably rejected because the checker at 7 lies
+        # outside the first six points, so bearing off is not yet allowed.
+        board = ( 0,
+                  -1, -1, -1, -1, -1, -1,
+                  -1, 0, 0, 0, 0, 0,
+                  0, 0, 0, 0, 0, 0,
+                  0, 0, 0, 0, 0, 0,
+                  0 )
+        self.assertEqual(Board.is_move_valid(board, -1, 2, (2, 0)), False)
+        self.assertEqual(Board.is_move_valid(board, -1, 1, (1, 0)), False)
+
+    def test_bear_off_quadrant_limits_white_2(self):
+        # Positive checkers sit only at indices 19 and 24, i.e. all within
+        # indices 19-24. Moving 24 -> 25 with a 1 must be accepted.
+        board = ( 0,
+                  0, 0, 0, 0, 0, 0,
+                  0, 0, 0, 0, 0, 0,
+                  0, 0, 0, 0, 0, 0,
+                  1, 0, 0, 0, 0, 1,
+                  0 )
+        self.assertEqual(Board.is_move_valid(board, 1, 1, (24, 25)), True)
+
+    def test_bear_off_quadrant_limits_black_2(self):
+        # Negative checkers sit only at indices 1 and 6, i.e. all within
+        # indices 1-6. Moving 1 -> 0 with a 1 must be accepted.
+        board = ( 0,
+                  -1, 0, 0, 0, 0, -1,
+                  0, 0, 0, 0, 0, 0,
+                  0, 0, 0, 0, 0, 0,
+                  0, 0, 0, 0, 0, 0,
+                  0 )
+        self.assertEqual(Board.is_move_valid(board, -1, 1, (1, 0)), True)
+        
+        
 class TestNumOfChecker(unittest.TestCase):
    def test_simple_1(self):
        board = ( 0,
@ -614,5 +664,328 @@ class TestBoardFlip(unittest.TestCase):

        self.assertEqual(Board.flip(Board.flip(board)), board)

+    def test_tesauro_initial(self):
+        # Checks the feature vector that Board.board_features_tesauro returns
+        # for the initial position when evaluated for player 1.
+        #
+        # The 198 expected values are laid out as two halves of 98 values
+        # (four groups of six 4-value rows, followed by two extra values) and
+        # two final values.
+        # NOTE(review): presumably each 4-value row encodes one point, the two
+        # extra values per half are bar and borne-off features, and the final
+        # pair marks the player to move -- confirm against
+        # board_features_tesauro.
+        board = Board.initial_state
+
+        expected = (1,1,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    1,1,1,1,
+                    
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    1,1,1,0,
+                    0,0,0,0,
+
+                    1,1,1,1,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    0.0,
+                    0,
+
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    1,1,1,1,
+
+                    0,0,0,0,
+                    1,1,1,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    1,1,1,1,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    1,1,0,0,
+
+                    0.0,
+                    0,
+
+                    1,
+                    0
+        )
+
+        import numpy as np
+        # Element-wise comparison against the expectation as a (1, 198) array.
+        self.assertTrue((Board.board_features_tesauro(board, 1) ==
+                         np.array(expected).reshape(1, 198)).all())
+
+    def test_pubeval_features(self):
+        # Checks Board.board_features_to_pubeval on the initial position.
+        # The same 28-value expectation is asserted for both player 1 and
+        # player -1.
+        board = Board.initial_state
+
+        expected = (0,
+                    2, 0, 0, 0, 0, -5,
+                    0, -3, 0, 0, 0, 5,
+                    -5, 0, 0, 0, 3, 0,
+                    5, 0, 0, 0, 0, -2,
+                    0,
+                    0, 0)
+
+        import numpy as np
+        # Element-wise comparison against the expectation as a (1, 28) array.
+        self.assertTrue((Board.board_features_to_pubeval(board, 1) ==
+                         np.array(expected).reshape(1, 28)).all())
+        self.assertTrue((Board.board_features_to_pubeval(board, -1) ==
+                         np.array(expected).reshape(1, 28)).all())
+
+    def test_tesauro_bars(self):
+        # Starts from the initial state, empties indices 1 and 24, and sets
+        # index 0 to 2 and index 25 to -2. The expectation has all-zero
+        # first and last 4-value rows and 1.0 as the first of the two extra
+        # values in each 98-value half.
+        # NOTE(review): presumably indices 0 and 25 are the bars and the
+        # feature is checkers-on-bar divided by 2 -- confirm against
+        # board_features_tesauro.
+        board = list(Board.initial_state)
+        board[1] = 0
+        board[0] = 2
+        board[24] = 0
+        board[25] = -2
+
+        board = tuple(board)
+        
+        expected = (0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    1,1,1,1,
+                    
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    1,1,1,0,
+                    0,0,0,0,
+
+                    1,1,1,1,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    1.0,
+                    0,
+
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    1,1,1,1,
+
+                    0,0,0,0,
+                    1,1,1,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    1,1,1,1,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    1.0,
+                    0,
+
+                    1,
+                    0
+        )
+
+        import numpy as np
+        # Element-wise comparison against the expectation as a (1, 198) array.
+        self.assertTrue((Board.board_features_tesauro(board, 1) ==
+                         np.array(expected).reshape(1, 198)).all())
+
+
+    def test_tesauro_home(self):
+        # Starts from the initial state and empties indices 1 and 24 without
+        # placing those checkers anywhere else. The expectation has all-zero
+        # first and last 4-value rows and 2 as the second of the two extra
+        # values in each 98-value half.
+        # NOTE(review): presumably that value counts checkers no longer on
+        # the board as borne off -- confirm against board_features_tesauro.
+        board = list(Board.initial_state)
+
+        board[1] = 0
+        board[24] = 0
+
+        board = tuple(board)
+        
+        expected = (0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    1,1,1,1,
+                    
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    1,1,1,0,
+                    0,0,0,0,
+
+                    1,1,1,1,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    0.0,
+                    2,
+
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    1,1,1,1,
+
+                    0,0,0,0,
+                    1,1,1,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    1,1,1,1,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    0.0,
+                    2,
+
+                    1,
+                    0
+        )
+
+        import numpy as np
+        # Element-wise comparison against the expectation as a (1, 198) array.
+        self.assertTrue((Board.board_features_tesauro(board, 1) ==
+                         np.array(expected).reshape(1, 198)).all())
+
+
+    def test_tesauro_black_player(self):
+        # Checks the feature vector that Board.board_features_tesauro returns
+        # for the initial position when evaluated for player -1. The
+        # expectation ends in the pair (0, 1).
+        # NOTE(review): presumably the final pair marks the player to move
+        # -- confirm against board_features_tesauro.
+        board = Board.initial_state
+
+        expected = (1,1,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    1,1,1,1,
+                    
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    1,1,1,0,
+                    0,0,0,0,
+
+                    1,1,1,1,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    0.0,
+                    0,
+
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    1,1,1,1,
+
+                    0,0,0,0,
+                    1,1,1,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    1,1,1,1,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    1,1,0,0,
+
+                    0.0,
+                    0,
+
+                    0,
+                    1
+        )
+
+        import numpy as np
+        # Element-wise comparison against the expectation as a (1, 198) array.
+        self.assertTrue((Board.board_features_tesauro(board, -1) ==
+                         np.array(expected).reshape(1, 198)).all())
+
+        
 if __name__ == '__main__':
    unittest.main()
Author	SHA1	Message	Date
Alexander Munch-Hansen	ea4efc5a2b	Updated server code.	2018-06-07 21:36:06 +02:00
Christoffer Müller Madsen	26c0b469eb	restore restore_model	2018-05-22 20:49:10 +02:00
Alexander Munch-Hansen	f170bad9b1	tesauro fat and diffs in values	2018-05-22 15:39:14 +02:00
Christoffer Müller Madsen	6e061171da	rm TODO	2018-05-22 15:38:04 +02:00
Christoffer Müller Madsen	40c228ef01	pubeval tests	2018-05-22 15:36:23 +02:00
Christoffer Müller Madsen	c2c6c89e9f	Merge branch 'experimentation' into 'master' Experimentation See merge request Pownie/backgammon!8	2018-05-22 13:16:10 +00:00
Christoffer Müller Madsen	b7708b3675	train-evaluate-save	2018-05-22 15:15:36 +02:00
Christoffer Müller Madsen	bad870c27a	update 0-ply-tests	2018-05-22 15:15:15 +02:00
Christoffer Müller Madsen	653d6e30a8	add missing comma	2018-05-22 15:12:47 +02:00
Christoffer Müller Madsen	7e51b44e33	Merge branch 'experimentation' into 'master' tesauro fat and diffs in values See merge request Pownie/backgammon!7	2018-05-22 13:12:10 +00:00
Christoffer Müller Madsen	1fd6c35baa	Merge branch 'master' into 'experimentation' # Conflicts: # main.py	2018-05-22 13:11:43 +00:00
Alexander Munch-Hansen	d426c1c3b5	tesauro fat and diffs in values	2018-05-22 15:10:41 +02:00
Christoffer Müller Madsen	5ab144cffc	add git commit status to all logs	2018-05-22 14:44:13 +02:00
Christoffer Müller Madsen	cef8e54709	Merge branch 'master' of gitfub.space:Pownie/backgammon	2018-05-22 14:37:46 +02:00
Christoffer Müller Madsen	2efbc446f2	log git commit status in evaluation logs	2018-05-22 14:37:27 +02:00
Christoffer Müller Madsen	c54f7aca24	Merge branch 'experimentation' into 'master' Experimentation See merge request Pownie/backgammon!6	2018-05-22 12:36:37 +00:00
Alexander Munch-Hansen	c31bc39780	More server	2018-05-22 00:26:32 +02:00
Alexander Munch-Hansen	6133cb439f	Merge remote-tracking branch 'origin/experimentation' into experimentation	2018-05-20 20:15:57 +02:00
Alexander Munch-Hansen	5acd79b6da	Slight modification to move calculation	2018-05-20 19:43:28 +02:00
=	b11e783b30	add 0-ply-tests	2018-05-20 18:50:28 +02:00
Christoffer Müller Madsen	f834b10e02	remove unnecessary print	2018-05-20 16:52:05 +02:00
Christoffer Müller Madsen	72f01a2a2d	remove dependency on yaml	2018-05-20 16:03:58 +02:00
Alexander Munch-Hansen	d14e6c5994	Everything might work, except for quad, that might be bugged.	2018-05-20 00:38:13 +02:00
Alexander Munch-Hansen	a266293ecd	Stuff is happening, moving is better!	2018-05-19 22:01:55 +02:00
Alexander Munch-Hansen	e9a46c79df	server and stuff	2018-05-19 14:12:13 +02:00
Alexander Munch-Hansen	816cdfae00	fix and clean	2018-05-18 14:55:10 +02:00
Christoffer Müller Madsen	ff9664eb38	Merge branch 'eager_eval' into 'master' Eager eval See merge request Pownie/backgammon!5	2018-05-18 12:06:12 +00:00
Alexander Munch-Hansen	3e379b40c4	Accidentally added a '5' in the middle of a variable.	2018-05-16 00:20:54 +02:00
Alexander Munch-Hansen	90fad334b9	More optimizations.	2018-05-15 23:37:35 +02:00
Alexander Munch-Hansen	a77c13a0a4	1-ply runs even faster.	2018-05-15 19:29:27 +02:00
Alexander Munch-Hansen	260c32d909	oiuhhiu	2018-05-15 18:16:44 +02:00
Alexander Munch-Hansen	00974b0f11	Added '--play' flag, so you can now play against the ai.	2018-05-14 13:07:48 +02:00
Alexander Munch-Hansen	2c02689577	Merge remote-tracking branch 'origin/eager_eval' into eager_eval	2018-05-13 23:55:02 +02:00
Alexander Munch-Hansen	926a331df0	Some flags from main.py is gone, rolls now allow a face_value of 0 yet again and it is possible to play against the ai. There is no flag for this yet, so this has to be added.	2018-05-13 23:54:13 +02:00
Christoffer Müller Madsen	d932663519	add explanation of ply speedup	2018-05-13 22:26:24 +02:00
Christoffer Müller Madsen	2312c9cb2a	Merge branch 'eager_eval' of gitfub.space:Pownie/backgammon into eager_eval	2018-05-12 15:19:12 +02:00
Christoffer Müller Madsen	9f1bd56c0a	fix bear_off bug; addtional tests and additional fixes	2018-05-12 15:18:52 +02:00
Alexander Munch-Hansen	ba4ef86bb5	Board rep can now be inferred from file after being given once. We can also evaluate multiple times by using the flag "--repeat-eval". The flag defaults to 1, if not provided.	2018-05-12 12:14:47 +02:00
Christoffer Müller Madsen	c3f5e909d6	flip is back	2018-05-11 21:47:48 +02:00
Christoffer Müller Madsen	1aa9cf705f	quack without leaks	2018-05-11 21:24:10 +02:00
Christoffer Müller Madsen	383dd7aa4b	code works again; quack gave ~3 times improvement for calc_moves	2018-05-11 20:13:43 +02:00
Christoffer Müller Madsen	93188fe06b	more quack for board	2018-05-11 20:07:27 +02:00
Christoffer Müller Madsen	ffbc98e1a2	quack kind of works	2018-05-11 19:00:39 +02:00
Christoffer Müller Madsen	03e61a59cf	quack	2018-05-11 17:29:22 +02:00
Alexander Munch-Hansen	93224864a4	More comments, backprop have been somewhat tested in the eager_main.py and normal_main.py.	2018-05-11 13:35:01 +02:00
Alexander Munch-Hansen	504308a9af	Yet another input argument, "--ply", 0 for no look-ahead, 1 for a single look-ahead.	2018-05-10 23:22:41 +02:00
Alexander Munch-Hansen	3b57c10b5a	Saves calling tf.reduce_mean on all values once.	2018-05-10 22:57:27 +02:00
Christoffer Müller Madsen	4fa10861bb	update TF dependency to 1.8.0	2018-05-10 19:27:51 +02:00
Alexander Munch-Hansen	6131d5b5f4	Added comments for Christoffer!	2018-05-10 19:25:28 +02:00
Alexander Munch-Hansen	1aedc23de1	1-ply now works again.	2018-05-10 19:13:18 +02:00
Alexander Munch-Hansen	2d84cd5a0b	1-ply now works again.	2018-05-10 19:06:53 +02:00
Alexander Munch-Hansen	396d5b036d	All values for boards and all rolls can now be calculated	2018-05-10 18:41:21 +02:00
Alexander Munch-Hansen	4efb229d34	Added a lot of comments	2018-05-10 15:28:33 +02:00
Alexander Munch-Hansen	f2a67ca92e	All board reps should now work as input.	2018-05-10 10:49:25 +02:00
Alexander Munch-Hansen	9cfdd7e2b2	Added a verbosity flag, --verbose, which allows for printing of variables and such.	2018-05-10 10:39:22 +02:00
Alexander Munch-Hansen	6429e0732c	We should now be able to both train and eval as per usual. I've added a file "global_step", which works as the new global_step counter, so we can use it for exp_decay.	2018-05-09 23:15:35 +02:00
Alexander Munch-Hansen	cb7e7b519c	Getting closer to functionality. We're capable of evaluating moves and a rework of global_step has begun, such that we now use episode_count as a way of calculating exp_decay, which have been implemented as a function.	2018-05-09 22:22:12 +02:00
Alexander Munch-Hansen	9a2d87516e	Ongoing rewrite of network to use an eager model. We're now capable of evaluating a list of states with network.py. We can also save and restore models.	2018-05-09 00:33:05 +02:00
Alexander Munch-Hansen	7b308be4e2	Different implementations of different speed	2018-05-07 22:24:47 +02:00
Alexander Munch-Hansen	ac6660e05b	Added board-rep as cli argument, to state which input-board-rep to use. Also fixed weird nesting of difference_in_values.	2018-05-06 20:52:35 +02:00
Alexander Munch-Hansen	1f8485f54e	No longer use n_ply, shit's too slow man. Added extra logging, now logs the average difference in values between trainings. Also fixed bug with the length of quack-norm. Also added cli argument; use-baseline, if set, the baseline-model will be used.	2018-05-06 20:41:07 +02:00
Alexander Munch-Hansen	1db469709a	make_move now calls n_ply to search deeper and potentially give better moves. It's hella fucking slow.	2018-05-02 01:06:23 +02:00
Alexander Munch-Hansen	695a3d43db	Fixed n_ply and actually added a comma in main.py. clap Christoffer	2018-05-01 20:39:29 +02:00
Christoffer Müller Madsen	c530aa688d	flipidip	2018-05-01 13:48:42 +02:00
Alexander Munch-Hansen	3f6849048e	added network_test and some comments	2018-04-29 12:14:14 +02:00
Christoffer Müller Madsen	afa6504b05	ply again again	2018-04-26 16:49:49 +02:00
Christoffer Müller Madsen	9428a00c11	add "--force-creation" flag to force model creation	2018-04-26 11:43:19 +02:00
Pownie	48a5f6cbb6	Moved "do_ply" out of "calculate_2_ply", in an effort to be able to eventually do further plies, however some rewriting of the current "do_ply" will be needed, as described in a comment.	2018-04-26 09:42:03 +02:00
Pownie	8899c5c2d9	Fixed potential bug in regards to scores in 2-ply calculation.	2018-04-25 00:51:04 +02:00
Pownie	ea3f05846d	Merge branch 'master' of https://gitfub.space/Pownie/backgammon	2018-04-24 22:31:18 +02:00
Pownie	0509a51fd3	Added baseline model for testing	2018-04-24 22:30:58 +02:00
Christoffer Müller Madsen	33a4b0db3c	disallow using model "baseline"	2018-04-24 21:16:54 +02:00
Pownie	349ad718f1	Moved gen_21_rolls into the 2-ply method, so it can be correctly used like the good helper method that it is	2018-04-23 00:45:31 +02:00
Pownie	e5cc54d3e0	Added a normalised version of quack	2018-04-23 00:35:25 +02:00
Pownie	160f5bd737	added some comments and removed some old code	2018-04-22 19:13:46 +02:00
Pownie	77d82f6883	Added code for 2-ply look-ahead	2018-04-22 15:07:19 +02:00
Christoffer Müller Madsen	1062b72bda	fix typo	2018-04-19 16:04:49 +02:00
Alexander Munch-Hansen	66589dfde3	fixed global step, now using exp decay	2018-04-19 16:01:19 +02:00
Alexander Munch-Hansen	cba0f67ae2	fixed the bug	2018-04-19 15:22:00 +02:00
Christoffer Müller Madsen	b6c52ba476	fix type error	2018-04-16 00:24:24 +02:00
Christoffer Müller Madsen	8998dca1f2	remove @Pownie 's debug print	2018-04-16 00:03:02 +02:00
Pownie	611f6cdba0	Changed alpha to learning_rate	2018-04-15 23:53:35 +02:00
Pownie	57fb1cb141	Merge branch 'master' of https://gitfub.space/Pownie/backgammon	2018-04-15 23:52:00 +02:00
Pownie	cc1e010840	Uses proper board instead of Alex' drunken mistakes	2018-04-15 23:51:28 +02:00
Christoffer Müller Madsen	f68d7a9ded	add pygame to requirements.txt	2018-04-15 22:45:37 +02:00
Pownie	f59fe27e5f	You can now move off bar	2018-04-14 23:31:33 +02:00
Pownie	7d29fc02f2	Added global step + exponential decay	2018-04-14 23:11:20 +02:00
Pownie	1d9c94896d	Red can go on bar as well now	2018-04-14 22:53:49 +02:00
Pownie	716413e2b6	bar works somewhat if black goes on there. Still can't get off it	2018-04-14 22:51:41 +02:00
Pownie	7993da0db7	Turns are now functioning	2018-04-14 18:47:38 +02:00
Alexander Munch-Hansen	7764a70799	Changed calculate_legal_states to allow for possible face_value of 0	2018-04-14 14:51:50 +02:00
Alexander Munch-Hansen	c08e7fe540	Few changes to board	2018-04-14 14:13:27 +02:00
Alexander Munch-Hansen	dec12d989e	Not fully implented board	2018-04-11 00:38:25 +02:00
Christoffer Müller Madsen	4cdd1960a0	add pandas and matplotlib to Python package requirements	2018-03-28 15:37:48 +02:00
Christoffer Müller Madsen	3bcb7c5df9	Merge branch 'rework-1' into 'master' Rework 1 See merge request Pownie/backgammon!4	2018-03-28 13:32:58 +00:00
Christoffer Müller Madsen	8764fadd6a	train-evaluate-save	2018-03-28 15:32:22 +02:00
Christoffer Müller Madsen	17f5b62e9b	proper Tesauro board representation	2018-03-28 14:36:52 +02:00
Christoffer Müller Madsen	fda2c6e08d	parametric board representation in network	2018-03-28 12:00:47 +02:00
Christoffer Müller Madsen	abce56dd40	fix typo	2018-03-27 23:13:59 +00:00
alex	95b12a6c35	Added another board_rep	2018-03-28 00:33:39 +02:00
alex	785ae6a5be	Fixed wrongful appending of current player to board rep	2018-03-28 00:16:50 +02:00
Christoffer Müller Madsen	2654006222	fix wrongful mergings	2018-03-27 13:02:36 +02:00
Christoffer Müller Madsen	28b82e8228	update dumbeval weights	2018-03-27 12:57:06 +02:00
Christoffer Müller Madsen	8822af81e6	move dumbeval code to separate directory	2018-03-27 12:23:15 +02:00
Christoffer Müller Madsen	5e5b3981fc	Merge branch 'fuck_git' into 'rework-1' Merge branch 'rework-1' into 'fuck_git' See merge request Pownie/backgammon!3	2018-03-27 10:19:50 +00:00
Christoffer Müller Madsen	d4e699bc49	Merge branch 'rework-1' into 'fuck_git' Rework 1 See merge request Pownie/backgammon!2	2018-03-27 10:16:37 +00:00
Christoffer Müller Madsen	c248ca0452	Merge branch 'fuck_git' into 'rework-1' # Conflicts: # network.py	2018-03-27 10:15:51 +00:00
Christoffer Müller Madsen	0eac5434d6	update .gitignore	2018-03-27 11:55:32 +02:00
alex	f43108c239	Training using slightly revamped version of our own board rep. Not sure if works yet.	2018-03-27 04:06:08 +02:00
alex	ab5d2aabb2	Initialized weights completely randomly for dumbeval	2018-03-27 02:41:58 +02:00
alex	006f791727	Functioning network using board representation shamelessly ripped from Tesauro	2018-03-27 02:26:15 +02:00
Christoffer Müller Madsen	9b2bbfb4d1	print variances when plotting evaluation variance benchmark	2018-03-26 17:06:12 +02:00
Christoffer Müller Madsen	4c43bf19a3	Add evaluation variance benchmark To do a benchmark for `pubeval`, run `python3 main.py --bench-eval-scores --eval-methods pubeval` Logs will be placed in directory `bench` Use `plot_bench(data_path)` in `plot.py` for plotting	2018-03-26 16:45:26 +02:00
Christoffer Müller Madsen	1f1e806306	fix errant whitespace	2018-03-26 15:55:48 +02:00
Christoffer Müller Madsen	98c9af72e7	rework network	2018-03-22 15:30:47 +01:00