fix network.py

fix bearing off bug
2018-05-24 17:04:49 +02:00 · 2018-05-24 16:59:07 +02:00
3 changed files with 187 additions and 111 deletions
--- a/app.py
+++ b/app.py
@@ -2,7 +2,6 @@ from flask import Flask, request, jsonify
 from flask_json import FlaskJSON, as_json_p
 from flask_cors import CORS
 from board import Board
 from eval import Eval
 import main
 import random
 from network import Network
@@ -18,8 +17,8 @@ CORS(app)
 config = main.config.copy()
 config['model'] = "player_testings"
-config['ply'] = "0"
+config['ply'] = "1"
-config['board_representation'] = 'tesauro'
+config['board_representation'] = 'quack-fat'
 network = Network(config, config['model'])
 network.restore_model()
@@ -91,16 +90,11 @@ def bot_move():
    data = request.get_json(force=True)
    board = [int(x) for x in data['board'].split(',')]
    use_pubeval = bool(data['pubeval'])
    roll = (random.randrange(1,7), random.randrange(1,7))
-
+    # print(roll)
    if use_pubeval:
        board, value = Eval.make_pubeval_move(tuple(board), 1, roll)
    else:
    board, _ = network.make_move(tuple(board), roll, 1)
-
+    # print("Boards!:",board)
    # print("Board!:",board)
    return ",".join([str(x) for x in list(board)])
--- a/board.py
+++ b/board.py
@@ -1,4 +1,3 @@
 import quack
 import numpy as np
 import itertools
@@ -13,9 +12,15 @@ class Board:
    @staticmethod
    def idxs_with_checkers_of_player(board, player):
-        return quack.idxs_with_checkers_of_player(board, player)
+        idxs = []
        for idx, checker_count in enumerate(board):
            if checker_count * player >= 1:
                idxs.append(idx)
        return idxs
    # TODO: Write a test for this
    # TODO: Make sure that the bars fit, 0 represents the -1 player and 25 represents the 1 player
    # index 26 is player 1 home, index 27 is player -1 home
    @staticmethod
    def board_features_to_pubeval(board, player):
@@ -40,14 +45,14 @@ class Board:
    # quack-fat
    @staticmethod
    def board_features_quack_fat(board, player):
-        return np.array(quack.board_features_quack_fat(board,player)).reshape(1,30)
+        board = list(board)
-        # board = list(board)
+        positives = [x if x > 0 else 0 for x in board]
-        # positives = [x if x > 0 else 0 for x in board]
+        negatives = [x if x < 0 else 0 for x in board]
-        # negatives = [x if x < 0 else 0 for x in board]
+        board.append( 15 - sum(positives))
-        # board.append( 15 - sum(positives))
+        board.append(-15 - sum(negatives))
-        # board.append(-15 - sum(negatives))
+        board += ([1, 0] if np.sign(player) > 0 else [0, 1])
-        # board += ([1, 0] if np.sign(player) > 0 else [0, 1])
+        return np.array(board).reshape(1,30)
-        # return np.array(board).reshape(1,30)
+
    # quack-fatter
    @staticmethod
@@ -97,59 +102,6 @@ class Board:
        return np.array(board_rep).reshape(1,198)
    @staticmethod
    def board_features_tesauro_fat(board, cur_player):
        def ordinary_trans(val, player):
            abs_val = val*player
            if abs_val <= 0:
                return (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
            elif abs_val == 1:
                return (1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
            elif abs_val == 2:
                return (1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
            elif abs_val == 3:
                return (1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
            elif abs_val == 4:
                return (1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
            elif abs_val == 5:
                return (1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
            elif abs_val == 6:
                return (1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0)
            elif abs_val == 7:
                return (1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0)
            elif abs_val == 8:
                return (1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0)
            elif abs_val == 9:
                return (1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0)
            elif abs_val == 10:
                return (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0)
            elif abs_val == 11:
                return (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0)
            elif abs_val == 12:
                return (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0)
            elif abs_val == 13:
                return (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0)
            elif abs_val == 14:
                return (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)
            elif abs_val == 15:
                return (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
        def bar_trans(board, player):
            if   player == 1: return (abs(board[0]/2),)
            elif player == -1: return (abs(board[25]/2),)
        board_rep = []
        for player in [1, -1]:
            for x in board[1:25]:
                board_rep += ordinary_trans(x, player)
            board_rep += bar_trans(board, player)
            board_rep += (15 - Board.num_of_checkers_for_player(board, player),)
        board_rep += ([1, 0] if cur_player == 1 else [0, 1])
        return np.array(board_rep).reshape(1, len(board_rep))
    @staticmethod
    def board_features_tesauro_wrong(board, cur_player):
        features = []
@@ -180,7 +132,96 @@ class Board:
    @staticmethod
    def is_move_valid(board, player, face_value, move):
-        return quack.is_move_valid(board, player, face_value, move)
+        if face_value == 0:
            return True
        else:
            def sign(a):
                return (a > 0) - (a < 0)
            from_idx   = move[0]
            to_idx     = move[1]
            to_state   = None
            from_state = board[from_idx]
            delta      = to_idx - from_idx
            direction  = sign(delta)
            bearing_off = None
            # FIXME: Use get instead of array-like indexing
            if to_idx >= 1 and to_idx <= 24:
                to_state   = board[to_idx]
                bearing_off = False
            else:  # Bearing off
                to_state   = 0
                bearing_off = True
            # print("_"*20)
            # print("board:", board)
            # print("to_idx:", to_idx, "board[to_idx]:", board[to_idx], "to_state:", to_state)
            # print("+"*20)
            def is_forward_move():
                return direction == player
            def face_value_match_move_length():
                return abs(delta) == face_value
            def bear_in_if_checker_on_bar():
                if player == 1:
                    bar = 0
                else:
                    bar = 25
                bar_state = board[bar]
                if bar_state != 0:
                    return from_idx == bar
                else:
                    return True
            def checkers_at_from_idx():
                return sign(from_state) == player
            def no_block_at_to_idx():
                if -sign(to_state) == player:
                    return abs(to_state) == 1
                else:
                    return True
            def can_bear_off():
                checker_idxs = Board.idxs_with_checkers_of_player(board, player)
                def bearing_directly_off():
                    if player == 1:
                        return to_idx == 25
                    if player == -1:
                        return to_idx == 0
                def is_moving_backmost_checker():
                    if player == 1:
                        return all([(idx >= from_idx) for idx in checker_idxs])
                    else:
                        return all([(idx <= from_idx) for idx in checker_idxs])
                def all_checkers_in_last_quadrant():
                    if player == 1:
                        return all([(idx >= 19) for idx in checker_idxs])
                    else:
                        return all([(idx <= 6) for idx in checker_idxs])
                return all([ (bearing_directly_off() or is_moving_backmost_checker()),
                             all_checkers_in_last_quadrant() ])
            # TODO: add switch here instead of wonky ternary in all        
            # print("is_forward:",is_forward_move())
            # print("face_value:",face_value_match_move_length())
            # print("Checkes_at_from:",checkers_at_from_idx())
            # print("no_block:",no_block_at_to_idx())
            return all([ is_forward_move(),
                         face_value_match_move_length(),
                         bear_in_if_checker_on_bar(),
                         checkers_at_from_idx(),
                         no_block_at_to_idx(),
                         can_bear_off() if bearing_off else True ])
    @staticmethod
    def any_move_valid(board, player, roll):
@@ -220,37 +261,40 @@ class Board:
    @staticmethod
-    def apply_moves_to_board(board, player, move):
+    def apply_moves_to_board(board, player, moves):
-        from_idx = move[0]
+        for move in moves:
-        to_idx = move[1]
+            from_idx, to_idx = move.split("/")
-        board = list(board)
+            board[int(from_idx)] -= int(player)
-        board[from_idx] -= player
+            board[int(to_idx)] += int(player)
-
+        return board
        if (to_idx < 1 or to_idx > 24):
            return
        if (board[to_idx] * player == -1):
            if (player == 1):
                board[25] -= player
            else:
                board[0] -= player
            board[to_idx] = 0
        board[to_idx] += player
        return tuple(board)
    @staticmethod
    def calculate_legal_states(board, player, roll):
        # Find all points with checkers on them belonging to the player
        # Iterate through each index and check if it's a possible move given the roll
        # TODO: make sure that it is not possible to do nothing on first part of
        #       turn and then do something with the second die
        def calc_moves(board, face_value):
-            if face_value == 0:
+            idxs_with_checkers = Board.idxs_with_checkers_of_player(board, player)
            if len(idxs_with_checkers) == 0:
                return [board]
-            return quack.calc_moves(board, player, face_value)
+            boards = [(Board.do_move(board,
                               player,
                               (idx, idx + (face_value * player)))
                       if Board.is_move_valid(board,
                                              player,
                                              face_value,
                                              (idx, idx + (face_value * player)))
                       else None)
                      for idx in idxs_with_checkers]
            # print("pls:",boards)
            board_list = list(filter(None, boards))  # Remove None-values
            # if len(board_list) == 0:
            #     return [board]
            # print("board list:", board_list)
            return board_list
        # Problem with cal_moves: Method can return empty list (should always contain at least same board).
        #               *Update*: Seems to be fixed.
@@ -264,17 +308,23 @@ class Board:
        if not Board.any_move_valid(board, player, roll):
            return { board }
        dice_permutations = list(itertools.permutations(roll)) if roll[0] != roll[1] else [[roll[0]]*4]
        #print("Permuts:",dice_permutations)
        # print("Dice permuts:",dice_permutations)
        for roll in dice_permutations:
            # Calculate boards resulting from first move
            #print("initial board: ", board)
            #print("roll:", roll)
            boards = calc_moves(board, roll[0])
            #print("boards after first die: ", boards)
            for die in roll[1:]:
                # Calculate boards resulting from second move
                nested_boards = [calc_moves(board, die) for board in boards]
                #print("nested boards: ", nested_boards)
                boards = [board for boards in nested_boards for board in boards]
-
+                # What the fuck
                #for board in boards:
                #    print(board)
                #    print("type__:",type(board))
                # Add resulting unique boards to set of legal boards resulting from roll
                #print("printing boards from calculate_legal_states: ", boards)
@@ -303,9 +353,9 @@ class Board:
        return """
  13  14  15  16  17  18               19  20  21  22  23  24
 +--------------------------------------------------------------------------+
-| {13}| {14}| {15}| {16}| {17}| {18}| bar -1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end  1: TODO|
+| {13}| {14}| {15}| {16}| {17}| {18}| bar -1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end -1: TODO|
 |---|---|---|---|---|---|------------|---|---|---|---|---|---|             |
-| {12}| {11}| {10}| {9}| {8}| {7}| bar  1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end -1: TODO|
+| {12}| {11}| {10}| {9}| {8}| {7}| bar  1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end  1: TODO|
 +--------------------------------------------------------------------------+
  12  11  10   9   8   7                6   5   4   3   2   1 
 """.format(*temp)
@@ -313,8 +363,42 @@ class Board:
    @staticmethod
    def do_move(board, player, move):
        # Implies that move is valid; make sure to check move validity before calling do_move(...)
        return quack.do_move(board, player, move)
        def move_to_bar(board, to_idx):
            board = list(board)
            if player == 1:
                board[25] -= player
            else:
                board[0] -= player
            board[to_idx] = 0
            return board
        # TODO: Moving in from bar is handled by the representation
        # TODONE: Handle bearing off
        from_idx = move[0]
        #print("from_idx: ", from_idx)
        to_idx = move[1]
        #print("to_idx: ", to_idx)
        # pdb.set_trace()
        board = list(board) # Make mutable copy of board
        # 'Lift' checker
        board[from_idx] -= player
        # Handle bearing off
        if to_idx < 1 or to_idx > 24:
            return tuple(board)
        # Handle hitting checkers
        if board[to_idx] * player == -1:
            board = move_to_bar(board, to_idx)
        # Put down checker
        board[to_idx] += player
        return tuple(board)
    @staticmethod
    def flip(board):
--- a/network.py
+++ b/network.py
@@ -20,9 +20,7 @@ class Network:
        'quack-fat'   : (30, Board.board_features_quack_fat),
        'quack'       : (28, Board.board_features_quack),
        'tesauro'     : (198, Board.board_features_tesauro),
-        'quack-norm'  : (30, Board.board_features_quack_norm),
+        'quack-norm'  : (30, Board.board_features_quack_norm)
        'tesauro-fat' : (726, Board.board_features_tesauro_fat),
        'tesauro-poop': (198, Board.board_features_tesauro_wrong)
    }
    def custom_tanh(self, x, name=None):
Author	SHA1	Message	Date
Christoffer Müller Madsen	445a8435b1	fix network.py	2018-05-24 17:04:49 +02:00
Christoffer Müller Madsen	32a2631ed0	fix bearing off bug	2018-05-24 16:59:07 +02:00