From 22870b90d372eaa7b87839523091d7d6d43d9b69 Mon Sep 17 00:00:00 2001
From: Alexander Munch-Hansen
Date: Tue, 6 Mar 2018 11:06:38 +0100
Subject: [PATCH] things are better now

---
 .gitignore     | 167 +++++++++++++++++++++++++++++++++++++++++++++++++
 game.py        |  39 +++++-------
 network.py     |   2 -
 restore_bot.py |  10 ++-
 4 files changed, 186 insertions(+), 32 deletions(-)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0fbef80
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,167 @@
+### Emacs ###
+# -*- mode: gitignore; -*-
+*~
+\#*\#
+/.emacs.desktop
+/.emacs.desktop.lock
+*.elc
+auto-save-list
+tramp
+.\#*
+
+# Org-mode
+.org-id-locations
+*_archive
+
+# flymake-mode
+*_flymake.*
+
+# eshell files
+/eshell/history
+/eshell/lastdir
+
+# elpa packages
+/elpa/
+
+# reftex files
+*.rel
+
+# AUCTeX auto folder
+/auto/
+
+# cask packages
+.cask/
+dist/
+
+# Flycheck
+flycheck_*.el
+
+# server auth directory
+/server/
+
+# projectiles files
+.projectile
+projectile-bookmarks.eld
+
+# directory configuration
+.dir-locals.el
+
+# saveplace
+places
+
+# url cache
+url/cache/
+
+# cedet
+ede-projects.el
+
+# smex
+smex-items
+
+# company-statistics
+company-statistics-cache.el
+
+# anaconda-mode
+anaconda-mode/
+
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+.pytest_cache/
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule.*
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+
+
+# End of https://www.gitignore.io/api/emacs,python
diff --git a/game.py b/game.py
index 1b1dbbb..5ea989e 100644
--- a/game.py
+++ b/game.py
@@ -1,24 +1,17 @@
-import time
-from human import Human
 from board import Board
 from bot import Bot
-from restore_bot import Restore_bot
-import tensorflow as tf
+from restore_bot import RestoreBot
 import numpy as np
-import random
 from cup import Cup
 
+
 class Game:
 
     def __init__(self):
         self.board = Board.initial_state
-#        self.session = tf.Session()
-#        self.restored_network = Network(self.session)
-#        self.network = Network(self.session)
-#        self.restored_network.restore_model()
         self.p1 = Bot(1)
-        self.p2 = Restore_bot(-1)
+        self.p2 = RestoreBot(-1)
         self.cup = Cup()
 
     def roll(self):
@@ -44,22 +37,22 @@ class Game:
         outcomes = []
         for episode in range(episodes):
             self.board = Board.initial_state
-            x = self.board
-            while Board.outcome(self.board) == None:
-                x_next, v_next = self.roll_and_find_best_for_bot()
-                self.p1.get_network().train(x, v_next)
-                x = x_next
+            prev_board = self.board
+            while Board.outcome(self.board) is None:
+                cur_board, cur_board_value = self.roll_and_find_best_for_bot()
+                self.p1.get_network().train(prev_board, cur_board_value)
+                prev_board = cur_board
                 self.next_round()
-            print("Outcome:",Board.outcome(self.board)[1])
+            print("Outcome:", Board.outcome(self.board)[1])
             outcomes.append(Board.outcome(self.board)[1])
-            self.p1.get_network().train(x, np.array([Board.outcome(self.board)[1]]).reshape((1,1)))
-            print("trained an episode")
+            final_score = np.array([ Board.outcome(self.board)[1] ]).reshape((1, 1))
+            self.p1.get_network().train(prev_board, final_score)
+            print("trained episode {}".format(episode))
             if episode % 10 == 0:
-                print("Saving ....")
+                print("Saving...")
                 self.p1.get_network().save_model()
         print(outcomes)
-
-
+
     def next_round_test(self):
         print(self.board)
         print()
@@ -68,12 +61,10 @@ class Game:
         print(self.board)
         print("--------------------------------")
 
-
     def play(self):
         count = 0
-        while Board.outcome(self.board) == None:
+        while Board.outcome(self.board) is None:
             count += 1
-            print("Turn:",count)
 
             roll = self.roll()
 
diff --git a/network.py b/network.py
index a700e7c..d548f26 100644
--- a/network.py
+++ b/network.py
@@ -89,8 +89,6 @@ class Network:
 
         return val
 
-
-
     def save_model(self):
         self.saver.save(self.session, self.checkpoint_path + 'model.ckpt')
 
diff --git a/restore_bot.py b/restore_bot.py
index be199e8..b238815 100644
--- a/restore_bot.py
+++ b/restore_bot.py
@@ -5,7 +5,7 @@ from network import Network
 from board import Board
 import random
 
-class Restore_bot:
+class RestoreBot:
 
     def __init__(self, sym):
         self.cup = Cup()
@@ -34,10 +34,8 @@ class Restore_bot:
 #        print(Board.pretty(board))
         legal_moves = Board.calculate_legal_states(board, sym, roll)
         legal_list = list(legal_moves)
-        lol = []
-        for move in legal_list:
-            lol.append(self.network.eval_state(np.array(move).reshape(1,26)))
-        print("Found the best state, being:", np.array(lol).argmax())
-        return legal_list[np.array(lol).argmax()]
+        move_scores = [ self.network.eval_state(np.array(move).reshape(1,26)) for move in legal_list ]
+        print("Found the best state, being:", np.array(move_scores).argmax())
+        return legal_list[np.array(move_scores).argmax()]
 #        return random.choice(list(legal_moves))
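
Note on the game.py hunk (commentary, not part of the patch): the reworked train_model loop is a TD-style update. Each step trains the network toward the value of the board the bot is about to move to, and one final call trains toward the actual game outcome, reshaped to (1, 1). A minimal sketch of that loop shape follows; train_one_episode, pick_best, train and the fake transitions are illustrative stand-ins, not names from the repo, and roll_and_find_best_for_bot and Network.train are assumed to behave as the diff suggests.

import numpy as np

def train_one_episode(initial_board, pick_best, train, final_outcome):
    # Shape of the loop in Game.train_model: every step trains toward the value of the
    # board the bot is about to move to; the last call trains toward the real outcome.
    prev_board = initial_board
    for cur_board, cur_board_value in pick_best():       # stand-in for the roll/move loop
        train(prev_board, cur_board_value)                # bootstrap target: next state's value
        prev_board = cur_board
    final_score = np.array([final_outcome]).reshape((1, 1))  # same (1, 1) target shaping as the patch
    train(prev_board, final_score)

# Toy usage: two fake transitions, then a win (+1).
fake_transitions = [(np.ones(26), 0.3), (np.full(26, 2.0), 0.7)]
train_one_episode(np.zeros(26),
                  lambda: iter(fake_transitions),
                  lambda board, target: print("train target:", np.asarray(target).reshape(-1)[0]),
                  final_outcome=1)

The real loop additionally checkpoints with save_model() every 10 episodes, which the sketch leaves out.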
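
Note on the restore_bot.py hunk (commentary, not part of the patch): make_move now scores every legal successor state with the network and returns the argmax. A minimal sketch of that selection pattern follows; select_best_state, dummy_eval and the fake 26-element boards are illustrative stand-ins for Network.eval_state and the output of Board.calculate_legal_states, not code from the repo.

import numpy as np

def select_best_state(legal_states, eval_state):
    # Mirrors the new RestoreBot.make_move: score every candidate board, keep the argmax.
    legal_list = list(legal_states)
    scores = [eval_state(np.array(state).reshape(1, 26)) for state in legal_list]
    return legal_list[int(np.argmax(scores))]

# Toy usage with a stand-in evaluator that just sums the board representation.
dummy_eval = lambda board: float(board.sum())
candidates = [tuple(np.zeros(26, dtype=int)), tuple(np.ones(26, dtype=int))]
print(select_best_state(candidates, dummy_eval))   # picks the all-ones board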