things are better now

This commit is contained in:
Alexander Munch-Hansen 2018-03-06 11:06:38 +01:00
parent d3fe3c918c
commit 22870b90d3
4 changed files with 186 additions and 32 deletions

167
.gitignore vendored Normal file
View File

@@ -0,0 +1,167 @@
### Emacs ###
# -*- mode: gitignore; -*-
*~
\#*\#
/.emacs.desktop
/.emacs.desktop.lock
*.elc
auto-save-list
tramp
.\#*
# Org-mode
.org-id-locations
*_archive
# flymake-mode
*_flymake.*
# eshell files
/eshell/history
/eshell/lastdir
# elpa packages
/elpa/
# reftex files
*.rel
# AUCTeX auto folder
/auto/
# cask packages
.cask/
dist/
# Flycheck
flycheck_*.el
# server auth directory
/server/
# projectiles files
.projectile
projectile-bookmarks.eld
# directory configuration
.dir-locals.el
# saveplace
places
# url cache
url/cache/
# cedet
ede-projects.el
# smex
smex-items
# company-statistics
company-statistics-cache.el
# anaconda-mode
anaconda-mode/
### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
.pytest_cache/
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule.*
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
# End of https://www.gitignore.io/api/emacs,python

37
game.py
View File

@@ -1,24 +1,17 @@
import time
from human import Human
from board import Board from board import Board
from bot import Bot from bot import Bot
from restore_bot import Restore_bot from restore_bot import RestoreBot
import tensorflow as tf
import numpy as np import numpy as np
import random
from cup import Cup from cup import Cup
class Game: class Game:
def __init__(self): def __init__(self):
self.board = Board.initial_state self.board = Board.initial_state
# self.session = tf.Session()
# self.restored_network = Network(self.session)
# self.network = Network(self.session)
# self.restored_network.restore_model()
self.p1 = Bot(1) self.p1 = Bot(1)
self.p2 = Restore_bot(-1) self.p2 = RestoreBot(-1)
self.cup = Cup() self.cup = Cup()
def roll(self): def roll(self):
@@ -44,22 +37,22 @@ class Game:
outcomes = [] outcomes = []
for episode in range(episodes): for episode in range(episodes):
self.board = Board.initial_state self.board = Board.initial_state
x = self.board prev_board = self.board
while Board.outcome(self.board) == None: while Board.outcome(self.board) is None:
x_next, v_next = self.roll_and_find_best_for_bot() cur_board, cur_board_value = self.roll_and_find_best_for_bot()
self.p1.get_network().train(x, v_next) self.p1.get_network().train(prev_board, cur_board_value)
x = x_next prev_board = cur_board
self.next_round() self.next_round()
print("Outcome:",Board.outcome(self.board)[1]) print("Outcome:", Board.outcome(self.board)[1])
outcomes.append(Board.outcome(self.board)[1]) outcomes.append(Board.outcome(self.board)[1])
self.p1.get_network().train(x, np.array([Board.outcome(self.board)[1]]).reshape((1,1))) final_score = np.array([ Board.outcome(self.board)[1] ]).reshape((1, 1))
print("trained an episode") self.p1.get_network().train(prev_board, final_score)
print("trained episode {}".format(episode))
if episode % 10 == 0: if episode % 10 == 0:
print("Saving ....") print("Saving...")
self.p1.get_network().save_model() self.p1.get_network().save_model()
print(outcomes) print(outcomes)
def next_round_test(self): def next_round_test(self):
print(self.board) print(self.board)
print() print()
@@ -68,12 +61,10 @@ class Game:
print(self.board) print(self.board)
print("--------------------------------") print("--------------------------------")
def play(self): def play(self):
count = 0 count = 0
while Board.outcome(self.board) == None: while Board.outcome(self.board) is None:
count += 1 count += 1
print("Turn:",count) print("Turn:",count)
roll = self.roll() roll = self.roll()

network.py
View File

@@ -89,8 +89,6 @@ class Network:
return val return val
def save_model(self): def save_model(self):
self.saver.save(self.session, self.checkpoint_path + 'model.ckpt') self.saver.save(self.session, self.checkpoint_path + 'model.ckpt')

restore_bot.py
View File

@@ -5,7 +5,7 @@ from network import Network
from board import Board from board import Board
import random import random
class Restore_bot: class RestoreBot:
def __init__(self, sym): def __init__(self, sym):
self.cup = Cup() self.cup = Cup()
@@ -34,10 +34,8 @@ class Restore_bot:
# print(Board.pretty(board)) # print(Board.pretty(board))
legal_moves = Board.calculate_legal_states(board, sym, roll) legal_moves = Board.calculate_legal_states(board, sym, roll)
legal_list = list(legal_moves) legal_list = list(legal_moves)
lol = [] move_scores = [ self.network.eval_state(np.array(move).reshape(1,26)) for move in legal_list ]
for move in legal_list: print("Found the best state, being:", np.array(move_scores).argmax())
lol.append(self.network.eval_state(np.array(move).reshape(1,26))) return legal_list[np.array(move_scores).argmax()]
print("Found the best state, being:", np.array(lol).argmax())
return legal_list[np.array(lol).argmax()]
# return random.choice(list(legal_moves)) # return random.choice(list(legal_moves))