backgammon/plot.py
Christoffer Müller Madsen 4c43bf19a3
Add evaluation variance benchmark
To do a benchmark for `pubeval`, run `python3 main.py --bench-eval-scores
--eval-methods pubeval`

Logs will be placed in directory `bench`

Use `plot_bench(data_path)` in `plot.py` for plotting
2018-03-26 16:45:26 +02:00

69 lines
2.0 KiB
Python

import os
import pandas as pd
from datetime import datetime
import csv
import datetime
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import matplotlib.dates as mdates
# Column names for the header-less, semicolon-separated log files read below.
# `train.log` columns:
train_headers = [
    'timestamp',
    'eps_train',
    'eps_trained_session',
    'sum',
    'mean',
]
# `eval.log` columns:
eval_headers = [
    'timestamp',
    'method',
    'eps_train',
    'eval_eps_used',
    'sum',
    'mean',
]
# Benchmark log columns (the first three are used as the index by plot_bench):
bench_headers = [
    'method',
    'sample_count',
    'i',
    'time',
    'sum',
    'mean',
]
# Directory that holds one sub-directory per model.
model_path = 'models'
def plot_bench(data_path):
    """Show a box plot of benchmark mean scores for every evaluation method.

    The file at ``data_path`` is read as a semicolon-separated table whose
    columns are named by ``bench_headers``; the first three columns
    (method, sample_count, i) form the index. For each method found in the
    file, one figure is shown with one box per sample count, built from the
    'mean' column of that method's rows.

    Args:
        data_path: Path to the benchmark log file.
    """
    df = pd.read_csv(data_path, sep=";",
                     names=bench_headers, index_col=[0, 1, 2])

    for method_label in df.index.levels[0]:
        # Select the rows of *this* method (previously 'pubeval' was
        # hard-coded, so every figure showed the same data).
        cur_df = df.loc[method_label]

        # unstack() moves the innermost index level ('i') into the columns;
        # transposing then leaves one column -- hence one box -- per
        # sample count.
        plot = cur_df[['mean']].unstack().T.plot.box()
        plot.set_title("Evaluation variance, {}".format(method_label))
        plot.set_xlabel("Sample count")
        plot.set_ylabel("Mean score")

        # pyplot.show() only accepts the keyword `block`; block until the
        # window is closed so the figures appear one after another.
        plt.show(block=True)
def dataframes(model_name):
    """Load the evaluation and training logs of a model.

    The logs are read from ``<model_path>/<model_name>/logs`` as
    semicolon-separated files, with columns named by ``eval_headers`` and
    ``train_headers`` respectively. The 'timestamp' column of each frame is
    converted with ``datetime.datetime.fromtimestamp``.

    Args:
        model_name: Name of the model's directory below ``model_path``.

    Returns:
        A dict with the keys 'eval' and 'train', each mapping to the
        corresponding DataFrame.
    """
    log_dir = os.path.join(model_path, model_name, 'logs')

    # Read every log before converting anything, evaluation log first.
    frames = {}
    for key, file_name, columns in (('eval', 'eval.log', eval_headers),
                                    ('train', 'train.log', train_headers)):
        frames[key] = pd.read_csv(os.path.join(log_dir, file_name),
                                  sep=';', names=columns)

    # Replace the raw timestamps in place with datetime objects.
    for frame in frames.values():
        frame['timestamp'] = frame['timestamp'].map(
            datetime.datetime.fromtimestamp)

    return frames
if __name__ == '__main__':
    # Live view of the evaluation log of model 'a': the log is re-read and
    # re-plotted every two seconds until the process is interrupted.
    fig, ax = plt.subplots(1, 1)
    plt.ion()  # interactive mode, so show() below does not block
    plt.title('Mean over episodes')
    plt.xlabel('Episodes trained')
    plt.ylabel('Mean')
    plt.grid(True)
    ax.set_ylim([-2, 2])
    plt.show()

    points = None
    while True:
        df = dataframes('a')['eval']
        print(df)

        # Every pass plots the complete log, so drop the previous scatter
        # first; otherwise one more collection piles up on the axes every
        # iteration and the figure grows without bound.
        if points is not None:
            points.remove()

        x = df['eps_train']
        y = df['mean']
        points = plt.scatter(x, y, c=[[1, 0.5, 0]])

        # pause() redraws the figure and runs the GUI event loop.
        plt.pause(2)