nightr/server/nightr/strategies/svm_strat.py

63 lines
1.9 KiB
Python

from pathlib import Path
from sklearn import svm
from sklearn.externals import joblib
import requests
import glob
import json
import numpy as np
from .strat_utils import write_json
from ..util import Context, Prediction
def write_data(time):
    """Store a snapshot of the parking dataset through ``write_json``.

    :param time: passed through unchanged as the third argument of
        ``write_json``; presumably the timestamp used to tag the snapshot
        (confirm against ``strat_utils.write_json``).
    """
    resource_url = "https://portal.opendata.dk/api/3/action/datastore_search?resource_id=2a82a145-0195-4081-a13c-b0e587e9b89c"
    snapshot_name = "parking_aarhus"
    write_json(resource_url, snapshot_name, time)
def load_data():
    """Build the training set from parking snapshots saved on disk.

    Every file in the current working directory whose name starts with
    ``parking_aarhus`` is parsed as JSON. Each file becomes one sample: the
    list of occupancy ratios (``vehicleCount / totalSpaces``) of the entries
    under ``result.records``. The label is 1 when the file name contains
    ``'2235'`` and 0 otherwise (presumably 22:35 marks a night-time
    snapshot; confirm against how the files are named).

    Ratios are capped at 1, exactly as ``perform_svm_pred`` does before
    predicting, so the classifier is trained on the same feature range it is
    later queried with. An entry reporting zero total spaces contributes 0.0
    instead of raising ``ZeroDivisionError``.

    :return: tuple ``(X, Y)`` of numpy arrays; one row of ``X`` and one
        element of ``Y`` per file. Both are empty when no file matches.
    """
    X = []
    Y = []
    # glob yields files in filesystem order; sort so the sample order (and
    # therefore the fitted model) is reproducible between runs.
    for filename in sorted(glob.glob("parking_aarhus*")):
        p_class = '2235' in filename
        with open(filename, encoding="utf-8") as snapshot:
            data = json.load(snapshot)
        records = data['result']['records']
        frequencies = [
            min(house['vehicleCount'] / house['totalSpaces'], 1)
            if house['totalSpaces'] else 0.0
            for house in records
        ]
        X.append(frequencies)
        Y.append(int(p_class))
    return np.array(X), np.array(Y)
def train():
    """Fit the SVM on the saved snapshots and persist it for ``predict``.

    Loads the samples via ``load_data``, fits an ``svm.SVC`` with
    ``gamma=0.01`` and probability estimates enabled, and pickles the fitted
    classifier as ``nightness_classifier.pkl``.

    The model is written next to this module rather than into the current
    working directory, because ``predict`` loads it from the module's
    directory; writing to the working directory only worked when training
    happened to be started from that same directory.
    """
    X, Y = load_data()
    classifier = svm.SVC(gamma=0.01, probability=True)
    classifier.fit(X, Y)
    # Same location predict() reads from.
    model_path = Path(__file__).parent.joinpath("nightness_classifier.pkl")
    joblib.dump(classifier, str(model_path))
def predict(X):
    """Score one sample with the pickled classifier stored beside this module.

    :param X: feature values of a single sample; converted to a numpy array
        and reshaped into one row before scoring.
    :return: the entry in row 0, column 1 of ``predict_proba``'s output,
        i.e. the probability of the second class (label 1 when the model was
        fitted on 0/1 labels as ``train`` does).
    """
    model_file = Path(__file__).parent / "nightness_classifier.pkl"
    model = joblib.load(str(model_file))
    sample = np.array(X).reshape(1, -1)
    probabilities = model.predict_proba(sample)
    return probabilities[0, 1]
def perform_svm_pred(context: Context) -> Prediction:
    """
    An SVM trained on two data points, which is capable of guessing 0.5 no matter what.

    Downloads the current records from the portal.opendata.dk datastore,
    converts each entry to an occupancy ratio (``vehicleCount / totalSpaces``)
    capped at 1, and feeds the resulting vector to ``predict``.

    :param context: not used by this strategy.
    :return: a ``Prediction`` with weight 0.5, three explanatory reasons and
        ``probability`` set to the float value returned by ``predict``.
    :raises requests.exceptions.Timeout: if the portal does not answer within
        the timeout.
    """
    p = Prediction()
    p.weight = 0.5

    # requests waits forever unless told otherwise; bound the wait so a
    # stalled portal cannot hang the whole prediction.
    data = requests.get(
        'https://portal.opendata.dk/api/3/action/datastore_search?resource_id=2a82a145-0195-4081-a13c-b0e587e9b89c',
        timeout=10,
    )
    records = data.json()['result']['records']

    # Cap at 1 (more vehicles than spaces can be reported) and map an entry
    # with zero total spaces to 0.0 instead of dividing by zero.
    X = [
        min(house['vehicleCount'] / house['totalSpaces'], 1)
        if house['totalSpaces'] else 0.0
        for house in records
    ]

    p.reasons.append("We only have two data points")
    p.reasons.append("Our only two data points have 11 dimensions")
    p.reasons.append("We are using a SVM. Apparently that's a poor idea.")
    p.probability = float(predict(X))
    return p