nightr/server/nightr/strategies/svm_strat.py

from pathlib import Path

from sklearn import svm
from sklearn.externals import joblib
import requests
import glob
import json
import numpy as np

from .strat_utils import write_json
from ..util import Context, Prediction

def write_data(time):
    """Hand the Aarhus parking endpoint to ``write_json``.

    The open-data portal URL, the name ``"parking_aarhus"`` and ``time`` are
    forwarded unchanged; what is written, and where, is decided by
    ``write_json`` in ``strat_utils``.
    """
    source_url = "https://portal.opendata.dk/api/3/action/datastore_search?resource_id=2a82a145-0195-4081-a13c-b0e587e9b89c"
    dataset_name = "parking_aarhus"
    write_json(source_url, dataset_name, time)

def load_data():
    """Build the SVM training set from saved parking snapshots.

    Every path in the current working directory matching ``parking_aarhus*``
    is parsed as JSON. Each file contributes one sample: the list of
    ``vehicleCount / totalSpaces`` ratios of the entries under
    ``result -> records``, clamped to at most 1. The label is 1 when the file
    name contains ``'2235'`` and 0 otherwise (presumably a timestamp marking
    the night-time snapshot -- confirm against ``write_json``).

    Returns:
        A tuple ``(X, Y)`` of numpy arrays: ``X`` holds one row of ratios per
        file, ``Y`` the matching 0/1 labels. Both are empty when no file
        matches.

    Raises:
        ZeroDivisionError: if a record reports ``totalSpaces`` of 0.
    """
    features = []
    labels = []

    # glob returns paths in arbitrary order; sort so the resulting arrays are
    # reproducible between runs and machines.
    for filename in sorted(glob.glob("parking_aarhus*")):
        with open(filename) as file:
            records = json.load(file)['result']['records']

        # Clamp ratios to 1 exactly as perform_svm_pred does for the live
        # sample, so the model is trained and queried on the same feature
        # range.
        features.append(
            [min(house['vehicleCount'] / house['totalSpaces'], 1) for house in records]
        )
        labels.append(int('2235' in filename))

    return np.array(features), np.array(labels)

def train():
    """Fit the SVM on ``load_data()`` and persist it where ``predict`` reads it.

    The classifier is saved as ``nightness_classifier.pkl`` in this module's
    directory, which is the path ``predict`` loads from. Saving relative to
    the working directory would leave the served model untouched whenever
    training is started from anywhere else.
    """
    X, Y = load_data()
    # probability=True is required for the predict_proba call in predict().
    classifier = svm.SVC(gamma=0.01, probability=True)
    classifier.fit(X, Y)
    model_path = Path(__file__).parent.joinpath("nightness_classifier.pkl")
    joblib.dump(classifier, str(model_path))

def predict(X):
    """Score a single feature vector with the pickled classifier.

    The model is read from ``nightness_classifier.pkl`` next to this module
    on every call. ``X`` is reshaped into a single-row matrix before being
    handed to ``predict_proba``.

    Returns:
        The entry in row 0, column 1 of the ``predict_proba`` output, i.e.
        the probability of the second class the model knows (label 1 when
        the model was fitted on ``load_data()`` output).
    """
    model_file = Path(__file__).parent / "nightness_classifier.pkl"
    model = joblib.load(str(model_file))
    # One sample, as many features as X has elements.
    sample = np.array(X).reshape(1, -1)
    probabilities = model.predict_proba(sample)
    return probabilities[0, 1]


def perform_svm_pred(context: Context) -> Prediction:
    """
    An SVM trained on two data points, which is capable of guessing 0.5 no matter what.

    Fetches the current Aarhus parking records from the open-data portal,
    turns each record into an occupancy ratio (``vehicleCount / totalSpaces``,
    clamped to at most 1) and feeds the resulting vector to ``predict``.

    Args:
        context: Accepted for the strategy interface; not read here.

    Returns:
        A ``Prediction`` with weight 0.5, three fixed reasons and
        ``probability`` set to the classifier output as a float.

    Raises:
        requests.exceptions.RequestException: if the portal cannot be reached
            or does not answer within the timeout.
        ZeroDivisionError: if a record reports ``totalSpaces`` of 0.
    """
    p = Prediction()
    p.weight = 0.5
    # requests waits forever by default; bound the wait so a stalled portal
    # cannot hang the caller. The 10 s figure is a pragmatic choice.
    data = requests.get(
        'https://portal.opendata.dk/api/3/action/datastore_search?resource_id=2a82a145-0195-4081-a13c-b0e587e9b89c',
        timeout=10,
    )

    records = data.json()['result']['records']
    # A reported count above capacity is clamped, so no feature exceeds 1.
    X = [min(house['vehicleCount'] / house['totalSpaces'], 1) for house in records]
    p.reasons.append("We only have two data points")
    p.reasons.append("Our only two data points have 11 dimensions")
    p.reasons.append("We are using a SVM. Apparently that's a poor idea.")

    p.probability = float(predict(X))
    return p