# SVR-based speed estimation, performed using the MA predictions 
# The output is a file which contains speed estimations for all vehicles

import h5py
import numpy as np
from sklearn.svm import SVR
from feature_extraction import time_samples
import project_tools as pt
from importlib import reload
reload(pt)

###############################################################################
# Number of runs: the outer loop below iterates `runs` times and uses the run
# index to select a row of each '<vehicle>_MA_train' / '<vehicle>_MA_test' dataset.
runs = 20

# Hyperparameters passed unchanged to sklearn.svm.SVR (C and epsilon arguments).
C = 10
epsilon = 0.1
# Window length handed to pt.speed_est_feats when building the SVR input features
# (units are not visible in this file -- presumably time samples; TODO confirm).
win_len = 36
###############################################################################

# Input folder (MA regressions) and output folder (speed estimations).
folder_MA_regr = 'results/MA_regressions/'
folder_speeds = 'results/speed_estimations/'

# Model identifier; it is part of both the input and the output file names.
model = 'NN_1000-200-50-10-1_reg1e-3_lossMSE'

# Read-only handle on the regression file; it stays open until the end of the script.
hf = h5py.File(f'{folder_MA_regr}regression_{model}.h5', 'r')

# Vehicle names, used as prefixes of the dataset keys in the h5 files.
vehicles = [
    "CitroenC4Picasso",
    "Mazda3",
    "MercedesAMG550",
    "NissanQashqai",
    "OpelInsignia",
    "Peugeot3008",
    "Peugeot307",
    "RenaultCaptur",
    "RenaultScenic",
    "VWPassat",
]

n_veh = len(vehicles)

# Speed estimations for all vehicles and all runs: one zero-initialised array
# per vehicle, with one row per run and one column per element of that
# vehicle's '<vehicle>_speeds_test' dataset.
speed_est_all = [
    np.zeros((runs, hf[f'{veh_name}_speeds_test'].size))
    for veh_name in vehicles
]

# For every run and every vehicle: fit an SVR on features derived from the
# training MA predictions, estimate the test speeds, store them in
# speed_est_all and accumulate the squared error over all test samples.
speed_samp = 0      # total number of test speed samples seen so far
err_total = 0       # running sum of squared errors; divided into an MSE below
for run in range(runs):
    for veh_ind, veh_name in enumerate(vehicles):
        # Read only the row of the current run from the file instead of
        # loading the whole dataset into memory on every iteration.
        MA_train = np.asarray(hf[f'{veh_name}_MA_train'][run, :],
                              dtype=np.float64).reshape(-1, time_samples)
        speeds_train = np.array(hf[f'{veh_name}_speeds_train'], dtype=np.float64)

        # Remove no vehicle cases (training entries whose speed is not positive)
        ind_vehs = np.argwhere(speeds_train > 0).flatten()
        MA_train = MA_train[ind_vehs, :]
        speeds_train = speeds_train[ind_vehs]

        MA_test = np.asarray(hf[f'{veh_name}_MA_test'][run, :],
                             dtype=np.float64).reshape(-1, time_samples)
        speeds_test = np.array(hf[f'{veh_name}_speeds_test'], dtype=np.float64)

        # Maxima positions and windowed features come from project_tools;
        # their exact definition is not visible in this file.
        MA_maxima_pos_train = pt.detect_MA_maxima(MA_train)
        MA_maxima_pos_test = pt.detect_MA_maxima(MA_test)

        x_train = pt.speed_est_feats(MA_train, MA_maxima_pos_train, win_len)
        x_test = pt.speed_est_feats(MA_test, MA_maxima_pos_test, win_len)

        # Shuffle the training pairs, fit the regressor, predict the test speeds
        x_train_shuff, y_train_shuff = pt.shuffle_data(x_train, speeds_train)
        regressor = SVR(gamma='scale', C=C, epsilon=epsilon)
        regressor.fit(x_train_shuff, y_train_shuff)
        speeds_test_est = regressor.predict(x_test).flatten()
        speed_est_all[veh_ind][run, :] = speeds_test_est

        err_total += np.sum(np.square(speeds_test - speeds_test_est))
        speed_samp += np.size(speeds_test)

    print(f"run {run}")

# Mean squared error over every test sample of every vehicle and run
err_total /= speed_samp
print(f"Total MSE: {err_total:.2f}")


# Create h5 file with speed estimations per vehicle.
# 20 fields in h5 file: 10 for speed estimations (one per vehicle) and 10 for ground truth (gt) speeds
# The output file is managed by a `with` block and the input file is closed in
# `finally`, so both handles are released even if writing a dataset fails.
try:
    with h5py.File(f'{folder_speeds}speed_estimations_{model}.h5', 'w') as hf2:
        for veh_ind, veh_name in enumerate(vehicles):
            hf2.create_dataset(f'{veh_name}_speeds_est_all',
                               data=speed_est_all[veh_ind],
                               compression="gzip")
            hf2.create_dataset(f'{veh_name}_speeds_gt',
                               data=np.array(hf[f'{veh_name}_speeds_test'], dtype=np.float64),
                               compression="gzip")
finally:
    hf.close()