File size: 3,635 Bytes
05a3e2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
from datetime import datetime, timedelta
import pandas as pd
import numpy as np

import model_utils as mu
from statsmodels.tsa.arima.model import ARIMA

def _rolling_train_fraction(n_rows):
    """Return the share of rows used as the initial training window.

    Longer histories keep a larger share for training: 0.9 above 500 rows,
    0.8 above 200 rows, and 0.7 otherwise.
    """
    if n_rows > 500:
        return 0.9
    if n_rows > 200:
        return 0.8
    return 0.7


def _rolling_one_step_forecasts(data, train_fraction, order=(2, 1, 2)):
    """Refit ARIMA on a growing window and forecast one step ahead each time.

    Args:
        data: Date-indexed frame holding a 'prices' column (the target) plus
            the exogenous regressor columns.
        train_fraction: Share of rows in the first training window.
        order: ARIMA (p, d, q) order used for every refit.

    Returns:
        Tuple ``(forecasts, actuals)`` of equally long float arrays, one entry
        per rolling step.
    """
    n_rows = len(data)
    initial_train = int(n_rows * train_fraction)
    n_steps = int(n_rows - n_rows * train_fraction)

    forecasts = []
    actuals = []
    for step in range(n_steps):
        split = initial_train + step
        train = data.iloc[:split, :]
        # The single row right after the training window is the hold-out.
        test = data.iloc[split:split + 1, :]

        fitted = ARIMA(
            train['prices'],
            exog=train.drop(columns='prices'),
            order=order,
        ).fit()
        forecast = fitted.forecast(steps=1, exog=test.drop(columns='prices'))

        forecasts.append(np.asarray(forecast)[0])
        actuals.append(test['prices'].iloc[0])

    return np.asarray(forecasts, dtype=float), np.asarray(actuals, dtype=float)


def model_run(df_all):
    """Forecast the next 'prices' value with ARIMA(2,1,2) and score the model.

    The input is passed through ``mu.data_transform`` together with tomorrow's
    timestamp. An ARIMA model with exogenous regressors is fitted on every row
    but the last one (after dropping rows with missing values), and the last
    row's regressors are used to forecast its price. The model is then scored
    with a rolling one-step-ahead evaluation, and the per-step results are
    written to 'result_arima_kat.csv' in the current working directory.

    Args:
        df_all: Raw input data handed to ``mu.data_transform``.
            NOTE(review): the expected schema is defined by that helper and is
            not visible here.

    Returns:
        Tuple ``(df_with_forecast, accuracy, result_arima)``:
            df_with_forecast: copy of the transformed frame whose last-index
                'prices' value is replaced by the forecast.
            accuracy: R^2 of the rolling forecasts against the actual prices.
            result_arima: DataFrame with columns 'prediction' and 'data'.
    """
    # Kept as a function-scope import, as in the original code.
    from sklearn.metrics import r2_score

    first_day_future = pd.to_datetime(datetime.now() + timedelta(days=1))

    # ---- Dataset manipulation for supervised learning ----
    reframed_lags, _ = mu.data_transform(df_all, first_day_future)
    print('I have transformed the dataset into the frame for supervised learning')

    feature_columns = ['prices', 'price_eth', 'GSPC', 'Day', 'Month', 'TNX',
                       'Employment', 'google_trend', 'EURUSD']
    df = reframed_lags[feature_columns]
    date = pd.to_datetime(dict(year=reframed_lags['Year'],
                               month=reframed_lags['Month'],
                               day=reframed_lags['Day']))
    df_with_date = (
        pd.concat([date.rename('date'), df], axis=1)
        .set_index('date')
        .dropna()
    )

    # The last remaining row is the one to forecast; everything before it is
    # the training history.
    df_past = df_with_date.iloc[:-1, :]
    df_future = df_with_date.iloc[-1:, :]

    model_fit = ARIMA(
        df_past['prices'],
        exog=df_past.drop(columns=['prices']),
        order=(2, 1, 2),
    ).fit()
    next_price = np.asarray(
        model_fit.forecast(steps=1, exog=df_future.drop(columns='prices'))
    )[-1]

    # Write the forecast into the last row of a copy of the transformed frame.
    df_with_forecast = reframed_lags.copy()
    is_last_row = df_with_forecast.index == df_with_forecast.index[-1]
    df_with_forecast.loc[is_last_row, 'prices'] = next_price

    # ---- Model accuracy: rolling one-step-ahead evaluation ----
    # NOTE(review): the evaluation runs over df_with_date, which still contains
    # the row treated as "future" above -- confirm this is intended.
    train_size = _rolling_train_fraction(len(reframed_lags))
    rolling_predictions, rolling_actuals = _rolling_one_step_forecasts(
        df_with_date, train_size
    )

    # r2_score expects (y_true, y_pred); R^2 is not symmetric in its arguments.
    accuracy = r2_score(rolling_actuals, rolling_predictions)
    result_arima = pd.DataFrame({'prediction': rolling_predictions,
                                 'data': rolling_actuals})
    result_arima.to_csv('result_arima_kat.csv')
    return df_with_forecast, accuracy, result_arima