from datetime import datetime, timedelta

import numpy as np
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA

import model_utils as mu

# Columns of the lag-transformed frame handed to ARIMA: 'prices' is the
# target, every other column is passed to the model as an exogenous regressor.
_MODEL_COLUMNS = ['prices', 'price_eth', 'GSPC', 'Day', 'Month', 'TNX',
                  'Employment', 'google_trend', 'EURUSD']

# (p, d, q) order used for both the production fit and the rolling evaluation.
_ARIMA_ORDER = (2, 1, 2)


def _train_fraction(n_rows):
    """Return the share of rows used for the initial training window."""
    if n_rows > 500:
        return 0.9
    if n_rows > 200:
        return 0.8
    return 0.7


def _one_step_forecast(history, next_row):
    """Fit ARIMA on *history* and forecast 'prices' for the single *next_row*."""
    fitted = ARIMA(
        history['prices'],
        exog=history.drop(columns='prices'),
        order=_ARIMA_ORDER,
    ).fit()
    forecast = fitted.forecast(steps=1, exog=next_row.drop(columns='prices'))
    # forecast() yields a one-element result; flatten so this works whether
    # it comes back as a pandas Series or a plain array.
    return float(np.asarray(forecast).ravel()[0])


def _rolling_forecasts(data, train_fraction):
    """Walk forward one row at a time and return (predicted, actual) arrays."""
    n_rows = len(data)
    first_split = int(n_rows * train_fraction)
    n_steps = int(n_rows - n_rows * train_fraction)

    predicted = []
    actual = []
    # NOTE(review): the window runs up to the last row of `data`, which is the
    # same row model_run treats as the day being forecast. If that row does
    # not carry a real observed price, it should be excluded -- confirm
    # against mu.data_transform.
    for split in range(first_split, first_split + n_steps):
        history = data.iloc[:split]
        next_row = data.iloc[split:split + 1]
        predicted.append(_one_step_forecast(history, next_row))
        actual.append(next_row['prices'].iloc[0])

    return np.array(predicted, dtype=float), np.array(actual, dtype=float)


def model_run(df_all):
    """
    Forecast the next 'prices' value with ARIMA and score the model.

    The input is passed to ``mu.data_transform`` together with tomorrow's
    timestamp. From the returned lag frame the model columns are selected,
    indexed by a date built from the 'Year'/'Month'/'Day' columns, and rows
    with missing values are dropped. An ARIMA(2, 1, 2) model with exogenous
    regressors is fitted on all rows but the last, and a one-step forecast is
    made for the last row. The model is then re-fitted in an expanding
    window over the tail of the data to compute an out-of-sample R^2.

    Side effects: prints a progress message and writes the rolling
    evaluation results to 'result_arima_kat.csv' in the working directory.

    Parameters
    ----------
    df_all
        Raw input forwarded unchanged to ``mu.data_transform``.

    Returns
    -------
    tuple
        ``(df_with_forecast, accuracy, result_arima)`` where
        ``df_with_forecast`` is a copy of the lag frame whose last row has
        'prices' replaced by the forecast, ``accuracy`` is the R^2 of the
        rolling one-step forecasts against the observed values, and
        ``result_arima`` is a DataFrame with columns 'prediction' and 'data'.
    """
    first_day_future = pd.to_datetime(datetime.now() + timedelta(days=1))

    # ------------- Dataset manipulation for supervised learning -------------
    reframed_lags, _ = mu.data_transform(df_all, first_day_future)
    print('I have transformed the dataset into the frame for supervised learning')

    date = pd.to_datetime(dict(year=reframed_lags['Year'],
                               month=reframed_lags['Month'],
                               day=reframed_lags['Day']))
    model_frame = (
        pd.concat([date.rename('date'), reframed_lags[_MODEL_COLUMNS]], axis=1)
        .set_index('date')
        .dropna()
    )

    # ------------- Production forecast for the last row -------------
    tomorrow_price = _one_step_forecast(model_frame.iloc[:-1],
                                        model_frame.iloc[-1:])

    df_with_forecast = reframed_lags.copy()
    is_last_row = df_with_forecast.index == df_with_forecast.index[-1]
    df_with_forecast.loc[is_last_row, 'prices'] = tomorrow_price

    # ------------- Model accuracy (expanding-window evaluation) -------------
    # The training share is chosen from the size of the untrimmed lag frame.
    predicted, actual = _rolling_forecasts(
        model_frame, _train_fraction(len(reframed_lags)))

    from sklearn.metrics import r2_score
    # r2_score expects (y_true, y_pred); R^2 is not symmetric in its arguments.
    accuracy = r2_score(actual, predicted)

    result_arima = pd.DataFrame({'prediction': predicted, 'data': actual})
    result_arima.to_csv('result_arima_kat.csv')
    return df_with_forecast, accuracy, result_arima