Spaces:

KatGaw
/

Krypto1

Sleeping

App Files Files Community

Krypto1 / app_crypto_arima_model.py

KatGaw

adding new reddit group

05a3e2c about 1 month ago

raw

history blame

3.64 kB

	from datetime import datetime, timedelta
	import pandas as pd
	import numpy as np

	import model_utils as mu
	from statsmodels.tsa.arima.model import ARIMA

	def _train_fraction(n_rows):
	    """Return the share of rows used as the initial back-test training window."""
	    if n_rows > 500:
	        return 0.9
	    if n_rows > 200:
	        return 0.8
	    return 0.7


	def model_run(df_all):
	    """Fit an ARIMA(2,1,2) model with exogenous regressors and forecast 'prices'.

	    The function does two things:

	    1. Fits ARIMA on every row but the last of the transformed data and
	       forecasts one step ahead, using the last row's regressors as exog.
	       The forecast is written into the last row of a copy of the frame
	       returned by ``mu.data_transform``.
	    2. Back-tests the same model with an expanding window (one refit per
	       held-out row, one-step-ahead forecast each time) and scores the
	       forecasts against the actual values with R^2.

	    Args:
	        df_all: Raw input passed straight to ``mu.data_transform`` together
	            with tomorrow's timestamp. Its schema is defined by that helper
	            (not visible here). The transformed frame must expose the columns
	            'Year', 'Month', 'Day', 'prices', 'price_eth', 'GSPC', 'TNX',
	            'Employment', 'google_trend' and 'EURUSD'.

	    Returns:
	        A tuple ``(df_with_forecast, accuracy, result_arima)`` where
	        ``df_with_forecast`` is the transformed frame with the forecast in
	        the last row's 'prices', ``accuracy`` is the back-test R^2 and
	        ``result_arima`` is a DataFrame with columns 'prediction' and 'data'.

	    Side effects:
	        Prints a progress message and writes ``result_arima_kat.csv`` to the
	        current working directory.
	    """
	    # Kept function-local, as in the original, so importing this module does
	    # not require scikit-learn until the model is actually run.
	    from sklearn.metrics import r2_score

	    target = 'prices'
	    arima_order = (2, 1, 2)

	    first_day_future = pd.to_datetime(datetime.now() + timedelta(days=1))

	    # ---------------- Dataset manipulation for supervised learning ----------------
	    # NOTE(review): df_final is returned by the helper but not used here.
	    reframed_lags, df_final = mu.data_transform(df_all, first_day_future)

	    print(f'I have transformed the dataset into the frame for supervised learning')
	    df = reframed_lags[['prices', 'price_eth', 'GSPC', 'Day', 'Month', 'TNX',
	                        'Employment', 'google_trend', 'EURUSD']]
	    date = pd.to_datetime(dict(year=reframed_lags['Year'],
	                               month=reframed_lags['Month'],
	                               day=reframed_lags['Day']))
	    df_with_date = pd.concat([date, df], axis=1)
	    df_with_date.columns = np.append('date', df.columns)
	    df_with_date.set_index('date', inplace=True)
	    df_with_date = df_with_date.dropna()

	    # NOTE(review): the last row is treated as the day to forecast; this
	    # presumes data_transform leaves it non-NaN so dropna() keeps it - confirm.
	    df_past = df_with_date.iloc[:-1, :]
	    df_future = df_with_date.iloc[-1:, :]

	    model = ARIMA(df_past[target], exog=df_past.drop(columns=[target]),
	                  order=arima_order)
	    model_fit = model.fit()

	    # One-step-ahead forecast for the future row.
	    forecast = model_fit.forecast(steps=1, exog=df_future.drop(columns=target))
	    forecast_value = np.asarray(forecast)[-1]

	    # Write the forecast into the last row(s) of a copy of the transformed frame.
	    df_with_forecast = reframed_lags.copy()
	    is_last_row = df_with_forecast.index == df_with_forecast.index[-1]
	    df_with_forecast.loc[is_last_row, target] = forecast_value

	    # ---------------- Model accuracy: expanding-window back-test ----------------
	    train_size = _train_fraction(len(reframed_lags))
	    data_arima = df_with_date
	    n_rows = len(data_arima)
	    first_test_row = int(n_rows * train_size)
	    window_length = int(n_rows - n_rows * train_size)

	    predicted = []
	    actual = []
	    for i in range(window_length):
	        split = first_test_row + i
	        train_rows = data_arima.iloc[0:split, :]
	        test_rows = data_arima.iloc[split:split + 1, :]

	        arima = ARIMA(train_rows[target],
	                      exog=train_rows.drop(columns=target),
	                      order=arima_order)
	        arima_fit = arima.fit()
	        step_forecast = arima_fit.forecast(
	            steps=1, exog=test_rows.drop(columns=target))

	        predicted.append(float(np.asarray(step_forecast)[0]))
	        actual.append(float(test_rows[target].iloc[0]))

	    predictions = np.array(predicted, dtype=float)
	    test_labels_all = np.array(actual, dtype=float)

	    # r2_score expects (y_true, y_pred); R^2 is not symmetric, so the actual
	    # values must come first.
	    accuracy = r2_score(test_labels_all, predictions)

	    result_arima = pd.DataFrame({'prediction': predictions,
	                                 'data': test_labels_all})
	    result_arima.to_csv('result_arima_kat.csv')
	    return df_with_forecast, accuracy, result_arima