KatGaw committed on
Commit
96edc51
1 Parent(s): 633754a

adding files

.DS_Store ADDED
Binary file (6.15 kB).
 
Dockerfile ADDED
@@ -0,0 +1,11 @@
+ FROM python:3.9
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+ WORKDIR $HOME/app
+ COPY --chown=user . $HOME/app
+ COPY ./requirements.txt $HOME/app/requirements.txt
+ RUN pip install -r requirements.txt
+ COPY . .
+ CMD ["streamlit", "run", "app.py", "--server.port", "7860"]
app.py ADDED
@@ -0,0 +1,222 @@
+ from openai import OpenAI
+ from langchain_core.messages import BaseMessage, HumanMessage
+ from langchain_openai import ChatOpenAI
+ from langchain.schema.output_parser import StrOutputParser
+ from langchain_core.runnables import RunnableConfig
+ from typing import Annotated, Sequence, TypedDict
+ import operator
+ import numpy as np
+ import pandas as pd
+ from dotenv import load_dotenv
+ import os
+ import matplotlib.pyplot as plt
+ from tools import data_analyst
+ from tools import crypto_sentiment_analysis_util
+ import app_crypto_rf_model as rf
+ import app_crypto_scrape as sa
+ import app_crypto_arima_model as arima
+ import streamlit as st
+ 
+ st.set_page_config(page_title="LangChain Agent", layout="wide")
+ load_dotenv()
+ OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
+ 
+ llm = ChatOpenAI(model="gpt-3.5-turbo")
+ 
+ #======================== AGENTS ==================================
+ # The agent state is the input to each node in the graph
+ class AgentState(TypedDict):
+     # The annotation tells the graph that new messages will always
+     # be added to the current state
+     messages: Annotated[Sequence[BaseMessage], operator.add]
+     # The 'next' field indicates where to route to next
+     next: str
+ 
+ tool = data_analyst.data_analyst_tools()
+ 
+ st.title("💬 Krypto")
+ 
+ # Guard on the same key we set, so the chat history survives Streamlit reruns
+ if "messages" not in st.session_state:
+     st.session_state["messages"] = [{"role": "system", "content": """
+     You are a cryptocurrency investing expert. Answer all questions related to cryptocurrency investment recommendations. Say "I don't know" if you don't know.
+     """}]
+ 
+ # Streamlit UI elements
+ st.image('crypto_image.png')
+ 
+ sideb = st.sidebar
+ 
+ with st.sidebar:
+     title = st.text_input("Start by entering the currency name:")
+ 
+ check1 = sideb.button(f"analyze {title}")
+ results = []
+ 
+ if check1:
+     st.write(f"I am now producing analysis for {title}")
+ 
+     model = ChatOpenAI(temperature=0.7, api_key=OPENAI_API_KEY)
+     chain = model | StrOutputParser()
+     result = chain.invoke(f"You are a cryptocurrency data analyst. \
+         Provide the correct cryptocurrency ticker from the CoinGecko website for the cryptocurrency: {title}. \
+         Expected output: ticker. \
+         Provide it in the following format: >>cryptocurrencyticker>> \
+         for example: >>BTC>>")
+ 
+     print(result)
+     print('ticker', str(result).split(">>")[0])
+     if len(str(result).split(">>")[1]) < 10:
+         cryptocurrencyticker = str(result).split(">>")[1]
+     else:
+         cryptocurrencyticker = str(result).split(">>")[0]
+     cryptocurrency = title
+ 
+     print(cryptocurrency, cryptocurrencyticker)
+ 
+     #================== Scrape Current/Historical Price ====================
+     df = sa.scrape_crypto(cryptocurrency, cryptocurrencyticker)
+     if len(df) > 0:
+         print(df.tail())
+         print("Running forecasting models on historical prices")
+         df_with_forecast_rf, accuracy_rf, result_rf = rf.model_run(df)
+         df_with_forecast_arima, accuracy_arima, result_arima = arima.model_run(df)
+         print("done")
+         print(np.round(df['prices'][-1], 2))
+ 
+         # Build the LLM prompt around whichever model scored the higher R2,
+         # and report that model's own forecast and accuracy
+         if accuracy_rf < accuracy_arima:
+             forecasted_price = np.round(np.array(df_with_forecast_arima['prices'])[-1], 2)
+             prompt = f"You are an investment recommendation expert for the cryptocurrency {cryptocurrency}. You are selecting the predicted price from the ARIMA model because its accuracy (R2: {np.round(accuracy_arima, 2)}) is higher than the accuracy (R2: {np.round(accuracy_rf, 2)}) of the random forest model. Compare the current price to the predicted price. If the current price exceeds the predicted price, recommend selling; otherwise recommend buying. Tell the user what the current price, predicted price, and accuracy values are. The predicted price for tomorrow from the ARIMA model is {forecasted_price}. The prediction accuracy of the ARIMA model is {np.round(accuracy_arima, 2)}. The current price of {cryptocurrency} is: {np.round(df['prices'][-1], 2)}."
+         else:
+             forecasted_price = np.round(np.array(df_with_forecast_rf['prices'])[-1], 2)
+             prompt = f"You are an investment recommendation expert for the cryptocurrency {cryptocurrency}. You are selecting the predicted price from the random forest model because its accuracy (R2: {np.round(accuracy_rf, 2)}) is higher than the accuracy (R2: {np.round(accuracy_arima, 2)}) of the ARIMA model. Compare the current price to the predicted price. If the current price exceeds the predicted price, recommend selling; otherwise recommend buying. Tell the user what the current price, predicted price, and accuracy values are. The predicted price for tomorrow from the random forest model is {forecasted_price}. The prediction accuracy of the random forest model is {np.round(accuracy_rf, 2)}. The current price of {cryptocurrency} is: {np.round(df['prices'][-1], 2)}."
+ 
+         inputs_recommend = {"messages": [HumanMessage(content=prompt)]}
+ 
+         model = ChatOpenAI(temperature=0.7, api_key=OPENAI_API_KEY)
+         response = model.invoke(prompt)
+         response_content = response.content
+         st.chat_message("assistant").markdown(response_content)
+         # Store the assistant's reply (not the prompt) in the chat history
+         st.session_state.messages.append({"role": "assistant", "content": response_content})
+ 
+         fig, ax = plt.subplots(1, 2, figsize=(10, 3))
+         ax[0].plot(result_arima['prediction'], color='blue', marker='o')
+         ax[0].plot(result_arima['data'], color='orange', marker='o')
+         ax[0].set_title('ARIMA')
+         ax[1].plot(result_rf['prediction'], color='blue', marker='o')
+         ax[1].plot(result_rf['data'], color='orange', marker='o')
+         ax[1].set_title('RF')
+         fig.suptitle('Prediction vs Actuals')
+         plt.legend(['prediction', 'actuals'])
+         st.pyplot(fig)
+ 
+         # ========================== Sentiment analysis ==========================
+         # Perform sentiment analysis on cryptocurrency news, predict the dominant
+         # sentiment, and plot the sentiment breakdown
+         news_articles = crypto_sentiment_analysis_util.fetch_news(cryptocurrency)
+         reddit_news_articles = crypto_sentiment_analysis_util.fetch_reddit_news(cryptocurrency)
+ 
+         analysis_results = []
+ 
+         # Perform sentiment analysis on each news article
+         for article in news_articles:
+             if cryptocurrency[0:6] in article['News_Article'].lower():
+                 sentiment_analysis_result = crypto_sentiment_analysis_util.analyze_sentiment(article['News_Article'])
+                 result = {
+                     'News_Article': sentiment_analysis_result["News_Article"],
+                     'Sentiment': sentiment_analysis_result["Sentiment"][0]['label'],
+                     'Index': sentiment_analysis_result["Sentiment"][0]['score']
+                 }
+                 analysis_results.append(result)
+ 
+         for article in reddit_news_articles:
+             if cryptocurrency[0:6] in article.lower():
+                 sentiment_analysis_result_reddit = crypto_sentiment_analysis_util.analyze_sentiment(article)
+                 result = {
+                     'News_Article': sentiment_analysis_result_reddit["News_Article"],
+                     'Index': np.round(sentiment_analysis_result_reddit["Sentiment"][0]['score'], 2)
+                 }
+                 analysis_results.append(result)
+ 
+         # Generate a summarized message rationalizing the dominant sentiment
+         summary = crypto_sentiment_analysis_util.generate_summary_of_sentiment(analysis_results)
+         st.chat_message("assistant").write(str(summary))
+         st.session_state.messages.append({"role": "assistant", "content": summary})
+ 
+ # OpenAI client for the follow-up chat
+ client = OpenAI(api_key=OPENAI_API_KEY)
+ 
+ # Set a default model
+ if "openai_model" not in st.session_state:
+     st.session_state["openai_model"] = "gpt-3.5-turbo"
+ 
+ if prompt := st.chat_input("Some other questions?"):
+     # Add the user message to the chat history
+     st.session_state.messages.append({"role": "user", "content": prompt})
+     # Display the user message in a chat message container
+     with st.chat_message("user"):
+         st.markdown(prompt)
+     # Display the assistant response in a chat message container
+     with st.chat_message("assistant"):
+         stream = client.chat.completions.create(
+             model=st.session_state["openai_model"],
+             messages=[
+                 {"role": m["role"], "content": m["content"]}
+                 for m in st.session_state.messages
+             ],
+             stream=True,
+         )
+         response = st.write_stream(stream)
+     st.session_state.messages.append({"role": "assistant", "content": response})
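A minimal, hypothetical helper (not part of this commit) that makes the >>TICKER>> parsing above a bit more defensive; it assumes the same >>...>> output convention the prompt requests:

    def extract_ticker(llm_output: str) -> str:
        """Pull the ticker out of an LLM reply formatted as >>TICKER>>."""
        parts = str(llm_output).split(">>")
        # Prefer the field between the markers; fall back to the leading text
        # when it is missing or implausibly long (mirrors the app.py logic).
        if len(parts) > 1 and 0 < len(parts[1]) < 10:
            return parts[1].strip()
        return parts[0].strip()

    # extract_ticker("Sure! >>BTC>>") -> "BTC"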
app_crypto_arima_model.py ADDED
@@ -0,0 +1,72 @@
+ from datetime import datetime, timedelta
+ import pandas as pd
+ import numpy as np
+ from sklearn.metrics import r2_score
+ 
+ import model_utils as mu
+ from statsmodels.tsa.arima.model import ARIMA
+ 
+ def model_run(df_all):
+     """Prediction function that runs an ARIMA model and predicts tomorrow's cryptocurrency price.
+     Useful for forecasting a variable with an ARIMA model.
+     Uses the historical 'prices' series and returns the prediction to the client.
+     """
+     first_day_future = pd.to_datetime(datetime.now() + timedelta(days=1))
+ 
+     #----------------- DATASET MANIPULATION FOR SUPERVISED LEARNING -----------------
+     reframed_lags, df_final = mu.data_transform(df_all, first_day_future)
+     print('I have transformed the dataset into the frame for supervised learning')
+ 
+     df = reframed_lags[['prices', 'total_vol', 'GSPC', 'Day', 'Month', 'TNX', 'Employment', 'google_trend', 'EURUSD']]
+     date = pd.to_datetime(dict(year=reframed_lags['Year'], month=reframed_lags['Month'], day=reframed_lags['Day']))
+     df_with_date = pd.concat([date, df], axis=1)
+     df_with_date.columns = np.append('date', df.columns)
+     df_with_date.set_index('date', inplace=True)
+     df_with_date = df_with_date.dropna()
+     df_past = df_with_date.iloc[:-1, :]
+     df_future = df_with_date.iloc[-1:, :]
+     model = ARIMA(df_past['prices'], exog=df_past.drop(columns='prices'), order=(2, 1, 2))
+     model_fit = model.fit()
+ 
+     # Make a one-step-ahead prediction for the future row
+     predictions = model_fit.forecast(steps=1, exog=df_future.drop(columns='prices'))
+ 
+     # Add the forecast to the dataframe
+     df_with_forecast = reframed_lags.copy()
+     df_with_forecast.loc[df_with_forecast.index == df_with_forecast.index[-1], 'prices'] = predictions[-1:].values[0]
+ 
+     #----------------------------------- MODEL ACCURACY -----------------------------------
+     # Rolling-window accuracy measure: refit on an expanding window and
+     # predict one step ahead at each iteration
+     if len(reframed_lags) > 500:
+         train_size = 0.9
+     elif len(reframed_lags) > 200:
+         train_size = 0.8
+     else:
+         train_size = 0.7
+     predictions = []
+     test_labels_all = []
+     data_arima = df_with_date
+     window_length = int(len(data_arima) - len(data_arima) * train_size)
+     for i in range(0, window_length):
+         train_accuracy = data_arima.iloc[0:int(len(data_arima) * train_size) + i, :]
+         test_accuracy = data_arima.iloc[len(train_accuracy):len(train_accuracy) + 1, :]
+         train_features_accuracy = train_accuracy.drop(columns='prices')
+         test_features_accuracy = test_accuracy.drop(columns='prices')
+         train_labels_accuracy = train_accuracy['prices']
+         test_labels_accuracy = test_accuracy['prices']
+ 
+         arima = ARIMA(train_labels_accuracy, exog=train_features_accuracy, order=(2, 1, 2))
+         arima_fit = arima.fit()
+         prediction_arima = arima_fit.forecast(steps=1, exog=test_features_accuracy)
+         predictions = np.append(predictions, prediction_arima)
+         test_labels_all = np.append(test_labels_all, test_labels_accuracy)
+ 
+     # Calculate accuracy (r2_score expects y_true first, then y_pred)
+     accuracy = r2_score(test_labels_all, predictions)
+     result_arima = pd.DataFrame({'prediction': predictions, 'data': test_labels_all})
+     result_arima.to_csv('result_arima.csv')
+     return df_with_forecast, accuracy, result_arima
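For context, a minimal, self-contained sketch (synthetic data, hypothetical column name vol) of the one-step-ahead ARIMA-with-exogenous-regressors pattern that model_run relies on:

    import numpy as np
    import pandas as pd
    from statsmodels.tsa.arima.model import ARIMA

    rng = np.random.default_rng(0)
    idx = pd.date_range("2024-01-01", periods=120, freq="D")
    exog = pd.DataFrame({"vol": rng.normal(size=120)}, index=idx)
    prices = pd.Series(100 + np.cumsum(rng.normal(size=120)), index=idx, name="prices")

    # Fit on all but the last row, then forecast one step ahead by
    # supplying the exogenous values for the future date.
    fit = ARIMA(prices[:-1], exog=exog[:-1], order=(2, 1, 2)).fit()
    print(fit.forecast(steps=1, exog=exog[-1:]))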
app_crypto_rf_model.py ADDED
@@ -0,0 +1,124 @@
+ from datetime import datetime, timedelta
+ import pandas as pd
+ import numpy as np
+ import random
+ 
+ from sklearn.ensemble import RandomForestRegressor
+ from sklearn.metrics import mean_squared_error, r2_score
+ from math import sqrt
+ from sklearn.preprocessing import MinMaxScaler
+ import model_utils as mu
+ 
+ def model_run(df_all):
+     """Prediction function that runs a random forest model and predicts tomorrow's cryptocurrency price."""
+ 
+     first_day_future = pd.to_datetime(datetime.now() + timedelta(days=1))
+     #----------------- DATASET MANIPULATION FOR SUPERVISED LEARNING -----------------
+     reframed_lags, df_final = mu.data_transform(df_all, first_day_future)
+     print('I have transformed the dataset into the frame for supervised learning')
+     reframed_lags.to_csv('reframed_lags.csv')
+ 
+     #----------------------------------- TRAIN/TEST SPLIT -----------------------------------
+     # Randomly split rows into train/test based on the train ratio (0.8)
+     train_size = 0.8
+ 
+     df_cut1 = reframed_lags.reset_index().iloc[:, 1:]
+     train_value = int(len(df_cut1) * train_size)
+     first_random = random.sample(range(len(df_cut1) - 1), train_value)
+     train_bulk = np.sort(first_random)  # keep the sampled row indices in order
+ 
+     df_cut = reframed_lags.reset_index()
+     train_sample = df_cut.loc[df_cut['index'].isin(train_bulk)]
+     test_sample = df_cut.loc[~df_cut['index'].isin(train_bulk)]
+ 
+     test = test_sample.iloc[:, 1:]
+     train = train_sample.iloc[:, 1:]
+     print('I have split the dataset into training and testing samples')
+ 
+     #----------------------------------- RE-SCALE FOR SUPERVISED LEARNING -----------------------------------
+     # Normalize features to (0, 1)
+     scaler_train = MinMaxScaler(feature_range=(0, 1))
+     scaled = scaler_train.fit_transform(train.values.astype('float32'))
+     df_train = pd.DataFrame(scaled, columns=train.columns)
+ 
+     scaler_test = MinMaxScaler(feature_range=(0, 1))
+     scaled = scaler_test.fit_transform(test.values.astype('float32'))
+     df_test = pd.DataFrame(scaled, columns=test.columns)
+ 
+     #----------------------------------- MODEL -----------------------------------
+     # Define features and labels; drop the 'prices' target from the feature
+     # matrix so the label does not leak into the inputs
+     train_features = df_train.drop(columns='prices').values
+     test_features = df_test.drop(columns='prices').values
+     train_labels = df_train['prices'].values
+     test_labels = df_test['prices'].values
+ 
+     # Baseline prediction (previous observed value) for sanity-checking accuracy
+     baseline_preds = df_test['prices'].shift(1).fillna(df_test['prices'].iloc[0])
+     baseline_errors = abs(baseline_preds.values - test_labels)
+ 
+     # Instantiate a model with 1000 decision trees
+     rf = RandomForestRegressor(n_estimators=1000)
+     rf.fit(train_features, train_labels)
+     prediction_rf = rf.predict(test_features)
+     predictions = prediction_rf
+ 
+     #----------------------------------- MODEL OUTPUT TRANSFORMATION -----------------------------------
+     # Replace the scaled test prices with the predictions, then invert the scaling
+     df_test['prices'] = predictions
+     prediction_transformed = pd.DataFrame(scaler_test.inverse_transform(df_test.values.astype('float')), columns=test.columns)
+ 
+     df_test.loc[df_test.index == (len(df_test) - 1), 'prices'] = predictions[-1:][0]
+     inv_transformed = pd.DataFrame(scaler_test.inverse_transform(df_test.values.astype('float')), columns=test.columns)
+ 
+     # Data with forecast
+     df_with_forecast = df_final.copy()
+     df_with_forecast.loc[df_with_forecast.index == df_with_forecast.index[-1], 'prices'] = inv_transformed['prices'][-1:].values[0]
+     print('Final result')
+     print(df_with_forecast)
+ 
+     #----------------------------------- MODEL ACCURACY -----------------------------------
+     # Rolling-window accuracy measure: refit on an expanding window and
+     # predict one step ahead at each iteration
+     if len(reframed_lags) > 500:
+         train_size = 0.9
+     elif len(reframed_lags) > 200:
+         train_size = 0.8
+     else:
+         train_size = 0.7
+     predictions = []
+     test_labels_all = []
+     window_length = int(len(reframed_lags) - len(reframed_lags) * train_size)
+     for i in range(0, window_length):
+         train_accuracy = reframed_lags.iloc[0:int(len(reframed_lags) * train_size) + i, :]
+         test_accuracy = reframed_lags.iloc[len(train_accuracy):len(train_accuracy) + 1, :]
+         train_features_accuracy = train_accuracy.drop(columns='prices')
+         test_features_accuracy = test_accuracy.drop(columns='prices')
+         train_labels_accuracy = train_accuracy['prices']
+         test_labels_accuracy = test_accuracy['prices']
+ 
+         rf = RandomForestRegressor(n_estimators=1000)
+         rf.fit(train_features_accuracy, train_labels_accuracy)
+         prediction_rf = rf.predict(test_features_accuracy)
+         predictions = np.append(predictions, prediction_rf)
+         test_labels_all = np.append(test_labels_all, test_labels_accuracy)
+ 
+     # Calculate accuracy (r2_score expects y_true first, then y_pred)
+     accuracy = r2_score(test_labels_all, predictions)
+     result_rf = pd.DataFrame({'prediction': predictions, 'data': test_labels_all})
+     result_rf.to_csv('result_rf.csv')
+     return df_with_forecast, accuracy, result_rf
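As a side note, a minimal sketch of the MinMaxScaler round-trip used above for the output transformation: scale a frame, overwrite the 'prices' column with model output, then invert the scaling to read predictions in original units (toy values only):

    import pandas as pd
    from sklearn.preprocessing import MinMaxScaler

    df = pd.DataFrame({"prices": [10.0, 12.0, 11.0], "vol": [1.0, 3.0, 2.0]})
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled = pd.DataFrame(scaler.fit_transform(df.values), columns=df.columns)

    scaled["prices"] = [0.2, 0.9, 0.5]  # stand-in for predictions in scaled space
    back = pd.DataFrame(scaler.inverse_transform(scaled.values), columns=df.columns)
    print(back["prices"])  # predictions mapped back to the original price scale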
app_crypto_scrape.py ADDED
@@ -0,0 +1,82 @@
+ # Import packages
+ import datetime
+ from datetime import date, timedelta
+ import pandas as pd
+ import numpy as np
+ from pycoingecko import CoinGeckoAPI
+ from pytrends.request import TrendReq
+ import scrape_utils as su
+ 
+ cg = CoinGeckoAPI()
+ pytrends = TrendReq(hl='en-US')
+ 
+ 
+ def scrape_crypto(currency, ticker):
+ 
+     # 1. Scrape historical price and volume data for the currency
+     today = date.today()
+     Day_end = today.strftime("%d")
+     Month_end = today.strftime("%m")
+     Year_end = today.strftime("%Y")
+ 
+     past = today - timedelta(days=200)
+     Day_start = past.strftime("%d")
+     Month_start = past.strftime("%m")
+     Year_start = past.strftime("%Y")
+ 
+     date_start = [Year_start, Month_start, Day_start]
+     date_end = [Year_end, Month_end, Day_end]
+ 
+     df_ts_coins = su.scrape_historical_series([currency], date_start, date_end)
+     if len(df_ts_coins) > 0:
+         df_ts_coins['date'] = [(datetime.datetime.fromtimestamp(int(i))).strftime('%Y-%m-%d %H:%M:%S') for i in df_ts_coins['date']]
+ 
+         # Add current data
+         current_data = cg.get_price(ids=currency, vs_currencies='usd', include_market_cap='true', include_24hr_vol='true', include_last_updated_at='true')
+ 
+         prices = pd.DataFrame(current_data).T['usd'].values[0]
+         market_caps = pd.DataFrame(current_data).T['usd_market_cap'].values[0]
+         total_vol = pd.DataFrame(current_data).T['usd_24h_vol'].values[0]
+         df_today_row = pd.DataFrame({0: ['id', 'date', 'prices', 'market_caps', 'total_vol'], 1: [currency, today.strftime('%Y-%m-%d %H:%M:%S'), prices, market_caps, total_vol]}).T
+         df_today_row.columns = df_today_row.iloc[0, :]
+         df_today_row = df_today_row.drop(0)
+ 
+         df_ts_coins = pd.concat([df_ts_coins, df_today_row], axis=0)
+         df_ts_coins.set_index('date', inplace=True)
+         df_ts_coins.index = pd.to_datetime(df_ts_coins.index).strftime("%Y-%m-%d %H:%M:%S")
+ 
+         # 2. Scrape macro indicators
+         df_cli = su.scrape_cli(past, today)
+         df_cpi = su.scrape_cpi_employment()
+         print('I have scraped CLI and CPI')
+ 
+         # 3. Scrape Google Trends
+         google_data = su.scrape_google_trends(currency, ticker)
+         print('Google Trends dataset done')
+ 
+         # 4. Scrape Yahoo Finance
+         df_finance = su.scrape_stocks(past, today)
+         print('Yahoo dataset done. I am done scraping!')
+ 
+         #==== 5. CONCATENATE DATAFRAMES ====
+         df_ts_coins.index = pd.to_datetime(df_ts_coins.index).strftime("%Y-%m-%d")
+         df_cli.index = pd.to_datetime(df_cli.index).strftime("%Y-%m-%d")
+         if len(df_cpi) > 0:
+             df_cpi.index = pd.to_datetime(df_cpi.index).strftime("%Y-%m-%d")
+         else:
+             print('MISSING CPI')
+             df_cpi = pd.DataFrame({'CPI': np.repeat(0, len(df_cli)), 'Employment': np.repeat(0, len(df_cli))})
+             df_cpi.index = df_cli.index
+         google_data.index = pd.to_datetime(google_data.index).strftime("%Y-%m-%d")
+         df_finance.index = pd.to_datetime(df_finance.index).strftime("%Y-%m-%d")
+         df_all = pd.concat([df_ts_coins, df_cli, df_cpi, google_data, df_finance], axis=1)
+         df_all = df_all.sort_index()
+     else:
+         print('No data available.')
+         df_all = pd.DataFrame()
+     return df_all
model_utils.py ADDED
@@ -0,0 +1,50 @@
+ import pandas as pd
+ import numpy as np
+ 
+ def data_transform(df_all, first_day_future):
+     df_all['CPI'] = df_all['CPI'].astype('float')
+     df_all['Employment'] = df_all['Employment'].astype('float')
+ 
+     # Add a future row and shift the X columns
+     df_future_row = pd.DataFrame({0: df_all.reset_index().columns, 1: df_all.reset_index().iloc[-1, :]}).T
+     df_future_row.columns = df_future_row.iloc[0, :]
+     df_future_row = df_future_row.drop(0).drop(columns=['index'])
+     df_future_row.insert(0, '', pd.to_datetime(first_day_future).strftime("%Y-%m-%d"))
+     df_future_row.set_index(df_future_row.iloc[:, 0], inplace=True)
+     df_future_row = df_future_row.drop(columns='')
+     if 'level_0' in df_future_row.columns:
+         df_future_row = df_future_row.drop(columns='level_0')
+     if 'index' in df_all.columns:
+         df_all = df_all.drop(columns='index')
+ 
+     df_with_future = pd.concat([df_all, df_future_row], axis=0)
+     df_with_future.index.names = ['date']
+     df_with_future.index = pd.to_datetime(df_with_future.index).strftime("%Y-%m-%d")
+     df_with_future = df_with_future.shift()
+     df_final = df_with_future.interpolate(method='linear', limit_direction='both', limit=100).bfill().ffill()
+     df_final['name'] = np.repeat(df_final['id'].iloc[:, 0].dropna()[0:1][0], len(df_final))
+     df_final = df_final.drop(columns='id')
+ 
+     # Data transformation: coin dummy, time variables, seasonal dummies
+     df_final['name_no'] = pd.get_dummies(df_final['name'], dtype='int')
+     df_final.index = pd.to_datetime(df_final.index, utc=True)
+     df_final['Day'] = df_final.index.day
+     df_final['Month'] = df_final.index.month
+     df_final['Year'] = df_final.index.year
+     seasonal_dummy = pd.get_dummies(df_final.index.day, dtype='int')
+     seasonal_dummy.index = df_final.index
+     seasonal_dummy.columns = [str('day_' + str(value)) for value in seasonal_dummy.columns]
+     reframed = pd.concat([df_final, seasonal_dummy], axis=1).drop(columns='name')
+     print(reframed.iloc[-5:, :])
+     reframed = reframed.reset_index().drop(columns=['date'])
+ 
+     # Lag features: lag1 is yesterday's price, lag2 is the price two days back.
+     # Seed both columns with the last observed prices, then fill row by row
+     # using the previous observed values.
+     reframed_lags = reframed.copy()
+     reframed_lags['lag1'] = reframed_lags['prices'].iloc[-1]
+     reframed_lags['lag2'] = reframed_lags['prices'].iloc[-2]
+     for i in range(1, len(reframed_lags)):
+         reframed_lags.loc[reframed_lags.index[i], 'lag1'] = reframed_lags.loc[reframed_lags.index[i-1], 'prices']
+         reframed_lags.loc[reframed_lags.index[i], 'lag2'] = reframed_lags.loc[reframed_lags.index[i-1], 'lag1']
+     return reframed_lags, df_final
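The row-by-row lag loop above is equivalent (up to how the first rows are seeded) to a vectorized pandas shift; a minimal sketch on a toy frame with the same column names:

    import pandas as pd

    df = pd.DataFrame({"prices": [10.0, 11.0, 12.0, 13.0]})
    df["lag1"] = df["prices"].shift(1)  # yesterday's price
    df["lag2"] = df["prices"].shift(2)  # price two days back
    print(df)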
scrape_utils.py ADDED
@@ -0,0 +1,287 @@
+ # Import packages
+ import pandas as pd
+ import numpy as np
+ import time
+ import datetime
+ from dotenv import load_dotenv
+ 
+ from bs4 import BeautifulSoup
+ import requests
+ from pytrends.request import TrendReq
+ from pytrends import dailydata
+ import yfinance as yf
+ import json
+ import prettytable
+ import os
+ 
+ pytrends = TrendReq(hl='en-US')
+ load_dotenv()
+ 
+ COINGECKO_API_KEY = os.environ["COINGECKO_API_KEY"]
+ 
+ # 1. Historical crypto data
+ def scrape_historical_series(coin_name, date_start, date_end):
+     """Scrape historical series for a sample of coins.
+ 
+     Args:
+         coin_name (list): List of coins we will use for training.
+         date_start (list): List of values for Year_start, Month_start, Day_start.
+         date_end (list): List of values for Year_end, Month_end, Day_end.
+     Returns:
+         Dataframe with the evolution of prices, market capitalization, and total volume over time for each currency.
+     """
+     df_ts_coins1 = pd.DataFrame()
+ 
+     # Date definitions
+     date_time = datetime.datetime(int(date_start[0]), int(date_start[1]), int(date_start[2]))
+     date_time_now = datetime.datetime(int(date_end[0]), int(date_end[1]), int(date_end[2]))
+     unix_past = time.mktime(date_time.timetuple())  # change the date format into unix for scraping
+     unix_now = time.mktime(date_time_now.timetuple())
+     past = date_time.strftime('%Y-%m-%d')
+     now = date_time_now.strftime('%Y-%m-%d')
+     datum_range = pd.date_range(start=past, end=now, freq='D')
+ 
+     unix_all = []
+     coins_names = []
+ 
+     # Create the date variable
+     for val in datum_range:
+         unix_all = np.append(unix_all, time.mktime(val.timetuple()))
+ 
+     for coin in pd.unique(coin_name):
+         time.sleep(5)
+         url = f"https://api.coingecko.com/api/v3/coins/{coin.lower()}/market_chart/range?vs_currency=usd&from={unix_past}&to={unix_now}"
+         headers = {
+             "accept": "application/json",
+             "x-cg-demo-api-key": COINGECKO_API_KEY
+         }
+         response = requests.get(url, headers=headers)
+         data = response.json()
+         if len(data) > 0:
+             prices = pd.DataFrame(data['prices'], columns=['date', 'prices'])
+             market = pd.DataFrame(data['market_caps'], columns=['date', 'market_caps'])
+             volume = pd.DataFrame(data['total_volumes'], columns=['date', 'total_vol'])
+             ts_coins_cut = pd.concat([prices, market.iloc[:, 1], volume.iloc[:, 1]], axis=1)
+ 
+             # Create an id variable for each coin
+             coinn = np.repeat(coin, len(ts_coins_cut))
+             coins_names = np.append(coins_names, coinn)
+             ts_coins_cut['id'] = coinn
+ 
+             # Create the date variable (the API timestamps are in milliseconds)
+             date_all = []
+             for val in ts_coins_cut['date']:
+                 date_all = np.append(date_all, (datetime.datetime.fromtimestamp(int(val) / 1000)).strftime('%m/%d/%y, %H:%M:%S'))
+             dates = pd.to_datetime(date_all, format='%m/%d/%y, %H:%M:%S')
+ 
+             # Set date as the index to aggregate hourly data into daily data
+             ts_coins_cut['dates'] = dates
+             ts_coins_cut = ts_coins_cut.set_index('dates')
+             prices = ts_coins_cut.pop('prices')
+             ts_coins_cut = ts_coins_cut.groupby([pd.Grouper(freq='D'), 'id']).mean()
+             prices1 = prices.groupby([pd.Grouper(freq='D')]).mean()
+             # After aggregating, change the index back
+             prices1 = prices1.reset_index()
+             ts_coins_cut.reset_index(inplace=True)
+ 
+             ts_coins_cut.insert(2, 'prices', prices1.iloc[:, 1])
+             # Replace the millisecond date column with daily unix timestamps
+             ts_coins_cut = ts_coins_cut.drop(columns=['date'])
+             ts_coins_cut.insert(2, 'date', unix_all[0:len(ts_coins_cut)])
+             df_ts_coins1 = pd.concat([df_ts_coins1, ts_coins_cut])  # concat the chunk across all currencies
+         else:
+             df_ts_coins1 = pd.DataFrame()
+     # Only drop the helper column if any data came back
+     if 'dates' in df_ts_coins1.columns:
+         df_ts_coins1 = df_ts_coins1.drop(columns=['dates'])
+     return df_ts_coins1
+ 
+ # 2. Macro variables, CLI
+ def scrape_cli(past, today):
+     """Scrape data on the composite leading indicator (CLI) for the USA.
+     Args:
+         past (date): Date at which to start scraping.
+         today (date): Date at which to end scraping.
+     Returns:
+         Dataframe with CLI values and dates.
+     """
+     countries = ['USA']
+     past_date = past.strftime('%Y-%m')
+     today_date = today.strftime('%Y-%m')
+     types = ['CLI']
+     for series_type in types:
+         print(series_type)
+         # Scrape OECD data and build a time series of CLI values per country
+         for country in countries:
+             if series_type == 'CLI':
+                 mainpage = requests.get(f'https://stats.oecd.org/restsdmx/sdmx.ashx/GetData/MEI_CLI/CSCICP03.{country}.M/all?startTime={past_date}&endTime={today_date}')
+             soup = BeautifulSoup(mainpage.content, 'xml')
+             whatis = soup.find_all("ObsValue")
+             whatis_key = soup.find_all("ObsKey")
+             country = [(str(whatis_key[i]).split('"REF_AREA" value="')[1][:3]) for i in range(len(whatis))]
+             dates = [pd.to_datetime(str(whatis_key[i]).split('"TIME_PERIOD" value="')[1][:7]) for i in range(len(whatis))]
+             measure = [(str(whatis_key[i]).split('"MEASURE" value="')[1][:7][:-2]) for i in range(len(whatis))]
+             values = [float(str(whatis[i]).split('value="')[1][0:-4]) for i in range(len(whatis))]
+             df_cli = pd.DataFrame({'date': dates, 'country': country, 'measure': measure, series_type: values})
+             df_cli.index = pd.to_datetime(df_cli['date'])
+             df_cli = df_cli.loc[df_cli['country'] == 'USA']['CLI'].astype('float').resample('M').mean()
+     return df_cli
+ 
+ def scrape_cpi_employment():
+     """Scrape CPI and employment data from the BLS API."""
+     headers = {'Content-type': 'application/json'}
+     variables = ['CUUR0000SA0', 'LNS12000000']
+     data = json.dumps({"seriesid": variables, "startyear": "2024", "endyear": "2024"})
+     p = requests.post('https://api.bls.gov/publicAPI/v2/timeseries/data/', data=data, headers=headers)
+     json_data = json.loads(p.text)
+     year_all = []
+     period_all = []
+     value_all = []
+     series_id = []
+     if len(json_data['Results']) > 0:
+         for series in json_data['Results']['series']:
+             x = prettytable.PrettyTable(["series id", "year", "period", "value", "footnotes"])
+             seriesId = series['seriesID']
+             for item in series['data']:
+                 year = item['year']
+                 period = item['period']
+                 value = item['value']
+                 footnotes = ""
+                 for footnote in item['footnotes']:
+                     if footnote:
+                         footnotes = footnotes + footnote['text'] + ','
+                 if 'M01' <= period <= 'M12':
+                     x.add_row([seriesId, year, period, value, footnotes[0:-1]])
+                     year_all = np.append(year_all, year)
+                     period_all = np.append(period_all, period)
+                     value_all = np.append(value_all, value)
+                     if seriesId == 'CUUR0000SA0':
+                         series_id = np.append(series_id, 'CPI')
+                     if seriesId == 'LNS12000000':
+                         series_id = np.append(series_id, 'Employment')
+ 
+         date = [pd.to_datetime(f"{year_all[i]}-{int(period_all[i][-2:])}") for i in range(len(year_all))]
+         df_cpi = pd.DataFrame({'date': date, 'value': value_all})
+         df_cpi['series_id'] = series_id
+         df_cpi.set_index('date', inplace=True)
+         df_cpi = pd.concat([df_cpi.loc[df_cpi['series_id'] == 'CPI'], df_cpi.loc[df_cpi['series_id'] == 'Employment']], axis=1)
+         df_cpi = df_cpi.drop(columns='series_id')
+         df_cpi.columns = ['CPI', 'Employment']
+     else:
+         df_cpi = pd.DataFrame()
+     return df_cpi
+ 
+ # 3. Google Trends
+ def scrape_google_trends(currency, currency_short):
+     curr_neni = []
+     names_values = [currency]
+     names_short = [currency_short]
+ 
+     from datetime import date
+     today = date.today()
+     past = today - datetime.timedelta(days=30)
+     Year_start = past.strftime("%Y")
+     Month_start = past.strftime("%m")
+     Year_end = today.strftime("%Y")
+     Month_end = today.strftime("%m")
+     date_all1 = pd.date_range(past, today)
+ 
+     google_data = pd.DataFrame()
+     for run_name in list(names_values):
+         # Scrape Google Trends and concatenate the time series across all currencies
+         time.sleep(5)
+         try:
+             data = dailydata.get_daily_data(str(run_name), int(Year_start), int(Month_start), int(Year_end), int(Month_end), verbose=False)
+             data1 = data.iloc[:, 4]
+         except Exception:
+             # Retry with the short ticker name before falling back
+             try:
+                 time.sleep(5)
+                 new_index = list(names_values).index(run_name)
+                 data = dailydata.get_daily_data(word=names_short[new_index], start_year=int(Year_start), start_mon=int(Month_start), stop_year=int(Year_end), stop_mon=int(Month_end), verbose=False)
+                 data1 = data.iloc[:, 4]
+             except Exception:
+                 # Fall back to a zero series when the currency cannot be scraped
+                 curr_neni = np.append(curr_neni, run_name)
+                 data1 = np.repeat(0, len(date_all1))
+         data1 = pd.DataFrame({'google_trend': pd.Series(data1)})
+         data1.insert(0, 'id', np.repeat(run_name, len(data1)))
+         google_data = pd.concat([google_data, data1], axis=0)
+ 
+     # Change the index from date to date_new to match the old dataset
+     google_data.reset_index(inplace=True)
+ 
+     if int(np.mean(data1['google_trend'])) == 0:
+         google_data['date_new'] = date_all1
+         google_data.set_index('date_new', inplace=True)
+     else:
+         google_data.columns = np.append('date_new', google_data.columns[1:])
+         google_data.set_index('date_new', inplace=True)
+     return google_data
+ 
+ # 4. Yahoo Finance
+ def scrape_stocks(past, today):
+     # Set dates in the form needed for scraping
+     date_old = past
+     date_new = today
+ 
+     df = pd.DataFrame()
+     # Codes for the variables we are going to scrape. Note 'EURUSD=X' is the
+     # Yahoo Finance ticker; the URL-encoded form 'EURUSD%3DX' would not resolve.
+     codes = ['^GSPC', 'GC=F', 'EURUSD=X', '^TNX']
+     codes_names = ['GSPC', 'GC=F', 'EURUSD', 'TNX']
+ 
+     for code in codes:
+         # Scrape Yahoo Finance and build a dataset with all financial time series
+         code_index = codes.index(code)
+         code_name = codes_names[code_index]
+         df_code = yf.download(code, start=date_old, end=date_new, progress=False)
+         df_code = pd.DataFrame(df_code).reset_index()
+         df_code_ts = df_code.iloc[:, 1]
+         df_code_ts = df_code_ts.rename(code_name)
+         df = pd.concat([df, df_code_ts], axis=1)
+         df_time = df_code.iloc[:, 0]
+ 
+     # Set the index to the date
+     df.insert(0, 'date', df_time)
+     df_finance = df.dropna()
+     df_finance.set_index('date', inplace=True)
+     df_finance.index = pd.to_datetime(df_finance.index)
+     return df_finance
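One detail worth flagging: CoinGecko's market_chart endpoints return timestamps in milliseconds, while time.mktime produces unix seconds, hence the divide-by-1000 in scrape_historical_series. A minimal sketch of the round-trip (example timestamp only):

    import time
    import datetime

    ms = 1718236800000  # CoinGecko-style millisecond timestamp (example value)
    dt = datetime.datetime.fromtimestamp(ms / 1000)   # back to a datetime
    print(dt)
    print(time.mktime(dt.timetuple()))                # and back to unix seconds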
sentiment_analysis/__init__.py ADDED
File without changes
sentiment_analysis/client.py ADDED
@@ -0,0 +1,63 @@
+ from alpaca_trade_api import REST
+ import os
+ from dotenv import load_dotenv
+ 
+ 
+ class AlpacaNewsFetcher:
+     """
+     A class for fetching news articles related to a specific stock from the Alpaca API.
+ 
+     Attributes:
+     - api_key (str): Alpaca API key for authentication.
+     - api_secret (str): Alpaca API secret for authentication.
+     - rest_client (alpaca_trade_api.REST): Alpaca REST API client.
+     """
+ 
+     def __init__(self, api_key, api_secret):
+         """
+         Initializes the AlpacaNewsFetcher object.
+ 
+         Args:
+         - api_key (str): Alpaca API key for authentication.
+         - api_secret (str): Alpaca API secret for authentication.
+         """
+         self.api_key = api_key
+         self.api_secret = api_secret
+         self.rest_client = REST(api_key, api_secret)
+ 
+         load_dotenv()
+         # The env var is a string; cast it so it can be used as a numeric limit
+         self.no_of_newsarticles_to_fetch = int(os.environ["NO_OF_NEWSARTICLES_TO_FETCH"])
+ 
+     def fetch_news(self, symbol, start_date, end_date):
+         """
+         Fetches news articles for a given stock symbol within a specified date range.
+ 
+         Args:
+         - symbol (str): Stock symbol for which news articles are to be fetched (e.g., "AAPL").
+         - start_date (str): Start date of the range in the format "YYYY-MM-DD".
+         - end_date (str): End date of the range in the format "YYYY-MM-DD".
+ 
+         Returns:
+         - list: A list of dictionaries containing relevant information for each news article.
+         """
+         news_articles = self.rest_client.get_news(symbol, start_date, end_date, limit=self.no_of_newsarticles_to_fetch)
+         formatted_news = []
+         print("-----------------------------------------------------")
+         print(len(news_articles))
+         print("-----------------------------------------------------")
+ 
+         for article in news_articles:
+             summary = article.summary
+             title = article.headline
+             timestamp = article.created_at
+ 
+             relevant_info = {
+                 'timestamp': timestamp,
+                 'title': title,
+                 'summary': summary
+             }
+             formatted_news.append(relevant_info)
+ 
+         return formatted_news
sentiment_analysis/requirements.txt ADDED
@@ -0,0 +1,13 @@
+ alpaca_trade_api
+ transformers
+ einops
+ accelerate
+ langchain
+ bitsandbytes
+ # sentencepiece
+ openai
+ backtrader
+ yfinance
+ pandas
+ pyfolio
+ python-dotenv
sentiment_analysis/sentiment_analysis_pipeline.py ADDED
@@ -0,0 +1,151 @@
+ # !pip install transformers
+ from transformers import pipeline
+ from client import AlpacaNewsFetcher
+ import os
+ from dotenv import load_dotenv
+ import pandas as pd
+ import matplotlib.pyplot as plt
+ 
+ 
+ class NewsSentimentAnalysis:
+     """
+     A class for sentiment analysis of news articles using the Transformers library.
+ 
+     Attributes:
+     - classifier (pipeline): Sentiment analysis pipeline from Transformers.
+     """
+ 
+     def __init__(self):
+         """Initializes the NewsSentimentAnalysis object."""
+         self.classifier = pipeline('sentiment-analysis')
+ 
+     def analyze_sentiment(self, news_article):
+         """
+         Analyzes the sentiment of a given news article.
+ 
+         Args:
+         - news_article (dict): Dictionary containing 'summary', 'title', and 'timestamp' keys.
+ 
+         Returns:
+         - dict: A dictionary containing the sentiment analysis results.
+         """
+         summary = news_article['summary']
+         title = news_article['title']
+         timestamp = news_article['timestamp']
+ 
+         relevant_text = summary + title
+         sentiment_result = self.classifier(relevant_text)
+ 
+         analysis_result = {
+             'timestamp': timestamp,
+             'title': title,
+             'summary': summary,
+             'sentiment': sentiment_result
+         }
+         return analysis_result
+ 
+     def plot_sentiment_graph(self, sentiment_analysis_result):
+         """
+         Plots a sentiment analysis pie chart.
+ 
+         Args:
+         - sentiment_analysis_result (list): List of dictionaries containing 'Timestamp' and 'Sentiment' keys.
+         """
+         df = pd.DataFrame(sentiment_analysis_result)
+         df['Timestamp'] = pd.to_datetime(df['Timestamp'])
+         df['Date'] = df['Timestamp'].dt.date
+ 
+         # Group by date and count sentiment values
+         grouped = df.groupby(by='Date')['Sentiment'].value_counts()
+         grouped.plot.pie()
+ 
+     def get_dominant_sentiment(self, sentiment_analysis_result):
+         """
+         Returns the overall sentiment (negative, positive, or neutral) depending on
+         the counts of each sentiment label.
+ 
+         Args:
+         - sentiment_analysis_result (list): List of dictionaries containing 'Timestamp' and 'Sentiment' keys.
+ 
+         Returns:
+         - pandas.Series: The (sentiment, count) row with the highest count.
+         """
+         df = pd.DataFrame(sentiment_analysis_result)
+         df['Timestamp'] = pd.to_datetime(df['Timestamp'])
+         df['Date'] = df['Timestamp'].dt.date
+ 
+         # Group by date and count sentiment values
+         grouped = df.groupby(by='Date')['Sentiment'].value_counts()
+         df = pd.DataFrame(list(grouped.items()), columns=['Sentiment', 'count'])
+         df['date'] = df['Sentiment'].apply(lambda x: x[0])
+         df['sentiment'] = df['Sentiment'].apply(lambda x: x[1])
+         df.drop('Sentiment', axis=1, inplace=True)
+         result = df.groupby('sentiment')['count'].sum().reset_index()
+ 
+         # Determine the sentiment with the highest count
+         dominant_sentiment = result.loc[result['count'].idxmax()]
+         return dominant_sentiment
+ 
+ 
+ # Starting point of the program
+ if __name__ == '__main__':
+     # Load the Alpaca key and secret from the environment
+     load_dotenv()
+     api_key = os.environ["ALPACA_API_KEY"]
+     api_secret = os.environ["ALPACA_SECRET"]
+ 
+     # Initialize AlpacaNewsFetcher, which fetches stock news from the Alpaca API
+     news_fetcher = AlpacaNewsFetcher(api_key, api_secret)
+ 
+     # Fetch news (title, timestamp, and summary) for AAPL from 2021-01-01 to 2021-12-31
+     news_data = news_fetcher.fetch_news("AAPL", "2021-01-01", "2021-12-31")
+ 
+     # Initialize the NewsSentimentAnalysis object
+     news_sentiment_analyzer = NewsSentimentAnalysis()
+     analysis_result = []
+     # 'news_data' is a list of news articles (each a dictionary)
+     for article in news_data:
+         sentiment_analysis_result = news_sentiment_analyzer.analyze_sentiment(article)
+ 
+         # Extract the timestamp and sentiment of each article for graphing
+         result = {
+             'Timestamp': sentiment_analysis_result["timestamp"],
+             'News- Title:Summary': sentiment_analysis_result["title"] + sentiment_analysis_result["summary"],
+             'Sentiment': sentiment_analysis_result["sentiment"][0]['label']
+         }
+         analysis_result.append(result)
+ 
+     # Determine the dominant sentiment across all analyzed articles
+     dominant_sentiment = news_sentiment_analyzer.get_dominant_sentiment(analysis_result)
+ 
+     final_result = {
+         'Sentiment-analysis-result': analysis_result,
+         'Dominant-sentiment': dominant_sentiment['sentiment']
+     }
+     print(final_result)
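For reference, a minimal sketch of the [{'label': ..., 'score': ...}] output shape that analyze_sentiment indexes into (shown with the default sentiment model; the tools module below swaps in a finance-specific one):

    from transformers import pipeline

    classifier = pipeline('sentiment-analysis')
    result = classifier("Bitcoin rallies to a new all-time high")
    print(result)                                  # e.g. [{'label': 'POSITIVE', 'score': 0.99}]
    print(result[0]['label'], result[0]['score'])  # the fields the code above reads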
tools/.DS_Store ADDED
Binary file (6.15 kB).
 
tools/.chainlit/config.toml ADDED
@@ -0,0 +1,84 @@
+ [project]
+ # Whether to enable telemetry (default: true). No personal data is collected.
+ enable_telemetry = true
+ 
+ # List of environment variables to be provided by each user to use the app.
+ user_env = []
+ 
+ # Duration (in seconds) during which the session is saved when the connection is lost
+ session_timeout = 3600
+ 
+ # Enable third parties caching (e.g. LangChain cache)
+ cache = false
+ 
+ # Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
+ # follow_symlink = false
+ 
+ [features]
+ # Show the prompt playground
+ prompt_playground = true
+ 
+ # Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
+ unsafe_allow_html = false
+ 
+ # Process and display mathematical expressions. This can clash with "$" characters in messages.
+ latex = false
+ 
+ # Authorize users to upload files with messages
+ multi_modal = true
+ 
+ # Allows user to use speech to text
+ [features.speech_to_text]
+ enabled = false
+ # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
+ # language = "en-US"
+ 
+ [UI]
+ # Name of the app and chatbot.
+ name = "Chatbot"
+ 
+ # Show the readme while the conversation is empty.
+ show_readme_as_default = true
+ 
+ # Description of the app and chatbot. This is used for HTML tags.
+ # description = ""
+ 
+ # Large size content are by default collapsed for a cleaner ui
+ default_collapse_content = true
+ 
+ # The default value for the expand messages settings.
+ default_expand_messages = false
+ 
+ # Hide the chain of thought details from the user in the UI.
+ hide_cot = false
+ 
+ # Link to your github repo. This will add a github button in the UI's header.
+ # github = ""
+ 
+ # Specify a CSS file that can be used to customize the user interface.
+ # The CSS file can be served from the public directory or via an external link.
+ # custom_css = "/public/test.css"
+ 
+ # Override default MUI light theme. (Check theme.ts)
+ [UI.theme.light]
+ #background = "#FAFAFA"
+ #paper = "#FFFFFF"
+ 
+ [UI.theme.light.primary]
+ #main = "#F80061"
+ #dark = "#980039"
+ #light = "#FFE7EB"
+ 
+ # Override default MUI dark theme. (Check theme.ts)
+ [UI.theme.dark]
+ #background = "#FAFAFA"
+ #paper = "#FFFFFF"
+ 
+ [UI.theme.dark.primary]
+ #main = "#F80061"
+ #dark = "#980039"
+ #light = "#FFE7EB"
+ 
+ [meta]
+ generated_by = "0.7.700"
tools/__pycache__/crypto_sentiment_analysis_util.cpython-311.pyc ADDED
Binary file (8.33 kB).
 
tools/__pycache__/data_analyst.cpython-311.pyc ADDED
Binary file (3.33 kB).
 
tools/crypto_sentiment_analysis_util.py ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ from dotenv import load_dotenv
4
+ from transformers import pipeline
5
+ import os
6
+ import pandas as pd
7
+ from GoogleNews import GoogleNews
8
+ from langchain_openai import ChatOpenAI
9
+ import pandas as pd
10
+ import praw
11
+ from datetime import datetime
12
+
13
+ load_dotenv()
14
+
15
+ def fetch_news(stockticker):
16
+
17
+ """ Fetches news articles for a given stock symbol within a specified date range.
18
+
19
+ Args:
20
+ - stockticker (str): Symbol of a particular stock
21
+
22
+ Returns:
23
+ - list: A list of dictionaries containing stock news. """
24
+
25
+ load_dotenv()
26
+ days_to_fetch_news = os.environ["DAYS_TO_FETCH_NEWS"]
27
+
28
+ googlenews = GoogleNews()
29
+ googlenews.set_period(days_to_fetch_news)
30
+ googlenews.get_news(stockticker)
31
+ news_json=googlenews.get_texts()
32
+ urls=googlenews.get_links()
33
+
34
+ no_of_news_articles_to_fetch = os.environ["NO_OF_NEWS_ARTICLES_TO_FETCH"]
35
+ news_article_list = []
36
+ counter = 0
37
+ for article in news_json:
38
+
39
+ if(counter >= int(no_of_news_articles_to_fetch)):
40
+ break
41
+
42
+ relevant_info = {
43
+ 'News_Article': article,
44
+ 'URL': urls[counter]
45
+ }
46
+ news_article_list.append(relevant_info)
47
+ counter+=1
48
+
49
+ return news_article_list
50
+
51
+ def fetch_reddit_news(cryptocurrencyticker):
52
+ load_dotenv()
53
+ REDDIT_USER_AGENT= os.environ["REDDIT_USER_AGENT"]
54
+ REDDIT_CLIENT_ID= os.environ["REDDIT_CLIENT_ID"]
55
+ REDDIT_CLIENT_SECRET= os.environ["REDDIT_CLIENT_SECRET"]
56
+ #https://medium.com/geekculture/a-complete-guide-to-web-scraping-reddit-with-python-16e292317a52
57
+ user_agent = REDDIT_USER_AGENT
58
+ reddit = praw.Reddit (
59
+ client_id= REDDIT_CLIENT_ID,
60
+ client_secret= REDDIT_CLIENT_SECRET,
61
+ user_agent=user_agent
62
+ )
63
+
64
+ headlines = set ( )
65
+ for submission in reddit.subreddit('CryptoCurrencyTrading').search(cryptocurrencyticker,time_filter='week'):
66
+ headlines.add(submission.title + ', Date: ' +datetime.utcfromtimestamp(int(submission.created_utc)).strftime('%Y-%m-%d %H:%M:%S') + ', URL:' +submission.url)
67
+
68
+ if len(headlines)<10:
69
+ for submission in reddit.subreddit('CryptoCurrencyTrading').search(cryptocurrencyticker,time_filter='year'):
70
+ headlines.add(submission.title + ', Date: ' +datetime.utcfromtimestamp(int(submission.created_utc)).strftime('%Y-%m-%d %H:%M:%S') + ', URL:' +submission.url)
71
+
72
+ if len(headlines)<10:
73
+ for submission in reddit.subreddit('CryptoCurrencyTrading').search(cryptocurrencyticker): #,time_filter='week'):
74
+ headlines.add(submission.title + ', Date: ' +datetime.utcfromtimestamp(int(submission.created_utc)).strftime('%Y-%m-%d %H:%M:%S') + ', URL:' +submission.url)
75
+ return headlines
76
+
77
+ def analyze_sentiment(article):
78
+ """
79
+ Analyzes the sentiment of a given news article.
80
+
81
+ Args:
82
+ - news_article (dict): Dictionary containing 'summary', 'headline', and 'created_at' keys.
83
+
84
+ Returns:
85
+ - dict: A dictionary containing sentiment analysis results.
86
+ """
87
+
88
+ #Analyze sentiment using default model
89
+ #classifier = pipeline('sentiment-analysis')
90
+
91
+ #Analyze sentiment using specific model
92
+ classifier = pipeline(model='mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis')
93
+ sentiment_result = classifier(str(article))
94
+
95
+ analysis_result = {
96
+ 'News_Article': article,
97
+ 'Sentiment': sentiment_result
98
+ }
99
+
100
+ return analysis_result
101
+
102
+
103
+ def generate_summary_of_sentiment(sentiment_analysis_results):
104
+
105
+
106
+ news_article_sentiment = str(sentiment_analysis_results)
107
+ print("News article sentiment : " + news_article_sentiment)
108
+
109
+
110
+ os.environ["OPENAI_API_KEY"] = os.environ["OPENAI_API_KEY"]
111
+ model = ChatOpenAI(
112
+ model="gpt-4o",
113
+ temperature=0,
114
+ max_tokens=None,
115
+ timeout=None,
116
+ max_retries=2,
117
+ # api_key="...", # if you prefer to pass api key in directly instaed of using env vars
118
+ # base_url="...",
119
+ # organization="...",
120
+ # other params...
121
+ )
122
+
123
+ messages=[
124
+ {"role": "system", "content": "You are a helpful assistant that looks at all news articles with their sentiment, hyperlink and date in front of the article text, the articles MUST be ordered by date!, and generate a summary rationalizing dominant sentiment. At the end of the summary, add URL links with dates for all the articles in the markdown format for streamlit. Make sure the articles as well as the links are ordered descending by Date!!!!!!! Example of adding the URLs: The Check out the links: [link](%s) % url, 2024-03-01. "},
125
+ {"role": "user", "content": f"News articles and their sentiments: {news_article_sentiment}"}
126
+ ]
127
+ response = model.invoke(messages)
128
+
129
+
130
+ summary = response.content
131
+ print ("+++++++++++++++++++++++++++++++++++++++++++++++")
132
+ print(summary)
133
+ print ("+++++++++++++++++++++++++++++++++++++++++++++++")
134
+ return summary
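+ # Usage sketch: summary = generate_summary_of_sentiment(analysis_results)
+ # (requires OPENAI_API_KEY; returns a markdown summary with dated article links)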
135
+
136
+
137
+ def plot_sentiment_graph(sentiment_analysis_results):
138
+ """
139
+ Computes sentiment counts for a sentiment analysis graph (pie-chart rendering is commented out below)
140
+
141
+ Args:
142
+ - sentiment_analysis_results (list): List of dictionaries containing 'News_Article' and 'Sentiment' keys.
143
+
144
+ Returns:
145
+ - Series: counts of each sentiment label.
146
+ """
147
+ df = pd.DataFrame(sentiment_analysis_results)
148
+ print(df)
149
+
150
+ #Count occurrences of each sentiment label
151
+ sentiment_counts = df['Sentiment'].value_counts()
154
+
155
+ # Plotting pie chart
156
+ # fig = plt.figure(figsize=(5, 3))
157
+ # plt.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=140)
158
+ # plt.axis('equal') # Equal aspect ratio ensures that pie is drawn as a circle.
159
+
160
+ #Uncomment below to display the chart when running this program locally
161
+ #plt.show()
162
+
163
+ return sentiment_counts
164
+
165
+
166
+ def get_dominant_sentiment (sentiment_analysis_results):
167
+ """
168
+ Returns overall sentiment, negative or positive or neutral depending on the count of negative sentiment vs positive sentiment
169
+
170
+ Args:
171
+ - sentiment_analysis_results (list): List of dictionaries containing 'News_Article' and 'Sentiment' keys.
172
+
173
+ Returns:
174
+ - str: the sentiment label with the highest count.
175
+ """
176
+ df = pd.DataFrame(sentiment_analysis_results)
177
+
178
+ # Group by the 'sentiment' column and count the occurrences of each sentiment value
179
+ sentiment_counts = df['Sentiment'].value_counts().reset_index()
180
+ sentiment_counts.columns = ['sentiment', 'count']
181
+ print(sentiment_counts)
182
+
183
+ # Find the sentiment with the highest count
184
+ dominant_sentiment = sentiment_counts.loc[sentiment_counts['count'].idxmax()]
185
+
186
+ return dominant_sentiment['sentiment']
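+ # e.g. for Sentiment values ['positive', 'positive', 'negative'], value_counts()
+ # yields positive:2, negative:1, so 'positive' is returned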
187
+
188
+ #starting point of the program
189
+ if __name__ == '__main__':
190
+
191
+ #fetch news articles for a sample ticker
192
+ news_articles = fetch_news('AAPL')
193
+
194
+ analysis_results = []
195
+
196
+ #Perform sentiment analysis for each news article
197
+ for article in news_articles:
198
+ sentiment_analysis_result = analyze_sentiment(article['News_Article'])
199
+
200
+ # Display sentiment analysis results
201
+ print(f'News Article: {sentiment_analysis_result["News_Article"]} : Sentiment: {sentiment_analysis_result["Sentiment"]}', '\n')
202
+
203
+ result = {
204
+ 'News_Article': sentiment_analysis_result["News_Article"],
205
+ 'Sentiment': sentiment_analysis_result["Sentiment"][0]['label']
206
+ }
207
+
208
+ analysis_results.append(result)
209
+
210
+
211
+ #Determine dominant sentiment based on sentiment analysis of the news articles
212
+ dominant_sentiment = get_dominant_sentiment(analysis_results)
213
+ print(dominant_sentiment)
214
+
215
+ #Plot graph
216
+ plot_sentiment_graph(analysis_results)
217
+
tools/data_analyst.py ADDED
@@ -0,0 +1,42 @@
1
+ from pydantic import BaseModel, Field
2
+ from langchain.tools import BaseTool
3
+ from typing import Optional, Type
4
+ from langchain.tools import StructuredTool
5
8
+ from pycoingecko import CoinGeckoAPI
9
+ cg = CoinGeckoAPI()
10
+
11
+ def data_analyst_tools():
12
+ def get_crypto_price(cryptocurrencyticker: str) -> str:
13
+ current_data=cg.get_price(ids=cryptocurrencyticker, vs_currencies='usd',include_market_cap='true', include_24hr_vol='true',include_last_updated_at='true')
14
+ return str(current_data)
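+ # Example (requires network access): get_crypto_price('ethereum') should return a string
+ # like "{'ethereum': {'usd': ..., 'usd_market_cap': ..., 'usd_24h_vol': ..., 'last_updated_at': ...}}"
+ # per CoinGecko's simple/price endpoint; note ids are CoinGecko names, not exchange tickers.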
15
+
16
+ class CryptoPriceCheckInput(BaseModel):
17
+ """Input for Crypto price check."""
18
+ cryptocurrencyticker: str = Field(..., description="CoinGecko id for the cryptocurrency (e.g. 'ethereum')")
19
+
20
+ class CryptoPriceTool(BaseTool):
21
+ name = "get_crypto_price"
22
+ description = "Useful for when you need to find out the price of Cryptocurrency. You should input the Crypto ticker used on the Coingecko API"
23
+ """Input for Cryptocurrency price check."""
24
+ Cryptoticker: str = Field(..., description="Ticker symbol for Crypto or index")
25
+ def _run(self, cryptocurrencyticker: str):
26
+ # print("i'm running")
27
+ price_response = get_crypto_price(cryptocurrencyticker)
28
+
29
+ return str(price_response)
30
+
31
+ def _arun(self, cryptocurrencyticker: str):
32
+ raise NotImplementedError("This tool does not support async")
33
+ args_schema: Optional[Type[BaseModel]] = CryptoPriceCheckInput
34
+
35
+
36
+ tools_data_analyst = [StructuredTool.from_function(
37
+ func=get_crypto_price,
38
+ args_schema=CryptoPriceCheckInput,
39
+ description="Function to get current Crypto prices.",
40
+ ),
41
+ ]
42
+ return tools_data_analyst
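+ # Usage sketch: tools = data_analyst_tools(); the returned list can be handed to a LangChain agent.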
tools/df_history.csv ADDED
@@ -0,0 +1,63 @@
1
+ Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,stockticker
2
+ 2024-04-22 00:00:00-04:00,399.3596471827562,402.1246793258712,395.03745670124425,400.2380676269531,20286900,0.0,0.0,MSFT
3
+ 2024-04-23 00:00:00-04:00,403.51216021293357,407.4650522060062,402.33429210205253,406.836181640625,15734500,0.0,0.0,MSFT
4
+ 2024-04-24 00:00:00-04:00,408.82258607970806,411.72735028943725,406.0475926794115,408.323486328125,15065300,0.0,0.0,MSFT
5
+ 2024-04-25 00:00:00-04:00,393.32054400787314,399.1700088625431,387.3313470651067,398.321533203125,40586500,0.0,0.0,MSFT
6
+ 2024-04-26 00:00:00-04:00,411.4279132786848,412.25640548421114,405.02945064216703,405.58843994140625,29694700,0.0,0.0,MSFT
7
+ 2024-04-29 00:00:00-04:00,404.52035539531056,405.5884361925186,398.4712687423875,401.5257568359375,19582100,0.0,0.0,MSFT
8
+ 2024-04-30 00:00:00-04:00,400.76710737423014,401.4359144425664,388.4693126899027,388.6289978027344,28781400,0.0,0.0,MSFT
9
+ 2024-05-01 00:00:00-04:00,391.9030904630616,400.9967037344784,389.6072438016868,394.2289123535156,23562500,0.0,0.0,MSFT
10
+ 2024-05-02 00:00:00-04:00,396.94401914412265,399.20992105581087,393.9394288835304,397.1236877441406,17709400,0.0,0.0,MSFT
11
+ 2024-05-03 00:00:00-04:00,401.55570709720826,406.4169339510819,401.13644988960164,405.9278259277344,17446700,0.0,0.0,MSFT
12
+ 2024-05-06 00:00:00-04:00,408.024048156178,413.1847226485525,405.63833666603693,412.7954406738281,16996600,0.0,0.0,MSFT
13
+ 2024-05-07 00:00:00-04:00,413.91342570011614,413.92341744357753,408.35344694069664,408.6029968261719,20018200,0.0,0.0,MSFT
14
+ 2024-05-08 00:00:00-04:00,407.4351142805277,411.48780192255407,405.97772103822234,409.80084228515625,11792300,0.0,0.0,MSFT
15
+ 2024-05-09 00:00:00-04:00,409.8307875446534,411.97691043744567,408.363433019907,411.57763671875,14689700,0.0,0.0,MSFT
16
+ 2024-05-10 00:00:00-04:00,412.1965086797442,414.6321179246016,411.05854661467066,413.9932556152344,13402300,0.0,0.0,MSFT
17
+ 2024-05-13 00:00:00-04:00,417.2573820335048,417.5967662074119,410.08032520369875,412.97509765625,15440200,0.0,0.0,MSFT
18
+ 2024-05-14 00:00:00-04:00,411.2781631216723,416.73831581889846,410.8090081198034,415.80999755859375,15109300,0.0,0.0,MSFT
19
+ 2024-05-15 00:00:00-04:00,417.8999938964844,423.80999755859375,417.2699890136719,423.0799865722656,22239500,0.75,0.0,MSFT
20
+ 2024-05-16 00:00:00-04:00,421.79998779296875,425.4200134277344,420.3500061035156,420.989990234375,17530100,0.0,0.0,MSFT
21
+ 2024-05-17 00:00:00-04:00,422.5400085449219,422.9200134277344,418.0299987792969,420.2099914550781,15352200,0.0,0.0,MSFT
22
+ 2024-05-20 00:00:00-04:00,420.2099914550781,426.7699890136719,419.989990234375,425.3399963378906,16272100,0.0,0.0,MSFT
23
+ 2024-05-21 00:00:00-04:00,426.8299865722656,432.9700012207031,424.8500061035156,429.0400085449219,21453300,0.0,0.0,MSFT
24
+ 2024-05-22 00:00:00-04:00,430.0899963378906,432.4100036621094,427.1300048828125,430.5199890136719,18073700,0.0,0.0,MSFT
25
+ 2024-05-23 00:00:00-04:00,432.9700012207031,433.6000061035156,425.4200134277344,427.0,17211700,0.0,0.0,MSFT
26
+ 2024-05-24 00:00:00-04:00,427.19000244140625,431.05999755859375,424.4100036621094,430.1600036621094,11845800,0.0,0.0,MSFT
27
+ 2024-05-28 00:00:00-04:00,429.6300048828125,430.82000732421875,426.6000061035156,430.32000732421875,15718000,0.0,0.0,MSFT
28
+ 2024-05-29 00:00:00-04:00,425.69000244140625,430.94000244140625,425.69000244140625,429.1700134277344,15517100,0.0,0.0,MSFT
29
+ 2024-05-30 00:00:00-04:00,424.29998779296875,424.29998779296875,414.239990234375,414.6700134277344,28424800,0.0,0.0,MSFT
30
+ 2024-05-31 00:00:00-04:00,416.75,416.75,404.510009765625,415.1300048828125,47995300,0.0,0.0,MSFT
31
+ 2024-06-03 00:00:00-04:00,415.5299987792969,416.42999267578125,408.9200134277344,413.5199890136719,17484700,0.0,0.0,MSFT
32
+ 2024-06-04 00:00:00-04:00,412.42999267578125,416.44000244140625,409.67999267578125,416.07000732421875,14348900,0.0,0.0,MSFT
33
+ 2024-06-05 00:00:00-04:00,417.80999755859375,424.0799865722656,416.29998779296875,424.010009765625,16988000,0.0,0.0,MSFT
34
+ 2024-06-06 00:00:00-04:00,424.010009765625,425.30999755859375,420.5799865722656,424.5199890136719,14861300,0.0,0.0,MSFT
35
+ 2024-06-07 00:00:00-04:00,426.20001220703125,426.2799987792969,423.0,423.8500061035156,13621700,0.0,0.0,MSFT
36
+ 2024-06-10 00:00:00-04:00,424.70001220703125,428.0799865722656,423.8900146484375,427.8699951171875,14003000,0.0,0.0,MSFT
37
+ 2024-06-11 00:00:00-04:00,425.4800109863281,432.82000732421875,425.25,432.67999267578125,14551100,0.0,0.0,MSFT
38
+ 2024-06-12 00:00:00-04:00,435.32000732421875,443.3999938964844,433.25,441.05999755859375,22366200,0.0,0.0,MSFT
39
+ 2024-06-13 00:00:00-04:00,440.8500061035156,443.3900146484375,439.3699951171875,441.5799865722656,15960600,0.0,0.0,MSFT
40
+ 2024-06-14 00:00:00-04:00,438.2799987792969,443.1400146484375,436.7200012207031,442.57000732421875,13582000,0.0,0.0,MSFT
41
+ 2024-06-17 00:00:00-04:00,442.5899963378906,450.94000244140625,440.7200012207031,448.3699951171875,20790000,0.0,0.0,MSFT
42
+ 2024-06-18 00:00:00-04:00,449.7099914550781,450.1400146484375,444.8900146484375,446.3399963378906,17112500,0.0,0.0,MSFT
43
+ 2024-06-20 00:00:00-04:00,446.29998779296875,446.5299987792969,441.2699890136719,445.70001220703125,19877400,0.0,0.0,MSFT
44
+ 2024-06-21 00:00:00-04:00,447.3800048828125,450.5799865722656,446.510009765625,449.7799987792969,34486200,0.0,0.0,MSFT
45
+ 2024-06-24 00:00:00-04:00,449.79998779296875,452.75,446.4100036621094,447.6700134277344,15913700,0.0,0.0,MSFT
46
+ 2024-06-25 00:00:00-04:00,448.25,451.4200134277344,446.75,450.95001220703125,16747500,0.0,0.0,MSFT
47
+ 2024-06-26 00:00:00-04:00,449.0,453.6000061035156,448.19000244140625,452.1600036621094,16507000,0.0,0.0,MSFT
48
+ 2024-06-27 00:00:00-04:00,452.17999267578125,456.1700134277344,451.7699890136719,452.8500061035156,14806300,0.0,0.0,MSFT
49
+ 2024-06-28 00:00:00-04:00,453.07000732421875,455.3800048828125,446.4100036621094,446.95001220703125,28362300,0.0,0.0,MSFT
50
+ 2024-07-01 00:00:00-04:00,448.6600036621094,457.3699951171875,445.6600036621094,456.7300109863281,17662800,0.0,0.0,MSFT
51
+ 2024-07-02 00:00:00-04:00,453.20001220703125,459.5899963378906,453.1099853515625,459.2799987792969,13979800,0.0,0.0,MSFT
52
+ 2024-07-03 00:00:00-04:00,458.19000244140625,461.0199890136719,457.8800048828125,460.7699890136719,9932800,0.0,0.0,MSFT
53
+ 2024-07-05 00:00:00-04:00,459.6099853515625,468.3500061035156,458.9700012207031,467.55999755859375,16000300,0.0,0.0,MSFT
54
+ 2024-07-08 00:00:00-04:00,466.54998779296875,467.70001220703125,464.4599914550781,466.239990234375,12962300,0.0,0.0,MSFT
55
+ 2024-07-09 00:00:00-04:00,467.0,467.3299865722656,458.0,459.5400085449219,17207200,0.0,0.0,MSFT
56
+ 2024-07-10 00:00:00-04:00,461.2200012207031,466.4599914550781,458.8599853515625,466.25,18196100,0.0,0.0,MSFT
57
+ 2024-07-11 00:00:00-04:00,462.9800109863281,464.7799987792969,451.54998779296875,454.70001220703125,23111200,0.0,0.0,MSFT
58
+ 2024-07-12 00:00:00-04:00,454.3299865722656,456.3599853515625,450.6499938964844,453.54998779296875,16311300,0.0,0.0,MSFT
59
+ 2024-07-15 00:00:00-04:00,453.29998779296875,457.260009765625,451.42999267578125,453.9599914550781,14429400,0.0,0.0,MSFT
60
+ 2024-07-16 00:00:00-04:00,454.2200012207031,454.29998779296875,446.6600036621094,449.5199890136719,17175700,0.0,0.0,MSFT
61
+ 2024-07-17 00:00:00-04:00,442.5899963378906,444.8500061035156,439.17999267578125,443.5199890136719,21778000,0.0,0.0,MSFT
62
+ 2024-07-18 00:00:00-04:00,444.3399963378906,444.6499938964844,434.3999938964844,440.3699951171875,20794800,0.0,0.0,MSFT
63
+ 2024-07-19 00:00:00-04:00,433.1000061035156,441.1400146484375,432.0,437.1099853515625,20862400,0.0,0.0,MSFT
tools/df_with_forecast.csv ADDED
@@ -0,0 +1,113 @@
1
+ ,id,prices,market_caps,total_vol,CLI,CPI,Employment,id,google_trend,GSPC,GC=F,EURUSD,TNX
2
+ 2023-01-31,,,,,107.893,,,,,,,,
3
+ 2023-02-28,,,,,,,,,,,,,
4
+ 2023-03-31,,,,,,,,,,,,,
5
+ 2023-04-30,,,,,108.87,,,,,,,,
6
+ 2023-05-31,,,,,,,,,,,,,
7
+ 2023-06-30,,,,,,,,,,,,,
8
+ 2023-07-31,,,,,108.931,,,,,,,,
9
+ 2023-08-31,,,,,,,,,,,,,
10
+ 2023-09-30,,,,,,,,,,,,,
11
+ 2023-10-31,,,,,105.3285,,,,,,,,
12
+ 2023-11-30,,,,,101.961,,,,,,,,
13
+ 2023-12-31,,,,,76.021485,,,,,,,,
14
+ 2024-01-01,,,,,,308.417,161152.0,,,,,,
15
+ 2024-01-31,,,,,82.796104,,,,,,,,
16
+ 2024-02-01,,,,,,310.326,160968.0,,,,,,
17
+ 2024-02-29,,,,,50.859505000000006,,,,,,,,
18
+ 2024-03-01,,,,,,312.332,161466.0,,,,,,
19
+ 2024-03-31,,,,,71.54424900000001,,,,,,,,
20
+ 2024-04-01,,,,,,313.548,161491.0,,,,,,
21
+ 2024-04-30,,,,,70.99926128571428,,,,,,,,
22
+ 2024-05-01,,,,,,314.069,161083.0,,,,,,
23
+ 2024-05-06,ethereum,3114.4007005303224,374072475993.8121,11127068946.811003,,,,,,5142.419921875,2322.800048828125,1.0758124589920044,4.486999988555908
24
+ 2024-05-07,ethereum,3062.1337546278614,367763583617.18,12212682358.435501,,,,,,5187.2001953125,2324.300048828125,1.0767507553100586,4.4730000495910645
25
+ 2024-05-08,ethereum,2999.4869525045415,360243594935.94305,11179447639.771477,,,,,,5168.97998046875,2313.60009765625,1.0750491619110107,4.484000205993652
26
+ 2024-05-09,ethereum,3003.5642861321066,360831840100.2795,10896607994.801586,,,,,,5189.02978515625,2310.699951171875,1.0746910572052002,4.515999794006348
27
+ 2024-05-10,ethereum,2966.441885585809,356315503171.5778,11384672900.217531,,,,,,5225.490234375,2367.300048828125,1.078515887260437,4.4770002365112305
28
+ 2024-05-11,ethereum,2916.0617572305187,350195653103.13715,9198771437.235367,,,,,,,,,
29
+ 2024-05-12,ethereum,2924.2519718055137,351264187115.9079,5460016379.689179,,,,,,,,,
30
+ 2024-05-13,ethereum,2945.101626776707,353629048507.87177,11486452597.64263,,,,,,5233.080078125,2358.300048828125,1.077040672302246,4.486999988555908
31
+ 2024-05-14,ethereum,2900.6563297650755,348385655037.8947,11542903142.535707,,,,,,5221.10009765625,2336.0,1.079354166984558,4.480999946594238
32
+ 2024-05-15,ethereum,2963.759315698215,355948495364.97504,11917611608.681692,,,,,,5263.259765625,2361.60009765625,1.0814552307128906,4.418000221252441
33
+ 2024-05-16,ethereum,2973.3231927105603,357110705527.3933,12982589615.756212,,,,,,5310.06982421875,2389.5,1.0889805555343628,4.329999923706055
34
+ 2024-05-17,ethereum,3055.768061630982,366942477718.2434,13075157085.833336,,,,,,5303.10009765625,2380.699951171875,1.0867793560028076,4.395999908447266
35
+ 2024-05-18,ethereum,3116.6010226358244,374444822902.376,10245527976.19185,,,,,,,,,
36
+ 2024-05-19,ethereum,3089.6899077803787,371102827471.68726,7186818177.12275,,,,,,,,,
37
+ 2024-05-20,ethereum,3275.1726235398733,393064227949.9598,14861247848.577646,,,,,,5305.35009765625,2415.800048828125,1.0875475406646729,4.421999931335449
38
+ 2024-05-21,ethereum,3736.779369147562,449277769707.8145,39114320451.96952,,,,,,5298.68994140625,2429.5,1.086082935333252,4.429999828338623
39
+ 2024-05-22,ethereum,3743.6797798618477,449904838634.0898,27177492763.312775,,,,,,5319.27978515625,2417.5,1.0855170488357544,4.453000068664551
40
+ 2024-05-23,ethereum,3802.195186351903,456440243805.41815,31490184022.903027,,,,,,5340.259765625,2371.199951171875,1.0825204849243164,4.418000221252441
41
+ 2024-05-24,ethereum,3716.584005854382,446214223854.924,36595327650.52252,,,,,,5281.4501953125,2342.60009765625,1.0812681913375854,4.488999843597412
42
+ 2024-05-25,ethereum,3747.9139284087246,450191345047.8288,12320451102.246416,,,,,,,,,
43
+ 2024-05-26,ethereum,3822.9062956505663,459222383843.30835,10446532594.41313,,,,,,,,,
44
+ 2024-05-27,ethereum,3904.4531708423287,468888345618.59467,15875563017.846474,,,,,,,,,
45
+ 2024-05-28,ethereum,3850.4555712911824,462421783973.1804,16981798566.54435,,,,,,5315.91015625,2336.89990234375,1.0847634077072144,4.453000068664551
46
+ 2024-05-29,ethereum,3790.3890178850775,455720009606.71265,16291087718.980705,,,,,,5278.72998046875,2340.300048828125,1.0861891508102417,4.565999984741211
47
+ 2024-05-30,ethereum,3760.0562456291614,451909655831.15967,14241005007.401228,,,,,,5259.77001953125,2336.89990234375,1.0851871967315674,4.593999862670898
48
+ 2024-05-31,ethereum,3766.348666657115,452613341252.82587,13233598539.854492,74.31149225,,,,,5243.2099609375,2344.10009765625,1.0801819562911987,4.552000045776367
49
+ 2024-06-01,ethereum,3797.773661763607,456183752606.3188,10134684638.033493,,314.175,161199.0,,,,,,
50
+ 2024-06-02,ethereum,3789.4643581084024,455395042781.10913,8704992695.338041,,,,,,,,,
51
+ 2024-06-03,ethereum,3792.876939393309,455583728457.67914,11345758405.200346,,,,,,5297.14990234375,2322.60009765625,1.0835410356521606,4.4730000495910645
52
+ 2024-06-04,ethereum,3791.003611630613,455454864609.9826,11583124383.448898,,,,,,5278.240234375,2347.5,1.0853049755096436,4.361000061035156
53
+ 2024-06-05,ethereum,3826.985885858965,459614958452.3508,12970952878.62752,,,,,,5314.47998046875,2326.39990234375,1.0907385349273682,4.326000213623047
54
+ 2024-06-06,ethereum,3831.1242540234693,460276696407.6153,12084914430.50892,,,,,,5357.7998046875,2355.0,1.0881866216659546,4.301000118255615
55
+ 2024-06-07,ethereum,3759.215714495715,451768670762.0454,13410143841.10996,,,,,,5343.81005859375,2379.89990234375,1.087453007698059,4.296999931335449
56
+ 2024-06-08,ethereum,3684.6113824590207,442729750197.81464,12328054405.91001,,,,,,,,,
57
+ 2024-06-09,ethereum,3693.5688944666886,443758507298.5523,6434938580.374867,,,,,,,,,
58
+ 2024-06-10,ethereum,3669.804414640858,441385340635.06006,7678615033.327232,,,,,,5341.22021484375,2290.60009765625,1.0894432067871094,4.453000068664551
59
+ 2024-06-11,ethereum,3511.552216421044,421773678119.8096,16295040761.774801,,,,,,5353.0,2300.0,1.0778882503509521,4.435999870300293
60
+ 2024-06-12,ethereum,3559.325489288785,427430429412.461,16146566925.235643,,,,,,5409.1298828125,2314.89990234375,1.0764262676239014,4.388000011444092
61
+ 2024-06-13,ethereum,3487.56598378508,418921286187.6116,16102374829.54766,,,,,,5441.93017578125,2309.39990234375,1.0740330219268799,4.309999942779541
62
+ 2024-06-14,ethereum,3477.487513961109,417925411247.93335,13751574508.884224,,,,,,5424.080078125,2307.0,1.0813149213790894,4.196000099182129
63
+ 2024-06-15,ethereum,3547.2656444381705,426052557357.5137,14435000601.871336,,,,,,,,,
64
+ 2024-06-16,ethereum,3581.554801676437,430252401136.91156,9265313569.748865,,,,,,,,,
65
+ 2024-06-17,ethereum,3521.540033404755,423525318255.71875,14792637405.883486,,,,,,5431.10986328125,2320.199951171875,1.0738831758499146,4.264999866485596
66
+ 2024-06-18,ethereum,3443.057015401169,420823437646.33813,20647300792.256817,,,,,,5476.14990234375,2311.800048828125,1.0702168941497803,4.2870001792907715
67
+ 2024-06-19,ethereum,3548.6088199945443,433926242387.04596,17174770852.62574,,,,,,,,,
68
+ 2024-06-20,ethereum,3544.840023868359,433416267378.3272,14431531886.666199,,,,,,5499.990234375,2328.89990234375,1.0740677118301392,4.23799991607666
69
+ 2024-06-21,ethereum,3501.4844259897077,428154107816.4788,15591212529.32492,,,,,,5466.77001953125,2331.199951171875,1.0742292404174805,4.223999977111816
70
+ 2024-06-22,ethereum,3498.401912499916,427797791720.6712,10074881877.914883,,,,,,,,,
71
+ 2024-06-23,ethereum,3471.25661843264,424559563614.4485,6991292722.007789,,,,,,,,,
72
+ 2024-06-24,ethereum,3330.9914608412787,405609291184.5871,18078022125.09135,,,,,,5459.580078125,2323.300048828125,1.0751532316207886,4.275000095367432
73
+ 2024-06-25,ethereum,3388.6552364887525,407290827369.29706,17480780190.891705,,,,,,5460.72998046875,2324.39990234375,1.0706409215927124,4.2170000076293945
74
+ 2024-06-26,ethereum,3379.6384972292053,406200008420.7241,11068675554.45744,,,,,,5460.7099609375,2307.89990234375,1.0687757730484009,4.284999847412109
75
+ 2024-06-27,ethereum,3422.9987958226375,411321369785.6611,11381676501.223251,,,,,,5473.58984375,2296.800048828125,1.0732723474502563,4.327000141143799
76
+ 2024-06-28,ethereum,3409.9899950655326,410020829397.1302,12108738591.606865,,,,,,5488.47998046875,2325.39990234375,1.0709619522094727,4.303999900817871
77
+ 2024-06-29,ethereum,3382.1846820729265,406488247465.7656,8118319341.85326,,,,,,,,,
78
+ 2024-06-30,ethereum,3405.653616859288,409202194429.17535,6586293113.04436,74.519282,,,,,,,,
79
+ 2024-07-01,ethereum,3466.7870078346746,416633403003.7347,10972345968.355255,,,161266.0,ethereum,16.81,5471.080078125,2323.800048828125,1.068010926246643,4.423999786376953
80
+ 2024-07-02,ethereum,3425.9867093877297,411805426012.3614,9347179358.150301,,,,ethereum,13.69,5461.83984375,2330.699951171875,1.0708472728729248,4.441999912261963
81
+ 2024-07-03,ethereum,3305.863374711538,397336350754.1616,12936936280.4328,,,,ethereum,18.49,5507.43994140625,2330.89990234375,1.0735257863998413,4.426000118255615
82
+ 2024-07-04,ethereum,3141.9137606677323,377789953332.375,19762357071.82109,,,,ethereum,18.49,,,,
83
+ 2024-07-05,ethereum,2942.873381316971,353947760154.63116,30371642360.497025,,,,ethereum,33.64,5537.91015625,2354.89990234375,1.0737102031707764,4.330999851226807
84
+ 2024-07-06,ethereum,3026.0221393875127,363716353821.9705,14095786002.87394,,,,ethereum,17.64,,,,
85
+ 2024-07-07,ethereum,2980.329795932314,358196417772.35425,10109122874.859922,,,,ethereum,12.959999999999999,,,,
86
+ 2024-07-08,ethereum,2995.42249679924,359848515705.2204,18656672090.05734,,,,ethereum,22.09,5572.75,2381.699951171875,1.0748412609100342,4.306000232696533
87
+ 2024-07-09,ethereum,3067.945362448234,368790204798.86127,17265940071.85487,,,,ethereum,18.49,5584.240234375,2363.10009765625,1.0793308019638062,4.300000190734863
88
+ 2024-07-10,ethereum,3103.355851261978,373109735757.489,14135312863.462542,,,,ethereum,18.49,5591.259765625,2366.300048828125,1.082602620124817,4.2779998779296875
89
+ 2024-07-11,ethereum,3120.2511152159645,375003912446.32135,14601771425.99495,,,,ethereum,20.25,5635.2099609375,2378.699951171875,1.0830950736999512,4.288000106811523
90
+ 2024-07-12,ethereum,3106.665060608468,373479677453.21234,13272564100.61316,,,,ethereum,14.44,5590.759765625,2399.800048828125,1.0814785957336426,4.205999851226807
91
+ 2024-07-13,ethereum,3156.7229997808245,379336539640.4357,9099839817.153015,,,,ethereum,13.69,,,,
92
+ 2024-07-14,ethereum,3214.951666958399,386450506355.89264,9399920281.501238,,,,ethereum,14.44,,,,
93
+ 2024-07-15,ethereum,3390.5558977346586,407705698434.75323,14682856047.608225,,,,ethereum,36.0,5638.16015625,2430.0,1.083329677581787,4.236999988555908
94
+ 2024-07-16,ethereum,3439.2531522603167,413315389170.71234,20249821643.173527,,,,ethereum,33.64,5644.08984375,2427.39990234375,1.0872756242752075,4.175000190734863
95
+ 2024-07-17,ethereum,3445.9301630097125,414204244128.5122,18316846571.47861,,,,ethereum,29.160000000000004,5610.06982421875,2472.89990234375,1.0885539054870605,4.178999900817871
96
+ 2024-07-18,ethereum,3428.483712881156,412151252125.53094,14880766047.388323,,,,ethereum,27.04,5608.56005859375,2466.0,1.0901559591293335,4.183000087738037
97
+ 2024-07-19,ethereum,3459.866194068627,415830197917.89233,15874038814.582893,,,,ethereum,26.01,5543.3701171875,2418.800048828125,1.0902509689331055,4.224999904632568
98
+ 2024-07-20,ethereum,3506.166559401983,421506391873.4467,12819209095.177147,,,,ethereum,24.009999999999998,,,,
99
+ 2024-07-21,ethereum,3505.198785199442,421479666822.44586,9734735125.637077,,,,ethereum,24.009999999999998,,,,
100
+ 2024-07-22,ethereum,3478.5398389521833,418248333345.1597,17165553086.473969,,,,ethereum,43.56,5544.5400390625,2402.10009765625,1.094020128250122,4.224999904632568
101
+ 2024-07-23,ethereum,3471.4537045282686,417282646136.00616,22247562224.91563,,,,ethereum,100.0,5565.2998046875,2395.800048828125,1.0901559591293335,4.236999988555908
102
+ 2024-07-24,ethereum,3389.34554552627,407555827602.13794,19555399218.051266,,,,ethereum,53.29,5505.83984375,2421.0,1.0899182558059692,4.229000091552734
103
+ 2024-07-25,ethereum,3166.8674273376732,380757485120.10834,23309182458.392136,,,,ethereum,49.0,5428.7001953125,2365.5,1.0889805555343628,4.205999851226807
104
+ 2024-07-26,ethereum,3256.836248537211,391618277681.34503,18543581673.430332,,,,ethereum,32.489999999999995,5433.669921875,2368.699951171875,1.0850694179534912,4.244999885559082
105
+ 2024-07-27,ethereum,3268.634820892746,393058948301.01434,12350215232.854982,,,,ethereum,26.01,,,,
106
+ 2024-07-28,ethereum,3261.358165252211,392191417408.00104,11791845044.016218,,,,ethereum,22.09,,,,
107
+ 2024-07-29,ethereum,3339.1396964471446,401594098625.7107,13845546119.727053,,,,ethereum,30.250000000000004,5476.5498046875,2377.300048828125,1.0839520692825317,4.163000106811523
108
+ 2024-07-30,ethereum,3307.2248771443924,397741773052.1379,15364998924.01399,,,,ethereum,21.16,5478.72998046875,2380.89990234375,1.0851283073425293,4.173999786376953
109
+ 2024-07-31,ethereum,3281.768911864367,394601659805.1198,14197119142.220865,99.00630000000001,,,ethereum,19.36,5505.58984375,2407.10009765625,1.0853756666183472,4.127999782562256
110
+ 2024-08-01,ethereum,3164.2867044555733,380493843585.9209,19172882534.0517,,,,ethereum,45.0,5537.83984375,2446.699951171875,1.081946611404419,4.052999973297119
111
+ 2024-08-02,ethereum,3067.1887722202587,368925501105.263,19531566013.812,,,,ethereum,37.309999999999995,5376.6298828125,2490.800048828125,1.0816072225570679,3.940000057220459
112
+ 2024-08-03,ethereum,2946.763174245429,354325094002.6969,19461236907.28371,,,,ethereum,35.6,,,,
113
+ 2024-08-04,ethereum,2911.61,350078479982.3696,13827666339.828363,,,,ethereum,40.42,,,,
tools/stock_sentiment_evalutor.py ADDED
@@ -0,0 +1,261 @@
1
+ from transformers import pipeline
2
+ from alpaca_trade_api import REST
3
+ import os
4
+ from dotenv import load_dotenv
5
+ from datetime import datetime
6
+ import pandas as pd
7
+ import matplotlib.pyplot as plt
8
+ from datetime import date, timedelta
9
+ from pydantic import BaseModel, Field
10
+ from langchain.tools import BaseTool
11
+ from typing import Optional, Type
12
+ from langchain.tools import StructuredTool
13
+
14
+
15
+ def sentimental_analysis_tools():
16
+
17
+ class AlpacaNewsFetcher:
18
+ """
19
+ A class for fetching news articles related to a specific stock from Alpaca API.
20
+
21
+ Attributes:
22
+ - api_key (str): Alpaca API key for authentication.
23
+ - api_secret (str): Alpaca API secret for authentication.
24
+ - rest_client (alpaca_trade_api.REST): Alpaca REST API client.
25
+ """
26
+
27
+ def __init__(self):
28
+ """
29
+ Initializes the AlpacaNewsFetcher object.
30
+
31
+ Credentials are read from the ALPACA_API_KEY and ALPACA_SECRET
33
+ environment variables (loaded via .env).
34
+ """
35
+ load_dotenv()
36
+ self.api_key = os.environ["ALPACA_API_KEY"]
37
+ self.api_secret = os.environ["ALPACA_SECRET"]
38
+ self.rest_client = REST(self.api_key, self.api_secret)
39
+
40
+ #No of news articles to fetch for the input stock ticker.
41
+ self.no_of_newsarticles_to_fetch = int(os.environ["NO_OF_NEWSARTICLES_TO_FETCH"])
42
+
43
+ #No of days to fetch news articles for
44
+ self.no_of_days = int(os.environ["NO_OF_DAYS_TO_FETCH_NEWS_ARTICLES"])
45
+
46
+
47
+ def fetch_news(self, stockticker):
48
+ """
49
+ Fetches news articles for a given stock symbol within a specified date range.
50
+
51
+ Args:
52
+ - stockticker (str): Stock symbol for which news articles are to be fetched (e.g., "AAPL").
53
+
54
+ Returns:
55
+ - list: A list of dictionaries containing relevant information for each news article.
56
+ """
57
+
58
+ #Date range for which to get the news
59
+ start_date = date.today() - timedelta(days=self.no_of_days)
60
+ end_date = date.today()
61
+
62
+ news_articles = self.rest_client.get_news(stockticker, start_date, end_date, limit=self.no_of_newsarticles_to_fetch )
63
+ formatted_news = []
64
+
65
+ for article in news_articles:
66
+ summary = article.summary
67
+ title = article.headline
68
+ timestamp = article.created_at
69
+
70
+ relevant_info = {
71
+ 'timestamp': timestamp,
72
+ 'title': title,
73
+ 'summary': summary
74
+ }
75
+
76
+ formatted_news.append(relevant_info)
77
+
78
+ return formatted_news
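+ # Each entry looks like {'timestamp': ..., 'title': ..., 'summary': ...}. Usage sketch
+ # (assumes the ALPACA_* env vars are set): AlpacaNewsFetcher().fetch_news('AAPL')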
79
+
80
+
81
+ class NewsSentimentAnalysis:
82
+ """
83
+ A class for sentiment analysis of news articles using the Transformers library.
84
+
85
+ Attributes:
86
+ - classifier (pipeline): Sentiment analysis pipeline from Transformers.
87
+ """
88
+
89
+ def __init__(self):
90
+ """
91
+ Initializes the NewsSentimentAnalysis object.
92
+ """
93
+ self.classifier = pipeline('sentiment-analysis')
94
+
95
+
96
+ def analyze_sentiment(self, news_article):
97
+ """
98
+ Analyzes the sentiment of a given news article.
99
+
100
+ Args:
101
+ - news_article (dict): Dictionary containing 'summary', 'title', and 'timestamp' keys.
102
+
103
+ Returns:
104
+ - dict: A dictionary containing sentiment analysis results.
105
+ """
106
+ summary = news_article['summary']
107
+ title = news_article['title']
108
+ timestamp = news_article['timestamp']
109
+
110
+ relevant_text = summary + title
111
+ sentiment_result = self.classifier(relevant_text)
112
+
113
+ analysis_result = {
114
+ 'timestamp': timestamp,
115
+ 'title': title,
116
+ 'summary': summary,
117
+ 'sentiment': sentiment_result
118
+ }
119
+
120
+ return analysis_result
121
+
122
+ def plot_sentiment_graph(self, sentiment_analysis_result):
123
+ """
124
+ Plots a sentiment analysis graph
125
+
126
+ Args:
127
+ - sentiment_analysis_result (list): List of dictionaries containing 'Timestamp' and 'Sentiment' keys.
128
+
129
+ Returns:
130
+ - None: displays a pie chart of sentiment counts grouped by date.
131
+ """
132
+ df = pd.DataFrame(sentiment_analysis_result)
133
+ df['Timestamp'] = pd.to_datetime(df['Timestamp'])
134
+ df['Date'] = df['Timestamp'].dt.date
135
+
136
+ #Group by Date, sentiment value count
137
+ grouped = df.groupby(by='Date')['Sentiment'].value_counts()
138
+
139
+ grouped.plot.pie()
140
+
141
+ def get_dominant_sentiment (self, sentiment_analysis_result):
142
+ """
143
+ Returns overall sentiment, negative or positive or neutral depending on the count of negative sentiment vs positive sentiment
144
+
145
+ Args:
146
+ - sentiment_analysis_result (list): List of dictionaries containing 'Timestamp' and 'Sentiment' keys.
147
+
148
+ Returns:
149
+ - Series: the dominant sentiment label ('sentiment') and its count ('count').
150
+ """
151
+ df = pd.DataFrame(sentiment_analysis_result)
152
+ df['Timestamp'] = pd.to_datetime(df['Timestamp'])
153
+ df['Date'] = df['Timestamp'].dt.date
154
+
155
+ #Group by Date, sentiment value count
156
+ grouped = df.groupby(by='Date')['Sentiment'].value_counts()
157
+ df = pd.DataFrame(list(grouped.items()), columns=['Sentiment', 'count'])
158
+ df['date'] = df['Sentiment'].apply(lambda x: x[0])
159
+ df['sentiment'] = df['Sentiment'].apply(lambda x: x[1])
160
+ df.drop('Sentiment', axis=1, inplace=True)
161
+ result = df.groupby('sentiment')['count'].sum().reset_index()
162
+
163
+ # Determine the sentiment with the most count
164
+ dominant_sentiment = result.loc[result['count'].idxmax()]
165
+
166
+ return dominant_sentiment
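+ # Note: this returns a pandas Series with 'sentiment' and 'count' fields, so callers
+ # read the label via dominant_sentiment['sentiment'].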
167
+
168
+
169
+ #Function to get the stock sentiment
170
+ def get_stock_sentiment(stockticker: str):
171
+
172
+ #Initialize AlpacaNewsFetcher, a class for fetching news articles related to a specific stock from Alpaca API.
173
+ news_fetcher = AlpacaNewsFetcher()
174
+
175
+
176
+ # Fetch news (title, timestamp and summary) for the specified stockticker
177
+ news_data = news_fetcher.fetch_news(stockticker)
178
+
179
+ # Initialize the NewsSentimentAnalysis object
180
+ news_sentiment_analyzer = NewsSentimentAnalysis()
181
+ analysis_result = []
182
+
183
+ # Assume 'news_data' is a list of news articles (each as a dictionary), analyze sentiment of each news
184
+ for article in news_data:
185
+ sentiment_analysis_result = news_sentiment_analyzer.analyze_sentiment(article)
186
+
187
+ # Display sentiment analysis results
188
+ print(f'Timestamp: {sentiment_analysis_result["timestamp"]}, '
189
+ f'Title: {sentiment_analysis_result["title"]}, '
190
+ f'Summary: {sentiment_analysis_result["summary"]}')
191
+
192
+ print(f'Sentiment: {sentiment_analysis_result["sentiment"]}', '\n')
193
+
194
+ result = {
195
+ 'Timestamp': sentiment_analysis_result["timestamp"],
196
+ 'News- Title:Summary': sentiment_analysis_result["title"] + sentiment_analysis_result["summary"],
197
+ 'Sentiment': sentiment_analysis_result["sentiment"][0]['label']
198
+ }
199
+ analysis_result.append(result)
200
+
201
+ #Extracting timestamp of article and sentiment of article for graphing
202
+ """ result_for_graph = {
203
+ 'Timestamp': sentiment_analysis_result["timestamp"],
204
+ 'Sentiment': sentiment_analysis_result["sentiment"][0]['label']
205
+ }
206
+
207
+ analysis_result.append(result_for_graph)
208
+ """
209
+
210
+ #Get dominant sentiment
211
+ dominant_sentiment = news_sentiment_analyzer.get_dominant_sentiment(analysis_result)
212
+
213
+ #Build response string for news sentiment
214
+ output_string = ""
215
+ for result in analysis_result:
216
+ output_string = output_string + f'{result["Timestamp"]} : {result["News- Title:Summary"]} : {result["Sentiment"]}' + '\n'
217
+
218
+ final_result = {
219
+ 'Sentiment-analysis-result' : output_string,
220
+ 'Dominant-sentiment' : dominant_sentiment['sentiment']
221
+ }
222
+
223
+ return final_result
224
+
225
+
226
+ class StockSentimentCheckInput(BaseModel):
227
+ """Input for Stock price check."""
228
+ stockticker: str = Field(..., description="Ticker symbol for stock or index")
229
+
230
+ class StockSentimentAnalysisTool(BaseTool):
231
+ name = "get_stock_sentiment"
232
+ description = """Useful for finding sentiment of stock, based on published news articles.
233
+ Fetches configured number of news items for the sentiment,
234
+ determines sentiment of each news items and then returns
235
+ List of sentiment analysit result & domainant sentiment of the news
236
+ """
237
+
238
+ """Input for Stock sentiment analysis."""
239
+ stockticker: str = Field(..., description="Ticker symbol for stock or index")
240
+ def _run(self, stockticker: str):
241
+ # print("i'm running")
242
+ sentiment_response = get_stock_sentiment(stockticker)
243
+ print("++++++++++++++++++++++++++++++++++++++++++++++++++++++")
244
+ print(str(sentiment_response))
245
+ print("++++++++++++++++++++++++++++++++++++++++++++++++++++++")
246
+
247
+ return sentiment_response
248
+
249
+ def _arun(self, stockticker: str):
250
+ raise NotImplementedError("This tool does not support async")
251
+
252
+ args_schema: Optional[Type[BaseModel]] = StockSentimentCheckInput
253
+
254
+
255
+ tools_sentiment_analyst = [StructuredTool.from_function(
256
+ func=get_stock_sentiment,
257
+ args_schema=StockSentimentCheckInput,
258
+ description="Function to get stock sentiment.",
259
+ )
260
+ ]
261
+ return tools_sentiment_analyst
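+ # Usage sketch: tools = sentimental_analysis_tools(); the list can be wired into a
+ # LangChain agent in the same way as the other tool modules.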
utils.py ADDED
@@ -0,0 +1,177 @@
1
+ import matplotlib.pyplot as plt
2
+ import plotly.graph_objects as go
3
+ import pandas as pd
4
+ import numpy as np
5
+ from datetime import datetime, timedelta
6
+ import yfinance as yf
7
+ from plotly.subplots import make_subplots
8
+
9
+ def get_stock_price(stockticker: str) -> str:
10
+ ticker = yf.Ticker(stockticker)
11
+ todays_data = ticker.history(period='1d')
12
+ return str(round(todays_data['Close'].iloc[0], 2))
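+ # e.g. get_stock_price('MSFT') -> '437.11' (latest close; the value depends on the trading day)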
13
+
14
+ def plot_candlestick_stock_price(historical_data):
15
+ """Useful for plotting candlestick plot for stock prices.
16
+ Use historical stock price data from yahoo finance for the week and plot them."""
17
+ df=historical_data[['Close','Open','High','Low']]
18
+ df.index=pd.to_datetime(df.index)
19
+ df.index.names=['Date']
20
+ df=df.reset_index()
21
+
22
+ fig = go.Figure(data=[go.Candlestick(x=df['Date'],
23
+ open=df['Open'],
24
+ high=df['High'],
25
+ low=df['Low'],
26
+ close=df['Close'])])
27
+ fig.show()
28
+
29
+ def historical_stock_prices(stockticker, days_ago):
30
+ """Upload accurate data to accurate dates from yahoo finance."""
31
+ ticker = yf.Ticker(stockticker)
32
+ end_date = datetime.now()
33
+ start_date = end_date - timedelta(days=days_ago)
34
+ start_date = start_date.strftime('%Y-%m-%d')
35
+ end_date = end_date.strftime('%Y-%m-%d')
36
+ historical_data = ticker.history(start=start_date, end=end_date)
37
+ return historical_data
38
+
39
+ def plot_macd2(df):
40
+ try:
41
+ # Debugging: Print the dataframe columns and a few rows
42
+ print("DataFrame columns:", df.columns)
43
+ print("DataFrame head:\n", df.head())
44
+
45
+ # Convert DataFrame index and columns to numpy arrays
46
+ index = df.index.to_numpy()
47
+ close_prices = df['Close'].to_numpy()
48
+ macd = df['MACD'].to_numpy()
49
+ signal_line = df['Signal_Line'].to_numpy()
50
+ macd_histogram = df['MACD_Histogram'].to_numpy()
51
+
52
+ fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(10, 8), gridspec_kw={'height_ratios': [3, 1]})
53
+
54
+ # Subplot 1: Candlestick chart
55
+ ax1.plot(index, close_prices, label='Close', color='black')
56
+ ax1.set_title("Candlestick Chart")
57
+ ax1.set_ylabel("Price")
58
+ ax1.legend()
59
+
60
+ # Subplot 2: MACD
61
+ ax2.plot(index, macd, label='MACD', color='blue')
62
+ ax2.plot(index, signal_line, label='Signal Line', color='red')
63
+
64
+ histogram_colors = np.where(macd_histogram >= 0, 'green', 'red')
65
+ ax2.bar(index, macd_histogram, color=histogram_colors, alpha=0.6)
66
+
67
+ ax2.set_title("MACD")
68
+ ax2.set_ylabel("MACD Value")
69
+ ax2.legend()
70
+
71
+ plt.xlabel("Date")
72
+ plt.tight_layout()
73
+
74
+ return fig
75
+ except Exception as e:
76
+ print(f"Error in plot_macd: {e}")
77
+ return None
78
+
79
+ def plot_macd(df):
80
+
81
+ # Create Figure
82
+ fig = make_subplots(rows=2, cols=1, shared_xaxes=True, row_heights=[0.2, 0.1],
83
+ vertical_spacing=0.15, # Adjust vertical spacing between subplots
84
+ subplot_titles=("Candlestick Chart", "MACD")) # Add subplot titles
85
+
86
+
87
+ # Subplot 1: Plot candlestick chart
88
+ fig.add_trace(go.Candlestick(
89
+ x=df.index,
90
+ open=df['Open'],
91
+ high=df['High'],
92
+ low=df['Low'],
93
+ close=df['Close'],
94
+ increasing_line_color='#00cc96', # Green for increasing
95
+ decreasing_line_color='#ff3e3e', # Red for decreasing
96
+ showlegend=False
97
+ ), row=1, col=1) # Specify row and column indices
98
+
99
+
100
+ # Subplot 2: Plot MACD
101
+ fig.add_trace(
102
+ go.Scatter(
103
+ x=df.index,
104
+ y=df['MACD'],
105
+ mode='lines',
106
+ name='MACD',
107
+ line=dict(color='blue')
108
+ ),
109
+ row=2, col=1
110
+ )
111
+
112
+ fig.add_trace(
113
+ go.Scatter(
114
+ x=df.index,
115
+ y=df['Signal_Line'],
116
+ mode='lines',
117
+ name='Signal Line',
118
+ line=dict(color='red')
119
+ ),
120
+ row=2, col=1
121
+ )
122
+
123
+ # Plot MACD Histogram with different colors for positive and negative values
124
+ histogram_colors = ['green' if val >= 0 else 'red' for val in df['MACD_Histogram']]
125
+
126
+ fig.add_trace(
127
+ go.Bar(
128
+ x=df.index,
129
+ y=df['MACD_Histogram'],
130
+ name='MACD Histogram',
131
+ marker_color=histogram_colors
132
+ ),
133
+ row=2, col=1
134
+ )
135
+
136
+ # Update layout with zoom and pan tools enabled
137
+ layout = go.Layout(
138
+ title='MSFT Candlestick Chart and MACD Subplots',
139
+ title_font=dict(size=12), # Adjust title font size
140
+ plot_bgcolor='#f2f2f2', # Light gray background
141
+ height=600,
142
+ width=1200,
143
+ xaxis_rangeslider=dict(visible=True, thickness=0.03),
144
+ )
145
+
146
+ # Update the layout of the entire figure
147
+ fig.update_layout(layout)
148
+ fig.update_yaxes(fixedrange=False, row=1, col=1)
149
+ fig.update_yaxes(fixedrange=True, row=2, col=1)
150
+ fig.update_xaxes(type='category', row=1, col=1)
151
+ fig.update_xaxes(type='category', nticks=10, row=2, col=1)
152
+
153
+ fig.show()
154
+ #return fig
155
+
156
+ def calculate_MACD(df, fast_period=12, slow_period=26, signal_period=9):
157
+ """
158
+ Calculates the MACD (Moving Average Convergence Divergence) and related indicators.
159
+
160
+ Parameters:
161
+ df (DataFrame): A pandas DataFrame containing at least a 'Close' column with closing prices.
162
+ fast_period (int): The period for the fast EMA (default is 12).
163
+ slow_period (int): The period for the slow EMA (default is 26).
164
+ signal_period (int): The period for the signal line EMA (default is 9).
165
+
166
+ Returns:
167
+ DataFrame: A pandas DataFrame with the original data and added columns for MACD, Signal Line, and MACD Histogram.
168
+ """
169
+
170
+ df['EMA_fast'] = df['Close'].ewm(span=fast_period, adjust=False).mean()
171
+ df['EMA_slow'] = df['Close'].ewm(span=slow_period, adjust=False).mean()
172
+ df['MACD'] = df['EMA_fast'] - df['EMA_slow']
173
+
174
+ df['Signal_Line'] = df['MACD'].ewm(span=signal_period, adjust=False).mean()
175
+ df['MACD_Histogram'] = df['MACD'] - df['Signal_Line']
176
+
177
+ return df
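+ # Usage sketch (assumes network access to Yahoo Finance):
+ # df = historical_stock_prices('MSFT', 90)
+ # df = calculate_MACD(df)
+ # fig = plot_macd2(df) # matplotlib version; plot_macd(df) renders the Plotly version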