adding files
Browse files- .DS_Store +0 -0
- Dockerfile +11 -0
- app.py +222 -0
- app_crypto_arima_model.py +72 -0
- app_crypto_rf_model.py +124 -0
- app_crypto_scrape.py +82 -0
- model_utils.py +50 -0
- scrape_utils.py +287 -0
- sentiment_analysis/__init__.py +0 -0
- sentiment_analysis/client.py +63 -0
- sentiment_analysis/requirements.txt +13 -0
- sentiment_analysis/sentiment_analysis_pipeline.py +151 -0
- tools/.DS_Store +0 -0
- tools/.chainlit/config.toml +84 -0
- tools/__pycache__/crypto_sentiment_analysis_util.cpython-311.pyc +0 -0
- tools/__pycache__/data_analyst.cpython-311.pyc +0 -0
- tools/crypto_sentiment_analysis_util.py +217 -0
- tools/data_analyst.py +42 -0
- tools/df_history.csv +63 -0
- tools/df_with_forecast.csv +113 -0
- tools/stock_sentiment_evalutor.py +261 -0
- utils.py +177 -0
.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
Dockerfile
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Container image for the Streamlit crypto-analysis app.
FROM python:3.9

# Run as an unprivileged user (UID 1000) with a writable home directory.
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH
WORKDIR $HOME/app

# Install dependencies first so this layer stays cached when only source changes.
# Docker does not expand "~" in COPY destinations, so the path is spelled via $HOME.
COPY --chown=user ./requirements.txt $HOME/app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application source, owned by the runtime user.
COPY --chown=user . $HOME/app

# Streamlit's port flag is --server.port; bind to all interfaces so the
# published port is reachable from outside the container.
CMD ["streamlit", "run", "app.py", "--server.port", "7860", "--server.address", "0.0.0.0"]
|
app.py
ADDED
@@ -0,0 +1,222 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from openai import OpenAI
|
2 |
+
from langchain_core.messages import BaseMessage, HumanMessage
|
3 |
+
from langchain_openai import ChatOpenAI
|
4 |
+
from typing import Annotated
|
5 |
+
import operator
|
6 |
+
from typing import Sequence, TypedDict
|
7 |
+
import numpy as np
|
8 |
+
import pandas as pd
|
9 |
+
from dotenv import load_dotenv
|
10 |
+
import os
|
11 |
+
from typing import Annotated
|
12 |
+
import operator
|
13 |
+
from typing import Sequence, TypedDict
|
14 |
+
import matplotlib.pyplot as plt
|
15 |
+
from langchain.schema.output_parser import StrOutputParser
|
16 |
+
from tools import data_analyst #forecasting_expert_arima, forecasting_expert_rf, evaluator, investment_advisor
|
17 |
+
from tools import crypto_sentiment_analysis_util
|
18 |
+
import app_crypto_rf_model as rf
|
19 |
+
import app_crypto_scrape as sa
|
20 |
+
import app_crypto_arima_model as arima
|
21 |
+
import streamlit as st
|
22 |
+
|
23 |
+
st.set_page_config(page_title="LangChain Agent", layout="wide")
load_dotenv()
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

llm = ChatOpenAI(model="gpt-3.5-turbo")


#======================== AGENTS ==================================
# The agent state is the input to each node in the graph
class AgentState(TypedDict):
    # The annotation tells the graph that new messages will always
    # be added to the current states
    messages: Annotated[Sequence[BaseMessage], operator.add]
    # The 'next' field indicates where to route to next
    next: str


tool = data_analyst.data_analyst_tools()

from langchain_core.runnables import RunnableConfig

st.title("💬 Krypto")

# Seed the conversation once per browser session. The guard must test the same
# key it initialises, otherwise the history is wiped on every Streamlit rerun.
if "messages" not in st.session_state:
    st.session_state["messages"] = [{"role":"system", "content":"""
You are a cryptocurrency investing expert. Answer all questions related to cryptocurrency investment reccommendations. Say I don't know if you don't know.
"""}]


def _extract_ticker(llm_answer):
    """Return the ticker from an LLM answer formatted like ``>>BTC>>``.

    Falls back to the text before the first ``>>`` (or the whole answer when
    the marker is missing) instead of raising on an unexpected format.
    """
    parts = str(llm_answer).split(">>")
    if len(parts) > 1 and len(parts[1]) < 10:
        return parts[1].strip()
    return parts[0].strip()


def _last_price(frame):
    """Return the last value of ``frame['prices']`` (by position), rounded to 2 decimals."""
    return np.round(np.array(frame['prices'])[-1], 2)


def _recommendation_prompt(coin, current_price, chosen, chosen_price, chosen_r2, other, other_r2):
    """Build the LLM prompt describing the selected model's forecast and both R2 scores."""
    return (
        f"You are an investment recommendation expert for crypto currency {coin}. "
        f"You are selecting the predicted price from the {chosen} model because its accuracy "
        f"(R2 measure:{chosen_r2}) is higher than the accuracy (R2:{other_r2}) for {other} model. "
        "Compare current price to the predicted price. If current price exceeds predicted price, "
        "recommend selling the stock, otherwise recommend buying. "
        "Tell the user what the current price, predicted price and accuracy values are. "
        f"You know that the predicted price for tomorrow using {chosen} model is {chosen_price}. "
        f"The prediction accuracy for the {chosen} model is {chosen_r2}. "
        f"The current price of {coin} is: {current_price}. "
    )


# Streamlit UI elements
st.image('crypto_image.png')

sideb = st.sidebar

with st.sidebar:
    title = st.text_input("Start by entering the currency name:")

check1 = sideb.button(f"analyze {title}")

if check1 and not title.strip():
    st.warning("Please enter a currency name before running the analysis.")
elif check1:
    st.write(f"I am now producing analysis for {title}")

    model = ChatOpenAI(temperature=0.7, api_key=OPENAI_API_KEY)
    chain = model | StrOutputParser()
    result = chain.invoke(
        "You are a cryptocurrency data analyst. "
        f"Provide correct cryptocurrency ticker from Coingecko website for cryptocurrency: {title}. "
        "Expected output: ticker. "
        "Provide it in the following format: >>cryptocurrencyticker>> "
        "for example: >>BTC>>"
    )

    cryptocurrencyticker = _extract_ticker(result)
    cryptocurrency = title
    print(cryptocurrency, cryptocurrencyticker)

    #================== Scrape Current/Historical Price ====================
    df = sa.scrape_crypto(cryptocurrency, cryptocurrencyticker)
    if len(df) > 0:
        print(df.tail())
        print("Running forecasting models on historical prices")
        df_with_forecast_rf, accuracy_rf, result_rf = rf.model_run(df)
        df_with_forecast_arima, accuracy_arima, result_arima = arima.model_run(df)
        print("done")

        current_price = _last_price(df)
        r2_rf = np.round(accuracy_rf, 2)
        r2_arima = np.round(accuracy_arima, 2)

        # Report the forecast of whichever model scored the higher R2.
        if accuracy_rf < accuracy_arima:
            prompt = _recommendation_prompt(
                cryptocurrency, current_price,
                "ARIMA", _last_price(df_with_forecast_arima), r2_arima,
                "random forest", r2_rf,
            )
        else:
            prompt = _recommendation_prompt(
                cryptocurrency, current_price,
                "random forest", _last_price(df_with_forecast_rf), r2_rf,
                "ARIMA", r2_arima,
            )

        response_content = model.invoke(prompt).content
        st.chat_message("assistant").markdown(response_content)
        # Store what the assistant actually said so follow-up questions see it.
        st.session_state.messages.append({"role": "assistant", "content": response_content})

        fig, ax = plt.subplots(1, 2, figsize=(10, 3))
        for axis, panel_title, outcome in zip(ax, ('ARIMA', 'RF'), (result_arima, result_rf)):
            axis.plot(outcome['prediction'], color='blue', marker='o')
            axis.plot(outcome['data'], color='orange', marker='o')
            axis.set_title(panel_title)
            axis.legend(['prediction', 'actuals'])
        fig.suptitle('Prediction vs Actuals')
        st.pyplot(fig)
    else:
        st.warning(f"No price data available for {title}; skipping the price forecast.")

    # ========================== Sentiment analysis
    # Score news and reddit posts that mention the currency, then summarise
    # the dominant sentiment.
    news_articles = crypto_sentiment_analysis_util.fetch_news(cryptocurrency)
    reddit_news_articles = crypto_sentiment_analysis_util.fetch_reddit_news(cryptocurrency)

    analysis_results = []
    # Article text is lower-cased before matching, so the needle must be too.
    needle = cryptocurrency[0:6].lower()

    for article in news_articles:
        if needle in article['News_Article'].lower():
            sentiment_analysis_result = crypto_sentiment_analysis_util.analyze_sentiment(article['News_Article'])
            top = sentiment_analysis_result["Sentiment"][0]
            analysis_results.append({
                'News_Article': sentiment_analysis_result["News_Article"],
                'Sentiment': top['label'],
                'Index': top['score'],
            })

    for article in reddit_news_articles:
        if needle in article.lower():
            sentiment_analysis_result_reddit = crypto_sentiment_analysis_util.analyze_sentiment(article)
            top = sentiment_analysis_result_reddit["Sentiment"][0]
            analysis_results.append({
                'News_Article': sentiment_analysis_result_reddit["News_Article"],
                # Same record shape as the news entries above.
                'Sentiment': top['label'],
                'Index': np.round(top['score'], 2),
            })

    # Generate summarized message rationalizing the dominant sentiment
    summary = crypto_sentiment_analysis_util.generate_summary_of_sentiment(analysis_results)
    st.chat_message("assistant").write(str(summary))
    # Chat history entries are later sent to the OpenAI API, which needs text content.
    st.session_state.messages.append({"role": "assistant", "content": str(summary)})

# OpenAI client for the free-form follow-up chat (key comes from the environment)
client = OpenAI(api_key=OPENAI_API_KEY)

# Set a default model
if "openai_model" not in st.session_state:
    st.session_state["openai_model"] = "gpt-3.5-turbo"

if prompt := st.chat_input("Some other questions?"):
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})
    # Display user message in chat message container
    with st.chat_message("user"):
        st.markdown(prompt)
    # Display assistant response in chat message container
    with st.chat_message("assistant"):
        stream = client.chat.completions.create(
            model=st.session_state["openai_model"],
            messages=[
                {"role": m["role"], "content": m["content"]}
                for m in st.session_state.messages
            ],
            stream=True,
        )
        response = st.write_stream(stream)
    st.session_state.messages.append({"role": "assistant", "content": response})
|
202 |
+
|
203 |
+
# Generate a new response if last message is not from assistant
|
204 |
+
# model = ChatOpenAI(temperature=0.7, api_key=OPENAI_API_KEY)
|
205 |
+
# input_text = st.text_area(f"Enter your further questions for {title}:")
|
206 |
+
# if st.session_state.messages[-1]["role"] != "assistant":
|
207 |
+
# with st.chat_message("assistant"):
|
208 |
+
# with st.spinner("Thinking..."):
|
209 |
+
# response = model.invoke(input_text)
|
210 |
+
# st.markdown(response.content)
|
211 |
+
# message = {"role": "assistant", "content": response.content}
|
212 |
+
# st.session_state.messages.append(message)
|
213 |
+
|
214 |
+
# model = ChatOpenAI(temperature=0.7, api_key=OPENAI_API_KEY)
|
215 |
+
# input_text = st.text_area(f"Enter your further questions for {title}:")
|
216 |
+
# if st.button(f"answer"):
|
217 |
+
# #inputs = {"messages": [HumanMessage(content=input_text)]}
|
218 |
+
# response=model.invoke(input_text)
|
219 |
+
# response_content=response.content
|
220 |
+
# print(response_content)
|
221 |
+
# st.chat_message("assistant").write(str(response_content))
|
222 |
+
# st.session_state.messages.append({"role": "assistant", "content": response_content})
|
app_crypto_arima_model.py
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from datetime import datetime, timedelta
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
+
import model_utils as mu
|
6 |
+
from statsmodels.tsa.arima.model import ARIMA
|
7 |
+
|
8 |
+
def model_run(df_all):
    """Run an ARIMA(2,1,2) model with exogenous regressors and predict tomorrow's price.

    Args:
        df_all: frame of historical prices and regressors; it is passed to
            ``model_utils.data_transform`` to build the modelling frame.

    Returns:
        A tuple ``(df_with_forecast, accuracy, result_arima)``:
        ``df_with_forecast`` is a copy of the transformed frame whose last
        ``prices`` value is replaced by the forecast, ``accuracy`` is the R2
        score of one-step-ahead forecasts over an expanding window (``nan``
        when fewer than two forecasts could be evaluated), and
        ``result_arima`` holds those forecasts (``prediction``) next to the
        observed values (``data``).

    Side effects:
        Writes ``result_arima.csv`` to the working directory.
    """
    from sklearn.metrics import r2_score

    first_day_future = pd.to_datetime(datetime.now() + timedelta(days=1))
    #----------------------------------------- DATASET MANIPULATION FOR SUPERVISED LEARNING --------------------------------------------
    reframed_lags, df_final = mu.data_transform(df_all, first_day_future)
    print('I have transformed the dataset into the frame for supervised learning')

    df = reframed_lags[['prices', 'total_vol', 'GSPC', 'Day', 'Month', 'TNX', 'Employment', 'google_trend', 'EURUSD']]
    date = pd.to_datetime(dict(year=reframed_lags['Year'], month=reframed_lags['Month'], day=reframed_lags['Day']))
    df_with_date = pd.concat([date, df], axis=1)
    df_with_date.columns = np.append('date', df.columns)
    df_with_date.set_index('date', inplace=True)
    df_with_date = df_with_date.dropna()

    # Everything but the last row is history; the last row is the day to forecast.
    df_past = df_with_date.iloc[:-1, :]
    df_future = df_with_date.iloc[-1:, :]
    model = ARIMA(df_past['prices'], exog=df_past.drop(columns='prices'), order=(2, 1, 2))
    model_fit = model.fit()

    # Make predictions
    forecast = model_fit.forecast(steps=1, exog=df_future.drop(columns='prices'))

    # Add forecast to a copy of the transformed frame
    df_with_forecast = reframed_lags.copy()
    df_with_forecast.loc[df_with_forecast.index == df_with_forecast.index[-1], 'prices'] = forecast.iloc[-1]

    #----------------------------------- MODEL ACCURACY
    # Expanding-window evaluation: refit on the first k rows, forecast row k+1.
    if len(reframed_lags) > 500:
        train_size = 0.9
    elif len(reframed_lags) > 200:
        train_size = 0.8
    else:
        train_size = 0.7

    data_arima = df_with_date
    first_test_row = int(len(data_arima) * train_size)
    window_length = int(len(data_arima) - len(data_arima) * train_size)

    predicted = []
    observed = []
    for step in range(window_length):
        train_accuracy = data_arima.iloc[0:first_test_row + step, :]
        test_accuracy = data_arima.iloc[len(train_accuracy):len(train_accuracy) + 1, :]
        if test_accuracy.empty:
            # Float rounding in the window size must not push us past the data.
            break

        arima = ARIMA(train_accuracy['prices'], exog=train_accuracy.drop(columns='prices'), order=(2, 1, 2))
        arima_fit = arima.fit()
        prediction_arima = arima_fit.forecast(steps=1, exog=test_accuracy.drop(columns='prices'))
        predicted.append(float(prediction_arima.iloc[0]))
        observed.append(float(test_accuracy['prices'].iloc[0]))

    predictions = np.array(predicted, dtype=float)
    test_labels_all = np.array(observed, dtype=float)

    # Calculate accuracy. r2_score's signature is (y_true, y_pred); R2 is not
    # symmetric, so the observed values must come first.
    if len(predictions) >= 2:
        accuracy = r2_score(test_labels_all, predictions)
    else:
        accuracy = float('nan')

    result_arima = pd.DataFrame({'prediction': predictions, 'data': test_labels_all})
    result_arima.to_csv('result_arima.csv')
    return df_with_forecast, accuracy, result_arima
|
app_crypto_rf_model.py
ADDED
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from datetime import datetime, timedelta
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
+
from sklearn.ensemble import RandomForestRegressor
|
6 |
+
from sklearn.metrics import mean_squared_error
|
7 |
+
from math import sqrt
|
8 |
+
from sklearn.preprocessing import MinMaxScaler
|
9 |
+
import model_utils as mu
|
10 |
+
|
11 |
+
def model_run(df_all):
    """Run a random forest model and predict tomorrow's cryptocurrency price.

    Args:
        df_all: frame of historical prices and regressors; it is passed to
            ``model_utils.data_transform`` to build the modelling frame.

    Returns:
        A tuple ``(df_with_forecast, accuracy, result_rf)``:
        ``df_with_forecast`` is a copy of ``df_final`` whose last ``prices``
        value is replaced by the forecast, ``accuracy`` is the R2 score of
        one-step-ahead predictions over an expanding window (``nan`` when
        fewer than two predictions could be evaluated), and ``result_rf``
        holds those predictions (``prediction``) next to the observed values
        (``data``).

    Side effects:
        Writes ``reframed_lags.csv`` and ``result_rf.csv`` to the working
        directory.
    """
    import random
    from sklearn.metrics import r2_score

    first_day_future = pd.to_datetime(datetime.now() + timedelta(days=1))
    #----------------------------------------- DATASET MANIPULATION FOR SUPERVISED LEARNING --------------------------------------------
    reframed_lags, df_final = mu.data_transform(df_all, first_day_future)
    print('I have transformed the dataset into the frame for supervised learning')
    reframed_lags.to_csv('reframed_lags.csv')

    #----------------------------------------- TRAIN/TEST SPLIT ------------------------------------------------------
    # Random 80/20 split. The last row (the day to forecast) is excluded from
    # sampling, so it always ends up as the last row of the test split.
    train_size = 0.8
    frame = reframed_lags.reset_index(drop=True)
    train_count = int(len(frame) * train_size)
    train_rows = np.sort(random.sample(range(len(frame) - 1), train_count))
    train = frame.iloc[train_rows]
    test = frame.drop(index=train_rows)
    print('I have split the dataset into training and testing samples')

    #----------------------------------- Re-Scale for supervised learning
    # Fit the scaler on the training split only and reuse it for the test
    # split, so both live on the same scale and the forecast can be mapped
    # back to prices with the same transformation.
    scaler = MinMaxScaler(feature_range=(0, 1))
    df_train = pd.DataFrame(scaler.fit_transform(train.values.astype('float32')), columns=train.columns)
    df_test = pd.DataFrame(scaler.transform(test.values.astype('float32')), columns=test.columns)

    #----------------------------------- MODEL
    # The target column must not be part of the features.
    train_features = df_train.drop(columns='prices').values
    test_features = df_test.drop(columns='prices').values
    train_labels = df_train['prices'].values

    # Instantiate model with 1000 decision trees
    rf = RandomForestRegressor(n_estimators=1000)
    rf.fit(train_features, train_labels)
    scaled_predictions = rf.predict(test_features)

    #----------------------------------- MODEL OUTPUT TRANSFORMATION
    # Put the scaled predictions into the prices column and undo the scaling.
    df_test['prices'] = scaled_predictions
    inv_transformed = pd.DataFrame(scaler.inverse_transform(df_test.values.astype('float')), columns=test.columns)

    # data with forecast
    df_with_forecast = df_final.copy()
    df_with_forecast.loc[df_with_forecast.index == df_with_forecast.index[-1], 'prices'] = inv_transformed['prices'].iloc[-1]
    print('Final result')
    print(df_with_forecast)

    #----------------------------------- MODEL ACCURACY
    # Expanding-window evaluation: refit on the first k rows, predict row k+1.
    if len(reframed_lags) > 500:
        train_size = 0.9
    elif len(reframed_lags) > 200:
        train_size = 0.8
    else:
        train_size = 0.7

    first_test_row = int(len(reframed_lags) * train_size)
    window_length = int(len(reframed_lags) - len(reframed_lags) * train_size)

    predicted = []
    observed = []
    for step in range(window_length):
        train_accuracy = reframed_lags.iloc[0:first_test_row + step, :]
        test_accuracy = reframed_lags.iloc[len(train_accuracy):len(train_accuracy) + 1, :]
        if test_accuracy.empty:
            # Float rounding in the window size must not push us past the data.
            break

        rf = RandomForestRegressor(n_estimators=1000)
        rf.fit(train_accuracy.drop(columns='prices'), train_accuracy['prices'])
        prediction_rf = rf.predict(test_accuracy.drop(columns='prices'))
        predicted.append(float(prediction_rf[0]))
        observed.append(float(test_accuracy['prices'].iloc[0]))

    predictions = np.array(predicted, dtype=float)
    test_labels_all = np.array(observed, dtype=float)

    # Calculate accuracy. r2_score's signature is (y_true, y_pred); R2 is not
    # symmetric, so the observed values must come first.
    if len(predictions) >= 2:
        accuracy = r2_score(test_labels_all, predictions)
    else:
        accuracy = float('nan')

    result_rf = pd.DataFrame({'prediction': predictions, 'data': test_labels_all})
    result_rf.to_csv('result_rf.csv')
    return df_with_forecast, accuracy, result_rf
|
app_crypto_scrape.py
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
#Import packages
|
3 |
+
import pandas as pd
|
4 |
+
import numpy as np
|
5 |
+
from pycoingecko import CoinGeckoAPI
|
6 |
+
cg = CoinGeckoAPI()
|
7 |
+
import pandas as pd
|
8 |
+
import numpy as np
|
9 |
+
from pytrends.request import TrendReq
|
10 |
+
pytrends = TrendReq(hl='en-US')
|
11 |
+
import scrape_utils as su
|
12 |
+
|
13 |
+
|
14 |
+
def scrape_crypto(currency, ticker):
    """Collect price history and auxiliary series for one currency.

    Pulls the last 200 days of price/volume history via ``scrape_utils``,
    appends a row with the current CoinGecko quote, then scrapes the CLI,
    CPI/employment, Google-trends and stock series and joins everything
    column-wise on a ``YYYY-MM-DD`` string index.

    Returns the combined frame sorted by index, or an empty DataFrame when no
    price history was returned.
    """
    import datetime
    from datetime import date
    from datetime import timedelta

    def _to_day_labels(index):
        # Normalise any date-like index to "YYYY-MM-DD" strings.
        return pd.to_datetime(index).strftime("%Y-%m-%d")

    # 1. Scrape historical Price and Volume currency data
    today = date.today()
    past = today - timedelta(days=200)
    # [year, month, day] as zero-padded strings
    date_start = past.strftime("%Y-%m-%d").split("-")
    date_end = today.strftime("%Y-%m-%d").split("-")

    df_ts_coins = su.scrape_historical_series([currency], date_start, date_end)
    if len(df_ts_coins) == 0:
        print('No data available.')
        return pd.DataFrame()

    df_ts_coins['date'] = [
        datetime.datetime.fromtimestamp(int(stamp)).strftime('%Y-%m-%d %H:%M:%S')
        for stamp in df_ts_coins['date']
    ]

    # Add current data
    current_data = cg.get_price(
        ids=currency,
        vs_currencies='usd',
        include_market_cap='true',
        include_24hr_vol='true',
        include_last_updated_at='true',
    )
    quote = pd.DataFrame(current_data).T
    prices = quote['usd'].values[0]
    market_caps = quote['usd_market_cap'].values[0]
    total_vol = quote['usd_24h_vol'].values[0]

    # One-row frame: header row first, values second, then promote the header.
    df_today_row = pd.DataFrame({
        0: ['id', 'date', 'prices', 'market_caps', 'total_vol'],
        1: [currency, today.strftime('%Y-%m-%d %H:%M:%S'), prices, market_caps, total_vol],
    }).T
    df_today_row.columns = df_today_row.iloc[0, :]
    df_today_row = df_today_row.drop(0)

    df_ts_coins = pd.concat([df_ts_coins, df_today_row], axis=0)
    df_ts_coins = df_ts_coins.set_index('date')
    df_ts_coins.index = pd.to_datetime(df_ts_coins.index).strftime("%Y-%m-%d %H:%M:%S")

    # 2. Scrape macro
    df_cli = su.scrape_cli(past, today)
    df_cpi = su.scrape_cpi_employment()
    print('I have scraped CLI and L, CPI')

    # 3. Scrape google-trends
    google_data = su.scrape_google_trends(currency, ticker)
    print('Google trend dataset')

    # 4. Scrape Yahoo-Finance
    df_finance = su.scrape_stocks(past, today)
    print('yahoo dataset. I am done scraping !!!!!!!')

    #==== 5. CONCAT DATAFRAMES TOGETHER
    df_ts_coins.index = _to_day_labels(df_ts_coins.index)
    df_cli.index = _to_day_labels(df_cli.index)
    if len(df_cpi) > 0:
        df_cpi.index = _to_day_labels(df_cpi.index)
    else:
        # No CPI/employment data: substitute zero columns aligned with the CLI index.
        print('MISSING CPI')
        filler = np.repeat(0, len(df_cli))
        df_cpi = pd.DataFrame({'CPI': filler, 'Employment': filler})
        df_cpi.index = df_cli.index
    google_data.index = _to_day_labels(google_data.index)
    df_finance.index = _to_day_labels(df_finance.index)

    combined = pd.concat([df_ts_coins, df_cli, df_cpi, google_data, df_finance], axis=1)
    return combined.sort_index()
|
model_utils.py
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import numpy as np
|
3 |
+
|
4 |
+
def data_transform(df_all,first_day_future):
    """Turn the scraped frame into a modelling frame with calendar, dummy and lag columns.

    Appends one extra row labelled with ``first_day_future`` (a copy of the
    last row), shifts every column down by one row, fills the gaps, and adds
    ``Day``/``Month``/``Year`` columns, one ``day_<n>`` dummy per
    day-of-month value present in the index, and ``lag1``/``lag2`` columns
    derived from ``prices``.

    Args:
        df_all: input frame; must contain ``CPI``, ``Employment``, ``id`` and
            ``prices`` columns. NOTE: its ``CPI`` and ``Employment`` columns
            are converted to float in place, so the caller's frame is modified.
        first_day_future: date-like value used as the label of the appended row.

    Returns:
        ``(reframed_lags, df_final)``: the modelling frame with a fresh
        integer index, and the filled frame before the dummy columns were
        added (datetime-indexed).
    """
    df_all['CPI']=df_all['CPI'].astype('float')
    df_all['Employment']=df_all['Employment'].astype('float')

    # Add future row and Shift X columns
    # Build a two-row frame (row 0: column names, row 1: values of the last
    # row of df_all) so that the names can be promoted to the header below.
    df_future_row=pd.DataFrame({0: df_all.reset_index().columns,1:df_all.reset_index().iloc[-1,:]}).T

    df_future_row.columns=df_future_row.iloc[0,:]

    # Drop the header row and the old index value; requires reset_index() to
    # have produced a column called 'index' (i.e. an unnamed index on df_all).
    df_future_row=df_future_row.drop(0).drop(columns=['index'])
    # Temporarily insert the future date as a column named '' and use it as the index.
    df_future_row.insert(0,'',pd.to_datetime(first_day_future).strftime("%Y-%m-%d"))
    df_future_row.set_index(df_future_row.iloc[:,0],inplace=True)
    df_future_row=df_future_row.drop(columns='')
    if 'level_0' in df_future_row.columns:
        df_future_row=df_future_row.drop(columns='level_0')
    if 'index' in df_all.columns:
        df_all=df_all.drop(columns='index')

    df_with_future=pd.concat([df_all,df_future_row],axis=0)
    df_with_future.index.names=['date']
    df_with_future.index=pd.to_datetime(df_with_future.index).strftime("%Y-%m-%d")
    # Shift all columns down one row: each date now carries the previous row's values.
    df_with_future=df_with_future.shift()
    # Fill the gaps (including the first row emptied by the shift).
    df_final=df_with_future.interpolate(method='linear',limit_direction='both',
                                        limit=100).bfill().ffill()
    # NOTE(review): `.iloc[:,0]` on df_final['id'] only works if 'id' selects
    # several columns (a DataFrame) — presumably 'id' appears more than once
    # after the upstream concat; confirm against the scraper output.
    df_final['name']=np.repeat(df_final['id'].iloc[:,0].dropna()[0:1][0],len(df_final))
    df_final=df_final.drop(columns='id')
    #Data transformation coin_dummy, time_variables, shift X, iso_week
    df_final['name_no']=pd.get_dummies(df_final['name'],dtype='int')
    df_final.index=pd.to_datetime(df_final.index, utc=True)
    df_final['Day']=df_final.index.day
    df_final['Month']=df_final.index.month
    df_final['Year']=df_final.index.year
    # One 0/1 column per distinct day-of-month value.
    seasonal_dummy=pd.get_dummies(df_final.index.day,dtype='int')
    seasonal_dummy.index=df_final.index
    seasonal_dummy.columns=[str('day_'+str(value)) for value in seasonal_dummy.columns]
    reframed=pd.concat([df_final,seasonal_dummy],axis=1).drop(columns='name')
    print(reframed.iloc[-5:,:])
    # Replace the date index with a plain integer index.
    reframed=reframed.reset_index().drop(columns=['date'])
    reframed_lags=reframed.copy()
    # Seed both lag columns with the last two 'prices' values; every row except
    # the first is overwritten by the loop below.
    reframed_lags['lag1'] = reframed_lags['prices'].iloc[-1]
    reframed_lags['lag2'] = reframed_lags['prices'].iloc[-2]

    # Use the last observed values for lag features
    # lag1[i] = prices[i-1]; lag2[i] = lag1[i-1], which has already been
    # updated for i-1 >= 1 and is therefore prices[i-2] (the seed for i == 1).
    for i in range(1, len(reframed_lags)):
        # The 'prices' check is always true here: the seeding above already read that column.
        reframed_lags.loc[reframed_lags.index[i], 'lag1'] = reframed_lags.loc[reframed_lags.index[i-1], 'prices'] if 'prices' in reframed_lags.columns else reframed_lags.loc[reframed_lags.index[i-1], 'lag1']
        reframed_lags.loc[reframed_lags.index[i], 'lag2'] = reframed_lags.loc[reframed_lags.index[i-1], 'lag1']
    return reframed_lags, df_final
|
scrape_utils.py
ADDED
@@ -0,0 +1,287 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Import libraries
|
2 |
+
#Import packages
|
3 |
+
import pandas as pd
|
4 |
+
import numpy as np
|
5 |
+
import time
|
6 |
+
import datetime
|
7 |
+
from pycoingecko import CoinGeckoAPI
|
8 |
+
#from utils import slice
|
9 |
+
# Get API for CoinGecko
|
10 |
+
#cg = CoinGeckoAPI()
|
11 |
+
from dotenv import load_dotenv
|
12 |
+
|
13 |
+
from bs4 import BeautifulSoup
|
14 |
+
import requests
|
15 |
+
from pytrends.request import TrendReq
|
16 |
+
pytrends = TrendReq(hl='en-US')
|
17 |
+
from pytrends import dailydata
|
18 |
+
import yfinance as yf
|
19 |
+
import json
|
20 |
+
import prettytable
|
21 |
+
import os
|
22 |
+
load_dotenv()
|
23 |
+
|
24 |
+
COINGECKO_API_KEY=os.environ["COINGECKO_API_KEY"]
|
25 |
+
# Historical crypto data
|
26 |
+
def scrape_historical_series(coin_name,date_start,date_end):
    """Scrape the daily historical series of a sample of coins from CoinGecko.

    Args:
        coin_name(list): Coin ids to download; duplicates are requested once.
        date_start(list): List of values for Year_start,Month_start,Day_start.
        date_end(list): List of values for Year_end,Month_end,Day_end.
    Returns:
        Dataframe with one row per coin and day and the columns
        id, date (unix timestamp of the day), prices, market_caps, total_vol.
        An empty dataframe is returned when no coin produced any data.
    """
    # Query window as unix timestamps; time.mktime interprets the dates in local time.
    unix_past = time.mktime(
        datetime.datetime(int(date_start[0]), int(date_start[1]), int(date_start[2])).timetuple())
    unix_now = time.mktime(
        datetime.datetime(int(date_end[0]), int(date_end[1]), int(date_end[2])).timetuple())

    headers = {
        "accept": "application/json",
        "x-cg-demo-api-key": COINGECKO_API_KEY
    }

    daily_chunks = []
    for coin in pd.unique(pd.Series(coin_name)):
        time.sleep(5)  # pause between requests (presumably to respect the API rate limit)
        url = f"https://api.coingecko.com/api/v3/coins/{coin.lower()}/market_chart/range?vs_currency=usd&from={unix_past}&to={unix_now}"
        response = requests.get(url, headers=headers)
        data = response.json()
        if len(data) == 0:
            # Nothing returned for this coin: skip it, but keep what was
            # already collected for the other coins.
            continue

        prices = pd.DataFrame(data['prices'], columns=['date', 'prices'])
        market = pd.DataFrame(data['market_caps'], columns=['date', 'market_caps'])
        volume = pd.DataFrame(data['total_volumes'], columns=['date', 'total_vol'])
        ts_coin = pd.concat([prices, market.iloc[:, 1], volume.iloc[:, 1]], axis=1)

        # The API timestamps are in milliseconds; convert them to local,
        # second-resolution datetimes so the samples can be grouped per day.
        ts_coin['dates'] = pd.to_datetime([
            datetime.datetime.fromtimestamp(int(ms) / 1000).replace(microsecond=0)
            for ms in ts_coin['date']
        ])
        ts_coin['id'] = coin

        # Aggregate the intraday samples into one row per day. Prices are
        # averaged together with the other columns so every value in a row
        # belongs to the same day.
        daily = (
            ts_coin.drop(columns=['date'])
            .set_index('dates')
            .groupby([pd.Grouper(freq='D'), 'id'])
            .mean()
            .reset_index()
        )

        # Unix timestamp of each row's own day (instead of positionally
        # reusing the requested date range, which mislabels rows whenever the
        # data does not start on date_start or has gaps).
        daily.insert(2, 'date', [time.mktime(day.timetuple()) for day in daily['dates']])
        daily_chunks.append(daily.drop(columns=['dates']))

    if not daily_chunks:
        return pd.DataFrame()
    # concat the chunks with the selected variables across all currencies
    return pd.concat(daily_chunks)
|
111 |
+
|
112 |
+
# 2. Macro variables, CLI
|
113 |
+
|
114 |
+
def scrape_cli(past,today):
    """Scrape data on leading indicator for USA.

    Args:
        past(date): Date for which you want to start scraping.
        today(date): Date for which you want to end scraping.
    Returns:
        Series with the monthly mean CLI value for the USA, indexed by date.
    """
    countries = ['USA']  #,'OECDE','OECD','NMEC']
    indicator = 'CLI'  #,'BCLI'
    start_period = past.strftime('%Y-%m')
    end_period = today.strftime('%Y-%m')

    print(indicator)
    # Scrape OECD data and create a time series of the indicator for each country
    for country_code in countries:
        mainpage = requests.get(f'https://stats.oecd.org/restsdmx/sdmx.ashx/GetData/MEI_CLI/CSCICP03.{country_code}.M/all?startTime={start_period}&endTime={end_period}')
        soup = BeautifulSoup(mainpage.content, 'xml')  #'html.parser')
        obs_values = soup.find_all("ObsValue")
        obs_keys = soup.find_all("ObsKey")

        rows = []
        for obs_key, obs_value in zip(obs_keys, obs_values):
            # The fields are cut out of the serialized XML of each tag.
            # NOTE(review): this depends on the exact attribute layout of the
            # serialized tags (e.g. the trailing 4 characters stripped from
            # the value) - confirm against a live response.
            key_text = str(obs_key)
            rows.append({
                'date': pd.to_datetime(key_text.split('"TIME_PERIOD" value="')[1][:7]),
                'country': key_text.split('"REF_AREA" value="')[1][:3],
                'measure': key_text.split('"MEASURE" value="')[1][:7][:-2],
                indicator: float(str(obs_value).split('value="')[1][0:-4]),
            })

        df_cli = pd.DataFrame(rows, columns=['date', 'country', 'measure', indicator])
        df_cli.index = pd.to_datetime(df_cli['date'])

    # Keep only the USA observations and aggregate them to monthly means.
    df_cli = df_cli.loc[df_cli['country']=='USA']['CLI'].astype('float').resample('M').mean()
    return df_cli
|
151 |
+
|
152 |
+
def scrape_cpi_employment(start_year="2024", end_year="2024"):
    """Scrape CPI and employment data from the BLS public API.

    Args:
        start_year(str): First year requested from the API (default "2024").
        end_year(str): Last year requested from the API (default "2024").
    Returns:
        Dataframe indexed by date (first day of the month) with the columns
        CPI and Employment, or an empty dataframe when the API returns no
        results. Values are kept exactly as delivered by the API
        (NOTE(review): they arrive as strings - convert downstream if numeric
        values are needed).
    """
    # BLS series id -> column name in the returned dataframe
    series_labels = {'CUUR0000SA0': 'CPI', 'LNS12000000': 'Employment'}

    headers = {'Content-type': 'application/json'}
    data = json.dumps({"seriesid": list(series_labels), "startyear": str(start_year), "endyear": str(end_year)})
    p = requests.post('https://api.bls.gov/publicAPI/v2/timeseries/data/', data=data, headers=headers)
    json_data = json.loads(p.text)

    if len(json_data['Results']) == 0:
        return pd.DataFrame()

    dates = []
    values = []
    labels = []
    for series in json_data['Results']['series']:
        label = series_labels.get(series['seriesID'])
        if label is None:
            # Only the requested series are labelled; anything else would
            # leave the value and label lists with different lengths.
            continue
        for item in series['data']:
            period = item['period']
            # Keep the monthly observations only (M01..M12).
            if 'M01' <= period <= 'M12':
                dates.append(pd.to_datetime(f"{item['year']}-{int(period[-2:]):02d}"))
                values.append(item['value'])
                labels.append(label)

    df_long = pd.DataFrame({'date': dates, 'value': values, 'series_id': labels}).set_index('date')
    # Put the two series side by side, aligned on the date index.
    df_cpi = pd.concat(
        [
            df_long.loc[df_long['series_id'] == 'CPI', 'value'].rename('CPI'),
            df_long.loc[df_long['series_id'] == 'Employment', 'value'].rename('Employment'),
        ],
        axis=1,
    )
    return df_cpi
|
197 |
+
|
198 |
+
def scrape_google_trends(currency, currency_short):
    """Scrape the daily Google Trends series of one currency.

    The series is requested for the months spanning the last 30 days, first
    by the currency name and, if that fails, by its short name. When both
    requests fail, a series of zeros covering the last 30 days is returned
    instead.

    Args:
        currency(str): Name of the currency used as the search term.
        currency_short(str): Short name of the currency, used as the fallback search term.
    Returns:
        Dataframe indexed by date_new with the columns id and google_trend
        (plus an index column when the zero-filled fallback is used).
    """
    from datetime import date

    names_values = [currency]
    names_short = [currency_short]

    today = date.today()
    past = today - datetime.timedelta(days=30)
    date_all1 = pd.date_range(past, today)

    google_data = pd.DataFrame()
    used_fallback = False
    # Scrape Google trends and create one time series in the form of
    # concatenated time series across all currencies
    for run_name, short_name in zip(names_values, names_short):
        time.sleep(5)
        try:
            data = dailydata.get_daily_data(str(run_name), past.year, past.month, today.year, today.month, verbose=False)
            data1 = data.iloc[:, 4]
            used_fallback = False
        except Exception:
            try:
                time.sleep(5)
                # Retry with the short name, passing the dates as integers
                # exactly like the first attempt.
                data = dailydata.get_daily_data(word=short_name, start_year=past.year, start_mon=past.month, stop_year=today.year, stop_mon=today.month, verbose=False)
                data1 = data.iloc[:, 4]
                used_fallback = False
            except Exception:
                # No data for this currency in Google Trends: use zeros.
                data1 = np.repeat(0, len(date_all1))
                used_fallback = True
        data1 = pd.DataFrame({'google_trend': pd.Series(data1)})
        data1.insert(0, 'id', np.repeat(run_name, len(data1)))
        google_data = pd.concat([google_data, data1], axis=0)

    # change index from date to date_new to match old_dataset
    google_data.reset_index(inplace=True)

    if used_fallback:
        # The zero series carries no dates of its own; attach the 30-day range.
        google_data['date_new'] = date_all1
    else:
        # The first column is the scraped date index; rename it.
        google_data.columns = np.append('date_new', google_data.columns[1:])
    google_data.set_index('date_new', inplace=True)
    return google_data
|
256 |
+
|
257 |
+
def scrape_stocks(past,today):
    """Scrape Yahoo finance and build one dataset with the financial variables.

    Args:
        past: Start date passed to yfinance.
        today: End date passed to yfinance.
    Returns:
        Dataframe indexed by date with the columns GSPC, GC=F, EURUSD and
        TNX; rows containing a missing value are dropped.
    """
    # Yahoo ticker code -> column name in the returned dataframe
    ticker_labels = {
        '^GSPC': 'GSPC',
        'GC=F': 'GC=F',
        'EURUSD%3DX': 'EURUSD',
        '^TNX': 'TNX',
    }

    combined = pd.DataFrame()
    for ticker, label in ticker_labels.items():
        quotes = pd.DataFrame(yf.download(ticker, start=past, end=today, progress=False)).reset_index()
        # Column 0 is the date, column 1 the first quote column of the download.
        combined = pd.concat([combined, quotes.iloc[:, 1].rename(label)], axis=1)
        trading_days = quotes.iloc[:, 0]

    # NOTE(review): the series are joined by row position and dated with the
    # days of the last ticker only - confirm all tickers share one calendar.
    combined.insert(0, 'date', trading_days)
    df_finance = combined.dropna()
    df_finance.set_index('date', inplace=True)
    df_finance.index = pd.to_datetime(df_finance.index)

    return df_finance
|
sentiment_analysis/__init__.py
ADDED
File without changes
|
sentiment_analysis/client.py
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from alpaca_trade_api import REST
|
2 |
+
import os
|
3 |
+
from dotenv import load_dotenv
|
4 |
+
|
5 |
+
|
6 |
+
class AlpacaNewsFetcher:
    """
    Fetches news articles about a stock through the Alpaca REST API.

    Attributes:
    - api_key (str): Alpaca API key for authentication.
    - api_secret (str): Alpaca API secret for authentication.
    - rest_client (alpaca_trade_api.REST): Alpaca REST API client.
    - no_of_newsarticles_to_fetch (str): Value of the NO_OF_NEWSARTICLES_TO_FETCH
      environment variable, passed as the article limit.
    """

    def __init__(self, api_key, api_secret):
        """
        Creates the REST client and reads the article limit from the environment.

        Args:
        - api_key (str): Alpaca API key for authentication.
        - api_secret (str): Alpaca API secret for authentication.
        """
        self.api_key = api_key
        self.api_secret = api_secret
        self.rest_client = REST(api_key, api_secret)

        # Load the .env file before reading the limit from the environment.
        load_dotenv()
        self.no_of_newsarticles_to_fetch = os.environ["NO_OF_NEWSARTICLES_TO_FETCH"]

    def fetch_news(self, symbol, start_date, end_date):
        """
        Fetches news articles for a stock symbol within a date range.

        Args:
        - symbol (str): Stock symbol for which news articles are to be fetched (e.g., "AAPL").
        - start_date (str): Start date of the range in the format "YYYY-MM-DD".
        - end_date (str): End date of the range in the format "YYYY-MM-DD".

        Returns:
        - list: One dictionary per article with the keys 'timestamp', 'title' and 'summary'.
        """
        articles = self.rest_client.get_news(
            symbol, start_date, end_date, limit=self.no_of_newsarticles_to_fetch
        )

        # Print the number of fetched articles between two divider lines.
        divider = "-----------------------------------------------------"
        print(divider)
        print(len(articles))
        print(divider)

        return [
            {
                'timestamp': item.created_at,
                'title': item.headline,
                'summary': item.summary,
            }
            for item in articles
        ]
|
63 |
+
|
sentiment_analysis/requirements.txt
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
alpaca_trade_api
|
2 |
+
transformers
|
3 |
+
einops
|
4 |
+
accelerate
|
5 |
+
langchain
|
6 |
+
bitsandbytes
|
7 |
+
#sentencepiece
|
8 |
+
openai
|
9 |
+
backtrader
|
10 |
+
yfinance
|
11 |
+
pandas
|
12 |
+
pyfolio
|
13 |
+
python-dotenv
|
sentiment_analysis/sentiment_analysis_pipeline.py
ADDED
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# !pip install transformers
|
2 |
+
from transformers import pipeline
|
3 |
+
from client import AlpacaNewsFetcher
|
4 |
+
from alpaca_trade_api import REST
|
5 |
+
import os
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
import pandas as pd
|
8 |
+
import matplotlib.pyplot as plt
|
9 |
+
from collections import defaultdict
|
10 |
+
from datetime import date
|
11 |
+
|
12 |
+
|
13 |
+
|
14 |
+
class NewsSentimentAnalysis:
    """
    Sentiment analysis of news articles using the Transformers library.

    Attributes:
    - classifier (pipeline): Sentiment analysis pipeline from Transformers.
    """

    def __init__(self):
        """
        Creates the default 'sentiment-analysis' pipeline.
        """
        self.classifier = pipeline('sentiment-analysis')

    def analyze_sentiment(self, news_article):
        """
        Classifies the sentiment of one news article.

        Args:
        - news_article (dict): Dictionary containing 'summary', 'title', and 'timestamp' keys.

        Returns:
        - dict: The 'timestamp', 'title' and 'summary' of the article plus the
          classifier output under 'sentiment'.
        """
        summary = news_article['summary']
        title = news_article['title']
        timestamp = news_article['timestamp']

        # The classifier sees the summary immediately followed by the title.
        return {
            'timestamp': timestamp,
            'title': title,
            'summary': summary,
            'sentiment': self.classifier(summary + title),
        }

    def plot_sentiment_graph(self, sentiment_analysis_result):
        """
        Plots a pie chart of the sentiment counts per date.

        Args:
        - sentiment_analysis_result (list): Records containing 'Timestamp' and 'Sentiment' keys.

        Returns:
        - None
        """
        frame = pd.DataFrame(sentiment_analysis_result)
        frame['Timestamp'] = pd.to_datetime(frame['Timestamp'])
        frame['Date'] = frame['Timestamp'].dt.date

        # Count each sentiment label per date and plot the counts.
        frame.groupby(by='Date')['Sentiment'].value_counts().plot.pie()

    def get_dominant_sentiment (self, sentiment_analysis_result):
        """
        Returns the sentiment label that occurs most often across all records.

        Args:
        - sentiment_analysis_result (list): Records containing 'Timestamp' and 'Sentiment' keys.

        Returns:
        - pandas.Series: Row with the entries 'sentiment' (the dominant label)
          and 'count' (how often it occurs).
        """
        frame = pd.DataFrame(sentiment_analysis_result)
        frame['Timestamp'] = pd.to_datetime(frame['Timestamp'])
        frame['Date'] = frame['Timestamp'].dt.date

        # Count each label per date, then add the daily counts up per label.
        per_day = frame.groupby(by='Date')['Sentiment'].value_counts()
        totals = per_day.groupby(level=1).sum()
        result = pd.DataFrame({
            'sentiment': list(totals.index),
            'count': totals.to_numpy(),
        })

        # Pick the label with the highest total count.
        return result.loc[result['count'].idxmax()]
|
99 |
+
|
100 |
+
|
101 |
+
|
102 |
+
#starting point of the program
|
103 |
+
if __name__ == '__main__':
    # Example usage: fetch AAPL news from Alpaca, classify every article and
    # print the per-article results together with the dominant sentiment.

    # Load Alpaca key and secret from the environment.
    load_dotenv()
    api_key = os.environ["ALPACA_API_KEY"]
    api_secret = os.environ["ALPACA_SECRET"]

    # Fetch news (title, timestamp and summary) for AAPL from 2021-01-01 to 2021-12-31.
    news_fetcher = AlpacaNewsFetcher(api_key, api_secret)
    news_data = news_fetcher.fetch_news("AAPL", "2021-01-01", "2021-12-31")

    news_sentiment_analyzer = NewsSentimentAnalysis()
    analysis_result = []
    # 'news_data' is a list of news articles (each as a dictionary).
    for article in news_data:
        sentiment_analysis_result = news_sentiment_analyzer.analyze_sentiment(article)

        # Keep the timestamp, the text and the predicted label of each article.
        analysis_result.append({
            'Timestamp': sentiment_analysis_result["timestamp"],
            'News- Title:Summary': sentiment_analysis_result["title"] + sentiment_analysis_result["summary"],
            'Sentiment': sentiment_analysis_result["sentiment"][0]['label'],
        })

    # Dominant sentiment based on the sentiment analysis of all news articles.
    dominant_sentiment = news_sentiment_analyzer.get_dominant_sentiment(analysis_result)

    final_result = {
        'Sentiment-analysis-result' : analysis_result,
        'Dominant-sentiment' : dominant_sentiment['sentiment']
    }

    print(final_result)
|
150 |
+
|
151 |
+
|
tools/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
tools/.chainlit/config.toml
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[project]
|
2 |
+
# Whether to enable telemetry (default: true). No personal data is collected.
|
3 |
+
enable_telemetry = true
|
4 |
+
|
5 |
+
# List of environment variables to be provided by each user to use the app.
|
6 |
+
user_env = []
|
7 |
+
|
8 |
+
# Duration (in seconds) during which the session is saved when the connection is lost
|
9 |
+
session_timeout = 3600
|
10 |
+
|
11 |
+
# Enable third parties caching (e.g LangChain cache)
|
12 |
+
cache = false
|
13 |
+
|
14 |
+
# Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
|
15 |
+
# follow_symlink = false
|
16 |
+
|
17 |
+
[features]
|
18 |
+
# Show the prompt playground
|
19 |
+
prompt_playground = true
|
20 |
+
|
21 |
+
# Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
|
22 |
+
unsafe_allow_html = false
|
23 |
+
|
24 |
+
# Process and display mathematical expressions. This can clash with "$" characters in messages.
|
25 |
+
latex = false
|
26 |
+
|
27 |
+
# Authorize users to upload files with messages
|
28 |
+
multi_modal = true
|
29 |
+
|
30 |
+
# Allows user to use speech to text
|
31 |
+
[features.speech_to_text]
|
32 |
+
enabled = false
|
33 |
+
# See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
|
34 |
+
# language = "en-US"
|
35 |
+
|
36 |
+
[UI]
|
37 |
+
# Name of the app and chatbot.
|
38 |
+
name = "Chatbot"
|
39 |
+
|
40 |
+
# Show the readme while the conversation is empty.
|
41 |
+
show_readme_as_default = true
|
42 |
+
|
43 |
+
# Description of the app and chatbot. This is used for HTML tags.
|
44 |
+
# description = ""
|
45 |
+
|
46 |
+
# Large content is collapsed by default for a cleaner UI.
|
47 |
+
default_collapse_content = true
|
48 |
+
|
49 |
+
# The default value for the expand messages settings.
|
50 |
+
default_expand_messages = false
|
51 |
+
|
52 |
+
# Hide the chain of thought details from the user in the UI.
|
53 |
+
hide_cot = false
|
54 |
+
|
55 |
+
# Link to your github repo. This will add a github button in the UI's header.
|
56 |
+
# github = ""
|
57 |
+
|
58 |
+
# Specify a CSS file that can be used to customize the user interface.
|
59 |
+
# The CSS file can be served from the public directory or via an external link.
|
60 |
+
# custom_css = "/public/test.css"
|
61 |
+
|
62 |
+
# Override default MUI light theme. (Check theme.ts)
|
63 |
+
[UI.theme.light]
|
64 |
+
#background = "#FAFAFA"
|
65 |
+
#paper = "#FFFFFF"
|
66 |
+
|
67 |
+
[UI.theme.light.primary]
|
68 |
+
#main = "#F80061"
|
69 |
+
#dark = "#980039"
|
70 |
+
#light = "#FFE7EB"
|
71 |
+
|
72 |
+
# Override default MUI dark theme. (Check theme.ts)
|
73 |
+
[UI.theme.dark]
|
74 |
+
#background = "#FAFAFA"
|
75 |
+
#paper = "#FFFFFF"
|
76 |
+
|
77 |
+
[UI.theme.dark.primary]
|
78 |
+
#main = "#F80061"
|
79 |
+
#dark = "#980039"
|
80 |
+
#light = "#FFE7EB"
|
81 |
+
|
82 |
+
|
83 |
+
[meta]
|
84 |
+
generated_by = "0.7.700"
|
tools/__pycache__/crypto_sentiment_analysis_util.cpython-311.pyc
ADDED
Binary file (8.33 kB). View file
|
|
tools/__pycache__/data_analyst.cpython-311.pyc
ADDED
Binary file (3.33 kB). View file
|
|
tools/crypto_sentiment_analysis_util.py
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import os
|
3 |
+
from dotenv import load_dotenv
|
4 |
+
from transformers import pipeline
|
5 |
+
import os
|
6 |
+
import pandas as pd
|
7 |
+
from GoogleNews import GoogleNews
|
8 |
+
from langchain_openai import ChatOpenAI
|
9 |
+
import pandas as pd
|
10 |
+
import praw
|
11 |
+
from datetime import datetime
|
12 |
+
|
13 |
+
load_dotenv()
|
14 |
+
|
15 |
+
def fetch_news(stockticker):
    """ Fetches recent Google News articles for a given stock symbol.

    The look-back period and the maximum number of articles are read from the
    DAYS_TO_FETCH_NEWS and NO_OF_NEWS_ARTICLES_TO_FETCH environment variables.

    Args:
    - stockticker (str): Symbol of a particular stock

    Returns:
    - list: A list of dictionaries with the keys 'News_Article' and 'URL'. """

    load_dotenv()
    # Read and validate the configuration up front so a missing or invalid
    # setting fails before any request is made.
    days_to_fetch_news = os.environ["DAYS_TO_FETCH_NEWS"]
    no_of_news_articles_to_fetch = int(os.environ["NO_OF_NEWS_ARTICLES_TO_FETCH"])

    googlenews = GoogleNews()
    googlenews.set_period(days_to_fetch_news)
    googlenews.get_news(stockticker)
    news_json = googlenews.get_texts()
    urls = googlenews.get_links()

    # zip stops at the shorter list, so an article without a matching link
    # can no longer raise an IndexError.
    article_url_pairs = list(zip(news_json, urls))[:max(no_of_news_articles_to_fetch, 0)]
    return [
        {'News_Article': article, 'URL': url}
        for article, url in article_url_pairs
    ]
|
50 |
+
|
51 |
+
def fetch_reddit_news(cryptocurrencyticker):
    """
    Collects Reddit headlines about a cryptocurrency from r/CryptoCurrencyTrading.

    The search starts with posts of the last week and is widened to the last
    year and then to the default search period while fewer than 10 headlines
    have been collected.

    Args:
    - cryptocurrencyticker (str): Search term, e.g. the ticker of the cryptocurrency.

    Returns:
    - set: Strings of the form '<title>, Date: <YYYY-MM-DD HH:MM:SS>, URL:<url>' (dates in UTC).
    """
    from datetime import timezone

    load_dotenv()
    REDDIT_USER_AGENT= os.environ["REDDIT_USER_AGENT"]
    REDDIT_CLIENT_ID= os.environ["REDDIT_CLIENT_ID"]
    REDDIT_CLIENT_SECRET= os.environ["REDDIT_CLIENT_SECRET"]
    #https://medium.com/geekculture/a-complete-guide-to-web-scraping-reddit-with-python-16e292317a52
    reddit = praw.Reddit (
        client_id= REDDIT_CLIENT_ID,
        client_secret= REDDIT_CLIENT_SECRET,
        user_agent=REDDIT_USER_AGENT
    )
    subreddit = reddit.subreddit('CryptoCurrencyTrading')

    headlines = set ( )
    # Progressively wider searches; the empty dict uses the search default.
    for search_options in ({'time_filter': 'week'}, {'time_filter': 'year'}, {}):
        for submission in subreddit.search(cryptocurrencyticker, **search_options):
            posted_at = datetime.fromtimestamp(int(submission.created_utc), tz=timezone.utc)
            headlines.add(submission.title + ', Date: ' + posted_at.strftime('%Y-%m-%d %H:%M:%S') + ', URL:' + submission.url)
        if len(headlines) >= 10:
            break
    return headlines
|
76 |
+
|
77 |
+
# Lazily created classifier shared by all calls: building the pipeline loads
# the model, which is far too expensive to repeat for every article.
_FINANCIAL_SENTIMENT_CLASSIFIER = None


def analyze_sentiment(article):
    """
    Analyzes the sentiment of a given news article.

    Args:
    - article: The news article to classify; it is converted with str() before
      being passed to the classifier.

    Returns:
    - dict: A dictionary with the original article under 'News_Article' and
      the classifier output under 'Sentiment'.
    """
    global _FINANCIAL_SENTIMENT_CLASSIFIER

    if _FINANCIAL_SENTIMENT_CLASSIFIER is None:
        #Analyze sentiment using specific model
        _FINANCIAL_SENTIMENT_CLASSIFIER = pipeline(model='mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis')
    sentiment_result = _FINANCIAL_SENTIMENT_CLASSIFIER(str(article))

    analysis_result = {
        'News_Article': article,
        'Sentiment': sentiment_result
    }

    return analysis_result
|
101 |
+
|
102 |
+
|
103 |
+
def generate_summary_of_sentiment(sentiment_analysis_results):
    """
    Asks the chat model for a summary that rationalizes the dominant sentiment.

    Args:
    - sentiment_analysis_results: Sentiment results for the collected articles.
      The value is converted with str() and embedded in the user message as-is.

    Returns:
    - The `content` of the model response (the generated summary).

    Raises:
    - KeyError: If OPENAI_API_KEY is not set in the environment.
    """
    news_article_sentiment = str(sentiment_analysis_results)
    print("News article sentiment : " + news_article_sentiment)

    # The original self-assignment of os.environ["OPENAI_API_KEY"] only served
    # to fail with KeyError when the key is missing; keep that check explicit.
    if "OPENAI_API_KEY" not in os.environ:
        raise KeyError("OPENAI_API_KEY")

    # The API key is taken from the environment rather than passed in directly.
    model = ChatOpenAI(
        model="gpt-4o",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
    )

    messages = [
        {"role": "system", "content": "You are a helpful assistant that looks at all news articles with their sentiment, hyperlink and date in front of the article text, the articles MUST be ordered by date!, and generate a summary rationalizing dominant sentiment. At the end of the summary, add URL links with dates for all the articles in the markdown format for streamlit. Make sure the articles as well as the links are ordered descending by Date!!!!!!! Example of adding the URLs: The Check out the links: [link](%s) % url, 2024-03-01. "},
        {"role": "user", "content": f"News articles and their sentiments: {news_article_sentiment}"}
    ]
    response = model.invoke(messages)

    summary = response.content
    print ("+++++++++++++++++++++++++++++++++++++++++++++++")
    print(summary)
    print ("+++++++++++++++++++++++++++++++++++++++++++++++")
    return summary
|
135 |
+
|
136 |
+
|
137 |
+
def plot_sentiment_graph(sentiment_analysis_results):
    """
    Counts how often each sentiment label occurs in the analysis results.

    Despite the name, no figure is drawn here: the function prints the results
    as a DataFrame and returns the per-label counts for the caller to chart.

    Args:
    - sentiment_analysis_results: Records accepted by pd.DataFrame that provide
      a 'Sentiment' column (e.g. a list of dicts with a 'Sentiment' key).

    Returns:
    - pandas.Series: Number of occurrences per sentiment label, as produced by
      value_counts() on the 'Sentiment' column.

    Raises:
    - KeyError: If the resulting DataFrame has no 'Sentiment' column
      (for example when the input is empty).
    """
    df = pd.DataFrame(sentiment_analysis_results)
    print(df)

    # One count per distinct sentiment label.
    sentiment_counts = df['Sentiment'].value_counts()

    return sentiment_counts
|
164 |
+
|
165 |
+
|
166 |
+
def get_dominant_sentiment (sentiment_analysis_results):
    """
    Returns the sentiment label that occurs most often in the analysis results.

    Args:
    - sentiment_analysis_results: Records accepted by pd.DataFrame that provide
      a 'Sentiment' column (e.g. a list of dicts with a 'Sentiment' key).

    Returns:
    - The most frequent value of the 'Sentiment' column. When several labels
      share the highest count, the first one in value_counts() order wins.

    Raises:
    - KeyError: If the resulting DataFrame has no 'Sentiment' column
      (for example when the input is empty).
    """
    df = pd.DataFrame(sentiment_analysis_results)

    # Count the occurrences of each sentiment value; the columns are renamed
    # explicitly so the frame looks the same regardless of pandas' defaults.
    sentiment_counts = df['Sentiment'].value_counts().reset_index()
    sentiment_counts.columns = ['sentiment', 'count']
    print(sentiment_counts)

    # Row holding the highest count (idxmax returns the first such row).
    top_row = sentiment_counts['count'].idxmax()

    return sentiment_counts.loc[top_row, 'sentiment']
|
187 |
+
|
188 |
+
# Script entry point: run the sentiment workflow once for a sample ticker.
if __name__ == '__main__':

    # Fetch the news articles for the sample ticker
    news_articles = fetch_news('AAPL')

    analysis_results = []

    # Classify every article and keep only its text and sentiment label
    for article in news_articles:
        scored = analyze_sentiment(article['News_Article'])
        article_text = scored["News_Article"]
        raw_sentiment = scored["Sentiment"]

        # Show the raw classifier output for this article
        print(f'News Article: {article_text} : Sentiment: {raw_sentiment}', '\n')

        analysis_results.append({
            'News_Article': article_text,
            'Sentiment': raw_sentiment[0]['label'],
        })

    # Report the most frequent sentiment label across all articles
    dominant_sentiment = get_dominant_sentiment(analysis_results)
    print(dominant_sentiment)

    # Print the results table and compute the per-label counts
    plot_sentiment_graph(analysis_results)
|
217 |
+
|
tools/data_analyst.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pydantic import BaseModel, Field
|
2 |
+
from langchain.tools import BaseTool
|
3 |
+
from typing import Optional, Type
|
4 |
+
from langchain.tools import StructuredTool
|
5 |
+
import yfinance as yf
|
6 |
+
from typing import List
|
7 |
+
from datetime import datetime,timedelta
|
8 |
+
from pycoingecko import CoinGeckoAPI
|
9 |
+
cg = CoinGeckoAPI()
|
10 |
+
|
11 |
+
def data_analyst_tools():
    """
    Builds the tool list used by the data analyst.

    Returns:
    - list: A single StructuredTool that looks up the current price data of a
      cryptocurrency through the module-level CoinGecko client `cg`.
    """

    def get_crypto_price(cryptocurrencyticker: str) -> str:
        """Returns CoinGecko's price response for the given id, as a string."""
        current_data = cg.get_price(
            ids=cryptocurrencyticker,
            vs_currencies='usd',
            include_market_cap='true',
            include_24hr_vol='true',
            include_last_updated_at='true',
        )
        return str(current_data)

    class CryptoPriceCheckInput(BaseModel):
        """Input for Crypto price check."""
        Cryptoticker: str = Field(..., description="Ticker symbol for Crypto or index")

    def run_crypto_price_check(Cryptoticker: str) -> str:
        """Tool callable; the parameter name must match the args schema field."""
        return get_crypto_price(Cryptoticker)

    # The tool used to be registered with func=CryptoPriceTool, a BaseTool
    # subclass: calling the tool only constructed a tool object and never
    # fetched a price. A plain function is registered instead, which makes the
    # local BaseTool subclass unnecessary. The name is passed explicitly so the
    # tool keeps the name from_function() derived before (the class name).
    tools_data_analyst = [
        StructuredTool.from_function(
            func=run_crypto_price_check,
            name="CryptoPriceTool",
            args_schema=CryptoPriceCheckInput,
            description="Function to get current Crypto prices.",
        ),
    ]
    return tools_data_analyst
|
tools/df_history.csv
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,stockticker
|
2 |
+
2024-04-22 00:00:00-04:00,399.3596471827562,402.1246793258712,395.03745670124425,400.2380676269531,20286900,0.0,0.0,MSFT
|
3 |
+
2024-04-23 00:00:00-04:00,403.51216021293357,407.4650522060062,402.33429210205253,406.836181640625,15734500,0.0,0.0,MSFT
|
4 |
+
2024-04-24 00:00:00-04:00,408.82258607970806,411.72735028943725,406.0475926794115,408.323486328125,15065300,0.0,0.0,MSFT
|
5 |
+
2024-04-25 00:00:00-04:00,393.32054400787314,399.1700088625431,387.3313470651067,398.321533203125,40586500,0.0,0.0,MSFT
|
6 |
+
2024-04-26 00:00:00-04:00,411.4279132786848,412.25640548421114,405.02945064216703,405.58843994140625,29694700,0.0,0.0,MSFT
|
7 |
+
2024-04-29 00:00:00-04:00,404.52035539531056,405.5884361925186,398.4712687423875,401.5257568359375,19582100,0.0,0.0,MSFT
|
8 |
+
2024-04-30 00:00:00-04:00,400.76710737423014,401.4359144425664,388.4693126899027,388.6289978027344,28781400,0.0,0.0,MSFT
|
9 |
+
2024-05-01 00:00:00-04:00,391.9030904630616,400.9967037344784,389.6072438016868,394.2289123535156,23562500,0.0,0.0,MSFT
|
10 |
+
2024-05-02 00:00:00-04:00,396.94401914412265,399.20992105581087,393.9394288835304,397.1236877441406,17709400,0.0,0.0,MSFT
|
11 |
+
2024-05-03 00:00:00-04:00,401.55570709720826,406.4169339510819,401.13644988960164,405.9278259277344,17446700,0.0,0.0,MSFT
|
12 |
+
2024-05-06 00:00:00-04:00,408.024048156178,413.1847226485525,405.63833666603693,412.7954406738281,16996600,0.0,0.0,MSFT
|
13 |
+
2024-05-07 00:00:00-04:00,413.91342570011614,413.92341744357753,408.35344694069664,408.6029968261719,20018200,0.0,0.0,MSFT
|
14 |
+
2024-05-08 00:00:00-04:00,407.4351142805277,411.48780192255407,405.97772103822234,409.80084228515625,11792300,0.0,0.0,MSFT
|
15 |
+
2024-05-09 00:00:00-04:00,409.8307875446534,411.97691043744567,408.363433019907,411.57763671875,14689700,0.0,0.0,MSFT
|
16 |
+
2024-05-10 00:00:00-04:00,412.1965086797442,414.6321179246016,411.05854661467066,413.9932556152344,13402300,0.0,0.0,MSFT
|
17 |
+
2024-05-13 00:00:00-04:00,417.2573820335048,417.5967662074119,410.08032520369875,412.97509765625,15440200,0.0,0.0,MSFT
|
18 |
+
2024-05-14 00:00:00-04:00,411.2781631216723,416.73831581889846,410.8090081198034,415.80999755859375,15109300,0.0,0.0,MSFT
|
19 |
+
2024-05-15 00:00:00-04:00,417.8999938964844,423.80999755859375,417.2699890136719,423.0799865722656,22239500,0.75,0.0,MSFT
|
20 |
+
2024-05-16 00:00:00-04:00,421.79998779296875,425.4200134277344,420.3500061035156,420.989990234375,17530100,0.0,0.0,MSFT
|
21 |
+
2024-05-17 00:00:00-04:00,422.5400085449219,422.9200134277344,418.0299987792969,420.2099914550781,15352200,0.0,0.0,MSFT
|
22 |
+
2024-05-20 00:00:00-04:00,420.2099914550781,426.7699890136719,419.989990234375,425.3399963378906,16272100,0.0,0.0,MSFT
|
23 |
+
2024-05-21 00:00:00-04:00,426.8299865722656,432.9700012207031,424.8500061035156,429.0400085449219,21453300,0.0,0.0,MSFT
|
24 |
+
2024-05-22 00:00:00-04:00,430.0899963378906,432.4100036621094,427.1300048828125,430.5199890136719,18073700,0.0,0.0,MSFT
|
25 |
+
2024-05-23 00:00:00-04:00,432.9700012207031,433.6000061035156,425.4200134277344,427.0,17211700,0.0,0.0,MSFT
|
26 |
+
2024-05-24 00:00:00-04:00,427.19000244140625,431.05999755859375,424.4100036621094,430.1600036621094,11845800,0.0,0.0,MSFT
|
27 |
+
2024-05-28 00:00:00-04:00,429.6300048828125,430.82000732421875,426.6000061035156,430.32000732421875,15718000,0.0,0.0,MSFT
|
28 |
+
2024-05-29 00:00:00-04:00,425.69000244140625,430.94000244140625,425.69000244140625,429.1700134277344,15517100,0.0,0.0,MSFT
|
29 |
+
2024-05-30 00:00:00-04:00,424.29998779296875,424.29998779296875,414.239990234375,414.6700134277344,28424800,0.0,0.0,MSFT
|
30 |
+
2024-05-31 00:00:00-04:00,416.75,416.75,404.510009765625,415.1300048828125,47995300,0.0,0.0,MSFT
|
31 |
+
2024-06-03 00:00:00-04:00,415.5299987792969,416.42999267578125,408.9200134277344,413.5199890136719,17484700,0.0,0.0,MSFT
|
32 |
+
2024-06-04 00:00:00-04:00,412.42999267578125,416.44000244140625,409.67999267578125,416.07000732421875,14348900,0.0,0.0,MSFT
|
33 |
+
2024-06-05 00:00:00-04:00,417.80999755859375,424.0799865722656,416.29998779296875,424.010009765625,16988000,0.0,0.0,MSFT
|
34 |
+
2024-06-06 00:00:00-04:00,424.010009765625,425.30999755859375,420.5799865722656,424.5199890136719,14861300,0.0,0.0,MSFT
|
35 |
+
2024-06-07 00:00:00-04:00,426.20001220703125,426.2799987792969,423.0,423.8500061035156,13621700,0.0,0.0,MSFT
|
36 |
+
2024-06-10 00:00:00-04:00,424.70001220703125,428.0799865722656,423.8900146484375,427.8699951171875,14003000,0.0,0.0,MSFT
|
37 |
+
2024-06-11 00:00:00-04:00,425.4800109863281,432.82000732421875,425.25,432.67999267578125,14551100,0.0,0.0,MSFT
|
38 |
+
2024-06-12 00:00:00-04:00,435.32000732421875,443.3999938964844,433.25,441.05999755859375,22366200,0.0,0.0,MSFT
|
39 |
+
2024-06-13 00:00:00-04:00,440.8500061035156,443.3900146484375,439.3699951171875,441.5799865722656,15960600,0.0,0.0,MSFT
|
40 |
+
2024-06-14 00:00:00-04:00,438.2799987792969,443.1400146484375,436.7200012207031,442.57000732421875,13582000,0.0,0.0,MSFT
|
41 |
+
2024-06-17 00:00:00-04:00,442.5899963378906,450.94000244140625,440.7200012207031,448.3699951171875,20790000,0.0,0.0,MSFT
|
42 |
+
2024-06-18 00:00:00-04:00,449.7099914550781,450.1400146484375,444.8900146484375,446.3399963378906,17112500,0.0,0.0,MSFT
|
43 |
+
2024-06-20 00:00:00-04:00,446.29998779296875,446.5299987792969,441.2699890136719,445.70001220703125,19877400,0.0,0.0,MSFT
|
44 |
+
2024-06-21 00:00:00-04:00,447.3800048828125,450.5799865722656,446.510009765625,449.7799987792969,34486200,0.0,0.0,MSFT
|
45 |
+
2024-06-24 00:00:00-04:00,449.79998779296875,452.75,446.4100036621094,447.6700134277344,15913700,0.0,0.0,MSFT
|
46 |
+
2024-06-25 00:00:00-04:00,448.25,451.4200134277344,446.75,450.95001220703125,16747500,0.0,0.0,MSFT
|
47 |
+
2024-06-26 00:00:00-04:00,449.0,453.6000061035156,448.19000244140625,452.1600036621094,16507000,0.0,0.0,MSFT
|
48 |
+
2024-06-27 00:00:00-04:00,452.17999267578125,456.1700134277344,451.7699890136719,452.8500061035156,14806300,0.0,0.0,MSFT
|
49 |
+
2024-06-28 00:00:00-04:00,453.07000732421875,455.3800048828125,446.4100036621094,446.95001220703125,28362300,0.0,0.0,MSFT
|
50 |
+
2024-07-01 00:00:00-04:00,448.6600036621094,457.3699951171875,445.6600036621094,456.7300109863281,17662800,0.0,0.0,MSFT
|
51 |
+
2024-07-02 00:00:00-04:00,453.20001220703125,459.5899963378906,453.1099853515625,459.2799987792969,13979800,0.0,0.0,MSFT
|
52 |
+
2024-07-03 00:00:00-04:00,458.19000244140625,461.0199890136719,457.8800048828125,460.7699890136719,9932800,0.0,0.0,MSFT
|
53 |
+
2024-07-05 00:00:00-04:00,459.6099853515625,468.3500061035156,458.9700012207031,467.55999755859375,16000300,0.0,0.0,MSFT
|
54 |
+
2024-07-08 00:00:00-04:00,466.54998779296875,467.70001220703125,464.4599914550781,466.239990234375,12962300,0.0,0.0,MSFT
|
55 |
+
2024-07-09 00:00:00-04:00,467.0,467.3299865722656,458.0,459.5400085449219,17207200,0.0,0.0,MSFT
|
56 |
+
2024-07-10 00:00:00-04:00,461.2200012207031,466.4599914550781,458.8599853515625,466.25,18196100,0.0,0.0,MSFT
|
57 |
+
2024-07-11 00:00:00-04:00,462.9800109863281,464.7799987792969,451.54998779296875,454.70001220703125,23111200,0.0,0.0,MSFT
|
58 |
+
2024-07-12 00:00:00-04:00,454.3299865722656,456.3599853515625,450.6499938964844,453.54998779296875,16311300,0.0,0.0,MSFT
|
59 |
+
2024-07-15 00:00:00-04:00,453.29998779296875,457.260009765625,451.42999267578125,453.9599914550781,14429400,0.0,0.0,MSFT
|
60 |
+
2024-07-16 00:00:00-04:00,454.2200012207031,454.29998779296875,446.6600036621094,449.5199890136719,17175700,0.0,0.0,MSFT
|
61 |
+
2024-07-17 00:00:00-04:00,442.5899963378906,444.8500061035156,439.17999267578125,443.5199890136719,21778000,0.0,0.0,MSFT
|
62 |
+
2024-07-18 00:00:00-04:00,444.3399963378906,444.6499938964844,434.3999938964844,440.3699951171875,20794800,0.0,0.0,MSFT
|
63 |
+
2024-07-19 00:00:00-04:00,433.1000061035156,441.1400146484375,432.0,437.1099853515625,20862400,0.0,0.0,MSFT
|
tools/df_with_forecast.csv
ADDED
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
,id,prices,market_caps,total_vol,CLI,CPI,Employment,id,google_trend,GSPC,GC=F,EURUSD,TNX
|
2 |
+
2023-01-31,,,,,107.893,,,,,,,,
|
3 |
+
2023-02-28,,,,,,,,,,,,,
|
4 |
+
2023-03-31,,,,,,,,,,,,,
|
5 |
+
2023-04-30,,,,,108.87,,,,,,,,
|
6 |
+
2023-05-31,,,,,,,,,,,,,
|
7 |
+
2023-06-30,,,,,,,,,,,,,
|
8 |
+
2023-07-31,,,,,108.931,,,,,,,,
|
9 |
+
2023-08-31,,,,,,,,,,,,,
|
10 |
+
2023-09-30,,,,,,,,,,,,,
|
11 |
+
2023-10-31,,,,,105.3285,,,,,,,,
|
12 |
+
2023-11-30,,,,,101.961,,,,,,,,
|
13 |
+
2023-12-31,,,,,76.021485,,,,,,,,
|
14 |
+
2024-01-01,,,,,,308.417,161152.0,,,,,,
|
15 |
+
2024-01-31,,,,,82.796104,,,,,,,,
|
16 |
+
2024-02-01,,,,,,310.326,160968.0,,,,,,
|
17 |
+
2024-02-29,,,,,50.859505000000006,,,,,,,,
|
18 |
+
2024-03-01,,,,,,312.332,161466.0,,,,,,
|
19 |
+
2024-03-31,,,,,71.54424900000001,,,,,,,,
|
20 |
+
2024-04-01,,,,,,313.548,161491.0,,,,,,
|
21 |
+
2024-04-30,,,,,70.99926128571428,,,,,,,,
|
22 |
+
2024-05-01,,,,,,314.069,161083.0,,,,,,
|
23 |
+
2024-05-06,ethereum,3114.4007005303224,374072475993.8121,11127068946.811003,,,,,,5142.419921875,2322.800048828125,1.0758124589920044,4.486999988555908
|
24 |
+
2024-05-07,ethereum,3062.1337546278614,367763583617.18,12212682358.435501,,,,,,5187.2001953125,2324.300048828125,1.0767507553100586,4.4730000495910645
|
25 |
+
2024-05-08,ethereum,2999.4869525045415,360243594935.94305,11179447639.771477,,,,,,5168.97998046875,2313.60009765625,1.0750491619110107,4.484000205993652
|
26 |
+
2024-05-09,ethereum,3003.5642861321066,360831840100.2795,10896607994.801586,,,,,,5189.02978515625,2310.699951171875,1.0746910572052002,4.515999794006348
|
27 |
+
2024-05-10,ethereum,2966.441885585809,356315503171.5778,11384672900.217531,,,,,,5225.490234375,2367.300048828125,1.078515887260437,4.4770002365112305
|
28 |
+
2024-05-11,ethereum,2916.0617572305187,350195653103.13715,9198771437.235367,,,,,,,,,
|
29 |
+
2024-05-12,ethereum,2924.2519718055137,351264187115.9079,5460016379.689179,,,,,,,,,
|
30 |
+
2024-05-13,ethereum,2945.101626776707,353629048507.87177,11486452597.64263,,,,,,5233.080078125,2358.300048828125,1.077040672302246,4.486999988555908
|
31 |
+
2024-05-14,ethereum,2900.6563297650755,348385655037.8947,11542903142.535707,,,,,,5221.10009765625,2336.0,1.079354166984558,4.480999946594238
|
32 |
+
2024-05-15,ethereum,2963.759315698215,355948495364.97504,11917611608.681692,,,,,,5263.259765625,2361.60009765625,1.0814552307128906,4.418000221252441
|
33 |
+
2024-05-16,ethereum,2973.3231927105603,357110705527.3933,12982589615.756212,,,,,,5310.06982421875,2389.5,1.0889805555343628,4.329999923706055
|
34 |
+
2024-05-17,ethereum,3055.768061630982,366942477718.2434,13075157085.833336,,,,,,5303.10009765625,2380.699951171875,1.0867793560028076,4.395999908447266
|
35 |
+
2024-05-18,ethereum,3116.6010226358244,374444822902.376,10245527976.19185,,,,,,,,,
|
36 |
+
2024-05-19,ethereum,3089.6899077803787,371102827471.68726,7186818177.12275,,,,,,,,,
|
37 |
+
2024-05-20,ethereum,3275.1726235398733,393064227949.9598,14861247848.577646,,,,,,5305.35009765625,2415.800048828125,1.0875475406646729,4.421999931335449
|
38 |
+
2024-05-21,ethereum,3736.779369147562,449277769707.8145,39114320451.96952,,,,,,5298.68994140625,2429.5,1.086082935333252,4.429999828338623
|
39 |
+
2024-05-22,ethereum,3743.6797798618477,449904838634.0898,27177492763.312775,,,,,,5319.27978515625,2417.5,1.0855170488357544,4.453000068664551
|
40 |
+
2024-05-23,ethereum,3802.195186351903,456440243805.41815,31490184022.903027,,,,,,5340.259765625,2371.199951171875,1.0825204849243164,4.418000221252441
|
41 |
+
2024-05-24,ethereum,3716.584005854382,446214223854.924,36595327650.52252,,,,,,5281.4501953125,2342.60009765625,1.0812681913375854,4.488999843597412
|
42 |
+
2024-05-25,ethereum,3747.9139284087246,450191345047.8288,12320451102.246416,,,,,,,,,
|
43 |
+
2024-05-26,ethereum,3822.9062956505663,459222383843.30835,10446532594.41313,,,,,,,,,
|
44 |
+
2024-05-27,ethereum,3904.4531708423287,468888345618.59467,15875563017.846474,,,,,,,,,
|
45 |
+
2024-05-28,ethereum,3850.4555712911824,462421783973.1804,16981798566.54435,,,,,,5315.91015625,2336.89990234375,1.0847634077072144,4.453000068664551
|
46 |
+
2024-05-29,ethereum,3790.3890178850775,455720009606.71265,16291087718.980705,,,,,,5278.72998046875,2340.300048828125,1.0861891508102417,4.565999984741211
|
47 |
+
2024-05-30,ethereum,3760.0562456291614,451909655831.15967,14241005007.401228,,,,,,5259.77001953125,2336.89990234375,1.0851871967315674,4.593999862670898
|
48 |
+
2024-05-31,ethereum,3766.348666657115,452613341252.82587,13233598539.854492,74.31149225,,,,,5243.2099609375,2344.10009765625,1.0801819562911987,4.552000045776367
|
49 |
+
2024-06-01,ethereum,3797.773661763607,456183752606.3188,10134684638.033493,,314.175,161199.0,,,,,,
|
50 |
+
2024-06-02,ethereum,3789.4643581084024,455395042781.10913,8704992695.338041,,,,,,,,,
|
51 |
+
2024-06-03,ethereum,3792.876939393309,455583728457.67914,11345758405.200346,,,,,,5297.14990234375,2322.60009765625,1.0835410356521606,4.4730000495910645
|
52 |
+
2024-06-04,ethereum,3791.003611630613,455454864609.9826,11583124383.448898,,,,,,5278.240234375,2347.5,1.0853049755096436,4.361000061035156
|
53 |
+
2024-06-05,ethereum,3826.985885858965,459614958452.3508,12970952878.62752,,,,,,5314.47998046875,2326.39990234375,1.0907385349273682,4.326000213623047
|
54 |
+
2024-06-06,ethereum,3831.1242540234693,460276696407.6153,12084914430.50892,,,,,,5357.7998046875,2355.0,1.0881866216659546,4.301000118255615
|
55 |
+
2024-06-07,ethereum,3759.215714495715,451768670762.0454,13410143841.10996,,,,,,5343.81005859375,2379.89990234375,1.087453007698059,4.296999931335449
|
56 |
+
2024-06-08,ethereum,3684.6113824590207,442729750197.81464,12328054405.91001,,,,,,,,,
|
57 |
+
2024-06-09,ethereum,3693.5688944666886,443758507298.5523,6434938580.374867,,,,,,,,,
|
58 |
+
2024-06-10,ethereum,3669.804414640858,441385340635.06006,7678615033.327232,,,,,,5341.22021484375,2290.60009765625,1.0894432067871094,4.453000068664551
|
59 |
+
2024-06-11,ethereum,3511.552216421044,421773678119.8096,16295040761.774801,,,,,,5353.0,2300.0,1.0778882503509521,4.435999870300293
|
60 |
+
2024-06-12,ethereum,3559.325489288785,427430429412.461,16146566925.235643,,,,,,5409.1298828125,2314.89990234375,1.0764262676239014,4.388000011444092
|
61 |
+
2024-06-13,ethereum,3487.56598378508,418921286187.6116,16102374829.54766,,,,,,5441.93017578125,2309.39990234375,1.0740330219268799,4.309999942779541
|
62 |
+
2024-06-14,ethereum,3477.487513961109,417925411247.93335,13751574508.884224,,,,,,5424.080078125,2307.0,1.0813149213790894,4.196000099182129
|
63 |
+
2024-06-15,ethereum,3547.2656444381705,426052557357.5137,14435000601.871336,,,,,,,,,
|
64 |
+
2024-06-16,ethereum,3581.554801676437,430252401136.91156,9265313569.748865,,,,,,,,,
|
65 |
+
2024-06-17,ethereum,3521.540033404755,423525318255.71875,14792637405.883486,,,,,,5431.10986328125,2320.199951171875,1.0738831758499146,4.264999866485596
|
66 |
+
2024-06-18,ethereum,3443.057015401169,420823437646.33813,20647300792.256817,,,,,,5476.14990234375,2311.800048828125,1.0702168941497803,4.2870001792907715
|
67 |
+
2024-06-19,ethereum,3548.6088199945443,433926242387.04596,17174770852.62574,,,,,,,,,
|
68 |
+
2024-06-20,ethereum,3544.840023868359,433416267378.3272,14431531886.666199,,,,,,5499.990234375,2328.89990234375,1.0740677118301392,4.23799991607666
|
69 |
+
2024-06-21,ethereum,3501.4844259897077,428154107816.4788,15591212529.32492,,,,,,5466.77001953125,2331.199951171875,1.0742292404174805,4.223999977111816
|
70 |
+
2024-06-22,ethereum,3498.401912499916,427797791720.6712,10074881877.914883,,,,,,,,,
|
71 |
+
2024-06-23,ethereum,3471.25661843264,424559563614.4485,6991292722.007789,,,,,,,,,
|
72 |
+
2024-06-24,ethereum,3330.9914608412787,405609291184.5871,18078022125.09135,,,,,,5459.580078125,2323.300048828125,1.0751532316207886,4.275000095367432
|
73 |
+
2024-06-25,ethereum,3388.6552364887525,407290827369.29706,17480780190.891705,,,,,,5460.72998046875,2324.39990234375,1.0706409215927124,4.2170000076293945
|
74 |
+
2024-06-26,ethereum,3379.6384972292053,406200008420.7241,11068675554.45744,,,,,,5460.7099609375,2307.89990234375,1.0687757730484009,4.284999847412109
|
75 |
+
2024-06-27,ethereum,3422.9987958226375,411321369785.6611,11381676501.223251,,,,,,5473.58984375,2296.800048828125,1.0732723474502563,4.327000141143799
|
76 |
+
2024-06-28,ethereum,3409.9899950655326,410020829397.1302,12108738591.606865,,,,,,5488.47998046875,2325.39990234375,1.0709619522094727,4.303999900817871
|
77 |
+
2024-06-29,ethereum,3382.1846820729265,406488247465.7656,8118319341.85326,,,,,,,,,
|
78 |
+
2024-06-30,ethereum,3405.653616859288,409202194429.17535,6586293113.04436,74.519282,,,,,,,,
|
79 |
+
2024-07-01,ethereum,3466.7870078346746,416633403003.7347,10972345968.355255,,,161266.0,ethereum,16.81,5471.080078125,2323.800048828125,1.068010926246643,4.423999786376953
|
80 |
+
2024-07-02,ethereum,3425.9867093877297,411805426012.3614,9347179358.150301,,,,ethereum,13.69,5461.83984375,2330.699951171875,1.0708472728729248,4.441999912261963
|
81 |
+
2024-07-03,ethereum,3305.863374711538,397336350754.1616,12936936280.4328,,,,ethereum,18.49,5507.43994140625,2330.89990234375,1.0735257863998413,4.426000118255615
|
82 |
+
2024-07-04,ethereum,3141.9137606677323,377789953332.375,19762357071.82109,,,,ethereum,18.49,,,,
|
83 |
+
2024-07-05,ethereum,2942.873381316971,353947760154.63116,30371642360.497025,,,,ethereum,33.64,5537.91015625,2354.89990234375,1.0737102031707764,4.330999851226807
|
84 |
+
2024-07-06,ethereum,3026.0221393875127,363716353821.9705,14095786002.87394,,,,ethereum,17.64,,,,
|
85 |
+
2024-07-07,ethereum,2980.329795932314,358196417772.35425,10109122874.859922,,,,ethereum,12.959999999999999,,,,
|
86 |
+
2024-07-08,ethereum,2995.42249679924,359848515705.2204,18656672090.05734,,,,ethereum,22.09,5572.75,2381.699951171875,1.0748412609100342,4.306000232696533
|
87 |
+
2024-07-09,ethereum,3067.945362448234,368790204798.86127,17265940071.85487,,,,ethereum,18.49,5584.240234375,2363.10009765625,1.0793308019638062,4.300000190734863
|
88 |
+
2024-07-10,ethereum,3103.355851261978,373109735757.489,14135312863.462542,,,,ethereum,18.49,5591.259765625,2366.300048828125,1.082602620124817,4.2779998779296875
|
89 |
+
2024-07-11,ethereum,3120.2511152159645,375003912446.32135,14601771425.99495,,,,ethereum,20.25,5635.2099609375,2378.699951171875,1.0830950736999512,4.288000106811523
|
90 |
+
2024-07-12,ethereum,3106.665060608468,373479677453.21234,13272564100.61316,,,,ethereum,14.44,5590.759765625,2399.800048828125,1.0814785957336426,4.205999851226807
|
91 |
+
2024-07-13,ethereum,3156.7229997808245,379336539640.4357,9099839817.153015,,,,ethereum,13.69,,,,
|
92 |
+
2024-07-14,ethereum,3214.951666958399,386450506355.89264,9399920281.501238,,,,ethereum,14.44,,,,
|
93 |
+
2024-07-15,ethereum,3390.5558977346586,407705698434.75323,14682856047.608225,,,,ethereum,36.0,5638.16015625,2430.0,1.083329677581787,4.236999988555908
|
94 |
+
2024-07-16,ethereum,3439.2531522603167,413315389170.71234,20249821643.173527,,,,ethereum,33.64,5644.08984375,2427.39990234375,1.0872756242752075,4.175000190734863
|
95 |
+
2024-07-17,ethereum,3445.9301630097125,414204244128.5122,18316846571.47861,,,,ethereum,29.160000000000004,5610.06982421875,2472.89990234375,1.0885539054870605,4.178999900817871
|
96 |
+
2024-07-18,ethereum,3428.483712881156,412151252125.53094,14880766047.388323,,,,ethereum,27.04,5608.56005859375,2466.0,1.0901559591293335,4.183000087738037
|
97 |
+
2024-07-19,ethereum,3459.866194068627,415830197917.89233,15874038814.582893,,,,ethereum,26.01,5543.3701171875,2418.800048828125,1.0902509689331055,4.224999904632568
|
98 |
+
2024-07-20,ethereum,3506.166559401983,421506391873.4467,12819209095.177147,,,,ethereum,24.009999999999998,,,,
|
99 |
+
2024-07-21,ethereum,3505.198785199442,421479666822.44586,9734735125.637077,,,,ethereum,24.009999999999998,,,,
|
100 |
+
2024-07-22,ethereum,3478.5398389521833,418248333345.1597,17165553086.473969,,,,ethereum,43.56,5544.5400390625,2402.10009765625,1.094020128250122,4.224999904632568
|
101 |
+
2024-07-23,ethereum,3471.4537045282686,417282646136.00616,22247562224.91563,,,,ethereum,100.0,5565.2998046875,2395.800048828125,1.0901559591293335,4.236999988555908
|
102 |
+
2024-07-24,ethereum,3389.34554552627,407555827602.13794,19555399218.051266,,,,ethereum,53.29,5505.83984375,2421.0,1.0899182558059692,4.229000091552734
|
103 |
+
2024-07-25,ethereum,3166.8674273376732,380757485120.10834,23309182458.392136,,,,ethereum,49.0,5428.7001953125,2365.5,1.0889805555343628,4.205999851226807
|
104 |
+
2024-07-26,ethereum,3256.836248537211,391618277681.34503,18543581673.430332,,,,ethereum,32.489999999999995,5433.669921875,2368.699951171875,1.0850694179534912,4.244999885559082
|
105 |
+
2024-07-27,ethereum,3268.634820892746,393058948301.01434,12350215232.854982,,,,ethereum,26.01,,,,
|
106 |
+
2024-07-28,ethereum,3261.358165252211,392191417408.00104,11791845044.016218,,,,ethereum,22.09,,,,
|
107 |
+
2024-07-29,ethereum,3339.1396964471446,401594098625.7107,13845546119.727053,,,,ethereum,30.250000000000004,5476.5498046875,2377.300048828125,1.0839520692825317,4.163000106811523
|
108 |
+
2024-07-30,ethereum,3307.2248771443924,397741773052.1379,15364998924.01399,,,,ethereum,21.16,5478.72998046875,2380.89990234375,1.0851283073425293,4.173999786376953
|
109 |
+
2024-07-31,ethereum,3281.768911864367,394601659805.1198,14197119142.220865,99.00630000000001,,,ethereum,19.36,5505.58984375,2407.10009765625,1.0853756666183472,4.127999782562256
|
110 |
+
2024-08-01,ethereum,3164.2867044555733,380493843585.9209,19172882534.0517,,,,ethereum,45.0,5537.83984375,2446.699951171875,1.081946611404419,4.052999973297119
|
111 |
+
2024-08-02,ethereum,3067.1887722202587,368925501105.263,19531566013.812,,,,ethereum,37.309999999999995,5376.6298828125,2490.800048828125,1.0816072225570679,3.940000057220459
|
112 |
+
2024-08-03,ethereum,2946.763174245429,354325094002.6969,19461236907.28371,,,,ethereum,35.6,,,,
|
113 |
+
2024-08-04,ethereum,2911.61,350078479982.3696,13827666339.828363,,,,ethereum,40.42,,,,
|
tools/stock_sentiment_evalutor.py
ADDED
@@ -0,0 +1,261 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import pipeline
|
2 |
+
from alpaca_trade_api import REST
|
3 |
+
import os
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
from datetime import datetime
|
6 |
+
import pandas as pd
|
7 |
+
import matplotlib.pyplot as plt
|
8 |
+
from datetime import date, timedelta
|
9 |
+
from pydantic import BaseModel, Field
|
10 |
+
from langchain.tools import BaseTool
|
11 |
+
from typing import Optional, Type
|
12 |
+
from langchain.tools import StructuredTool
|
13 |
+
|
14 |
+
|
15 |
+
def sentimental_analysis_tools():
    """Build the LangChain tool list used for stock news sentiment analysis.

    The news fetcher, the sentiment analyzer, the ``get_stock_sentiment``
    function and its input schema are all defined locally inside this factory.

    Returns:
        list: A one-element list holding a ``StructuredTool`` that wraps
        ``get_stock_sentiment``.
    """

    class AlpacaNewsFetcher:
        """
        A class for fetching news articles related to a specific stock from Alpaca API.

        Attributes:
        - api_key (str): Alpaca API key, read from ``ALPACA_API_KEY``.
        - api_secret (str): Alpaca API secret, read from ``ALPACA_SECRET``.
        - rest_client (alpaca_trade_api.REST): Alpaca REST API client.
        - no_of_newsarticles_to_fetch (int): Maximum number of articles to request.
        - no_of_days (int): How many days back from today to request news for.
        """

        def __init__(self):
            """
            Initializes the AlpacaNewsFetcher object from environment variables.

            ``load_dotenv()`` is called first so values may come from a ``.env`` file.

            Raises:
            - KeyError: If a required environment variable is missing.
            - ValueError: If one of the numeric settings is not an integer.
            """
            load_dotenv()
            self.api_key = os.environ["ALPACA_API_KEY"]
            self.api_secret = os.environ["ALPACA_SECRET"]
            self.rest_client = REST(self.api_key, self.api_secret)

            # Environment values are always strings; convert them once here so
            # they can be used as a request limit and as a timedelta day count.
            self.no_of_newsarticles_to_fetch = int(os.environ["NO_OF_NEWSARTICLES_TO_FETCH"])
            self.no_of_days = int(os.environ["NO_OF_DAYS_TO_FETCH_NEWS_ARTICLES"])

        def fetch_news(self, stockticker):
            """
            Fetches news articles for a stock symbol over the configured look-back window.

            Args:
            - stockticker (str): Stock symbol for which news articles are to be fetched (e.g., "AAPL").

            Returns:
            - list: One dict per article with 'timestamp', 'title' and 'summary' keys.
            """
            # The window runs from (today - no_of_days) up to today; the start
            # must be the earlier date.
            # NOTE(review): a date-only end value may exclude articles published
            # later today, depending on how Alpaca interprets it - confirm.
            end_date = date.today()
            start_date = end_date - timedelta(days=self.no_of_days)

            news_articles = self.rest_client.get_news(
                stockticker,
                start_date.isoformat(),
                end_date.isoformat(),
                limit=self.no_of_newsarticles_to_fetch,
            )

            return [
                {
                    'timestamp': article.created_at,
                    'title': article.headline,
                    'summary': article.summary,
                }
                for article in news_articles
            ]

    class NewsSentimentAnalysis:
        """
        A class for sentiment analysis of news articles using the Transformers library.

        Attributes:
        - classifier (pipeline): Sentiment analysis pipeline from Transformers.
        """

        def __init__(self):
            """
            Initializes the NewsSentimentAnalysis object with the default
            'sentiment-analysis' pipeline.
            """
            self.classifier = pipeline('sentiment-analysis')

        def analyze_sentiment(self, news_article):
            """
            Analyzes the sentiment of a given news article.

            Args:
            - news_article (dict): Dictionary containing 'summary', 'title' and 'timestamp' keys.

            Returns:
            - dict: The input 'timestamp', 'title' and 'summary' plus a 'sentiment'
              entry holding the raw classifier output.
            """
            summary = news_article['summary']
            title = news_article['title']
            timestamp = news_article['timestamp']

            # Separate the two texts so the last word of the summary is not
            # glued to the first word of the title.
            relevant_text = f'{summary} {title}'
            # truncation=True keeps over-long inputs within the model's maximum length.
            sentiment_result = self.classifier(relevant_text, truncation=True)

            return {
                'timestamp': timestamp,
                'title': title,
                'summary': summary,
                'sentiment': sentiment_result,
            }

        def plot_sentiment_graph(self, sentiment_analysis_result):
            """
            Plots a pie chart of sentiment counts per date.

            Args:
            - sentiment_analysis_result (list): List of dicts with 'Timestamp' and 'Sentiment' keys.

            Returns:
            - None. Nothing is plotted when the list is empty.
            """
            if not sentiment_analysis_result:
                return

            df = pd.DataFrame(sentiment_analysis_result)
            df['Timestamp'] = pd.to_datetime(df['Timestamp'])
            df['Date'] = df['Timestamp'].dt.date

            # Group by Date, sentiment value count
            grouped = df.groupby(by='Date')['Sentiment'].value_counts()

            grouped.plot.pie()

        def get_dominant_sentiment(self, sentiment_analysis_result):
            """
            Returns the sentiment label that occurs most often across all analyzed articles.

            Args:
            - sentiment_analysis_result (list): List of dicts, each with a 'Sentiment' key.

            Returns:
            - pandas.Series: Entries 'sentiment' (the winning label) and 'count'
              (how many articles carry it).

            Raises:
            - ValueError: If the list is empty.
            """
            if not sentiment_analysis_result:
                raise ValueError("No sentiment results to summarize")

            df = pd.DataFrame(sentiment_analysis_result)

            # groupby sorts the labels, and idxmax returns the first maximum, so
            # ties resolve to the alphabetically first label.
            counts = df.groupby('Sentiment').size()
            top_label = counts.idxmax()

            return pd.Series({'sentiment': top_label, 'count': int(counts.loc[top_label])})

    # Function to get the stock sentiment
    def get_stock_sentiment(stockticker: str):
        """
        Fetches recent news for a ticker, classifies each article and summarizes the result.

        Args:
        - stockticker (str): Ticker symbol to analyze.

        Returns:
        - dict: 'Sentiment-analysis-result' (one "timestamp : text : sentiment" line
          per article) and 'Dominant-sentiment' (the most frequent label).
        """
        # Initialize AlpacaNewsFetcher, a class for fetching news articles related to a specific stock from Alpaca API.
        news_fetcher = AlpacaNewsFetcher()

        # Fetch news (contains - title of the news, timestamp and summary) for specified stockticker
        news_data = news_fetcher.fetch_news(stockticker)

        # Without articles there is nothing to classify; answer explicitly
        # instead of failing further down.
        if not news_data:
            return {
                'Sentiment-analysis-result': f'No news articles found for {stockticker}.',
                'Dominant-sentiment': 'NEUTRAL',
            }

        # Initialize the NewsSentimentAnalysis object
        news_sentiment_analyzer = NewsSentimentAnalysis()
        analysis_result = []

        # 'news_data' is a list of news articles (each as a dictionary); analyze sentiment of each one
        for article in news_data:
            sentiment_analysis_result = news_sentiment_analyzer.analyze_sentiment(article)

            # Display sentiment analysis results
            print(f'Timestamp: {sentiment_analysis_result["timestamp"]}, '
                  f'Title: {sentiment_analysis_result["title"]}, '
                  f'Summary: {sentiment_analysis_result["summary"]}')

            print(f'Sentiment: {sentiment_analysis_result["sentiment"]}', '\n')

            analysis_result.append({
                'Timestamp': sentiment_analysis_result["timestamp"],
                'News- Title:Summary': sentiment_analysis_result["title"] + sentiment_analysis_result["summary"],
                'Sentiment': sentiment_analysis_result["sentiment"][0]['label'],
            })

        # Get dominant sentiment across ALL analyzed articles, not just the last one
        dominant_sentiment = news_sentiment_analyzer.get_dominant_sentiment(analysis_result)

        # Build response string for news sentiment
        output_string = ''.join(
            f'{result["Timestamp"]} : {result["News- Title:Summary"]} : {result["Sentiment"]}' + '\n'
            for result in analysis_result
        )

        return {
            'Sentiment-analysis-result': output_string,
            'Dominant-sentiment': dominant_sentiment['sentiment'],
        }

    class StockSentimentCheckInput(BaseModel):
        """Input for Stock sentiment check."""
        stockticker: str = Field(..., description="Ticker symbol for stock or index")

    # NOTE(review): this BaseTool subclass is kept as in the original but is
    # not what gets registered below; the returned tool wraps
    # get_stock_sentiment directly.
    class StockSentimentAnalysisTool(BaseTool):
        name = "get_stock_sentiment"
        description = """Useful for finding sentiment of stock, based on published news articles.
                        Fetches configured number of news items for the sentiment,
                        determines sentiment of each news items and then returns
                        List of sentiment analysit result & domainant sentiment of the news
                        """

        """Input for Stock sentiment analysis."""
        stockticker: str = Field(..., description="Ticker symbol for stock or index")

        def _run(self, stockticker: str):
            # print("i'm running")
            sentiment_response = get_stock_sentiment(stockticker)
            print("++++++++++++++++++++++++++++++++++++++++++++++++++++++")
            print(str(sentiment_response))
            print("++++++++++++++++++++++++++++++++++++++++++++++++++++++")

            return sentiment_response

        def _arun(self, stockticker: str):
            raise NotImplementedError("This tool does not support async")

        args_schema: Optional[Type[BaseModel]] = StockSentimentCheckInput

    # from_function must wrap the callable that does the work. Wrapping the
    # BaseTool class would only construct a tool object on each invocation
    # instead of running the analysis.
    tools_sentiment_analyst = [
        StructuredTool.from_function(
            func=get_stock_sentiment,
            args_schema=StockSentimentCheckInput,
            description="Function to get stock sentiment.",
        )
    ]
    return tools_sentiment_analyst
|
utils.py
ADDED
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import matplotlib.pyplot as plt
|
2 |
+
import plotly.graph_objects as go
|
3 |
+
import pandas as pd
|
4 |
+
import numpy as np
|
5 |
+
from datetime import datetime, timedelta
|
6 |
+
import yfinance as yf
|
7 |
+
from plotly.subplots import make_subplots
|
8 |
+
|
9 |
+
def get_stock_price(stockticker: str) -> str:
    """Return the latest closing price for a ticker, rounded to 2 decimals.

    Args:
        stockticker: Ticker symbol passed to ``yf.Ticker``.

    Returns:
        The first 'Close' value of the one-day history, rounded to two
        decimals and converted to ``str``.

    Raises:
        ValueError: If the one-day history comes back empty.
    """
    ticker = yf.Ticker(stockticker)
    todays_data = ticker.history(period='1d')

    # Fail with a clear message instead of an opaque indexing error.
    if todays_data.empty:
        raise ValueError(f"No price data returned for ticker {stockticker!r}")

    # Select by position explicitly: plain [0] on a date-indexed Series relies
    # on a deprecated positional fallback.
    return str(round(todays_data['Close'].iloc[0], 2))
|
13 |
+
|
14 |
+
def plot_candlestick_stock_price(historical_data):
    """Show a Plotly candlestick chart for a price history frame.

    Args:
        historical_data: DataFrame with 'Close', 'Open', 'High' and 'Low'
            columns and an index that ``pd.to_datetime`` can convert.
            Presumably the output of a yfinance history call; verify
            against the caller.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    price_columns = ['Close', 'Open', 'High', 'Low']
    ohlc = historical_data[price_columns]

    # Turn the index into a regular 'Date' column for plotting.
    ohlc.index = pd.to_datetime(ohlc.index)
    ohlc.index.names = ['Date']
    ohlc = ohlc.reset_index()

    candles = go.Candlestick(
        x=ohlc['Date'],
        open=ohlc['Open'],
        high=ohlc['High'],
        low=ohlc['Low'],
        close=ohlc['Close'],
    )
    chart = go.Figure(data=[candles])
    chart.show()
|
28 |
+
|
29 |
+
def historical_stock_prices(stockticker, days_ago):
    """Fetch the price history of a ticker for the last ``days_ago`` days.

    Args:
        stockticker: Ticker symbol passed to ``yf.Ticker``.
        days_ago: Number of days before now at which the window starts.

    Returns:
        Whatever ``Ticker.history`` returns for the computed start/end dates.
    """
    date_format = '%Y-%m-%d'
    ticker = yf.Ticker(stockticker)

    # Both bounds are sent as date-only strings, so the time of day is dropped.
    window_end = datetime.now()
    window_start = window_end - timedelta(days=days_ago)

    return ticker.history(
        start=window_start.strftime(date_format),
        end=window_end.strftime(date_format),
    )
|
38 |
+
|
39 |
+
def plot_macd2(df):
    """Draw a two-panel Matplotlib figure: close price on top, MACD below.

    Args:
        df: DataFrame with 'Close', 'MACD', 'Signal_Line' and
            'MACD_Histogram' columns; its index is used as the x axis.

    Returns:
        The Matplotlib figure, or ``None`` if anything fails. Failures are
        reported on stdout and never raised (best-effort plotting).
    """
    fig = None
    try:
        # Debugging: Print the dataframe columns and a few rows
        print("DataFrame columns:", df.columns)
        print("DataFrame head:\n", df.head())

        # Convert DataFrame index and columns to numpy arrays
        index = df.index.to_numpy()
        close_prices = df['Close'].to_numpy()
        macd = df['MACD'].to_numpy()
        signal_line = df['Signal_Line'].to_numpy()
        macd_histogram = df['MACD_Histogram'].to_numpy()

        fig, (ax1, ax2) = plt.subplots(
            2, 1, sharex=True, figsize=(10, 8),
            gridspec_kw={'height_ratios': [3, 1]},
        )

        # Subplot 1: close price drawn as a line (titled "Candlestick Chart")
        ax1.plot(index, close_prices, label='Close', color='black')
        ax1.set_title("Candlestick Chart")
        ax1.set_ylabel("Price")
        ax1.legend()

        # Subplot 2: MACD
        ax2.plot(index, macd, label='MACD', color='blue')
        ax2.plot(index, signal_line, label='Signal Line', color='red')

        # Green bars for non-negative histogram values, red for negative ones
        histogram_colors = np.where(macd_histogram >= 0, 'green', 'red')
        ax2.bar(index, macd_histogram, color=histogram_colors, alpha=0.6)

        ax2.set_title("MACD")
        ax2.set_ylabel("MACD Value")
        ax2.legend()

        plt.xlabel("Date")
        plt.tight_layout()

        return fig
    except Exception as e:
        # Do not leave a half-built figure registered with pyplot.
        if fig is not None:
            plt.close(fig)
        print(f"Error in plot_macd2: {e}")
        return None
|
78 |
+
|
79 |
+
def plot_macd(df, title='MSFT Candlestick Chart and MACD Subplots'):
    """Show an interactive Plotly figure: candlesticks on top, MACD below.

    Args:
        df: DataFrame with 'Open', 'High', 'Low', 'Close', 'MACD',
            'Signal_Line' and 'MACD_Histogram' columns; its index is used as
            the x axis.
        title: Figure title. Defaults to the previously hard-coded text so
            existing callers are unaffected; pass a ticker-specific title for
            other symbols.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    # Create Figure
    fig = make_subplots(rows=2, cols=1, shared_xaxes=True, row_heights=[0.2, 0.1],
                        vertical_spacing=0.15,  # Adjust vertical spacing between subplots
                        subplot_titles=("Candlestick Chart", "MACD"))  # Add subplot titles

    # Subplot 1: Plot candlestick chart
    fig.add_trace(go.Candlestick(
        x=df.index,
        open=df['Open'],
        high=df['High'],
        low=df['Low'],
        close=df['Close'],
        increasing_line_color='#00cc96',  # Green for increasing
        decreasing_line_color='#ff3e3e',  # Red for decreasing
        showlegend=False
    ), row=1, col=1)  # Specify row and column indices

    # Subplot 2: Plot MACD
    fig.add_trace(
        go.Scatter(
            x=df.index,
            y=df['MACD'],
            mode='lines',
            name='MACD',
            line=dict(color='blue')
        ),
        row=2, col=1
    )

    fig.add_trace(
        go.Scatter(
            x=df.index,
            y=df['Signal_Line'],
            mode='lines',
            name='Signal Line',
            line=dict(color='red')
        ),
        row=2, col=1
    )

    # Plot MACD Histogram with different colors for positive and negative values
    histogram_colors = ['green' if val >= 0 else 'red' for val in df['MACD_Histogram']]

    fig.add_trace(
        go.Bar(
            x=df.index,
            y=df['MACD_Histogram'],
            name='MACD Histogram',
            marker_color=histogram_colors
        ),
        row=2, col=1
    )

    # Update layout with zoom and pan tools enabled
    layout = go.Layout(
        title=title,
        title_font=dict(size=12),  # Adjust title font size
        plot_bgcolor='#f2f2f2',  # Light gray background
        height=600,
        width=1200,
        xaxis_rangeslider=dict(visible=True, thickness=0.03),
    )

    # Update the layout of the entire figure
    fig.update_layout(layout)
    fig.update_yaxes(fixedrange=False, row=1, col=1)
    fig.update_yaxes(fixedrange=True, row=2, col=1)
    fig.update_xaxes(type='category', row=1, col=1)
    fig.update_xaxes(type='category', nticks=10, row=2, col=1)

    # The figure is only displayed, not returned, matching the original behavior.
    fig.show()
|
155 |
+
|
156 |
+
def calculate_MACD(df, fast_period=12, slow_period=26, signal_period=9):
    """
    Add MACD (Moving Average Convergence Divergence) columns to a price frame.

    The frame is modified in place and also returned. Five columns are
    written: 'EMA_fast', 'EMA_slow', 'MACD', 'Signal_Line' and
    'MACD_Histogram'.

    Parameters:
    df (DataFrame): A pandas DataFrame containing at least a 'Close' column with closing prices.
    fast_period (int): Span of the fast EMA (default is 12).
    slow_period (int): Span of the slow EMA (default is 26).
    signal_period (int): Span of the signal line EMA (default is 9).

    Returns:
    DataFrame: The same DataFrame object that was passed in, with the columns above added.
    """

    def _ema(series, span):
        # Exponential moving average in its recursive (adjust=False) form.
        return series.ewm(span=span, adjust=False).mean()

    closing = df['Close']
    df['EMA_fast'] = _ema(closing, fast_period)
    df['EMA_slow'] = _ema(closing, slow_period)

    # MACD is the gap between the fast and the slow average.
    df['MACD'] = df['EMA_fast'] - df['EMA_slow']

    # The signal line smooths MACD; the histogram is their difference.
    df['Signal_Line'] = _ema(df['MACD'], signal_period)
    df['MACD_Histogram'] = df['MACD'] - df['Signal_Line']

    return df
|