niyaa committed on
Commit
4fbfe72
·
1 Parent(s): a5f31f0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -35
app.py CHANGED
@@ -8,6 +8,9 @@ from bs4 import BeautifulSoup
8
  import requests
9
  import os
10
  from datetime import date, timedelta
 
 
 
11
 
12
  # App title
13
  st.markdown('''
@@ -84,9 +87,6 @@ if tickerSymbol:
84
 
85
 
86
 
87
-
88
-
89
-
90
  d0 = start_date
91
  d1 = datetime.date(2008, 1, 1)
92
  delta = d0 - d1
@@ -94,11 +94,6 @@ if tickerSymbol:
94
  st.write(delta)
95
 
96
  Begindatestring = start_date
97
-
98
-
99
- #Begindatestring = datetime.strptime(Begindatestring, "%Y-%m-%d").date()
100
-
101
-
102
  val = 39448 + int(delta.days)
103
  url = 'https://economictimes.indiatimes.com/archivelist/year-'+str(Begindatestring.year)+',month-'+str(Begindatestring.month)+',starttime-'+str(val)+'.cms' # Replace with your URL
104
 
@@ -108,7 +103,7 @@ if tickerSymbol:
108
  html_text = response.text
109
  soup = BeautifulSoup(html_text, "lxml")
110
  else:
111
- st.write(f"Failed to fetch the page. Status code: {response.status_code}")
112
  jobs = soup.find_all("li")
113
  headlines = []
114
  for job in jobs:
@@ -126,20 +121,13 @@ if tickerSymbol:
126
  del headlines[index:]
127
  news = pd.DataFrame({"News": headlines})
128
  news.insert(0, 'Date', Begindatestring)
129
- #st.dataframe(df[0:1])
130
 
131
 
132
  news = news.drop_duplicates()
133
  news = news.dropna(how='any')
134
  news = news.reset_index(drop=True)
135
 
136
- import pandas as pd
137
- import numpy as np
138
-
139
 
140
- from transformers import pipeline
141
- import torch
142
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
143
 
144
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
145
 
@@ -148,14 +136,6 @@ if tickerSymbol:
148
  model = AutoModelForSequenceClassification.from_pretrained("nickmuchi/sec-bert-finetuned-finance-classification")
149
 
150
 
151
-
152
-
153
-
154
-
155
-
156
-
157
-
158
-
159
  nlp = pipeline("text-classification", model=model, tokenizer=tokenizer, device=device)
160
 
161
  length = len(news[ 'News'].to_list())
@@ -169,32 +149,23 @@ if tickerSymbol:
169
  df.loc[i, "News"] = news_list[i]
170
  df.loc[i , 'label'] = results[0]["label"]
171
  df.loc[i , 'score'] = results[0]["score"]
 
172
 
173
 
174
-
175
- #st.dataframe(df)
176
-
177
- # Filter the DataFrame to get rows with "neutral" sentiment
178
  bullish_rows = df[df['label'] == 'bullish']
179
 
180
- # Calculate the sum of the 'Score' column for "neutral" rows
181
  bullish_score_sum = bullish_rows['score'].sum()
182
 
183
  num_bullish_rows = len(bullish_rows)
184
- # Calculate the average score for "neutral" sentiment
185
- average_score_for_bullish = bullish_score_sum / num_bullish_rows
186
 
 
187
 
188
- # Filter the DataFrame to get rows with "neutral" sentiment
189
  bearish_rows = df[df['label'] == 'bearish']
190
 
191
- # Calculate the sum of the 'Score' column for "neutral" rows
192
  bearish_score_sum = bearish_rows['score'].sum()
193
 
194
- # Cabearishlculate the number of "neutral" rows
195
  num_bearish_rows = len(bearish_rows)
196
 
197
- # Calculate the average score for "neutral" sentiment
198
  average_score_for_bearish = bearish_score_sum / num_bearish_rows
199
 
200
 
 
8
  import requests
9
  import os
10
  from datetime import date, timedelta
11
+ from transformers import pipeline
12
+ import torch
13
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
14
 
15
  # App title
16
  st.markdown('''
 
87
 
88
 
89
 
 
 
 
90
  d0 = start_date
91
  d1 = datetime.date(2008, 1, 1)
92
  delta = d0 - d1
 
94
  st.write(delta)
95
 
96
  Begindatestring = start_date
 
 
 
 
 
97
  val = 39448 + int(delta.days)
98
  url = 'https://economictimes.indiatimes.com/archivelist/year-'+str(Begindatestring.year)+',month-'+str(Begindatestring.month)+',starttime-'+str(val)+'.cms' # Replace with your URL
99
 
 
103
  html_text = response.text
104
  soup = BeautifulSoup(html_text, "lxml")
105
  else:
106
+ gg=0
107
  jobs = soup.find_all("li")
108
  headlines = []
109
  for job in jobs:
 
121
  del headlines[index:]
122
  news = pd.DataFrame({"News": headlines})
123
  news.insert(0, 'Date', Begindatestring)
 
124
 
125
 
126
  news = news.drop_duplicates()
127
  news = news.dropna(how='any')
128
  news = news.reset_index(drop=True)
129
 
 
 
 
130
 
 
 
 
131
 
132
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
133
 
 
136
  model = AutoModelForSequenceClassification.from_pretrained("nickmuchi/sec-bert-finetuned-finance-classification")
137
 
138
 
 
 
 
 
 
 
 
 
139
  nlp = pipeline("text-classification", model=model, tokenizer=tokenizer, device=device)
140
 
141
  length = len(news[ 'News'].to_list())
 
149
  df.loc[i, "News"] = news_list[i]
150
  df.loc[i , 'label'] = results[0]["label"]
151
  df.loc[i , 'score'] = results[0]["score"]
152
+ if(i%100 ==0): st.write("Articles Processed Number "+ str(i))
153
 
154
 
 
 
 
 
155
  bullish_rows = df[df['label'] == 'bullish']
156
 
 
157
  bullish_score_sum = bullish_rows['score'].sum()
158
 
159
  num_bullish_rows = len(bullish_rows)
 
 
160
 
161
+ average_score_for_bullish = bullish_score_sum / num_bullish_rows
162
 
 
163
  bearish_rows = df[df['label'] == 'bearish']
164
 
 
165
  bearish_score_sum = bearish_rows['score'].sum()
166
 
 
167
  num_bearish_rows = len(bearish_rows)
168
 
 
169
  average_score_for_bearish = bearish_score_sum / num_bearish_rows
170
 
171