AhmedTaha012 committed on
Commit
3be33d4
1 Parent(s): 281267d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -18
app.py CHANGED
@@ -1,13 +1,23 @@
1
  import streamlit as st
2
  from transformers import pipeline
 
3
  import math
4
  import nltk
 
5
  from nltk.corpus import stopwords
 
 
 
 
6
  nltk.download('punkt')
7
  nltk.download('stopwords')
 
8
  sentiment_model = pipeline("text-classification", model="AhmedTaha012/managersFeedback-V1.0.7")
9
  increase_decrease_model = pipeline("text-classification", model="AhmedTaha012/nextQuarter-status-V1.1.9")
10
- ner_model = pipeline("token-classification", model="AhmedTaha012/finance-ner-v0.0.9-finetuned-ner")
 
 
 
11
  def getSpeakers(data):
12
  if "Speakers" in data:
13
  return "\n".join([x for x in data.split("Speakers")[-1].split("\n") if "--" in x])
@@ -168,16 +178,14 @@ def clean_and_preprocess(text):
168
  cleaned_text = ' '.join(filtered_words)
169
  l.append(cleaned_text)
170
  return "\n".join(l)
 
 
 
 
 
171
 
172
-
173
-
174
-
175
-
176
-
177
-
178
- st.title("Transcript Analysis")
179
- transcript = st.text_area("Enter the transcript:", height=200)
180
-
181
 
182
  if st.button("Analyze"):
183
  transcript=replace_abbreviations(transcript)
@@ -188,19 +196,64 @@ if st.button("Analyze"):
188
  tokens=transcript.split()
189
  splitSize=256
190
  chunks=[tokens[r*splitSize:(r+1)*splitSize] for r in range(math.ceil(len(tokens)/splitSize))]
191
- st.subheader("Sentiment Analysis")
 
192
  sentiment = [sentiment_model(x)[0]['label'] for x in chunks]
193
  sentiment=max(sentiment,key=sentiment.count)
194
- sentiment_color = "green" if sentiment == "POSITIVE" else "red"
195
  st.markdown(f'<span style="color:{sentiment_color}">{sentiment}</span>', unsafe_allow_html=True)
196
-
197
- st.subheader("Increase/Decrease Prediction")
198
  increase_decrease = [increase_decrease_model(x)[0]['label'] for x in chunks]
199
  increase_decrease=max(increase_decrease,key=increase_decrease.count)
200
- increase_decrease_color = "green" if increase_decrease == "INCREASE" else "red"
201
  st.markdown(f'<span style="color:{increase_decrease_color}">{increase_decrease}</span>', unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
 
203
- st.subheader("NER Metrics")
204
- ner_result = [ner_model(x) for x in chunks]
205
- st.write(str(ner_result))
206
 
 
1
  import streamlit as st
2
  from transformers import pipeline
3
+ from transformers import AutoTokenizer,AutoModelForTokenClassification
4
  import math
5
  import nltk
6
+ import torch
7
  from nltk.corpus import stopwords
8
+ from streamlit_extras.colored_header import colored_header
9
+ import spacy
10
+ from spacy import displacy
11
+ from word2number import w2n
12
  nltk.download('punkt')
13
  nltk.download('stopwords')
14
@st.cache_resource
def _load_models():
    """Load every heavyweight NLP resource exactly once per server process.

    Streamlit re-executes the whole script on each widget interaction, so
    loading the models at module level would repeat the work on every click.
    ``st.cache_resource`` keeps the returned objects alive across reruns.

    Returns:
        A tuple ``(nlp, sentiment_model, increase_decrease_model, tokenizer,
        model, nlpPipe)`` in the order the module-level names are bound below.
    """
    spacy_nlp = spacy.load('en_core_web_md')
    sentiment_clf = pipeline(
        "text-classification",
        model="AhmedTaha012/managersFeedback-V1.0.7",
    )
    trend_clf = pipeline(
        "text-classification",
        model="AhmedTaha012/nextQuarter-status-V1.1.9",
    )
    ner_tokenizer = AutoTokenizer.from_pretrained(
        "AhmedTaha012/finance-ner-v0.0.9-finetuned-ner"
    )
    ner_model = AutoModelForTokenClassification.from_pretrained(
        "AhmedTaha012/finance-ner-v0.0.9-finetuned-ner"
    )
    # `aggregation_strategy="simple"` is the documented replacement for the
    # deprecated `grouped_entities=True`; both merge word pieces into
    # entity groups carrying `entity_group`, `word`, `start` and `end`.
    ner_pipe = pipeline(
        "ner",
        model=ner_model,
        tokenizer=ner_tokenizer,
        aggregation_strategy="simple",
    )
    return spacy_nlp, sentiment_clf, trend_clf, ner_tokenizer, ner_model, ner_pipe


# Same module-level names as before, so the rest of the script is unaffected.
(
    nlp,
    sentiment_model,
    increase_decrease_model,
    tokenizer,
    model,
    nlpPipe,
) = _load_models()
21
  def getSpeakers(data):
22
  if "Speakers" in data:
23
  return "\n".join([x for x in data.split("Speakers")[-1].split("\n") if "--" in x])
 
178
  cleaned_text = ' '.join(filtered_words)
179
  l.append(cleaned_text)
180
  return "\n".join(l)
181
def convert_amount_to_number(amount_str):
    """Turn a spelled-out amount into a number via ``w2n.word_to_num``.

    Args:
        amount_str: Text describing an amount.

    Returns:
        Whatever ``w2n.word_to_num`` produces for the text, or ``0`` when it
        raises ``ValueError`` (i.e. the text could not be converted).
    """
    try:
        amount = w2n.word_to_num(amount_str)
    except ValueError:
        # Unparseable amounts count as zero so callers can still sum them.
        amount = 0
    return amount
186
 
187
# Page title, followed by the free-text box the user pastes the transcript into.
st.header(body="Transcript Analysis", divider='rainbow')
transcript = st.text_area(label="Enter the transcript:", height=100)
 
 
 
 
 
 
 
189
 
190
  if st.button("Analyze"):
191
  transcript=replace_abbreviations(transcript)
 
196
  tokens=transcript.split()
197
  splitSize=256
198
  chunks=[tokens[r*splitSize:(r+1)*splitSize] for r in range(math.ceil(len(tokens)/splitSize))]
199
# NOTE(review): the scrape dropped indentation; this run of statements appears
# to belong to the body of `if st.button("Analyze"):` -- indent accordingly.

# Each chunk arrives as a list of tokens; the pipelines expect plain strings.
chunks = [" ".join(chuk) for chuk in chunks]

# An empty transcript yields no chunks, and the majority votes below would
# then call max() on an empty list and raise ValueError. Stop early instead.
if not chunks:
    st.warning("Please enter a transcript before running the analysis.")
    st.stop()

st.subheader("Management Sentiment", divider='rainbow')
# Classify every chunk, then keep the label that occurs most often.
sentiment = [sentiment_model(x)[0]['label'] for x in chunks]
sentiment = max(sentiment, key=sentiment.count)
# NOTE(review): "postive" is compared verbatim -- presumably the label string
# the sentiment model emits; confirm against the model's config.
sentiment_color = "green" if sentiment == "postive" else "red"
st.markdown(f'<span style="color:{sentiment_color}">{sentiment}</span>', unsafe_allow_html=True)

st.subheader("Next Quarter Perdiction", divider='rainbow')
# Same majority vote for the increase/decrease classifier.
increase_decrease = [increase_decrease_model(x)[0]['label'] for x in chunks]
increase_decrease = max(increase_decrease, key=increase_decrease.count)
increase_decrease_color = "green" if increase_decrease == "Increase" else "red"
st.markdown(f'<span style="color:{increase_decrease_color}">{increase_decrease}</span>', unsafe_allow_html=True)
210
+ st.subheader("Financial Metrics", divider='rainbow')
211
# NOTE(review): the scrape dropped indentation; this run of statements appears
# to belong to the body of `if st.button("Analyze"):` -- indent accordingly.

# Run the NER pipeline over every chunk and collect the detected metrics.
ner_result = []    # one {entity_group: text} dict per collected metric
savedchunks = []   # indices of the chunks in which at least one entity was found
for idx, chunk in enumerate(chunks):
    ents = nlpPipe(chunk)
    if not ents:
        continue
    savedchunks.append(idx)
    idxx = 0
    while idxx < len(ents):
        label = ents[idxx]["entity_group"]
        word = ents[idxx]["word"]
        if len(word.split()) == 2:
            # A two-word entity is stored as-is.
            ner_result.append({label: word})
            idxx += 1
        else:
            # Otherwise the entity is glued to the two entities that follow
            # it (presumably an amount the model split into pieces -- confirm).
            # Slicing instead of indexing idxx+1 / idxx+2 avoids the
            # IndexError the original hit when fewer than three entities
            # remained at the end of the list.
            pieces = ents[idxx:idxx + 3]
            ner_result.append({label: "".join(piece["word"] for piece in pieces)})
            idxx += 3
227
# Split the collected entities by entity group.
profits = [found["profit"] for found in ner_result if "profit" in found]
revenues = [found["revenue"] for found in ner_result if "revenue" in found]
expences = [found["expense"] for found in ner_result if "expense" in found]

# Show each value in its own text input, numbered from 1 within its group.
for position, amount in enumerate(revenues, start=1):
    st.text_input(f'Revenue:{position}', amount)
for position, amount in enumerate(profits, start=1):
    st.text_input(f'Profit:{position}', amount)
for position, amount in enumerate(expences, start=1):
    st.text_input(f'Expences:{position}', amount)
236
# NOTE(review): the scrape dropped indentation; this run of statements appears
# to belong to the body of `if st.button("Analyze"):` -- indent accordingly.

st.subheader("Parts from transcript that contais financial metrics", divider='rainbow')
for idx in savedchunks:
    doc = nlp(chunks[idx])
    # NOTE: this runs the NER pipeline a second time on the chunk.
    entity_list = nlpPipe(chunks[idx])
    candidate_spans = (
        doc.char_span(entity['start'], entity['end'], label=entity['entity_group'])
        for entity in entity_list
    )
    # char_span returns None when the character offsets do not line up with
    # spaCy's token boundaries. Keeping such a None made the `doc.ents`
    # assignment fail, and the old bare `except: pass` then hid the whole
    # chunk. Drop the unalignable spans and render the rest.
    entities = [span for span in candidate_spans if span is not None]
    try:
        doc.ents = entities
        ent_html = displacy.render(doc, style="ent", jupyter=False)
    except ValueError:
        # Best effort: spaCy rejects e.g. overlapping spans with ValueError;
        # skip this chunk rather than abort the whole page.
        continue
    st.markdown(ent_html, unsafe_allow_html=True)
250
st.subheader("Investment Recommendation", divider='rainbow')

# Total the extracted profit and expense amounts (unparseable ones count as 0).
profitAmount = sum(convert_amount_to_number(amount) for amount in profits)
expencesAmount = sum(convert_amount_to_number(amount) for amount in expences)

# Recommend only when all three signals agree: predicted increase, positive
# management sentiment, and profits above expenses.
looks_promising = (
    increase_decrease == "Increase"
    and sentiment == "postive"
    and profitAmount > expencesAmount
)
if looks_promising:
    verdict_color = "green"
    verdict_text = "This is a great chance for investment. Do consider it."
else:
    verdict_color = "red"
    verdict_text = "Not the best chance for investment."
st.markdown(f'<span style="color:{verdict_color}">{verdict_text}</span>', unsafe_allow_html=True)
257
+
258
 
 
 
 
259