Spaces:
Build error
Build error
AhmedTaha012
committed on
Commit
•
3be33d4
1
Parent(s):
281267d
Update app.py
Browse files
app.py
CHANGED
@@ -1,13 +1,23 @@
|
|
1 |
import streamlit as st
|
2 |
from transformers import pipeline
|
|
|
3 |
import math
|
4 |
import nltk
|
|
|
5 |
from nltk.corpus import stopwords
|
|
|
|
|
|
|
|
|
6 |
nltk.download('punkt')
|
7 |
nltk.download('stopwords')
|
|
|
8 |
sentiment_model = pipeline("text-classification", model="AhmedTaha012/managersFeedback-V1.0.7")
|
9 |
increase_decrease_model = pipeline("text-classification", model="AhmedTaha012/nextQuarter-status-V1.1.9")
|
10 |
-
|
|
|
|
|
|
|
11 |
def getSpeakers(data):
|
12 |
if "Speakers" in data:
|
13 |
return "\n".join([x for x in data.split("Speakers")[-1].split("\n") if "--" in x])
|
@@ -168,16 +178,14 @@ def clean_and_preprocess(text):
|
|
168 |
cleaned_text = ' '.join(filtered_words)
|
169 |
l.append(cleaned_text)
|
170 |
return "\n".join(l)
|
|
|
|
|
|
|
|
|
|
|
171 |
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
st.title("Transcript Analysis")
|
179 |
-
transcript = st.text_area("Enter the transcript:", height=200)
|
180 |
-
|
181 |
|
182 |
if st.button("Analyze"):
|
183 |
transcript=replace_abbreviations(transcript)
|
@@ -188,19 +196,64 @@ if st.button("Analyze"):
|
|
188 |
tokens=transcript.split()
|
189 |
splitSize=256
|
190 |
chunks=[tokens[r*splitSize:(r+1)*splitSize] for r in range(math.ceil(len(tokens)/splitSize))]
|
191 |
-
|
|
|
192 |
sentiment = [sentiment_model(x)[0]['label'] for x in chunks]
|
193 |
sentiment=max(sentiment,key=sentiment.count)
|
194 |
-
sentiment_color = "green" if sentiment == "
|
195 |
st.markdown(f'<span style="color:{sentiment_color}">{sentiment}</span>', unsafe_allow_html=True)
|
196 |
-
|
197 |
-
st.subheader("Increase/Decrease Prediction")
|
198 |
increase_decrease = [increase_decrease_model(x)[0]['label'] for x in chunks]
|
199 |
increase_decrease=max(increase_decrease,key=increase_decrease.count)
|
200 |
-
increase_decrease_color = "green" if increase_decrease == "
|
201 |
st.markdown(f'<span style="color:{increase_decrease_color}">{increase_decrease}</span>', unsafe_allow_html=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
202 |
|
203 |
-
st.subheader("NER Metrics")
|
204 |
-
ner_result = [ner_model(x) for x in chunks]
|
205 |
-
st.write(str(ner_result))
|
206 |
|
|
|
1 |
import streamlit as st
|
2 |
from transformers import pipeline
|
3 |
+
from transformers import AutoTokenizer,AutoModelForTokenClassification
|
4 |
import math
|
5 |
import nltk
|
6 |
+
import torch
|
7 |
from nltk.corpus import stopwords
|
8 |
+
from streamlit_extras.colored_header import colored_header
|
9 |
+
import spacy
|
10 |
+
from spacy import displacy
|
11 |
+
from word2number import w2n
|
12 |
nltk.download('punkt')
|
13 |
nltk.download('stopwords')
|
14 |
+
@st.cache_resource
def _load_models():
    """Load the spaCy model and the Hugging Face pipelines once per process.

    Streamlit re-executes this script on every widget interaction;
    ``st.cache_resource`` keeps the returned objects alive between reruns so
    the checkpoints are not loaded again each time.

    Returns:
        A tuple ``(nlp, sentiment_model, increase_decrease_model, tokenizer,
        model, nlpPipe)`` in that order.
    """
    ner_checkpoint = "AhmedTaha012/finance-ner-v0.0.9-finetuned-ner"
    spacy_nlp = spacy.load('en_core_web_md')
    sentiment_clf = pipeline("text-classification", model="AhmedTaha012/managersFeedback-V1.0.7")
    trend_clf = pipeline("text-classification", model="AhmedTaha012/nextQuarter-status-V1.1.9")
    ner_tokenizer = AutoTokenizer.from_pretrained(ner_checkpoint)
    ner_model = AutoModelForTokenClassification.from_pretrained(ner_checkpoint)
    # aggregation_strategy="simple" is the replacement for the deprecated
    # grouped_entities=True flag and groups sub-word tokens the same way.
    ner_pipe = pipeline(
        "ner",
        model=ner_model,
        tokenizer=ner_tokenizer,
        aggregation_strategy="simple",
    )
    return spacy_nlp, sentiment_clf, trend_clf, ner_tokenizer, ner_model, ner_pipe


# Same module-level names as before, so the rest of the script is unaffected.
nlp, sentiment_model, increase_decrease_model, tokenizer, model, nlpPipe = _load_models()
|
21 |
def getSpeakers(data):
|
22 |
if "Speakers" in data:
|
23 |
return "\n".join([x for x in data.split("Speakers")[-1].split("\n") if "--" in x])
|
|
|
178 |
cleaned_text = ' '.join(filtered_words)
|
179 |
l.append(cleaned_text)
|
180 |
return "\n".join(l)
|
181 |
+
def convert_amount_to_number(amount_str):
    """Convert a spelled-out amount to a number via ``w2n.word_to_num``.

    Args:
        amount_str: Text handed to ``w2n.word_to_num``.

    Returns:
        The value produced by ``w2n.word_to_num``, or ``0`` when that call
        raises ``ValueError``.
    """
    try:
        parsed = w2n.word_to_num(amount_str)
    except ValueError:
        # Unparseable amounts count as zero instead of aborting the caller.
        parsed = 0
    return parsed
|
186 |
|
187 |
+
# Page heading, then the free-text box whose content is analysed below.
st.header("Transcript Analysis", divider='rainbow')
transcript = st.text_area(label="Enter the transcript:", height=100)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
189 |
|
190 |
if st.button("Analyze"):
|
191 |
transcript=replace_abbreviations(transcript)
|
|
|
196 |
# Cut the whitespace-tokenised transcript into windows of at most splitSize
# words; each window is classified on its own and the labels are voted on.
tokens = transcript.split()
splitSize = 256
chunks = [
    " ".join(tokens[start:start + splitSize])
    for start in range(0, len(tokens), splitSize)
]
if not chunks:
    # An empty transcript produces no chunks, and max() on the empty label
    # list below would raise ValueError, so stop this run with a message.
    st.warning("Please enter a transcript to analyze.")
    st.stop()

st.subheader("Management Sentiment", divider='rainbow')
# Majority vote over the per-chunk labels; on a tie max() keeps the label
# that appears first in the list.
sentiment = [sentiment_model(x)[0]['label'] for x in chunks]
sentiment = max(sentiment, key=sentiment.count)
# NOTE(review): "postive" is compared verbatim; presumably it mirrors the
# label emitted by the sentiment model — confirm against the model config.
sentiment_color = "green" if sentiment == "postive" else "red"
st.markdown(f'<span style="color:{sentiment_color}">{sentiment}</span>', unsafe_allow_html=True)

st.subheader("Next Quarter Prediction", divider='rainbow')
increase_decrease = [increase_decrease_model(x)[0]['label'] for x in chunks]
increase_decrease = max(increase_decrease, key=increase_decrease.count)
increase_decrease_color = "green" if increase_decrease == "Increase" else "red"
st.markdown(f'<span style="color:{increase_decrease_color}">{increase_decrease}</span>', unsafe_allow_html=True)
|
210 |
+
st.subheader("Financial Metrics", divider='rainbow')
ner_result = []   # one {entity_group: text} dict per extracted metric
savedchunks = []  # indices of the chunks in which the NER pipeline found entities
for idx, chunk in enumerate(chunks):
    ents = nlpPipe(chunk)
    if not ents:
        continue
    savedchunks.append(idx)
    idxx = 0
    while idxx < len(ents):
        head = ents[idxx]
        if len(head["word"].split()) == 2:
            # A two-word entity is stored as-is.
            ner_result.append({head["entity_group"]: head["word"]})
            idxx += 1
        else:
            # Otherwise the entity is glued to the next two entities under the
            # first one's label. Slicing stops at the end of the list, so a
            # trailing entity with fewer than two successors no longer raises
            # IndexError.
            merged = "".join(part["word"] for part in ents[idxx:idxx + 3])
            ner_result.append({head["entity_group"]: merged})
            idxx += 3
|
227 |
+
# Bucket the extracted amounts by the entity label they were stored under.
profits = [entry["profit"] for entry in ner_result if "profit" in entry]
revenues = [entry["revenue"] for entry in ner_result if "revenue" in entry]
expences = [entry["expense"] for entry in ner_result if "expense" in entry]

# One pre-filled text box per amount, numbered from 1 within each category,
# rendered in the order revenue, profit, expenses.
for caption, amounts in (("Revenue", revenues), ("Profit", profits), ("Expences", expences)):
    for idx, amount in enumerate(amounts):
        st.text_input(f'{caption}:{idx+1}', amount)
|
236 |
+
st.subheader("Parts from transcript that contain financial metrics", divider='rainbow')
for idx in savedchunks:
    doc = nlp(chunks[idx])
    entity_list = nlpPipe(chunks[idx])
    # Doc.char_span returns None when the character offsets do not line up
    # with spaCy token boundaries; such entities cannot be highlighted and
    # are dropped instead of poisoning the whole chunk.
    candidates = (
        doc.char_span(entity['start'], entity['end'], label=entity['entity_group'])
        for entity in entity_list
    )
    entities = [span for span in candidates if span is not None]
    try:
        # doc.ents rejects overlapping spans, so overlaps are removed first.
        doc.ents = spacy.util.filter_spans(entities)
        ent_html = displacy.render(doc, style="ent", jupyter=False)
    except ValueError as err:
        # Keep rendering the remaining chunks, but tell the user why this one
        # is missing rather than swallowing the error silently.
        st.warning(f"Could not highlight entities in chunk {idx + 1}: {err}")
        continue
    st.markdown(ent_html, unsafe_allow_html=True)
|
250 |
+
st.subheader("Investment Recommendation", divider='rainbow')
# Totals of the extracted profit and expense amounts after numeric conversion.
profitAmount = sum(convert_amount_to_number(amount) for amount in profits)
expencesAmount = sum(convert_amount_to_number(amount) for amount in expences)

# Recommend only when the predicted trend, the management sentiment and the
# profit/expense comparison all point the same way.
favourable = (
    increase_decrease == "Increase"
    and sentiment == "postive"
    and profitAmount > expencesAmount
)
if favourable:
    verdict_color = "green"
    verdict_text = "This is a great chance for investment. Do consider it."
else:
    verdict_color = "red"
    verdict_text = "Not the best chance for investment."
st.markdown(f'<span style="color:{verdict_color}">{verdict_text}</span>', unsafe_allow_html=True)
|
257 |
+
|
258 |
|
|
|
|
|
|
|
259 |
|