Spaces:

AhmedTaha012
/

Finance

Build error

App Files Files Community

AhmedTaha012 committed on Aug 30, 2023

Commit

3be33d4

1 Parent(s): 281267d

Update app.py

Browse files

Files changed (1) hide show

app.py +71 -18

app.py CHANGED Viewed

@@ -1,13 +1,23 @@
 import streamlit as st
 from transformers import pipeline
 import math
 import nltk
 from nltk.corpus import stopwords
 nltk.download('punkt')
 nltk.download('stopwords')
 sentiment_model = pipeline("text-classification", model="AhmedTaha012/managersFeedback-V1.0.7")
 increase_decrease_model = pipeline("text-classification", model="AhmedTaha012/nextQuarter-status-V1.1.9")
-ner_model = pipeline("token-classification", model="AhmedTaha012/finance-ner-v0.0.9-finetuned-ner")
 def getSpeakers(data):
     if "Speakers" in data:
         return "\n".join([x for x in data.split("Speakers")[-1].split("\n") if "--" in x])
@@ -168,16 +178,14 @@ def clean_and_preprocess(text):
         cleaned_text = ' '.join(filtered_words)
         l.append(cleaned_text)
     return "\n".join(l)
-st.title("Transcript Analysis")
-transcript = st.text_area("Enter the transcript:", height=200)
 if st.button("Analyze"):
     transcript=replace_abbreviations(transcript)
@@ -188,19 +196,64 @@ if st.button("Analyze"):
     tokens=transcript.split()
     splitSize=256
     chunks=[tokens[r*splitSize:(r+1)*splitSize] for r in range(math.ceil(len(tokens)/splitSize))]
-    st.subheader("Sentiment Analysis")
     sentiment = [sentiment_model(x)[0]['label'] for x in chunks]
     sentiment=max(sentiment,key=sentiment.count)
-    sentiment_color = "green" if sentiment == "POSITIVE" else "red"
     st.markdown(f'<span style="color:{sentiment_color}">{sentiment}</span>', unsafe_allow_html=True)
-    st.subheader("Increase/Decrease Prediction")
     increase_decrease = [increase_decrease_model(x)[0]['label'] for x in chunks]
     increase_decrease=max(increase_decrease,key=increase_decrease.count)
-    increase_decrease_color = "green" if increase_decrease == "INCREASE" else "red"
     st.markdown(f'<span style="color:{increase_decrease_color}">{increase_decrease}</span>', unsafe_allow_html=True)
-    st.subheader("NER Metrics")
-    ner_result = [ner_model(x) for x in chunks]
-    st.write(str(ner_result))

 import streamlit as st
 from transformers import pipeline
+from transformers import AutoTokenizer,AutoModelForTokenClassification
 import math
 import nltk
+import torch
 from nltk.corpus import stopwords
+from streamlit_extras.colored_header import colored_header
+import spacy
+from spacy import displacy
+from word2number import w2n
 nltk.download('punkt')
 nltk.download('stopwords')
+# spaCy pipeline; further down it only tokenizes chunks so displacy can render highlights.
+nlp = spacy.load('en_core_web_md')
 sentiment_model = pipeline("text-classification", model="AhmedTaha012/managersFeedback-V1.0.7")
 increase_decrease_model = pipeline("text-classification", model="AhmedTaha012/nextQuarter-status-V1.1.9")
+# The NER tokenizer and model are loaded explicitly and shared by the pipeline below.
+tokenizer = AutoTokenizer.from_pretrained("AhmedTaha012/finance-ner-v0.0.9-finetuned-ner")
+model = AutoModelForTokenClassification.from_pretrained("AhmedTaha012/finance-ner-v0.0.9-finetuned-ner")
+# aggregation_strategy="simple" is the supported spelling of the deprecated
+# grouped_entities=True flag: sub-word tokens are merged into whole entities,
+# each carrying "entity_group", "word", "start" and "end".
+# NOTE(review): Streamlit re-runs this script on every interaction, so all of
+# these models are presumably reloaded each time — consider caching them.
+nlpPipe = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")
 def getSpeakers(data):
     if "Speakers" in data:
         return "\n".join([x for x in data.split("Speakers")[-1].split("\n") if "--" in x])
         cleaned_text = ' '.join(filtered_words)
         l.append(cleaned_text)
     return "\n".join(l)
+def convert_amount_to_number(amount_str):
+    """Return ``w2n.word_to_num(amount_str)``, or 0 when it cannot be converted.
+
+    Args:
+        amount_str: Text of an amount, handed unchanged to ``w2n.word_to_num``.
+
+    Returns:
+        The value produced by ``w2n.word_to_num``, or ``0`` if that call
+        raises ``ValueError``.
+    """
+    try:
+        number = w2n.word_to_num(amount_str)
+    except ValueError:
+        # A failed conversion counts as zero so callers can still sum amounts.
+        return 0
+    return number
+# Page header and the free-text box holding the transcript to analyze.
+st.header("Transcript Analysis", divider='rainbow')
+transcript = st.text_area("Enter the transcript:", height=100)
 if st.button("Analyze"):
     transcript=replace_abbreviations(transcript)
     tokens=transcript.split()
     splitSize=256
     chunks=[tokens[r*splitSize:(r+1)*splitSize] for r in range(math.ceil(len(tokens)/splitSize))]
+    # Re-join each window of whitespace tokens so every model call receives a plain string.
+    chunks=[" ".join(chuk) for chuk in chunks]
+    if not chunks:
+        # An empty transcript produces no chunks, and the majority votes below
+        # would call max() on an empty list, which raises ValueError.
+        st.warning("Please enter a transcript to analyze.")
+        st.stop()
+    st.subheader("Management Sentiment", divider='rainbow')
+    # One label per chunk; the most frequent label wins.
     sentiment = [sentiment_model(x)[0]['label'] for x in chunks]
     sentiment=max(sentiment,key=sentiment.count)
+    # NOTE(review): "postive" presumably mirrors the label spelling emitted by
+    # the sentiment model — confirm against the model config before changing it.
+    sentiment_color = "green" if sentiment == "postive" else "red"
     st.markdown(f'<span style="color:{sentiment_color}">{sentiment}</span>', unsafe_allow_html=True)
+    st.subheader("Next Quarter Perdiction", divider='rainbow')
+    # Same per-chunk majority vote for the next-quarter direction.
     increase_decrease = [increase_decrease_model(x)[0]['label'] for x in chunks]
     increase_decrease=max(increase_decrease,key=increase_decrease.count)
+    increase_decrease_color = "green" if increase_decrease == "Increase" else "red"
     st.markdown(f'<span style="color:{increase_decrease_color}">{increase_decrease}</span>', unsafe_allow_html=True)
+    st.subheader("Financial Metrics", divider='rainbow')
+    ner_result=[]
+    savedchunks=[]
+    # Entities per chunk index, kept so the highlighting pass below does not
+    # have to run the NER pipeline a second time on the same text.
+    chunk_entities={}
+    for chunk_idx, chunk in enumerate(chunks):
+        ents=nlpPipe(chunk)
+        if not ents:
+            continue
+        savedchunks.append(chunk_idx)
+        chunk_entities[chunk_idx]=ents
+        ent_idx=0
+        while ent_idx<len(ents):
+            ent=ents[ent_idx]
+            if len(ent["word"].split())==2:
+                # A two-word entity is recorded on its own.
+                ner_result.append({ent["entity_group"]:ent["word"]})
+                ent_idx+=1
+            else:
+                # Otherwise glue this entity to the next two, under the first
+                # one's label. Slicing instead of indexing +1/+2 avoids an
+                # IndexError when fewer than three entities remain.
+                merged="".join(e["word"] for e in ents[ent_idx:ent_idx+3])
+                ner_result.append({ent["entity_group"]:merged})
+                ent_idx+=3
+    profits=[x["profit"] for x in ner_result if "profit" in x]
+    revenues=[x["revenue"] for x in ner_result if "revenue" in x]
+    expences=[x["expense"] for x in ner_result if "expense" in x]
+    # One numbered text field per extracted value.
+    for number, revenue in enumerate(revenues, start=1):
+        st.text_input(f'Revenue:{number}', revenue)
+    for number, profit in enumerate(profits, start=1):
+        st.text_input(f'Profit:{number}', profit)
+    for number, expense in enumerate(expences, start=1):
+        st.text_input(f'Expences:{number}', expense)
+    st.subheader("Parts from transcript that contais financial metrics", divider='rainbow')
+    for idx in savedchunks:
+        doc = nlp(chunks[idx])
+        try:
+            spans = [
+                doc.char_span(entity['start'], entity['end'], label=entity['entity_group'])
+                for entity in chunk_entities[idx]
+            ]
+            # char_span returns None when the offsets do not line up with
+            # spaCy's token boundaries; drop those instead of losing the chunk.
+            doc.ents = [span for span in spans if span is not None]
+            ent_html = displacy.render(doc, style="ent", jupyter=False)
+        except (TypeError, ValueError):
+            # Highlighting is best-effort (missing offsets or overlapping
+            # spans); report it and carry on with the remaining chunks.
+            st.warning(f"Could not highlight financial metrics in transcript part {idx+1}.")
+            continue
+        st.markdown(ent_html, unsafe_allow_html=True)
+    st.subheader("Investment Recommendation", divider='rainbow')
+    # Amounts that cannot be converted contribute 0 to these totals.
+    profitAmount=sum(convert_amount_to_number(x) for x in profits)
+    expencesAmount=sum(convert_amount_to_number(x) for x in expences)
+    if increase_decrease=="Increase" and sentiment=="postive" and profitAmount>expencesAmount:
+        st.markdown(f'<span style="color:green">{"This is a great chance for investment. Do consider it."}</span>', unsafe_allow_html=True)
+    else:
+        st.markdown(f'<span style="color:red">{"Not the best chance for investment."}</span>', unsafe_allow_html=True)