# Finance / app.py
# AhmedTaha012's picture
# Update app.py
# 54b6823
# raw
# history blame
# No virus
# 8.68 kB
import math
import re

import nltk
import streamlit as st
from nltk.corpus import stopwords
from transformers import pipeline
# Fetch the NLTK data used further down in this file: the 'punkt' tokenizer
# data (nltk.word_tokenize) and the 'stopwords' corpus (stopwords.words).
nltk.download('punkt')
nltk.download('stopwords')
# Hugging Face pipelines, created at module level and used by the "Analyze"
# handler at the bottom of the file.
# NOTE(review): Streamlit re-executes the script on every interaction, so
# these loads presumably repeat on each rerun -- consider caching them.
# Text classifier whose label is compared against "POSITIVE" below.
sentiment_model = pipeline("text-classification", model="AhmedTaha012/managersFeedback-V1.0.7")
# Text classifier whose label is compared against "INCREASE" below.
increase_decrease_model = pipeline("text-classification", model="AhmedTaha012/nextQuarter-status-V1.1.9")
# Token-classification (NER) model; its raw output is printed as-is.
ner_model = pipeline("token-classification", model="AhmedTaha012/finance-ner-v0.0.9-finetuned-ner")
def getSpeakers(data):
    """Return the participant lines listed after the speakers heading.

    The first heading found in *data* (checked in the order "Speakers",
    "Call participants", "Call Participants") selects the section: everything
    after the LAST occurrence of that heading. Only lines containing "--"
    are kept and they are joined with newlines.

    Returns an empty string when no heading is present, matching what
    ``getQA`` does for a missing section (previously this fell off the end
    of the function and returned ``None``).
    """
    for heading in ("Speakers", "Call participants", "Call Participants"):
        if heading in data:
            section = data.split(heading)[-1]
            return "\n".join(
                line for line in section.split("\n") if "--" in line
            )
    return ""
def removeSpeakers(data):
    """Return *data* with the speakers/participants section cut off.

    The first heading found in *data* (checked in the order "Speakers",
    "Call participants", "Call Participants") marks the cut: everything
    before the FIRST occurrence of that heading is returned.

    When no heading is present the text is returned unchanged. Previously
    the function returned ``None`` in that case, which made the next
    processing step fail with a ``TypeError`` (including for an empty
    transcript).
    """
    for heading in ("Speakers", "Call participants", "Call Participants"):
        if heading in data:
            return data.split(heading)[0]
    return data
def getQA(data):
    """Return the text that follows the Q&A heading, or "" if there is none.

    Headings are tried in a fixed order ("Questions and Answers",
    "Questions & Answers", "Q&A"); the first one present in *data* is used
    and the text after its last occurrence is returned.
    """
    qa_headings = ("Questions and Answers", "Questions & Answers", "Q&A")
    for heading in qa_headings:
        if heading not in data:
            continue
        # rpartition keeps only what follows the final occurrence.
        _, _, tail = data.rpartition(heading)
        return tail
    return ""
def removeQA(data):
    """Return *data* with the question-and-answer section cut off.

    Headings are tried in a fixed order ("Questions and Answers",
    "Questions & Answers", "Q&A"); the first one present in *data* marks the
    cut and everything before its first occurrence is returned.

    When no heading is present there is nothing to remove, so the text is
    returned unchanged. Previously an empty string was returned, which
    discarded the entire transcript whenever it had no Q&A section.
    """
    for heading in ("Questions and Answers", "Questions & Answers", "Q&A"):
        if heading in data:
            return data.split(heading)[0]
    return data
def clean_and_preprocess(text):
    """Lowercase, tokenize and strip English stop words from long lines.

    Only lines longer than 100 characters are kept. Each kept line is
    lowercased, tokenized with ``nltk.word_tokenize``, filtered against the
    NLTK English stop-word list and re-joined with single spaces. The
    cleaned lines are returned joined by newlines ("" when no line
    qualifies).

    NOTE: a function with this same name is defined again later in this
    file; that later definition is the one in effect at run time.
    """
    long_lines = [line for line in text.split("\n") if len(line) > 100]
    if not long_lines:
        # Nothing to clean; skip loading the stop-word corpus entirely.
        return ""

    # Build the stop-word set once instead of once per line.
    stop_words = set(stopwords.words('english'))

    cleaned_lines = []
    for line in long_lines:
        words = nltk.word_tokenize(line.lower())
        cleaned_lines.append(
            ' '.join(word for word in words if word not in stop_words)
        )
    return "\n".join(cleaned_lines)
# Abbreviation -> expansion table used by replace_abbreviations().
# Each key appears exactly once; where the original table listed a key twice
# the later value (the one Python actually kept) is used.
_ABBREVIATIONS = {
    'Q1': 'first quarter',
    'Q2': 'second quarter',
    'Q3': 'third quarter',
    'Q4': 'fourth quarter',
    'q1': 'first quarter',
    'q2': 'second quarter',
    'q3': 'third quarter',
    'q4': 'fourth quarter',
    'FY': 'fiscal year',
    'YoY': 'year over year',
    'MoM': 'month over month',
    'EBITDA': 'earnings before interest, taxes, depreciation, and amortization',
    'ROI': 'return on investment',
    'EPS': 'earnings per share',
    'P/E': 'price-to-earnings',
    'DCF': 'discounted cash flow',
    'CAGR': 'compound annual growth rate',
    'GDP': 'gross domestic product',
    'CFO': 'chief financial officer',
    'GAAP': 'Generally Accepted Accounting Principles',
    'SEC': 'U.S. Securities and Exchange Commission',
    'IPO': 'initial public offering',
    'M&A': 'mergers and acquisitions',
    'EBIT': 'earnings before interest and taxes',
    'IRR': 'internal rate of return',
    'ROA': 'return on assets',
    'ROE': 'return on equity',
    'NAV': 'net asset value',
    'PE ratio': 'price-to-earnings ratio',
    'EPS growth': 'earnings per share growth',
    'Fiscal Year': 'financial year',
    'CAPEX': 'capital expenditures',
    'APR': 'annual percentage rate',
    'P&L': 'profit and loss',
    'NPM': 'net profit margin',
    'EBT': 'earnings before taxes',
    'EBITDAR': 'earnings before interest, taxes, depreciation, amortization, and rent',
    'PAT': 'profit after tax',
    'COGS': 'cost of goods sold',
    'EBTIDA': 'earnings before taxes, interest, depreciation, and amortization',
    'E&Y': 'Ernst & Young',
    'B2B': 'business to business',
    'B2C': 'business to consumer',
    'LIFO': 'last in, first out',
    'FIFO': 'first in, first out',
    'FCF': 'free cash flow',
    'LTM': 'last twelve months',
    'OPEX': 'operating expenses',
    'TSR': 'total shareholder return',
    'PP&E': 'property, plant, and equipment',
    'PBT': 'profit before tax',
    'EBITDAR margin': 'earnings before interest, taxes, depreciation, amortization, and rent margin',
    'ROIC': 'return on invested capital',
    'YOY': 'year-over-year',
    'MOM': 'month-over-month',
    'EBIT margin': 'earnings before interest and taxes margin',
    'EBTA': 'earnings before taxes and amortization',
    'FTE': 'full-time equivalent',
    'EBIDTA': 'earnings before interest, depreciation, taxes, and amortization',
    'PESTEL': 'Political, Economic, Social, Technological, Environmental, and Legal',
    'KPI': 'key performance indicator',
    'SWOT': 'Strengths, Weaknesses, Opportunities, Threats',
    'EBITDARM': 'earnings before interest, taxes, depreciation, amortization, rent, and management fees',
    'EBITDAX': 'earnings before interest, taxes, depreciation, amortization, and exploration expenses',
    'EBITDAS': 'earnings before interest, taxes, depreciation, amortization, and restructuring costs',
    'EBITDAX-C': 'earnings before interest, taxes, depreciation, amortization, exploration expenses, and commodity derivatives',
    'EBITDAX-R': 'earnings before interest, taxes, depreciation, amortization, exploration expenses, and asset retirement obligations',
    'EBITDAX-E': 'earnings before interest, taxes, depreciation, amortization, exploration expenses, and environmental liabilities',
    # Add more abbreviations and replacements as needed
}

# One alternation over every abbreviation, longest first so that e.g.
# "EBITDAR" is tried before "EBITDA". The lookbehind/lookahead require the
# abbreviation to stand on its own (optionally followed by a plural "s"),
# so "APRIL" or "SECOND" are left alone while "KPIs" is still expanded.
_ABBREVIATION_PATTERN = re.compile(
    r"(?<!\w)(?:"
    + "|".join(
        re.escape(abbreviation)
        for abbreviation in sorted(_ABBREVIATIONS, key=len, reverse=True)
    )
    + r")(?=s?(?!\w))"
)


def replace_abbreviations(text):
    """Expand known finance abbreviations in *text* and return the result.

    Matching is case-sensitive and done in a single pass. Unlike a chain of
    ``str.replace`` calls, a short abbreviation can no longer corrupt a
    longer one that contains it ("EBITDAR", "ROIC", "EBITDAX-C") and
    abbreviations embedded in ordinary words are not touched. A trailing
    plural "s" is preserved after the expansion ("KPIs" ->
    "key performance indicators").
    """
    return _ABBREVIATION_PATTERN.sub(
        lambda match: _ABBREVIATIONS[match.group(0)], text
    )
def clean_and_preprocess(text):
    """Lowercase, tokenize and strip English stop words from long lines.

    Only lines longer than 100 characters are kept. Each kept line is
    lowercased, tokenized with ``nltk.word_tokenize``, filtered against the
    NLTK English stop-word list and re-joined with single spaces. The
    cleaned lines are returned joined by newlines ("" when no line
    qualifies).

    NOTE: this redefines a function of the same name declared earlier in
    this file; being the later definition, it is the one in effect.
    """
    long_lines = [line for line in text.split("\n") if len(line) > 100]
    if not long_lines:
        # Nothing to clean; skip loading the stop-word corpus entirely.
        return ""

    # Build the stop-word set once instead of once per line.
    stop_words = set(stopwords.words('english'))

    cleaned_lines = []
    for line in long_lines:
        words = nltk.word_tokenize(line.lower())
        cleaned_lines.append(
            ' '.join(word for word in words if word not in stop_words)
        )
    return "\n".join(cleaned_lines)
# ---------------------------------------------------------------------------
# Streamlit page: paste a transcript, click "Analyze", see model outputs.
# ---------------------------------------------------------------------------
st.title("Transcript Analysis")
transcript = st.text_area("Enter the transcript:", height=200)

# Maximum number of whitespace-separated tokens sent to a model per call.
splitSize = 256

if st.button("Analyze"):
    # Preprocess only when analysis is requested, not on every rerun.
    transcript = replace_abbreviations(transcript)
    body = removeSpeakers(transcript)
    if body is None:
        # Guard against a None result (no participants heading found).
        body = transcript
    transcript = clean_and_preprocess(removeQA(body))

    # Split into chunks of at most splitSize tokens. Each chunk is re-joined
    # into ONE string: handing a pipeline a list of words would make it
    # treat every word as a separate input.
    tokens = transcript.split()
    chunks = [
        " ".join(tokens[start:start + splitSize])
        for start in range(0, len(tokens), splitSize)
    ]

    if not chunks:
        # Avoids max() on an empty list when nothing survives preprocessing.
        st.warning("No analyzable text found in the transcript.")
    else:
        st.subheader("Sentiment Analysis")
        sentiment = [sentiment_model(chunk)[0]['label'] for chunk in chunks]
        # Majority vote across chunks (first label wins a tie).
        sentiment = max(sentiment, key=sentiment.count)
        sentiment_color = "green" if sentiment == "POSITIVE" else "red"
        st.markdown(f'<span style="color:{sentiment_color}">{sentiment}</span>', unsafe_allow_html=True)

        st.subheader("Increase/Decrease Prediction")
        increase_decrease = [increase_decrease_model(chunk)[0]['label'] for chunk in chunks]
        # Majority vote across chunks (first label wins a tie).
        increase_decrease = max(increase_decrease, key=increase_decrease.count)
        increase_decrease_color = "green" if increase_decrease == "INCREASE" else "red"
        st.markdown(f'<span style="color:{increase_decrease_color}">{increase_decrease}</span>', unsafe_allow_html=True)

        st.subheader("NER Metrics")
        # Raw per-chunk entity predictions, shown as their string form.
        ner_result = [ner_model(chunk) for chunk in chunks]
        st.write(str(ner_result))