Spaces:
Build error
Build error
import math
import re

import streamlit as st
from transformers import pipeline

# NOTE(review): clean_and_preprocess references `nltk` and `stopwords`, but
# neither name is imported in the visible part of this file -- confirm and add
# the corresponding nltk imports (and corpus downloads) where appropriate.
@st.cache_resource
def _load_pipeline(task, model):
    """Build a Hugging Face pipeline once and reuse it across script reruns.

    Streamlit re-executes the script on every widget interaction; caching the
    pipeline object avoids re-instantiating the model each time.

    Args:
        task: Pipeline task name, e.g. "text-classification".
        model: Hub identifier of the model to load.

    Returns:
        The pipeline object created by ``transformers.pipeline``.
    """
    return pipeline(task, model=model)


sentiment_model = _load_pipeline("text-classification", "AhmedTaha012/managersFeedback-V1.0.7")
increase_decrease_model = _load_pipeline("text-classification", "AhmedTaha012/nextQuarter-status-V1.1.9")
ner_model = _load_pipeline("token-classification", "AhmedTaha012/finance-ner-v0.0.9-finetuned-ner")
def getSpeakers(data):
    """Return the participant lines that follow the transcript's speaker heading.

    Headings are tried in priority order: "Speakers", "Call participants",
    "Call Participants". For the first heading present in ``data``, the text
    after its last occurrence is scanned and only lines containing "--" are
    kept.

    Args:
        data: Full transcript text.

    Returns:
        The matching lines joined with newlines, or "" when no heading is
        present (previously the function fell through and returned None).
    """
    for heading in ("Speakers", "Call participants", "Call Participants"):
        if heading in data:
            section = data.split(heading)[-1]
            return "\n".join(line for line in section.split("\n") if "--" in line)
    # No participants heading at all: return an empty string, consistent with
    # getQA, instead of an implicit None.
    return ""
def removeSpeakers(data):
    """Return the transcript text that precedes the participants section.

    Headings are tried in priority order: "Speakers", "Call participants",
    "Call Participants". For the first heading present in ``data``, everything
    before its first occurrence is returned.

    Args:
        data: Full transcript text.

    Returns:
        The text before the heading, or ``data`` unchanged when no heading is
        present. (Previously the function returned None in that case, which
        made later string operations on the result fail.)
    """
    for heading in ("Speakers", "Call participants", "Call Participants"):
        if heading in data:
            return data.split(heading)[0]
    # Nothing to strip: hand the text back untouched rather than None.
    return data
def getQA(data):
    """Return the text that follows the question-and-answer heading.

    The headings "Questions and Answers", "Questions & Answers" and "Q&A" are
    checked in that order; for the first one found in ``data`` the text after
    its last occurrence is returned. When none is present the result is "".
    """
    qa_headings = ("Questions and Answers", "Questions & Answers", "Q&A")
    for heading in qa_headings:
        if heading in data:
            # rpartition yields the tail after the final occurrence.
            _, _, tail = data.rpartition(heading)
            return tail
    return ""
def removeQA(data):
    """Return the transcript text that precedes the question-and-answer section.

    The headings "Questions and Answers", "Questions & Answers" and "Q&A" are
    checked in that order; for the first one found in ``data`` the text before
    its first occurrence is returned.

    Args:
        data: Transcript text.

    Returns:
        The text before the heading, or ``data`` unchanged when no heading is
        present. (Previously "" was returned in that case, which discarded
        the whole transcript whenever it had no Q&A section.)
    """
    for heading in ("Questions and Answers", "Questions & Answers", "Q&A"):
        if heading in data:
            return data.split(heading)[0]
    # No Q&A section to remove: keep the text as it is.
    return data
def clean_and_preprocess(text):
    """Lowercase the long lines of ``text`` and strip English stop words.

    Only lines longer than 100 characters are kept. Each kept line is
    lowercased, tokenized with ``nltk.word_tokenize``, filtered against the
    English stop-word list, and re-joined with single spaces.

    NOTE(review): this function is defined a second time later in the file;
    the later definition is the one bound when the script calls it.
    NOTE(review): ``nltk`` and ``stopwords`` are not imported in the visible
    part of this file -- confirm they are brought into scope.

    Args:
        text: Raw transcript text.

    Returns:
        The cleaned lines joined with newlines; "" when no line qualifies.
    """
    long_lines = [line for line in text.split("\n") if len(line) > 100]
    if not long_lines:
        # Nothing to clean, so skip loading the stop-word list entirely.
        return ""

    # The stop-word set is the same for every line: build it once, not per line.
    stop_words = set(stopwords.words('english'))

    cleaned_lines = []
    for line in long_lines:
        words = nltk.word_tokenize(line.lower())
        cleaned_lines.append(' '.join(word for word in words if word not in stop_words))
    return "\n".join(cleaned_lines)
# Abbreviation -> expansion table. Each key appears once; where the original
# literal repeated a key with a different value, the later (effective) value
# is the one kept here.
_ABBREVIATION_EXPANSIONS = {
    'Q1': 'first quarter',
    'Q2': 'second quarter',
    'Q3': 'third quarter',
    'Q4': 'fourth quarter',
    'q1': 'first quarter',
    'q2': 'second quarter',
    'q3': 'third quarter',
    'q4': 'fourth quarter',
    'FY': 'fiscal year',
    'YoY': 'year over year',
    'MoM': 'month over month',
    'EBITDA': 'earnings before interest, taxes, depreciation, and amortization',
    'ROI': 'return on investment',
    'EPS': 'earnings per share',
    'P/E': 'price-to-earnings',
    'DCF': 'discounted cash flow',
    'CAGR': 'compound annual growth rate',
    'GDP': 'gross domestic product',
    'CFO': 'chief financial officer',
    'GAAP': 'Generally Accepted Accounting Principles',
    'SEC': 'U.S. Securities and Exchange Commission',
    'IPO': 'initial public offering',
    'M&A': 'mergers and acquisitions',
    'EBIT': 'earnings before interest and taxes',
    'IRR': 'internal rate of return',
    'ROA': 'return on assets',
    'ROE': 'return on equity',
    'NAV': 'net asset value',
    'PE ratio': 'price-to-earnings ratio',
    'EPS growth': 'earnings per share growth',
    'Fiscal Year': 'financial year',
    'CAPEX': 'capital expenditures',
    'APR': 'annual percentage rate',
    'P&L': 'profit and loss',
    'NPM': 'net profit margin',
    'EBT': 'earnings before taxes',
    'EBITDAR': 'earnings before interest, taxes, depreciation, amortization, and rent',
    'PAT': 'profit after tax',
    'COGS': 'cost of goods sold',
    'EBTIDA': 'earnings before taxes, interest, depreciation, and amortization',
    'E&Y': 'Ernst & Young',
    'B2B': 'business to business',
    'B2C': 'business to consumer',
    'LIFO': 'last in, first out',
    'FIFO': 'first in, first out',
    'FCF': 'free cash flow',
    'LTM': 'last twelve months',
    'OPEX': 'operating expenses',
    'TSR': 'total shareholder return',
    'PP&E': 'property, plant, and equipment',
    'PBT': 'profit before tax',
    'EBITDAR margin': 'earnings before interest, taxes, depreciation, amortization, and rent margin',
    'ROIC': 'return on invested capital',
    'YOY': 'year-over-year',
    'MOM': 'month-over-month',
    'EBIT margin': 'earnings before interest and taxes margin',
    'EBTA': 'earnings before taxes and amortization',
    'FTE': 'full-time equivalent',
    'EBIDTA': 'earnings before interest, depreciation, taxes, and amortization',
    'PESTEL': 'Political, Economic, Social, Technological, Environmental, and Legal',
    'KPI': 'key performance indicator',
    'SWOT': 'Strengths, Weaknesses, Opportunities, Threats',
    'EBITDARM': 'earnings before interest, taxes, depreciation, amortization, rent, and management fees',
    'EBITDAX': 'earnings before interest, taxes, depreciation, amortization, and exploration expenses',
    'EBITDAS': 'earnings before interest, taxes, depreciation, amortization, and restructuring costs',
    'EBITDAX-C': 'earnings before interest, taxes, depreciation, amortization, exploration expenses, and commodity derivatives',
    'EBITDAX-R': 'earnings before interest, taxes, depreciation, amortization, exploration expenses, and asset retirement obligations',
    'EBITDAX-E': 'earnings before interest, taxes, depreciation, amortization, exploration expenses, and environmental liabilities',
    # Add more abbreviations and replacements as needed
}


def _build_abbreviation_pattern(expansions):
    """Compile one regex that matches any key of ``expansions`` as a token.

    Keys are tried longest first so that e.g. "EBITDAR" is never consumed as
    "EBITDA" + "R". A key must not be preceded by an ASCII letter, and a key
    ending in a letter must not be followed by one either -- except for a
    single plural "s" (so "KPIs" still expands but "SECOND" is left alone).
    Keys ending in a digit (Q1..q4) carry no right-hand guard, so forms such
    as "Q1FY23" keep expanding.
    """
    alternatives = [
        re.escape(key) + (r"(?=s?(?![A-Za-z]))" if key[-1].isalpha() else "")
        for key in sorted(expansions, key=len, reverse=True)
    ]
    return re.compile(r"(?<![A-Za-z])(?:" + "|".join(alternatives) + ")")


_ABBREVIATION_PATTERN = _build_abbreviation_pattern(_ABBREVIATION_EXPANSIONS)


def replace_abbreviations(text):
    """Expand known financial abbreviations in ``text``.

    The replacement is done in a single pass, so an expansion is never
    rescanned and longer abbreviations are not corrupted by shorter ones
    that they contain. Matching is case-sensitive.

    Args:
        text: Text to expand.

    Returns:
        ``text`` with every standalone abbreviation replaced by its full form.
    """
    return _ABBREVIATION_PATTERN.sub(
        lambda match: _ABBREVIATION_EXPANSIONS[match.group(0)], text
    )
def clean_and_preprocess(text):
    """Lowercase the long lines of ``text`` and strip English stop words.

    Only lines longer than 100 characters are kept. Each kept line is
    lowercased, tokenized with ``nltk.word_tokenize``, filtered against the
    English stop-word list, and re-joined with single spaces.

    NOTE(review): a function with this name is also defined earlier in the
    file; this later definition replaces it when the script runs.
    NOTE(review): ``nltk`` and ``stopwords`` are not imported in the visible
    part of this file -- confirm they are brought into scope.

    Args:
        text: Raw transcript text.

    Returns:
        The cleaned lines joined with newlines; "" when no line qualifies.
    """
    long_lines = [line for line in text.split("\n") if len(line) > 100]
    if not long_lines:
        # Nothing to clean, so skip loading the stop-word list entirely.
        return ""

    # Loop-invariant: the stop-word set does not depend on the line.
    stop_words = set(stopwords.words('english'))

    cleaned_lines = []
    for line in long_lines:
        words = nltk.word_tokenize(line.lower())
        cleaned_lines.append(' '.join(word for word in words if word not in stop_words))
    return "\n".join(cleaned_lines)
def _majority_label(model, texts):
    """Classify each text with ``model`` and return the most frequent label.

    Each call passes a single string, so ``model(text)[0]`` is the prediction
    for that whole chunk. Ties go to the label encountered first.
    """
    labels = [model(text)[0]['label'] for text in texts]
    return max(labels, key=labels.count)


st.title("Transcript Analysis")
transcript = st.text_area("Enter the transcript:", height=200)

# Preprocess: expand abbreviations, drop the participants list and the Q&A
# section, then lowercase / remove stop words.
transcript = replace_abbreviations(transcript)
without_speakers = removeSpeakers(transcript)
# Guard against a None result (no participants heading found) so that
# removeQA always receives a string.
if without_speakers is not None:
    transcript = without_speakers
transcript = removeQA(transcript)
transcript = clean_and_preprocess(transcript)

# Split into chunks of at most `splitSize` whitespace-separated tokens. Each
# chunk is joined back into ONE string: passing a list of tokens would make
# the pipelines treat every token as a separate input, and `[0]` would then
# pick the prediction for the first word only.
tokens = transcript.split()
splitSize = 256
chunks = [" ".join(tokens[start:start + splitSize]) for start in range(0, len(tokens), splitSize)]

if st.button("Analyze"):
    if not chunks:
        # max() over an empty label list would raise ValueError.
        st.warning("No analyzable text found. Please enter a transcript.")
    else:
        st.subheader("Sentiment Analysis")
        sentiment = _majority_label(sentiment_model, chunks)
        sentiment_color = "green" if sentiment == "POSITIVE" else "red"
        st.markdown(f'<span style="color:{sentiment_color}">{sentiment}</span>', unsafe_allow_html=True)

        st.subheader("Increase/Decrease Prediction")
        increase_decrease = _majority_label(increase_decrease_model, chunks)
        increase_decrease_color = "green" if increase_decrease == "INCREASE" else "red"
        st.markdown(f'<span style="color:{increase_decrease_color}">{increase_decrease}</span>', unsafe_allow_html=True)

        st.subheader("NER Metrics")
        ner_result = [ner_model(chunk) for chunk in chunks]
        st.write(str(ner_result))