Spaces: AhmedTaha012 (build status: Build error)
Commit: bebb1c3 (committed by AhmedTaha012)
Parent(s): dbf26ec
Update app.py

app.py CHANGED
@@ -195,7 +195,7 @@ def getTopic(encoded_input):
     logits = modelTopic(**encoded_input).logits
     predicted_class_id = logits.argmax().item()
     return modelTopic.config.id2label[predicted_class_id]
-def selectedCorpusForNextQuarterModel(x,quarter):
+def selectedCorpusForNextQuarterModel(x,quarter,year):
     number_word_dict = {
         "1": "first",
         "2": "second",
@@ -205,9 +205,9 @@ def selectedCorpusForNextQuarterModel(x,quarter):
     }
     tokens=tokenizerTopic(x, padding=True, truncation=True, return_tensors='pt')
     splitSize=256
-    chunksInput_ids=[tokens["input_ids"][r*splitSize:(r+1)*splitSize] for r in range(math.ceil(len(tokens["input_ids"])/splitSize))]
-    chunksToken_type_ids=[tokens["token_type_ids"][r*splitSize:(r+1)*splitSize] for r in range(math.ceil(len(tokens["token_type_ids"])/splitSize))]
-    chunksAttention_mask=[tokens["attention_mask"][r*splitSize:(r+1)*splitSize] for r in range(math.ceil(len(tokens["attention_mask"])/splitSize))]
+    chunksInput_ids=[tokens["input_ids"][0][r*splitSize:(r+1)*splitSize] for r in range(math.ceil(len(tokens["input_ids"][0])/splitSize))]
+    chunksToken_type_ids=[tokens["token_type_ids"][0][r*splitSize:(r+1)*splitSize] for r in range(math.ceil(len(tokens["token_type_ids"][0])/splitSize))]
+    chunksAttention_mask=[tokens["attention_mask"][0][r*splitSize:(r+1)*splitSize] for r in range(math.ceil(len(tokens["attention_mask"][0])/splitSize))]
     l=[]
     for idx in range(len(chunksInput_ids)):
         l.append({"input_ids":torch.tensor([list(chunksInput_ids[idx])]).to("cuda"),
@@ -218,16 +218,17 @@ def selectedCorpusForNextQuarterModel(x,quarter):
     selectedTopics = ["Stock Movement", "Earnings", "IPO", "Stock Commentary", "Currencies", "M&A | Investments", "Financials", "Macro", "Analyst Update", "Company | Product News"]
     result = [tokenizerTopic.decode(x["input_ids"][0], skip_special_tokens=True) for x in l if getTopic(x) in selectedTopics]
     result=[x for x in result if len(x)>10]
-    des=f"the {number_word_dict[str(quarter)]} quarter results of the {
-    courpus=result
+    des=f"the {number_word_dict[str(quarter)]} quarter results of the {year}"
+    courpus=result
     embeddings_1 = similarityModel.encode([des]+courpus, normalize_embeddings=True,device='cuda',show_progress_bar=False)
     sents=[des]+courpus
     rest=[sents[f] for f in [list(cosine_similarity(embeddings_1)[0][1:]).index(value)+1 for value in sorted(list(cosine_similarity(embeddings_1)[0][1:]),reverse=True)][:3]]
-    return
+    return ",".join(rest)
 
 st.header("Transcript Analysis", divider='rainbow')
 mainTranscript = st.text_area("Enter the transcript:", height=100)
-quarter = st.text_input('Enter your quarter', '
+quarter = st.text_input('Enter your quarter', '')
+year = st.text_input('Enter your quarter', '')
 if st.button("Analyze"):
     transcript=replace_abbreviations(mainTranscript)
     transcript=replace_abbreviations(transcript)
@@ -245,7 +246,7 @@ if st.button("Analyze"):
     st.markdown(f'<span style="color:{sentiment_color}">{sentiment}</span>', unsafe_allow_html=True)
     st.subheader("Next Quarter Perdiction", divider='rainbow')
     # increase_decrease = [increase_decrease_model(x)[0]['label'] for x in chunks]
-    increase_decrease=increase_decrease_model(selectedCorpusForNextQuarterModel(mainTranscript,quarter))[0]['label']
+    increase_decrease=increase_decrease_model(selectedCorpusForNextQuarterModel(mainTranscript,quarter,year))[0]['label']
     increase_decrease=max(increase_decrease,key=increase_decrease.count)
     increase_decrease_color = "green" if increase_decrease == "Increase" else "red"
     st.markdown(f'<span style="color:{increase_decrease_color}">{increase_decrease}</span>', unsafe_allow_html=True)
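Note on the chunking change: `tokenizerTopic(x, ..., return_tensors='pt')` returns tensors of shape `[1, seq_len]`, so slicing `tokens["input_ids"]` directly splits the batch dimension (length 1) and the whole transcript lands in a single chunk. Indexing with `[0]` first, as the new lines do, splits the actual token sequence into 256-token pieces. A minimal sketch with a stand-in tensor in place of the Space's tokenizer output:

```python
# Sketch only: demonstrates the effect of the added [0] on the slicing logic.
import math
import torch

splitSize = 256
tokens = {"input_ids": torch.arange(600).unsqueeze(0)}  # shape [1, 600], like return_tensors='pt'

# Old slicing: len(tokens["input_ids"]) == 1, so everything stays in one chunk.
old_chunks = [tokens["input_ids"][r*splitSize:(r+1)*splitSize]
              for r in range(math.ceil(len(tokens["input_ids"])/splitSize))]

# New slicing: len(tokens["input_ids"][0]) == 600, so the sequence is split.
new_chunks = [tokens["input_ids"][0][r*splitSize:(r+1)*splitSize]
              for r in range(math.ceil(len(tokens["input_ids"][0])/splitSize))]

print(len(old_chunks), old_chunks[0].shape)           # 1 torch.Size([1, 600])
print(len(new_chunks), [len(c) for c in new_chunks])  # 3 [256, 256, 88]
```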
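The new `year` argument feeds the query description that the similarity step ranks chunks against. A small worked example of that f-string; the "3" and "4" entries of `number_word_dict` are assumed, since only "1" and "2" are visible in the diff:

```python
# Worked example of the des=... string added in the diff above. Any quarter
# value outside number_word_dict's keys would raise a KeyError.
number_word_dict = {"1": "first", "2": "second", "3": "third", "4": "fourth"}  # "3"/"4" assumed

quarter, year = "2", "2023"  # values the two new st.text_input fields would provide
des = f"the {number_word_dict[str(quarter)]} quarter results of the {year}"
print(des)  # the second quarter results of the 2023
```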
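The `rest=...` comprehension keeps the three corpus chunks whose embeddings are closest to that description, and the new `return ",".join(rest)` hands them back as one comma-joined string. Below is a sketch of the same top-3 selection written with `numpy.argsort`; `similarityModel` is assumed to be a sentence-transformers encoder, as the `encode(..., normalize_embeddings=True)` call suggests:

```python
# Sketch (assumes scikit-learn and a sentence-transformers model, as in app.py):
# rank corpus chunks by cosine similarity to the description and keep the top k.
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def top_k_chunks(description, corpus, similarity_model, k=3):
    embeddings = similarity_model.encode([description] + corpus,
                                         normalize_embeddings=True,
                                         show_progress_bar=False)
    sims = cosine_similarity(embeddings)[0][1:]  # row 0 = description vs. each chunk
    best = np.argsort(sims)[::-1][:k]            # indices of the k most similar chunks
    return ",".join(corpus[i] for i in best)

# Example (model name is illustrative, not the one the Space loads):
# from sentence_transformers import SentenceTransformer
# model = SentenceTransformer("all-MiniLM-L6-v2")
# print(top_k_chunks(des, result, model))
```

Unlike the `.index(value)` lookup in the one-liner, `argsort` also keeps distinct indices when two chunks happen to have identical similarity scores.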
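Finally, the updated call passes `quarter` and `year` through to `selectedCorpusForNextQuarterModel` and indexes the result of `increase_decrease_model(...)` as `[0]['label']`, i.e. the model is used like a `transformers` text-classification pipeline, which returns one dict per input. A minimal sketch with a generic stand-in model (the Space's actual increase/decrease model is loaded elsewhere in app.py):

```python
# Sketch only: the model below is a generic stand-in, chosen just to show the
# [{'label': ..., 'score': ...}] return shape that [0]['label'] relies on.
from transformers import pipeline

clf = pipeline("text-classification",
               model="distilbert-base-uncased-finetuned-sst-2-english")

selected = "the second quarter results of the 2023, revenue grew and margins improved"
print(clf(selected))              # e.g. [{'label': 'POSITIVE', 'score': 0.99}]
print(clf(selected)[0]["label"])  # POSITIVE
```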