AhmedTaha012 committed on
Commit
bebb1c3
1 Parent(s): dbf26ec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -9
app.py CHANGED
@@ -195,7 +195,7 @@ def getTopic(encoded_input):
195
  logits = modelTopic(**encoded_input).logits
196
  predicted_class_id = logits.argmax().item()
197
  return modelTopic.config.id2label[predicted_class_id]
198
- def selectedCorpusForNextQuarterModel(x,quarter):
199
  number_word_dict = {
200
  "1": "first",
201
  "2": "second",
@@ -205,9 +205,9 @@ def selectedCorpusForNextQuarterModel(x,quarter):
205
  }
206
  tokens=tokenizerTopic(x, padding=True, truncation=True, return_tensors='pt')
207
  splitSize=256
208
- chunksInput_ids=[tokens["input_ids"][r*splitSize:(r+1)*splitSize] for r in range(math.ceil(len(tokens["input_ids"])/splitSize))]
209
- chunksToken_type_ids=[tokens["token_type_ids"][r*splitSize:(r+1)*splitSize] for r in range(math.ceil(len(tokens["token_type_ids"])/splitSize))]
210
- chunksAttention_mask=[tokens["attention_mask"][r*splitSize:(r+1)*splitSize] for r in range(math.ceil(len(tokens["attention_mask"])/splitSize))]
211
  l=[]
212
  for idx in range(len(chunksInput_ids)):
213
  l.append({"input_ids":torch.tensor([list(chunksInput_ids[idx])]).to("cuda"),
@@ -218,16 +218,17 @@ def selectedCorpusForNextQuarterModel(x,quarter):
218
  selectedTopics = ["Stock Movement", "Earnings", "IPO", "Stock Commentary", "Currencies", "M&A | Investments", "Financials", "Macro", "Analyst Update", "Company | Product News"]
219
  result = [tokenizerTopic.decode(x["input_ids"][0], skip_special_tokens=True) for x in l if getTopic(x) in selectedTopics]
220
  result=[x for x in result if len(x)>10]
221
- des=f"the {number_word_dict[str(quarter)]} quarter results of the {usedData['quad-date'].iloc[i]}"
222
- courpus=result.split("\n")
223
  embeddings_1 = similarityModel.encode([des]+courpus, normalize_embeddings=True,device='cuda',show_progress_bar=False)
224
  sents=[des]+courpus
225
  rest=[sents[f] for f in [list(cosine_similarity(embeddings_1)[0][1:]).index(value)+1 for value in sorted(list(cosine_similarity(embeddings_1)[0][1:]),reverse=True)][:3]]
226
- return selectedCourpusForTraing.append(",".join(rest))
227
 
228
  st.header("Transcript Analysis", divider='rainbow')
229
  mainTranscript = st.text_area("Enter the transcript:", height=100)
230
- quarter = st.text_input('Enter your quarter', 'quarter of transcript')
 
231
  if st.button("Analyze"):
232
  transcript=replace_abbreviations(mainTranscript)
233
  transcript=replace_abbreviations(transcript)
@@ -245,7 +246,7 @@ if st.button("Analyze"):
245
  st.markdown(f'<span style="color:{sentiment_color}">{sentiment}</span>', unsafe_allow_html=True)
246
  st.subheader("Next Quarter Perdiction", divider='rainbow')
247
  # increase_decrease = [increase_decrease_model(x)[0]['label'] for x in chunks]
248
- increase_decrease=increase_decrease_model(selectedCorpusForNextQuarterModel(mainTranscript,quarter))[0]['label']
249
  increase_decrease=max(increase_decrease,key=increase_decrease.count)
250
  increase_decrease_color = "green" if increase_decrease == "Increase" else "red"
251
  st.markdown(f'<span style="color:{increase_decrease_color}">{increase_decrease}</span>', unsafe_allow_html=True)
 
195
  logits = modelTopic(**encoded_input).logits
196
  predicted_class_id = logits.argmax().item()
197
  return modelTopic.config.id2label[predicted_class_id]
198
+ def selectedCorpusForNextQuarterModel(x,quarter,year):
199
  number_word_dict = {
200
  "1": "first",
201
  "2": "second",
 
205
  }
206
  tokens=tokenizerTopic(x, padding=True, truncation=True, return_tensors='pt')
207
  splitSize=256
208
+ chunksInput_ids=[tokens["input_ids"][0][r*splitSize:(r+1)*splitSize] for r in range(math.ceil(len(tokens["input_ids"][0])/splitSize))]
209
+ chunksToken_type_ids=[tokens["token_type_ids"][0][r*splitSize:(r+1)*splitSize] for r in range(math.ceil(len(tokens["token_type_ids"][0])/splitSize))]
210
+ chunksAttention_mask=[tokens["attention_mask"][0][r*splitSize:(r+1)*splitSize] for r in range(math.ceil(len(tokens["attention_mask"][0])/splitSize))]
211
  l=[]
212
  for idx in range(len(chunksInput_ids)):
213
  l.append({"input_ids":torch.tensor([list(chunksInput_ids[idx])]).to("cuda"),
 
218
  selectedTopics = ["Stock Movement", "Earnings", "IPO", "Stock Commentary", "Currencies", "M&A | Investments", "Financials", "Macro", "Analyst Update", "Company | Product News"]
219
  result = [tokenizerTopic.decode(x["input_ids"][0], skip_special_tokens=True) for x in l if getTopic(x) in selectedTopics]
220
  result=[x for x in result if len(x)>10]
221
+ des=f"the {number_word_dict[str(quarter)]} quarter results of the {year}"
222
+ courpus=result
223
  embeddings_1 = similarityModel.encode([des]+courpus, normalize_embeddings=True,device='cuda',show_progress_bar=False)
224
  sents=[des]+courpus
225
  rest=[sents[f] for f in [list(cosine_similarity(embeddings_1)[0][1:]).index(value)+1 for value in sorted(list(cosine_similarity(embeddings_1)[0][1:]),reverse=True)][:3]]
226
+ return ",".join(rest)
227
 
228
  st.header("Transcript Analysis", divider='rainbow')
229
  mainTranscript = st.text_area("Enter the transcript:", height=100)
230
+ quarter = st.text_input('Enter your quarter', '')
231
+ year = st.text_input('Enter your quarter', '')
232
  if st.button("Analyze"):
233
  transcript=replace_abbreviations(mainTranscript)
234
  transcript=replace_abbreviations(transcript)
 
246
  st.markdown(f'<span style="color:{sentiment_color}">{sentiment}</span>', unsafe_allow_html=True)
247
  st.subheader("Next Quarter Perdiction", divider='rainbow')
248
  # increase_decrease = [increase_decrease_model(x)[0]['label'] for x in chunks]
249
+ increase_decrease=increase_decrease_model(selectedCorpusForNextQuarterModel(mainTranscript,quarter,year))[0]['label']
250
  increase_decrease=max(increase_decrease,key=increase_decrease.count)
251
  increase_decrease_color = "green" if increase_decrease == "Increase" else "red"
252
  st.markdown(f'<span style="color:{increase_decrease_color}">{increase_decrease}</span>', unsafe_allow_html=True)