Spaces:

sohomghosh
/

FENCE_Financial_Exaggerated_Numeral_ClassifiEr

Sleeping

App Files Files Community

sohomghosh committed on Apr 8, 2023

Commit

dba2b44

•

1 Parent(s): 2c0c269

Create app.py

Browse files

Files changed (1) hide show

app.py +140 -0

app.py ADDED Viewed

	@@ -0,0 +1,140 @@

+import numpy as np
+import pandas as pd
+import pickle
+import gradio as gr
+import torch
+import math
+from transformers import AutoTokenizer, AutoModel
+import transformers
+import re
# Classifier that is applied to the embedding of the [NUM] token.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load an artifact that is shipped with (and trusted by) this app.
# The context manager closes the file handle, which the previous one-liner
# `pickle.load(open(...))` never did.
with open("MLP_over_embeddings.pickle", "rb") as mlp_file:
  mlp = pickle.load(mlp_file)

# Tokenizer and encoder are loaded from the same checkpoint name.
tokenizer = AutoTokenizer.from_pretrained("nlpaueb/sec-bert-num")
model = AutoModel.from_pretrained("nlpaueb/sec-bert-num")

# Input here
def convert_actual_to_num(text, number, offset):
  """Replace one numeral inside ``text`` with the `` [NUM] `` placeholder.

  Args:
    text: The full input string.
    number: The numeral to mask; only the length of ``str(number)`` is used.
    offset: Character index (int or numeric string) where the numeral starts.

  Returns:
    ``text`` with the span ``[offset, offset + len(str(number)))`` replaced
    by ``" [NUM] "`` (note the surrounding spaces).
  """
  span = len(str(number))
  start = int(offset)
  end = start + span
  return "".join((text[:start], " [NUM] ", text[end:]))
def num_detector_highlighter_adv(text):
  """Split ``text`` into segments and tag every numeral with its offset.

  The text is scanned character by character. Runs of digits (optionally
  containing ``.`` when a digit follows the dot) are collected as numerals;
  everything else is collected into space-delimited fragments. Spaces
  themselves are dropped.

  Args:
    text: The string to scan.

  Returns:
    A list of ``(fragment, label)`` tuples in reading order. For a numeral
    the label is ``"@POSITION <start index>"``; for any other fragment the
    label is the empty string.
  """
  # Two sentinel characters so the i+1 / i+2 lookahead never runs off the end.
  padded = text + "  "
  segments = []
  others = ""   # non-numeric fragment being accumulated
  num = ""      # numeral being accumulated
  posn = -1     # start index of the current numeral, -1 when none is open

  for i in range(len(text)):
    ch = padded[i]
    nxt = padded[i + 1]

    if ch.isdigit():
      num += ch
      if posn == -1:
        posn = i
      if others:
        segments.append((others, ""))
        others = ""
      # The numeral continues if the next char is a digit, or a '.' that is
      # itself followed by a digit (decimal point inside a number).
      continues = nxt.isdigit() or (nxt == "." and padded[i + 2].isdigit())
      if not continues:
        segments.append((num, "@POSITION " + str(posn)))
        num = ""
        posn = -1
    elif ch == ".":
      if nxt.isdigit():
        # A dot directly before a digit belongs to the numeral. When it is
        # the first character of the numeral (e.g. ".5") the start offset
        # must be recorded here; otherwise the numeral would be reported at
        # position -1.
        if posn == -1:
          posn = i
        num += ch
      else:
        others += ch
    elif ch != " ":
      others += ch
    elif others:
      # A space terminates the current non-numeric fragment.
      segments.append((others, ""))
      others = ""

  if others:
    segments.append((others, ""))
  return segments
def exnum_evaluator(df):
  """Label each numeral in ``df`` as "Exaggerated" or "Non-Exaggerated".

  For every row the numeral is masked with ``[NUM]``, the text is encoded
  with the module-level ``tokenizer``/``model``, and the hidden state at the
  ``[NUM]`` token is passed to the module-level ``mlp``. The value predicted
  by ``mlp`` is compared with a magnitude computed from the numeral itself
  (``int(log10(x)) + 1``, capped at 6); a mismatch is reported as
  "Exaggerated".

  Args:
    df: DataFrame with the columns ``text``, ``number`` and ``position``.
      It is not modified.

  Returns:
    A DataFrame with the columns ``number``, ``position`` and ``prediction``.

  Raises:
    ValueError: If the ``[NUM]`` token is not found in a tokenized text.
  """
  result_columns = ["number", "position", "prediction"]

  # Work on a copy: callers may pass a filtered slice of another frame, and
  # adding columns in place would mutate it (and trigger pandas'
  # SettingWithCopyWarning).
  df = df.copy()

  if df.empty:
    # Nothing to score; return an empty frame with the expected columns.
    df["prediction"] = pd.Series(index=df.index, dtype=object)
    return df[result_columns]

  df["preprocessed_text"] = [
      convert_actual_to_num(row_text, number, position)
      for row_text, number, position in zip(df["text"], df["number"], df["position"])
  ]

  def _keep_one_decimal(value):
    # Keep at most one digit after the first decimal point ("13.57" -> "13.5").
    as_text = str(value)
    if "." in as_text:
      return as_text[: as_text.index(".") + 2]
    return as_text

  df["number_processed"] = df["number"].apply(_keep_one_decimal)

  model.eval()  # loop-invariant, so set inference mode once
  all_preds = []
  for preprocessed_text in df["preprocessed_text"].values:
    tokenized_text = tokenizer.tokenize(preprocessed_text)
    indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
    num_index = tokenized_text.index('[NUM]')
    tokens_tensor = torch.tensor([indexed_tokens])
    with torch.no_grad():
      last_hidden_states = model(tokens_tensor)[0]
    # Hidden state of the [NUM] token for the single sequence in the batch.
    embedding_of_num = last_hidden_states[0, num_index, :].cpu().detach().numpy()
    all_preds.append(mlp.predict([list(embedding_of_num)])[0])
  df["pred"] = all_preds

  def _magnitude(number_text):
    value = float(number_text)
    if value == 0:
      # math.log10(0) raises ValueError. NOTE(review): zero is treated as
      # magnitude 1 here, same as values in [0.1, 10) -- confirm that this
      # matches the label scheme the classifier was trained with.
      return 1
    return min(6, int(math.log10(value)) + 1)

  df["calculated_magnitude"] = df["number_processed"].apply(_magnitude)
  df["prediction"] = np.where(
      df["calculated_magnitude"] != df["pred"], "Exaggerated", "Non-Exaggerated"
  )
  return df[result_columns]
def change_checkbox_group(text2):
  """Build the checkbox choices from the stringified detector output.

  Args:
    text2: String form of a list of ``(fragment, label)`` tuples (the value
      held by the hidden Textbox). Empty or blank input yields no choices.

  Returns:
    A ``gr.CheckboxGroup`` update whose choices are ``"<numeral> <label>"``
    for every tuple with a non-empty label; all choices are pre-selected.
  """
  import ast  # local import so this block does not depend on file-level edits

  # SECURITY: text2 is the value of a Gradio input component and is therefore
  # supplied by the client. eval() would execute arbitrary code placed there;
  # ast.literal_eval only accepts Python literals (lists, tuples, strings...).
  segments = ast.literal_eval(text2) if text2 and text2.strip() else []

  labels = [str(num) + " " + str(posn) for (num, posn) in segments if posn != ""]
  return gr.CheckboxGroup.update(choices=labels, label="Numerals", visible=True, value=labels)
def combined_fns(text, text2, choices=None):
  """Run the exaggeration classifier on the numerals found in ``text``.

  Args:
    text: The original input text.
    text2: String form of a list of ``(fragment, label)`` tuples (the value
      held by the hidden Textbox). Empty or blank input means no numerals.
    choices: Optional list of ``"<numeral> <label>"`` strings; when given
      and non-empty, only those numerals are evaluated.

  Returns:
    The DataFrame produced by ``exnum_evaluator``.
  """
  import ast  # local import so this block does not depend on file-level edits

  # SECURITY: text2 is the value of a Gradio input component and is therefore
  # supplied by the client. eval() would execute arbitrary code placed there;
  # ast.literal_eval only accepts Python literals (lists, tuples, strings...).
  segments = ast.literal_eval(text2) if text2 and text2.strip() else []

  # Only tuples carrying a label are numerals.
  numerals = [(num, posn) for (num, posn) in segments if posn != ""]
  df = pd.DataFrame({
      "text": [text] * len(numerals),
      "number": [num for num, _ in numerals],
      "position": [posn.replace("@POSITION ", "") for _, posn in numerals],
  })
  df["num_position"] = [str(num) + " " + str(posn) for (num, posn) in numerals]

  # `choices` defaults to None rather than a shared mutable list.
  if choices:
    df = df[df["num_position"].isin(choices)]
  return exnum_evaluator(df)
+#examples
def set_example_text(example_text):
    """Return a Textbox update that shows the first field of the clicked sample.

    Args:
        example_text: The selected sample; only element 0 is used.
    """
    chosen = example_text[0]
    return gr.Textbox.update(value=chosen)
# Build the Gradio interface. Components are created in the same order as
# they should appear on the page, so statement order matters here.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
  gr.Markdown("# **Financial Exaggerated Numeral ClassifiEr (FENCE)**")
  with gr.Row():
    with gr.Column():
      text = gr.components.Textbox(
          label="Enter financial text here",
          lines=2,
          placeholder="Enter Financial Text here...",
      )

      # Step 1: detect numerals. The same detector output is shown to the
      # user (highlighted) and stored in a hidden textbox for later steps.
      b1 = gr.Button("Get numerals present in the entered text")
      highlighted_numerals = gr.HighlightedText(label='Numerals present in the text')
      b1.click(num_detector_highlighter_adv, inputs=text, outputs=highlighted_numerals)
      text2 = gr.components.Textbox(visible=False)
      b1.click(num_detector_highlighter_adv, inputs=text, outputs=text2)

      # Step 2: predict, either for every numeral or for a chosen subset.
      with gr.Row():
        with gr.Tabs():
          with gr.TabItem("All numerals"):
            b2 = gr.Button("Predict for all numerals")
            all_results = gr.DataFrame()
            b2.click(combined_fns, inputs=[text, text2], outputs=all_results)
          with gr.TabItem("Specific numerals"):
            b3 = gr.Button("Get option to select numerals")
            num_posn_inp_ckbx = gr.CheckboxGroup(
                choices=[], interactive=True, label='Specific Numerals'
            )
            b3.click(change_checkbox_group, inputs=text2, outputs=num_posn_inp_ckbx)
            b4 = gr.Button("Predict for specific numerals")
            specific_results = gr.DataFrame()
            b4.click(
                combined_fns,
                inputs=[text, text2, num_posn_inp_ckbx],
                outputs=specific_results,
            )

      # Clickable sample inputs; clicking one copies it into the text box.
      sample_rows = [
          ["Get 30% off Gap denim whilst recycling your old denim for communities in need"],
          ["	Matthew Perry puts Malibu mansion on the market for $13.5 million"],
          ["Anton Art Center in Mt. Clemens hosts 19th Annual ArtParty Fundraiser - Twilight in the Tropics"],
          ["Black Friday Sales! - Vegas hotel packages 50% savings from Southwest Vacations"],
      ]
      example_text = gr.Dataset(components=[text], samples=sample_rows)
      example_text.click(
          fn=set_example_text,
          inputs=example_text,
          outputs=example_text.components,
      )
      gr.Markdown("<sub><sup>How to use? [link](https://github.com/sohomghosh/FENCE_Financial_Exaggerated_Numeral_ClassifiEr/blob/main/README.md), Warning: User discretion is advised.</sup></sub>")

demo.launch(share=True)