Shredder committed on
Commit
6b12e0b
·
1 Parent(s): f7ea213

Upload Cuad_others.py

Browse files
Files changed (1) hide show
  1. Cuad_others.py +67 -0
Cuad_others.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from predict import run_prediction
2
+ from io import StringIO
3
+ import json
4
+ import spacy
5
+ from spacy import displacy
6
+ from transformers import pipeline
7
+ import torch
8
+ import nltk
9
+ nltk.download('punkt')
10
+
11
+
12
+
13
+
14
## Summarization
# Module-level summarization pipeline, constructed once at import time and
# reused by summarize_text() on every call.
summarizer = pipeline(
    "summarization",
    model="knkarthick/MEETING_SUMMARY",
)
16
def summarize_text(text):
    """Summarize *text* with the module-level ``summarizer`` pipeline.

    Args:
        text: The input passed unchanged to ``summarizer``.

    Returns:
        The ``'summary_text'`` value of the first result the pipeline returns.
    """
    # Only the first entry of the pipeline output is used.
    return summarizer(text)[0]['summary_text']
20
+
21
+
22
## Company Extraction
# The same checkpoint supplies both the model weights and the tokenizer.
_NER_CHECKPOINT = 'Jean-Baptiste/camembert-ner-with-dates'

# Module-level NER pipeline, constructed once at import time and reused by
# fin_ner(). With an aggregation strategy set, results are grouped spans,
# which is why fin_ner() reads the 'entity_group' key.
ner = pipeline(
    'ner',
    model=_NER_CHECKPOINT,
    tokenizer=_NER_CHECKPOINT,
    aggregation_strategy="simple",
)
24
def fin_ner(text):
    """Run the module-level ``ner`` pipeline on *text* and relabel its spans.

    Each span dict returned by ``ner`` has its ``'entity_group'`` key renamed
    to ``'entity'``. The dicts are modified in place, not copied.

    Args:
        text: The input passed unchanged to ``ner``.

    Returns:
        A dict with the original input under ``"text"`` and the list of
        relabelled span dicts under ``"entities"``.

    Raises:
        KeyError: If a span has no ``'entity_group'`` key.
    """
    entities = []
    for span in ner(text):
        # Move the label to the 'entity' key; all other fields stay as-is.
        span['entity'] = span.pop('entity_group')
        entities.append(span)
    return {"text": text, "entities": entities}
32
+
33
+
34
+ #CUAD STARTS
35
def load_questions():
    """Read the question list from ``questions.txt`` in the working directory.

    Returns:
        A list with one string per line of the file. Line endings are kept,
        so every entry except possibly the last ends with a newline.

    Raises:
        OSError: If ``questions.txt`` cannot be opened.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open('questions.txt', encoding='utf-8') as f:
        return f.readlines()
40
+
41
+
42
def load_questions_short():
    """Read the short question list from ``questionshort.txt`` in the working directory.

    Returns:
        A list with one string per line of the file. Line endings are kept,
        so every entry except possibly the last ends with a newline.

    Raises:
        OSError: If ``questionshort.txt`` cannot be opened.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open('questionshort.txt', encoding='utf-8') as f:
        return f.readlines()
47
+
48
def quad(query, file):
    """Answer one question about the document stored at *file*.

    The document is read in full and passed, together with *query*, to
    ``run_prediction`` using the ``'marshmellow77/roberta-base-cuad'`` model
    and ``n_best_size=5``. When a prediction is returned, the top entry of
    ``nbest.json`` supplies the answer text and its probability.

    Args:
        query: The question to ask. An empty query yields the "no answer"
            result without running the model.
        file: Path of the text file to search. It is decoded with the
            platform default encoding, as before.

    Returns:
        A tuple ``(answer, answer_p)``:

        * ``answer`` is the best answer text followed by a newline, or
          ``'No answer found in document'``.
        * ``answer_p`` is ``answer`` followed by a
          ``"Probability: <pct>%"`` line, or ``""`` when there is no answer.

    Raises:
        OSError: If *file* or ``nbest.json`` cannot be opened.
    """
    with open(file) as f:
        paragraph = f.read()

    # With no question or no document there is nothing to predict, so skip
    # the model call and report the same result as an unanswered question.
    if not paragraph or not query:
        return 'No answer found in document', ""

    print('getting predictions')
    predictions = run_prediction(
        [query],
        paragraph,
        'marshmellow77/roberta-base-cuad',
        n_best_size=5,
    )

    # Only one query was submitted, so its prediction is stored under key '0'.
    if predictions['0'] == "":
        return 'No answer found in document', ""

    # NOTE(review): nbest.json is presumably written to the working directory
    # by run_prediction -- confirm against predict.py.
    with open("nbest.json") as jf:
        best = json.load(jf)['0'][0]

    answer = f"{best['text']}\n"
    answer_p = answer + f"Probability: {round(best['probability'] * 100, 1)}%\n\n"
    return answer, answer_p