"""Label the numeric tokens of a text as in-claim or out-of-claim.

A pickled classifier (``lr_clf``) is loaded at import time and applied to the
feature vectors that ``fincat_utils.bert_embedding_extract`` returns for every
whitespace-delimited token containing a digit.
"""

import pickle
import re

import gradio as gr
import nltk

from fincat_utils import bert_embedding_extract
from fincat_utils import extract_context_words

# NOTE: unpickling executes arbitrary code; only ship a trusted model file.
# The context manager makes sure the file handle is closed after loading.
with open("lr_clf_FiNCAT.pickle", 'rb') as _model_file:
    lr_clf = pickle.load(_model_file)

nltk.download('punkt')

# Characters stripped (at most one) from the end of a numeric token.
_TRAILING_PUNCTUATION = ('.', ',', ';', ":", "-", "!", "?", ")", '"', "'")


def score_fincat(txt):
    """Classify every digit-bearing token of *txt*.

    Args:
        txt: The input text.

    Returns:
        A list of ``(token, label)`` tuples, one per whitespace-delimited
        token that contains a digit, in order of appearance. ``label`` is
        ``' In-claim'`` or ``'Out-of-Claim'``, or ``''`` when no features
        could be extracted for the token. If the text has no such token the
        list holds the single entry ``(padded_text, 'None')``.
    """
    highlight = []
    # The paragraph handed to the helpers is padded with one space on each
    # side; all offsets below are relative to this padded text.
    txt = " " + txt + " "

    # Iterating over regex matches yields each token together with its exact
    # position. Searching the text for the token instead would resolve every
    # repeated token to its first occurrence and would fail for tokens
    # delimited by tabs or newlines.
    for token in re.finditer(r"\S+", txt):
        word = token.group()
        if not any(char.isdigit() for char in word):
            continue

        # Drop a single trailing punctuation mark so that e.g. "2020," is
        # scored as "2020".
        if word.endswith(_TRAILING_PUNCTUATION):
            word = word[:-1]

        st = token.start()
        ed = st + len(word)
        x = {'paragraph': txt, 'offset_start': st, 'offset_end': ed}
        context_text = extract_context_words(x)
        features = bert_embedding_extract(context_text, word)

        # NOTE(review): presumably the helper signals "no embedding" with a
        # leading 'None' string -- confirm against fincat_utils.
        if features[0] == 'None':
            highlight.append((word, ''))
            continue

        prediction = lr_clf.predict(features.reshape(1, 768))
        highlight.append(
            (word, ' In-claim' if prediction == 1 else 'Out-of-Claim')
        )

    if not highlight:
        highlight.append((txt, 'None'))
    return highlight