Update pipeline.py
Browse files - pipeline.py +3 -5
pipeline.py
CHANGED
@@ -20,10 +20,8 @@ import math
|
|
20 |
import xgboost
|
21 |
import re
|
22 |
import nltk
|
23 |
-
nltk.download('punkt')
|
24 |
nltk.download('stopwords')
|
25 |
nltk.download('wordnet')
|
26 |
-
nltk.download('omw-1.4')
|
27 |
import html
|
28 |
|
29 |
from config import config_file
|
@@ -298,9 +296,9 @@ class Branch_Classifier(object):
|
|
298 |
def format_data(self,data:list)-> list:
|
299 |
try:
|
300 |
X = text_cleaning.text_cleaning(data, steam=False, lemma=True,single_input=True)[0]
|
|
|
301 |
### Add Bigrams and keep only the good ones(pre-selected)
|
302 |
X_bigrmas = self.phrase_bigrams[X]
|
303 |
-
|
304 |
data_clean = self.clean_bigram(X_bigrmas)
|
305 |
X_bigrams_clean = ' '.join(map(str, data_clean))
|
306 |
pre_processed = self.vectorizer.transform([X_bigrams_clean]).toarray(),X_bigrams_clean
|
@@ -645,7 +643,7 @@ class Pipeline(object):
|
|
645 |
for higher_order, sub_arr in pred.items():
|
646 |
# Check which branch it belongs to
|
647 |
if higher_order in ["Negative","No Prediction"]:
|
648 |
-
pred[higher_order]['
|
649 |
pred[higher_order]["word_analysis"] = {"discriminator_data": "Not Used", "word_importance": {x:0 for x in input_text.split()}, "highlighted_html_text": " ".join(x for x in input_text.split())}
|
650 |
|
651 |
# For each Severity, run the corresponding Branch Prediction
|
@@ -653,7 +651,7 @@ class Pipeline(object):
|
|
653 |
model = self.all_label_models[self.bert_model.config['classes'].index(higher_order)]
|
654 |
out_pred = model.predict([input_text])
|
655 |
|
656 |
-
pred[higher_order]['
|
657 |
pred[higher_order]['word_analysis'] = out_pred['word_analysis']
|
658 |
|
659 |
return predictions,embeddings_output
|
|
|
20 |
import xgboost
|
21 |
import re
|
22 |
import nltk
|
|
|
23 |
nltk.download('stopwords')
|
24 |
nltk.download('wordnet')
|
|
|
25 |
import html
|
26 |
|
27 |
from config import config_file
|
|
|
296 |
def format_data(self,data:list)-> list:
|
297 |
try:
|
298 |
X = text_cleaning.text_cleaning(data, steam=False, lemma=True,single_input=True)[0]
|
299 |
+
|
300 |
### Add Bigrams and keep only the good ones(pre-selected)
|
301 |
X_bigrmas = self.phrase_bigrams[X]
|
|
|
302 |
data_clean = self.clean_bigram(X_bigrmas)
|
303 |
X_bigrams_clean = ' '.join(map(str, data_clean))
|
304 |
pre_processed = self.vectorizer.transform([X_bigrams_clean]).toarray(),X_bigrams_clean
|
|
|
643 |
for higher_order, sub_arr in pred.items():
|
644 |
# Check which branch it belongs to
|
645 |
if higher_order in ["Negative","No Prediction"]:
|
646 |
+
pred[higher_order]['diagnose'] = {higher_order: {"probability":sub_arr['probability']}}
|
647 |
pred[higher_order]["word_analysis"] = {"discriminator_data": "Not Used", "word_importance": {x:0 for x in input_text.split()}, "highlighted_html_text": " ".join(x for x in input_text.split())}
|
648 |
|
649 |
# For each Severity, run the corresponding Branch Prediction
|
|
|
651 |
model = self.all_label_models[self.bert_model.config['classes'].index(higher_order)]
|
652 |
out_pred = model.predict([input_text])
|
653 |
|
654 |
+
pred[higher_order]['diagnose'] = out_pred['predictions']
|
655 |
pred[higher_order]['word_analysis'] = out_pred['word_analysis']
|
656 |
|
657 |
return predictions,embeddings_output
|