tsantos committed on
Commit
6f0005a
·
1 Parent(s): 60b25bc

Update pipeline.py

Browse files
Files changed (1) hide show
  1. pipeline.py +3 -5
pipeline.py CHANGED
@@ -20,10 +20,8 @@ import math
20
  import xgboost
21
  import re
22
  import nltk
23
- nltk.download('punkt')
24
  nltk.download('stopwords')
25
  nltk.download('wordnet')
26
- nltk.download('omw-1.4')
27
  import html
28
 
29
  from config import config_file
@@ -298,9 +296,9 @@ class Branch_Classifier(object):
298
  def format_data(self,data:list)-> list:
299
  try:
300
  X = text_cleaning.text_cleaning(data, steam=False, lemma=True,single_input=True)[0]
 
301
  ### Add Bigrams and keep only the good ones(pre-selected)
302
  X_bigrmas = self.phrase_bigrams[X]
303
-
304
  data_clean = self.clean_bigram(X_bigrmas)
305
  X_bigrams_clean = ' '.join(map(str, data_clean))
306
  pre_processed = self.vectorizer.transform([X_bigrams_clean]).toarray(),X_bigrams_clean
@@ -645,7 +643,7 @@ class Pipeline(object):
645
  for higher_order, sub_arr in pred.items():
646
  # Check which branch it belongs to
647
  if higher_order in ["Negative","No Prediction"]:
648
- pred[higher_order]['labels'] = {higher_order: {"probability":sub_arr['probability']}}
649
  pred[higher_order]["word_analysis"] = {"discriminator_data": "Not Used", "word_importance": {x:0 for x in input_text.split()}, "highlighted_html_text": " ".join(x for x in input_text.split())}
650
 
651
  # For each Severity, run the corresponding Branch Prediction
@@ -653,7 +651,7 @@ class Pipeline(object):
653
  model = self.all_label_models[self.bert_model.config['classes'].index(higher_order)]
654
  out_pred = model.predict([input_text])
655
 
656
- pred[higher_order]['labels'] = out_pred['predictions']
657
  pred[higher_order]['word_analysis'] = out_pred['word_analysis']
658
 
659
  return predictions,embeddings_output
 
20
  import xgboost
21
  import re
22
  import nltk
 
23
  nltk.download('stopwords')
24
  nltk.download('wordnet')
 
25
  import html
26
 
27
  from config import config_file
 
296
  def format_data(self,data:list)-> list:
297
  try:
298
  X = text_cleaning.text_cleaning(data, steam=False, lemma=True,single_input=True)[0]
299
+
300
  ### Add Bigrams and keep only the good ones(pre-selected)
301
  X_bigrmas = self.phrase_bigrams[X]
 
302
  data_clean = self.clean_bigram(X_bigrmas)
303
  X_bigrams_clean = ' '.join(map(str, data_clean))
304
  pre_processed = self.vectorizer.transform([X_bigrams_clean]).toarray(),X_bigrams_clean
 
643
  for higher_order, sub_arr in pred.items():
644
  # Check which branch it belongs to
645
  if higher_order in ["Negative","No Prediction"]:
646
+ pred[higher_order]['diagnose'] = {higher_order: {"probability":sub_arr['probability']}}
647
  pred[higher_order]["word_analysis"] = {"discriminator_data": "Not Used", "word_importance": {x:0 for x in input_text.split()}, "highlighted_html_text": " ".join(x for x in input_text.split())}
648
 
649
  # For each Severity, run the corresponding Branch Prediction
 
651
  model = self.all_label_models[self.bert_model.config['classes'].index(higher_order)]
652
  out_pred = model.predict([input_text])
653
 
654
+ pred[higher_order]['diagnose'] = out_pred['predictions']
655
  pred[higher_order]['word_analysis'] = out_pred['word_analysis']
656
 
657
  return predictions,embeddings_output