Spaces:

tsantos
/

Hierarchical-Classification-System-for-Breast-Cancer

Runtime error

App Files Files Community

tsantos committed on Apr 11, 2022

Commit

6f0005a

1 Parent(s): 60b25bc

Update pipeline.py

Browse files

Files changed (1) hide show

pipeline.py +3 -5

pipeline.py CHANGED Viewed

@@ -20,10 +20,8 @@ import math
 import xgboost
 import re
 import nltk
-nltk.download('punkt')
 nltk.download('stopwords')
 nltk.download('wordnet')
-nltk.download('omw-1.4')
 import html
 from config import config_file
@@ -298,9 +296,9 @@ class Branch_Classifier(object):
 	def format_data(self,data:list)-> list:
 		try:
 			X = text_cleaning.text_cleaning(data, steam=False, lemma=True,single_input=True)[0]
 			### Add Bigrams and keep only the good ones(pre-selected)
 			X_bigrmas  = self.phrase_bigrams[X]
 			data_clean = self.clean_bigram(X_bigrmas)
 			X_bigrams_clean = ' '.join(map(str, data_clean))
 			pre_processed = self.vectorizer.transform([X_bigrams_clean]).toarray(),X_bigrams_clean
@@ -645,7 +643,7 @@ class Pipeline(object):
 			for higher_order, sub_arr in pred.items():
 				# Check which branch it belongs to
 				if higher_order in ["Negative","No Prediction"]:
-					pred[higher_order]['labels'] = {higher_order: {"probability":sub_arr['probability']}}
 					pred[higher_order]["word_analysis"] = {"discriminator_data": "Not Used", "word_importance": {x:0 for x in input_text.split()}, "highlighted_html_text": " ".join(x for x in input_text.split())}
 				# For each Severity, run the corresponding Branch Prediction
@@ -653,7 +651,7 @@ class Pipeline(object):
 					model = self.all_label_models[self.bert_model.config['classes'].index(higher_order)]
 					out_pred = model.predict([input_text])
-					pred[higher_order]['labels'] = out_pred['predictions']
 					pred[higher_order]['word_analysis'] = out_pred['word_analysis']
 		return predictions,embeddings_output

 import xgboost
 import re
 import nltk
 nltk.download('stopwords')
 nltk.download('wordnet')
 import html
 from config import config_file
 	def format_data(self,data:list)-> list:
 		try:
 			X = text_cleaning.text_cleaning(data, steam=False, lemma=True,single_input=True)[0]
 			### Add Bigrams and keep only the good ones(pre-selected)
 			X_bigrmas  = self.phrase_bigrams[X]
 			data_clean = self.clean_bigram(X_bigrmas)
 			X_bigrams_clean = ' '.join(map(str, data_clean))
 			pre_processed = self.vectorizer.transform([X_bigrams_clean]).toarray(),X_bigrams_clean
 			for higher_order, sub_arr in pred.items():
 				# Check which branch it belongs to
 				if higher_order in ["Negative","No Prediction"]:
+					pred[higher_order]['diagnose'] = {higher_order: {"probability":sub_arr['probability']}}
 					pred[higher_order]["word_analysis"] = {"discriminator_data": "Not Used", "word_importance": {x:0 for x in input_text.split()}, "highlighted_html_text": " ".join(x for x in input_text.split())}
 				# For each Severity, run the corresponding Branch Prediction
 					model = self.all_label_models[self.bert_model.config['classes'].index(higher_order)]
 					out_pred = model.predict([input_text])
+					pred[higher_order]['diagnose'] = out_pred['predictions']
 					pred[higher_order]['word_analysis'] = out_pred['word_analysis']
 		return predictions,embeddings_output