alighadami77 committed
Commit b3e3919 · Parent: 12e2be7

Update pipeline.py

Files changed (1):
  1. pipeline.py +13 -56
pipeline.py CHANGED
@@ -42,64 +42,21 @@ class PreTrainedPipeline():
         similarities = distance.cdist(embeddings.reshape((1,300)), self.comparisons, "cosine")[0]
         top_indices = similarities.argsort()[:10]
         top_words = [[self.id2h[str(top_indices[i])]] for i in range(10)]
-
+        logits = np.exp(-10*np.array(similarities[top_indices]))
+        softmax_probs = tf.nn.softmax(logits).numpy()
+        top_scores = [round(float(softmax_probs[i]), 3) for i in range(10)]
 
         return [
             [
-                {'label': top_words[0], 'score': 0},
-                {'label': top_words[1], 'score': 0},
-                {'label': top_words[2], 'score': 0},
-                {'label': top_words[3], 'score': 0},
+                {'label': top_words[0], 'score': top_scores[0]},
+                {'label': top_words[1], 'score': top_scores[1]},
+                {'label': top_words[2], 'score': top_scores[2]},
+                {'label': top_words[3], 'score': top_scores[3]},
+                {'label': top_words[4], 'score': top_scores[4]},
+                {'label': top_words[5], 'score': top_scores[5]},
+                {'label': top_words[6], 'score': top_scores[6]},
+                {'label': top_words[7], 'score': top_scores[7]},
+                {'label': top_words[8], 'score': top_scores[8]},
+                {'label': top_words[9], 'score': top_scores[9]},
             ]
         ]
-
-
-        # return [
-        #     [  # Sample output, call the model here TODO
-        #         {'label': 'POSITIVE', 'score': 0.05},
-        #         {'label': 'NEGATIVE', 'score': 0.03},
-        #         {'label': 'معنی', 'score': 0.92},
-        #         {'label': f'{inputs}', 'score': 0},
-        #     ]
-        # ]
-
-        # def RevDict(sent, flag, model):
-        #     """
-        #     This function receives a sentence from the user and returns the top 10 (for flag=0) or top 100 (for flag=1) predictions.
-        #     The input sentence is normalized and stop words are removed.
-        #     """
-
-        #     normalizer = Normalizer()
-        #     X_Normalized = normalizer.normalize(sent)
-        #     X_Tokens = word_tokenize(X_Normalized)
-        #     stopwords = [normalizer.normalize(x.strip()) for x in codecs.open(r"stopwords.txt", 'r', 'utf-8').readlines()]
-        #     X_Tokens = [t for t in X_Tokens if t not in stopwords]
-        #     preprocessed = [' '.join(X_Tokens)][0]
-        #     sent_ids = sent2id([preprocessed])
-        #     output = np.array((model.predict(sent_ids.reshape((1,20))).tolist()[0]))
-        #     distances = distance.cdist(output.reshape((1,300)), comparison_matrix, "cosine")[0]
-        #     min_index_100 = distances.argsort()[:100]
-        #     min_index_10 = distances.argsort()[:10]
-
-        #     temp = []
-        #     if flag == 0:
-        #         for i in range(10):
-        #             temp.append(id2h[str(min_index_10[i])])
-        #     elif flag == 1:
-        #         for i in range(100):
-        #             temp.append(id2h[str(min_index_100[i])])
-
-        #     for i in range(len(temp)):
-        #         print(temp[i])
-
-        # def sent2id(sents):
-        #     sents_id = np.zeros((len(sents), 20))
-        #     for j in tqdm(range(len(sents))):
-        #         for i, word in enumerate(sents[j].split()):
-        #             try:
-        #                 sents_id[j, i] = t2id[word]
-        #             except:
-        #                 sents_id[j, i] = t2id['UNK']
-        #             if i == 19:
-        #                 break
-        #     return sents_id
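For readers following the change: the commit replaces the placeholder scores of 0 with probabilities derived from the cosine distances, mapping each distance d to exp(-10*d) and normalizing the ten values with a softmax. The snippet below is a minimal, self-contained sketch of that scoring path, not code from the repository: the random `embedding` and `comparisons` arrays are stand-ins for the model output and the pipeline's `self.comparisons` matrix, and it assumes numpy, scipy, and TensorFlow are installed.

    import numpy as np
    import tensorflow as tf
    from scipy.spatial import distance

    # Stand-in data: one 300-d query embedding and 1000 candidate embeddings.
    rng = np.random.default_rng(0)
    embedding = rng.normal(size=(1, 300))
    comparisons = rng.normal(size=(1000, 300))

    # Cosine distance to every candidate; argsort is ascending, so the
    # 10 smallest distances (closest words) come first.
    similarities = distance.cdist(embedding, comparisons, "cosine")[0]
    top_indices = similarities.argsort()[:10]

    # The commit's scoring: exp(-10*d) turns small distances into large
    # logits, and the softmax normalizes them into probabilities, so the
    # closest word receives the highest score.
    logits = np.exp(-10 * similarities[top_indices])
    softmax_probs = tf.nn.softmax(logits).numpy()
    top_scores = [round(float(softmax_probs[i]), 3) for i in range(10)]
    print(top_scores)  # ten values summing to ~1, in descending order

One consequence of this formulation worth noting: because the logits exp(-10*d) already lie in (0, 1], the softmax output is fairly flat (no score can exceed e ≈ 2.72 times the smallest of the ten). Passing -10*d to the softmax directly would give sharper separation, but the sketch reproduces the committed behavior as-is.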