alighadami77 committed
Commit b3e3919 · Parent: 12e2be7

Update pipeline.py

Files changed (1):
  1. pipeline.py +13 -56
pipeline.py CHANGED
@@ -42,64 +42,21 @@ class PreTrainedPipeline():
         similarities = distance.cdist(embeddings.reshape((1,300)), self.comparisons, "cosine")[0]
         top_indices = similarities.argsort()[:10]
         top_words = [[self.id2h[str(top_indices[i])]] for i in range(10)]
-
+        logits = np.exp(-10*np.array(similarities[top_indices]))
+        softmax_probs = tf.nn.softmax(logits).numpy()
+        top_scores = [round(float(softmax_probs[i]), 3) for i in range(10)]
 
         return [
             [
-                {'label': top_words[0], 'score': 0},
-                {'label': top_words[1], 'score': 0},
-                {'label': top_words[2], 'score': 0},
-                {'label': top_words[3], 'score': 0},
+                {'label': top_words[0], 'score': top_scores[0]},
+                {'label': top_words[1], 'score': top_scores[1]},
+                {'label': top_words[2], 'score': top_scores[2]},
+                {'label': top_words[3], 'score': top_scores[3]},
+                {'label': top_words[4], 'score': top_scores[4]},
+                {'label': top_words[5], 'score': top_scores[5]},
+                {'label': top_words[6], 'score': top_scores[6]},
+                {'label': top_words[7], 'score': top_scores[7]},
+                {'label': top_words[8], 'score': top_scores[8]},
+                {'label': top_words[9], 'score': top_scores[9]},
             ]
         ]
-
-
-        # return [
-        #     [  # Sample output, call the model here TODO
-        #         {'label': 'POSITIVE', 'score': 0.05},
-        #         {'label': 'NEGATIVE', 'score': 0.03},
-        #         {'label': 'معنی', 'score': 0.92},
-        #         {'label': f'{inputs}', 'score': 0},
-        #     ]
-        # ]
-
-        # def RevDict(sent, flag, model):
-        #     """
-        #     This function receives a sentence from the user and returns the top 10 (for flag=0) or top 100 (for flag=1) predictions.
-        #     The input sentence is normalized and stop words are removed.
-        #     """
-
-        #     normalizer = Normalizer()
-        #     X_Normalized = normalizer.normalize(sent)
-        #     X_Tokens = word_tokenize(X_Normalized)
-        #     stopwords = [normalizer.normalize(x.strip()) for x in codecs.open(r"stopwords.txt", 'r', 'utf-8').readlines()]
-        #     X_Tokens = [t for t in X_Tokens if t not in stopwords]
-        #     preprocessed = [' '.join(X_Tokens)][0]
-        #     sent_ids = sent2id([preprocessed])
-        #     output = np.array((model.predict(sent_ids.reshape((1,20))).tolist()[0]))
-        #     distances = distance.cdist(output.reshape((1,300)), comparison_matrix, "cosine")[0]
-        #     min_index_100 = distances.argsort()[:100]
-        #     min_index_10 = distances.argsort()[:10]
-
-        #     temp = []
-        #     if flag == 0:
-        #         for i in range(10):
-        #             temp.append(id2h[str(min_index_10[i])])
-        #     elif flag == 1:
-        #         for i in range(100):
-        #             temp.append(id2h[str(min_index_100[i])])
-
-        #     for i in range(len(temp)):
-        #         print(temp[i])
-
-        # def sent2id(sents):
-        #     sents_id = np.zeros((len(sents), 20))
-        #     for j in tqdm(range(len(sents))):
-        #         for i, word in enumerate(sents[j].split()):
-        #             try:
-        #                 sents_id[j, i] = t2id[word]
-        #             except:
-        #                 sents_id[j, i] = t2id['UNK']
-        #             if i == 19:
-        #                 break
-        #     return sents_id
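For readers following the change: the commit replaces the placeholder scores of 0 with probabilities derived from the cosine distances, mapping each distance d to exp(-10*d) and normalizing the ten values with a softmax. The snippet below is a minimal, self-contained sketch of that scoring path, not code from the repository: the random `embedding` and `comparisons` arrays are stand-ins for the model output and the pipeline's `self.comparisons` matrix, and it assumes numpy, scipy, and TensorFlow are installed.

    import numpy as np
    import tensorflow as tf
    from scipy.spatial import distance

    # Stand-in data: one 300-d query embedding and 1000 candidate embeddings.
    rng = np.random.default_rng(0)
    embedding = rng.normal(size=(1, 300))
    comparisons = rng.normal(size=(1000, 300))

    # Cosine distance to every candidate; argsort is ascending, so the
    # 10 smallest distances (closest words) come first.
    similarities = distance.cdist(embedding, comparisons, "cosine")[0]
    top_indices = similarities.argsort()[:10]

    # The commit's scoring: exp(-10*d) turns small distances into large
    # logits, and the softmax normalizes them into probabilities, so the
    # closest word receives the highest score.
    logits = np.exp(-10 * similarities[top_indices])
    softmax_probs = tf.nn.softmax(logits).numpy()
    top_scores = [round(float(softmax_probs[i]), 3) for i in range(10)]
    print(top_scores)  # ten values summing to ~1, in descending order

One consequence of this formulation worth noting: because the logits exp(-10*d) already lie in (0, 1], the softmax output is fairly flat (no score can exceed e ≈ 2.72 times the smallest of the ten). Passing -10*d to the softmax directly would give sharper separation, but the sketch reproduces the committed behavior as-is.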