Fix pipeline

pipeline.py (+52, -44)
@@ -2,7 +2,8 @@
 import tensorflow as tf
 
 class PreTrainedPipeline():
-    def __init__(self):
+    def __init__(self, path):
+        # define the best model TODO
         sequence_input = tf.keras.Input(shape=(300), name='input')
         x = tf.keras.layers.Dense(2048, activation="LeakyReLU")(sequence_input)
         x = tf.keras.layers.Dense(1024, activation="LeakyReLU")(x)
@@ -17,48 +18,55 @@ class PreTrainedPipeline():
 
         model.compile(optimizer="Adamax", loss="cosine_similarity")
 
-
-
-
-
-
-#
-
-
-
-
-
-# normalizer = Normalizer()
-# X_Normalized = normalizer.normalize(sent)
-# X_Tokens = word_tokenize(X_Normalized)
-# stopwords = [normalizer.normalize(x.strip()) for x in codecs.open(r"stopwords.txt",'r','utf-8').readlines()]
-# X_Tokens = [t for t in X_Tokens if t not in stopwords]
-# preprocessed = [' '.join(X_Tokens)][0]
-# sent_ids = sent2id([preprocessed])
-# output=np.array((model.predict(sent_ids.reshape((1,20))).tolist()[0]))
-# distances=distance.cdist(output.reshape((1,300)), comparison_matrix, "cosine")[0]
-# min_index_100 = distances.argsort()[:100]
-# min_index_10 = distances.argsort()[:10]
+        # model.load_weights("path to model file") TODO
+
+        self.model = model
+
+    def __call__(self, inputs):
+        return [ # Sample output, call the model here TODO
+            {'label': 'POSITIVE', 'score': 0.05},
+            {'label': 'NEGATIVE', 'score': 0.03},
+            {'label': 'معنی', 'score': 0.92},
+            {'label': f'{inputs}', 'score': 0},
+        ]
 
-#
-#
-#
-#
-#
-
-#
-
-#
-#
+# def RevDict(sent,flag,model):
+#     """
+#     This function receives a sentence from the user and returns the top_10 (for flag=0) or top_100 (for flag=1) predictions.
+#     The input sentence will be normalized, and stop words will be removed.
+#     """
+
+#     normalizer = Normalizer()
+#     X_Normalized = normalizer.normalize(sent)
+#     X_Tokens = word_tokenize(X_Normalized)
+#     stopwords = [normalizer.normalize(x.strip()) for x in codecs.open(r"stopwords.txt",'r','utf-8').readlines()]
+#     X_Tokens = [t for t in X_Tokens if t not in stopwords]
+#     preprocessed = [' '.join(X_Tokens)][0]
+#     sent_ids = sent2id([preprocessed])
+#     output=np.array((model.predict(sent_ids.reshape((1,20))).tolist()[0]))
+#     distances=distance.cdist(output.reshape((1,300)), comparison_matrix, "cosine")[0]
+#     min_index_100 = distances.argsort()[:100]
+#     min_index_10 = distances.argsort()[:10]
+
+#     temp=[]
+#     if flag == 0:
+#         for i in range(10):
+#             temp.append(id2h[str(min_index_10[i])])
+#     elif flag == 1:
+#         for i in range(100):
+#             temp.append(id2h[str(min_index_100[i])])
+
+#     for i in range(len(temp)):
+#         print(temp[i])
 
-# def sent2id(sents):
-#     sents_id=np.zeros((len(sents),20))
-#     for j in tqdm(range(len(sents))):
-#         for i,word in enumerate(sents[j].split()):
-#             try:
-#                 sents_id[j,i] = t2id[word]
-#             except:
-#                 sents_id[j,i] = t2id['UNK']
-#             if i==19:
-#                 break
-#     return sents_id
+# def sent2id(sents):
+#     sents_id=np.zeros((len(sents),20))
+#     for j in tqdm(range(len(sents))):
+#         for i,word in enumerate(sents[j].split()):
+#             try:
+#                 sents_id[j,i] = t2id[word]
+#             except:
+#                 sents_id[j,i] = t2id['UNK']
+#             if i==19:
+#                 break
+#     return sents_id
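For reference, below is a minimal sketch of how the remaining TODOs might be wired up, reusing the commented-out RevDict/sent2id logic as the body of __call__. Everything beyond what the diff shows is an assumption: that Normalizer and word_tokenize come from the hazm library (plausible given the Persian label in the sample output), that the t2id, id2h, comparison_matrix, and stopword artifacts are loaded elsewhere and passed in, and that sentences are capped at 20 tokens as the reshape((1,20)) call suggests.

# Sketch only: hazm as the tokenizer source, the helper names, and the
# 20-token cap are assumptions inferred from the commented-out code above.
import numpy as np
from hazm import Normalizer, word_tokenize  # assumed provider of these helpers
from scipy.spatial import distance

MAX_LEN = 20  # the commented-out code reshapes token ids to (1, 20)

def sent2id(sents, t2id):
    # Map each sentence to a fixed-length row of token ids, falling back to
    # 'UNK' for out-of-vocabulary words (same behaviour as the try/except above).
    sents_id = np.zeros((len(sents), MAX_LEN))
    for j, sent in enumerate(sents):
        for i, word in enumerate(sent.split()[:MAX_LEN]):
            sents_id[j, i] = t2id.get(word, t2id['UNK'])
    return sents_id

def rev_dict(sent, flag, model, t2id, id2h, comparison_matrix, stopwords):
    # Normalize, tokenize, and drop stopwords, as in the commented-out RevDict.
    normalizer = Normalizer()
    tokens = word_tokenize(normalizer.normalize(sent))
    tokens = [t for t in tokens if t not in stopwords]
    # Embed the sentence and rank dictionary entries by cosine distance.
    sent_ids = sent2id([' '.join(tokens)], t2id)
    output = np.asarray(model.predict(sent_ids.reshape((1, MAX_LEN)))[0])
    distances = distance.cdist(output.reshape((1, 300)), comparison_matrix, "cosine")[0]
    k = 10 if flag == 0 else 100  # flag == 0 -> top 10, flag == 1 -> top 100
    # Score each candidate as 1 - cosine distance (an assumed convention).
    return [(id2h[str(i)], 1.0 - distances[i]) for i in distances.argsort()[:k]]

__call__ could then return [{'label': word, 'score': float(score)} for word, score in rev_dict(inputs, 0, self.model, ...)], matching the label/score shape of the placeholder output. Note one open inconsistency in the source: the commented-out code feeds the model 20 token ids, while the model built in __init__ declares a 300-dimensional input, so the final model presumably differs from the TODO placeholder architecture.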