Fix pipeline

pipeline.py (+52, -44)
@@ -2,7 +2,8 @@
 import tensorflow as tf
 
 class PreTrainedPipeline():
-    def __init__(self):
+    def __init__(self, path):
+        # define the best model TODO
         sequence_input = tf.keras.Input(shape=(300), name='input')
         x = tf.keras.layers.Dense(2048, activation="LeakyReLU")(sequence_input)
         x = tf.keras.layers.Dense(1024, activation="LeakyReLU")(x)
@@ -17,48 +18,55 @@ class PreTrainedPipeline():
 
         model.compile(optimizer="Adamax", loss="cosine_similarity")
 
-
-
-
-
-
-#
-
-
-
-
-
-# normalizer = Normalizer()
-# X_Normalized = normalizer.normalize(sent)
-# X_Tokens = word_tokenize(X_Normalized)
-# stopwords = [normalizer.normalize(x.strip()) for x in codecs.open(r"stopwords.txt",'r','utf-8').readlines()]
-# X_Tokens = [t for t in X_Tokens if t not in stopwords]
-# preprocessed = [' '.join(X_Tokens)][0]
-# sent_ids = sent2id([preprocessed])
-# output=np.array((model.predict(sent_ids.reshape((1,20))).tolist()[0]))
-# distances=distance.cdist(output.reshape((1,300)), comparison_matrix, "cosine")[0]
-# min_index_100 = distances.argsort()[:100]
-# min_index_10 = distances.argsort()[:10]
+        # model.load_weights("path to model file") TODO
+
+        self.model = model
+
+    def __call__(self, inputs):
+        return [ # Sample output, call the model here TODO
+            {'label': 'POSITIVE', 'score': 0.05},
+            {'label': 'NEGATIVE', 'score': 0.03},
+            {'label': 'معنی', 'score': 0.92},
+            {'label': f'{inputs}', 'score': 0},
+        ]
 
-#
-#
-#
-#
-#
-
-#
-
-#
-#
+# def RevDict(sent,flag,model):
+#     """
+#     This function receives a sentence from the user and returns the top_10 (for flag=0) or top_100 (for flag=1) predictions.
+#     The input sentence will be normalized, and stop words will be removed.
+#     """
+
+#     normalizer = Normalizer()
+#     X_Normalized = normalizer.normalize(sent)
+#     X_Tokens = word_tokenize(X_Normalized)
+#     stopwords = [normalizer.normalize(x.strip()) for x in codecs.open(r"stopwords.txt",'r','utf-8').readlines()]
+#     X_Tokens = [t for t in X_Tokens if t not in stopwords]
+#     preprocessed = [' '.join(X_Tokens)][0]
+#     sent_ids = sent2id([preprocessed])
+#     output=np.array((model.predict(sent_ids.reshape((1,20))).tolist()[0]))
+#     distances=distance.cdist(output.reshape((1,300)), comparison_matrix, "cosine")[0]
+#     min_index_100 = distances.argsort()[:100]
+#     min_index_10 = distances.argsort()[:10]
+
+#     temp=[]
+#     if flag == 0:
+#         for i in range(10):
+#             temp.append(id2h[str(min_index_10[i])])
+#     elif flag == 1:
+#         for i in range(100):
+#             temp.append(id2h[str(min_index_100[i])])
+
+#     for i in range(len(temp)):
+#         print(temp[i])
 
-# def sent2id(sents):
-#     sents_id=np.zeros((len(sents),20))
-#     for j in tqdm(range(len(sents))):
-#         for i,word in enumerate(sents[j].split()):
-#             try:
-#                 sents_id[j,i] = t2id[word]
-#             except:
-#                 sents_id[j,i] = t2id['UNK']
-#             if i==19:
-#                 break
-#     return sents_id
+# def sent2id(sents):
+#     sents_id=np.zeros((len(sents),20))
+#     for j in tqdm(range(len(sents))):
+#         for i,word in enumerate(sents[j].split()):
+#             try:
+#                 sents_id[j,i] = t2id[word]
+#             except:
+#                 sents_id[j,i] = t2id['UNK']
+#             if i==19:
+#                 break
+#     return sents_id
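For reference, below is a minimal sketch of how the remaining TODOs might be wired up, reusing the commented-out RevDict/sent2id logic as the body of __call__. Everything beyond what the diff shows is an assumption: that Normalizer and word_tokenize come from the hazm library (plausible given the Persian label in the sample output), that the t2id, id2h, comparison_matrix, and stopword artifacts are loaded elsewhere and passed in, and that sentences are capped at 20 tokens as the reshape((1,20)) call suggests.

# Sketch only: hazm as the tokenizer source, the helper names, and the
# 20-token cap are assumptions inferred from the commented-out code above.
import numpy as np
from hazm import Normalizer, word_tokenize  # assumed provider of these helpers
from scipy.spatial import distance

MAX_LEN = 20  # the commented-out code reshapes token ids to (1, 20)

def sent2id(sents, t2id):
    # Map each sentence to a fixed-length row of token ids, falling back to
    # 'UNK' for out-of-vocabulary words (same behaviour as the try/except above).
    sents_id = np.zeros((len(sents), MAX_LEN))
    for j, sent in enumerate(sents):
        for i, word in enumerate(sent.split()[:MAX_LEN]):
            sents_id[j, i] = t2id.get(word, t2id['UNK'])
    return sents_id

def rev_dict(sent, flag, model, t2id, id2h, comparison_matrix, stopwords):
    # Normalize, tokenize, and drop stopwords, as in the commented-out RevDict.
    normalizer = Normalizer()
    tokens = word_tokenize(normalizer.normalize(sent))
    tokens = [t for t in tokens if t not in stopwords]
    # Embed the sentence and rank dictionary entries by cosine distance.
    sent_ids = sent2id([' '.join(tokens)], t2id)
    output = np.asarray(model.predict(sent_ids.reshape((1, MAX_LEN)))[0])
    distances = distance.cdist(output.reshape((1, 300)), comparison_matrix, "cosine")[0]
    k = 10 if flag == 0 else 100  # flag == 0 -> top 10, flag == 1 -> top 100
    # Score each candidate as 1 - cosine distance (an assumed convention).
    return [(id2h[str(i)], 1.0 - distances[i]) for i in distances.argsort()[:k]]

__call__ could then return [{'label': word, 'score': float(score)} for word, score in rev_dict(inputs, 0, self.model, ...)], matching the label/score shape of the placeholder output. Note one open inconsistency in the source: the commented-out code feeds the model 20 token ids, while the model built in __init__ declares a 300-dimensional input, so the final model presumably differs from the TODO placeholder architecture.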