Files changed (1) hide show
  1. pipeline.py +49 -20
pipeline.py CHANGED
@@ -1,36 +1,65 @@
1
  # from scipy.special import softmax
2
  import tensorflow as tf
 
 
 
 
 
 
 
3
 
4
  class PreTrainedPipeline():
5
  def __init__(self, path):
6
- # define the best model TODO
7
- sequence_input = tf.keras.Input(shape=(300), name='input')
8
- x = tf.keras.layers.Dense(2048, activation="LeakyReLU")(sequence_input)
9
- x = tf.keras.layers.Dense(1024, activation="LeakyReLU")(x)
10
- x = tf.keras.layers.Dense(512, activation="LeakyReLU")(x)
11
- x = tf.keras.layers.Dense(128, activation="LeakyReLU")(x)
12
- x = tf.keras.layers.Dense(512, activation="LeakyReLU")(x)
13
- x = tf.keras.layers.Dense(1024, activation="LeakyReLU")(x)
14
- x = tf.keras.layers.Dense(2048, activation="LeakyReLU")(x)
15
- outputs = tf.keras.layers.Dense(300, activation="tanh")(x)
16
 
17
- model = tf.keras.Model(sequence_input, outputs)
 
18
 
19
- model.compile(optimizer="Adamax", loss="cosine_similarity")
20
 
21
- # model.load_weights("path to model file") TODO
22
 
23
- self.model = model
 
 
 
 
 
 
 
 
24
 
25
- def __call__(self, inputs: str):
 
 
 
 
 
 
 
26
  return [
27
- [ # Sample output, call the model here TODO
28
- {'label': 'POSITIVE', 'score': 0.05},
29
- {'label': 'NEGATIVE', 'score': 0.03},
30
- {'label': 'معنی', 'score': 0.92},
31
- {'label': f'{inputs}', 'score': 0},
32
  ]
33
  ]
 
 
 
 
 
 
 
 
 
 
34
 
35
  # def RevDict(sent,flag,model):
36
  # """
 
1
  # from scipy.special import softmax
2
  import tensorflow as tf
3
+ from transformers import Pipeline
4
+ import tensorflow as tf
5
+ import numpy as np
6
+ import json
7
+ from hazm import *
8
+ from scipy.spatial import distance
9
+
10
 
11
  class PreTrainedPipeline():
12
  def __init__(self, path):
13
+ self.model_dir = "saved_model"
14
+ self.t2id_path = "t2id.json"
15
+ self.stopwords_path = "stopwords.txt"
16
+ self.id2h_path = "id2h.json"
17
+ self.t2id = json.load(open(self.t2id_path,encoding="utf8"))
18
+ self.id2h = json.load(open(self.id2h_path,encoding="utf8"))
 
 
 
 
19
 
20
+ self.stopwords = set(line.strip() for line in open(self.stopwords_path,encoding="utf8"))
21
+ self.comparisons = np.load(self.comparison_matrix_path)['arr_0']
22
 
23
+ self.model = tf.saved_model.load(self.model_dir)
24
 
25
+ def __call__(self, inputs: str):
26
 
27
+ # Preprocess the input sentence
28
+ sentence = Normalizer().normalize(inputs)
29
+ tokens = word_tokenize(sentence)
30
+ tokens = [t for t in tokens if t not in self.stopwords]
31
+ input_ids = np.zeros((1, 20))
32
+ for i, token in enumerate(tokens):
33
+ if i >= 20:
34
+ break
35
+ input_ids[0, i] = self.t2id.get(token, self.t2id['UNK'])
36
 
37
+ # Call the model on the input ids
38
+ embeddings = self.model(tf.constant(input_ids, dtype=tf.int32)).numpy()
39
+ # Postprocess the embeddings to get the most similar words
40
+ similarities = distance.cdist(embeddings.reshape((1,300)), self.comparisons, "cosine")[0]
41
+ top_indices = similarities.argsort()[:10]
42
+ top_words = [[self.id2h[str(top_indices[i])]] for i in range(10)]
43
+
44
+
45
  return [
46
+ [
47
+ {'label': top_words[0], 'score': 0},
48
+ {'label': top_words[1], 'score': 0},
49
+ {'label': top_words[2], 'score': 0},
50
+ {'label': top_words[3], 'score': 0},
51
  ]
52
  ]
53
+
54
+
55
+ # return [
56
+ # [ # Sample output, call the model here TODO
57
+ # {'label': 'POSITIVE', 'score': 0.05},
58
+ # {'label': 'NEGATIVE', 'score': 0.03},
59
+ # {'label': 'معنی', 'score': 0.92},
60
+ # {'label': f'{inputs}', 'score': 0},
61
+ # ]
62
+ # ]
63
 
64
  # def RevDict(sent,flag,model):
65
  # """