Commit a8bebb5
Parent(s): f54f661

build error

Files changed:
- model_utils.py (+5, -0)

model_utils.py CHANGED
@@ -66,6 +66,7 @@ def preprocess(docs):
     return newLists
 
 def tokenize_text(text, hugging_model='roberta-base'):
+    print("tokenize_text")
     clean_text = preprocess(text)
     tokenizer = AutoTokenizer.from_pretrained(hugging_model)
     inputs = tokenizer(clean_text, padding=True, truncation=True, return_tensors='tf')
@@ -73,12 +74,16 @@ def tokenize_text(text, hugging_model='roberta-base'):
     return x
 
 def single_predict(model, text, traits=['cAGR', 'cCON', 'cEXT', 'cOPN', 'cNEU']):
+    print("predict function-----")
     traits_scores = dict()
     predicted_labels = dict()
     x = tokenize_text([text])
     logits = model.predict(x, verbose=0).logits
+    print("logits function-----")
     probs = tf.math.sigmoid(logits).numpy()
+    print("sigmoid function-----")
     predictions = np.where(probs > 0.5, 1, 0)
+    print("predictions function-----")
     for t, s in zip(traits, probs[0]):
         traits_scores[t] = s
     for t, l in zip(traits, predictions[0]):
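
For context, a minimal usage sketch of the functions this diff touches (an assumption, not part of the commit): it stands in "roberta-base" with num_labels=5 for whatever fine-tuned checkpoint the Space actually loads, and it simply prints whatever single_predict returns, since that function's return statement falls outside the hunk shown above.

    # Hypothetical usage sketch -- not part of this commit.
    # Assumes model_utils.py is importable and a TF sequence-classification
    # checkpoint is available; "roberta-base" with num_labels=5 (one label per
    # personality trait) is a stand-in for the Space's real fine-tuned model.
    from transformers import TFAutoModelForSequenceClassification
    from model_utils import single_predict

    model = TFAutoModelForSequenceClassification.from_pretrained(
        "roberta-base", num_labels=5
    )
    out = single_predict(model, "I enjoy meeting new people and trying new things.")
    print(out)  # inspect the result alongside the new debug prints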