Update app.py
app.py
CHANGED
@@ -3,19 +3,14 @@ import json
 import numpy as np
 import tensorflow as tf
 from google.cloud import storage
-from
-from
-from
+from tensorflow import keras
+from transformers import TFBertModel, BertTokenizerFast
+from keras.callbacks import EarlyStopping
 from sklearn.model_selection import train_test_split
 from dotenv import load_dotenv
-from tqdm import tqdm
-import io
 import random
 import nltk
 from nltk.corpus import wordnet
-from nltk import pos_tag
-from nltk.tokenize import word_tokenize, sent_tokenize
-from nltk.corpus import brown, stopwords, reuters, genesis
 
 nltk.download('punkt')
 nltk.download('wordnet')
@@ -211,35 +206,46 @@ intents = list(set(expanded_intents))[:100000]
 
 labels = [1] * len(intents)
 
-(32 removed lines; their contents are not shown in this view)
+X_train, X_val, y_train, y_val = train_test_split(intents, labels, test_size=0.2, random_state=42)
+
+tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
+
+train_encodings = tokenizer(X_train, truncation=True, padding=True, return_tensors="tf")
+val_encodings = tokenizer(X_val, truncation=True, padding=True, return_tensors="tf")
+
+bert_model = TFBertModel.from_pretrained('bert-base-uncased')
+
+input_ids = keras.layers.Input(shape=(None,), dtype=tf.int32, name="input_ids")
+attention_mask = keras.layers.Input(shape=(None,), dtype=tf.int32, name="attention_mask")
+bert_output = bert_model([input_ids, attention_mask])[1]
+dropout = keras.layers.Dropout(0.1)(bert_output)
+output = keras.layers.Dense(1, activation='sigmoid')(dropout)
+
+model = keras.Model(inputs=[input_ids, attention_mask], outputs=output)
+
+model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
+
+early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
+history = model.fit(x=train_encodings, y=np.array(y_train), validation_data=(val_encodings, np.array(y_val)), epochs=10, batch_size=16, callbacks=[early_stopping])
+
+accuracy = history.history['accuracy'][-1]
+print(f"Accuracy: {accuracy}")
+
+intents_json = json.dumps(intents, ensure_ascii=False)
+intents_file_path = 'intents.json'
+model_file_path = 'model.h5'
+
+bucket = storage_client.bucket(bucket_name)
+
+intents_blob = bucket.blob(intents_file_path)
+model_blob = bucket.blob(model_file_path)
+
+if not intents_blob.exists():
+    intents_blob.upload_from_string(intents_json, content_type='application/json')
+    print(f"Intents uploaded to {intents_file_path} in bucket {bucket_name}.")
+
+model.save(model_file_path)
+model_blob.upload_from_filename(model_file_path)
+print(f"Model uploaded to {model_file_path} in bucket {bucket_name}.")
+
+os.remove(model_file_path)