Yhhxhfh committed on
Commit
9a485d3
1 Parent(s): 2890033

Update app.py

Files changed (1): app.py +46 -40
app.py CHANGED
@@ -3,19 +3,14 @@ import json
 import numpy as np
 import tensorflow as tf
 from google.cloud import storage
-from keras_nlp.models import BERT
-from keras_nlp.tokenizers import BertTokenizer
-from keras_nlp.callbacks import EarlyStopping
+from tensorflow import keras
+from transformers import TFBertModel, BertTokenizerFast
+from keras.callbacks import EarlyStopping
 from sklearn.model_selection import train_test_split
 from dotenv import load_dotenv
-from tqdm import tqdm
-import io
 import random
 import nltk
 from nltk.corpus import wordnet
-from nltk import pos_tag
-from nltk.tokenize import word_tokenize, sent_tokenize
-from nltk.corpus import brown, stopwords, reuters, genesis

 nltk.download('punkt')
 nltk.download('wordnet')
@@ -211,35 +206,46 @@ intents = list(set(expanded_intents))[:100000]

 labels = [1] * len(intents)

-while True:
-    X_train, X_val, y_train, y_val = train_test_split(intents, labels, test_size=0.2, random_state=42)
-    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-    train_encodings = tokenizer(X_train, truncation=True, padding=True)
-    val_encodings = tokenizer(X_val, truncation=True, padding=True)
-
-    model = BERT.from_pretrained('bert-base-uncased')
-    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
-
-    early_stopping = EarlyStopping(monitor='val_loss', patience=3)
-    history = model.fit(train_encodings, y_train, validation_data=(val_encodings, y_val), epochs=10, callbacks=[early_stopping])
-
-    accuracy = history.history['accuracy'][-1]
-    print(f"Accuracy: {accuracy}")
-
-    intents_json = json.dumps(intents, ensure_ascii=False)
-    intents_file_path = 'intents.json'
-    model_file_path = 'model.h5'
-
-    bucket = storage_client.bucket(bucket_name)
-
-    intents_blob = bucket.blob(intents_file_path)
-    model_blob = bucket.blob(model_file_path)
-
-    if not intents_blob.exists():
-        intents_blob.upload_from_string(intents_json, content_type='application/json')
-        print(f"Intents uploaded to {intents_file_path} in bucket {bucket_name}.")
-
-    if not model_blob.exists():
-        model.save(model_file_path)
-        model_blob.upload_from_filename(model_file_path)
-        print(f"Model uploaded to {model_file_path} in bucket {bucket_name}.")
+X_train, X_val, y_train, y_val = train_test_split(intents, labels, test_size=0.2, random_state=42)
+
+tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
+
+train_encodings = tokenizer(X_train, truncation=True, padding=True, return_tensors="tf")
+val_encodings = tokenizer(X_val, truncation=True, padding=True, return_tensors="tf")
+
+bert_model = TFBertModel.from_pretrained('bert-base-uncased')
+
+input_ids = keras.layers.Input(shape=(None,), dtype=tf.int32, name="input_ids")
+attention_mask = keras.layers.Input(shape=(None,), dtype=tf.int32, name="attention_mask")
+bert_output = bert_model([input_ids, attention_mask])[1]
+dropout = keras.layers.Dropout(0.1)(bert_output)
+output = keras.layers.Dense(1, activation='sigmoid')(dropout)
+
+model = keras.Model(inputs=[input_ids, attention_mask], outputs=output)
+
+model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
+
+early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
+history = model.fit(x=train_encodings, y=np.array(y_train), validation_data=(val_encodings, np.array(y_val)), epochs=10, batch_size=16, callbacks=[early_stopping])
+
+accuracy = history.history['accuracy'][-1]
+print(f"Accuracy: {accuracy}")
+
+intents_json = json.dumps(intents, ensure_ascii=False)
+intents_file_path = 'intents.json'
+model_file_path = 'model.h5'
+
+bucket = storage_client.bucket(bucket_name)
+
+intents_blob = bucket.blob(intents_file_path)
+model_blob = bucket.blob(model_file_path)
+
+if not intents_blob.exists():
+    intents_blob.upload_from_string(intents_json, content_type='application/json')
+    print(f"Intents uploaded to {intents_file_path} in bucket {bucket_name}.")
+
+model.save(model_file_path)
+model_blob.upload_from_filename(model_file_path)
+print(f"Model uploaded to {model_file_path} in bucket {bucket_name}.")
+
+os.remove(model_file_path)
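
Note on the new training call: `BertTokenizerFast` returns a BatchEncoding that also contains `token_type_ids`, while the functional model above only declares `input_ids` and `attention_mask` as inputs, so passing `train_encodings` straight to `model.fit` can fail with an unexpected-input error on some TF/Keras versions. Below is a minimal defensive sketch, assuming the variables defined in app.py above (`train_encodings`, `val_encodings`, `y_train`, `y_val`, `model`, `early_stopping`); it is not part of the commit:

    # Keep only the tensors the Keras model declares as inputs;
    # dict keys are matched to the Input layer names.
    train_inputs = {
        "input_ids": train_encodings["input_ids"],
        "attention_mask": train_encodings["attention_mask"],
    }
    val_inputs = {
        "input_ids": val_encodings["input_ids"],
        "attention_mask": val_encodings["attention_mask"],
    }

    history = model.fit(
        x=train_inputs,
        y=np.array(y_train),
        validation_data=(val_inputs, np.array(y_val)),
        epochs=10,
        batch_size=16,
        callbacks=[early_stopping],
    )

Likewise, since the saved model.h5 embeds a `TFBertModel` layer, reloading it later would need that class registered, e.g. `keras.models.load_model('model.h5', custom_objects={'TFBertModel': TFBertModel})`.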