Yhhxhfh committed on
Commit aa18bae
Parent: 9a485d3

Update app.py

Files changed (1): app.py +23 -13
app.py CHANGED
@@ -4,13 +4,16 @@ import numpy as np
 import tensorflow as tf
 from google.cloud import storage
 from tensorflow import keras
-from transformers import TFBertModel, BertTokenizerFast
+from keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional, Input
+from keras.models import Model
 from keras.callbacks import EarlyStopping
 from sklearn.model_selection import train_test_split
 from dotenv import load_dotenv
 import random
 import nltk
 from nltk.corpus import wordnet
+from keras.preprocessing.text import Tokenizer
+from keras.preprocessing.sequence import pad_sequences
 
 nltk.download('punkt')
 nltk.download('wordnet')
@@ -174,6 +177,7 @@ def expand_intent(intent):
         expanded_intents.append(new_intent)
     return expanded_intents
 
+
 num_intents = 100000
 intents = generate_intents(num_intents)
 sentences = generate_sentences(2000)
@@ -208,25 +212,31 @@ labels = [1] * len(intents)
 
 X_train, X_val, y_train, y_val = train_test_split(intents, labels, test_size=0.2, random_state=42)
 
-tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
+tokenizer = Tokenizer(num_words=5000, oov_token="<OOV>")
+tokenizer.fit_on_texts(X_train)
 
-train_encodings = tokenizer(X_train, truncation=True, padding=True, return_tensors="tf")
-val_encodings = tokenizer(X_val, truncation=True, padding=True, return_tensors="tf")
+train_sequences = tokenizer.texts_to_sequences(X_train)
+val_sequences = tokenizer.texts_to_sequences(X_val)
 
-bert_model = TFBertModel.from_pretrained('bert-base-uncased')
+max_length = 128
+train_padded = pad_sequences(train_sequences, maxlen=max_length, padding="post", truncating="post")
+val_padded = pad_sequences(val_sequences, maxlen=max_length, padding="post", truncating="post")
 
-input_ids = keras.layers.Input(shape=(None,), dtype=tf.int32, name="input_ids")
-attention_mask = keras.layers.Input(shape=(None,), dtype=tf.int32, name="attention_mask")
-bert_output = bert_model([input_ids, attention_mask])[1]
-dropout = keras.layers.Dropout(0.1)(bert_output)
-output = keras.layers.Dense(1, activation='sigmoid')(dropout)
+vocab_size = len(tokenizer.word_index) + 1
+embedding_dim = 100
 
-model = keras.Model(inputs=[input_ids, attention_mask], outputs=output)
+input_layer = Input(shape=(max_length,))
+embedding_layer = Embedding(vocab_size, embedding_dim)(input_layer)
+lstm_layer = Bidirectional(LSTM(64))(embedding_layer)
+dropout_layer = Dropout(0.5)(lstm_layer)
+output_layer = Dense(1, activation='sigmoid')(dropout_layer)
 
-model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
+model = Model(inputs=input_layer, outputs=output_layer)
 
+model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
 early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
-history = model.fit(x=train_encodings, y=np.array(y_train), validation_data=(val_encodings, np.array(y_val)), epochs=10, batch_size=16, callbacks=[early_stopping])
+
+history = model.fit(train_padded, np.array(y_train), validation_data=(val_padded, np.array(y_val)), epochs=10, batch_size=32, callbacks=[early_stopping])
 
 accuracy = history.history['accuracy'][-1]
 print(f"Accuracy: {accuracy}")
 
4
  import tensorflow as tf
5
  from google.cloud import storage
6
  from tensorflow import keras
7
+ from keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional, Input
8
+ from keras.models import Model
9
  from keras.callbacks import EarlyStopping
10
  from sklearn.model_selection import train_test_split
11
  from dotenv import load_dotenv
12
  import random
13
  import nltk
14
  from nltk.corpus import wordnet
15
+ from keras.preprocessing.text import Tokenizer
16
+ from keras.preprocessing.sequence import pad_sequences
17
 
18
  nltk.download('punkt')
19
  nltk.download('wordnet')
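For context, a minimal self-contained sketch of the preprocessing pipeline this commit switches to (Keras `Tokenizer` plus `pad_sequences` in place of `BertTokenizerFast`); the sample sentences below are invented for illustration, while app.py fits the tokenizer on `X_train`:

```python
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

# Invented sample texts, for illustration only.
texts = ["turn on the lights", "what is the weather like today"]

# num_words keeps only the 5000 most frequent tokens; rarer words
# map to the out-of-vocabulary token "<OOV>" (index 1 by default).
tokenizer = Tokenizer(num_words=5000, oov_token="<OOV>")
tokenizer.fit_on_texts(texts)

sequences = tokenizer.texts_to_sequences(texts)  # lists of integer token ids
padded = pad_sequences(sequences, maxlen=128, padding="post", truncating="post")
print(padded.shape)  # (2, 128): zero-padded at the end up to maxlen
```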
 
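The new classification head replaces the pooled BERT output with a trainable embedding feeding a bidirectional LSTM. Below is a standalone sketch of that head with shape comments; `vocab_size` is a placeholder here, since app.py computes it as `len(tokenizer.word_index) + 1` (index 0 is reserved for padding, hence the +1):

```python
from keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional, Input
from keras.models import Model

max_length = 128    # padded sequence length, as in the commit
vocab_size = 5001   # placeholder; len(tokenizer.word_index) + 1 at run time
embedding_dim = 100

inputs = Input(shape=(max_length,))               # (batch, 128) integer token ids
x = Embedding(vocab_size, embedding_dim)(inputs)  # (batch, 128, 100)
x = Bidirectional(LSTM(64))(x)                    # (batch, 128): 64 units per direction, concatenated
x = Dropout(0.5)(x)                               # regularization before the classifier
outputs = Dense(1, activation='sigmoid')(x)       # (batch, 1) probability for the binary label

model = Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()  # prints layer output shapes and parameter counts
```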
 
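The commit stops at training. For completeness, a hypothetical sketch of how the fitted tokenizer and trained model could score a new utterance; the helper `predict_intent` is an assumption for illustration, not part of app.py:

```python
from keras.preprocessing.sequence import pad_sequences

def predict_intent(text, tokenizer, model, max_length=128):
    # Mirror the training-time preprocessing: integer ids, then post-padding.
    seq = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(seq, maxlen=max_length, padding="post", truncating="post")
    prob = float(model.predict(padded, verbose=0)[0][0])  # sigmoid output in [0, 1]
    return (1 if prob >= 0.5 else 0), prob

# e.g.: label, prob = predict_intent("book a table for two", tokenizer, model)
```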