Update app.py
app.py CHANGED
@@ -4,13 +4,16 @@ import numpy as np
 import tensorflow as tf
 from google.cloud import storage
 from tensorflow import keras
-from
+from keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional, Input
+from keras.models import Model
 from keras.callbacks import EarlyStopping
 from sklearn.model_selection import train_test_split
 from dotenv import load_dotenv
 import random
 import nltk
 from nltk.corpus import wordnet
+from keras.preprocessing.text import Tokenizer
+from keras.preprocessing.sequence import pad_sequences

 nltk.download('punkt')
 nltk.download('wordnet')
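Note: the `keras.preprocessing.text` and `keras.preprocessing.sequence` import paths added here exist in standalone Keras 2.x but were removed in Keras 3. If the runtime ships a newer Keras, a minimal sketch of equivalent imports routed through the bundled `tensorflow.keras` namespace (an assumption about the environment, not part of this commit):

    # Sketch, assuming TensorFlow 2.15 or earlier, where tf.keras still
    # bundles the Keras 2.x preprocessing utilities.
    from tensorflow.keras.preprocessing.text import Tokenizer
    from tensorflow.keras.preprocessing.sequence import pad_sequences
    from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional, Input
    from tensorflow.keras.models import Model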
@@ -174,6 +177,7 @@ def expand_intent(intent):
         expanded_intents.append(new_intent)
     return expanded_intents

+
 num_intents = 100000
 intents = generate_intents(num_intents)
 sentences = generate_sentences(2000)
@@ -208,25 +212,31 @@ labels = [1] * len(intents)

 X_train, X_val, y_train, y_val = train_test_split(intents, labels, test_size=0.2, random_state=42)

-tokenizer =
+tokenizer = Tokenizer(num_words=5000, oov_token="<OOV>")
+tokenizer.fit_on_texts(X_train)

-
-
+train_sequences = tokenizer.texts_to_sequences(X_train)
+val_sequences = tokenizer.texts_to_sequences(X_val)

-
+max_length = 128
+train_padded = pad_sequences(train_sequences, maxlen=max_length, padding="post", truncating="post")
+val_padded = pad_sequences(val_sequences, maxlen=max_length, padding="post", truncating="post")

-
-
-bert_output = bert_model([input_ids, attention_mask])[1]
-dropout = keras.layers.Dropout(0.1)(bert_output)
-output = keras.layers.Dense(1, activation='sigmoid')(dropout)
+vocab_size = len(tokenizer.word_index) + 1
+embedding_dim = 100

-
+input_layer = Input(shape=(max_length,))
+embedding_layer = Embedding(vocab_size, embedding_dim)(input_layer)
+lstm_layer = Bidirectional(LSTM(64))(embedding_layer)
+dropout_layer = Dropout(0.5)(lstm_layer)
+output_layer = Dense(1, activation='sigmoid')(dropout_layer)

-model
+model = Model(inputs=input_layer, outputs=output_layer)

+model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
 early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
-
+
+history = model.fit(train_padded, np.array(y_train), validation_data=(val_padded, np.array(y_val)), epochs=10, batch_size=32, callbacks=[early_stopping])

 accuracy = history.history['accuracy'][-1]
 print(f"Accuracy: {accuracy}")
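A possible inference helper built on the objects this commit defines (`tokenizer`, `max_length`, `model`); the sample utterance and the 0.5 threshold are illustrative assumptions, not part of the commit:

    # Sketch: score a new utterance with the trained binary classifier.
    def predict_intent(text, threshold=0.5):
        seq = tokenizer.texts_to_sequences([text])
        padded = pad_sequences(seq, maxlen=max_length, padding="post", truncating="post")
        prob = float(model.predict(padded)[0][0])  # sigmoid output in [0, 1]
        return prob >= threshold, prob

    is_intent, prob = predict_intent("book me a flight for tomorrow")
    print(f"intent={is_intent} (p={prob:.3f})")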