ierhon committed

Commit e75ea50 · 1 Parent(s): 2d971fe

Update train.py


Create the model

Files changed (1): train.py +6 -3
train.py CHANGED
@@ -6,11 +6,11 @@ from keras.layers import Embedding, Dense, Dropout, Flatten, PReLU
 from keras.preprocessing.text import Tokenizer
 from keras_self_attention import SeqSelfAttention, SeqWeightedAttention
 
-with open("dataset.json", "r") as f:
+with open("dataset.json", "r") as f: # TODO: move the outputs into a separate file, so it would be "key": 0, "key2": 1 etc
     dset = json.load(f)
 
 dset_size = len(dset)
-tokenizer = Tokenizer()
+tokenizer = Tokenizer() # a tokenizer is a thing that splits text into words; it may also do other things, like lowercasing all the letters, etc.
 tokenizer.fit_on_texts(list(dset.keys()))
 
 emb_size = 128 # how big the word vectors in the input are (how much information can fit into one word)
@@ -21,8 +21,11 @@ model = Sequential()
 model.add(Embedding(input_dim=vocab_size, output_dim=emb_size, input_length=inp_len))
 model.add(SeqSelfAttention()) # an ATTENTION LAYER makes the model LEARN the MAIN INFORMATION in the text, AND NOT the TEXT ITSELF
 model.add(Flatten()) # SelfAttention and the embedding layer output a 2D array: a list of words with a list of numbers for each word
-model.add(Dense(1024, activation="relu"))
+model.add(Dense(1024, activation="relu")) # 1024 ReLU neurons. Why? 2 to the power of 10 is 1024, and I'm a fan of ReLU: it's double-fast (fast to train and fast to compute, no division, square roots or powers, just (x>0)*x) and overall cool
 model.add(Dropout(0.5)) # dropout makes the task harder by removing some information, 0.5 means delete 50% (it resets neurons to 0 so the model will truly focus on what's important, and not learn from data that's there by accident)
+model.add(Dense(512, activation="relu"))
+model.add(Dense(512, activation="relu"))
+model.add(Dense(256, activation="relu"))
 model.add(Dense(dset_size, activation="linear")) # TBH it doesn't matter that much which activation function to use; linear just does nothing at all to the output. This might become something like softmax, but I'll test that later
 
 model.save("chatbot.keras") # It's obvious what it does: saves the model to a file
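
For context, here is a rough sketch of how the updated layer stack reads as one script after this commit. The definitions of vocab_size and inp_len are not visible in the hunks above, so the values below are placeholder assumptions, as is the import path for Sequential; this is an illustration of the resulting architecture, not the actual train.py.

import json
from keras.models import Sequential  # assumed import path; not shown in the diff
from keras.layers import Embedding, Dense, Dropout, Flatten
from keras.preprocessing.text import Tokenizer
from keras_self_attention import SeqSelfAttention

with open("dataset.json", "r") as f:
    dset = json.load(f)  # keys are the input texts fed to the tokenizer below

dset_size = len(dset)
tokenizer = Tokenizer()
tokenizer.fit_on_texts(list(dset.keys()))

emb_size = 128                               # word-vector size, as in the diff
vocab_size = len(tokenizer.word_index) + 1   # assumption: +1 for the padding index
inp_len = 10                                 # assumption: maximum input length in words

model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=emb_size, input_length=inp_len))
model.add(SeqSelfAttention())                # attention over the embedded sequence
model.add(Flatten())                         # collapse (inp_len, features) into one vector
model.add(Dense(1024, activation="relu"))
model.add(Dropout(0.5))                      # zero out 50% of activations during training
model.add(Dense(512, activation="relu"))
model.add(Dense(512, activation="relu"))
model.add(Dense(256, activation="relu"))
model.add(Dense(dset_size, activation="linear"))  # one output per dataset entry
model.save("chatbot.keras")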