Yhhxhfh committed on
Commit 5f33166
1 Parent(s): 32d6875

Update app.py

Files changed (1)
  1. app.py +15 -16
app.py CHANGED
@@ -21,7 +21,8 @@ from fastapi.responses import HTMLResponse
  from pydantic import BaseModel
  from dotenv import load_dotenv
  from datetime import datetime
- from kareas_nlp import TextProcessor
+ from sklearn.feature_extraction.text import CountVectorizer
+ from sklearn.model_selection import train_test_split

  load_dotenv()

@@ -48,7 +49,6 @@ def create_intents_json():
  }
  ]
  }
-
  with open('intents.json', 'w') as f:
  json.dump(intents, f, ensure_ascii=False, indent=4)

@@ -56,10 +56,7 @@ def load_and_filter_data():
  with open("intents.json") as file:
  intents = json.load(file)

- filtered_intents = {
- "intents": []
- }
-
+ filtered_intents = {"intents": []}
  for intent in intents['intents']:
  if "date" in intent:
  intent_date = datetime.strptime(intent["date"], "%Y-%m-%d")
@@ -76,20 +73,19 @@ async def train_and_save_model():
  while True:
  words, classes, documents = [], [], []
  ignore_words = ['?', '!']
-
  intents = load_and_filter_data()

  user_questions = r.lrange('user_questions', 0, -1)

  for question in user_questions:
  question = question.decode('utf-8')
- processed_words = TextProcessor().process(question)
+ processed_words = nltk.word_tokenize(question)
  documents.append((processed_words, "user_question"))
  words.extend(processed_words)

  for intent in intents['intents']:
  for pattern in intent['patterns']:
- processed_words = TextProcessor().process(pattern)
+ processed_words = nltk.word_tokenize(pattern)
  documents.append((processed_words, intent['tag']))
  words.extend(processed_words)
  if intent['tag'] not in classes:
@@ -99,7 +95,7 @@ async def train_and_save_model():
  for pattern in intent['patterns']:
  synonyms = generate_synonyms(pattern)
  for synonym in synonyms:
- processed_words = TextProcessor().process(synonym)
+ processed_words = nltk.word_tokenize(synonym)
  documents.append((processed_words, intent['tag']))
  words.extend(processed_words)

@@ -125,6 +121,11 @@ async def train_and_save_model():
  train_x = np.array([row[0] for row in training])
  train_y = np.array([row[1] for row in training])

+ vectorizer = CountVectorizer()
+ X = vectorizer.fit_transform([" ".join(doc[0]) for doc in documents]).toarray()
+ y = [classes.index(doc[1]) for doc in documents]
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
  if r.exists('chatbot_model'):
  with tempfile.NamedTemporaryFile(delete=False, suffix='.h5') as temp_file:
  temp_file.write(r.get('chatbot_model'))
@@ -132,7 +133,7 @@
  model = load_model(temp_file_name)
  os.remove(temp_file.name)
  else:
- input_layer = Input(shape=(len(train_x[0]),))
+ input_layer = Input(shape=(len(X_train[0]),))
  layer1 = Dense(128, activation='relu')(input_layer)
  layer2 = Dropout(0.5)(layer1)
  layer3 = Dense(64, activation='relu')(layer2)
@@ -143,7 +144,7 @@
  sgd = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
  model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

- model.fit(train_x, train_y, epochs=1, batch_size=len(train_x), verbose=0)
+ model.fit(X_train, y_train, epochs=1, batch_size=len(X_train), verbose=0)

  r.set('words', pickle.dumps(words))
  r.set('classes', pickle.dumps(classes))
@@ -183,7 +184,7 @@ async def chat(message: ChatMessage):
  model = load_model(temp_file_name)
  os.remove(temp_file.name)

- sentence_words = TextProcessor().process(message.message)
+ sentence_words = nltk.word_tokenize(message.message)
  bag = [0] * len(words)
  for s in sentence_words:
  for i, w in enumerate(words):
@@ -302,7 +303,5 @@ async def root():
  if __name__ == "__main__":
  print("Iniciando la aplicación...")
  create_intents_json()
- initialize_redis()
- training_process = multiprocessing.Process(target=train_and_save_model)
- training_process.start()
+ asyncio.run(train_and_save_model())
  uvicorn.run(app, host="0.0.0.0", port=7860)
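
For context, a minimal standalone sketch of the preprocessing flow this commit switches to: nltk.word_tokenize for tokenization, CountVectorizer for bag-of-words features, and train_test_split for a holdout split. The example sentences, tags, and split ratio below are invented placeholders for illustration, not data or settings from the app.

# Illustrative sketch only: tokenize -> vectorize -> split, mirroring the added lines above.
# The sample documents and classes are made-up placeholders.
import nltk
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split

nltk.download('punkt', quiet=True)      # tokenizer data required by nltk.word_tokenize
nltk.download('punkt_tab', quiet=True)  # additionally required by newer NLTK releases

documents = [
    (nltk.word_tokenize("hola, como estas"), "saludo"),
    (nltk.word_tokenize("buenos dias"), "saludo"),
    (nltk.word_tokenize("adios, hasta luego"), "despedida"),
    (nltk.word_tokenize("nos vemos pronto"), "despedida"),
]
classes = ["saludo", "despedida"]

# As in the commit: join each token list back into a string before vectorizing.
vectorizer = CountVectorizer()
X = vectorizer.fit_transform([" ".join(doc[0]) for doc in documents]).toarray()
y = [classes.index(doc[1]) for doc in documents]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
print(X_train.shape, y_train)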