Yhhxhfh committed on
Commit 5f33166
1 Parent(s): 32d6875

Update app.py

Files changed (1)
  1. app.py +15 -16
app.py CHANGED
@@ -21,7 +21,8 @@ from fastapi.responses import HTMLResponse
  from pydantic import BaseModel
  from dotenv import load_dotenv
  from datetime import datetime
- from kareas_nlp import TextProcessor
+ from sklearn.feature_extraction.text import CountVectorizer
+ from sklearn.model_selection import train_test_split

  load_dotenv()

@@ -48,7 +49,6 @@ def create_intents_json():
  }
  ]
  }
-
  with open('intents.json', 'w') as f:
  json.dump(intents, f, ensure_ascii=False, indent=4)

@@ -56,10 +56,7 @@ def load_and_filter_data():
  with open("intents.json") as file:
  intents = json.load(file)

- filtered_intents = {
- "intents": []
- }
-
+ filtered_intents = {"intents": []}
  for intent in intents['intents']:
  if "date" in intent:
  intent_date = datetime.strptime(intent["date"], "%Y-%m-%d")
@@ -76,20 +73,19 @@ async def train_and_save_model():
  while True:
  words, classes, documents = [], [], []
  ignore_words = ['?', '!']
-
  intents = load_and_filter_data()

  user_questions = r.lrange('user_questions', 0, -1)

  for question in user_questions:
  question = question.decode('utf-8')
- processed_words = TextProcessor().process(question)
+ processed_words = nltk.word_tokenize(question)
  documents.append((processed_words, "user_question"))
  words.extend(processed_words)

  for intent in intents['intents']:
  for pattern in intent['patterns']:
- processed_words = TextProcessor().process(pattern)
+ processed_words = nltk.word_tokenize(pattern)
  documents.append((processed_words, intent['tag']))
  words.extend(processed_words)
  if intent['tag'] not in classes:
@@ -99,7 +95,7 @@ async def train_and_save_model():
  for pattern in intent['patterns']:
  synonyms = generate_synonyms(pattern)
  for synonym in synonyms:
- processed_words = TextProcessor().process(synonym)
+ processed_words = nltk.word_tokenize(synonym)
  documents.append((processed_words, intent['tag']))
  words.extend(processed_words)

@@ -125,6 +121,11 @@ async def train_and_save_model():
  train_x = np.array([row[0] for row in training])
  train_y = np.array([row[1] for row in training])

+ vectorizer = CountVectorizer()
+ X = vectorizer.fit_transform([" ".join(doc[0]) for doc in documents]).toarray()
+ y = [classes.index(doc[1]) for doc in documents]
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
  if r.exists('chatbot_model'):
  with tempfile.NamedTemporaryFile(delete=False, suffix='.h5') as temp_file:
  temp_file.write(r.get('chatbot_model'))
@@ -132,7 +133,7 @@
  model = load_model(temp_file_name)
  os.remove(temp_file.name)
  else:
- input_layer = Input(shape=(len(train_x[0]),))
+ input_layer = Input(shape=(len(X_train[0]),))
  layer1 = Dense(128, activation='relu')(input_layer)
  layer2 = Dropout(0.5)(layer1)
  layer3 = Dense(64, activation='relu')(layer2)
@@ -143,7 +144,7 @@
  sgd = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
  model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

- model.fit(train_x, train_y, epochs=1, batch_size=len(train_x), verbose=0)
+ model.fit(X_train, y_train, epochs=1, batch_size=len(X_train), verbose=0)

  r.set('words', pickle.dumps(words))
  r.set('classes', pickle.dumps(classes))
@@ -183,7 +184,7 @@ async def chat(message: ChatMessage):
  model = load_model(temp_file_name)
  os.remove(temp_file.name)

- sentence_words = TextProcessor().process(message.message)
+ sentence_words = nltk.word_tokenize(message.message)
  bag = [0] * len(words)
  for s in sentence_words:
  for i, w in enumerate(words):
@@ -302,7 +303,5 @@ async def root():
  if __name__ == "__main__":
  print("Iniciando la aplicación...")
  create_intents_json()
- initialize_redis()
- training_process = multiprocessing.Process(target=train_and_save_model)
- training_process.start()
+ asyncio.run(train_and_save_model())
  uvicorn.run(app, host="0.0.0.0", port=7860)
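
For context, a minimal standalone sketch of the preprocessing flow this commit switches to: nltk.word_tokenize for tokenization, CountVectorizer for bag-of-words features, and train_test_split for a holdout split. The example sentences, tags, and split ratio below are invented placeholders for illustration, not data or settings from the app.

# Illustrative sketch only: tokenize -> vectorize -> split, mirroring the added lines above.
# The sample documents and classes are made-up placeholders.
import nltk
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split

nltk.download('punkt', quiet=True)      # tokenizer data required by nltk.word_tokenize
nltk.download('punkt_tab', quiet=True)  # additionally required by newer NLTK releases

documents = [
    (nltk.word_tokenize("hola, como estas"), "saludo"),
    (nltk.word_tokenize("buenos dias"), "saludo"),
    (nltk.word_tokenize("adios, hasta luego"), "despedida"),
    (nltk.word_tokenize("nos vemos pronto"), "despedida"),
]
classes = ["saludo", "despedida"]

# As in the commit: join each token list back into a string before vectorizing.
vectorizer = CountVectorizer()
X = vectorizer.fit_transform([" ".join(doc[0]) for doc in documents]).toarray()
y = [classes.index(doc[1]) for doc in documents]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
print(X_train.shape, y_train)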