Update app.py
Browse files
app.py
CHANGED
@@ -2,9 +2,9 @@ import uvicorn
|
|
2 |
import nltk
|
3 |
nltk.download('punkt')
|
4 |
nltk.download('wordnet')
|
5 |
-
nltk.download('punkt_tab')
|
6 |
nltk.download('omw-1.4')
|
7 |
nltk.download('averaged_perceptron_tagger')
|
|
|
8 |
from nltk.stem import WordNetLemmatizer
|
9 |
from nltk.corpus import wordnet
|
10 |
from tqdm import tqdm
|
@@ -16,6 +16,8 @@ import random
|
|
16 |
import asyncio
|
17 |
import concurrent.futures
|
18 |
import multiprocessing
|
|
|
|
|
19 |
|
20 |
import numpy as np
|
21 |
from tensorflow.keras import Sequential
|
@@ -39,6 +41,7 @@ lemmatizer = WordNetLemmatizer()
|
|
39 |
redis_password = os.getenv("REDIS_PASSWORD")
|
40 |
r = redis.Redis(host=os.getenv("REDIS_HOST"), port=int(os.getenv("REDIS_PORT")), password=redis_password)
|
41 |
|
|
|
42 |
def initialize_redis():
|
43 |
global r
|
44 |
try:
|
@@ -48,6 +51,7 @@ def initialize_redis():
|
|
48 |
print("Error connecting to Redis. Exiting.")
|
49 |
exit(1)
|
50 |
|
|
|
51 |
async def train_and_save_model():
|
52 |
global lemmatizer, r
|
53 |
while True:
|
@@ -56,16 +60,19 @@ async def train_and_save_model():
|
|
56 |
documents = []
|
57 |
ignore_words = ['?', '!']
|
58 |
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
|
|
|
|
|
|
69 |
|
70 |
print("Loading user questions from Redis...")
|
71 |
if not r.exists('user_questions_loaded'):
|
@@ -83,7 +90,7 @@ async def train_and_save_model():
|
|
83 |
classes.append("unknown")
|
84 |
r.set('user_questions_loaded', 1)
|
85 |
|
86 |
-
print("Processing intents from
|
87 |
for intent in intents['intents']:
|
88 |
for pattern in intent['patterns']:
|
89 |
w = nltk.word_tokenize(pattern)
|
@@ -134,8 +141,9 @@ async def train_and_save_model():
|
|
134 |
train_y = np.array([row[1] for row in training])
|
135 |
|
136 |
print("Loading or creating model...")
|
137 |
-
if
|
138 |
-
|
|
|
139 |
else:
|
140 |
input_layer = Input(shape=(len(train_x[0]),))
|
141 |
layer1 = Dense(128, activation='relu')(input_layer)
|
@@ -150,16 +158,19 @@ async def train_and_save_model():
|
|
150 |
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
|
151 |
|
152 |
print("Training the model...")
|
153 |
-
model.fit(train_x, train_y, epochs=1, batch_size=len(train_x), verbose=0, callbacks=[TqdmCallback(verbose=2)])
|
154 |
|
155 |
print("Saving data to Redis...")
|
156 |
r.set('words', pickle.dumps(words))
|
157 |
r.set('classes', pickle.dumps(classes))
|
158 |
|
159 |
-
|
|
|
|
|
160 |
|
161 |
print("Data and model saved. Re-training...")
|
162 |
|
|
|
163 |
def generate_synonym_pattern(patterns):
|
164 |
new_pattern = []
|
165 |
for word in random.choice(patterns).split():
|
@@ -171,19 +182,23 @@ def generate_synonym_pattern(patterns):
|
|
171 |
new_pattern.append(word)
|
172 |
return " ".join(new_pattern)
|
173 |
|
|
|
174 |
def start_training_loop():
    """Create a dedicated asyncio event loop and run the training coroutine on it.

    NOTE(review): `train_and_save_model()` runs a `while True:` loop, so this
    blocks its thread indefinitely; it appears designed to run in a worker
    thread/process — confirm against the (unseen) caller.
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    loop.run_until_complete(train_and_save_model())
|
178 |
|
|
|
179 |
class ChatMessage(BaseModel):
    """Request-body schema for POST /chat (BaseModel presumably from pydantic;
    import not visible in this chunk — confirm)."""

    # The raw user message to tokenize and classify.
    message: str
|
181 |
|
|
|
182 |
@app.post("/chat")
|
183 |
async def chat(message: ChatMessage):
|
184 |
words = pickle.loads(r.get('words'))
|
185 |
classes = pickle.loads(r.get('classes'))
|
186 |
-
|
|
|
187 |
|
188 |
sentence_words = nltk.word_tokenize(message.message)
|
189 |
sentence_words = [lemmatizer.lemmatize(word.lower()) for word in sentence_words]
|
@@ -206,11 +221,13 @@ async def chat(message: ChatMessage):
|
|
206 |
|
207 |
return return_list
|
208 |
|
|
|
209 |
@app.post("/tag")
async def tag_question(question: str, tag: str):
    """Persist a manual tag for a question in Redis under the key `tag:<question>`.

    NOTE(review): plain `str` parameters are treated by FastAPI as query
    parameters, not a JSON body — confirm this is the intended API shape.
    """
    r.set(f"tag:{question}", tag)
    return {"message": "Tag saved"}
|
213 |
|
|
|
214 |
html_code = """
|
215 |
<!DOCTYPE html>
|
216 |
<html>
|
@@ -308,9 +325,11 @@ html_code = """
|
|
308 |
</html>
|
309 |
"""
|
310 |
|
|
|
311 |
@app.get("/", response_class=HTMLResponse)
|
312 |
async def root():
|
313 |
-
|
|
|
314 |
|
315 |
if __name__ == "__main__":
|
316 |
initialize_redis()
|
|
|
2 |
import nltk
|
3 |
nltk.download('punkt')
|
4 |
nltk.download('wordnet')
|
|
|
5 |
nltk.download('omw-1.4')
|
6 |
nltk.download('averaged_perceptron_tagger')
|
7 |
+
nltk.download('punkt_tab') # Download the punkt_tab resource
|
8 |
from nltk.stem import WordNetLemmatizer
|
9 |
from nltk.corpus import wordnet
|
10 |
from tqdm import tqdm
|
|
|
16 |
import asyncio
|
17 |
import concurrent.futures
|
18 |
import multiprocessing
|
19 |
+
import io
|
20 |
+
import os
|
21 |
|
22 |
import numpy as np
|
23 |
from tensorflow.keras import Sequential
|
|
|
41 |
redis_password = os.getenv("REDIS_PASSWORD")
|
42 |
r = redis.Redis(host=os.getenv("REDIS_HOST"), port=int(os.getenv("REDIS_PORT")), password=redis_password)
|
43 |
|
44 |
+
|
45 |
def initialize_redis():
|
46 |
global r
|
47 |
try:
|
|
|
51 |
print("Error connecting to Redis. Exiting.")
|
52 |
exit(1)
|
53 |
|
54 |
+
|
55 |
async def train_and_save_model():
|
56 |
global lemmatizer, r
|
57 |
while True:
|
|
|
60 |
documents = []
|
61 |
ignore_words = ['?', '!']
|
62 |
|
63 |
+
# Check if intents exist in Redis, otherwise load from local file and upload
|
64 |
+
if not r.exists('intents'):
|
65 |
+
if os.path.exists('intents.json'):
|
66 |
+
with open('intents.json') as f:
|
67 |
+
intents = json.load(f)
|
68 |
+
r.set('intents', json.dumps(intents))
|
69 |
+
print("Intents loaded from local file and uploaded to Redis.")
|
70 |
+
else:
|
71 |
+
intents = {"intents": []}
|
72 |
+
r.set('intents', json.dumps(intents))
|
73 |
+
print("intents.json not found locally, creating empty intents in Redis.")
|
74 |
+
else:
|
75 |
+
intents = json.loads(r.get('intents'))
|
76 |
|
77 |
print("Loading user questions from Redis...")
|
78 |
if not r.exists('user_questions_loaded'):
|
|
|
90 |
classes.append("unknown")
|
91 |
r.set('user_questions_loaded', 1)
|
92 |
|
93 |
+
print("Processing intents from Redis...")
|
94 |
for intent in intents['intents']:
|
95 |
for pattern in intent['patterns']:
|
96 |
w = nltk.word_tokenize(pattern)
|
|
|
141 |
train_y = np.array([row[1] for row in training])
|
142 |
|
143 |
print("Loading or creating model...")
|
144 |
+
if r.exists('chatbot_model'):
|
145 |
+
with io.BytesIO(r.get('chatbot_model')) as f:
|
146 |
+
model = load_model(f)
|
147 |
else:
|
148 |
input_layer = Input(shape=(len(train_x[0]),))
|
149 |
layer1 = Dense(128, activation='relu')(input_layer)
|
|
|
158 |
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
|
159 |
|
160 |
print("Training the model...")
|
161 |
+
model.fit(train_x, train_y, epochs=1, batch_size=len(train_x), verbose=0, callbacks=[TqdmCallback(verbose=2)])
|
162 |
|
163 |
print("Saving data to Redis...")
|
164 |
r.set('words', pickle.dumps(words))
|
165 |
r.set('classes', pickle.dumps(classes))
|
166 |
|
167 |
+
with io.BytesIO() as f:
|
168 |
+
save_model(model, f)
|
169 |
+
r.set('chatbot_model', f.getvalue())
|
170 |
|
171 |
print("Data and model saved. Re-training...")
|
172 |
|
173 |
+
|
174 |
def generate_synonym_pattern(patterns):
|
175 |
new_pattern = []
|
176 |
for word in random.choice(patterns).split():
|
|
|
182 |
new_pattern.append(word)
|
183 |
return " ".join(new_pattern)
|
184 |
|
185 |
+
|
186 |
def start_training_loop():
|
187 |
loop = asyncio.new_event_loop()
|
188 |
asyncio.set_event_loop(loop)
|
189 |
loop.run_until_complete(train_and_save_model())
|
190 |
|
191 |
+
|
192 |
class ChatMessage(BaseModel):
    """Request-body schema for POST /chat.

    NOTE(review): BaseModel is presumably pydantic's (via FastAPI); the import
    is not visible in this chunk — confirm.
    """

    # The raw user message to tokenize and classify.
    message: str
|
194 |
|
195 |
+
|
196 |
@app.post("/chat")
|
197 |
async def chat(message: ChatMessage):
|
198 |
words = pickle.loads(r.get('words'))
|
199 |
classes = pickle.loads(r.get('classes'))
|
200 |
+
with io.BytesIO(r.get('chatbot_model')) as f:
|
201 |
+
model = load_model(f)
|
202 |
|
203 |
sentence_words = nltk.word_tokenize(message.message)
|
204 |
sentence_words = [lemmatizer.lemmatize(word.lower()) for word in sentence_words]
|
|
|
221 |
|
222 |
return return_list
|
223 |
|
224 |
+
|
225 |
@app.post("/tag")
async def tag_question(question: str, tag: str):
    """Persist a manual tag for a question in Redis under the key `tag:<question>`.

    NOTE(review): both parameters are plain `str`, so FastAPI will read them as
    query parameters rather than a JSON body — confirm this is intended.
    Returns a small confirmation payload.
    """
    r.set(f"tag:{question}", tag)
    return {"message": "Tag saved"}
|
229 |
|
230 |
+
|
231 |
html_code = """
|
232 |
<!DOCTYPE html>
|
233 |
<html>
|
|
|
325 |
</html>
|
326 |
"""
|
327 |
|
328 |
+
|
329 |
@app.get("/", response_class=HTMLResponse)
async def root():
    """Serve the chat UI at the site root.

    Returns the module-level `html_code` string (the embedded single-page
    chat client defined earlier in this file) as an HTML response.
    """
    return html_code
|
332 |
+
|
333 |
|
334 |
if __name__ == "__main__":
|
335 |
initialize_redis()
|