Update app.py
app.py CHANGED
@@ -1,6 +1,6 @@
 import os
 import gc
-import
+import io
 from llama_cpp import Llama
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import gradio as gr
@@ -8,30 +8,30 @@ from fastapi import FastAPI, Request, HTTPException
 from fastapi.responses import JSONResponse
 from tqdm import tqdm
 from dotenv import load_dotenv
-from
-import
+from pydantic import BaseModel
+import asyncio

-
-
-os.system("pip install llama-cpp-python")
+load_dotenv()
+os.system("pip install --upgrade llama-cpp-python")

 app = FastAPI()
-load_dotenv()
 HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")

 global_data = {
     'model_configs': [
-        {"repo_id": "Ffftdtd5dtft/gpt2-xl-Q2_K-GGUF", "
-        {"repo_id": "Ffftdtd5dtft/gemma-2-27b-Q2_K-GGUF", "
-        {"repo_id": "Ffftdtd5dtft/Phi-3-mini-128k-instruct-Q2_K-GGUF", "
-        {"repo_id": "Ffftdtd5dtft/starcoder2-3b-Q2_K-GGUF", "
-        {"repo_id": "Ffftdtd5dtft/Qwen2-1.5B-Instruct-Q2_K-GGUF", "
-        {"repo_id": "Ffftdtd5dtft/Mistral-Nemo-Instruct-2407-Q2_K-GGUF", "
-        {"repo_id": "Ffftdtd5dtft/Phi-3-mini-128k-instruct-IQ2_XXS-GGUF", "
-        {"repo_id": "Ffftdtd5dtft/TinyLlama-1.1B-Chat-v1.0-IQ1_S-GGUF", "
-        {"repo_id": "Ffftdtd5dtft/Meta-Llama-3.1-8B-Q2_K-GGUF", "
-        {"repo_id": "Ffftdtd5dtft/codegemma-2b-IQ1_S-GGUF", "
-    ]
+        {"repo_id": "Ffftdtd5dtft/gpt2-xl-Q2_K-GGUF", "name": "GPT-2 XL"},
+        {"repo_id": "Ffftdtd5dtft/gemma-2-27b-Q2_K-GGUF", "name": "Gemma 2-27B"},
+        {"repo_id": "Ffftdtd5dtft/Phi-3-mini-128k-instruct-Q2_K-GGUF", "name": "Phi-3 Mini 128K Instruct"},
+        {"repo_id": "Ffftdtd5dtft/starcoder2-3b-Q2_K-GGUF", "name": "Starcoder2 3B"},
+        {"repo_id": "Ffftdtd5dtft/Qwen2-1.5B-Instruct-Q2_K-GGUF", "name": "Qwen2 1.5B Instruct"},
+        {"repo_id": "Ffftdtd5dtft/Mistral-Nemo-Instruct-2407-Q2_K-GGUF", "name": "Mistral Nemo Instruct 2407"},
+        {"repo_id": "Ffftdtd5dtft/Phi-3-mini-128k-instruct-IQ2_XXS-GGUF", "name": "Phi 3 Mini 128K Instruct XXS"},
+        {"repo_id": "Ffftdtd5dtft/TinyLlama-1.1B-Chat-v1.0-IQ1_S-GGUF", "name": "TinyLlama 1.1B Chat"},
+        {"repo_id": "Ffftdtd5dtft/Meta-Llama-3.1-8B-Q2_K-GGUF", "name": "Meta Llama 3.1-8B"},
+        {"repo_id": "Ffftdtd5dtft/codegemma-2b-IQ1_S-GGUF", "name": "Codegemma 2B"},
+    ],
+    'training_data': io.StringIO(),
+    'auto_train_threshold': 10
 }

 class ModelManager:
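The registry above drives the rest of the file: entries in `model_configs` are looked up by their `name`, `training_data` is an in-memory buffer meant to collect interaction text, and `auto_train_threshold` sets how many interactions accumulate before the placeholder auto-training step runs. A minimal sketch of how such a registry can be consumed; `find_config` and `record_interaction` are illustrative helpers, not functions defined in this commit:

```python
# Sketch only: helpers assumed for illustration, not part of app.py.
def find_config(name: str):
    """Return the model config whose display name matches, or None."""
    for config in global_data['model_configs']:
        if config['name'] == name:
            return config
    return None

def record_interaction(prompt: str, response: str) -> None:
    """Append one prompt/response pair to the shared training buffer."""
    global_data['training_data'].write(f"{prompt}\t{response}\n")

print(find_config("TinyLlama 1.1B Chat"))
```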
@@ -49,18 +49,14 @@ class ModelManager:
         model_name = model_config['name']
         if model_name not in self.models:
             try:
-
-                filepath = os.path.join(tempdir.name, model_config['filename'])
-                model = Llama.from_pretrained(repo_id=model_config['repo_id'], filename=model_config['filename'], use_auth_token=HUGGINGFACE_TOKEN)
+                model = Llama.from_pretrained(repo_id=model_config['repo_id'], use_auth_token=HUGGINGFACE_TOKEN)
                 self.models[model_name] = model
-                model.model.model_path = filepath
             except Exception as e:
-                print(f"Error loading {model_name}: {e}")
                 self.models[model_name] = None
             finally:
                 gc.collect()

-    def get_model(self, model_name):
+    def get_model(self, model_name: str):
         return self.models.get(model_name)

 model_manager = ModelManager()
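`Llama.from_pretrained` in llama-cpp-python also accepts a `filename` argument (a concrete file name or an fnmatch-style glob) that selects one GGUF file from the repository; the removed line passed it from the config, while the new call leaves the choice to the library, which generally only works when the repo holds a single unambiguous GGUF file. A minimal sketch of the explicit form, assuming the repo contains a file matching `*q2_k.gguf` (that glob and the `n_ctx` value are assumptions, not taken from the commit):

```python
from llama_cpp import Llama

# Sketch only: load one quantized model with an explicit file pattern.
# The "*q2_k.gguf" glob and n_ctx are assumptions about the repo/model.
llm = Llama.from_pretrained(
    repo_id="Ffftdtd5dtft/Qwen2-1.5B-Instruct-Q2_K-GGUF",
    filename="*q2_k.gguf",
    n_ctx=2048,      # context window; tune per model
    verbose=False,
)

out = llm("Write a haiku about GPUs.", max_tokens=64)
print(out["choices"][0]["text"])
```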
@@ -68,15 +64,17 @@ model_manager = ModelManager()
 class ChatRequest(BaseModel):
     message: str

-
-def generate_model_response(model, inputs):
+async def generate_model_response(model, inputs: str) -> str:
     try:
         response = model(inputs, max_tokens=150)
         return response['choices'][0]['text']
     except Exception as e:
         return f"Error: Could not generate a response. Details: {e}"

-
+interaction_count = 0
+
+async def process_message(message: str) -> str:
+    global interaction_count
     inputs = message.strip()
     responses = {}

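One detail worth flagging in this hunk: `generate_model_response` becomes a coroutine function here, yet it is still submitted to a `ThreadPoolExecutor` further down, so each worker would hand back an un-awaited coroutine rather than text. A sketch of one way to reconcile that, assuming the model call itself stays synchronous; `generate_model_response_sync` and `fan_out` are illustrative names, not part of this commit:

```python
import asyncio

def generate_model_response_sync(model, inputs: str) -> str:
    """Plain blocking worker: run one llama.cpp completion."""
    try:
        response = model(inputs, max_tokens=150)
        return response['choices'][0]['text']
    except Exception as e:
        return f"Error: Could not generate a response. Details: {e}"

async def fan_out(inputs: str) -> dict:
    """Run every loaded model in a worker thread and gather the results."""
    configs = [c for c in global_data['model_configs']
               if model_manager.get_model(c['name'])]
    tasks = [asyncio.to_thread(generate_model_response_sync,
                               model_manager.get_model(c['name']), inputs)
             for c in configs]
    results = await asyncio.gather(*tasks)
    return {c['name']: text for c, text in zip(configs, results)}
```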
@@ -84,10 +82,23 @@ async def process_message(message):
         futures = [executor.submit(generate_model_response, model_manager.get_model(config['name']), inputs) for config in global_data['model_configs'] if model_manager.get_model(config['name'])]
         for i, future in enumerate(tqdm(as_completed(futures), total=len([f for f in futures]), desc="Generating responses")):
             model_name = global_data['model_configs'][i]['name']
-            responses[model_name] = future
+            responses[model_name] = await future
+
+    interaction_count += 1
+
+    if interaction_count >= global_data['auto_train_threshold']:
+        await auto_train_model()
+        interaction_count = 0

     return "\n\n".join([f"**{model}:**\n{response}" for model, response in responses.items()])

+async def auto_train_model():
+    training_data_content = global_data['training_data'].getvalue()
+    if training_data_content:
+        print("Auto training model with the following data:")
+        print(training_data_content)
+        await asyncio.sleep(1)
+
 @app.post("/generate_multimodel")
 async def api_generate_multimodel(request: Request):
     try:
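Two more details in the loop above: `as_completed` yields futures in completion order, so indexing `global_data['model_configs'][i]` can pair a response with the wrong model name, and `await future` on a `concurrent.futures.Future` needs `asyncio.wrap_future` first. A sketch that keeps names and results paired, reusing the synchronous worker assumed in the previous note (`process_message_sketch` is an illustrative name, not the commit's function):

```python
import asyncio
from concurrent.futures import ThreadPoolExecutor

async def process_message_sketch(message: str) -> str:
    inputs = message.strip()
    responses = {}
    with ThreadPoolExecutor() as executor:
        # Map each submitted future back to the model name it belongs to.
        future_to_name = {
            executor.submit(generate_model_response_sync,
                            model_manager.get_model(c['name']), inputs): c['name']
            for c in global_data['model_configs']
            if model_manager.get_model(c['name'])
        }
        for fut, name in future_to_name.items():
            # wrap_future makes a concurrent.futures.Future awaitable.
            responses[name] = await asyncio.wrap_future(fut)
    return "\n\n".join(f"**{m}:**\n{r}" for m, r in responses.items())
```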
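The diff view ends inside `api_generate_multimodel`, so the request handling itself is not shown. Assuming the endpoint reads a JSON body shaped like `ChatRequest` (a single `message` string) and returns JSON, a client call might look like the following; the host, port, and response shape are assumptions, not taken from the commit:

```python
import requests

# Assumed local URL for the FastAPI app; adjust host/port to your deployment.
resp = requests.post(
    "http://localhost:7860/generate_multimodel",
    json={"message": "Summarize what a GGUF file is."},
    timeout=120,
)
resp.raise_for_status()
print(resp.json())
```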