Update app.py
app.py CHANGED
@@ -1,6 +1,6 @@
 import os
 import gc
-import
+import io
 from llama_cpp import Llama
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import gradio as gr
@@ -8,30 +8,30 @@ from fastapi import FastAPI, Request, HTTPException
 from fastapi.responses import JSONResponse
 from tqdm import tqdm
 from dotenv import load_dotenv
-from
-import
+from pydantic import BaseModel
+import asyncio

-
-
-os.system("pip install llama-cpp-python")
+load_dotenv()
+os.system("pip install --upgrade llama-cpp-python")

 app = FastAPI()
-load_dotenv()
 HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")

 global_data = {
     'model_configs': [
-        {"repo_id": "Ffftdtd5dtft/gpt2-xl-Q2_K-GGUF", "
-        {"repo_id": "Ffftdtd5dtft/gemma-2-27b-Q2_K-GGUF", "
-        {"repo_id": "Ffftdtd5dtft/Phi-3-mini-128k-instruct-Q2_K-GGUF", "
-        {"repo_id": "Ffftdtd5dtft/starcoder2-3b-Q2_K-GGUF", "
-        {"repo_id": "Ffftdtd5dtft/Qwen2-1.5B-Instruct-Q2_K-GGUF", "
-        {"repo_id": "Ffftdtd5dtft/Mistral-Nemo-Instruct-2407-Q2_K-GGUF", "
-        {"repo_id": "Ffftdtd5dtft/Phi-3-mini-128k-instruct-IQ2_XXS-GGUF", "
-        {"repo_id": "Ffftdtd5dtft/TinyLlama-1.1B-Chat-v1.0-IQ1_S-GGUF", "
-        {"repo_id": "Ffftdtd5dtft/Meta-Llama-3.1-8B-Q2_K-GGUF", "
-        {"repo_id": "Ffftdtd5dtft/codegemma-2b-IQ1_S-GGUF", "
-    ]
+        {"repo_id": "Ffftdtd5dtft/gpt2-xl-Q2_K-GGUF", "name": "GPT-2 XL"},
+        {"repo_id": "Ffftdtd5dtft/gemma-2-27b-Q2_K-GGUF", "name": "Gemma 2-27B"},
+        {"repo_id": "Ffftdtd5dtft/Phi-3-mini-128k-instruct-Q2_K-GGUF", "name": "Phi-3 Mini 128K Instruct"},
+        {"repo_id": "Ffftdtd5dtft/starcoder2-3b-Q2_K-GGUF", "name": "Starcoder2 3B"},
+        {"repo_id": "Ffftdtd5dtft/Qwen2-1.5B-Instruct-Q2_K-GGUF", "name": "Qwen2 1.5B Instruct"},
+        {"repo_id": "Ffftdtd5dtft/Mistral-Nemo-Instruct-2407-Q2_K-GGUF", "name": "Mistral Nemo Instruct 2407"},
+        {"repo_id": "Ffftdtd5dtft/Phi-3-mini-128k-instruct-IQ2_XXS-GGUF", "name": "Phi 3 Mini 128K Instruct XXS"},
+        {"repo_id": "Ffftdtd5dtft/TinyLlama-1.1B-Chat-v1.0-IQ1_S-GGUF", "name": "TinyLlama 1.1B Chat"},
+        {"repo_id": "Ffftdtd5dtft/Meta-Llama-3.1-8B-Q2_K-GGUF", "name": "Meta Llama 3.1-8B"},
+        {"repo_id": "Ffftdtd5dtft/codegemma-2b-IQ1_S-GGUF", "name": "Codegemma 2B"},
+    ],
+    'training_data': io.StringIO(),
+    'auto_train_threshold': 10
 }

 class ModelManager:
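The registry above drives the rest of the file: entries in `model_configs` are looked up by their `name`, `training_data` is an in-memory buffer meant to collect interaction text, and `auto_train_threshold` sets how many interactions accumulate before the placeholder auto-training step runs. A minimal sketch of how such a registry can be consumed; `find_config` and `record_interaction` are illustrative helpers, not functions defined in this commit:

```python
# Sketch only: helpers assumed for illustration, not part of app.py.
def find_config(name: str):
    """Return the model config whose display name matches, or None."""
    for config in global_data['model_configs']:
        if config['name'] == name:
            return config
    return None

def record_interaction(prompt: str, response: str) -> None:
    """Append one prompt/response pair to the shared training buffer."""
    global_data['training_data'].write(f"{prompt}\t{response}\n")

print(find_config("TinyLlama 1.1B Chat"))
```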
@@ -49,18 +49,14 @@ class ModelManager:
         model_name = model_config['name']
         if model_name not in self.models:
             try:
-
-                filepath = os.path.join(tempdir.name, model_config['filename'])
-                model = Llama.from_pretrained(repo_id=model_config['repo_id'], filename=model_config['filename'], use_auth_token=HUGGINGFACE_TOKEN)
+                model = Llama.from_pretrained(repo_id=model_config['repo_id'], use_auth_token=HUGGINGFACE_TOKEN)
                 self.models[model_name] = model
-                model.model.model_path = filepath
             except Exception as e:
-                print(f"Error loading {model_name}: {e}")
                 self.models[model_name] = None
             finally:
                 gc.collect()

-    def get_model(self, model_name):
+    def get_model(self, model_name: str):
         return self.models.get(model_name)

 model_manager = ModelManager()
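`Llama.from_pretrained` in llama-cpp-python also accepts a `filename` argument (a concrete file name or an fnmatch-style glob) that selects one GGUF file from the repository; the removed line passed it from the config, while the new call leaves the choice to the library, which generally only works when the repo holds a single unambiguous GGUF file. A minimal sketch of the explicit form, assuming the repo contains a file matching `*q2_k.gguf` (that glob and the `n_ctx` value are assumptions, not taken from the commit):

```python
from llama_cpp import Llama

# Sketch only: load one quantized model with an explicit file pattern.
# The "*q2_k.gguf" glob and n_ctx are assumptions about the repo/model.
llm = Llama.from_pretrained(
    repo_id="Ffftdtd5dtft/Qwen2-1.5B-Instruct-Q2_K-GGUF",
    filename="*q2_k.gguf",
    n_ctx=2048,      # context window; tune per model
    verbose=False,
)

out = llm("Write a haiku about GPUs.", max_tokens=64)
print(out["choices"][0]["text"])
```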
@@ -68,15 +64,17 @@ model_manager = ModelManager()
 class ChatRequest(BaseModel):
     message: str

-
-def generate_model_response(model, inputs):
+async def generate_model_response(model, inputs: str) -> str:
     try:
         response = model(inputs, max_tokens=150)
         return response['choices'][0]['text']
     except Exception as e:
         return f"Error: Could not generate a response. Details: {e}"

-
+interaction_count = 0
+
+async def process_message(message: str) -> str:
+    global interaction_count
     inputs = message.strip()
     responses = {}

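One detail worth flagging in this hunk: `generate_model_response` becomes a coroutine function here, yet it is still submitted to a `ThreadPoolExecutor` further down, so each worker would hand back an un-awaited coroutine rather than text. A sketch of one way to reconcile that, assuming the model call itself stays synchronous; `generate_model_response_sync` and `fan_out` are illustrative names, not part of this commit:

```python
import asyncio

def generate_model_response_sync(model, inputs: str) -> str:
    """Plain blocking worker: run one llama.cpp completion."""
    try:
        response = model(inputs, max_tokens=150)
        return response['choices'][0]['text']
    except Exception as e:
        return f"Error: Could not generate a response. Details: {e}"

async def fan_out(inputs: str) -> dict:
    """Run every loaded model in a worker thread and gather the results."""
    configs = [c for c in global_data['model_configs']
               if model_manager.get_model(c['name'])]
    tasks = [asyncio.to_thread(generate_model_response_sync,
                               model_manager.get_model(c['name']), inputs)
             for c in configs]
    results = await asyncio.gather(*tasks)
    return {c['name']: text for c, text in zip(configs, results)}
```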
@@ -84,10 +82,23 @@ async def process_message(message):
         futures = [executor.submit(generate_model_response, model_manager.get_model(config['name']), inputs) for config in global_data['model_configs'] if model_manager.get_model(config['name'])]
         for i, future in enumerate(tqdm(as_completed(futures), total=len([f for f in futures]), desc="Generating responses")):
             model_name = global_data['model_configs'][i]['name']
-            responses[model_name] = future
+            responses[model_name] = await future
+
+    interaction_count += 1
+
+    if interaction_count >= global_data['auto_train_threshold']:
+        await auto_train_model()
+        interaction_count = 0

     return "\n\n".join([f"**{model}:**\n{response}" for model, response in responses.items()])

+async def auto_train_model():
+    training_data_content = global_data['training_data'].getvalue()
+    if training_data_content:
+        print("Auto training model with the following data:")
+        print(training_data_content)
+        await asyncio.sleep(1)
+
 @app.post("/generate_multimodel")
 async def api_generate_multimodel(request: Request):
     try:
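Two more details in the loop above: `as_completed` yields futures in completion order, so indexing `global_data['model_configs'][i]` can pair a response with the wrong model name, and `await future` on a `concurrent.futures.Future` needs `asyncio.wrap_future` first. A sketch that keeps names and results paired, reusing the synchronous worker assumed in the previous note (`process_message_sketch` is an illustrative name, not the commit's function):

```python
import asyncio
from concurrent.futures import ThreadPoolExecutor

async def process_message_sketch(message: str) -> str:
    inputs = message.strip()
    responses = {}
    with ThreadPoolExecutor() as executor:
        # Map each submitted future back to the model name it belongs to.
        future_to_name = {
            executor.submit(generate_model_response_sync,
                            model_manager.get_model(c['name']), inputs): c['name']
            for c in global_data['model_configs']
            if model_manager.get_model(c['name'])
        }
        for fut, name in future_to_name.items():
            # wrap_future makes a concurrent.futures.Future awaitable.
            responses[name] = await asyncio.wrap_future(fut)
    return "\n\n".join(f"**{m}:**\n{r}" for m, r in responses.items())
```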
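The diff view ends inside `api_generate_multimodel`, so the request handling itself is not shown. Assuming the endpoint reads a JSON body shaped like `ChatRequest` (a single `message` string) and returns JSON, a client call might look like the following; the host, port, and response shape are assumptions, not taken from the commit:

```python
import requests

# Assumed local URL for the FastAPI app; adjust host/port to your deployment.
resp = requests.post(
    "http://localhost:7860/generate_multimodel",
    json={"message": "Summarize what a GGUF file is."},
    timeout=120,
)
resp.raise_for_status()
print(resp.json())
```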