Update app.py

app.py CHANGED
@@ -5,12 +5,13 @@ import re
 import gradio as gr
 import os
 import urllib3
+import pickle
 from functools import lru_cache
 from dotenv import load_dotenv
 from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse
-
-import
+import time
+from tqdm import tqdm  # Importing tqdm
 
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 
@@ -37,11 +38,11 @@ global_data = {
 }
 
 response_cache = {}
-model_cache_dir = "model_cache"
+model_cache_dir = "model_cache"
 os.makedirs(model_cache_dir, exist_ok=True)
 
 class ModelManager:
-    def __init__(self, max_models=
+    def __init__(self, max_models=2):
         self.models = {}
         self.max_models = max_models
         self.model_cache_dir = model_cache_dir
@@ -99,9 +100,13 @@ def remove_duplicates(text):
 @lru_cache(maxsize=128)
 def generate_model_response(model, inputs):
     try:
+        start_time = time.time()
         response = model(inputs, max_tokens=150)
+        end_time = time.time()
+        print(f"Model generation time: {end_time - start_time:.4f} seconds")
         return remove_duplicates(response['choices'][0]['text'])
-    except Exception:
+    except Exception as e:
+        print(f"Error in model generation: {e}")
         return ""
 
 async def process_message(message):
@@ -110,8 +115,9 @@ async def process_message(message):
         return response_cache[inputs]
 
     responses = {}
+    start_time = time.time()
     with ThreadPoolExecutor(max_workers=model_manager.max_models) as executor:
-        futures = [executor.submit(model_manager.load_model, config) for config in global_data['model_configs']]
+        futures = [executor.submit(model_manager.load_model, config) for config in tqdm(global_data['model_configs'], desc="Loading models")]
         for future in as_completed(futures):
             future.result()
 
@@ -121,6 +127,8 @@ async def process_message(message):
         responses[config['name']] = generate_model_response(model, inputs)
         model_manager.unload_model(config['name'])
 
+    end_time = time.time()
+    print(f"Total processing time: {end_time - start_time:.4f} seconds")
     formatted_response = "\n\n".join([f"**{model}:**\n{response}" for model, response in responses.items()])
     response_cache[inputs] = formatted_response
     return formatted_response
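A note on the tqdm call this commit adds in process_message: because tqdm wraps the iterable inside the list comprehension, the progress bar advances as futures are submitted, which completes almost instantly, while the model loads keep running in the pool afterwards. To show actual load progress, a common idiom is to put tqdm around as_completed instead. A minimal sketch under that assumption (model_manager.load_model and the config list stand in for the ones in app.py):

from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm

def load_all(model_manager, configs, max_workers=2):
    # Submit every load up front; the bar below ticks as each load finishes.
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(model_manager.load_model, c) for c in configs]
        for future in tqdm(as_completed(futures), total=len(futures), desc="Loading models"):
            future.result()  # re-raises any exception from the worker thread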
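One more interaction worth noting: generate_model_response is memoized with @lru_cache(maxsize=128), so the new timing and error prints run only on a cache miss; a repeated (model, inputs) pair returns the stored string without re-entering the function, and both arguments must be hashable to form the cache key. A small self-contained sketch of that behavior, using a stand-in function rather than a real model call:

import time
from functools import lru_cache

@lru_cache(maxsize=128)
def cached_response(prompt):
    # The body, including any timing prints, executes only on a cache miss.
    time.sleep(0.1)  # stand-in for model inference
    return prompt.upper()

for attempt in ("first", "second"):
    start = time.time()
    cached_response("hello")
    print(f"{attempt} call: {time.time() - start:.4f} s")

print(cached_response.cache_info())  # expect hits=1, misses=1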