Yhhxhfh committed
Commit fea594a
1 Parent(s): 05c34a8

Update app.py

Files changed (1):
  1. app.py +14 -6
app.py CHANGED
@@ -5,12 +5,13 @@ import re
 import gradio as gr
 import os
 import urllib3
+import pickle
 from functools import lru_cache
 from dotenv import load_dotenv
 from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse
-from queue import Queue
-import pickle  # for persistence
+import time
+from tqdm import tqdm  # importing tqdm
 
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 
@@ -37,11 +38,11 @@ global_data = {
 }
 
 response_cache = {}
-model_cache_dir = "model_cache"  # directory for storing models on disk
+model_cache_dir = "model_cache"
 os.makedirs(model_cache_dir, exist_ok=True)
 
 class ModelManager:
-    def __init__(self, max_models=10):
+    def __init__(self, max_models=2):
         self.models = {}
         self.max_models = max_models
         self.model_cache_dir = model_cache_dir
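Two changes land in this hunk: max_models drops from 10 to 2, capping how many models stay resident in memory at once, while model_cache_dir keeps copies on disk. The diff does not show ModelManager.load_model, so the following is only a minimal sketch of the pickle-backed caching pattern that the new import pickle and the cache directory suggest; the helper name, the build callable, and the config['name'] key are assumptions, and note that handles wrapping native resources (as most LLM bindings do) generally cannot be pickled at all:

import os
import pickle

def load_with_disk_cache(config, build, cache_dir="model_cache"):
    # Hypothetical helper: reuse a pickled model if one is cached on disk,
    # otherwise construct it via the supplied callable and persist it.
    path = os.path.join(cache_dir, f"{config['name']}.pkl")
    if os.path.exists(path):
        with open(path, "rb") as f:
            return pickle.load(f)
    model = build(config)  # the real constructor is not shown in the diff
    with open(path, "wb") as f:
        pickle.dump(model, f)
    return model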
@@ -99,9 +100,13 @@ def remove_duplicates(text):
 @lru_cache(maxsize=128)
 def generate_model_response(model, inputs):
     try:
+        start_time = time.time()
         response = model(inputs, max_tokens=150)
+        end_time = time.time()
+        print(f"Model generation time: {end_time - start_time:.4f} seconds")
         return remove_duplicates(response['choices'][0]['text'])
-    except Exception:
+    except Exception as e:
+        print(f"Error during model generation: {e}")
         return ""
 
 async def process_message(message):
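Note that @lru_cache sits above the new timing code, so the per-generation print fires only on cache misses; repeat calls with the same (model, inputs) pair return the memoized string, which also requires model to be hashable. For interval measurement, time.perf_counter() is the standard-library clock intended for this job; a small decorator, sketched here using only the standard library, would avoid repeating start/end pairs:

import time
from functools import wraps

def timed(label):
    # Decorator form of the inline timing above; perf_counter is a
    # monotonic clock meant for measuring elapsed intervals.
    def decorate(fn):
        @wraps(fn)
        def wrapper(*args, **kwargs):
            start = time.perf_counter()
            try:
                return fn(*args, **kwargs)
            finally:
                print(f"{label}: {time.perf_counter() - start:.4f} seconds")
        return wrapper
    return decorate

Placed below @lru_cache it times only real generations; placed above it, it would also time cache hits.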
@@ -110,8 +115,9 @@ async def process_message(message):
         return response_cache[inputs]
 
     responses = {}
+    start_time = time.time()
     with ThreadPoolExecutor(max_workers=model_manager.max_models) as executor:
-        futures = [executor.submit(model_manager.load_model, config) for config in global_data['model_configs']]
+        futures = [executor.submit(model_manager.load_model, config) for config in tqdm(global_data['model_configs'], desc="Loading models")]
         for future in as_completed(futures):
             future.result()
 
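One subtlety in this hunk: tqdm wraps the list comprehension that submits the load jobs, and submission returns almost immediately, so the bar reaches 100% before any model has actually finished loading. If the bar is meant to track loading progress, a common idiom is to drive it from as_completed instead; load_all below is a hypothetical wrapper around the same calls:

from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm

def load_all(model_manager, configs):
    # Sketch: advance the bar as each load finishes, not as it is queued.
    with ThreadPoolExecutor(max_workers=model_manager.max_models) as executor:
        futures = [executor.submit(model_manager.load_model, c) for c in configs]
        for future in tqdm(as_completed(futures), total=len(futures), desc="Loading models"):
            future.result()  # re-raise any loading error here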
@@ -121,6 +127,8 @@ async def process_message(message):
         responses[config['name']] = generate_model_response(model, inputs)
         model_manager.unload_model(config['name'])
 
+    end_time = time.time()
+    print(f"Total processing time: {end_time - start_time:.4f} seconds")
     formatted_response = "\n\n".join([f"**{model}:**\n{response}" for model, response in responses.items()])
     response_cache[inputs] = formatted_response
     return formatted_response
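The commit adds import pickle, but none of the hunks shown here use it; if the intent is to persist response_cache across restarts (matching the "for persistence" comment the import originally carried), a minimal sketch could look like the following, with the file name being an assumption:

import pickle

CACHE_FILE = "response_cache.pkl"  # hypothetical location

def load_response_cache(path=CACHE_FILE):
    # Return the previously saved cache, or an empty dict on first run.
    try:
        with open(path, "rb") as f:
            return pickle.load(f)
    except FileNotFoundError:
        return {}

def save_response_cache(cache, path=CACHE_FILE):
    # Overwrite the on-disk cache with the current in-memory state.
    with open(path, "wb") as f:
        pickle.dump(cache, f)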
 