Update app.py
Browse files
app.py
CHANGED
@@ -5,11 +5,13 @@ import re
|
|
5 |
import asyncio
|
6 |
import gradio as gr
|
7 |
import os
|
8 |
-
import spaces
|
9 |
from dotenv import load_dotenv
|
10 |
from fastapi import FastAPI, Request
|
11 |
from fastapi.responses import JSONResponse
|
12 |
import urllib3
|
|
|
|
|
13 |
|
14 |
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
15 |
|
@@ -56,6 +58,7 @@ class ModelManager:
|
|
56 |
self.models[model_config['name']] = Llama.from_pretrained(repo_id=model_config['repo_id'], filename=model_config['filename'], use_auth_token=HUGGINGFACE_TOKEN)
|
57 |
except Exception as e:
|
58 |
print(f"Error loading model {model_config['name']}: {e}")
|
|
|
59 |
|
60 |
def load_all_models(self):
|
61 |
with ThreadPoolExecutor() as executor:
|
@@ -85,14 +88,25 @@ def remove_duplicates(text):
|
|
85 |
seen_lines.add(line)
|
86 |
return '\n'.join(unique_lines)
|
87 |
|
88 |
-
|
89 |
def generate_model_response(model, inputs):
|
90 |
try:
|
91 |
response = model(inputs)
|
92 |
return remove_duplicates(response['choices'][0]['text'])
|
93 |
except Exception as e:
|
94 |
-
|
95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
|
97 |
def remove_repetitive_responses(responses):
|
98 |
unique_responses = {}
|
@@ -118,10 +132,16 @@ async def process_message(message):
|
|
118 |
|
119 |
@app.post("/generate_multimodel")
|
120 |
async def api_generate_multimodel(request: Request):
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
|
126 |
iface = gr.Interface(
|
127 |
fn=process_message,
|
|
|
5 |
import asyncio
|
6 |
import gradio as gr
|
7 |
import os
|
8 |
+
import spaces
|
9 |
from dotenv import load_dotenv
|
10 |
from fastapi import FastAPI, Request
|
11 |
from fastapi.responses import JSONResponse
|
12 |
import urllib3
|
13 |
+
import time
|
14 |
+
import random
|
15 |
|
16 |
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
17 |
|
|
|
58 |
self.models[model_config['name']] = Llama.from_pretrained(repo_id=model_config['repo_id'], filename=model_config['filename'], use_auth_token=HUGGINGFACE_TOKEN)
|
59 |
except Exception as e:
|
60 |
print(f"Error loading model {model_config['name']}: {e}")
|
61 |
+
pass # Add pass to handle exceptions during model loading
|
62 |
|
63 |
def load_all_models(self):
|
64 |
with ThreadPoolExecutor() as executor:
|
|
|
88 |
seen_lines.add(line)
|
89 |
return '\n'.join(unique_lines)
|
90 |
|
91 |
+
@spaces.GPU(queue=False, idle_timeout=0, timeout=0)
def generate_model_response(model, inputs):
    """Run *model* on *inputs* and return its de-duplicated text output.

    Retries exactly once, after a short random backoff, when the error
    message indicates the HF Spaces GPU quota was exceeded. On any
    unrecoverable error the failure is printed and "" is returned so
    callers never see an exception.

    Args:
        model: callable Llama-style model; ``model(inputs)`` is expected
            to return a dict with ``['choices'][0]['text']``.
        inputs: prompt passed straight through to the model.

    Returns:
        str: de-duplicated generated text, or "" on failure.
    """
    try:
        response = model(inputs)
        return remove_duplicates(response['choices'][0]['text'])
    except Exception as e:
        # Guard clause: anything other than a quota error is not retried.
        # (The original had dead `pass` statements here whose comments
        # claimed they "handle exceptions" — `pass` handles nothing.)
        if "You have exceeded your GPU quota" not in str(e):
            print(f"Error generating model response: {e}")
            return ""
        # Quota hit: jittered backoff so concurrent workers don't all
        # retry at the same instant, then a single retry.
        time.sleep(random.uniform(1, 3))
        try:
            response = model(inputs)
            return remove_duplicates(response['choices'][0]['text'])
        except Exception as e2:
            print(f"Error generating model response (after retry): {e2}")
            return ""
|
110 |
|
111 |
def remove_repetitive_responses(responses):
|
112 |
unique_responses = {}
|
|
|
132 |
|
133 |
@app.post("/generate_multimodel")
async def api_generate_multimodel(request: Request):
    """POST /generate_multimodel — run the JSON ``message`` field through
    ``process_message`` and return ``{"response": ...}``.

    Fixes over the previous version:
    - ``time.sleep(300)`` inside an ``async def`` blocked the whole event
      loop (every request on the server) for 5 minutes; use
      ``await asyncio.sleep`` instead so other requests keep running.
    - ``while True`` retried permanent failures (e.g. a request body with
      no ``"message"`` key) forever, so the client never got an answer;
      retries are now bounded and a 500 response is returned on failure.
    """
    for attempt in range(3):
        try:
            data = await request.json()
            message = data["message"]
            formatted_response = await process_message(message)
            return JSONResponse({"response": formatted_response})
        except Exception as e:
            print(f"Error in API request handling: {e}")
            # Exponential backoff (1s, 2s) between attempts, without
            # blocking the event loop.
            await asyncio.sleep(2 ** attempt)
    return JSONResponse({"error": "request failed"}, status_code=500)
|
145 |
|
146 |
iface = gr.Interface(
|
147 |
fn=process_message,
|