Yhhxhfh committed on
Commit b76928d
1 Parent(s): 436a488

Update app.py

Files changed (1):
  app.py +28 -8
app.py CHANGED
@@ -5,11 +5,13 @@ import re
 import asyncio
 import gradio as gr
 import os
-import spaces # Keep spaces for other functionalities if needed
+import spaces
 from dotenv import load_dotenv
 from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse
 import urllib3
+import time
+import random
 
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 
@@ -56,6 +58,7 @@ class ModelManager:
             self.models[model_config['name']] = Llama.from_pretrained(repo_id=model_config['repo_id'], filename=model_config['filename'], use_auth_token=HUGGINGFACE_TOKEN)
         except Exception as e:
             print(f"Error loading model {model_config['name']}: {e}")
+            pass # Add pass to handle exceptions during model loading
 
     def load_all_models(self):
         with ThreadPoolExecutor() as executor:
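Reviewer note on the hunk above: the added `pass` is a no-op, since the `except` block already falls through after the `print`. Below is a minimal, hypothetical sketch of the same load-in-parallel pattern with failures surfaced to the caller instead of silently logged; only `ModelManager`, the config keys, and `Llama.from_pretrained` come from the diff, the rest (and the omitted token handling) is illustrative:

    from concurrent.futures import ThreadPoolExecutor, as_completed

    from llama_cpp import Llama  # same backend the diff uses

    class ModelManager:
        def __init__(self, model_configs):
            self.model_configs = model_configs  # dicts with 'name', 'repo_id', 'filename'
            self.models = {}

        def load_model(self, model_config):
            # Return the error message instead of swallowing it.
            try:
                self.models[model_config['name']] = Llama.from_pretrained(
                    repo_id=model_config['repo_id'],
                    filename=model_config['filename'],
                )
                return None
            except Exception as e:
                return f"Error loading model {model_config['name']}: {e}"

        def load_all_models(self):
            # One worker per model; report every failure once all loads finish.
            with ThreadPoolExecutor() as executor:
                futures = [executor.submit(self.load_model, cfg) for cfg in self.model_configs]
                for future in as_completed(futures):
                    error = future.result()
                    if error is not None:
                        print(error)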
@@ -85,14 +88,25 @@ def remove_duplicates(text):
             seen_lines.add(line)
     return '\n'.join(unique_lines)
 
-# Removed @spaces.GPU decorator
+@spaces.GPU(queue=False, idle_timeout=0, timeout=0)
 def generate_model_response(model, inputs):
     try:
         response = model(inputs)
         return remove_duplicates(response['choices'][0]['text'])
     except Exception as e:
-        print(f"Error generating model response: {e}")
-        return ""
+        if "You have exceeded your GPU quota" in str(e):
+            time.sleep(random.uniform(1, 3))
+            try:
+                response = model(inputs)
+                return remove_duplicates(response['choices'][0]['text'])
+            except Exception as e2:
+                print(f"Error generating model response (after retry): {e2}")
+                pass # Add pass to handle exceptions during retry
+                return ""
+        else:
+            print(f"Error generating model response: {e}")
+            pass # Add pass to handle other exceptions
+            return ""
 
 def remove_repetitive_responses(responses):
     unique_responses = {}
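Reviewer note on the hunk above: the new `except` branch is a single retry with 1-3 s of random jitter, triggered only when the error text mentions the GPU quota; the trailing `pass` statements are again no-ops, and the decorator's keyword arguments are reproduced as committed (the published `spaces` package documents a `duration` argument, so they may be ignored or rejected). A hedged sketch of the same policy factored into a reusable helper; the helper's name and parameters are hypothetical, only the quota message and the jitter window come from the diff:

    import random
    import time

    def retry_on_quota(fn, attempts=2, min_wait=1.0, max_wait=3.0):
        # Call fn(); on a GPU-quota error, sleep a random 1-3 s and try again.
        last_error = None
        for attempt in range(attempts):
            try:
                return fn()
            except Exception as e:
                last_error = e
                if "You have exceeded your GPU quota" not in str(e):
                    break  # non-quota errors are not retried
                if attempt < attempts - 1:
                    time.sleep(random.uniform(min_wait, max_wait))  # jitter spreads retries out
        print(f"Error generating model response: {last_error}")
        return ""

With such a helper, `generate_model_response` would reduce to `return retry_on_quota(lambda: remove_duplicates(model(inputs)['choices'][0]['text']))`, and the retry policy becomes testable on its own.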
@@ -118,10 +132,16 @@ async def process_message(message):
 
 @app.post("/generate_multimodel")
 async def api_generate_multimodel(request: Request):
-    data = await request.json()
-    message = data["message"]
-    formatted_response = await process_message(message)
-    return JSONResponse({"response": formatted_response})
+    while True:
+        try:
+            data = await request.json()
+            message = data["message"]
+            formatted_response = await process_message(message)
+            return JSONResponse({"response": formatted_response})
+        except Exception as e:
+            print(f"Error in API request handling: {e}")
+            pass # Add pass to handle exceptions in API request handling
+            time.sleep(300)
 
 iface = gr.Interface(
     fn=process_message,
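Reviewer note on the hunk above: `time.sleep(300)` inside an `async def` handler blocks the entire event loop for five minutes, and the unbounded `while True` keeps re-reading a request whose parsed body Starlette caches, so a malformed request fails the same way on every pass. A hedged alternative with a non-blocking back-off and a bounded retry count; `MAX_ATTEMPTS` and the 5 s delay are assumptions, while the route, `app`, and `process_message` mirror the diff:

    import asyncio

    from fastapi import FastAPI, Request
    from fastapi.responses import JSONResponse

    app = FastAPI()  # stands in for the existing instance in app.py

    MAX_ATTEMPTS = 3  # assumption; the committed code retries forever

    @app.post("/generate_multimodel")
    async def api_generate_multimodel(request: Request):
        data = await request.json()  # parse once, outside the retry loop
        message = data["message"]
        for attempt in range(1, MAX_ATTEMPTS + 1):
            try:
                # process_message is the coroutine defined earlier in app.py
                formatted_response = await process_message(message)
                return JSONResponse({"response": formatted_response})
            except Exception as e:
                print(f"Error in API request handling (attempt {attempt}): {e}")
                await asyncio.sleep(5)  # yields the event loop, unlike time.sleep
        return JSONResponse({"error": "generation failed"}, status_code=500)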
 