Yhhxhfh committed
Commit f8544e9
1 Parent(s): 6133a63

Update app.py

Files changed (1)
  1. app.py +39 -28
app.py CHANGED
@@ -1,6 +1,6 @@
 import os
 import gc
-import tempfile
+import io
 from llama_cpp import Llama
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import gradio as gr
@@ -8,30 +8,30 @@ from fastapi import FastAPI, Request, HTTPException
 from fastapi.responses import JSONResponse
 from tqdm import tqdm
 from dotenv import load_dotenv
-from functools import lru_cache
-import urllib3
+from pydantic import BaseModel
+import asyncio

-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-os.system("pip install llama-cpp-python")
+load_dotenv()
+os.system("pip install --upgrade llama-cpp-python")

 app = FastAPI()
-load_dotenv()
 HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")

 global_data = {
     'model_configs': [
-        {"repo_id": "Ffftdtd5dtft/gpt2-xl-Q2_K-GGUF", "filename": "gpt2-xl-q2_k.gguf", "name": "GPT-2 XL"},
-        {"repo_id": "Ffftdtd5dtft/gemma-2-27b-Q2_K-GGUF", "filename": "gemma-2-27b-q2_k.gguf", "name": "Gemma 2-27B"},
-        {"repo_id": "Ffftdtd5dtft/Phi-3-mini-128k-instruct-Q2_K-GGUF", "filename": "phi-3-mini-128k-instruct-q2_k.gguf", "name": "Phi-3 Mini 128K Instruct"},
-        {"repo_id": "Ffftdtd5dtft/starcoder2-3b-Q2_K-GGUF", "filename": "starcoder2-3b-q2_k.gguf", "name": "Starcoder2 3B"},
-        {"repo_id": "Ffftdtd5dtft/Qwen2-1.5B-Instruct-Q2_K-GGUF", "filename": "qwen2-1.5b-instruct-q2_k.gguf", "name": "Qwen2 1.5B Instruct"},
-        {"repo_id": "Ffftdtd5dtft/Mistral-Nemo-Instruct-2407-Q2_K-GGUF", "filename": "mistral-nemo-instruct-2407-q2_k.gguf", "name": "Mistral Nemo Instruct 2407"},
-        {"repo_id": "Ffftdtd5dtft/Phi-3-mini-128k-instruct-IQ2_XXS-GGUF", "filename": "phi-3-mini-128k-instruct-iq2_xxs-imat.gguf", "name": "Phi 3 Mini 128K Instruct XXS"},
-        {"repo_id": "Ffftdtd5dtft/TinyLlama-1.1B-Chat-v1.0-IQ1_S-GGUF", "filename": "tinyllama-1.1b-chat-v1.0-iq1_s-imat.gguf", "name": "TinyLlama 1.1B Chat"},
-        {"repo_id": "Ffftdtd5dtft/Meta-Llama-3.1-8B-Q2_K-GGUF", "filename": "meta-llama-3.1-8b-q2_k.gguf", "name": "Meta Llama 3.1-8B"},
-        {"repo_id": "Ffftdtd5dtft/codegemma-2b-IQ1_S-GGUF", "filename": "codegemma-2b-iq1_s-imat.gguf", "name": "Codegemma 2B"},
-    ]
+        {"repo_id": "Ffftdtd5dtft/gpt2-xl-Q2_K-GGUF", "name": "GPT-2 XL"},
+        {"repo_id": "Ffftdtd5dtft/gemma-2-27b-Q2_K-GGUF", "name": "Gemma 2-27B"},
+        {"repo_id": "Ffftdtd5dtft/Phi-3-mini-128k-instruct-Q2_K-GGUF", "name": "Phi-3 Mini 128K Instruct"},
+        {"repo_id": "Ffftdtd5dtft/starcoder2-3b-Q2_K-GGUF", "name": "Starcoder2 3B"},
+        {"repo_id": "Ffftdtd5dtft/Qwen2-1.5B-Instruct-Q2_K-GGUF", "name": "Qwen2 1.5B Instruct"},
+        {"repo_id": "Ffftdtd5dtft/Mistral-Nemo-Instruct-2407-Q2_K-GGUF", "name": "Mistral Nemo Instruct 2407"},
+        {"repo_id": "Ffftdtd5dtft/Phi-3-mini-128k-instruct-IQ2_XXS-GGUF", "name": "Phi 3 Mini 128K Instruct XXS"},
+        {"repo_id": "Ffftdtd5dtft/TinyLlama-1.1B-Chat-v1.0-IQ1_S-GGUF", "name": "TinyLlama 1.1B Chat"},
+        {"repo_id": "Ffftdtd5dtft/Meta-Llama-3.1-8B-Q2_K-GGUF", "name": "Meta Llama 3.1-8B"},
+        {"repo_id": "Ffftdtd5dtft/codegemma-2b-IQ1_S-GGUF", "name": "Codegemma 2B"},
+    ],
+    'training_data': io.StringIO(),
+    'auto_train_threshold': 10
 }

 class ModelManager:
@@ -49,18 +49,14 @@ class ModelManager:
         model_name = model_config['name']
         if model_name not in self.models:
             try:
-                tempdir = tempfile.TemporaryDirectory()
-                filepath = os.path.join(tempdir.name, model_config['filename'])
-                model = Llama.from_pretrained(repo_id=model_config['repo_id'], filename=model_config['filename'], use_auth_token=HUGGINGFACE_TOKEN)
+                model = Llama.from_pretrained(repo_id=model_config['repo_id'], use_auth_token=HUGGINGFACE_TOKEN)
                 self.models[model_name] = model
-                model.model.model_path = filepath
             except Exception as e:
-                print(f"Error loading {model_name}: {e}")
                 self.models[model_name] = None
             finally:
                 gc.collect()

-    def get_model(self, model_name):
+    def get_model(self, model_name: str):
         return self.models.get(model_name)

 model_manager = ModelManager()
@@ -68,15 +64,17 @@ model_manager = ModelManager()
 class ChatRequest(BaseModel):
     message: str

-@lru_cache(maxsize=128)
-def generate_model_response(model, inputs):
+async def generate_model_response(model, inputs: str) -> str:
     try:
         response = model(inputs, max_tokens=150)
         return response['choices'][0]['text']
     except Exception as e:
         return f"Error: Could not generate a response. Details: {e}"

-async def process_message(message):
+interaction_count = 0
+
+async def process_message(message: str) -> str:
+    global interaction_count
     inputs = message.strip()
     responses = {}

@@ -84,10 +82,23 @@ async def process_message(message):
         futures = [executor.submit(generate_model_response, model_manager.get_model(config['name']), inputs) for config in global_data['model_configs'] if model_manager.get_model(config['name'])]
         for i, future in enumerate(tqdm(as_completed(futures), total=len([f for f in futures]), desc="Generating responses")):
             model_name = global_data['model_configs'][i]['name']
-            responses[model_name] = future.result()
+            responses[model_name] = await future
+
+    interaction_count += 1
+
+    if interaction_count >= global_data['auto_train_threshold']:
+        await auto_train_model()
+        interaction_count = 0

     return "\n\n".join([f"**{model}:**\n{response}" for model, response in responses.items()])

+async def auto_train_model():
+    training_data_content = global_data['training_data'].getvalue()
+    if training_data_content:
+        print("Auto training model with the following data:")
+        print(training_data_content)
+        await asyncio.sleep(1)
+
 @app.post("/generate_multimodel")
 async def api_generate_multimodel(request: Request):
     try: