|
import os |
|
from fastapi import FastAPI, HTTPException, Depends |
|
from pydantic import BaseModel |
|
from ctransformers import AutoModelForCausalLM |
|
|
|
|
|
class Validation(BaseModel):
    """Request body accepted by the per-model generation endpoints.

    Only ``inputs`` is required; the remaining fields have defaults and are
    forwarded unchanged as keyword arguments to the model call.
    """

    # Prompt text, passed to the model as its positional argument.
    inputs: str
    # Generation settings, forwarded to the model under the same names.
    temperature: float = 0.0
    max_new_tokens: int = 1048
    top_p: float = 0.15
    repetition_penalty: float = 1.0
|
|
|
|
|
# Module-level application object; routes are attached to it by
# setup_endpoints() further down in this module.
app = FastAPI()
|
|
|
|
|
def setup_endpoints(app):
    """Load the models found under ``./models`` and add one POST route each.

    Every sub-directory of ``./models`` is treated as a model and loaded with
    ``AutoModelForCausalLM.from_pretrained``. A route ``POST /<directory>`` is
    registered for every sub-directory; a directory whose model failed to
    load still gets a route, which answers 404 "Model not found".

    Args:
        app: The FastAPI application the routes are registered on.

    Raises:
        RuntimeError: If ``./models`` is missing, is not a directory, or
            contains no sub-directories.
    """
    model_base_path = './models'

    # isdir() is already False for a path that does not exist, so a separate
    # exists() check is unnecessary.
    if not os.path.isdir(model_base_path):
        raise RuntimeError("Models directory does not exist or is not a directory")

    model_dirs = [
        d for d in os.listdir(model_base_path)
        if os.path.isdir(os.path.join(model_base_path, d))
    ]
    if not model_dirs:
        raise RuntimeError("No models found in the models directory")

    models = {}
    for model_name in model_dirs:
        model_path = os.path.join(model_base_path, model_name)
        try:
            models[model_name] = AutoModelForCausalLM.from_pretrained(model_path, threads=2)
        except Exception as e:
            # Best effort: one broken model must not prevent the others
            # from being served.
            print(f"Failed to load model {model_name}: {e}")

    def get_model(model_name: str):
        """Return the loaded model for *model_name* or raise a 404."""
        if model_name not in models:
            raise HTTPException(status_code=404, detail="Model not found")
        return models[model_name]

    def _register_route(name):
        """Attach the ``POST /<name>`` route bound to exactly this model name.

        Registering through a helper gives each route its own ``name``
        binding. Closing over the loop variable directly would make every
        route look up whichever name the loop finished on.
        """

        # A zero-argument dependency is used on purpose: FastAPI derives
        # request parameters from a dependency's signature, so the name is
        # captured by closure rather than passed as a default argument.
        def _resolve_model():
            return get_model(name)

        # NOTE(review): the model call below is not awaited, so generation
        # runs inside this coroutine - confirm that is acceptable under load.
        @app.post(f"/{name}")
        async def generate_response(item: Validation, model=Depends(_resolve_model)):
            try:
                return model(
                    item.inputs,
                    temperature=item.temperature,
                    max_new_tokens=item.max_new_tokens,
                    top_p=item.top_p,
                    repetition_penalty=item.repetition_penalty,
                )
            except Exception as e:
                raise HTTPException(status_code=500, detail=str(e)) from e

    for model_name in model_dirs:
        _register_route(model_name)
|
|
|
|
|
# Runs at import time: loads the models and registers their routes on `app`
# as soon as this module is imported.
setup_endpoints(app)
|
|