Canstralian
/

RabbitRedux

text-generation-inference

Inference Endpoints

Model card Files Files and versions Community

RabbitRedux / app.py

Canstralian's picture

Create app.py

3e66b84 verified 4 months ago

1.86 kB

	import os
	import logging
	from fastapi import FastAPI, HTTPException
	from pydantic import BaseModel
	from transformers import AutoAdapterModel, AutoTokenizer

	# Application object; the route decorators further down register against it.
	app = FastAPI()
	logging.basicConfig(level=logging.INFO)

	# Both identifiers are read from the environment so a deployment can swap the
	# base checkpoint or the adapter without touching the code.
	MODEL_NAME = os.getenv("MODEL_NAME", "bert-base-uncased")  # base checkpoint
	ADAPTER_NAME = os.getenv("ADAPTER_NAME", "Canstralian/RabbitRedux")  # adapter id

	# Load the model, adapter and tokenizer a single time at import. Any failure is
	# logged with its traceback and re-raised as RuntimeError, so the module does
	# not finish importing with a half-initialised model.
	try:
	    logging.info("Loading model and adapter...")
	    model = AutoAdapterModel.from_pretrained(MODEL_NAME)
	    model.load_adapter(ADAPTER_NAME, set_active=True)
	    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
	except Exception as load_error:
	    logging.exception("Error loading model or adapter:")
	    raise RuntimeError("Model or adapter loading failed.") from load_error
	else:
	    logging.info("Model and adapter loaded successfully.")

	# Request/response schemas for the /predict endpoint
	class PredictionRequest(BaseModel):
	    """Body accepted by ``POST /predict``."""

	    # Raw input text that the handler tokenizes and feeds to the model.
	    text: str

	class PredictionResponse(BaseModel):
	    """Body returned by ``POST /predict``."""

	    # The input text, echoed back unchanged.
	    text: str
	    # The decoded model output for that text.
	    prediction: str

	# Endpoint for inference
	@app.post("/predict", response_model=PredictionResponse)
	async def predict(request: PredictionRequest):
	    """Run the loaded model on ``request.text`` and return the decoded output.

	    Args:
	        request: Validated request body carrying the input ``text``.

	    Returns:
	        A ``PredictionResponse`` echoing the input text together with the
	        string decoded from the arg-max of the model's logits.

	    Raises:
	        HTTPException: With status 500 if tokenization, the forward pass or
	            decoding fails for any reason. The original error is logged with
	            its traceback and chained as the cause.
	    """
	    # Imported locally so the handler brings its own dependency into scope;
	    # after the first call this is only a cached module lookup.
	    import torch

	    try:
	        # Truncate to the tokenizer's maximum length so that over-long input
	        # does not fail inside the forward pass.
	        inputs = tokenizer(request.text, return_tensors="pt", truncation=True)

	        # Pure inference: skip autograd bookkeeping to save memory and time.
	        with torch.no_grad():
	            outputs = model(**inputs)

	        # Take the highest-scoring id along the last axis for the first (only)
	        # batch item and decode it back to text.
	        # NOTE(review): assumes the active head emits per-token logits; a
	        # classification head would need a label lookup instead — confirm.
	        predicted_ids = outputs.logits.argmax(-1)[0]
	        prediction = tokenizer.decode(predicted_ids, skip_special_tokens=True)

	        return PredictionResponse(text=request.text, prediction=prediction)
	    except Exception as e:
	        logging.exception("Error during prediction:")
	        raise HTTPException(status_code=500, detail="Prediction failed") from e

	# Health check endpoint
	@app.get("/health")
	async def health_check():
	    """Return a fixed status payload for ``GET /health``."""
	    payload = {"status": "healthy"}
	    return payload