"""Gradio web app that extracts "lines of therapy" from free text.

Wraps the ``alpeshsonar/lot-t5-small-filter`` T5 model in a simple
text-in / text-out Gradio interface served on port 7860.
"""

import gradio as gr
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch
from pydantic import BaseModel  # NOTE(review): unused here — kept in case an external tool relies on it; confirm and remove.
import spaces

# Select GPU when available; model weights and inputs are moved to this device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the tokenizer and model once at startup so every request reuses them.
tokenizer = T5Tokenizer.from_pretrained("alpeshsonar/lot-t5-small-filter", legacy=False)
model = T5ForConditionalGeneration.from_pretrained("alpeshsonar/lot-t5-small-filter", torch_dtype=torch.bfloat16).to(device)


@spaces.GPU(duration=120)  # HF Spaces: reserve a GPU for up to 120 s per call.
def generate_text(input_text):
    """Run the T5 model on *input_text* and return the decoded output.

    The fixed instruction prefix steers the fine-tuned model toward
    lot extraction; generation is capped at 1024 new tokens.
    """
    inputs = tokenizer.encode("Extract lots from given text.\n" + input_text, return_tensors="pt").to(device)
    outputs = model.generate(inputs, max_new_tokens=1024)
    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return result


# Text-in / text-out Gradio UI around the inference function.
iface = gr.Interface(fn=generate_text, inputs="text", outputs="text", title="Line of Therapy")


def run():
    """Launch the Gradio interface, listening on all interfaces, port 7860."""
    iface.launch(server_name="0.0.0.0", server_port=7860)


if __name__ == "__main__":
    run()