"""Minimal FastAPI service exposing a Falcon-7B text-generation endpoint."""

from fastapi import FastAPI
from fastapi.responses import RedirectResponse
from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
# import torch

model = "tiiuae/falcon-7b"

# The tokenizer and pipeline are built once at import time, so every request
# handled by this process reuses the same loaded objects.
tokenizer = AutoTokenizer.from_pretrained(model)
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    # NOTE: this lets transformers run custom Python code shipped in the
    # model repository; only keep it for repositories you trust.
    trust_remote_code=True,
    device_map="auto",
)
# Optional pipeline argument, currently disabled (also needs `import torch`):
# torch_dtype=torch.bfloat16,

app = FastAPI()


@app.get("/")
async def docs_redirect():
    """Redirect the root URL to the interactive API docs at ``/docs``."""
    return RedirectResponse(url='/docs')


@app.get("/generate")
def generate(text: str):
    """Generate a continuation of ``text`` with the text-generation pipeline.

    Args:
        text: Prompt supplied by the caller as the ``text`` query parameter.

    Returns:
        ``{"output": <generated text>}`` on success, or
        ``{"Error": <message>}`` when the prompt is blank or the pipeline
        returns no sequences.
    """
    # Reject blank prompts up front instead of sending them to the model.
    if not text or not text.strip():
        return {"Error": "text must not be empty"}

    sequences = pipeline(
        text,
        # NOTE(review): max_length presumably counts the prompt tokens as
        # well, so very long prompts leave little room for new text --
        # confirm against the transformers generation docs.
        max_length=200,
        do_sample=True,
        top_k=2,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
    )

    # Covers both ``None`` and an empty result list.
    if not sequences:
        return {"Error": "model cannot generate message"}

    # num_return_sequences=1, so the first entry is the only one requested.
    generated = sequences[0]["generated_text"]
    print(f"Result: {generated}")
    return {"output": generated}