Spaces:

alimrb
/

eff24

Sleeping

File size: 1,287 Bytes

d0452d9
 
 
 
cbc5205
3cf1317
d0452d9
 
 
 
 
 
 
 
 
 
c416b4a
d0452d9
c416b4a
d0452d9
 
 
2d38204
 
 
d0452d9
 
 
 
 
 
4b4029b
a2f1380
 
 
d0452d9
 
c416b4a
d0452d9
c416b4a
09c9222
c416b4a
4b4029b

import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hub repository id from which the PEFT adapter config and weights are loaded.
peft_model_id = "alimrb/eff24-new"

# The adapter config names the base checkpoint (base_model_name_or_path) that
# both the model and the tokenizer are loaded from below.
config = PeftConfig.from_pretrained(peft_model_id)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Load the LoRA model: wrap the base model with the adapter from peft_model_id.
model = PeftModel.from_pretrained(model, peft_model_id)

def make_inference(question, answer=None):
    """Generate a completion for ``question`` using the module-level model.

    Args:
        question: Question text substituted into the prompt template.
        answer: Unused. Retained for backward compatibility with two-argument
            callers; it is optional because the Gradio interface in this file
            supplies a single input (the question), and a required second
            parameter would make every request fail with ``TypeError``.

    Returns:
        The first generated sequence decoded to a string with special tokens
        skipped. NOTE(review): for a causal LM the decoded text presumably
        still contains the prompt itself -- confirm whether callers expect
        only the answer portion.
    """
    prompt = f"### Question:\n{question}\n\n### Answer:"
    batch = tokenizer(prompt, return_tensors="pt")

    # Move batch to the same device as the model
    batch = {k: v.to(model.device) for k, v in batch.items()}

    # torch.cuda.amp.autocast() is deprecated and warns on CPU-only hosts;
    # this keeps CUDA mixed precision when a GPU exists and is a no-op otherwise.
    with torch.autocast(device_type="cuda", enabled=torch.cuda.is_available()):
        output_tokens = model.generate(**batch, max_new_tokens=50)

    return tokenizer.decode(output_tokens[0], skip_special_tokens=True)

if __name__ == "__main__":
    # Gradio is imported only when the file is executed as a script.
    import gradio as gr

    # One multi-line input box for the question, one output box for the answer.
    question_box = gr.Textbox(lines=2, label="Question")
    answer_box = gr.Textbox(label="Answer")

    # Wire make_inference to the UI components and start serving the app.
    demo = gr.Interface(
        fn=make_inference,
        inputs=[question_box],
        outputs=answer_box,
        title="EFF24",
        description="EFF24 is a generative model that generates Answers for Questions.",
    )
    demo.launch()