import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "abhijsrwala/lora_model"

def load_model():
    """Load the model and tokenizer from the Hugging Face Hub."""
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    # Run on GPU when one is available; generation is slow on CPU.
    model.to("cuda" if torch.cuda.is_available() else "cpu")
    model.eval()  # inference mode: disables dropout
    return model, tokenizer


# Load once at import time so every request reuses the same weights.
model, tokenizer = load_model()
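
# Note: the repo name suggests a LoRA checkpoint. If "lora_model" is an
# unmerged LoRA adapter rather than a full merged model (an assumption;
# nothing in this file confirms it), AutoModelForCausalLM may fail to load
# it, and the PEFT loader would be needed instead:
#
#     from peft import AutoPeftModelForCausalLM
#     model = AutoPeftModelForCausalLM.from_pretrained(MODEL_NAME)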

def handle_request(input_data):
    """
    Handles inference requests.

    Args:
        input_data (str): The input text prompt.

    Returns:
        str: The model's response.
    """
    # Tokenize and move the tensors to the same device as the model; calling
    # the tokenizer directly (rather than .encode) also returns the attention
    # mask, which generate() expects.
    inputs = tokenizer(input_data, return_tensors="pt").to(model.device)

    # No gradients are needed for inference.
    with torch.no_grad():
        outputs = model.generate(**inputs, max_length=200, num_return_sequences=1)

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response
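
# Minimal usage sketch (the prompt below is a hypothetical example, not part
# of the original file):
if __name__ == "__main__":
    print(handle_request("Hello, how are you?"))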