samdeniyi committed on
Commit
92d0750
1 Parent(s): 6288103

mini handler

Browse files
Files changed (2) hide show
  1. handler.py +28 -0
  2. requirements.txt +11 -0
handler.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoModelForCausalLM, AutoTokenizer
2
+ from peft import PeftModel
3
+ import torch
4
+
5
+
6
class EndpointHandler:
    """Custom inference handler for a PEFT (LoRA) fine-tuned causal LM.

    Loads a base causal language model plus the PEFT adapter stored at the
    same repository path, and serves text generation requests.
    """

    def __init__(self, path="unsloth/Meta-Llama-3.1-8B-bnb-4bit"):
        """Load tokenizer, base model, and PEFT adapter from `path`.

        Args:
            path: model repo or local directory containing both the base
                model weights and the PEFT adapter files.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        base_model = AutoModelForCausalLM.from_pretrained(path)
        # Attach the fine-tuned adapter stored alongside the base weights.
        self.model = PeftModel.from_pretrained(base_model, path)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)
        # Inference only: disable dropout and other training-time behavior.
        self.model.eval()

    def __call__(self, data):
        """Generate text for one inference request.

        Args:
            data: request payload. Accepts either the standard HF endpoint
                shape {"inputs": "<prompt>"} or the nested form
                {"inputs": {"text": "<prompt>"}}.

        Returns:
            dict with a single key "generated_text" holding the decoded output.
        """
        # Robust input extraction: the standard HF Inference payload puts a
        # plain string under "inputs"; the nested {"text": ...} form is also
        # supported for backward compatibility. The original code called
        # .get("text") on a string and raised AttributeError.
        inputs_field = data.get("inputs", "") if isinstance(data, dict) else ""
        if isinstance(inputs_field, dict):
            input_text = inputs_field.get("text", "")
        else:
            input_text = inputs_field or ""

        # Tokenize and move tensors to the model's device.
        encoded = self.tokenizer(input_text, return_tensors="pt").to(self.device)

        # no_grad: avoid building an autograd graph during generation.
        with torch.no_grad():
            output_tokens = self.model.generate(
                input_ids=encoded["input_ids"],
                # Passing the attention mask avoids incorrect masking and the
                # "attention mask is not set" warning from transformers.
                attention_mask=encoded.get("attention_mask"),
                max_length=1024,
            )

        generated_text = self.tokenizer.decode(output_tokens[0], skip_special_tokens=True)
        return {"generated_text": generated_text}
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ torch>=2.0.0
2
+ torchvision
3
+ transformers>=4.25.0
4
+ unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git
5
+ xformers==0.0.27
6
+ trl
7
+ peft
8
+ accelerate
9
+ bitsandbytes
10
+ triton
11
+ wandb