Update app.py
Browse files
app.py
CHANGED
@@ -60,7 +60,7 @@ accelerator = Accelerator()
|
|
60 |
model = AutoModelForCausalLM.from_pretrained(model_id, token= token,
|
61 |
# torch_dtype= torch.uint8,
|
62 |
torch_dtype=torch.float16,
|
63 |
-
|
64 |
# # # torch_dtype=torch.fl,
|
65 |
attn_implementation="flash_attention_2",
|
66 |
low_cpu_mem_usage=True,
|
@@ -96,6 +96,8 @@ def respond(
|
|
96 |
temperature,
|
97 |
top_p,
|
98 |
):
|
|
|
|
|
99 |
messages = []
|
100 |
json_obj = str_to_json(message)
|
101 |
print(json_obj)
|
|
|
60 |
model = AutoModelForCausalLM.from_pretrained(model_id, token= token,
|
61 |
# torch_dtype= torch.uint8,
|
62 |
torch_dtype=torch.float16,
|
63 |
+
load_in_4bit=True,
|
64 |
# # # torch_dtype=torch.fl,
|
65 |
attn_implementation="flash_attention_2",
|
66 |
low_cpu_mem_usage=True,
|
|
|
96 |
temperature,
|
97 |
top_p,
|
98 |
):
|
99 |
+
# model.to(accelerator.device)
|
100 |
+
|
101 |
messages = []
|
102 |
json_obj = str_to_json(message)
|
103 |
print(json_obj)
|