rodrigomasini committed · commit 4ab4748 · 1 parent: 8a638cc
Update app_v4.py
app_v4.py CHANGED
@@ -3,6 +3,7 @@ from transformers import AutoTokenizer
 from auto_gptq import AutoGPTQForCausalLM
 import torch
 import subprocess
+import traceback
 
 # Function to get memory info
 def get_gpu_memory():
@@ -38,11 +39,11 @@ try:
         model_basename="Jackson2-4bit-128g-GPTQ",
         use_safetensors=True,
         device=device,
-        max_memory={0: "
+        max_memory={0: "15GIB"}
     )
 except RuntimeError as e:
     if 'CUDA out of memory' in str(e):
-        st.error("CUDA out of memory. Try reducing the model size or input length.")
+        st.error("CUDA out of memory before token generation. Try reducing the model size or input length.")
         st.stop()
     else:
         raise e
@@ -60,5 +61,11 @@ if st.button("Generate the prompt"):
     except RuntimeError as e:
         if 'CUDA out of memory' in str(e):
             st.error("CUDA out of memory during generation. Try reducing the input length.")
+            # Log the detailed error message
+            with open('error_log.txt', 'a') as f:
+                f.write(traceback.format_exc())
         else:
+            # Log the error and re-raise it
+            with open('error_log.txt', 'a') as f:
+                f.write(traceback.format_exc())
             raise e
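Taken together, the commit caps GPU 0 at 15 GiB when loading the quantized model and appends full tracebacks to error_log.txt when generation fails. Below is a minimal, self-contained sketch of that pattern; MODEL_REPO is a hypothetical placeholder (the diff only shows model_basename, not the repo the weights come from), and the loading-time traceback logging is extrapolated from what the commit adds around generation.

# Sketch of the loading + OOM-handling pattern in this commit, assuming
# a Streamlit app. MODEL_REPO below is a hypothetical placeholder.
import traceback

import streamlit as st
import torch
from auto_gptq import AutoGPTQForCausalLM

MODEL_REPO = "some-org/Jackson2-4bit-128g-GPTQ"  # hypothetical repo id

def log_traceback(path="error_log.txt"):
    # Append the current exception's traceback to a local log file,
    # mirroring the logging the commit adds in the generation handler.
    with open(path, "a") as f:
        f.write(traceback.format_exc())

device = "cuda:0" if torch.cuda.is_available() else "cpu"

try:
    model = AutoGPTQForCausalLM.from_quantized(
        MODEL_REPO,
        model_basename="Jackson2-4bit-128g-GPTQ",
        use_safetensors=True,
        device=device,
        # Cap GPU 0 so loading fails fast instead of thrashing; accelerate
        # parses size strings case-insensitively, so "15GIB" == "15GiB".
        max_memory={0: "15GIB"},
    )
except RuntimeError as e:
    if "CUDA out of memory" in str(e):
        st.error("CUDA out of memory before token generation. "
                 "Try reducing the model size or input length.")
        st.stop()
    else:
        log_traceback()
        raise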