rodrigomasini committed
Commit
4ab4748
1 Parent(s): 8a638cc

Update app_v4.py

Files changed (1)
  1. app_v4.py +9 -2
app_v4.py CHANGED
@@ -3,6 +3,7 @@ from transformers import AutoTokenizer
 from auto_gptq import AutoGPTQForCausalLM
 import torch
 import subprocess
+import traceback
 
 # Function to get memory info
 def get_gpu_memory():
@@ -38,11 +39,11 @@ try:
         model_basename="Jackson2-4bit-128g-GPTQ",
         use_safetensors=True,
         device=device,
-        max_memory={0: "10GIB"}
+        max_memory={0: "15GIB"}
     )
 except RuntimeError as e:
     if 'CUDA out of memory' in str(e):
-        st.error("CUDA out of memory. Try reducing the model size or input length.")
+        st.error("CUDA out of memory before token generation. Try reducing the model size or input length.")
         st.stop()
     else:
         raise e
@@ -60,5 +61,11 @@ if st.button("Generate the prompt"):
     except RuntimeError as e:
         if 'CUDA out of memory' in str(e):
             st.error("CUDA out of memory during generation. Try reducing the input length.")
+            # Log the detailed error message
+            with open('error_log.txt', 'a') as f:
+                f.write(traceback.format_exc())
         else:
+            # Log the error and re-raise it
+            with open('error_log.txt', 'a') as f:
+                f.write(traceback.format_exc())
             raise e
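
For reference, the error-handling pattern this commit introduces (catch a CUDA out-of-memory RuntimeError around generation, surface a Streamlit error message, and append the full traceback to error_log.txt) can be sketched in isolation roughly as follows. This is a minimal sketch: the log_exception and run_generation helpers and the generate_fn argument are illustrative names for this example, not functions that exist in app_v4.py.

import traceback

import streamlit as st


def log_exception(path: str = "error_log.txt") -> None:
    # Append the traceback of the exception currently being handled to a log file.
    with open(path, "a") as f:
        f.write(traceback.format_exc())


def run_generation(generate_fn, prompt: str) -> None:
    # generate_fn stands in for whatever callable turns a prompt into model output.
    try:
        st.write(generate_fn(prompt))
    except RuntimeError as e:
        if "CUDA out of memory" in str(e):
            st.error("CUDA out of memory during generation. Try reducing the input length.")
            log_exception()  # keep the detailed traceback for later debugging
        else:
            log_exception()  # log unexpected runtime errors, then re-raise them
            raise

Appending to a plain text file keeps the Streamlit UI message short while still preserving the full stack trace for debugging.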