rphrp1985 committed on
Commit
17749ab
·
verified ·
1 Parent(s): 8e94850

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -2
app.py CHANGED
@@ -5,13 +5,22 @@ from torch.cuda.amp import autocast
5
  import subprocess
6
  from huggingface_hub import InferenceClient
7
  import os
 
 
8
  """
9
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
10
  """
11
 
12
  from accelerate import init_empty_weights, infer_auto_device_map, load_checkpoint_and_dispatch
13
 
14
-
 
 
 
 
 
 
 
15
 
16
  subprocess.run(
17
  "pip install flash-attn --no-build-isolation",
@@ -78,7 +87,6 @@ def respond(
78
  input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to('cuda')
79
  ## <BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, how are you?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
80
  # with autocast():
81
- model= model.to('cuda')
82
  gen_tokens = model.generate(
83
  input_ids,
84
  max_new_tokens=100,
 
5
  import subprocess
6
  from huggingface_hub import InferenceClient
7
  import os
8
+ import psutil
9
+
10
  """
11
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
12
  """
13
 
14
  from accelerate import init_empty_weights, infer_auto_device_map, load_checkpoint_and_dispatch
15
 
16
+ subprocess.run(
17
+ "pip install psutil",
18
+
19
+ shell=True,
20
+ )
21
+ ram_info = psutil.virtual_memory()
22
+ print(f"Total RAM: {ram_info.total / (1024.0 ** 3)} GB")
23
+ print(f"Available RAM: {ram_info.available / (1024.0 ** 3)} GB")
24
 
25
  subprocess.run(
26
  "pip install flash-attn --no-build-isolation",
 
87
  input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to('cuda')
88
  ## <BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, how are you?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
89
  # with autocast():
 
90
  gen_tokens = model.generate(
91
  input_ids,
92
  max_new_tokens=100,