zerogpu-2

Running on Zero

rphrp1985 committed on Jun 12, 2024

Commit

23ab0e2

verified ·

1 Parent(s): 17749ab

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -18,9 +18,10 @@ subprocess.run(
     shell=True,
 )
-ram_info = psutil.virtual_memory()
-print(f"Total RAM: {ram_info.total / (1024.0 ** 3)} GB")
-print(f"Available RAM: {ram_info.available / (1024.0 ** 3)} GB")
 subprocess.run(
     "pip install flash-attn --no-build-isolation",
@@ -82,7 +83,7 @@ def respond(
     temperature,
     top_p,
 ):
     messages = [{"role": "user", "content": "Hello, how are you?"}]
     input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to('cuda')
 ## <BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, how are you?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>

     shell=True,
 )
+def print_s1ystem():
+    ram_info = psutil.virtual_memory()
+    print(f"Total RAM: {ram_info.total / (1024.0 ** 3)} GB")
+    print(f"Available RAM: {ram_info.available / (1024.0 ** 3)} GB")
 subprocess.run(
     "pip install flash-attn --no-build-isolation",
     temperature,
     top_p,
 ):
+    print_s1ystem()
     messages = [{"role": "user", "content": "Hello, how are you?"}]
     input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to('cuda')
 ## <BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, how are you?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>