rename

Browse files

Files changed (4) hide show

benchmark_llama.py +53 -0
clear_mem.py +8 -6
run_xl.py +4 -2
save_lora.py +62 -0

benchmark_llama.py ADDED Viewed

	@@ -0,0 +1,53 @@

+#!/usr/bin/env python3
+"""Benchmark forward-pass and generate() latency of Llama-2-7B (fp16) on one GPU."""
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import time
+import torch
+DEVICE = "cuda:1"
+# Each configuration is timed this many times and the fastest run is reported.
+NUM_REPEATS = 3
+# NOTE(review): the tokenizer is loaded but none of the benchmarks below use it.
+tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
+model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", torch_dtype=torch.float16, low_cpu_mem_usage=True)
+model.to(DEVICE)
+def make_inputs(batch_size, input_seq):
+    """Return dummy all-ones token ids and an attention mask with one zeroed entry.
+
+    The mask is zeroed at position 3 of the first sequence, so ``input_seq``
+    must be at least 4.
+    """
+    input_ids = torch.ones((batch_size, input_seq), dtype=torch.long, device=DEVICE)
+    attention_mask = torch.ones_like(input_ids)
+    attention_mask[0, 3] = 0
+    return input_ids, attention_mask
+def best_time(fn):
+    """Call ``fn`` NUM_REPEATS times and return the fastest duration in seconds."""
+    times = []
+    for _ in range(NUM_REPEATS):
+        # CUDA kernels are launched asynchronously: drain the device queue before
+        # starting the clock and again before stopping it, otherwise the reading
+        # can reflect kernel launch overhead rather than the GPU work itself.
+        torch.cuda.synchronize(DEVICE)
+        start_time = time.perf_counter()
+        fn()
+        torch.cuda.synchronize(DEVICE)
+        times.append(time.perf_counter() - start_time)
+    return min(times)
+# forward
+print("Forward benchmarks")
+print(50 * "=")
+for batch_size in (1, 4, 16):
+    for input_seq in (4, 16, 256):
+        input_ids, attention_mask = make_inputs(batch_size, input_seq)
+        with torch.no_grad():
+            result = best_time(lambda: model(input_ids=input_ids, attention_mask=attention_mask))
+        print(f"Forward bsz={batch_size}, input_seq={input_seq}: {result}")
+# generate
+print("Generate benchmarks")
+print(50 * "=")
+for batch_size in (1, 16):
+    for input_seq in (4, 256):
+        input_ids, attention_mask = make_inputs(batch_size, input_seq)
+        # Pass the mask explicitly; it was previously built but never handed to generate().
+        result = best_time(lambda: model.generate(input_ids=input_ids, attention_mask=attention_mask, max_new_tokens=256))
+        print(f"Generate bsz={batch_size}, input_seq={input_seq}: {result}")

clear_mem.py CHANGED Viewed

@@ -1,10 +1,11 @@
 #!/usr/bin/env python3
 import torch
 import gc
-shape = (10,000)
-input = torch.ones((shape, shape), device="cuda")
 def clear_memory(model):
@@ -14,8 +15,9 @@ def clear_memory(model):
     torch.cuda.ipc_collect()
     torch.clear_autocast_cache()
-for _ in range(6):
-    linear = torch.nn.Linear(shape, shape).to("cuda")
-    output = linear(input)
-    clear_memory(linear)

 #!/usr/bin/env python3
 import torch
 import gc
+from diffusers import DiffusionPipeline
+shape = (30_000, 30_000)  # dimensions of the large tensor allocated on the next line
+input = torch.randn(shape, device="cuda")  # random 30k x 30k tensor created on the GPU; NOTE(review): shadows the builtin input()
 def clear_memory(model):  # NOTE(review): part of this function's body is hidden by the diff hunk boundary
     torch.cuda.ipc_collect()
     torch.clear_autocast_cache()
+for _ids in ["runwayml/stable-diffusion-v1-5", "CompVis/stable-diffusion-v1-4", "runwayml/stable-diffusion-v1-5", "CompVis/stable-diffusion-v1-4", "runwayml/stable-diffusion-v1-5"]:  # alternate between two checkpoints, five loads in total
+    pipe = DiffusionPipeline.from_pretrained(_ids, use_safetensors=True).to("cuda")  # load from safetensors weights and move the pipeline to the GPU
+    pipe("hey", num_inference_steps=1)  # one inference step with the prompt "hey"; the output is discarded
+    print("finished...")
+    clear_memory(pipe)  # NOTE(review): `pipe` still references the pipeline until the next iteration rebinds it - confirm clear_memory can actually free it

run_xl.py CHANGED Viewed

@@ -14,8 +14,10 @@ pipe2 = DiffusionPipeline.from_pretrained(
   variant="fp16",
   torch_dtype=torch.float16
 )
-pipe.enable_model_cpu_offload()
-pipe2.enable_model_cpu_offload()
 compel = Compel(
   tokenizer=[pipe.tokenizer, pipe.tokenizer_2] ,

   variant="fp16",
   torch_dtype=torch.float16
 )
+pipe.to("cuda")  # move the first pipeline to the GPU; replaces the CPU-offload call commented out below
+pipe2.to("cuda")  # same for the second pipeline
+# pipe.enable_model_cpu_offload()
+# pipe2.enable_model_cpu_offload()
 compel = Compel(
   tokenizer=[pipe.tokenizer, pipe.tokenizer_2] ,

save_lora.py ADDED Viewed

	@@ -0,0 +1,62 @@

+#!/usr/bin/env python3
+import torch
+from warnings import warn
+from diffusers import (
+    AutoencoderKL,
+    DiffusionPipeline,
+)
+import hashlib
+# Base checkpoint plus the two LoRA adapters (repo id, weight file name) used below.
+base = "stabilityai/stable-diffusion-xl-base-1.0"
+adapter1, weightname1 = 'nerijs/pixel-art-xl', 'pixel-art-xl.safetensors'
+adapter2, weightname2 = 'Alexzyx/lora-trained-xl-colab', None
+# Text prompt passed to the pipeline on every run.
+inputs = "elephant"
+# Half precision is requested only when a CUDA device is available.
+has_cuda = torch.cuda.is_available()
+kwargs = {"torch_dtype": torch.float16} if has_cuda else {}
+# Disabled option: load the fp16-fix VAE and the fp16 variant.
+#vae = AutoencoderKL.from_pretrained(
+#    "madebyollin/sdxl-vae-fp16-fix",
+#    torch_dtype=torch.float16,  # load fp16 fix VAE
+#)
+#kwargs["vae"] = vae
+#kwargs["variant"] = "fp16"
+#
+model = DiffusionPipeline.from_pretrained(base, **kwargs)
+if has_cuda:
+    model.to("cuda")
+def inference(adapter, weightname):
+    """Generate one image with a LoRA adapter loaded and print the JPEG's md5.
+
+    Loads ``adapter`` / ``weightname`` into the module-level ``model`` and tries
+    a safe fuse. A ValueError from fusing is reported as a warning and the
+    adapter weights are unloaded. A single-step generation of the module-level
+    ``inputs`` prompt then runs, after which the LoRA is unfused and unloaded.
+    The image is written to /tmp/hello.jpg and its md5 is printed, with an
+    extra message when the digest equals the hard-coded black-image digest.
+    """
+    model.load_lora_weights(adapter, weight_name=weightname)
+    try:
+        model.fuse_lora(safe_fusing=True)
+    except ValueError:
+        warn(f"{adapter} and {weightname} is broken. LoRA is not fused.")
+        model.unload_lora_weights()
+    result = model(inputs, num_inference_steps=1)
+    image = result.images[0]
+    # Undo the fuse and drop the adapter before anything else touches the model.
+    model.unfuse_lora()
+    model.unload_lora_weights()
+    out_path = '/tmp/hello.jpg'
+    image.save(out_path, format='jpeg')
+    # Hash the encoded file on disk, not the in-memory image.
+    hasher = hashlib.md5()
+    with open(out_path, 'rb') as fh:
+        hasher.update(fh.read())
+    digest = hasher.hexdigest()
+    print("Adapter %s, md5sum %s" % (adapter, digest))
+    if digest == '40c78c9fd4daeff01c988c3532fdd51b':
+        print("BLACK SCREEN IMAGE for adapter %s" % adapter)
+# Run adapter1 once, then adapter2, then adapter1 twice more, in that order.
+for lora_repo, lora_file in (
+    (adapter1, weightname1),
+    (adapter2, weightname2),
+    (adapter1, weightname1),
+    (adapter1, weightname1),
+):
+    inference(lora_repo, lora_file)