amildravid4292 committed
Commit 7217618 · verified · 1 parent: 45ec4cb

Update app.py

Files changed (1)
  1. app.py +43 -64
app.py CHANGED
@@ -140,7 +140,7 @@ def sample_then_run(net):
     return net, image
 
 @torch.no_grad()
-@spaces.GPU(duration=120)
+@spaces.GPU()
 def inference(net, prompt, negative_prompt, guidance_scale, ddim_steps, seed):
     mean.to(device)
     std.to(device)
@@ -197,77 +197,75 @@ def inference(net, prompt, negative_prompt, guidance_scale, ddim_steps, seed):
 
     image = Image.fromarray((image * 255).round().astype("uint8"))
 
-    del network
 
     return image
 
 
+
+
 @torch.no_grad()
-@spaces.GPU(duration=120)
-def edit_inference(self, prompt, negative_prompt, guidance_scale, ddim_steps, seed, start_noise, a1, a2, a3, a4):
-    device = self.device
-    self.unet.to(device)
-    self.text_encoder.to(device)
-    self.vae.to(device)
-    self.mean.to(device)
-    self.std.to(device)
-    self.v.to(device)
-    self.proj.to(device)
-    self.weights = torch.load("model.pt").to(device)
-    self.young.to(device)
-    self.pointy.to(device)
-    self.wavy.to(device)
-    self.thick.to(device)
+@spaces.GPU()
+def edit_inference(net, prompt, negative_prompt, guidance_scale, ddim_steps, seed, start_noise, a1, a2, a3, a4):
+    mean.to(device)
+    std.to(device)
+    v.to(device)
+    young.to(device)
+    pointy.to(device)
+    wavy.to(device)
+    thick.to(device)
 
-    network = LoRAw2w( self.weights.bfloat16(), self.mean.bfloat16(), self.std.bfloat16(), self.v[:, :1000].bfloat16(),
-                    self.unet,
+
+    weights = torch.load(net).to(device)
+    network = LoRAw2w(weights, mean, std, v[:, :1000],
+                    unet,
                     rank=1,
                     multiplier=1.0,
                     alpha=27.0,
                     train_method="xattn-strict"
                     ).to(device, torch.bfloat16)
-
-
-    original_weights = self.weights.clone()
+
+
+
+
 
     #pad to same number of PCs
-    pcs_original = original_weights.shape[1]
-    pcs_edits = self.young.shape[1]
+    pcs_original = weights.shape[1]
+    pcs_edits = young.shape[1]
     padding = torch.zeros((1,pcs_original-pcs_edits)).to(device)
-    young_pad = torch.cat((self.young, padding), 1)
-    pointy_pad = torch.cat((self.pointy, padding), 1)
-    wavy_pad = torch.cat((self.wavy, padding), 1)
-    thick_pad = torch.cat((self.thick, padding), 1)
+    young_pad = torch.cat((young, padding), 1)
+    pointy_pad = torch.cat((pointy, padding), 1)
+    wavy_pad = torch.cat((wavy, padding), 1)
+    thick_pad = torch.cat((thick, padding), 1)
 
 
-    edited_weights = original_weights+a1*1e6*young_pad+a2*1e6*pointy_pad+a3*1e6*wavy_pad+a4*2e6*thick_pad
+    edited_weights = weights+a1*1e6*young_pad+a2*1e6*pointy_pad+a3*1e6*wavy_pad+a4*2e6*thick_pad
 
     generator = torch.Generator(device=device).manual_seed(seed)
     latents = torch.randn(
-        (1, self.unet.in_channels, 512 // 8, 512 // 8),
+        (1, unet.in_channels, 512 // 8, 512 // 8),
         generator = generator,
-        device = self.device
+        device = device
     ).bfloat16()
 
 
-    text_input = self.tokenizer(prompt, padding="max_length", max_length=self.tokenizer.model_max_length, truncation=True, return_tensors="pt")
+    text_input = tokenizer(prompt, padding="max_length", max_length=tokenizer.model_max_length, truncation=True, return_tensors="pt")
 
-    text_embeddings = self.text_encoder(text_input.input_ids.to(device))[0]
+    text_embeddings = text_encoder(text_input.input_ids.to(device))[0]
 
     max_length = text_input.input_ids.shape[-1]
-    uncond_input = self.tokenizer(
+    uncond_input = tokenizer(
         [negative_prompt], padding="max_length", max_length=max_length, return_tensors="pt"
     )
-    uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(device))[0]
+    uncond_embeddings = text_encoder(uncond_input.input_ids.to(device))[0]
     text_embeddings = torch.cat([uncond_embeddings, text_embeddings]).bfloat16()
-    self.noise_scheduler.set_timesteps(ddim_steps)
-    latents = latents * self.noise_scheduler.init_noise_sigma
+    noise_scheduler.set_timesteps(ddim_steps)
+    latents = latents * noise_scheduler.init_noise_sigma
 
 
 
-    for i,t in enumerate(tqdm.tqdm(self.noise_scheduler.timesteps)):
+    for i,t in enumerate(tqdm.tqdm(noise_scheduler.timesteps)):
         latent_model_input = torch.cat([latents] * 2)
-        latent_model_input = self.noise_scheduler.scale_model_input(latent_model_input, timestep=t)
+        latent_model_input = noise_scheduler.scale_model_input(latent_model_input, timestep=t)
 
         if t>start_noise:
             pass
@@ -276,7 +274,7 @@ def edit_inference(self, prompt, negative_prompt, guidance_scale, ddim_steps, se
         network.reset()
 
         with network:
-            noise_pred = self.unet(latent_model_input, t, encoder_hidden_states=text_embeddings, timestep_cond= None).sample
+            noise_pred = unet(latent_model_input, t, encoder_hidden_states=text_embeddings, timestep_cond= None).sample
 
 
         #guidance
@@ -285,31 +283,12 @@ def edit_inference(self, prompt, negative_prompt, guidance_scale, ddim_steps, se
         latents = noise_scheduler.step(noise_pred, t, latents).prev_sample
 
     latents = 1 / 0.18215 * latents
-    image = self.vae.decode(latents.float()).sample
+    image = vae.decode(latents).sample
     image = (image / 2 + 0.5).clamp(0, 1)
     image = image.detach().cpu().float().permute(0, 2, 3, 1).numpy()[0]
     image = Image.fromarray((image * 255).round().astype("uint8"))
 
-    return image
-
-    # @torch.no_grad()
-    # @spaces.GPU(duration=120)
-    # def sample_then_run(self):
-    #     self.unet = UNet2DConditionModel.from_pretrained(
-    #         "stablediffusionapi/realistic-vision-v51" , subfolder="unet", revision=None
-    #     )
-    #     self.unet.to(self.device, dtype=torch.bfloat16)
-    #     self.weights = sample_weights(self.unet, self.proj, self.mean, self.std, self.v[:, :1000], self.device, factor = 1.00)
-
-    #     prompt = "sks person"
-    #     negative_prompt = "low quality, blurry, unfinished, nudity, weapon"
-    #     seed = 5
-    #     cfg = 3.0
-    #     steps = 25
-    #     image = self.inference(prompt, negative_prompt, cfg, steps, seed)
-    #     torch.save(self.weights.cpu().detach(), "model.pt" )
-    #     return image, "model.pt"
-
+    return net, image
 
 
 class CustomImageDataset(Dataset):
@@ -535,9 +514,9 @@ with gr.Blocks(css="style.css") as demo:
 
     sample.click(fn=sample_then_run,inputs = [net], outputs=[net, input_image])
 
-    # submit.click(
-    #     fn=model.edit_inference, inputs=[prompt, negative_prompt, cfg, steps, seed, injection_step, a1, a2, a3, a4], outputs=[gallery]
-    # )
+    submit.click(
+        fn=edit_inference, inputs=[net, prompt, negative_prompt, cfg, steps, seed, injection_step, a1, a2, a3, a4], outputs=[net, gallery]
+    )
     # file_input.change(fn=model.file_upload, inputs=file_input, outputs = gallery)
 
 
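
The diff converts `edit_inference` from a method on a long-lived class into a module-level handler: the model state (`net`, a path to saved weights) comes in through the Gradio event, the weights are reloaded inside the call, and the state is returned back out, which is the shape `@spaces.GPU()` expects for functions that run in short-lived ZeroGPU allocations. Below is a minimal sketch of that pattern, not part of this commit; `generate` and its placeholder body are hypothetical stand-ins for the real diffusion loop, and `spaces` is only available on Hugging Face Spaces.

    import gradio as gr
    import spaces
    import torch

    @spaces.GPU()  # ZeroGPU attaches a GPU only for the duration of this call
    def generate(net, prompt, seed):
        # `net` arrives from gr.State as a file path, so each call reloads the
        # weights instead of reading attributes off a live object
        weights = torch.load(net)
        generator = torch.Generator().manual_seed(int(seed))
        # ... run the diffusion pipeline with `weights` and `prompt` here ...
        image = (torch.rand(512, 512, 3, generator=generator) * 255).to(torch.uint8).numpy()  # placeholder output
        return net, image  # thread the state back out, as edit_inference now does

    with gr.Blocks() as demo:
        net = gr.State("model.pt")  # hypothetical initial weights file
        prompt = gr.Textbox(label="Prompt")
        seed = gr.Number(value=5, label="Seed")
        out = gr.Image(label="Result")
        gr.Button("Generate").click(fn=generate, inputs=[net, prompt, seed], outputs=[net, out])

    demo.launch()

Keeping the state as a picklable value rather than object attributes is what lets the handler run in a fresh GPU worker on every click, which also explains why the commit drops the commented-out class-based `sample_then_run` and re-enables `submit.click` against the new function.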