# Spaces: Sleeping (page-status text captured with the source; not part of the program)
import subprocess
import zipfile
def unzip_content():
    """Extract ``./content.zip`` into the current working directory.

    Python's ``zipfile`` module is tried first. If that fails for any reason,
    the system ``unzip`` command is run as a fallback (``-o`` overwrites
    existing files without prompting).

    Raises:
        Exception: If both extraction methods fail. The error raised by the
            fallback command is chained as the cause.
    """
    try:
        # First try using Python's zipfile
        print("Attempting to unzip content using Python...")
        with zipfile.ZipFile('./content.zip', 'r') as zip_ref:
            zip_ref.extractall('.')
    except Exception as e:
        print(f"Python unzip failed: {str(e)}")
        try:
            # Fallback to system unzip command
            print("Attempting to unzip content using system command...")
            subprocess.run(['unzip', '-o', './content.zip'], check=True)
        except Exception as fallback_error:
            print(f"System unzip failed: {str(fallback_error)}")
            # Chain the cause so the real reason for the fallback failure
            # stays visible in the traceback.
            raise Exception("Failed to unzip content using both methods") from fallback_error
    print("Content successfully unzipped!")
# Best-effort extraction at import time: a failure is reported as a warning
# and execution continues with the statements that follow.
try:
    unzip_content()
except Exception as startup_error:
    print(f"Warning: Could not unzip content: {str(startup_error)}")
import gradio as gr | |
import numpy as np | |
import torch | |
import torchvision | |
import torchvision.transforms | |
import torchvision.transforms.functional | |
import PIL | |
import matplotlib.pyplot as plt | |
import yaml | |
from omegaconf import OmegaConf | |
from CLIP import clip | |
import os | |
import sys | |
#os.chdir('./taming-transformers') | |
#from taming.models.vqgan import VQModel | |
#os.chdir('..') | |
taming_path = os.path.join(os.getcwd(), 'taming-transformers') | |
sys.path.append(taming_path) | |
from taming.models.vqgan import VQModel | |
from PIL import Image | |
import cv2 | |
import imageio | |
# Use CUDA when torch reports it as available; otherwise fall back to the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
def create_video(image_folder='./generated', video_name='morphing_video.mp4'):
    """Assemble the images in a folder into a video file.

    Files whose names end in ``.png`` or ``.jpg`` are used as frames, in
    sorted filename order, and written at 10 frames per second.

    Args:
        image_folder: Directory containing the frame images.
        video_name: Path of the video file to write.

    Returns:
        ``video_name`` once the video has been written, or ``None`` if the
        folder contains no matching images.
    """
    images = sorted(
        name for name in os.listdir(image_folder)
        if name.endswith((".png", ".jpg"))
    )
    if not images:
        print("No images found in the folder.")
        return None

    # Using the writer as a context manager closes the output file even when
    # reading one of the frames raises.
    with imageio.get_writer(video_name, fps=10) as video_writer:
        for image in images:
            img_path = os.path.join(image_folder, image)
            video_writer.append_data(imageio.imread(img_path))
    return video_name
def save_from_tensors(tensor, output_dir, filename):
    """Save an image tensor as a file at ``output_dir/filename``.

    A copy of the tensor is multiplied by 255, converted to bytes, moved to
    the CPU and its axes reordered with ``transpose((1, 2, 0))`` before being
    passed to PIL. The output directory is created when it does not exist.

    Args:
        tensor: Image tensor to save.
            (assumes channel-first layout with values in [0, 1] — TODO confirm)
        output_dir: Directory to write into.
        filename: Name of the image file inside ``output_dir``.
    """
    scaled = tensor.clone().mul(255).byte()
    pixels = scaled.cpu().numpy().transpose((1, 2, 0))
    os.makedirs(output_dir, exist_ok=True)
    target = os.path.join(output_dir, filename)
    Image.fromarray(pixels).save(target)
def norm_data(data):
    """Clip ``data`` to [-1, 1] and rescale it linearly to [0, 1]."""
    clipped = data.clip(-1, 1)
    return (clipped + 1) / 2
def setup_clip_model():
    """Load the CLIP ``ViT-B/32`` model, switch it to eval mode and move it to ``device``.

    Returns:
        The loaded model. The second value returned by ``clip.load`` is not used.
    """
    clip_model, _unused = clip.load('ViT-B/32', jit=False)
    clip_model.eval()
    clip_model.to(device)
    return clip_model
def setup_vqgan_model(config_path, checkpoint_path):
    """Build a ``VQModel`` from a config file and load checkpoint weights into it.

    Args:
        config_path: Path to a config readable by ``OmegaConf.load``; its
            ``model.params`` section supplies the constructor arguments.
        checkpoint_path: Path to a checkpoint whose ``"state_dict"`` entry
            holds the weights.

    Returns:
        The model in eval mode on ``device``.
    """
    model_params = OmegaConf.load(config_path).model.params
    vqgan = VQModel(**model_params)
    # NOTE(review): torch.load unpickles the file — only load trusted checkpoints.
    checkpoint = torch.load(checkpoint_path, map_location="cpu")
    # strict=False: keys that do not match between checkpoint and model are tolerated.
    vqgan.load_state_dict(checkpoint["state_dict"], strict=False)
    vqgan.eval()
    return vqgan.to(device)
def generator(x, model):
    """Return ``model.decoder(model.post_quant_conv(x))``.

    Args:
        x: Input passed to ``model.post_quant_conv``.
        model: Object providing ``post_quant_conv`` and ``decoder`` callables.
    """
    return model.decoder(model.post_quant_conv(x))
def encode_text(text, clip_model):
    """Tokenize ``text`` and return a detached copy of its CLIP text encoding.

    Args:
        text: Text passed to ``clip.tokenize``.
        clip_model: Model providing ``encode_text``.
    """
    tokens = clip.tokenize(text)
    tokens = tokens.to(device)
    encoding = clip_model.encode_text(tokens)
    return encoding.detach().clone()
def create_encoding(include, exclude, extras, clip_model):
    """Encode three lists of prompts with ``encode_text``.

    Args:
        include: Prompts to encode first.
        exclude: Prompts to encode second.
        extras: Prompts to encode last.
        clip_model: Model forwarded to ``encode_text``.

    Returns:
        A tuple ``(include_enc, exclude_enc, extras_enc)`` of lists holding
        one encoding per prompt, in input order.
    """
    def encode_all(texts):
        return [encode_text(entry, clip_model) for entry in texts]

    include_enc = encode_all(include)
    exclude_enc = encode_all(exclude)
    extras_enc = encode_all(extras)
    return include_enc, exclude_enc, extras_enc
def create_crops(img, num_crops=32, size1=225, noise_factor=0.05):
    """Build a batch of randomly augmented, noised 224x224 crops of ``img``.

    The input is zero-padded, passed through a random horizontal flip and a
    random affine transform, then cropped ``num_crops`` times at random
    positions with random square sizes. Every crop is resized to 224x224,
    the crops are concatenated along dimension 0, and Gaussian noise with a
    random per-crop strength is added.

    Args:
        img: 4-D image tensor (it is indexed as ``img[:, :, rows, cols]``).
        num_crops: Number of crops to take.
        size1: Reference size that determines padding, crop size and offsets.
        noise_factor: Upper bound on the per-crop noise multiplier.

    Returns:
        Tensor holding all crops concatenated along dimension 0.
    """
    augment = torch.nn.Sequential(
        torchvision.transforms.RandomHorizontalFlip(),
        torchvision.transforms.RandomAffine(30, translate=(0.1, 0.1), fill=0),
    ).to(device)

    # Zero-pad all four sides by size1 // 2 before augmenting.
    border = size1 // 2
    padded = torch.nn.functional.pad(img, (border, border, border, border), mode='constant', value=0)
    augmented = augment(padded)

    crops = []
    for _ in range(num_crops):
        # Crop side length: a normal(1.2, 0.3) draw clipped to [0.43, 1.9], times size1.
        side = int(torch.normal(1.2, .3, ()).clip(.43, 1.9) * size1)
        start_dim2 = torch.randint(0, int(size1 * 2 - side), ())
        start_dim3 = torch.randint(0, int(size1 * 2 - side), ())
        window = augmented[:, :, start_dim2:start_dim2 + side, start_dim3:start_dim3 + side]
        resized = torch.nn.functional.interpolate(window, (224, 224), mode='bilinear', align_corners=True)
        crops.append(resized)

    stacked = torch.cat(crops, 0)
    noise = torch.randn_like(stacked, requires_grad=False)
    # One uniform [0, 1) strength per crop, broadcast over the remaining dimensions.
    strength = torch.rand((stacked.shape[0], 1, 1, 1)).to(device)
    return stacked + noise_factor * strength * noise
def optimize_result(params, prompt, vqgan_model, clip_model, w1, w2, extras_enc, exclude_enc,
                    alpha=1, beta=0.5):
    """Compute the loss that steers the generated image toward the prompts.

    ``params`` is decoded with ``generator``, normalized with ``norm_data``,
    cropped with ``create_crops`` and encoded by ``clip_model``. The loss
    rewards cosine similarity with the weighted include/extras encoding and
    penalizes cosine similarity with the exclude encoding.

    Only the first entry of ``extras_enc`` and of ``exclude_enc`` is used.

    Args:
        params: Latent tensor being optimized.
        prompt: Encoding of the include prompt.
        vqgan_model: Model passed to ``generator``.
        clip_model: Model providing ``encode_image``.
        w1: Weight applied to ``prompt``.
        w2: Weight applied to ``extras_enc[0]``.
        extras_enc: List of extra-prompt encodings.
        exclude_enc: List of exclude-prompt encodings.
        alpha: Weight of the similarity-to-include term (default 1).
        beta: Weight of the similarity-to-exclude penalty (default 0.5).

    Returns:
        Scalar loss ``-alpha * mean(include similarity) + beta * mean(exclude similarity)``.
    """
    out = generator(params, vqgan_model)
    out = norm_data(out)
    out = create_crops(out)
    # NOTE(review): presumably the per-channel mean/std expected by CLIP — confirm.
    out = torchvision.transforms.Normalize((0.48145466, 0.4578275, 0.40821073),
                                           (0.26862954, 0.26130258, 0.27577711))(out)
    img_enc = clip_model.encode_image(out)

    final_enc = w1 * prompt + w2 * extras_enc[0]
    final_text_include_enc = final_enc / final_enc.norm(dim=-1, keepdim=True)
    final_text_exclude_enc = exclude_enc[0]

    main_loss = torch.cosine_similarity(final_text_include_enc, img_enc, dim=-1)
    penalize_loss = torch.cosine_similarity(final_text_exclude_enc, img_enc, dim=-1)
    return -alpha * main_loss.mean() + beta * penalize_loss.mean()
def optimize(params, optimizer, prompt, vqgan_model, clip_model, w1, w2, extras_enc, exclude_enc):
    """Run one optimization step and return the loss computed before the update.

    The loss comes from ``optimize_result``; gradients are reset, the loss is
    back-propagated and ``optimizer.step()`` is applied.
    """
    step_loss = optimize_result(
        params, prompt, vqgan_model, clip_model, w1, w2, extras_enc, exclude_enc
    )
    # Clear gradients left over from the previous step before back-propagating.
    optimizer.zero_grad()
    step_loss.backward()
    optimizer.step()
    return step_loss
def training_loop(params, optimizer, include_enc, exclude_enc, extras_enc, vqgan_model, clip_model, w1, w2,
                  total_iter=200, show_step=1):
    """Optimize ``params`` toward each include prompt in turn, recording snapshots.

    For every prompt in ``include_enc``, ``total_iter`` optimization steps are
    run. On every ``show_step``-th step the current image and a detached copy
    of ``params`` are recorded and the loss is printed. The same ``params``
    and ``optimizer`` are carried over from one prompt to the next.

    Args:
        params: Latent tensor being optimized.
        optimizer: Optimizer that updates ``params``.
        include_enc: Encodings of the prompts to optimize toward, in order.
        exclude_enc: Encodings forwarded to ``optimize``.
        extras_enc: Encodings forwarded to ``optimize``.
        vqgan_model: Model used to decode ``params``.
        clip_model: Model forwarded to ``optimize``.
        w1: Weight forwarded to ``optimize``.
        w2: Weight forwarded to ``optimize``.
        total_iter: Number of optimization steps per prompt.
        show_step: Interval, in steps, between recorded snapshots.

    Returns:
        A tuple ``(res_img, res_z)``: the recorded images and the matching
        detached copies of ``params``.
    """
    snapshots = []
    latents = []
    for prompt in include_enc:
        for step in range(total_iter):
            loss = optimize(params, optimizer, prompt, vqgan_model, clip_model, w1, w2, extras_enc, exclude_enc)
            if step % show_step != 0:
                continue
            # Decoding a snapshot needs no gradients.
            with torch.no_grad():
                decoded = generator(params, vqgan_model)
                frame = norm_data(decoded[0].to(device))
            snapshots.append(frame)
            latents.append(params.clone().detach())
            print(f"loss: {loss.item():.4f}\nno. of iteration: {step}")
        torch.cuda.empty_cache()
    return snapshots, latents
def generate_art(include_text, exclude_text, extras_text, num_iterations):
    """Generate a morphing video from comma-separated text prompts.

    Loads the CLIP and VQGAN models, initializes a latent from
    ``./gradient1.png`` blended with random noise, optimizes it toward each
    include prompt, writes every recorded frame to the ``generated`` folder
    and assembles the frames into a video. The ``generated`` folder is
    removed afterwards, whether or not generation succeeded.

    Args:
        include_text: Comma-separated prompts to optimize toward, in order.
        exclude_text: Comma-separated prompts to steer away from.
        extras_text: Comma-separated extra prompts blended into each include prompt.
        num_iterations: Number of optimization steps per include prompt.

    Returns:
        The path returned by ``create_video`` (``None`` if no frames were written).

    Raises:
        Exception: Any error raised during setup, optimization or video
            creation is propagated unchanged after cleanup.
    """
    import shutil

    output_dir = "generated"
    try:
        # Process the input prompts
        include = [x.strip() for x in include_text.split(',')]
        exclude = [x.strip() for x in exclude_text.split(',')]
        extras = [x.strip() for x in extras_text.split(',')]
        w1, w2 = 1.0, 0.9

        # Setup models
        clip_model = setup_clip_model()
        vqgan_model = setup_vqgan_model("./models/vqgan_imagenet_f16_16384/configs/model.yaml",
                                        "./models/vqgan_imagenet_f16_16384/checkpoints/last.ckpt")

        # Parameters
        learning_rate = 0.1
        wd = 0.1
        size1, size2 = 225, 400

        # Initialize parameters
        initial_image = PIL.Image.open('./gradient1.png')
        initial_image = initial_image.resize((size2, size1))
        initial_image = torchvision.transforms.ToTensor()(initial_image).unsqueeze(0).to(device)
        with torch.no_grad():
            z, _, _ = vqgan_model.encode(initial_image)
        # Move the data first and wrap it afterwards: calling .to() on the
        # Parameter itself could return a non-leaf tensor, which the
        # optimizer cannot update.
        params = torch.nn.Parameter(z.to(device))
        optimizer = torch.optim.AdamW([params], lr=learning_rate, weight_decay=wd)
        # Blend the encoded start image with Gaussian noise (60% / 40%).
        params.data = params.data * 0.6 + torch.randn_like(params.data) * 0.4

        # Encode prompts
        include_enc, exclude_enc, extras_enc = create_encoding(include, exclude, extras, clip_model)

        # Run training loop
        res_img, _ = training_loop(params, optimizer, include_enc, exclude_enc, extras_enc,
                                   vqgan_model, clip_model, w1, w2, total_iter=num_iterations)

        # Start from an empty output directory so stale frames never end up in the video.
        os.makedirs(output_dir, exist_ok=True)
        for file in os.listdir(output_dir):
            file_path = os.path.join(output_dir, file)
            if os.path.isfile(file_path):
                os.remove(file_path)

        for i, img in enumerate(res_img):
            save_from_tensors(img, output_dir, f"generated_image_{i:03d}.png")

        # Create the video from the folder that was just written.
        return create_video(image_folder=output_dir)
    finally:
        # Always remove the frame folder, on success and on failure alike.
        # A cleanup problem must not mask the result or the original error.
        shutil.rmtree(output_dir, ignore_errors=True)
def gradio_interface(include_text, exclude_text, extras_text, num_iterations):
    """Gradio callback that generates the morphing video for the given prompts.

    Args:
        include_text: Comma-separated prompts to optimize toward.
        exclude_text: Comma-separated prompts to steer away from.
        extras_text: Comma-separated extra style prompts.
        num_iterations: Number of iterations; converted to ``int`` before use.

    Returns:
        The video path returned by ``generate_art``.

    Raises:
        gr.Error: If generation fails. The message carries the original
            error text and the original exception is chained as the cause.
    """
    try:
        return generate_art(include_text, exclude_text, extras_text, int(num_iterations))
    except Exception as e:
        # The output component is a video, so an error message returned as a
        # string would be treated as a video value. Raising gr.Error lets
        # Gradio show the message to the user instead.
        raise gr.Error(f"An error occurred: {str(e)}") from e
# Define the Gradio app
# NOTE(review): the Space URL below contains "your-username/your-space-name",
# which looks like a placeholder — confirm the intended target.
_APP_DESCRIPTION = """
<a href="https://colab.research.google.com/drive/1ivRYvTaX90PRghQIqAdOyEawkY0YLefa?authuser=0#scrollTo=WE7aPQ0t1hd2">
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>
<a href="https://huggingface.co./spaces/your-username/your-space-name?duplicate=true">
<img src="https://huggingface.co./datasets/huggingface/badges/raw/main/clone-space-lg.svg" alt="Clone Space"/>
</a>
<br><br>
Generate artistic videos using VQGAN-CLIP.
Enter your prompts separated by commas and adjust the number of iterations.
The model will generate a morphing video based on your inputs.
<br><br>
<b>Note:</b> This application requires GPU access. Please either:
<br>1. Use the Colab notebook (click the Colab badge above) with GPU runtime
<br>2. Clone this space (click Clone Space badge) and enable GPU in your personal copy"""

# Input widgets, in the order of gradio_interface's parameters.
_APP_INPUTS = [
    gr.Textbox(label="Include Prompts (comma-separated)",
               value="desert, heavy rain, cactus"),
    gr.Textbox(label="Exclude Prompts (comma-separated)",
               value="confusing, blurry"),
    gr.Textbox(label="Extra Style Prompts (comma-separated)",
               value="desert, clear, detailed, beautiful, good shape, detailed"),
    gr.Number(label="Number of Iterations",
              value=200, minimum=1, maximum=1000),
]

iface = gr.Interface(
    fn=gradio_interface,
    inputs=_APP_INPUTS,
    outputs=gr.Video(label="Generated Morphing Video"),
    title="VQGAN-CLIP Art Generator",
    # NOTE(review): "allow" is not a CSS rule — confirm this argument is intended.
    css="allow",
    allow_flagging="never",
    description=_APP_DESCRIPTION,
)
if __name__ == "__main__":
    # Report whether CUDA is available, then start the Gradio app.
    gpu_status = "GPU AVAILABLE" if torch.cuda.is_available() else "NO GPU FOUND"
    print("Checking GPU availability:", gpu_status)
    iface.launch()