import torch
import torch.nn as nn
import torchvision.utils as vutils
from models import AsymmetricResidualUDiT
from safetensors.torch import load_file
import os
import argparse
from typing import Optional

def load_checkpoint(model: nn.Module, checkpoint_path: str) -> None:
    state_dict = load_file(checkpoint_path)
    # Training used torch.compile, which wraps the model and prefixes every
    # parameter name with '_orig_mod.'. Strip the prefix so the keys match
    # the uncompiled model.
    if all(k.startswith('_orig_mod.') for k in state_dict.keys()):
        state_dict = {k[len('_orig_mod.'):]: v for k, v in state_dict.items()}
    model.load_state_dict(state_dict)

def sample(model, n_samples=16, n_steps=50, image_size=256, device="cuda",
           sigma_min=0.001, dtype=torch.float32):
    # Note: sigma_min is accepted for compatibility but is not used by this Euler sampler.
    with torch.amp.autocast('cuda', dtype=dtype):
        # Start from pure Gaussian noise and integrate the learned velocity
        # field from t=0 to t=1 with fixed-step forward Euler.
        x = torch.randn(n_samples, 3, image_size, image_size, device=device)
        ts = torch.linspace(0, 1, n_steps, device=device)
        dt = 1 / n_steps

        with torch.no_grad():
            for t in ts:
                # Broadcast the scalar timestep to shape (n_samples, 1, 1, 1)
                t_input = t.repeat(n_samples, 1, 1, 1)
                v_t = model(x, t_input)
                x = x + v_t * dt

    return x.float()
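
# Optional illustration: a second-order (Heun) sampler as an alternative to the
# forward Euler loop above. This is a sketch only; it is not part of the original
# pipeline and is not called by main(). It assumes the same model(x, t) velocity
# interface as sample().
def sample_heun(model, n_samples=16, n_steps=50, image_size=256, device="cuda", dtype=torch.float32):
    with torch.amp.autocast('cuda', dtype=dtype):
        x = torch.randn(n_samples, 3, image_size, image_size, device=device)
        dt = 1 / n_steps

        with torch.no_grad():
            for i in range(n_steps):
                # Current and next timestep, broadcast to (n_samples, 1, 1, 1)
                t = torch.full((n_samples, 1, 1, 1), i * dt, device=device)
                t_next = torch.full((n_samples, 1, 1, 1), (i + 1) * dt, device=device)

                # Predictor step (Euler), then correct with the averaged slope
                v1 = model(x, t)
                v2 = model(x + v1 * dt, t_next)
                x = x + 0.5 * (v1 + v2) * dt

    return x.float()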

def main():
    parser = argparse.ArgumentParser(description="Generate samples from a trained UDiT model")
    parser.add_argument("checkpoint", type=str, help="Path to the model checkpoint (.safetensors)")
    parser.add_argument("--samples", type=int, default=16, help="Number of samples to generate")
    parser.add_argument("--steps", type=int, default=50, help="Number of sampling steps")
    parser.add_argument("--output", type=str, default="output.png", help="Output filename")
    parser.add_argument("--device", type=str, default="cuda" if torch.cuda.is_available() else "cpu",
                        help="Device to run inference on (cuda/cpu)")
    args = parser.parse_args()

    device = args.device
    # Instantiate the architecture. These hyperparameters must match the
    # configuration the checkpoint was trained with, or loading will fail.
    model = AsymmetricResidualUDiT(
        in_channels=3,
        base_channels=128,
        num_levels=3,
        patch_size=4,
        encoder_blocks=3,
        decoder_blocks=7,
        encoder_transformer_thresh=2,
        decoder_transformer_thresh=4,
        mid_blocks=8
    ).to(device)
    # Load state dict into model
    load_checkpoint(model, args.checkpoint)
    model.eval()

    # Generate samples
    print(f"Generating {args.samples} samples with {args.steps} steps...")
    with torch.no_grad():
        samples = sample(
            model,
            n_samples=args.samples,
            n_steps=args.steps,
            device=args.device,
            dtype=torch.float32
        )

    # Save samples
    os.makedirs(os.path.dirname(args.output) or ".", exist_ok=True)
    vutils.save_image(samples, args.output, nrow=4, padding=2)
    print(f"Samples saved to {args.output}")


if __name__ == "__main__":
    main()
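
# Example usage (the script and checkpoint names below are illustrative; adjust
# them to your own filenames):
#   python sample.py model.safetensors --samples 16 --steps 50 --output samples.png --device cuda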