|
from PIL import Image |
|
import torch |
|
import torch.nn as nn |
|
import torch.nn.functional as F |
|
from huggingface_hub import hf_hub_download |
|
|
|
|
|
|
|
from transformers import DepthProConfig, DepthProImageProcessorFast, DepthProForDepthEstimation |
|
|
|
|
|
# Base DepthPro network, built from a config with the field-of-view
# sub-model disabled. Weights are randomly initialised at this point.
config = DepthProConfig(use_fov_model=False)
model = DepthProForDepthEstimation(config)

# Hyper-parameters of the replacement head.
features = config.fusion_hidden_size
semantic_classifier_dropout = 0.1
num_labels = 1

# Replace the stock head with a small convolutional stack that emits
# `num_labels` channels. Layer order (and therefore the positional indices
# inside the Sequential) must stay as-is so state-dict keys keep matching.
model.head.head = nn.Sequential(
    nn.Conv2d(
        in_channels=features,
        out_channels=features,
        kernel_size=3,
        padding=1,
        bias=False,
    ),
    nn.BatchNorm2d(num_features=features),
    nn.ReLU(),
    nn.Dropout(p=semantic_classifier_dropout),
    nn.Conv2d(in_channels=features, out_channels=features, kernel_size=1),
    # stride-2 transposed convolution: doubles the spatial resolution.
    nn.ConvTranspose2d(
        in_channels=features,
        out_channels=num_labels,
        kernel_size=2,
        stride=2,
    ),
)
|
|
|
|
|
# Download the checkpoint file from the Hugging Face Hub and get its
# local path.
weights_path = hf_hub_download(
    repo_id="geetu040/DepthPro_Segmentation_Human",
    filename="model_weights.pth",
)

# Deserialize onto the CPU (tensors only, via weights_only=True) and copy the
# parameters into the model; device placement happens separately.
model.load_state_dict(
    torch.load(
        weights_path,
        map_location=torch.device("cpu"),
        weights_only=True,
    )
)
|
|
|
|
|
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# The model is instantiated directly from its class, so it is still in
# PyTorch's default training mode. Switch to evaluation mode so the Dropout
# layer in the replacement head is disabled and BatchNorm2d uses its stored
# running statistics instead of per-batch statistics during inference.
model.eval()

# Preprocessor that converts input images into the tensors the model expects.
image_processor = DepthProImageProcessorFast()
|
|
|
def predict(image):
    """Run the segmentation model on ``image`` and return a binary mask.

    Args:
        image: A PIL image. It is converted to RGB before preprocessing.

    Returns:
        A PIL image built from a ``uint8`` array with the same height and
        width as ``image``; pixels are 255 where the sigmoid of the model
        output exceeds 0.5 and 0 elsewhere.
    """
    image = image.convert("RGB")

    # Preprocess and move every input tensor to the model's device.
    inputs = image_processor(images=image, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Inference only: no gradients needed.
    with torch.no_grad():
        output = model(**inputs)

    # Take the first model output and add a leading axis so that
    # F.interpolate receives the 4-D tensor a 2-D target size requires.
    logits = output[0]
    logits = F.interpolate(
        logits.unsqueeze(0),
        size=(image.height, image.width),
    )

    # Drop only the two leading singleton axes. A bare ``squeeze()`` would
    # also remove a spatial axis of size 1 (an image 1 pixel wide or tall),
    # leaving a 1-D array instead of a 2-D mask.
    logits = logits.squeeze(0).squeeze(0)

    # Threshold the probabilities and scale the boolean mask to 0 / 255.
    mask = (logits.sigmoid() > 0.5).to(torch.uint8) * 255

    return Image.fromarray(mask.cpu().numpy())
|
|