File size: 4,373 Bytes
0ea58a6
 
df6bc83
0ea58a6
 
 
 
df6bc83
 
0ea58a6
 
df6bc83
 
0ea58a6
 
640d64f
0ea58a6
 
9498f43
0ea58a6
c798906
 
8de0612
c798906
0ea58a6
c798906
 
ac0c7c2
c798906
 
 
ac0c7c2
9498f43
 
 
c798906
 
ac0c7c2
c798906
 
0ea58a6
c798906
 
 
0ea58a6
c798906
 
0ea58a6
c798906
 
ac0c7c2
c798906
0ea58a6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
df6bc83
 
 
 
 
 
 
0ea58a6
 
df6bc83
 
 
 
 
673f0b3
0ea58a6
 
 
 
 
 
df6bc83
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import gradio as gr
import numpy as np
from transformers import BeitImageProcessor, BeitForSemanticSegmentation, DPTImageProcessor, DPTForDepthEstimation
from PIL import Image, ImageFilter
import torch
import cv2

# Each checkpoint id is written once so a processor and its model always load
# from the same checkpoint.
_SEGMENTATION_CHECKPOINT = "microsoft/beit-base-finetuned-ade-640-640"
_DEPTH_CHECKPOINT = "Intel/dpt-large"

# Semantic segmentation pair (consumed by apply_gaussian_blur).
segmentation_processor = BeitImageProcessor.from_pretrained(_SEGMENTATION_CHECKPOINT)
segmentation_model = BeitForSemanticSegmentation.from_pretrained(_SEGMENTATION_CHECKPOINT)

# Depth estimation pair (consumed by apply_lens_blur).
depth_feature_extractor = DPTImageProcessor.from_pretrained(_DEPTH_CHECKPOINT)
depth_model = DPTForDepthEstimation.from_pretrained(_DEPTH_CHECKPOINT)

def apply_gaussian_blur(image):
    """Blur the background of an image while keeping people sharp.

    The image is resized to 512x512 and converted to RGB, segmented with the
    module-level ``segmentation_model``, and composited so that pixels
    classified as the 'person' class are taken from the original image and
    every other pixel is taken from a Gaussian-blurred copy.

    Args:
        image: A PIL image. It is resized and converted to RGB before use.

    Returns:
        A 512x512 RGB ``PIL.Image.Image`` with the non-person area blurred.
    """
    target_size = (512, 512)  # (width, height) for both inference and output
    person_index = 12  # Assuming 12 is the 'person' class index
    blur_sigma = 15

    image = image.resize(target_size).convert("RGB")
    inputs = segmentation_processor(image, return_tensors="pt")

    with torch.no_grad():
        logits = segmentation_model(**inputs).logits

        # The logits are not guaranteed to have the input's spatial size.
        # Upsample the per-class scores *before* the argmax so the mask edge
        # is resolved at output resolution; enlarging an already-argmaxed
        # coarse label map with nearest-neighbour gives blocky subject edges.
        logits = torch.nn.functional.interpolate(
            logits,
            size=(target_size[1], target_size[0]),  # interpolate wants (height, width)
            mode="bilinear",
            align_corners=False,
        )

    # Per-pixel predicted class, shape [height, width].
    segmentation = logits.argmax(dim=1)[0]
    person_mask = (segmentation == person_index).cpu().numpy()

    image_np = np.array(image)

    # A (0, 0) kernel size lets OpenCV derive the kernel from the sigmas.
    blurred_image = cv2.GaussianBlur(
        image_np, (0, 0), sigmaX=blur_sigma, sigmaY=blur_sigma
    )

    # The mask is strictly boolean, so pick each pixel from one source or the
    # other directly; this is equivalent to a 0/1 weighted blend without the
    # float round-trip. The new axis broadcasts the mask over the RGB channels.
    final_image = np.where(person_mask[:, :, np.newaxis], image_np, blurred_image)

    return Image.fromarray(final_image)

def apply_lens_blur(image):
    """Blend a sharp and a blurred copy of an image using estimated depth.

    The image is resized to 512x512 and converted to RGB, then passed through
    the module-level ``depth_model``. The prediction is min-max normalised and
    inverted to form a per-pixel blur weight: pixels with the highest
    predicted value keep the original image, pixels with the lowest predicted
    value receive the fully blurred image, and values in between are mixed
    linearly.

    NOTE(review): this presumes larger model outputs mean "closer to the
    camera" so that the near subject stays sharp - confirm against the
    model card.

    Args:
        image: A PIL image. It is resized and converted to RGB before use.

    Returns:
        A 512x512 RGB ``PIL.Image.Image``.
    """
    image = image.resize((512, 512)).convert("RGB")
    depth_inputs = depth_feature_extractor(images=image, return_tensors="pt")

    with torch.no_grad():
        depth_outputs = depth_model(**depth_inputs)
        predicted_depth = depth_outputs.predicted_depth[0].cpu().numpy()

    # Min-max normalise, then invert, to obtain the blur weight in [0, 1].
    min_depth = predicted_depth.min()
    depth_range = predicted_depth.max() - min_depth
    if depth_range > 0:
        inverted_depth = 1 - (predicted_depth - min_depth) / depth_range
    else:
        # A constant depth map carries no near/far information. Dividing by a
        # zero range would yield NaN weights (and an undefined uint8 cast
        # below), so treat the whole frame as in focus instead.
        inverted_depth = np.zeros_like(predicted_depth)

    # Bring the weight map to the image resolution via an 8-bit PIL image,
    # then back to floats in [0, 1] with a trailing channel axis so it
    # broadcasts across RGB.
    depth_weight_resized = Image.fromarray(
        (inverted_depth * 255).astype(np.uint8)
    ).resize((512, 512))
    depth_weight_resized = np.array(depth_weight_resized) / 255.0
    depth_weight_resized = depth_weight_resized[:, :, np.newaxis]

    # Fully blurred copy; the weight map decides how much of it each pixel gets.
    blurred_image = image.filter(ImageFilter.GaussianBlur(radius=15))

    original_np = np.array(image).astype(np.float32)
    blurred_np = np.array(blurred_image).astype(np.float32)

    # Per-pixel linear mix of the sharp and blurred images.
    output_np = (
        (1 - depth_weight_resized) * original_np
        + depth_weight_resized * blurred_np
    )

    # Clamp before the cast so rounding noise cannot wrap around in uint8.
    output_np = np.clip(output_np, 0, 255).astype(np.uint8)

    return Image.fromarray(output_np)

# Define a function to call the correct blurring function based on user selection
def apply_blur(effect, image):
    """Run the blur routine chosen in the UI on the supplied image.

    Args:
        effect: Name of the effect; "Gaussian Blur" or "Lens Blur".
        image: The PIL image to process. May be ``None`` when the caller has
            no image to pass (e.g. the form was submitted without an upload).

    Returns:
        The processed PIL image produced by the selected blur routine.

    Raises:
        gr.Error: If no image was supplied or ``effect`` is not one of the
            supported names. ``gr.Error`` is used so the message is shown to
            the user instead of an opaque failure or an empty output.
    """
    # Fail early with a readable message rather than letting a helper hit
    # AttributeError on ``None.resize``.
    if image is None:
        raise gr.Error("Please upload an image before applying a blur effect.")

    if effect == "Gaussian Blur":
        return apply_gaussian_blur(image)
    if effect == "Lens Blur":
        return apply_lens_blur(image)

    # Falling through used to return None silently, leaving the output blank
    # with no explanation.
    raise gr.Error("Please select a blur effect: Gaussian Blur or Lens Blur.")

# Build the individual UI components first, in the same order the interface
# lists them: effect selector, input image, output image.
effect_selector = gr.Dropdown(
    choices=["Gaussian Blur", "Lens Blur"],
    label="Select Blur Effect",
)
image_input = gr.Image(type="pil")
image_output = gr.Image(type="pil")

# Wire the components to apply_blur; the inputs are passed positionally as
# (effect, image).
interface = gr.Interface(
    fn=apply_blur,
    inputs=[effect_selector, image_input],
    outputs=image_output,
    title="Blur Effects with Hugging Face",
    description="Apply Gaussian Blur or Lens Blur to images using semantic segmentation or depth estimation.",
)

# Launch the Gradio interface
interface.launch()