Spaces:

AIRI-Institute
/

StyleFeatureEditor

Running

App Files Files Community

StyleFeatureEditor / app.py

retir

fix deltaedit

7bc29a1 3 months ago

raw

history blame

15.1 kB

	import os
	from io import BytesIO

	import gradio as gr
	import grpc
	from PIL import Image
	import pandas as pd

	from inference_pb2 import SFERequest, SFEResponse, SFERequestMask, SFEResponseMask
	from inference_pb2_grpc import SFEServiceStub


	PREDEFINED_EDITINGS_DATA = {
	"glasses": ([-20.0, 30.0], False),
	"smile": ([-10.0, 10.0], False),
	"makeup": ([-10.0, 15.0], False),
	"eye_openness": ([-45.0, 30.0], True),
	"trimmed_beard": ([-30.0, 30.0], True),
	"face_roundness": ([-20.0, 15.0], False),
	"nose_length": ([-30.0, 30.0], True),
	"eyebrow_thickness": ([-20.0, 20.0], True),
	"displeased": ([-10.0, 10.0], False),
	"age": ([-10.0, 10.0], False),
	"rotation": ([-7.0, 7.0], False),
	"afro": ([0, 0.14], False),
	"angry": ([0, 0.14], False),
	"bobcut": ([0, 0.18], False),
	"bowlcut": ([0, 0.14], False),
	"mohawk": ([0, 0.1], False),
	"curly_hair": ([0, 0.12], False),
	"purple_hair": ([0, 0.12], False),
	"surprised": ([0, 0.1], False),
	"beyonce": ([0, 0.12], False),
	"hilary_clinton": ([0, 0.1], False),
	"depp": ([0, 0.12], False),
	"taylor_swift": ([0, 0.1], False),
	"trump": ([0, 0.1], False),
	"zuckerberg": ([0, 0.1], False),
	"black hair": ([-7.0, 10.0], False),
	"blond hair": ([-7.0, 10.0], True),
	"grey hair": ([-7.0, 7.0], True),
	"wavy hair": ([-7.0, 7.0], False),
	"receding hairline": ([-10.0, 10.0], True),
	"sideburns": ([-7.0, 7.0], True),
	"goatee": ([-7.0, 7.0], True),
	"gender swap": ([-10.0, 7.0], False)
	}

	# Direction names that `edit_image` replaces before building the request:
	# key is the name shown in the UI, value is the name put into the request.
	DIRECTIONS_NAME_SWAP = {
	    "smile": "fs_smiling",
	    "glasses": "fs_glasses",
	    "makeup": "fs_makeup",
	    "gender swap": "gender",
	}


	def denormalize_power(direction_name, directon_power):
	    """Rescale a UI editing power to the range stored for a predefined direction.

	    Args:
	        direction_name: key looked up in ``PREDEFINED_EDITINGS_DATA``.
	        directon_power: power on the UI scale, where a magnitude of 15 maps to
	            the stored bound.

	    Returns:
	        ``directon_power`` unchanged when the direction has no entry.
	        Otherwise ``directon_power / 15`` multiplied by the magnitude of the
	        positive-side bound (for powers > 0) or of the negative-side bound
	        (for powers <= 0), negated when the entry is marked as reversed.
	    """
	    entry = PREDEFINED_EDITINGS_DATA.get(direction_name)
	    if entry is None:
	        return directon_power

	    (negative_bound, positive_bound), flip_sign = entry
	    bound = positive_bound if directon_power > 0 else negative_bound
	    scaled = directon_power / 15 * abs(bound)
	    return -scaled if flip_sign else scaled


	def get_bytes(img):
	    """Encode a PIL image as JPEG bytes.

	    Args:
	        img: a PIL image, or None.

	    Returns:
	        None when ``img`` is None, otherwise the JPEG-encoded bytes.

	    Images whose mode the JPEG writer rejects (for example "RGBA", "LA" or
	    "P") are converted to "RGB" first; without this ``img.save`` raises
	    ``OSError`` for them.
	    """
	    if img is None:
	        return img

	    # Modes Pillow's JPEG encoder accepts as-is; anything else is converted.
	    jpeg_writable_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
	    if img.mode not in jpeg_writable_modes:
	        img = img.convert("RGB")

	    buffered = BytesIO()
	    img.save(buffered, format="JPEG")
	    return buffered.getvalue()


	def bytes_to_image(image: bytes) -> Image.Image:
	    """Open encoded image bytes as a PIL image via an in-memory buffer."""
	    return Image.open(BytesIO(image))


	def edit_image(orig_image, edit_direction, edit_power, align, mask, progress=gr.Progress(track_tqdm=True)):
	    """Request an edit from the SFE gRPC server and turn the reply into UI updates.

	    Args:
	        orig_image: PIL image to edit; a falsy value means nothing was uploaded.
	        edit_direction: direction name as chosen in the UI, or a text-prompt
	            direction string such as the ones built by ``edit_image_clip``.
	        edit_power: power on the UI scale; rescaled by ``denormalize_power``
	            when the direction has an entry in ``PREDEFINED_EDITINGS_DATA``.
	        align: forwarded as the request's ``align`` field.
	        mask: optional PIL mask image; when None the placeholder ``b"mask"``
	            is sent instead.
	        progress: Gradio progress tracker; not used inside the body.

	    Returns:
	        A 3-tuple of ``gr.update`` objects for, in order, the edited image,
	        the inversion image and the error textbox. On failure both images
	        are hidden and the error textbox is shown with a message.

	    Raises:
	        KeyError: if the ``SERVER`` environment variable is not set.
	    """
	    if not orig_image:
	        return gr.update(visible=False), gr.update(visible=False), gr.update(value="Need to upload an input image ❗", visible=True)

	    # Rescale BEFORE renaming: PREDEFINED_EDITINGS_DATA is keyed by the UI
	    # names, so looking up an already-swapped name (e.g. "fs_smiling") would
	    # miss and the raw slider value would be sent for the renamed directions.
	    edit_power = denormalize_power(edit_direction, edit_power)
	    edit_direction = DIRECTIONS_NAME_SWAP.get(edit_direction, edit_direction)

	    orig_image_bytes = get_bytes(orig_image)
	    mask_bytes = get_bytes(mask)
	    if mask_bytes is None:
	        # Placeholder payload sent when the user supplied no mask.
	        mask_bytes = b"mask"

	    with grpc.insecure_channel(os.environ["SERVER"]) as channel:
	        stub = SFEServiceStub(channel)

	        output: SFEResponse = stub.edit(
	            SFERequest(orig_image=orig_image_bytes, direction=edit_direction, power=edit_power, align=align, mask=mask_bytes, use_cache=True)
	        )

	    # The server reports an alignment failure in-band through the image field.
	    if output.image == b"aligner error":
	        return gr.update(visible=False), gr.update(visible=False), gr.update(value="Face aligner can not find face in your image 😢 Try to upload another one", visible=True)

	    output_edited = bytes_to_image(output.image)
	    output_inv = bytes_to_image(output.inv_image)
	    return gr.update(value=output_edited, visible=True), gr.update(value=output_inv, visible=True), gr.update(visible=False)


	def edit_image_clip(orig_image, neutral_prompt, target_prompt, disentanglement, edit_power, align, mask, edit_method, progress=gr.Progress(track_tqdm=True)):
	    """Run a text-prompt edit by encoding the prompts into a direction string.

	    The direction is ``<prefix>_<neutral_prompt>_<target_prompt>_<disentanglement>``.
	    For ``edit_method == "StyleClip"`` the prefix is ``styleclip_global`` and
	    the values are used as given. For any other method the prefix is
	    ``deltaedit``, the power is divided by 10 and the disentanglement by 3
	    before the string is built.

	    Returns:
	        Whatever ``edit_image`` returns for the constructed direction (it is
	        called with ``progress=None``).
	    """
	    if edit_method == "StyleClip":
	        method_prefix = "styleclip_global"
	    else:
	        method_prefix = "deltaedit"
	        edit_power = edit_power / 10
	        disentanglement = disentanglement / 3

	    direction_parts = (method_prefix, neutral_prompt, target_prompt, str(disentanglement))
	    edit_direction = "_".join(direction_parts)
	    return edit_image(orig_image, edit_direction, edit_power, align, mask, progress=None)


	def get_mask(input_image, align, mask_trashhold, progress=gr.Progress(track_tqdm=True)):
	    """Ask the SFE gRPC server for a mask and turn the reply into UI updates.

	    Args:
	        input_image: PIL image to build the mask from; a falsy value means
	            nothing was uploaded.
	        align: forwarded as the request's ``align`` field.
	        mask_trashhold: forwarded as the request's ``trashold`` field.
	        progress: Gradio progress tracker; not used inside the body.

	    Returns:
	        A 2-tuple of ``gr.update`` objects for, in order, the generated mask
	        image and the error textbox. On failure the mask is hidden and the
	        error textbox is shown with a message.
	    """
	    def _report(text):
	        # Hide the mask preview and reveal the error textbox carrying `text`.
	        return gr.update(visible=False), gr.update(value=text, visible=True)

	    if not input_image:
	        return _report("Need to upload an input image ❗")

	    payload = get_bytes(input_image)

	    with grpc.insecure_channel(os.environ["SERVER"]) as channel:
	        client = SFEServiceStub(channel)
	        reply: SFEResponseMask = client.generate_mask(
	            SFERequestMask(orig_image=payload, trashold=mask_trashhold, align=align, use_cache=True)
	        )

	    # Failures come back in-band as sentinel byte strings in the mask field.
	    known_failures = {
	        b"aligner error": "Face aligner can not find face in your image 😢 Try to upload another one",
	        b"masker face parser error": "Masker's face detector can't find face in your image 😢 Try to upload another one",
	    }
	    failure_text = known_failures.get(reply.mask)
	    if failure_text is not None:
	        return _report(failure_text)

	    return gr.update(value=bytes_to_image(reply.mask), visible=True), gr.update(visible=False)


	def get_demo():
	"""Build the Gradio Blocks UI for the StyleFeatureEditor demo and return it.

	Reads "editings_table.csv" for the table of predefined editings, creates the
	input, editing, mask and output widgets, and wires the three buttons to
	`edit_image`, `edit_image_clip` and `get_mask`.

	Returns:
	    The `gr.Blocks` object; it is not launched here.
	"""
	# Load the editings table and centre-align both its cells and its headers.
	editings_table = pd.read_csv("editings_table.csv")
	editings_table = editings_table.style.set_properties(**{"text-align": "center"})
	editings_table = editings_table.set_table_styles([dict(selector="th", props=[("text-align", "center")])])

	with gr.Blocks() as demo:
	gr.Markdown("## StyleFeatureEditor")
	# Header row of badge links (paper, GitHub, Hugging Face, Colab).
	# NOTE(review): the arXiv badge text reads 2404.01094 while its link targets
	# 2406.10601 -- confirm which identifier is intended.
	gr.Markdown(
	'<div style="display: flex; align-items: center; gap: 10px;">'
	'<span>Official Gradio demo for StyleFeatureEditor:</span>'
	'<a href="https://arxiv.org/abs/2406.10601"><img src="https://img.shields.io/badge/arXiv-2404.01094-b31b1b.svg" height=22.5></a>'
	'<a href="https://github.com/AIRI-Institute/StyleFeatureEditor"><img src="https://img.shields.io/badge/github-%23121011.svg?style=for-the-badge&logo=github&logoColor=white" height=22.5></a>'
	'<a href="https://huggingface.co./AIRI-Institute/StyleFeatureEditor"><img src="https://huggingface.co./datasets/huggingface/badges/resolve/main/model-on-hf-md.svg" height=22.5></a>'
	'<a href="https://colab.research.google.com/#fileId=https://github.com/AIRI-Institute/StyleFeatureEditor/blob/main/notebook/StyleFeatureEditor_inference.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" height=22.5></a>'
	'</div>'
	)
	with gr.Row():
	with gr.Column():
	# Image to edit (delivered to callbacks as a PIL image) and the alignment toggle.
	with gr.Accordion("Input Image", open=True):
	input_image = gr.Image(label="Input image you want to edit", type="pil", height=300)
	align = gr.Checkbox(label="Align (crop and resize) the input image. For SFE to work well, it is necessary to align the input if it is not.", value=True)
	# Predefined directions: help text, reference table, direction/power controls.
	with gr.Accordion("Predefined Editings", open=True):
	with gr.Accordion("Description", open=False):
	gr.Markdown('''A branch of predefined editings gained from InterfaceGAN, Stylespace, GANSpace and StyleClip mappers. Look at the table below to see which direction is responsible for which editings.

	Editing power -- the greater the absolute value of this parameter, the more the selected edit will appear. Better use values in the range 7 - 13, lower values may not give the desired edit, higher values -- on the contrary -- may apply edit too much and create artefacts.

	Positive effect -- the effect applied to the image when positive editing power is used.

	Negative effect -- the effect applied to the image when negative editing power is used. It is usually the opposite of the positive effect.
	'''
	)

	# Read-only view of the styled editings table; all four columns render as markdown.
	gr.Dataframe(value=editings_table, datatype=["markdown","markdown","markdown","markdown"], interactive=False, wrap=True,
	column_widths=["25px", "25px", "25px", "25px"], height=300) # 100
	with gr.Row():
	# The dropdown choices are the keys of PREDEFINED_EDITINGS_DATA, in definition order.
	predef_editing_direction = gr.Dropdown(list(PREDEFINED_EDITINGS_DATA.keys()), label="Editing direction", value="smile")
	predef_editing_power = gr.Slider(-20, 20, value=7, step=0.1, label="Editing power")
	btn_predef = gr.Button("Edit image")

	# Text-prompt editing: method choice, the two prompts, power and disentanglement.
	with gr.Accordion("Text Prompt Editings", open=False):
	with gr.Accordion("Description", open=False):
	gr.Markdown('''You can alse use editings from text prompts via StyleClip Global Mapper (https://arxiv.org/abs/2103.17249) or DeltaEdit (https://arxiv.org/abs/2303.06285). You just need to choose:
	Method -- method to use, StyleClip or DeltaEdit

	Editing power -- the greater the absolute value of this parameter, the more the selected edit will appear.

	Neutral prompt -- some neutral description of the original image (e.g. "a face").

	Target prompt -- text that contains the desired edit (e.g. "a smilling face").

	Disentanglement -- positive number, the less this attribute -- the more related attributes will also be changed (e.g. for grey hair editing, wrinkle, skin colour and glasses may also be edited)
	''')
	edit_method = gr.Dropdown(["StyleClip", "DeltaEdit"], label="Editing method", value="StyleClip")
	neutral_prompt = gr.Textbox(value="face with hair", label="Neutreal prompt (e.g. 'a face')")
	target_prompt = gr.Textbox(value="face with fire hair", label="Target prompt (e.g. 'a smilling face')")
	styleclip_editing_power = gr.Slider(-50, 50, value=10, step=1, label="Editing power")
	disentanglement = gr.Slider(0, 1, value=0.1, step=0.01, label="Disentanglement")
	btn_clip = gr.Button("Edit image")

	# Optional mask upload, plus the controls for generating a mask from an image.
	with gr.Accordion("Mask settings (optional)", open=False):
	gr.Markdown('''If some artefacts appear during editing (or some details disappear), you can specify an image mask to select which regions of the image should not be edited. The mask must have a size of 1024 x 1024 and represent an inversion of the original image.

	'''
	)
	mask = gr.Image(label="Upload mask for editing", type="pil", height=350)
	with gr.Accordion("Mask generating", open=False):
	gr.Markdown("Here you can generate mask that separates face (with hair) from the background.")
	with gr.Row():
	input_mask = gr.Image(label="Input image for mask generating", type="pil", height=240)
	output_mask = gr.Image(label="Generated mask", height=240)
	# Hidden until `get_mask` reports a failure.
	error_message_mask = gr.Textbox(label="⚠️ Error ⚠️", visible=False, elem_classes="error-message")
	align_mask = gr.Checkbox(label="To align (crop and resize image) or not. Only uncheck this box if the original image has already been aligned.", value=True)
	mask_trashhold = gr.Slider(0, 1, value=0.9, step=0.001, label="Mask trashold",
	info="The more this parameter, the more is face part, and the less is background part.")
	btn_mask = gr.Button("Generate mask")

	# Result widgets: inversion and edited images, plus the error textbox for edits.
	with gr.Column():
	with gr.Row():
	output_inv = gr.Image(label="Inversion result", visible=True)
	output_edit = gr.Image(label="Editing result", visible=True)
	# Hidden until `edit_image` reports a failure.
	error_message = gr.Textbox(label="⚠️ Error ⚠️", visible=False, elem_classes="error-message")
	gr.Markdown("If artefacts appear during editing -- try lowering the editing power or using a mask.")
	# Example files that fill the input image, the mask, and the mask-generation input.
	gr.Examples(
	label="Input Examples for editing",
	examples=[
	["images/scarlet.jpg"],
	["images/gosling.jpg"],
	["images/robert.png"],
	["images/smith.jpg"],
	["images/watson.jpeg"],
	],
	inputs=[input_image],
	examples_per_page=5
	)
	gr.Examples(
	label="Mask Examples for editing",
	examples=[
	["images/scarlet_mask.webp"],
	["images/gosling_mask.webp"],
	["images/robert_mask.webp"],
	["images/smith_mask.webp"],
	["images/watson_mask.webp"],
	],
	inputs=[mask]
	)
	gr.Examples(
	label="Input Examples for Mask generation",
	examples=[
	["images/scarlet.jpg"],
	["images/gosling.jpg"],
	["images/robert.png"],
	["images/smith.jpg"],
	["images/watson.jpeg"],
	],
	inputs=[input_mask]
	)


	# Button wiring. The order of `outputs` must match the order of the tuples
	# returned by the callbacks: (edited image, inversion image, error) for the
	# two edit buttons and (mask, error) for the mask button.
	btn_predef.click(
	fn=edit_image,
	inputs=[input_image, predef_editing_direction, predef_editing_power, align, mask],
	outputs=[output_edit, output_inv, error_message]
	)
	btn_clip.click(
	fn=edit_image_clip,
	inputs=[input_image, neutral_prompt, target_prompt, disentanglement, styleclip_editing_power, align, mask, edit_method],
	outputs=[output_edit, output_inv, error_message]
	)
	btn_mask.click(
	fn=get_mask,
	inputs=[input_mask, align_mask, mask_trashhold],
	outputs=[output_mask, error_message_mask]
	)

	# Citation block (BibTeX) for the paper.
	gr.Markdown('''To cite the paper by the authors
	```
	@InProceedings{Bobkov_2024_CVPR,
	author = {Bobkov, Denis and Titov, Vadim and Alanov, Aibek and Vetrov, Dmitry},
	title = {The Devil is in the Details: StyleFeatureEditor for Detail-Rich StyleGAN Inversion and High Quality Image Editing},
	booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
	month = {June},
	year = {2024},
	pages = {9337-9346}
	}
	```
	''')
	return demo


	if __name__ == "__main__":
	    # Script entry point: build the UI, then serve it on address 0.0.0.0, port 7860.
	    demo = get_demo()
	    demo.launch(
	        server_name="0.0.0.0",
	        server_port=7860,
	    )