"""Gradio chat demo for iterative image editing with InstructPix2Pix.

The user uploads an image and types an editing instruction. Every further
instruction is applied to the previously edited image (forward editing only).
"""

import contextlib
import datetime
import os
import random
import time

import gradio as gr
import PIL
import requests
import torch
from PIL import Image

from diffusers import (
    StableDiffusionInstructPix2PixPipeline,
    EulerAncestralDiscreteScheduler,
)

# Loading from Diffusers Library
model_id = "timbrooks/instruct-pix2pix"
# The pipeline's default safety checker is left enabled (no
# `safety_checker=None` override), matching the note shown in the page header.
pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
    model_id, torch_dtype=torch.float16, revision="fp16"
)
pipe.to("cuda")
pipe.enable_xformers_memory_efficient_attention()
pipe.unet.to(memory_format=torch.channels_last)

counter = 0

help_text = """ Note: Functionality to revert your changes to previous/original image can be released in future versions. For now only forward editing is available.

Some notes from the official [instruct-pix2pix](https://huggingface.co./spaces/timbrooks/instruct-pix2pix) Space by the authors
and from the official [Diffusers docs](https://huggingface.co./docs/diffusers/main/en/api/pipelines/stable_diffusion/pix2pix) -

If you're not getting what you want, there may be a few reasons:
1. Is the image not changing enough? Your guidance_scale may be too low. It should be >1. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, usually at the expense of lower image quality. This value dictates how similar the output should be to the input. This pipeline requires a value of at least `1`. It's possible your edit requires larger changes from the original image.
2. Alternatively, you can toggle image_guidance_scale. Image guidance scale is to push the generated image towards the inital image. Image guidance scale is enabled by setting `image_guidance_scale > 1`. Higher image guidance scale encourages to generate images that are closely linked to the source image `image`, usually at the expense of lower image quality.
3. I have observed that rephrasing the instruction sometimes improves results (e.g., "turn him into a dog" vs. "make him a dog" vs. "as a dog").
4. Increasing the number of steps sometimes improves results.
5. Do faces look weird? The Stable Diffusion autoencoder has a hard time with faces that are small in the image. Try:
    * Cropping the image so the face takes up a larger portion of the frame.
"""

# Short phrases prepended to the image in each chatbot reply.
_REPLY_PREFIXES = (
    "There you go",
    "Enjoy your image!",
    "Nice work! Wonder what you gonna do next!",
    "Way to go!",
    "Does this work for you?",
    "Something like this?",
)


def _new_image_path():
    """Return a fresh relative path for saving an edited image.

    A new name is generated for every save instead of appending to the
    previous name, so the path length stays bounded no matter how many edits
    a session performs. The timestamp keeps successive names distinct.
    """
    tag = random.randint(0, 1000000)
    return f"./edited_image_{tag}_{int(time.time())}.png"


def _remove_quietly(path):
    """Delete ``path`` if it exists; a missing or empty path is not an error."""
    if not path:
        return
    with contextlib.suppress(FileNotFoundError):
        os.remove(path)


def chat(
    image_in,
    in_steps,
    in_guidance_scale,
    in_img_guidance_scale,
    image_hid,
    img_name,
    counter_out,
    prompt,
    history,
    progress=gr.Progress(track_tqdm=True),
):
    """Apply one editing instruction and append the result to the chat.

    Args:
        image_in: Image uploaded by the user; used for the first edit only.
        in_steps: Number of inference steps (converted with ``int``).
        in_guidance_scale: Text guidance scale (converted with ``float``).
        in_img_guidance_scale: Image guidance scale (converted with ``float``).
        image_hid: Result of the previous edit; used once ``counter_out > 0``.
        img_name: Path of the file written by the previous edit, if any.
        counter_out: Number of edits already performed in this session.
        prompt: The editing instruction typed by the user.
        history: List of ``(prompt, response)`` pairs so far, or ``None``.
        progress: Gradio progress tracker.

    Returns:
        A 5-tuple ``(history, history, edited_image, image_path, counter)``
        matching the outputs wired to the button: chatbot content, chat
        state, the hidden image for the next round, the path of the file
        just written, and the incremented edit counter.

    Raises:
        gr.Error: If there is no image to edit.
    """
    start = datetime.datetime.now()
    progress(0, desc="Starting...")

    # After the first edit, keep editing the previous result rather than the
    # original upload (forward editing only; revert is not implemented).
    is_follow_up = counter_out > 0
    source_image = image_hid if is_follow_up else image_in
    if source_image is None:
        # Surface a readable message instead of an opaque pipeline failure.
        raise gr.Error("Please upload an image before requesting an edit.")

    edited_image = pipe(
        prompt,
        image=source_image,
        num_inference_steps=int(in_steps),
        guidance_scale=float(in_guidance_scale),
        image_guidance_scale=float(in_img_guidance_scale),
    ).images[0]

    if is_follow_up:
        # The previous round's file is superseded by the one written below.
        _remove_quietly(img_name)

    image_path = _new_image_path()
    edited_image.save(image_path)

    print(f"Ran in {datetime.datetime.now() - start}")

    history = history or []
    # Embed the saved file in the reply so the chatbot actually shows the
    # edited image; without this markup the result is never displayed.
    # NOTE(review): relies on Gradio serving working-directory files through
    # the "/file=" route - confirm against the installed Gradio version.
    image_tag = '<img src="/file=' + os.path.basename(image_path) + '">'
    response = random.choice(_REPLY_PREFIXES) + image_tag
    history.append((prompt, response))

    return history, history, edited_image, image_path, counter_out + 1


# NOTE(review): the layout nesting below was reconstructed from a source whose
# indentation had been lost - confirm the Row/Column/Accordion grouping.
with gr.Blocks() as demo:
    gr.Markdown(
        """

Chat Interface with InstructPix2Pix: Give Image Editing Instructions

For faster inference without waiting in the queue, you may duplicate the space and upgrade to GPU in settings.
Duplicate Space **Note: Please be advised that a safety checker has been implemented in this public space. Any attempts to generate inappropriate or NSFW images will result in the display of a black screen as a precautionary measure for the protection of all users. We appreciate your cooperation in maintaining a safe and appropriate environment for all members of our community.**

"""
    )
    with gr.Row():
        with gr.Column():
            image_in = gr.Image(type="pil", label="Original Image")
            text_in = gr.Textbox()
            state_in = gr.State()
            b1 = gr.Button("Edit the image!")
            with gr.Accordion(
                "Advance settings for Training and Inference", open=False
            ):
                gr.Markdown(
                    "Advance settings for - Number of Inference steps, Guidanace scale, and Image guidance scale."
                )
                in_steps = gr.Number(
                    label="Enter the number of Inference steps", value=20
                )
                in_guidance_scale = gr.Slider(
                    1, 10, step=0.5, label="Set Guidance scale", value=7.5
                )
                in_img_guidance_scale = gr.Slider(
                    1, 10, step=0.5, label="Set Image Guidance scale", value=1.5
                )
            # Hidden components carry per-session state between clicks: the
            # last edited image, its file path, and the number of edits so far.
            image_hid = gr.Image(type="pil", visible=False)
            img_name_temp_out = gr.Textbox(visible=False)
            counter_out = gr.Number(visible=False, value=0, precision=0)
        chatbot = gr.Chatbot()
    b1.click(
        chat,
        [
            image_in,
            in_steps,
            in_guidance_scale,
            in_img_guidance_scale,
            image_hid,
            img_name_temp_out,
            counter_out,
            text_in,
            state_in,
        ],
        [chatbot, state_in, image_hid, img_name_temp_out, counter_out],
    )
    gr.Markdown(help_text)

demo.queue(concurrency_count=10)
demo.launch(debug=True, width="80%", height=2000)