# Spaces: Running
import io
from collections import Counter

import inflect
import matplotlib.pyplot as plt
import requests
from PIL import Image
def load_image_from_url(url, timeout=30):
    """Download an image over HTTP(S) and open it as a PIL image.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        The ``PIL.Image.Image`` opened from the response stream.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    response = requests.get(url, stream=True, timeout=timeout)
    # Fail here with a clear HTTP error instead of letting PIL try to decode
    # an error page as image data.
    response.raise_for_status()
    # The raw stream is not content-decoded by default; ask urllib3 to undo
    # gzip/deflate so PIL receives the actual image bytes.
    response.raw.decode_content = True
    return Image.open(response.raw)
def render_results_in_image(in_pil_img, in_results):
    """Draw object-detection results on top of an image.

    Each prediction is drawn as a green rectangle with a red
    ``"<label>: <score>%"`` caption at the box's (xmin, ymin) corner.

    Args:
        in_pil_img: Image to annotate; handed to ``imshow`` as-is.
        in_results: Iterable of prediction dicts. Each must provide
            ``'label'``, ``'score'`` (multiplied by 100 for display) and a
            ``'box'`` dict with ``'xmin'``, ``'ymin'``, ``'xmax'``, ``'ymax'``.

    Returns:
        A PIL image opened from the PNG rendering of the annotated figure.
    """
    # Use an explicit figure/axes pair so the drawing does not depend on
    # whichever figure pyplot currently considers "current".
    fig, ax = plt.subplots(figsize=(16, 10))
    try:
        ax.imshow(in_pil_img)

        for prediction in in_results:
            box = prediction['box']
            x, y = box['xmin'], box['ymin']
            w = box['xmax'] - box['xmin']
            h = box['ymax'] - box['ymin']

            ax.add_patch(plt.Rectangle((x, y),
                                       w,
                                       h,
                                       fill=False,
                                       color="green",
                                       linewidth=2))
            ax.text(
                x,
                y,
                f"{prediction['label']}: {round(prediction['score']*100, 1)}%",
                color='red'
            )

        ax.axis("off")

        # Render into memory rather than to disk, then reopen as a PIL image.
        img_buf = io.BytesIO()
        fig.savefig(img_buf, format='png',
                    bbox_inches='tight',
                    pad_inches=0)
        img_buf.seek(0)
        return Image.open(img_buf)
    finally:
        # Always release the figure, even when drawing or saving fails, so
        # figures do not pile up across calls.
        plt.close(fig)
def summarize_predictions_natural_language(predictions):
    """Describe detected objects in one English sentence.

    Predictions are counted per label (in order of first appearance) and
    rendered as e.g. ``"In this image, there are two cats, one dog, and
    three people."``.

    Args:
        predictions: Iterable of dicts, each with a ``'label'`` string.

    Returns:
        The summary sentence. When there are no predictions a fixed
        "no detected objects" sentence is returned.
    """
    label_counts = Counter(prediction['label'] for prediction in predictions)
    if not label_counts:
        # Nothing detected: avoid the dangling "there are." sentence.
        return "In this image, there are no detected objects."

    p = inflect.engine()
    # Let inflect pick the plural form ("buses", "people", "knives") instead
    # of blindly appending "s"; with a count of 1 the label is kept singular.
    phrases = tuple(
        f"{p.number_to_words(count)} {p.plural(label, count)}"
        for label, count in label_counts.items()
    )
    # A lone object reads "there is one cat"; everything else takes "are".
    verb = "is" if sum(label_counts.values()) == 1 else "are"
    # inflect's join inserts the commas and the final "and".
    return f"In this image, there {verb} {p.join(phrases)}."
##### To ignore warnings ##### | |
import warnings | |
import logging | |
from transformers import logging as hf_logging | |
def ignore_warnings():
    """Silence a few known-noisy warnings and restrict logging to errors."""
    # Warning messages to suppress; each is registered as an "ignore" filter
    # in this order.
    noisy_messages = (
        "Some weights of the model checkpoint",
        "Could not find image processor class",
        "The `max_size` parameter is deprecated",
    )
    for noisy_message in noisy_messages:
        warnings.filterwarnings("ignore", message=noisy_message)

    # Standard-library logging and the transformers logger: errors only.
    logging.basicConfig(level=logging.ERROR)
    hf_logging.set_verbosity_error()
######## | |
from transformers import pipeline | |
from PIL import Image | |
import gradio as gr | |
import numpy as np | |
import io | |
# Object-detection pipeline, created on first use and then reused so the
# model is not reloaded for every request.
_OD_PIPE = None


def _get_object_detector():
    """Return the shared object-detection pipeline, building it on first use."""
    global _OD_PIPE
    if _OD_PIPE is None:
        _OD_PIPE = pipeline("object-detection", model="facebook/detr-resnet-50")
    return _OD_PIPE


def processed_image(image):
    """Run object detection on an image and return the annotated result.

    Args:
        image: The uploaded image (a PIL image), or ``None`` when nothing
            was uploaded.

    Returns:
        A ``(annotated_image, summary_text)`` tuple. For ``None`` input the
        tuple is ``(None, "Please upload an image.")``.
    """
    if image is None:
        # Nothing to analyse; answer with a hint instead of failing inside
        # the detection pipeline.
        return None, "Please upload an image."

    pl_out = _get_object_detector()(image)
    annotated_image = render_results_in_image(image, pl_out)
    text = summarize_predictions_natural_language(pl_out)
    return annotated_image, text
# Gradio UI: one uploaded image in; annotated image and text summary out.
iface = gr.Interface(
    fn=processed_image,  # Function to process the image
    inputs=gr.Image(type="pil"),  # Image upload input
    outputs=[gr.Image(type="pil"), "text"],  # Annotated image and summary
)


def narrate_text(text):
    """Convert *text* to speech with the VITS VCTK text-to-speech pipeline.

    This step is a function because the summary string only exists inside
    ``processed_image``; at module level there is no ``text`` variable to
    narrate, so running it there could only raise ``NameError``.

    Args:
        text: The sentence to narrate.

    Returns:
        Whatever the text-to-speech pipeline returns for ``text``.
    """
    tts_pipe = pipeline("text-to-speech", model="kakao-enterprise/vits-vctk")
    return tts_pipe(text)


iface.launch()