# Page header captured with the file (not part of the program):
#   author avatar: "EaindraKyaw's picture"
#   commit: "Update app.py" (3501ea0, verified)
import io
import matplotlib.pyplot as plt
import requests
import inflect
from PIL import Image
def load_image_from_url(url, timeout=30):
    """Download an image over HTTP(S) and open it with PIL.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can block indefinitely on a stalled server.

    Returns:
        The image object returned by ``Image.open`` for the response body.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of handing an error page to PIL.
    response.raise_for_status()
    # ``response.content`` has transfer encodings (gzip/deflate) decoded,
    # unlike the raw socket stream, and gives PIL a seekable buffer.
    return Image.open(io.BytesIO(response.content))
def render_results_in_image(in_pil_img, in_results):
    """Draw detection boxes and labels on an image and return the rendering.

    Args:
        in_pil_img: Image to annotate; it is handed to ``imshow``.
        in_results: Iterable of predictions. Each one is indexed with
            ``'box'`` (itself indexed with ``'xmin'``, ``'ymin'``, ``'xmax'``,
            ``'ymax'``), ``'label'`` and ``'score'``; the score is multiplied
            by 100 and shown as a percentage.

    Returns:
        A PIL image opened from the PNG rendering of the annotated figure.
    """
    # Hold an explicit figure handle rather than relying on pyplot's implicit
    # "current figure", so exactly this figure is saved and closed.
    fig, ax = plt.subplots(figsize=(16, 10))
    try:
        ax.imshow(in_pil_img)

        for prediction in in_results:
            box = prediction['box']
            x, y = box['xmin'], box['ymin']
            w = box['xmax'] - x
            h = box['ymax'] - y

            ax.add_patch(
                plt.Rectangle(
                    (x, y),
                    w,
                    h,
                    fill=False,
                    color="green",
                    linewidth=2,
                )
            )
            ax.text(
                x,
                y,
                f"{prediction['label']}: {round(prediction['score']*100, 1)}%",
                color='red',
            )

        ax.axis("off")

        # Render the figure into an in-memory PNG.
        img_buf = io.BytesIO()
        fig.savefig(img_buf, format='png', bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even when drawing or saving fails;
        # otherwise every failed call leaves an open figure behind.
        plt.close(fig)

    img_buf.seek(0)
    return Image.open(img_buf)
def summarize_predictions_natural_language(predictions):
    """Describe the detected objects in one English sentence.

    Predictions are grouped by ``prediction['label']`` and counted, then
    rendered as e.g. ``"In this image, there are two cats, one dog and three
    buses."``. Labels appear in order of first occurrence.

    Args:
        predictions: Iterable of predictions, each indexed with ``'label'``.

    Returns:
        The summary sentence. When there are no predictions at all, a fixed
        "no objects were detected" sentence is returned instead of a
        dangling "there are."
    """
    counts = {}
    for prediction in predictions:
        label = prediction['label']
        counts[label] = counts.get(label, 0) + 1

    if not counts:
        return "In this image, no objects were detected."

    p = inflect.engine()
    # Let inflect pluralize (count-aware) instead of appending a bare "s",
    # which produced forms such as "buss" or "persons".
    phrases = [
        f"{p.number_to_words(count)} {p.plural_noun(label, count)}"
        for label, count in counts.items()
    ]

    if len(phrases) > 1:
        listing = ", ".join(phrases[:-1]) + " and " + phrases[-1]
    else:
        listing = phrases[0]

    # Agree the verb with the first listed item ("there is one dog ...").
    first_count = next(iter(counts.values()))
    verb = "is" if first_count == 1 else "are"
    return f"In this image, there {verb} {listing}."
##### To ignore warnings #####
import warnings
import logging
from transformers import logging as hf_logging
def ignore_warnings():
    """Silence a fixed set of noisy warnings and keep logging at ERROR level.

    Installs "ignore" filters for three warning-message prefixes, configures
    the root logger via ``logging.basicConfig`` at ``ERROR``, and sets the
    transformers logging verbosity to errors only.
    """
    # Each entry is matched against the start of the warning message.
    noisy_prefixes = (
        "Some weights of the model checkpoint",
        "Could not find image processor class",
        "The `max_size` parameter is deprecated",
    )
    for prefix in noisy_prefixes:
        warnings.filterwarnings(action="ignore", message=prefix)

    # Libraries that report through the logging module rather than warnings.
    logging.basicConfig(level=logging.ERROR)
    hf_logging.set_verbosity_error()
########
from transformers import pipeline
from PIL import Image
import gradio as gr
import numpy as np
import io
# Shared object-detection pipeline; created on first use so the model is
# loaded once per process rather than once per request.
_od_pipe = None


def _get_od_pipe():
    """Return the shared object-detection pipeline, building it on first use."""
    global _od_pipe
    if _od_pipe is None:
        _od_pipe = pipeline("object-detection", model="facebook/detr-resnet-50")
    return _od_pipe


def processed_image(image):
    """Run object detection on an image and return the annotated result.

    Args:
        image: The uploaded image (the interface is configured to pass a PIL
            image), or ``None`` when nothing was uploaded.

    Returns:
        A ``(annotated_image, summary_text)`` tuple. For a ``None`` input the
        tuple is ``(None, <prompt to upload an image>)`` so the model is never
        invoked without an image.
    """
    if image is None:
        return None, "Please upload an image first."

    detections = _get_od_pipe()(image)
    annotated = render_results_in_image(image, detections)
    text = summarize_predictions_natural_language(detections)
    return annotated, text
iface = gr.Interface(
    processed_image,  # Function to process the image
    inputs=gr.Image(type="pil"),  # Image upload input
    outputs=[gr.Image(type="pil"), "text"],  # Annotated image and text summary
)

# Text-to-speech pipeline, created on first use.
_tts_pipe = None


def narrate_text(text):
    """Synthesize speech for ``text`` with the VITS text-to-speech pipeline.

    This code previously ran as bare statements after ``iface.launch()`` and
    referenced an undefined module-level ``text``, which raised ``NameError``
    once the launch call returned. It is kept here as a callable helper.

    NOTE(review): not yet wired into the interface; add an audio output and
    call this from the processing function to enable narration.

    Args:
        text: The sentence to narrate.

    Returns:
        Whatever the text-to-speech pipeline returns for ``text``.
    """
    global _tts_pipe
    if _tts_pipe is None:
        _tts_pipe = pipeline("text-to-speech", model="kakao-enterprise/vits-vctk")
    return _tts_pipe(text)


# Start the web UI last so that everything above is defined before serving.
iface.launch()