Spaces:

EaindraKyaw
/

Object_Detection

Running

App Files Files Community

Object_Detection / app.py

EaindraKyaw

Update app.py

3501ea0 verified 3 days ago

raw

history blame contribute delete

3.41 kB


	import io
	import matplotlib.pyplot as plt
	import requests
	import inflect
	from PIL import Image

	def load_image_from_url(url, timeout=10):
	    """Download an image over HTTP(S) and return it as a PIL image.

	    Args:
	        url: Address of the image to fetch.
	        timeout: Seconds to wait for the server before giving up, so a
	            stalled host cannot hang the caller indefinitely.

	    Returns:
	        A PIL image opened from an in-memory copy of the response body.

	    Raises:
	        requests.HTTPError: If the server answers with a 4xx/5xx status.
	        requests.RequestException: On connection failure or timeout.
	    """
	    response = requests.get(url, timeout=timeout)
	    # Fail loudly on error pages instead of feeding HTML to the image decoder.
	    response.raise_for_status()
	    # ``response.content`` has transfer encodings (gzip/deflate) already
	    # undone, unlike the raw socket stream, and the buffer stays valid after
	    # the HTTP connection is released.
	    return Image.open(io.BytesIO(response.content))

	def render_results_in_image(in_pil_img, in_results):
	    """Draw detection boxes and labels on an image and return the rendering.

	    Args:
	        in_pil_img: Image to annotate; it is passed straight to ``imshow``.
	        in_results: Iterable of predictions. Each prediction is indexed with
	            ``'box'`` (itself indexed with ``'xmin'``, ``'ymin'``, ``'xmax'``,
	            ``'ymax'``), ``'label'`` and ``'score'``.

	    Returns:
	        A PIL image opened from the PNG rendering of the annotated figure.
	    """
	    # Hold an explicit handle on the figure so that exactly this figure is
	    # closed, even when drawing or saving raises.
	    fig, ax = plt.subplots(figsize=(16, 10))
	    try:
	        ax.imshow(in_pil_img)

	        for prediction in in_results:
	            box = prediction['box']
	            x, y = box['xmin'], box['ymin']
	            w = box['xmax'] - box['xmin']
	            h = box['ymax'] - box['ymin']

	            ax.add_patch(
	                plt.Rectangle(
	                    (x, y),
	                    w,
	                    h,
	                    fill=False,
	                    color="green",
	                    linewidth=2,
	                )
	            )
	            # The score is multiplied by 100 and shown as a percentage.
	            ax.text(
	                x,
	                y,
	                f"{prediction['label']}: {round(prediction['score']*100, 1)}%",
	                color='red',
	            )

	        ax.axis("off")

	        # Render into memory rather than onto disk.
	        img_buf = io.BytesIO()
	        fig.savefig(img_buf, format='png', bbox_inches='tight', pad_inches=0)
	    finally:
	        # Always release the figure so failed requests do not pile up open
	        # figures, and so nothing is displayed.
	        plt.close(fig)

	    img_buf.seek(0)
	    return Image.open(img_buf)

	def summarize_predictions_natural_language(predictions):
	    """Describe the detected objects as a single English sentence.

	    Predictions are grouped by label in first-seen order. Each group is
	    rendered as "<count in words> <label>", with the label pluralised when
	    the count is not one, e.g.
	    "In this image, there are two dogs, one cat, and three people."

	    Args:
	        predictions: Iterable of predictions, each indexed with ``'label'``.

	    Returns:
	        The summary sentence. When ``predictions`` is empty, a sentence
	        stating that no objects were detected.
	    """
	    counts = {}
	    for prediction in predictions:
	        label = prediction['label']
	        counts[label] = counts.get(label, 0) + 1

	    # Without this guard the sentence would end as "there are."
	    if not counts:
	        return "In this image, there are no detected objects."

	    p = inflect.engine()
	    # plural_noun handles irregular forms ("person" -> "people",
	    # "bus" -> "buses") that appending "s" gets wrong.
	    phrases = [
	        f"{p.number_to_words(count)} {p.plural_noun(label, count)}"
	        for label, count in counts.items()
	    ]

	    # Agree the verb with the first listed item ("there is one dog and ...").
	    verb = "is" if next(iter(counts.values())) == 1 else "are"

	    # join() inserts the commas and the final "and".
	    return f"In this image, there {verb} {p.join(phrases)}."


	##### To ignore warnings #####
	import warnings
	import logging
	from transformers import logging as hf_logging

	def ignore_warnings():
	    """Silence a fixed set of noisy warnings and lower log verbosity."""
	    # Patterns for the Python warnings that should be suppressed.
	    suppressed_messages = (
	        "Some weights of the model checkpoint",
	        "Could not find image processor class",
	        "The `max_size` parameter is deprecated",
	    )
	    for pattern in suppressed_messages:
	        warnings.filterwarnings("ignore", message=pattern)

	    # Libraries that report through the logging module: errors only.
	    logging.basicConfig(level=logging.ERROR)
	    hf_logging.set_verbosity_error()

	########

	from transformers import pipeline
	from PIL import Image
	import gradio as gr
	import numpy as np
	import io

	# Shared object-detection pipeline, built on first use and then reused so the
	# model is not re-instantiated for every request.
	_OD_PIPE = None


	def _get_od_pipe():
	    """Return the shared object-detection pipeline, creating it if needed."""
	    global _OD_PIPE
	    if _OD_PIPE is None:
	        _OD_PIPE = pipeline("object-detection", model="facebook/detr-resnet-50")
	    return _OD_PIPE


	def processed_image(image):
	    """Run object detection on an image; return the annotated image and summary.

	    Args:
	        image: The uploaded PIL image, or ``None`` when nothing was uploaded.

	    Returns:
	        A ``(annotated_image, summary_text)`` tuple. When ``image`` is
	        ``None`` the annotated image is ``None`` and the text asks for an
	        upload.
	    """
	    # Guard the "submit with no image" case instead of crashing in the model.
	    if image is None:
	        return None, "Please upload an image."

	    detections = _get_od_pipe()(image)
	    annotated = render_results_in_image(image, detections)
	    summary = summarize_predictions_natural_language(detections)
	    return annotated, summary

	# Web UI: one image upload in, annotated image plus text summary out.
	iface = gr.Interface(
	    processed_image,  # Function to process the image
	    inputs=gr.Image(type="pil"),  # Image upload input
	    outputs=[gr.Image(type="pil"), "text"],  # Annotated image and summary
	)


	def narrate_text(text):
	    """Run the text-to-speech pipeline on ``text`` and return its output.

	    The original module-level call ``tts_pipe(text)`` referenced an undefined
	    name ``text`` and was placed after ``iface.launch()``, so it could only
	    fail with ``NameError``. The pipeline is now built on demand here.

	    NOTE(review): narration is not wired into the interface yet; connect this
	    helper to an audio output if spoken summaries are wanted.
	    """
	    tts_pipe = pipeline("text-to-speech", model="kakao-enterprise/vits-vctk")
	    return tts_pipe(text)


	# Start the server last so every definition above exists before it runs.
	iface.launch()