# NOTE(review): the lines below are Hugging Face web-page chrome (author,
# commit message 6d09e4d, file metadata) that was scraped together with the
# source file; they are commented out here so the module parses as valid Python.
# markllego's picture
# IMPORTANT: Ask the user to provide UI & other improvements (#1)
# 6d09e4d
# raw history blame contribute delete
# No virus
# 3.71 kB
# Import the necessary libraries
import gradio as gr
import openai
import base64
import io
import requests
# Function to encode the image to base64
def encode_image_to_base64(image):
    """Return *image* serialized as a base64-encoded JPEG string.

    JPEG cannot store an alpha channel, so modes with transparency or a
    palette ("RGBA", "LA", "P") are converted to "RGB" first — without
    this, ``Image.save(..., format="JPEG")`` raises ``OSError`` for PNG
    uploads with alpha.

    Args:
        image: a PIL image (any object exposing ``mode``/``convert``/``save``).

    Returns:
        str: the JPEG bytes encoded as ASCII base64.
    """
    if image.mode in ("RGBA", "LA", "P"):
        image = image.convert("RGB")
    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
# Function to send the image to the OpenAI API and get a response
def ask_openai_with_image(api_key, instruction, json_prompt, low_quality_mode, image):
    """Send the uploaded image plus instructions to the OpenAI vision endpoint.

    Args:
        api_key: OpenAI API key, used in the Authorization header.
        instruction: Prompt for the model. When blank, the default prompt
            that the UI advertises ("Leave blank to use default") is used.
        json_prompt: Optional attribute list; when non-empty the model is
            asked to answer in JSON format containing those attributes.
        low_quality_mode: When True, request "low" image detail
            (cheaper/faster); otherwise "high".
        image: PIL image uploaded through the Gradio UI.

    Returns:
        str: the model's reply text, or a human-readable error message.
    """
    # Kept for backward compatibility with anything that reads openai.api_key;
    # the HTTP request below authenticates via the header directly.
    openai.api_key = api_key
    # Encode the uploaded image to base64 for the data URL in the payload.
    base64_image = encode_image_to_base64(image)
    instruction = instruction.strip()
    if not instruction:
        # The UI promises a default prompt when the field is left blank,
        # but previously none was ever applied.
        instruction = "I've uploaded an image and I'd like to know what it depicts and any interesting details you can provide."
    if json_prompt.strip() != "":
        instruction = f"{instruction}\n\nReturn in JSON format and include the following attributes:\n\n{json_prompt.strip()}"
    # Create the payload with the base64 encoded image.
    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": instruction,
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}",
                            "detail": "low" if low_quality_mode else "high",
                        },
                    },
                ],
            }
        ],
        "max_tokens": 4095,
    }
    # Send the request to the OpenAI API. An explicit timeout is required:
    # requests has no default timeout, so a stalled call would hang the UI.
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers={"Authorization": f"Bearer {api_key}"},
        json=payload,
        timeout=120,
    )
    # Check if the request was successful
    if response.status_code == 200:
        response_json = response.json()
        print("Response JSON:", response_json)  # Print the raw response JSON
        try:
            # Extract the assistant's reply text from the expected structure.
            return response_json["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError) as e:
            # Unexpected response shape: log it rather than crash the UI.
            print("Error in JSON structure:", e)
            print("Full JSON response:", response_json)
            return "Error processing the image response."
    else:
        # If an error occurred, return the error message
        return f"Error: {response.text}"
# Optional attribute list that forces the model into a JSON-formatted answer.
json_schema = gr.Textbox(
    lines=3,
    label="JSON Attributes",
    info="Define a list of attributes to force the model to respond in valid json format. Leave blank to disable json formatting.",
    placeholder="""Example:
- name: Name of the object
- color: Color of the object
""",
)
# Free-form prompt for the model; a blank value means "use the default prompt".
instructions = gr.Textbox(
    lines=2,
    label="Instructions",
    info="Instructions for the vision model to follow. Leave blank to use default.",
    placeholder="""Default:
I've uploaded an image and I'd like to know what it depicts and any interesting details you can provide.""",
)
# Toggle for the API's "low" image-detail setting (cheaper, less detailed).
low_quality_mode = gr.Checkbox(
    info="See here: https://platform.openai.com/docs/guides/vision/low-or-high-fidelity-image-understanding.",
    label="Low Quality Mode",
)
# Assemble the Gradio interface from the widgets above and start serving it.
vision_playground = gr.Interface(
    fn=ask_openai_with_image,
    title="GPT-4-Vision Playground",
    description="Upload an image and get a description from GPT-4 with Vision.",
    inputs=[
        gr.Textbox(label="API Key"),
        instructions,
        json_schema,
        low_quality_mode,
        gr.Image(type="pil", label="Image"),
    ],
    outputs=[gr.Markdown()],
)
vision_playground.launch()