# Import the necessary libraries
import base64
import io

import gradio as gr
import requests


# Function to encode a PIL image as a base64 string
def encode_image_to_base64(image):
    # Convert to RGB so images with an alpha channel (e.g. PNG uploads) can be saved as JPEG
    image = image.convert("RGB")
    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
    return img_str


# Function to send the image to the OpenAI API and return the model's response
def ask_openai_with_image(api_key, instruction, json_prompt, low_quality_mode, image):
    # Encode the uploaded image to base64
    base64_image = encode_image_to_base64(image)

    # Fall back to the default instruction if the field is left blank
    instruction = instruction.strip()
    if instruction == "":
        instruction = (
            "I've uploaded an image and I'd like to know what it depicts "
            "and any interesting details you can provide."
        )

    # If JSON attributes were provided, ask the model to respond in JSON format
    if json_prompt.strip() != "":
        instruction = (
            f"{instruction}\n\nReturn in JSON format and include the following "
            f"attributes:\n\n{json_prompt.strip()}"
        )

    # Create the payload with the base64-encoded image
    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": instruction,
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}",
                            "detail": "low" if low_quality_mode else "high",
                        },
                    },
                ],
            }
        ],
        "max_tokens": 4095,
    }

    # Send the request to the OpenAI API
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers={"Authorization": f"Bearer {api_key}"},
        json=payload,
    )

    # Check whether the request was successful
    if response.status_code == 200:
        response_json = response.json()
        print("Response JSON:", response_json)  # Print the raw response JSON
        try:
            # Attempt to extract the content text
            return response_json["choices"][0]["message"]["content"]
        except (KeyError, IndexError) as e:
            # If the response JSON is not structured as expected, log it
            print("Error in JSON structure:", e)
            print("Full JSON response:", response_json)
            return "Error processing the image response."
    else:
        # If an error occurred, return the error message
        return f"Error: {response.text}"


json_schema = gr.Textbox(
    label="JSON Attributes",
    info="Define a list of attributes to force the model to respond in valid JSON format. Leave blank to disable JSON formatting.",
    lines=3,
    placeholder="""Example:
- name: Name of the object
- color: Color of the object
""",
)

instructions = gr.Textbox(
    label="Instructions",
    info="Instructions for the vision model to follow. Leave blank to use the default.",
    lines=2,
    placeholder="""Default:
I've uploaded an image and I'd like to know what it depicts and any interesting details you can provide.""",
)

low_quality_mode = gr.Checkbox(
    label="Low Quality Mode",
    info="See here: https://platform.openai.com/docs/guides/vision/low-or-high-fidelity-image-understanding.",
)

# Create the Gradio interface
vision_playground = gr.Interface(
    fn=ask_openai_with_image,
    inputs=[
        gr.Textbox(label="API Key"),
        instructions,
        json_schema,
        low_quality_mode,
        gr.Image(type="pil", label="Image"),
    ],
    outputs=[gr.Markdown()],
    title="GPT-4-Vision Playground",
    description="Upload an image and get a description from GPT-4 with Vision.",
)

# Launch the app
vision_playground.launch()