import base64
import requests
from io import BytesIO
from PIL import Image
import os
from dotenv import load_dotenv
import json

# Using Gemini instead of OpenAI for this deployment only; the imports above
# are kept because the commented-out OpenAI path below still references them.
from models.gemini_image_to_json import fetch_gemini_response

# Keep the legacy name so existing callers do not need to change.
openaiprocess_image_to_json = fetch_gemini_response
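
# This swap assumes fetch_gemini_response matches the legacy contract below:
# it accepts a PIL image and returns a JSON string of the form
# {"nodes": [...], "edges": [...]} that json.loads can parse. A hypothetical
# interface sketch (not the actual module code):
#
#   def fetch_gemini_response(image: Image.Image) -> str:
#       """Extract mind-map nodes/edges from an image as a JSON string."""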
# # Load the .env file
# load_dotenv()
# # Get the API key from the environment
# api_key = os.getenv('OPENAI_API_KEY')
# # Function to encode the image
# def encode_image(image):
#     # Convert the image to RGB if it has an alpha channel
#     if image.mode == 'RGBA':
#         image = image.convert('RGB')
#     buffered = BytesIO()
#     image.save(buffered, format="JPEG")
#     return base64.b64encode(buffered.getvalue()).decode('utf-8')
# def openaiprocess_image_to_json(image):
#     print('fetching openai response')
#     # Encode the image
#     base64_image = encode_image(image)
#     headers = {
#         "Content-Type": "application/json",
#         "Authorization": f"Bearer {api_key}"
#     }
#     PROMPT = '''
#     You are responsible for extracting the entities (nodes) and relationships (edges) from images of mind maps. The mind maps are for Object Oriented Programming.
#     Don't make up facts, just extract them. Do not create new entity types that aren't mentioned in the image, and at the same time don't miss anything.
#     Give the output in JSON format as follows:
#     {
#       "nodes": [
#         {"id": "1", "label": string},
#         {"id": "2", "label": string}, ...
#       ],
#       "edges": [
#         {"source": SOURCE_ID, "target": TARGET_ID, "type": "->"},
#         {"source": SOURCE_ID, "target": TARGET_ID, "type": "->"}, ...
#       ]
#     }
#     Return only a valid JSON object: do not include \\n escapes or extra whitespace, and do not include any text outside the JSON structure.
#     Make sure the string you return can be parsed in Python with json.loads(json_string).
#     Now extract the entities and relationships from this image:
#     '''
#     payload = {
#         "model": "gpt-4o",
#         "messages": [
#             {
#                 "role": "user",
#                 "content": [
#                     {
#                         "type": "text",
#                         "text": PROMPT
#                     },
#                     {
#                         "type": "image_url",
#                         "image_url": {
#                             "url": f"data:image/jpeg;base64,{base64_image}"
#                         }
#                     }
#                 ]
#             }
#         ]
#     }
#     # Send the request to the OpenAI API
#     response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
#     # Parse the response
#     response_data = response.json()
#     print(response_data)
#     # Extract the JSON graph data from the response
#     if "choices" in response_data and response_data["choices"]:
#         content = response_data["choices"][0]["message"]["content"]
#         try:
#             # Parse the model output into a Python dict
#             graph_data = json.loads(content)
#         except json.JSONDecodeError as e:
#             print("Failed to parse JSON:", e)
#             graph_data = None
#     else:
#         raise ValueError("No valid response from OpenAI API")
#     return graph_data
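
# A minimal smoke test, assuming the active backend returns a JSON string as
# the legacy prompt specifies; the image path below is hypothetical.
if __name__ == "__main__":
    image = Image.open("sample_mindmap.jpg")  # hypothetical local test image
    json_string = openaiprocess_image_to_json(image)
    graph = json.loads(json_string)
    print(graph["nodes"])
    print(graph["edges"])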