File size: 2,650 Bytes
c042949
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import base64
import mimetypes
import os
from typing import List, Dict, Any

from dotenv import load_dotenv
from openai import OpenAI

# Load variables from a .env file into the process environment at import
# time, so settings such as OPENAI_API_KEY are available before any OpenAI
# client is constructed.
load_dotenv()

# source
# https://platform.openai.com/docs/guides/vision?lang=python
def _image_to_url(image: str) -> str:
    """Return a URL the vision API can consume for *image*.

    Remote ``http://`` / ``https://`` URLs are passed through unchanged.
    Anything else is treated as a local file path: the file is read and
    embedded as a base64 ``data:`` URL.
    """
    if image.startswith(("http://", "https://")):
        return image

    with open(image, "rb") as image_file:
        encoded = base64.b64encode(image_file.read()).decode("utf-8")

    # Derive the MIME type from the file extension so that e.g. a ``.png``
    # file is not mislabelled as JPEG. Fall back to ``image/jpeg`` (the
    # previously hard-coded value) when the extension is unknown or does not
    # map to an image type.
    mime_type, _ = mimetypes.guess_type(image)
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"
    return f"data:{mime_type};base64,{encoded}"


def analyze_images(
    images: List[str],
    prompt: str,
    model: str = "gpt-4-vision-preview",
    max_tokens: int = 300,
) -> Any:
    """
    Analyze multiple images using OpenAI's vision model.

    The OpenAI client is constructed without arguments, so no API key is
    passed here; the client falls back to its own defaults (the OpenAI SDK
    reads ``OPENAI_API_KEY`` from the environment).

    Args:
        images (List[str]): List of URLs and/or local paths to the image files.
            Entries starting with ``http://`` or ``https://`` are sent as-is;
            all other entries are opened as local files and sent inline as
            base64 data URLs.
        prompt (str): Prompt message for the AI model.
        model (str, optional): Name of the vision model to use. Defaults to "gpt-4-vision-preview".
        max_tokens (int, optional): Maximum number of tokens for the response. Defaults to 300.

    Returns:
        The first choice of the chat completion (``response.choices[0]``),
        i.e. an SDK object rather than a plain dict.

    Raises:
        OSError: If a local image path cannot be opened or read.
    """
    client = OpenAI()

    # The prompt goes first, followed by one user message per image.
    messages = [{
        "role": "user",
        "content": [{"type": "text", "text": prompt}],
    }]
    messages.extend(
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": _image_to_url(image)}}
            ],
        }
        for image in images
    )

    response = client.chat.completions.create(
        model=model,
        messages=messages,
        max_tokens=max_tokens,
    )
    return response.choices[0]

def main():
    """Ask the vision model to review three rendered mesh images.

    Sends three hard-coded local PNG paths together with a prompt asking
    whether the renders look correct and, if not, for an improved
    text-to-3D prompt, then prints the returned choice.

    No API key is passed explicitly: ``analyze_images`` builds its own
    client.
    """
    images = [
        "/workspaces/Maker-Tech-Tree/mesh_1.png",
        "/workspaces/Maker-Tech-Tree/mesh_2.png",
        "/workspaces/Maker-Tech-Tree/mesh_3.png",
    ]

    # Built with implicit string concatenation rather than backslash
    # continuations inside the literal: the latter embedded each line's
    # leading indentation in the prompt and ran sentences together.
    # NOTE(review): the original text read "5 25 words"; this assumes a
    # 5-25 word range was intended -- confirm.
    prompt = (
        "I am creating a 3D model of Glass lenses for refracting light, "
        "using a text-to-3D model. "
        "Do these images look correct? "
        "If not, please make a suggestion on how to improve the text input. "
        "As this response will be used in a pipeline, please only output a new "
        "potential prompt or output nothing. "
        "Please keep the prompt to 5-25 words to not confuse the model."
    )

    response = analyze_images(images, prompt)

    print(response)

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()