import os
import base64
import html
from io import BytesIO

import gradio as gr
from together import Together

# Initialize the Together client
client = Together(api_key=os.environ.get('TOGETHER_API_KEY'))


def process_image(image):
    # Convert the PIL image to a base64-encoded PNG string
    buffered = BytesIO()
    image.save(buffered, format="PNG")
    img_str = base64.b64encode(buffered.getvalue()).decode()

    # Prepare the messages for the API call
    messages = [
        {"role": "system", "content": "You are an AI assistant that can analyze images and generate code based on their content."},
        {"role": "user", "content": [
            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_str}"}},
            {"type": "text", "text": "Analyze this image and generate Python code that could recreate or represent the main elements seen in the image."}
        ]}
    ]

    # Make the API call
    response = client.chat.completions.create(
        model="meta-llama/Llama-Vision-Free",
        messages=messages,
        max_tokens=512,
        temperature=0.7,
        top_p=0.7,
        top_k=50,
        repetition_penalty=1,
        stop=["<|eot_id|>", "<|eom_id|>"]
    )

    # Extract the generated code from the response
    generated_code = response.choices[0].message.content

    # Wrap the generated code in HTML so it renders as a preformatted block
    # (escape it first so any markup in the model output is shown literally)
    html_output = f"<pre><code>{html.escape(generated_code)}</code></pre>"
    return html_output


# Create the Gradio interface
iface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),
    outputs=gr.HTML(),
    title="Llama Vision Free Code Generation",
    description="Upload an image, and this demo will use the Llama Vision Free model to analyze it and generate relevant Python code."
)

# Launch the interface
iface.launch()