"""Gradio front end that turns a UI screenshot into Gradio code.

The uploaded image is downscaled, JPEG-encoded and sent together with a fixed
instruction prompt to a Together-hosted vision model; the model's reply is
shown in a code viewer.
"""

import base64
import os
import traceback
from io import BytesIO

import gradio as gr
import numpy as np
import requests
from PIL import Image
from together import Together

# Initialize Together client
client = None

# Instruction prompt sent with every screenshot.  The text (including the
# single newline after "9. ") is kept verbatim; it is only split across
# adjacent literals here for readability.
_PROMPT = (
    "You are an AI assistant specialized in UI/UX design and Gradio app development. "
    "Analyze the attached screenshot or UI mockup and generate a concise Gradio code "
    "that recreates the main elements of this design. Follow these steps: "
    "1. Briefly describe the main elements of the UI. "
    "2. Generate a compact Gradio Python code that recreates the essential UI elements. "
    "3. Use appropriate Gradio components for key elements in the UI. "
    "4. Include necessary imports at the beginning of the code. "
    "5. Implement minimal placeholder functions for interactive elements. "
    "6. Use gr.Blocks() to create a basic layout that captures the essence of the screenshot. "
    "7. Include the gr.Blocks().launch() call at the end of the code. "
    "8. Provide a runnable Gradio application focusing on the most important aspects of the UI. "
    "9. \n"
    "Keep the code concise, aiming for no more than 2000 tokens. "
    "Please generate the Gradio code based on the provided image, "
    "focusing on the most crucial elements to fit within the token limit."
)


def initialize_client(api_key=None):
    """Create the Together client and store it in the module-level ``client``.

    Args:
        api_key: Explicit API key. When falsy, the client is built without
            arguments, which requires ``TOGETHER_API_KEY`` to be present in
            the environment.

    Returns:
        The client that was just created (also assigned to the global
        ``client``), so callers can hold a reference that later calls
        cannot replace underneath them.

    Raises:
        ValueError: If no key is given and ``TOGETHER_API_KEY`` is not set.
    """
    global client
    if api_key:
        client = Together(api_key=api_key)
    elif "TOGETHER_API_KEY" in os.environ:
        client = Together()
    else:
        raise ValueError(
            "Please provide an API key or set the TOGETHER_API_KEY environment variable"
        )
    return client


def encode_image(image, max_size=(800, 800), quality=85):
    """Return *image* as a base64-encoded JPEG string.

    Args:
        image: A PIL image or a NumPy array (cast to ``uint8``).
        max_size: ``(width, height)`` bound passed to ``Image.thumbnail``.
        quality: JPEG quality passed to ``Image.save``.

    Returns:
        The JPEG bytes, base64-encoded and decoded as UTF-8 text.
    """
    if isinstance(image, np.ndarray):
        # Let Pillow infer the mode from the array shape instead of forcing
        # "RGB", which mis-reads grayscale (2-D) and RGBA (4-channel) arrays.
        image = Image.fromarray(image.astype("uint8"))
    else:
        # thumbnail() works in place; copy so the caller's image is untouched.
        image = image.copy()

    image.thumbnail(max_size)

    if image.mode == "P":
        # Palette images cannot be written as JPEG; keep transparency (if
        # any) long enough to flatten it onto white below.
        image = image.convert("RGBA" if "transparency" in image.info else "RGB")

    if image.mode in ("RGBA", "LA"):
        # JPEG has no alpha channel: composite onto a white background.
        flat_mode = image.mode[:-1]
        # A single-band ("L") image needs a scalar fill colour; an RGB
        # triple is rejected by Pillow for that mode.
        white = 255 if flat_mode == "L" else (255, 255, 255)
        background = Image.new(flat_mode, image.size, white)
        background.paste(image, mask=image.split()[-1])
        image = background
    elif image.mode not in ("RGB", "L"):
        # Remaining modes (CMYK, I, F, PA, ...) are normalised to RGB so the
        # JPEG writer accepts them.
        image = image.convert("RGB")

    buffered = BytesIO()
    image.save(buffered, format="JPEG", quality=quality)
    return base64.b64encode(buffered.getvalue()).decode("utf-8")


def generate_gradio_app(api_key, image):
    """Ask the vision model for Gradio code that recreates *image*.

    Args:
        api_key: Together API key entered by the user.
        image: The uploaded screenshot (PIL image or NumPy array), or
            ``None`` when nothing was uploaded.

    Returns:
        The model's reply text on success, otherwise a human-readable error
        message. Exceptions are caught and reported in the returned string
        rather than propagated.
    """
    if not api_key:
        return "Error: API key not provided. Please enter your Together API key."
    if image is None:
        # Without this guard the failure surfaces as an AttributeError that
        # is then misreported as an API-key problem.
        return "Error: No image provided. Please upload a screenshot or UI mockup."

    try:
        # Use the returned client rather than re-reading the global, so a
        # concurrent request with a different key cannot swap the client
        # between initialisation and the API call.
        api_client = initialize_client(api_key)
        encoded_image = encode_image(image)

        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": _PROMPT},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"},
                    },
                ],
            }
        ]

        response = api_client.chat.completions.create(
            model="meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
            messages=messages,
            max_tokens=2000,
            temperature=0.7,
            top_p=0.7,
            top_k=50,
            repetition_penalty=1,
            stop=["<|eot_id|>", "<|eom_id|>"],
        )

        if response.choices and response.choices[0].message:
            generated_code = response.choices[0].message.content
            if generated_code is None:
                # A message without text content is treated like any other
                # malformed response instead of crashing in len().
                return "Error: Unexpected response structure from the API."
            print("Generated code length:", len(generated_code))
            return generated_code
        return "Error: Unexpected response structure from the API."
    except Exception as e:
        # Top-level UI boundary: report the failure to the user instead of
        # letting the handler raise.
        error_message = str(e)
        stack_trace = traceback.format_exc()
        return (
            f"An error occurred: {error_message}\n\n"
            f"Stack trace:\n{stack_trace}\n\n"
            "Please check your API key and try again."
        )


with gr.Blocks() as demo:
    gr.Markdown("# Generate Concise Gradio App from Wireframe")
    gr.Markdown(
        "Enter your Together API key, upload an image of your UI design, and we'll generate a compact Gradio code to recreate its main elements."
    )

    api_key_input = gr.Textbox(label="Enter your Together API Key", type="password")

    with gr.Row():
        with gr.Column(scale=1):
            image_input = gr.Image(label="Upload a screenshot", elem_id="image_upload")
            generate_button = gr.Button("Generate Gradio Code", variant="primary")

        with gr.Column(scale=2):
            code_output = gr.Code(language="python", label="Generated Gradio Code", lines=30)

    generate_button.click(
        fn=generate_gradio_app,
        inputs=[api_key_input, image_input],
        outputs=[code_output],
    )

if __name__ == "__main__":
    demo.launch(debug=True)