"""Gradio demo that captions an uploaded image with an image-to-text model."""

import os

import gradio as gr
from transformers import pipeline

# Model identifier handed to the transformers pipeline. This is a Hugging Face
# Hub model ID, not a filesystem path.
model_path = "Salesforce/blip-image-captioning-base"

# NOTE: there is deliberately no os.path.exists() check on model_path. A Hub ID
# does not exist on disk, so such a check raises FileNotFoundError on any
# machine without a same-named local directory and the model never loads.
# pipeline() is left to resolve the identifier and report failures itself.
pipe = pipeline("image-to-text", model=model_path)


def launch(input):
    """Generate a caption for an image.

    The parameter is named ``input`` (shadowing the builtin) to keep the
    original signature unchanged for existing callers.

    Args:
        input (PIL.Image): Image to caption. ``None`` is accepted and yields
            an empty caption, so a submission without an image does not crash.

    Returns:
        str: The generated caption, or an empty string when there is no image
        or the pipeline returns no result.
    """
    if input is None:
        return ""

    out = pipe(input)
    if not out:
        return ""

    # The first result's 'generated_text' entry holds the caption.
    return out[0]['generated_text']


# Gradio interface: one image input (delivered to `launch` as a PIL image)
# and one text output showing the caption.
iface = gr.Interface(
    fn=launch,
    inputs=gr.Image(type='pil'),
    outputs="text",
)

# Start the web UI only when executed as a script, so importing this module
# (e.g. to reuse `launch` or `iface`) does not start a server.
if __name__ == "__main__":
    iface.launch()