import os
import gradio as gr
from transformers import pipeline
from huggingface_hub import InferenceClient
from PIL import Image
import io
# Get the token from the environment variable
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    raise ValueError("HF_TOKEN environment variable is not set. Please set it with your Hugging Face API token.")
# Initialize both the local pipeline and the Inference Client
local_extractor = pipeline("image-to-text", model="microsoft/git-base-textcaps")
api_client = InferenceClient(model="microsoft/git-base-textcaps", token=hf_token)
# Flag to track which mode is active
use_api = False

def switch_mode():
    global use_api
    use_api = not use_api
    return "Using API" if use_api else "Using Local Model (slow unless duplicated and run on a GPU)"
def extract_text(image, mode_indicator):
    # Gradio fires the change event with None when the image is cleared
    if image is None:
        return ""
    # Re-encode the image as PNG if it is not one already
    if image.format != "PNG":
        png_buffer = io.BytesIO()
        image.save(png_buffer, format="PNG")
        png_buffer.seek(0)  # rewind the buffer before reading it back
        png_image = Image.open(png_buffer)
    else:
        png_image = image
    if "API" in mode_indicator:
        # The Inference API expects raw bytes, so serialize the PIL image
        buffered = io.BytesIO()
        png_image.save(buffered, format="PNG")
        img_bytes = buffered.getvalue()
        result = api_client.image_to_text(image=img_bytes)
    else:
        result = local_extractor(png_image)
    # The local pipeline returns a list of dicts; the Inference Client may
    # return a plain string or an object with a generated_text attribute,
    # depending on the huggingface_hub version
    if isinstance(result, list):
        return result[0]["generated_text"]
    return getattr(result, "generated_text", result)
# Create the Gradio interface
with gr.Blocks() as iface:
    gr.Markdown("# Image Text Extractor")
    with gr.Row():
        image_input = gr.Image(type="pil")
        text_output = gr.Textbox()
    mode_button = gr.Button("Switch Mode")
    mode_indicator = gr.Textbox(value="Using Local Model", label="Current Mode")
    mode_button.click(switch_mode, outputs=mode_indicator)
    image_input.change(extract_text, inputs=[image_input, mode_indicator], outputs=text_output)

iface.launch()
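
For the Space to build, a requirements.txt next to app.py has to list the libraries imported above. A minimal sketch follows; the torch entry is an assumption (the transformers pipeline needs a backend), and the source specifies no version pins:

# requirements.txt (sketch, unpinned)
gradio
transformers
torch
huggingface_hub
Pillow

On Hugging Face Spaces, the HF_TOKEN the script reads at startup is normally supplied as a repository secret in the Space's settings rather than hardcoded, which is why the code only checks for its presence and raises otherwise.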