# Hugging Face Space (status: Running)
import numpy as np | |
from safetensors import safe_open | |
from datasets import load_dataset | |
import torch | |
from multilingual_clip import pt_multilingual_clip | |
import transformers | |
import gradio as gr | |
import clip | |
def load_embeddings(file_path, key="vectors"):
    """Read a single tensor from a safetensors file as a NumPy array.

    Args:
        file_path: Path of the safetensors file to open.
        key: Name of the tensor to fetch; defaults to ``"vectors"``.

    Returns:
        The tensor stored under ``key``, loaded with the NumPy framework.
    """
    with safe_open(file_path, framework="numpy") as handle:
        return handle.get_tensor(key)
# Precomputed image embeddings, divided by their L2 norm along axis 1 so that
# a dot product with a unit-length query vector is a cosine similarity.
image_embeddings = load_embeddings("clothes_desc.safetensors")
image_embeddings = np.divide(
    image_embeddings,
    np.linalg.norm(image_embeddings, axis=1, keepdims=True),
)
# Text encoder: the model and its tokenizer are loaded from the same
# checkpoint identifier.
model_name = "M-CLIP/LABSE-Vit-L-14"
tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
model = pt_multilingual_clip.MultilingualCLIP.from_pretrained(model_name)

# "train" split of the dataset; records are looked up by integer index and
# their "image" field is what the search returns.
ds = load_dataset("wbensvage/clothes_desc")["train"]
def encode_text(texts, model, tokenizer):
    """Embed a batch of texts and return L2-normalized NumPy vectors.

    Args:
        texts: Batch of strings handed to ``model.forward``.
        model: Object whose ``forward(texts, tokenizer)`` returns a torch
            tensor of embeddings (normalized along axis 1 here).
        tokenizer: Tokenizer forwarded unchanged to ``model.forward``.

    Returns:
        A NumPy array holding the embeddings, each row divided by its L2
        norm. A row whose norm is exactly zero is returned as zeros instead
        of NaN.
    """
    # Inference only, so autograd tracking is switched off.
    with torch.no_grad():
        embs = model.forward(texts, tokenizer)
    embs = embs.detach().cpu().numpy()

    norms = np.linalg.norm(embs, axis=1, keepdims=True)
    # Avoid 0/0 -> NaN, which would poison every similarity score computed
    # from this vector; dividing a zero row by 1 simply leaves it at zero.
    norms[norms == 0] = 1
    return embs / norms
def find_images(query, top_k):
    """Return the dataset images whose embeddings best match a text query.

    Args:
        query: Search text. ``None`` or whitespace-only text yields an
            empty result list.
        top_k: Maximum number of images to return. Integral floats such as
            ``20.0`` are accepted, since a UI slider may deliver a float.

    Returns:
        A list of at most ``top_k`` images, ordered from most to least
        similar to the query.
    """
    # Nothing meaningful to search for; skip the model call entirely.
    if query is None or not query.strip():
        return []

    # Slice bounds must be ints, and a negative bound would slice from the
    # end instead of limiting the result count.
    limit = max(int(top_k), 0)

    query_embedding = encode_text([query], model, tokenizer)
    # One query row against every image row; [0] selects that single row of
    # scores.
    scores = np.dot(query_embedding, image_embeddings.T)[0]
    # Negate so that argsort's ascending order puts the best match first.
    best_indices = np.argsort(-scores)[:limit]
    return [ds[int(i)]["image"] for i in best_indices]
# Input widgets: a free-text query box and a result-count slider
# (10 to 50 in steps of 10, starting at 20).
query_input = gr.Textbox(
    lines=2,
    placeholder="Enter search text here...",
    label="Query",
)
count_input = gr.Slider(10, 50, step=10, value=20, label="Number of images")

# Output widget: the matched images shown five per row.
results_gallery = gr.Gallery(label="Search Results", columns=5, height="auto")

# Wire the widgets to find_images and start the app.
iface = gr.Interface(
    fn=find_images,
    inputs=[query_input, count_input],
    outputs=results_gallery,
    title="Multilingual CLIP Image Search",
    description="Enter a text query",
)
iface.launch()