davanstrien's picture
davanstrien HF staff
stop using gradio
1c9d91a
import asyncio
from typing import Dict, List
import gradio as gr
import httpx
# Base URL of the backend service; the fetch helpers below append their
# endpoint paths ("/similarity/datasets", "/search/datasets") to it.
API_URL = "http://localhost:8000"
async def fetch_similar_datasets(dataset_id: str, limit: int = 5) -> List[Dict]:
    """Query the backend's ``/similarity/datasets`` endpoint for a dataset ID.

    Args:
        dataset_id: Value sent as the ``dataset_id`` query parameter.
        limit: Value sent as the ``k`` query parameter.

    Returns:
        The ``"results"`` entry of the JSON response body when the server
        answers with HTTP 200; an empty list for any other status code.
    """
    endpoint = f"{API_URL}/similarity/datasets"
    query_params = {"dataset_id": dataset_id, "k": limit}
    async with httpx.AsyncClient() as http:
        reply = await http.get(endpoint, params=query_params)
        # Any non-200 answer is treated as "nothing found".
        if reply.status_code != 200:
            return []
        return reply.json()["results"]
async def fetch_similar_datasets_by_text(query: str, limit: int = 5) -> List[Dict]:
    """Query the backend's ``/search/datasets`` endpoint with free text.

    Args:
        query: Value sent as the ``query`` query parameter.
        limit: Value sent as the ``k`` query parameter.

    Returns:
        The ``"results"`` entry of the JSON response body when the server
        answers with HTTP 200; an empty list for any other status code.
    """
    endpoint = f"{API_URL}/search/datasets"
    query_params = {"query": query, "k": limit}
    async with httpx.AsyncClient() as http:
        reply = await http.get(endpoint, params=query_params)
        # Any non-200 answer is treated as "nothing found".
        if reply.status_code != 200:
            return []
        return reply.json()["results"]
def format_results(results: List[Dict]) -> str:
    """Render search results as a Markdown document.

    Each result becomes a level-3 heading that links to the dataset's page
    on the Hugging Face Hub, followed by the similarity score (two decimal
    places), the summary text, and a horizontal rule.

    Args:
        results: Result dicts. Each must contain ``"dataset_id"`` and
            ``"similarity"``; ``"summary"`` is optional and falls back to
            ``"No summary available."``.

    Returns:
        The concatenated Markdown, or an empty string when ``results`` is
        empty.

    Raises:
        KeyError: If a result lacks ``"dataset_id"`` or ``"similarity"``.
    """
    sections = []
    for result in results:
        hub_id = result["dataset_id"]
        similarity = result["similarity"]
        summary = result.get("summary", "No summary available.")
        # Canonical Hub host, with no trailing dot after ".co": a
        # "huggingface.co." link is treated by browsers as a separate origin.
        url = f"https://huggingface.co/datasets/{hub_id}"
        sections.append(
            f"### [{hub_id}]({url})\n"
            f"*Similarity: {similarity:.2f}*\n\n"
            f"{summary}\n\n"
            "---\n\n"
        )
    # Join once instead of growing a string with += inside the loop.
    return "".join(sections)
# Gradio UI: pick a search method, enter a dataset ID or a text query, and
# show the formatted matches as Markdown.
# NOTE(review): the layout nesting below (Column > Group / Row) was
# reconstructed from a copy of this file without indentation -- confirm it
# matches the intended design.
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # 🔍 Dataset Explorer
        Find similar datasets or search by text query
        """,
        elem_classes=["center-text"],
    )

    with gr.Column(variant="panel"):
        search_type = gr.Radio(
            ["Dataset ID", "Text Query"],
            label="Search Method",
            value="Dataset ID",
            container=False,
        )

        with gr.Group():
            dataset_id = gr.Textbox(
                value="airtrain-ai/fineweb-edu-fortified",
                label="Dataset ID",
                container=False,
            )
            # Hidden until the "Text Query" search method is selected.
            text_query = gr.Textbox(
                label="Text Query",
                placeholder="Enter at least 3 characters...",
                container=False,
                visible=False,
            )

        with gr.Row():
            search_btn = gr.Button("🔍 Search", size="lg")
            max_results = gr.Slider(
                minimum=1,
                maximum=20,
                step=1,
                value=5,
                label="Number of results",
            )

    results = gr.Markdown(elem_classes=["results-container"])

    def toggle_input_visibility(choice):
        """Return visibility updates for (dataset_id, text_query, search_btn).

        The dataset-ID box and the search button are shown for "Dataset ID";
        the text-query box is shown for "Text Query".
        """
        by_dataset_id = choice == "Dataset ID"
        return (
            gr.update(visible=by_dataset_id),
            gr.update(visible=choice == "Text Query"),
            gr.update(visible=by_dataset_id),
        )

    search_type.change(
        toggle_input_visibility,
        inputs=[search_type],
        outputs=[dataset_id, text_query, search_btn],
    )

    async def search_handler(search_type, dataset_id, text_query, limit):
        """Run the search for the selected method and return Markdown.

        Returns "No similar datasets found." when the backend yields no
        results, otherwise the output of ``format_results``.
        """
        if search_type == "Dataset ID":
            matches = await fetch_similar_datasets(dataset_id, limit)
        else:
            matches = await fetch_similar_datasets_by_text(text_query, limit)
        if not matches:
            return "No similar datasets found."
        return format_results(matches)

    async def live_text_search(search_type, text_query, limit):
        """Search while typing; only trigger after 3 characters.

        Shorter queries skip the request and return None.
        """
        if len(text_query) < 3:
            return None
        return await search_handler(search_type, "", text_query, limit)

    # Coroutine functions are registered directly as event handlers, so no
    # per-event asyncio.run() wrapper (and no extra event loop) is needed.
    text_query.input(
        live_text_search,
        inputs=[search_type, text_query, max_results],
        outputs=results,
        api_name=False,
    )

    search_btn.click(
        search_handler,
        inputs=[search_type, dataset_id, text_query, max_results],
        outputs=results,
    )

demo.launch()