Spaces:

jkorstad
/

spaces-explorer

Sleeping

App Files Files Community

spaces-explorer / app.py

jkorstad

Update app.py

fb53d5e verified about 2 months ago

raw

history blame

7.44 kB

	import gradio as gr
	import pandas as pd
	from huggingface_hub import HfApi
	from collections import defaultdict

	# ------------------------------------------------------
	# Fetch Space metadata from the Hugging Face Hub and tabulate it.
	api = HfApi()
	# The listing is capped at 40000 Spaces for now.
	spaces = api.list_spaces(limit=40000)

	# Build one record per Space. Optional fields are normalised here so the
	# rest of the script never has to deal with None.
	data = [
	    {
	        'id': space.id,
	        # The title is the last path component of the "owner/name" id.
	        'title': space.id.split('/')[-1],
	        # Fall back to the owner part of the id when no author is reported.
	        'author': space.author or space.id.split('/')[0],
	        # A missing or None like count is treated as 0 so that sorting and
	        # summing by likes always work on numbers.
	        'likes': getattr(space, 'likes', None) or 0,
	        # A missing or None tag list becomes an empty list.
	        'tags': getattr(space, 'tags', None) or [],
	    }
	    for space in spaces
	]

	# Create a DataFrame
	df = pd.DataFrame(data)
	print("Total spaces collected:", len(df))
	print("\nSample of the data:")
	print(df.head())

	# ------------------------------------------------------

	# Define categories and their keywords.
	#
	# Keywords are matched as substrings of the lower-cased title and tags, so
	# every keyword here must itself be lowercase. Order matters: a Space is
	# assigned to the first category (in declaration order) with a matching
	# keyword.
	categories = {
	    'Text-to-Speech': ['tts', 'speech', 'voice', 'audio', 'kokoro'],
	    'Transcription': ['transcribe', 'transcription'],
	    'Agents': ['agent', 'agents', 'smol', 'multi-step', 'autobot', 'autogpt', 'agentic'],
	    'Image Generation': ['stable-diffusion', 'diffusion', 'gan', 'image', 'img2img', 'style', 'art'],
	    'Video': ['video', 'animation', 'motion', 'sora'],
	    'Face/Portrait': ['face', 'portrait', 'gaze', 'facial'],
	    'Chat/LLM': ['chat', 'llm', 'gpt', 'llama', 'text', 'language'],
	    '3D': ['3d', 'mesh', 'point-cloud', 'depth'],
	    'Audio': ['audio', 'music', 'sound', 'voice'],
	    'Vision': ['vision', 'detection', 'recognition', 'classifier'],
	    'CLIP': ['image-to-text', 'describe-image'],
	    # Single 'Games' entry: a dict keeps only the last value for a repeated
	    # key, so the two former 'Games' lists are merged here.
	    'Games': ['game', 'games', 'play', 'playground', 'unity', 'ue5', 'unreal'],
	    'Finance': ['finance', 'stock', 'money', 'currency', 'bank', 'market'],
	    'SAM': ['sam', 'segmentation', 'mask'],
	    'Science': [
	        'science', 'physics', 'chemistry', 'biology', 'math', 'astronomy',
	        'geology', 'meteorology', 'engineering', 'medicine', 'health',
	        'nutrition', 'environment', 'ecology', 'geography', 'geophysics',
	    ],
	    'Education': ['education', 'school', 'university', 'college', 'teaching', 'learning', 'study', 'research'],
	    'Graph': ['graph', 'network', 'node', 'edge', 'path', 'tree', 'cycle', 'flow', 'matching', 'coloring', 'swarm'],
	    'Research': ['research', 'study', 'experiment', 'paper', 'discovery', 'innovation', 'exploration', 'analysis'],
	    'Document Analysis': ['pdf', 'rag', 'idefics'],
	    'WebGPU': ['localmodel', 'webgpu'],
	    'Point Tracking': ['cotracker', 'tapir', 'tapnet', 'point', 'track'],
	    'Leaderboard': ['arena', 'leaderboard', 'timeline'],
	    'Other': [],  # Default category
	}

	def categorize_space(title, tags, category_keywords=None):
	    """Return the name of the first category whose keywords match a Space.

	    Matching is a case-insensitive substring test of each keyword against
	    the title and against every tag. Categories are tried in the mapping's
	    iteration order and the first hit wins.

	    Args:
	        title: Space title (string).
	        tags: Iterable of tag strings, or None/empty when there are none.
	        category_keywords: Optional mapping of category name to a list of
	            keywords. Defaults to the module-level ``categories``.

	    Returns:
	        The matching category name, or 'Other' when nothing matches.
	    """
	    keyword_table = categories if category_keywords is None else category_keywords

	    # Everything we search in, lower-cased once up front.
	    haystacks = [title.lower()]
	    if tags:
	        haystacks.extend(tag.lower() for tag in tags)

	    for category, keywords in keyword_table.items():
	        # Lower-case the keywords too: the haystacks are lowercase, so a
	        # mixed-case keyword such as 'RAG' could otherwise never match.
	        needles = [keyword.lower() for keyword in keywords]
	        if any(needle in text for needle in needles for text in haystacks):
	            return category
	    return 'Other'

	# Label every Space with the category returned by categorize_space.
	df['category'] = df.apply(
	    lambda row: categorize_space(row['title'], row['tags']),
	    axis=1,
	)

	# Report how many Spaces landed in each category.
	category_counts = df['category'].value_counts()
	print("\nCategory Distribution:")
	print(category_counts)

	# Preview up to three Spaces per category, in declaration order.
	print("\nSample spaces from each category:")
	for category in categories:
	    print(f"\n{category}:")
	    sample = df.loc[df['category'].eq(category)].head(3)
	    print(sample[['title', 'likes']].to_string())

	# ------------------------------------------------------
	# Total likes per category, largest first.
	likes_by_category = df.groupby('category')['likes']
	category_likes = likes_by_category.sum().sort_values(ascending=False)
	print("Total likes per category:")
	print(category_likes)

	print("\nTop 10 spaces in each category (sorted by likes):")
	for category in categories:
	    print(f"\n=== {category} ===")
	    members = df.loc[df['category'] == category, ['title', 'likes']]
	    top_10 = members.nlargest(10, 'likes')
	    # index=False leaves the row index out of the printed table.
	    print(top_10.to_string(index=False))

	# ------------------------------------------------------

	# Add space URLs: each Space page lives at /spaces/<owner>/<name>.
	# (The host is "huggingface.co" — no trailing dot before the path.)
	df['url'] = 'https://huggingface.co/spaces/' + df['id']

	# Let's show the top 5 spaces from each category with their links
	print("Top 5 spaces in each category with links:")
	for category in categories:
	    print(f"\n=== {category} ===")
	    top_5 = df[df['category'] == category].nlargest(5, 'likes')[['title', 'likes', 'url']]
	    # index=False leaves the row index out of the printed table.
	    print(top_5.to_string(index=False))

	# ------------------------------------------------------

	def search_spaces(search_text, category, source_df=None):
	    """Render the most-liked Spaces matching a search as an HTML fragment.

	    Args:
	        search_text: Text to look for in Space titles. Matching is a
	            case-insensitive, literal substring test; surrounding
	            whitespace is ignored and an empty or None value disables
	            the title filter.
	        category: Category name to restrict to, or "All Categories" for
	            no category filter.
	        source_df: Optional DataFrame with 'title', 'likes', 'url' and
	            'category' columns. Defaults to the module-level ``df``.

	    Returns:
	        An HTML string: a statistics box (number of matches and their
	        total likes) followed by one card per Space for the 20 most-liked
	        matches.
	    """
	    import html  # local import: only this function needs it

	    matches = df if source_df is None else source_df
	    if category != "All Categories":
	        matches = matches[matches['category'] == category]

	    query = (search_text or "").strip().lower()
	    if query:
	        lowered_titles = matches['title'].str.lower()
	        # regex=False: the query is user-typed text, so input such as
	        # "c++" or "(" must be matched literally, not parsed as a pattern.
	        matches = matches[lowered_titles.str.contains(query, regex=False, na=False)]

	    top_spaces = matches.nlargest(20, 'likes')[['title', 'likes', 'url', 'category']]

	    # Statistics cover every match, not only the 20 that are displayed.
	    total_spaces = len(matches)
	    total_likes = matches['likes'].sum()

	    # Collect the fragments and join once at the end.
	    parts = [f"""
	    <div style='margin-bottom: 20px; padding: 10px; background-color: #f5f5f5; border-radius: 5px;'>
	    <h3>Statistics:</h3>
	    <p>Total Spaces: {total_spaces}</p>
	    <p>Total Likes: {total_likes:,}</p>
	    </div>
	    <div style='max-height: 500px; overflow-y: auto;'>
	    """]

	    for _, row in top_spaces.iterrows():
	        # Escape values interpolated into markup; quote=True also covers
	        # the single-quoted href attribute.
	        link = html.escape(str(row['url']), quote=True)
	        name = html.escape(str(row['title']))
	        label = html.escape(str(row['category']))
	        likes = row['likes']
	        parts.append(f"""
	    <div style='margin: 10px; padding: 15px; border: 1px solid #ddd; border-radius: 5px; background-color: white;'>
	    <h3><a href='{link}' target='_blank' style='color: #2196F3; text-decoration: none;'>{name}</a></h3>
	    <p>Category: {label}</p>
	    <p>❤️ {likes:,} likes</p>
	    </div>
	    """)

	    parts.append("</div>")
	    return "".join(parts)

	# Create the Gradio interface
	def create_app():
	    """Build and return the Blocks UI for browsing Spaces.

	    The page has a heading, a category dropdown, a search box and an HTML
	    results area. The results area initially shows the unfiltered search
	    and is refreshed through ``search_spaces`` whenever either control
	    changes.
	    """
	    category_choices = ["All Categories"] + sorted(df['category'].unique())

	    with gr.Blocks(title="Hugging Face Spaces Explorer", theme=gr.themes.Soft()) as app:
	        gr.Markdown("""
	        # 🤗 Hugging Face Spaces Explorer
	        Explore and discover popular Hugging Face Spaces by category
	        """)

	        with gr.Row():
	            with gr.Column(scale=1):
	                # Category filter
	                category_dropdown = gr.Dropdown(
	                    choices=category_choices,
	                    label="Select Category",
	                    value="All Categories",
	                )
	                # Free-text title filter
	                search_input = gr.Textbox(
	                    label="Search Spaces",
	                    placeholder="Enter search terms...",
	                )

	        # Results area, pre-filled with the unfiltered listing
	        spaces_display = gr.HTML(value=search_spaces("", "All Categories"))

	        # Both controls trigger the same search with the current values of
	        # the search box and the dropdown.
	        refresh = dict(
	            fn=search_spaces,
	            inputs=[search_input, category_dropdown],
	            outputs=spaces_display,
	        )
	        category_dropdown.change(**refresh)
	        search_input.change(**refresh)

	    return app

	# Build the app at module level so it stays importable as ``app``.
	app = create_app()

	# Launch only when run as a script, so importing this module does not
	# start a server. share=True is passed to launch() as before.
	if __name__ == "__main__":
	    app.launch(share=True)