# Page-capture residue, not part of the program: "Spaces: Running Running"
import gradio as gr | |
import pandas as pd | |
import plotly.express as px | |
from datetime import datetime, timedelta | |
import requests | |
from io import BytesIO | |
def create_trend_chart(space_id, daily_ranks_df):
    """Build a line chart of one space's rank by date.

    Args:
        space_id: Value matched against the ``id`` column; ``None`` yields
            no chart.
        daily_ranks_df: DataFrame with at least ``id``, ``date`` and ``rank``
            columns.

    Returns:
        A Plotly figure, or ``None`` when there is nothing to plot or when
        building the chart raises.
    """
    if space_id is None or daily_ranks_df.empty:
        return None

    try:
        history = daily_ranks_df[daily_ranks_df['id'] == space_id]
        if history.empty:
            return None
        history = history.sort_values('date')

        fig = px.line(
            history,
            x='date',
            y='rank',
            title=f'Daily Rank Trend for {space_id}',
            labels={'date': 'Date', 'rank': 'Rank'},
            markers=True,
            height=500,
        )

        # The range is given high-to-low so that rank 1 is drawn at the top.
        rank_axis = {
            'range': [100, 1],
            'tickmode': 'linear',
            'tick0': 1,
            'dtick': 10,
        }
        fig.update_layout(
            xaxis_title="Date",
            yaxis_title="Rank",
            yaxis=rank_axis,
            hovermode='x unified',
            plot_bgcolor='white',
            paper_bgcolor='white',
            showlegend=False,
            margin={'t': 50, 'r': 20, 'b': 40, 'l': 40},
        )

        grid_style = {'showgrid': True, 'gridwidth': 1, 'gridcolor': 'lightgray'}
        fig.update_xaxes(**grid_style)
        fig.update_yaxes(**grid_style)

        fig.update_traces(
            line_color='#2563eb',
            line_width=2,
            marker={'size': 8, 'color': '#2563eb'},
        )
        return fig
    except Exception as e:
        # Best effort: a chart failure is reported on stdout and shown as "no chart".
        print(f"Error creating chart: {e}")
        return None
def get_duplicate_spaces(top_100_spaces):
    """Sum trending scores per owner and return the 20 highest totals.

    The owner is the part of each space ``id`` before the first ``/``.

    Args:
        top_100_spaces: DataFrame with ``id`` and ``trendingScore`` columns.
            The frame is not modified.

    Returns:
        A Series of summed ``trendingScore`` values indexed by owner (index
        name ``clean_id``), sorted in descending order and limited to 20
        entries. An empty Series is returned when the frame is ``None``,
        empty, or lacks either required column.
    """
    required_columns = {'id', 'trendingScore'}
    if (
        top_100_spaces is None
        or top_100_spaces.empty
        or not required_columns.issubset(top_100_spaces.columns)
    ):
        # The loader hands back a column-less DataFrame when it fails;
        # answer with an empty result instead of raising KeyError.
        return pd.Series(dtype='float64', name='trendingScore')

    # Group by a derived key rather than writing a helper column into the
    # caller's DataFrame.
    owners = top_100_spaces['id'].str.split('/', n=1).str[0].rename('clean_id')
    score_sums = top_100_spaces['trendingScore'].groupby(owners).sum()

    # Sort once; the same ordering feeds both the debug output and the result.
    top_20_scores = score_sums.sort_values(ascending=False).head(20)

    # Debug output.
    print("\n=== ID๋ณ ์ค์ฝ์ด ํฉ์ฐ ๊ฒฐ๊ณผ (์์ 20) ===")
    for cid, score in top_20_scores.items():
        print(f"Clean ID: {cid}, Total Score: {score}")

    return top_20_scores
def create_duplicates_chart(score_sums):
    """Build a bar chart from a Series of summed scores.

    Args:
        score_sums: Series whose index holds the IDs shown on the x axis and
            whose values are the totals printed on the bars. Positions in the
            Series become ranks 1..N.

    Returns:
        A Plotly figure, or ``None`` when ``score_sums`` is empty.
    """
    if score_sums.empty:
        return None

    # Tabular form for plotting; rank follows the order of the Series.
    df = pd.DataFrame({
        'id': score_sums.index,
        'total_score': score_sums.values,
    })
    df['rank'] = range(1, len(score_sums) + 1)

    # Debug output.
    print("\n=== ์ฐจํธ ๋ฐ์ดํฐ (clean_id ๋จ์) ===")
    print(df)

    fig = px.bar(
        df,
        x='id',
        y='rank',
        title="Top 20 Spaces by Combined Trending Score",
        height=500,
        text='total_score',
    )

    # The y range runs high-to-low, so rank 1 sits nearest the top edge.
    rank_axis = {
        'range': [len(df) + 0.5, 0.5],
        'tickmode': 'linear',
        'tick0': 1,
        'dtick': 1,
    }
    fig.update_layout(
        showlegend=False,
        margin={'t': 50, 'r': 20, 'b': 40, 'l': 40},
        plot_bgcolor='white',
        paper_bgcolor='white',
        xaxis_tickangle=-45,
        yaxis=rank_axis,
    )

    fig.update_traces(
        marker_color='#4CAF50',
        texttemplate='%{text:.1f}',
        textposition='outside',
        hovertemplate='ID: %{x}<br>Rank: %{y}<br>Total Score: %{text:.1f}<extra></extra>',
    )

    grid_style = {'showgrid': True, 'gridwidth': 1, 'gridcolor': 'lightgray'}
    fig.update_xaxes(title_text="User ID", **grid_style)
    fig.update_yaxes(title_text="Rank", **grid_style)

    return fig
def update_display(selection):
    """Refresh the trend chart and detail panel for the selected space.

    Args:
        selection: Space ID to look up in the module-level
            ``daily_ranks_df``; a falsy value shows the placeholder.

    Returns:
        A ``(chart, html)`` pair. ``chart`` is ``None`` when nothing is
        selected or when processing fails; ``html`` is a ``gr.HTML``
        holding the details, the placeholder, or an error message.
    """
    if not selection:
        return None, gr.HTML(value="<div style='text-align: center; padding: 20px; color: #666;'>Select a space to view details</div>")

    try:
        space_id = selection

        # The most recent row for this space supplies the detail fields.
        history = daily_ranks_df[daily_ranks_df['id'] == space_id]
        latest = history.sort_values('date').iloc[-1]

        rank_text = int(latest['rank'])
        score_text = f"{latest['trendingScore']:.2f}"
        created_text = latest['createdAt'].strftime('%Y-%m-%d')

        info_text = f"""
<div style="padding: 16px; background-color: white; border-radius: 8px; box-shadow: 0 1px 3px rgba(0,0,0,0.1);">
<h3 style="margin: 0 0 12px 0;">Space Details</h3>
<p style="margin: 4px 0;"><strong>ID:</strong> {space_id}</p>
<p style="margin: 4px 0;"><strong>Current Rank:</strong> {rank_text}</p>
<p style="margin: 4px 0;"><strong>Trending Score:</strong> {score_text}</p>
<p style="margin: 4px 0;"><strong>Created At:</strong> {created_text}</p>
<p style="margin: 12px 0 0 0;">
<a href="https://huggingface.co./spaces/{space_id}"
target="_blank"
style="color: #2563eb; text-decoration: none;">
View Space โ
</a>
</p>
</div>
"""
        chart = create_trend_chart(space_id, daily_ranks_df)
        return chart, gr.HTML(value=info_text)
    except Exception as e:
        # Any failure (for example no rows for the ID) is shown in the panel
        # rather than propagated to the UI framework.
        print(f"Error in update_display: {e}")
        return None, gr.HTML(value=f"<div style='color: red;'>Error processing data: {str(e)}</div>")
def load_and_process_data():
    """Download the spaces snapshot and derive per-day rankings.

    Steps:
      - Download the Parquet file and keep rows whose ``createdAt`` falls
        within the last 30 days.
      - Drop rows duplicated on ``(createdAt, id)``, keeping the one with
        the highest ``trendingScore``.
      - For each day in the window, rank the spaces created on or before
        that day by ``trendingScore`` (descending, ties broken by ``id``).
      - Take the rows ranked 100 or better on the latest day, one per ``id``.

    Returns:
        ``(daily_ranks_df, top_100_spaces)``. ``daily_ranks_df`` has columns
        ``id``, ``date``, ``rank``, ``trendingScore`` and ``createdAt``.
        Two empty DataFrames are returned if any step raises.
    """
    try:
        url = "https://huggingface.co./datasets/cfahlgren1/hub-stats/resolve/main/spaces.parquet"
        # (connect, read) timeouts keep a stalled connection from blocking
        # start-up forever. raise_for_status surfaces HTTP errors as such
        # instead of as a Parquet parse failure on an error page.
        response = requests.get(url, timeout=(10, 120))
        response.raise_for_status()
        df = pd.read_parquet(BytesIO(response.content))

        # Read the clock once so both ends of the window share one reference.
        now = datetime.now()
        thirty_days_ago = now - timedelta(days=30)

        # NOTE(review): assumes createdAt parses to tz-naive timestamps that
        # can be compared with the naive local `now` - confirm against the
        # dataset schema.
        df['createdAt'] = pd.to_datetime(df['createdAt'])

        # Keep only records created within the window.
        df = df[df['createdAt'] >= thirty_days_ago].copy()

        # De-duplicate on (createdAt, id), keeping the highest score.
        df = (
            df
            .sort_values(['createdAt', 'trendingScore'], ascending=[True, False])
            .drop_duplicates(subset=['createdAt', 'id'], keep='first')
            .reset_index(drop=True)
        )

        # The ranking order and the creation dates do not depend on the day
        # being processed, so compute them once; each day's slice of the
        # sorted frame keeps that order.
        ranked = df.sort_values(['trendingScore', 'id'], ascending=[False, True])
        created_dates = ranked['createdAt'].dt.date

        daily_ranks = []
        for stamp in pd.date_range(start=thirty_days_ago, end=now, freq='D'):
            day = stamp.date()
            snapshot = ranked[created_dates <= day].copy()
            snapshot['rank'] = range(1, len(snapshot) + 1)
            snapshot['date'] = day
            daily_ranks.append(snapshot[['id', 'date', 'rank', 'trendingScore', 'createdAt']])

        # Stack the per-day rankings into one frame.
        daily_ranks_df = pd.concat(daily_ranks, ignore_index=True)

        # Top 100 as of the latest day in the window.
        latest_date = daily_ranks_df['date'].max()
        is_latest_top = (
            (daily_ranks_df['date'] == latest_date)
            & (daily_ranks_df['rank'] <= 100)
        )
        top_100_spaces = daily_ranks_df[is_latest_top].sort_values('rank').copy()

        # One row per id, keeping the best-ranked one.
        top_100_spaces = (
            top_100_spaces
            .drop_duplicates(subset=['id'], keep='first')
            .reset_index(drop=True)
        )

        return daily_ranks_df, top_100_spaces
    except Exception as e:
        # Top-level boundary: report the failure and let callers continue
        # with empty data.
        print(f"Error loading data: {e}")
        return pd.DataFrame(), pd.DataFrame()
# ๋ฉ์ธ ์คํ | |
print("Loading initial data...")
daily_ranks_df, top_100_spaces = load_and_process_data()
print("Data loaded successfully!")

# Per-owner score totals (owners with several spaces are summed) and their chart.
duplicates = get_duplicate_spaces(top_100_spaces)
duplicates_chart = create_duplicates_chart(duplicates)

# Collect the radio choices and one HTML card per top-100 space in a single pass.
space_ids = []
card_fragments = []
for _, space in top_100_spaces.iterrows():
    space_ids.append(space['id'])
    card_fragments.append(f"""
<div class="space-card"
data-space-id="{space['id']}"
style="
border: 1px solid #e5e7eb;
border-radius: 8px;
padding: 16px;
margin: 8px;
background-color: hsl(210, {max(30, 90 - (space['rank'] / 100 * 60))}%, {min(97, 85 + (space['rank'] / 100 * 10))}%);
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
display: inline-block;
width: 250px;
vertical-align: top;
cursor: pointer;
transition: all 0.2s;
"
onmouseover="this.style.transform='translateY(-2px)';this.style.boxShadow='0 4px 6px rgba(0,0,0,0.1)';"
onmouseout="this.style.transform='none';this.style.boxShadow='0 1px 3px rgba(0,0,0,0.1)';"
>
<div style="font-size: 1.2em; font-weight: bold; margin-bottom: 8px;">
#{int(space['rank'])}
</div>
<div style="margin-bottom: 8px;">
{space['id']}
</div>
<div style="color: #666; margin-bottom: 12px;">
Score: {space['trendingScore']:.2f}
</div>
<div style="display: flex; gap: 8px;">
<a href="https://huggingface.co./spaces/{space['id']}"
target="_blank"
style="padding: 6px 12px; background-color: white; color: #2563eb; text-decoration: none; border-radius: 4px; font-size: 0.9em; border: 1px solid #2563eb;"
onclick="event.stopPropagation();">
View Space โ
</a>
<button onclick="event.preventDefault(); gradioEvent('{space['id']}');"
style="padding: 6px 12px; background-color: #2563eb; color: white; border: none; border-radius: 4px; cursor: pointer; font-size: 0.9em;">
View Trend
</button>
</div>
</div>
""")

# Opening wrapper, the cards, then the closing wrapper plus the script that
# forwards a "View Trend" click to the matching radio input.
grid_open = """
<div style='display: flex; flex-wrap: wrap; gap: 16px; justify-content: center;'>
"""
grid_close = """
</div>
<script>
function gradioEvent(spaceId) {
// Space ์นด๋ ๋ด "View Trend" ๋ฒํผ ํด๋ฆญ ์, ๋์๋๋ Radio ํญ๋ชฉ ์ ํ ์ด๋ฒคํธ ๋ฐ์
const radio = document.querySelector(`input[type="radio"][value="${spaceId}"]`);
if (radio) {
radio.checked = true;
const event = new Event('change');
radio.dispatchEvent(event);
}
}
</script>
"""
html_content = grid_open + "".join(card_fragments) + grid_close

# Gradio app.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# HF Space Ranking Tracker (~30 Days)
Track, analyze, and discover trending AI applications in the Hugging Face ecosystem.
Our service continuously monitors and ranks all Spaces over a 30-day period,
providing detailed analytics and daily ranking changes for the top 100 performers.
""")

    with gr.Tabs():
        with gr.Tab("Dashboard"):
            with gr.Row(variant="panel"):
                with gr.Column(scale=5):
                    trend_plot = gr.Plot(
                        label="Daily Rank Trend",
                        container=True,
                    )
                with gr.Column(scale=5):
                    duplicates_plot = gr.Plot(
                        label="Multiple Entries Analysis",
                        value=duplicates_chart,
                        container=True,
                    )

            with gr.Row():
                info_box = gr.HTML(
                    value="<div style='text-align: center; padding: 20px; color: #666;'>Select a space to view details</div>"
                )

            # Hidden radio: it carries the selection, which is meant to be
            # driven from the cards below rather than clicked directly.
            space_selection = gr.Radio(
                choices=space_ids,
                value=None,
                visible=False,
            )

            # The top-100 spaces rendered as cards.
            with gr.Row():
                space_grid = gr.HTML(value=html_content)

        with gr.Tab("About"):
            gr.Markdown("""
### Our Tracking System
**What We Track**
- Daily ranking changes for all Hugging Face Spaces
- Comprehensive trending scores based on 30-day activity
- Detailed performance metrics for top 100 Spaces
- Historical ranking data with daily granularity
**Key Features**
- **Real-time Rankings**: Stay updated with daily rank changes
- **Interactive Visualizations**: Track ranking trajectories over time
- **Trend Analysis**: Identify emerging popular AI applications
- **Direct Access**: Quick links to explore trending Spaces
- **Performance Metrics**: Detailed trending scores and statistics
### Why Use HF Space Ranking Tracker?
- Discover trending AI demos and applications
- Monitor your Space's performance and popularity
- Identify emerging trends in the AI community
- Make data-driven decisions about your AI projects
- Stay ahead of the curve in AI application development
Our dashboard provides a comprehensive view of the Hugging Face Spaces ecosystem,
helping developers, researchers, and enthusiasts track and understand the dynamics of popular AI applications.
Whether you're monitoring your own Space's performance or discovering new trending applications,
HF Space Ranking Tracker offers the insights you need.
""")

    # Call update_display whenever the radio value changes.
    space_selection.change(
        fn=update_display,
        inputs=[space_selection],
        outputs=[trend_plot, info_box],
        api_name="update_display",
    )

if __name__ == "__main__":
    demo.launch(share=True)