|
import gradio as gr |
|
import pandas as pd |
|
import plotly.express as px |
|
import plotly.graph_objects as go |
|
from huggingface_hub import HfApi |
|
from datetime import datetime |
|
import numpy as np |
|
|
|
def format_number(num):
    """Format a number compactly using a K (thousands) or M (millions) suffix.

    The suffix is chosen from the magnitude of ``num``, so negative values
    are abbreviated the same way as positive ones. Values whose magnitude is
    below 1000 are returned unchanged via ``str(num)``.

    Args:
        num: An int or float (NumPy scalars also work).

    Returns:
        A string such as ``"1.5K"``, ``"-2.5M"`` or ``"999"``.

    >>> format_number(1500)
    '1.5K'
    >>> format_number(-2500000)
    '-2.5M'
    >>> format_number(999999)
    '1.0M'
    """
    magnitude = abs(num)
    if magnitude < 1e3:
        return str(num)

    sign = "-" if num < 0 else ""
    # Promote to "M" when one-decimal rounding of the K value would reach
    # 1000.0, so 999_999 renders as "1.0M" rather than "1000.0K".
    if magnitude >= 1e6 or round(magnitude / 1e3, 1) >= 1000:
        return f"{sign}{magnitude / 1e6:.1f}M"
    return f"{sign}{magnitude / 1e3:.1f}K"
|
|
|
def fetch_stats():
    """Fetch download and like counts for DeepSeek-R1 models and their derivatives.

    Queries the Hugging Face Hub through ``HfApi``:

    * one ``model_info`` call per original DeepSeek model, and
    * one ``list_models`` call per (base model, derivative type) pair, using
      a ``base_model:<type>:deepseek-ai/<name>`` filter.

    Any query that raises is reported with ``print`` and skipped, so a single
    failing request does not abort the whole collection.

    Returns:
        A tuple ``(original_df, derivative_df)`` of pandas DataFrames.
        ``original_df`` has columns ``model_id``, ``downloads_30d``, ``likes``.
        ``derivative_df`` has columns ``base_model``, ``model_type``,
        ``model_id``, ``downloads_30d``, ``likes``. Both frames keep these
        columns even when no rows were collected. ``downloads_30d`` holds the
        Hub object's ``downloads`` attribute.
    """

    def _count(obj, attr):
        # The attribute can be present but None, which a plain hasattr()
        # check lets through; treat missing and None alike as 0 so that the
        # resulting columns stay numeric.
        return getattr(obj, attr, None) or 0

    api = HfApi()

    original_models = [
        "deepseek-ai/deepseek-r1",
        "deepseek-ai/deepseek-r1-zero",
        "deepseek-ai/deepseek-r1-distill-llama-70b",
        "deepseek-ai/deepseek-r1-distill-qwen-32b",
        "deepseek-ai/deepseek-r1-distill-qwen-14b",
        "deepseek-ai/deepseek-r1-distill-llama-8b",
        "deepseek-ai/deepseek-r1-distill-qwen-7b",
        "deepseek-ai/deepseek-r1-distill-qwen-1.5b",
    ]

    original_stats = []
    for model_id in original_models:
        try:
            info = api.model_info(model_id)
        except Exception as e:  # best effort: report and continue
            print(f"Error fetching {model_id}: {e}")
        else:
            original_stats.append({
                'model_id': model_id,
                'downloads_30d': _count(info, 'downloads'),
                'likes': _count(info, 'likes'),
            })

    model_types = ["adapter", "finetune", "merge", "quantized"]
    base_models = [
        "DeepSeek-R1",
        "DeepSeek-R1-Zero",
        "DeepSeek-R1-Distill-Llama-70B",
        "DeepSeek-R1-Distill-Qwen-32B",
        "DeepSeek-R1-Distill-Qwen-14B",
        "DeepSeek-R1-Distill-Llama-8B",
        "DeepSeek-R1-Distill-Qwen-7B",
        "DeepSeek-R1-Distill-Qwen-1.5B",
    ]

    derivative_stats = []
    for base_model in base_models:
        base_id = f"deepseek-ai/{base_model}"
        for model_type in model_types:
            try:
                # Build all rows for this query first so that a failure
                # part-way through the listing contributes no partial rows.
                rows = [
                    {
                        'base_model': base_id,
                        'model_type': model_type,
                        'model_id': model.id,
                        'downloads_30d': _count(model, 'downloads'),
                        'likes': _count(model, 'likes'),
                    }
                    for model in api.list_models(
                        filter=f"base_model:{model_type}:{base_id}",
                        full=True,
                    )
                ]
            except Exception as e:  # best effort: report and continue
                print(f"Error fetching {model_type} models for {base_model}: {e}")
            else:
                derivative_stats.extend(rows)

    # Passing explicit columns keeps the schema stable when a list is empty.
    original_df = pd.DataFrame(
        original_stats,
        columns=['model_id', 'downloads_30d', 'likes'],
    )
    derivative_df = pd.DataFrame(
        derivative_stats,
        columns=['base_model', 'model_type', 'model_id', 'downloads_30d', 'likes'],
    )

    return original_df, derivative_df
|
|
|
def _model_link_html(model_id):
    """Return an HTML anchor that opens the model's Hugging Face page in a new tab."""
    from html import escape
    from urllib.parse import quote

    # Model ids come from an external API: percent-encode the id for the URL
    # (the default safe set keeps "/") and HTML-escape it for the link text.
    url = f"https://huggingface.co/{quote(model_id)}"
    return (
        f'<a href="{url}" target="_blank" '
        f'onclick="window.open(\'{url}\', \'_blank\')">{escape(model_id)}</a>'
    )


def _build_type_figure(derivative_df):
    """Build the "Downloads by Model Type" bar chart from the derivative models."""
    if derivative_df.empty:
        return px.bar(title='No data available')

    type_dist = (
        derivative_df.groupby('model_type')
        .agg({'model_id': 'count', 'downloads_30d': 'sum'})
        .reset_index()
    )
    type_dist['model_type'] = type_dist['model_type'].str.capitalize()
    type_dist = type_dist.sort_values('downloads_30d', ascending=True)

    fig = go.Figure(data=[
        go.Bar(
            x=list(type_dist['model_type']),
            y=list(type_dist['downloads_30d'].values),
            marker_color='rgb(55, 83, 109)',
        )
    ])
    fig.update_layout(
        title='Downloads by Model Type',
        yaxis_title='Downloads',
        plot_bgcolor='white',
        showlegend=False,
        bargap=0.3,
    )
    # Label each bar with its abbreviated download count.
    fig.update_traces(
        text=type_dist['downloads_30d'].apply(format_number),
        textposition='outside',
    )
    return fig


def _build_top_models(derivative_df):
    """Return a display-ready table of the 10 derivative models with the most downloads."""
    columns = ['model_id', 'model_type', 'downloads_30d', 'likes']
    if derivative_df.empty:
        return pd.DataFrame(columns=columns)

    # Rank on the numeric column before it is replaced by formatted strings.
    top_models = derivative_df.nlargest(10, 'downloads_30d')[columns].copy()
    top_models['model_type'] = top_models['model_type'].str.capitalize()
    top_models['downloads_30d'] = top_models['downloads_30d'].apply(format_number)
    top_models['model_id'] = top_models['model_id'].apply(_model_link_html)
    return top_models


def _build_summary_html(original_df, derivative_df):
    """Render the summary panel: total downloads, model counts and a timestamp."""
    from datetime import timezone

    total_originals = len(original_df)
    total_derivatives = len(derivative_df)
    total_downloads_orig = original_df['downloads_30d'].sum()
    total_downloads_deriv = derivative_df['downloads_30d'].sum()
    # Take the time in UTC so that it matches the "UTC" label in the output.
    last_updated = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')

    return f"""
    <div style='padding: 20px; background-color: #f5f5f5; border-radius: 10px; margin-bottom: 20px;'>
        <h3 style='color: #333333;'>Summary Statistics</h3>
        <p style='color: #333333;'>Derivative Models Downloads: {format_number(total_downloads_deriv)} ({total_derivatives} models)</p>
        <p style='color: #333333;'>Original Models Downloads: {format_number(total_downloads_orig)} ({total_originals} models)</p>
        <p style='color: #333333;'>Last Updated: {last_updated}</p>
    </div>
    """


def create_stats_html():
    """Fetch the model statistics and build everything the dashboard displays.

    Calls ``fetch_stats()`` once and derives three outputs from its result.

    Returns:
        A tuple ``(summary_html, fig_types, top_models)``:

        * ``summary_html``: HTML string with total downloads and model counts
          for derivative and original models, plus a UTC timestamp.
        * ``fig_types``: Plotly figure of summed downloads per derivative
          type, or an empty titled figure when there are no derivatives.
        * ``top_models``: DataFrame with columns ``model_id`` (an HTML link),
          ``model_type``, ``downloads_30d`` (formatted string) and ``likes``
          for the 10 most-downloaded derivatives; empty when there are none.
    """
    original_df, derivative_df = fetch_stats()

    summary_html = _build_summary_html(original_df, derivative_df)
    fig_types = _build_type_figure(derivative_df)
    top_models = _build_top_models(derivative_df)

    return summary_html, fig_types, top_models
|
|
|
def create_interface():
    """Assemble the Gradio Blocks app for the DeepSeek statistics dashboard.

    Layout: a centered title, a row holding the summary HTML next to the
    plot, and a row holding the top-10 table. The three components are
    filled by ``create_stats_html()`` when the page loads.

    Returns:
        The constructed ``gr.Blocks`` object (not yet launched).
    """
    table_headers = ["Model ID", "Type", "Downloads (30d)", "Likes"]
    # The first column is declared as HTML because it carries anchor markup.
    table_datatypes = ["html", "str", "str", "number"]

    def update_stats():
        # Forward the (summary, figure, table) triple to the load event.
        return create_stats_html()

    with gr.Blocks(theme=gr.themes.Soft()) as app:
        gr.HTML("<h1 style='text-align: center;'>DeepSeek Models Stats</h1>")

        with gr.Row():
            with gr.Column():
                summary_panel = gr.HTML()
            with gr.Column():
                downloads_plot = gr.Plot()

        with gr.Row():
            top_table = gr.DataFrame(
                label="Top 10 Most Downloaded Models",
                headers=table_headers,
                datatype=table_datatypes,
                wrap=True,
            )

        # Populate all three components once, on page load.
        app.load(
            fn=update_stats,
            outputs=[summary_panel, downloads_plot, top_table],
        )

    return app
|
|
|
|
|
# Build the app at module level so `demo` stays available to anything that
# imports this module, but only start the web server when the file is run
# as a script; a plain import should not block on a running server.
demo = create_interface()

if __name__ == "__main__":
    demo.launch()