"""Gradio dashboard with Hugging Face download statistics for DeepSeek-R1 models."""

import html
from datetime import datetime, timezone

import gradio as gr
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from huggingface_hub import HfApi

# Repositories published by deepseek-ai whose own stats are reported.
ORIGINAL_MODELS = [
    "deepseek-ai/deepseek-r1",
    "deepseek-ai/deepseek-r1-zero",
    "deepseek-ai/deepseek-r1-distill-llama-70b",
    "deepseek-ai/deepseek-r1-distill-qwen-32b",
    "deepseek-ai/deepseek-r1-distill-qwen-14b",
    "deepseek-ai/deepseek-r1-distill-llama-8b",
    "deepseek-ai/deepseek-r1-distill-qwen-7b",
    "deepseek-ai/deepseek-r1-distill-qwen-1.5b",
]

# Relationship kinds used in the "base_model:<kind>:<repo>" hub filter.
MODEL_TYPES = ["adapter", "finetune", "merge", "quantized"]

# Base model names (under the deepseek-ai namespace) searched for derivatives.
BASE_MODELS = [
    "DeepSeek-R1",
    "DeepSeek-R1-Zero",
    "DeepSeek-R1-Distill-Llama-70B",
    "DeepSeek-R1-Distill-Qwen-32B",
    "DeepSeek-R1-Distill-Qwen-14B",
    "DeepSeek-R1-Distill-Llama-8B",
    "DeepSeek-R1-Distill-Qwen-7B",
    "DeepSeek-R1-Distill-Qwen-1.5B",
]

ORIGINAL_COLUMNS = ['model_id', 'downloads_30d', 'likes']
DERIVATIVE_COLUMNS = ['base_model', 'model_type', 'model_id', 'downloads_30d', 'likes']
TOP_MODEL_COLUMNS = ['model_id', 'model_type', 'downloads_30d', 'likes']


def format_number(num):
    """Format a number with a K (thousands) or M (millions) suffix.

    Values below 1000 are returned via ``str(num)`` unchanged.
    """
    if num >= 1e6:
        return f"{num/1e6:.1f}M"
    if num >= 1e3:
        return f"{num/1e3:.1f}K"
    return str(num)


def _count(obj, attr):
    """Return ``obj.attr`` as a number, treating a missing or None value as 0."""
    # The attribute can exist but be None; ``or 0`` keeps later sums,
    # sorting and formatting from failing on None.
    return getattr(obj, attr, 0) or 0


def fetch_stats():
    """Fetch statistics for the original DeepSeek models and their derivatives.

    Returns:
        A pair ``(original_df, derivative_df)`` of DataFrames. ``original_df``
        has columns ``model_id``, ``downloads_30d`` and ``likes``;
        ``derivative_df`` additionally has ``base_model`` and ``model_type``.
        Models whose lookup fails are reported on stdout and skipped.
    """
    api = HfApi()

    original_stats = []
    for model_id in ORIGINAL_MODELS:
        # Deliberately best-effort: one failing repo must not sink the page.
        try:
            info = api.model_info(model_id)
        except Exception as e:
            print(f"Error fetching {model_id}: {str(e)}")
            continue
        original_stats.append({
            'model_id': model_id,
            'downloads_30d': _count(info, 'downloads'),
            'likes': _count(info, 'likes'),
        })

    derivative_stats = []
    for base_model in BASE_MODELS:
        for model_type in MODEL_TYPES:
            try:
                # Materialise inside the try so an error raised while
                # iterating the listing discards this whole query.
                models = list(api.list_models(
                    filter=f"base_model:{model_type}:deepseek-ai/{base_model}",
                    full=True,
                ))
            except Exception as e:
                print(f"Error fetching {model_type} models for {base_model}: {str(e)}")
                continue
            derivative_stats.extend(
                {
                    'base_model': f"deepseek-ai/{base_model}",
                    'model_type': model_type,
                    'model_id': model.id,
                    'downloads_30d': _count(model, 'downloads'),
                    'likes': _count(model, 'likes'),
                }
                for model in models
            )

    # Explicit columns keep the frames well-formed even when nothing was fetched.
    original_df = pd.DataFrame(original_stats, columns=ORIGINAL_COLUMNS)
    derivative_df = pd.DataFrame(derivative_stats, columns=DERIVATIVE_COLUMNS)
    return original_df, derivative_df


def _build_type_chart(derivative_df):
    """Build the bar chart of total downloads per derivative model type."""
    if derivative_df.empty:
        return px.bar(title='No data available')

    type_dist = derivative_df.groupby('model_type').agg({
        'model_id': 'count',
        'downloads_30d': 'sum',
    }).reset_index()
    type_dist['model_type'] = type_dist['model_type'].str.capitalize()
    type_dist = type_dist.sort_values('downloads_30d', ascending=True)

    fig_types = go.Figure(data=[
        go.Bar(
            # Plain lists rather than pandas objects are handed to plotly.
            x=list(type_dist['model_type']),
            y=list(type_dist['downloads_30d'].values),
            marker_color='rgb(55, 83, 109)',
        )
    ])
    fig_types.update_layout(
        title='Downloads by Model Type',
        yaxis_title='Downloads',
        plot_bgcolor='white',
        showlegend=False,
        bargap=0.3,
    )
    fig_types.update_traces(
        text=type_dist['downloads_30d'].apply(format_number),
        textposition='outside',
    )
    return fig_types


def _build_top_models(derivative_df):
    """Build the table of the 10 most downloaded derivative models."""
    if derivative_df.empty:
        return pd.DataFrame(columns=TOP_MODEL_COLUMNS)

    # Copy so the column rewrites below do not touch derivative_df.
    top_models = derivative_df.nlargest(10, 'downloads_30d')[TOP_MODEL_COLUMNS].copy()
    top_models['model_type'] = top_models['model_type'].str.capitalize()
    top_models['downloads_30d'] = top_models['downloads_30d'].apply(format_number)

    # Clickable link to the model page; ids come from the hub, so escape them.
    # NOTE(review): the anchor markup was reconstructed (the tags were missing
    # from the source) - confirm the exact attributes wanted.
    def as_link(model_id):
        safe_id = html.escape(str(model_id), quote=True)
        return f'<a href="https://huggingface.co/{safe_id}" target="_blank">{safe_id}</a>'

    top_models['model_id'] = top_models['model_id'].apply(as_link)
    return top_models


def create_stats_html():
    """Create the summary HTML, the downloads chart and the top-models table.

    Returns:
        A tuple ``(summary_html, fig_types, top_models)``: an HTML string, a
        plotly figure and a DataFrame, in the order the interface expects.
    """
    original_df, derivative_df = fetch_stats()

    total_originals = len(original_df)
    total_derivatives = len(derivative_df)
    total_downloads_orig = original_df['downloads_30d'].sum()
    total_downloads_deriv = derivative_df['downloads_30d'].sum()

    fig_types = _build_type_chart(derivative_df)
    top_models = _build_top_models(derivative_df)

    # The label says UTC, so take the time in UTC instead of local time.
    last_updated = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')

    # NOTE(review): tags reconstructed (they were missing from the source);
    # confirm the intended styling.
    summary_html = f"""
<div>
    <h3>Summary Statistics</h3>
    <p>Derivative Models Downloads: {format_number(total_downloads_deriv)} ({total_derivatives} models)</p>
    <p>Original Models Downloads: {format_number(total_downloads_orig)} ({total_originals} models)</p>
    <p>Last Updated: {last_updated}</p>
</div>
"""
    return summary_html, fig_types, top_models


def create_interface():
    """Create the Gradio interface; statistics are fetched on each page load."""
    with gr.Blocks(theme=gr.themes.Soft()) as interface:
        gr.HTML("<h1>DeepSeek Models Stats</h1>")

        with gr.Row():
            with gr.Column():
                summary_html = gr.HTML()
            with gr.Column():
                plot = gr.Plot()

        with gr.Row():
            table = gr.DataFrame(
                headers=["Model ID", "Type", "Downloads (30d)", "Likes"],
                label="Top 10 Most Downloaded Models",
                wrap=True,
                datatype=["html", "str", "str", "number"],
            )

        # create_stats_html returns values in the same order as the outputs.
        interface.load(create_stats_html, outputs=[summary_html, plot, table])

    return interface


# Built at import time so the interface stays available as a module attribute.
demo = create_interface()

if __name__ == "__main__":
    demo.launch()