burtenshaw committed on
Commit
ec60e9a
·
1 Parent(s): afaf730

use percentile boundaries in app

Browse files
Files changed (8) hide show
  1. app.py +40 -25
  2. default.jpg +0 -0
  3. default.png +0 -0
  4. images/empty.png +0 -0
  5. images/space.png +0 -0
  6. percentiles.json +1 -0
  7. pyproject.toml +3 -0
  8. stats_dataset.ipynb +166 -0
app.py CHANGED
@@ -3,13 +3,28 @@ import gradio as gr
3
  from urllib.parse import urlencode
4
  import os
5
  from datetime import datetime
 
6
 
7
  # Load environment variables
8
 
9
  DEFAULT_IMAGE = "https://hub-recap.imglab-cdn.net/default.jpg?width=1200&text=%3Cspan+size%3D%2212pt%22+weight%3D%22bold%22%3EHugging+Face++%E2%9D%A4%EF%B8%8F+bartowski+in+2024%3C%2Fspan%3E%0A%0A%3Cspan+weight%3D%22bold%22%3E2%2C020%2C552%3C%2Fspan%3E+model+downloads%0A%3Cspan+weight%3D%22bold%22%3E5%2C407%3C%2Fspan%3E+model+likes%0A%3Cspan+weight%3D%22bold%22%3E0%3C%2Fspan%3E+dataset+downloads%0A%3Cspan+weight%3D%22bold%22%3E0%3C%2Fspan%3E+dataset+likes%0A%0A%3Cspan+size%3D%2210pt%22%3EMost+Popular+Contributions%3A%3C%2Fspan%3E%0AModel%3A+%3Cspan+weight%3D%22bold%22%3Ebartowski%2Fgemma-2-9b-it-GGUF%3C%2Fspan%3E%0A++%2843%2C949+downloads%2C+196+likes%29%0ADataset%3A+%3Cspan+weight%3D%22bold%22%3ENone%3C%2Fspan%3E%0A++%280+downloads%2C+0+likes%29%0ASpace%3A+%3Cspan+weight%3D%22bold%22%3Ebartowski%2Fgguf-metadata-updater%3C%2Fspan%3E%0A++%287+likes%29&text-width=800&text-height=600&text-padding=60&text-color=39%2C71%2C111&text-x=460&text-y=40&format=png&dpr=2"
10
- MAX_MODEL_ACTIVITY = 7354
11
- MAX_DATASET_ACTIVITY = 6564
12
- MAX_SPACE_ACTIVITY = 12026
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
 
15
  def create_image(stats, username):
@@ -19,39 +34,39 @@ def create_image(stats, username):
19
  dataset_activity = total_stats["Dataset Downloads"] + total_stats["Dataset Likes"]
20
  space_activity = total_stats["Space Likes"]
21
 
22
- # Calculate percentiles based on max values (removed min(100,...))
23
- top_items = stats["Most Popular Items"]
24
- model_percentile = round(
25
- (top_items["Top Model"]["likes"] / MAX_MODEL_ACTIVITY) * 100, 2
26
- )
27
- dataset_percentile = round(
28
- (top_items["Top Dataset"]["likes"] / MAX_DATASET_ACTIVITY) * 100, 2
29
- )
30
- space_percentile = round(
31
- (top_items["Top Space"]["likes"] / MAX_SPACE_ACTIVITY) * 100, 2
32
- )
33
 
34
- # Choose base image URL based on highest activity
35
- # check if no activity in any category
36
- # if everything is 0, we show the empty image
37
  if model_activity == 0 and dataset_activity == 0 and space_activity == 0:
38
  url = "https://hub-recap.imglab-cdn.net/images/empty.png"
39
  avatar = "new! We couldn't find your stats on the Hub, maybe in 2025?"
40
  elif model_activity >= max(dataset_activity, space_activity):
41
- url = "https://hub-recap.imglab-cdn.net/images/models.png"
42
- avatar = f"Model Pro (top {model_percentile}%)"
 
 
43
  elif dataset_activity >= max(model_activity, space_activity):
44
- url = "https://hub-recap.imglab-cdn.net/images/datasets.png"
45
- avatar = f"Dataset Guru (top {dataset_percentile}%)"
 
 
 
 
 
 
 
46
  else:
47
- url = "https://hub-recap.imglab-cdn.net/images/spaces.png"
48
- avatar = f"Space Artiste (top {space_percentile}%)"
49
 
50
  # Build text content with proper formatting
51
  text_parts = []
52
 
53
  text_parts.append(
54
- f'<span size="11pt" weight="bold">Hugging Face ❤️ {username} in 2024</span>'
55
  )
56
  text_parts.append("") # Empty line for spacing
57
 
@@ -117,7 +132,7 @@ def create_image(stats, username):
117
 
118
  # Update the avatar message with percentile
119
  text_parts.append("") # Empty line for spacing
120
- text_parts.append(f'<span size="9pt">You are a {avatar}! 🎉</span>')
121
 
122
  # Add additional percentile info if other categories are significant
123
  other_percentiles = []
 
3
  from urllib.parse import urlencode
4
  import os
5
  from datetime import datetime
6
+ import json
7
 
8
  # Load environment variables
9
 
10
  DEFAULT_IMAGE = "https://hub-recap.imglab-cdn.net/default.jpg?width=1200&text=%3Cspan+size%3D%2212pt%22+weight%3D%22bold%22%3EHugging+Face++%E2%9D%A4%EF%B8%8F+bartowski+in+2024%3C%2Fspan%3E%0A%0A%3Cspan+weight%3D%22bold%22%3E2%2C020%2C552%3C%2Fspan%3E+model+downloads%0A%3Cspan+weight%3D%22bold%22%3E5%2C407%3C%2Fspan%3E+model+likes%0A%3Cspan+weight%3D%22bold%22%3E0%3C%2Fspan%3E+dataset+downloads%0A%3Cspan+weight%3D%22bold%22%3E0%3C%2Fspan%3E+dataset+likes%0A%0A%3Cspan+size%3D%2210pt%22%3EMost+Popular+Contributions%3A%3C%2Fspan%3E%0AModel%3A+%3Cspan+weight%3D%22bold%22%3Ebartowski%2Fgemma-2-9b-it-GGUF%3C%2Fspan%3E%0A++%2843%2C949+downloads%2C+196+likes%29%0ADataset%3A+%3Cspan+weight%3D%22bold%22%3ENone%3C%2Fspan%3E%0A++%280+downloads%2C+0+likes%29%0ASpace%3A+%3Cspan+weight%3D%22bold%22%3Ebartowski%2Fgguf-metadata-updater%3C%2Fspan%3E%0A++%287+likes%29&text-width=800&text-height=600&text-padding=60&text-color=39%2C71%2C111&text-x=460&text-y=40&format=png&dpr=2"
11
+
12
+ # Load percentiles data
13
+ with open("percentiles.json") as f:
14
+ PERCENTILES = json.load(f)
15
+
16
+
17
+ def get_percentile_rank(likes, category):
18
+ if likes == 0:
19
+ return 0
20
+ percentiles = PERCENTILES[f"{category}_percentiles"]
21
+ if likes >= percentiles["p_99999"]:
22
+ return 99.999
23
+ elif likes >= percentiles["p_9999"]:
24
+ return 99.99
25
+ elif likes >= percentiles["p_999"]:
26
+ return 99.9
27
+ return 0
28
 
29
 
30
  def create_image(stats, username):
 
34
  dataset_activity = total_stats["Dataset Downloads"] + total_stats["Dataset Likes"]
35
  space_activity = total_stats["Space Likes"]
36
 
37
+ # Calculate percentiles based on likes
38
+ model_percentile = get_percentile_rank(total_stats["Model Likes"], "model")
39
+ dataset_percentile = get_percentile_rank(total_stats["Dataset Likes"], "dataset")
40
+ space_percentile = get_percentile_rank(space_activity, "space")
 
 
 
 
 
 
 
41
 
42
+ # Choose base image URL based on highest activity (keep using activity for image selection)
 
 
43
  if model_activity == 0 and dataset_activity == 0 and space_activity == 0:
44
  url = "https://hub-recap.imglab-cdn.net/images/empty.png"
45
  avatar = "new! We couldn't find your stats on the Hub, maybe in 2025?"
46
  elif model_activity >= max(dataset_activity, space_activity):
47
+ url = "https://hub-recap.imglab-cdn.net/images/model.png"
48
+ avatar = f"Model Pro" + (
49
+ f" (top {model_percentile}%)" if model_percentile > 0 else ""
50
+ )
51
  elif dataset_activity >= max(model_activity, space_activity):
52
+ url = "https://hub-recap.imglab-cdn.net/images/dataset.png"
53
+ avatar = f"Dataset Guru" + (
54
+ f" (top {dataset_percentile}%)" if dataset_percentile > 0 else ""
55
+ )
56
+ elif space_activity >= max(model_activity, dataset_activity):
57
+ url = "https://hub-recap.imglab-cdn.net/images/space.png"
58
+ avatar = f"Space Artiste" + (
59
+ f" (top {space_percentile}%)" if space_percentile > 0 else ""
60
+ )
61
  else:
62
+ url = "https://hub-recap.imglab-cdn.net/images/empty.png"
63
+ avatar = "new! We couldn't find your stats on the Hub, maybe in 2025?"
64
 
65
  # Build text content with proper formatting
66
  text_parts = []
67
 
68
  text_parts.append(
69
+ f'<span size="11pt" weight="bold">Hugging Face ❤️ {username} in 2024</span>'
70
  )
71
  text_parts.append("") # Empty line for spacing
72
 
 
132
 
133
  # Update the avatar message with percentile
134
  text_parts.append("") # Empty line for spacing
135
+ text_parts.append(f'<span size="9pt">You are a {avatar}!</span>')
136
 
137
  # Add additional percentile info if other categories are significant
138
  other_percentiles = []
default.jpg DELETED
Binary file (247 kB)
 
default.png ADDED
images/empty.png CHANGED
images/space.png CHANGED
percentiles.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset_percentiles": {"p_99999": 1299, "p_9999": 491, "p_999": 125}, "model_percentiles": {"p_99999": 3698, "p_9999": 949, "p_999": 143}, "space_percentiles": {"p_99999": 6040, "p_9999": 1552, "p_999": 326}}
pyproject.toml CHANGED
@@ -5,6 +5,9 @@ description = "Add your description here"
5
  readme = "README.md"
6
  requires-python = ">=3.11"
7
  dependencies = [
 
8
  "gradio>=5.9.1",
 
 
9
  "requests>=2.32.3",
10
  ]
 
5
  readme = "README.md"
6
  requires-python = ">=3.11"
7
  dependencies = [
8
+ "datasets>=3.2.0",
9
  "gradio>=5.9.1",
10
+ "ipykernel>=6.29.5",
11
+ "pandas>=2.2.3",
12
  "requests>=2.32.3",
13
  ]
stats_dataset.ipynb ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 2,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "/Users/ben/code/hub-recap/.venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
13
+ " from .autonotebook import tqdm as notebook_tqdm\n"
14
+ ]
15
+ }
16
+ ],
17
+ "source": [
18
+ "from datasets import load_dataset\n",
19
+ "\n",
20
+ "ds = load_dataset(\"cfahlgren1/hub-stats\", \"datasets\")\n",
21
+ "ds_df = ds[\"train\"].to_pandas()"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 3,
27
+ "metadata": {},
28
+ "outputs": [],
29
+ "source": [
30
+ "ds = load_dataset(\"cfahlgren1/hub-stats\", \"models\")\n",
31
+ "md_df = ds[\"train\"].to_pandas()"
32
+ ]
33
+ },
34
+ {
35
+ "cell_type": "code",
36
+ "execution_count": 30,
37
+ "metadata": {},
38
+ "outputs": [
39
+ {
40
+ "name": "stderr",
41
+ "output_type": "stream",
42
+ "text": [
43
+ "Generating train split: 100%|██████████| 309714/309714 [00:00<00:00, 353713.86 examples/s]\n"
44
+ ]
45
+ }
46
+ ],
47
+ "source": [
48
+ "ds = load_dataset(\"cfahlgren1/hub-stats\", \"spaces\")\n",
49
+ "sp_df = ds[\"train\"].to_pandas()"
50
+ ]
51
+ },
52
+ {
53
+ "cell_type": "code",
54
+ "execution_count": 40,
55
+ "metadata": {},
56
+ "outputs": [
57
+ {
58
+ "name": "stdout",
59
+ "output_type": "stream",
60
+ "text": [
61
+ "{'p_99999': 1299, 'p_9999': 491, 'p_999': 125}\n"
62
+ ]
63
+ }
64
+ ],
65
+ "source": [
66
+ "dataset_percentiles = {\n",
67
+ " \"p_99999\": int(ds_df[\"likes\"].quantile(0.99999)),\n",
68
+ " \"p_9999\": int(ds_df[\"likes\"].quantile(0.9999)),\n",
69
+ " \"p_999\": int(ds_df[\"likes\"].quantile(0.999)),\n",
70
+ "}\n",
71
+ "print(dataset_percentiles)"
72
+ ]
73
+ },
74
+ {
75
+ "cell_type": "code",
76
+ "execution_count": 41,
77
+ "metadata": {},
78
+ "outputs": [
79
+ {
80
+ "name": "stdout",
81
+ "output_type": "stream",
82
+ "text": [
83
+ "{'p_99999': 3698, 'p_9999': 949, 'p_999': 143}\n"
84
+ ]
85
+ }
86
+ ],
87
+ "source": [
88
+ "model_percentiles = {\n",
89
+ " \"p_99999\": int(md_df[\"likes\"].quantile(0.99999)),\n",
90
+ " \"p_9999\": int(md_df[\"likes\"].quantile(0.9999)),\n",
91
+ " \"p_999\": int(md_df[\"likes\"].quantile(0.999)),\n",
92
+ "}\n",
93
+ "print(model_percentiles)"
94
+ ]
95
+ },
96
+ {
97
+ "cell_type": "code",
98
+ "execution_count": 42,
99
+ "metadata": {},
100
+ "outputs": [
101
+ {
102
+ "name": "stdout",
103
+ "output_type": "stream",
104
+ "text": [
105
+ "{'p_99999': 6040, 'p_9999': 1552, 'p_999': 326}\n"
106
+ ]
107
+ }
108
+ ],
109
+ "source": [
110
+ "space_percentiles = {\n",
111
+ " \"p_99999\": int(sp_df[\"likes\"].quantile(0.99999)),\n",
112
+ " \"p_9999\": int(sp_df[\"likes\"].quantile(0.9999)),\n",
113
+ " \"p_999\": int(sp_df[\"likes\"].quantile(0.999)),\n",
114
+ "}\n",
115
+ "print(space_percentiles)"
116
+ ]
117
+ },
118
+ {
119
+ "cell_type": "code",
120
+ "execution_count": 43,
121
+ "metadata": {},
122
+ "outputs": [],
123
+ "source": [
124
+ "import json\n",
125
+ "\n",
126
+ "with open(\"percentiles.json\", \"w\") as f:\n",
127
+ " json.dump(\n",
128
+ " {\n",
129
+ " \"dataset_percentiles\": dataset_percentiles,\n",
130
+ " \"model_percentiles\": model_percentiles,\n",
131
+ " \"space_percentiles\": space_percentiles,\n",
132
+ " },\n",
133
+ " f,\n",
134
+ " )"
135
+ ]
136
+ },
137
+ {
138
+ "cell_type": "code",
139
+ "execution_count": null,
140
+ "metadata": {},
141
+ "outputs": [],
142
+ "source": []
143
+ }
144
+ ],
145
+ "metadata": {
146
+ "kernelspec": {
147
+ "display_name": ".venv",
148
+ "language": "python",
149
+ "name": "python3"
150
+ },
151
+ "language_info": {
152
+ "codemirror_mode": {
153
+ "name": "ipython",
154
+ "version": 3
155
+ },
156
+ "file_extension": ".py",
157
+ "mimetype": "text/x-python",
158
+ "name": "python",
159
+ "nbconvert_exporter": "python",
160
+ "pygments_lexer": "ipython3",
161
+ "version": "3.11.10"
162
+ }
163
+ },
164
+ "nbformat": 4,
165
+ "nbformat_minor": 2
166
+ }