Spaces:
Runtime error
Runtime error
Make clickable
Browse files
app.ipynb
CHANGED
@@ -8,44 +8,47 @@
|
|
8 |
"source": [
|
9 |
"# |export\n",
|
10 |
"import gradio as gr\n",
|
11 |
-
"import pandas as pd"
|
12 |
]
|
13 |
},
|
14 |
{
|
15 |
"cell_type": "code",
|
16 |
-
"execution_count":
|
17 |
"metadata": {},
|
18 |
"outputs": [],
|
19 |
"source": [
|
20 |
"# |export\n",
|
21 |
-
"df = pd.read_csv(\"
|
|
|
|
|
|
|
22 |
]
|
23 |
},
|
24 |
{
|
25 |
"cell_type": "code",
|
26 |
-
"execution_count":
|
27 |
"metadata": {},
|
28 |
"outputs": [],
|
29 |
"source": [
|
30 |
"# |export\n",
|
31 |
"# Drop footers\n",
|
32 |
-
"df = df.copy()[~df[\"Model\"].isna()]"
|
33 |
]
|
34 |
},
|
35 |
{
|
36 |
"cell_type": "code",
|
37 |
-
"execution_count":
|
38 |
"metadata": {},
|
39 |
"outputs": [],
|
40 |
"source": [
|
41 |
"# |export\n",
|
42 |
"# Drop TBA models\n",
|
43 |
-
"df = df.copy()[df[\"Parameters \\n(B)\"] != \"TBA\"]"
|
44 |
]
|
45 |
},
|
46 |
{
|
47 |
"cell_type": "code",
|
48 |
-
"execution_count":
|
49 |
"metadata": {},
|
50 |
"outputs": [
|
51 |
{
|
@@ -79,14 +82,14 @@
|
|
79 |
" <th>Announced\\n▼</th>\n",
|
80 |
" <th>Public?</th>\n",
|
81 |
" <th>Released</th>\n",
|
82 |
-
" <th>Paper
|
83 |
" <th>Notes</th>\n",
|
84 |
" </tr>\n",
|
85 |
" </thead>\n",
|
86 |
" <tbody>\n",
|
87 |
" <tr>\n",
|
88 |
-
" <th>
|
89 |
-
" <td>
|
90 |
" <td>Microsoft</td>\n",
|
91 |
" <td>NaN</td>\n",
|
92 |
" <td>1.6</td>\n",
|
@@ -96,14 +99,14 @@
|
|
96 |
" <td>Feb/2023</td>\n",
|
97 |
" <td>🔴</td>\n",
|
98 |
" <td>Feb/2023</td>\n",
|
99 |
-
" <td
|
100 |
" <td>Multimodal large language model (MLLM). Raven’...</td>\n",
|
101 |
" </tr>\n",
|
102 |
" <tr>\n",
|
103 |
-
" <th>
|
104 |
" <td>LLaMA-65B</td>\n",
|
105 |
" <td>Meta AI</td>\n",
|
106 |
-
" <td>https://
|
107 |
" <td>65</td>\n",
|
108 |
" <td>1400</td>\n",
|
109 |
" <td>22:1</td>\n",
|
@@ -111,11 +114,11 @@
|
|
111 |
" <td>Feb/2023</td>\n",
|
112 |
" <td>🟡</td>\n",
|
113 |
" <td>Feb/2023</td>\n",
|
114 |
-
" <td
|
115 |
" <td>Researchers only, noncommercial only. 'LLaMA-6...</td>\n",
|
116 |
" </tr>\n",
|
117 |
" <tr>\n",
|
118 |
-
" <th>
|
119 |
" <td>MOSS</td>\n",
|
120 |
" <td>Fudan University</td>\n",
|
121 |
" <td>https://moss.fastnlp.top/</td>\n",
|
@@ -126,11 +129,26 @@
|
|
126 |
" <td>Feb/2023</td>\n",
|
127 |
" <td>🟢</td>\n",
|
128 |
" <td>Feb/2023</td>\n",
|
129 |
-
" <td
|
130 |
" <td>Major bandwidth issues: https://www.reuters.co...</td>\n",
|
131 |
" </tr>\n",
|
132 |
" <tr>\n",
|
133 |
-
" <th>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
" <td>Luminous Supreme Control</td>\n",
|
135 |
" <td>Aleph Alpha</td>\n",
|
136 |
" <td>https://app.aleph-alpha.com/playground/completion</td>\n",
|
@@ -141,77 +159,69 @@
|
|
141 |
" <td>Feb/2023</td>\n",
|
142 |
" <td>🟢</td>\n",
|
143 |
" <td>Feb/2023</td>\n",
|
144 |
-
" <td
|
145 |
" <td>‘Control’ means instruction tuned</td>\n",
|
146 |
" </tr>\n",
|
147 |
-
" <tr>\n",
|
148 |
-
" <th>6</th>\n",
|
149 |
-
" <td>Multimodal-CoT</td>\n",
|
150 |
-
" <td>Amazon</td>\n",
|
151 |
-
" <td>https://github.com/amazon-science/mm-cot</td>\n",
|
152 |
-
" <td>0.738</td>\n",
|
153 |
-
" <td>NaN</td>\n",
|
154 |
-
" <td>NaN</td>\n",
|
155 |
-
" <td>🌋</td>\n",
|
156 |
-
" <td>Feb/2023</td>\n",
|
157 |
-
" <td>🟢</td>\n",
|
158 |
-
" <td>Feb/2023</td>\n",
|
159 |
-
" <td>🔗</td>\n",
|
160 |
-
" <td>Models <1B with vision CoT</td>\n",
|
161 |
-
" </tr>\n",
|
162 |
" </tbody>\n",
|
163 |
"</table>\n",
|
164 |
"</div>"
|
165 |
],
|
166 |
"text/plain": [
|
167 |
" Model Lab \\\n",
|
168 |
-
"
|
169 |
-
"
|
170 |
-
"
|
171 |
-
"
|
172 |
-
"
|
173 |
"\n",
|
174 |
" Selected \\nplaygrounds Parameters \\n(B) \\\n",
|
175 |
-
"
|
176 |
-
"
|
177 |
-
"
|
178 |
-
"
|
179 |
-
"
|
180 |
"\n",
|
181 |
" Tokens \\ntrained (B) Ratio T:P\\n(Chinchilla scaling) Training dataset \\\n",
|
182 |
-
"
|
183 |
-
"
|
184 |
-
"
|
185 |
-
"
|
186 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
187 |
"\n",
|
188 |
-
"
|
189 |
-
"
|
190 |
-
"
|
191 |
-
"
|
192 |
-
"
|
193 |
-
"
|
194 |
"\n",
|
195 |
" Notes \n",
|
196 |
-
"
|
197 |
-
"
|
198 |
-
"
|
199 |
-
"
|
200 |
-
"
|
201 |
]
|
202 |
},
|
203 |
-
"execution_count":
|
204 |
"metadata": {},
|
205 |
"output_type": "execute_result"
|
206 |
}
|
207 |
],
|
208 |
"source": [
|
209 |
-
"df.head()"
|
210 |
]
|
211 |
},
|
212 |
{
|
213 |
"cell_type": "code",
|
214 |
-
"execution_count":
|
215 |
"metadata": {},
|
216 |
"outputs": [
|
217 |
{
|
@@ -245,13 +255,13 @@
|
|
245 |
" <th>Announced\\n▼</th>\n",
|
246 |
" <th>Public?</th>\n",
|
247 |
" <th>Released</th>\n",
|
248 |
-
" <th>Paper
|
249 |
" <th>Notes</th>\n",
|
250 |
" </tr>\n",
|
251 |
" </thead>\n",
|
252 |
" <tbody>\n",
|
253 |
" <tr>\n",
|
254 |
-
" <th>
|
255 |
" <td>Meena</td>\n",
|
256 |
" <td>Google</td>\n",
|
257 |
" <td>NaN</td>\n",
|
@@ -262,11 +272,11 @@
|
|
262 |
" <td>Jan/2020</td>\n",
|
263 |
" <td>🔴</td>\n",
|
264 |
" <td>Jan/2020</td>\n",
|
265 |
-
" <td
|
266 |
" <td>Dialogue model. Trained 61B tokens for 164x ep...</td>\n",
|
267 |
" </tr>\n",
|
268 |
" <tr>\n",
|
269 |
-
" <th>
|
270 |
" <td>RoBERTa</td>\n",
|
271 |
" <td>Meta AI</td>\n",
|
272 |
" <td>Hugging Face</td>\n",
|
@@ -277,11 +287,11 @@
|
|
277 |
" <td>Jul/2019</td>\n",
|
278 |
" <td>🟢</td>\n",
|
279 |
" <td>Jul/2019</td>\n",
|
280 |
-
" <td
|
281 |
" <td>See cite ROBERTA</td>\n",
|
282 |
" </tr>\n",
|
283 |
" <tr>\n",
|
284 |
-
" <th>
|
285 |
" <td>GPT-2</td>\n",
|
286 |
" <td>OpenAI</td>\n",
|
287 |
" <td>Hugging Face</td>\n",
|
@@ -292,11 +302,11 @@
|
|
292 |
" <td>Feb/2019</td>\n",
|
293 |
" <td>🟢</td>\n",
|
294 |
" <td>Nov/2019</td>\n",
|
295 |
-
" <td
|
296 |
" <td>Reddit outbound only</td>\n",
|
297 |
" </tr>\n",
|
298 |
" <tr>\n",
|
299 |
-
" <th>
|
300 |
" <td>GPT-1</td>\n",
|
301 |
" <td>OpenAI</td>\n",
|
302 |
" <td>Hugging Face</td>\n",
|
@@ -307,11 +317,11 @@
|
|
307 |
" <td>Jun/2018</td>\n",
|
308 |
" <td>🟢</td>\n",
|
309 |
" <td>Jun/2018</td>\n",
|
310 |
-
" <td
|
311 |
" <td>Books only</td>\n",
|
312 |
" </tr>\n",
|
313 |
" <tr>\n",
|
314 |
-
" <th>
|
315 |
" <td>BERT</td>\n",
|
316 |
" <td>Google</td>\n",
|
317 |
" <td>Hugging Face</td>\n",
|
@@ -322,7 +332,7 @@
|
|
322 |
" <td>Oct/2018</td>\n",
|
323 |
" <td>🟢</td>\n",
|
324 |
" <td>Oct/2018</td>\n",
|
325 |
-
" <td
|
326 |
" <td>NaN</td>\n",
|
327 |
" </tr>\n",
|
328 |
" </tbody>\n",
|
@@ -331,58 +341,86 @@
|
|
331 |
],
|
332 |
"text/plain": [
|
333 |
" Model Lab Selected \\nplaygrounds Parameters \\n(B) \\\n",
|
334 |
-
"
|
335 |
-
"
|
336 |
-
"
|
337 |
-
"
|
338 |
-
"
|
339 |
"\n",
|
340 |
" Tokens \\ntrained (B) Ratio T:P\\n(Chinchilla scaling) Training dataset \\\n",
|
341 |
-
"
|
342 |
-
"
|
343 |
-
"
|
344 |
-
"
|
345 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
346 |
"\n",
|
347 |
-
"
|
348 |
-
"
|
349 |
-
"
|
350 |
-
"
|
351 |
-
"
|
352 |
-
"
|
353 |
"\n",
|
354 |
" Notes \n",
|
355 |
-
"
|
356 |
-
"
|
357 |
-
"
|
358 |
-
"
|
359 |
-
"
|
360 |
]
|
361 |
},
|
362 |
-
"execution_count":
|
363 |
"metadata": {},
|
364 |
"output_type": "execute_result"
|
365 |
}
|
366 |
],
|
367 |
"source": [
|
368 |
-
"df.tail()"
|
369 |
]
|
370 |
},
|
371 |
{
|
372 |
"cell_type": "code",
|
373 |
-
"execution_count":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
374 |
"metadata": {},
|
375 |
"outputs": [],
|
376 |
"source": [
|
377 |
"# |export\n",
|
378 |
"title = \"\"\"<h1 align=\"center\">The Large Language Models Landscape</h1>\"\"\"\n",
|
379 |
"description = \"\"\"Large Language Models (LLMs) today come in a variety architectures and capabilities. This interactive landscape provides a visual overview of the most important LLMs, including their training data, size, release date, and whether they are openly accessible or not. It also includes notes on each model to provide additional context. This landscape is derived from data compiled by Dr. Alan D. Thompson at [lifearchitect.ai](https://lifearchitect.ai).\n",
|
380 |
-
"\"\"\""
|
381 |
]
|
382 |
},
|
383 |
{
|
384 |
"cell_type": "code",
|
385 |
-
"execution_count":
|
386 |
"metadata": {},
|
387 |
"outputs": [
|
388 |
{
|
@@ -410,7 +448,7 @@
|
|
410 |
"data": {
|
411 |
"text/plain": []
|
412 |
},
|
413 |
-
"execution_count":
|
414 |
"metadata": {},
|
415 |
"output_type": "execute_result"
|
416 |
}
|
@@ -420,29 +458,30 @@
|
|
420 |
"def value_func():\n",
|
421 |
" return df\n",
|
422 |
"\n",
|
|
|
423 |
"with gr.Blocks() as demo:\n",
|
424 |
" gr.Markdown(title)\n",
|
425 |
" gr.Markdown(description)\n",
|
426 |
-
" gr.DataFrame(value=value_func)\n",
|
427 |
"\n",
|
428 |
-
"demo.launch()"
|
429 |
]
|
430 |
},
|
431 |
{
|
432 |
"cell_type": "code",
|
433 |
-
"execution_count":
|
434 |
"metadata": {},
|
435 |
"outputs": [
|
436 |
{
|
437 |
"name": "stdout",
|
438 |
"output_type": "stream",
|
439 |
"text": [
|
440 |
-
"Closing server running on port:
|
441 |
]
|
442 |
}
|
443 |
],
|
444 |
"source": [
|
445 |
-
"demo.close()"
|
446 |
]
|
447 |
},
|
448 |
{
|
@@ -453,7 +492,7 @@
|
|
453 |
"source": [
|
454 |
"from nbdev.export import nb_export\n",
|
455 |
"\n",
|
456 |
-
"nb_export(\"app.ipynb\", lib_path=\".\", name=\"app\")"
|
457 |
]
|
458 |
},
|
459 |
{
|
|
|
8 |
"source": [
|
9 |
"# |export\n",
|
10 |
"import gradio as gr\n",
|
11 |
+
"import pandas as pd\n"
|
12 |
]
|
13 |
},
|
14 |
{
|
15 |
"cell_type": "code",
|
16 |
+
"execution_count": 2,
|
17 |
"metadata": {},
|
18 |
"outputs": [],
|
19 |
"source": [
|
20 |
"# |export\n",
|
21 |
+
"df = pd.read_csv(\n",
|
22 |
+
" \"https://docs.google.com/spreadsheets/d/e/2PACX-1vSC40sszorOjHfozmNqJT9lFiJhG94u3fbr3Ss_7fzcU3xqqJQuW1Ie_SNcWEB-uIsBi9NBUK7-ddet/pub?output=csv\",\n",
|
23 |
+
" skiprows=1,\n",
|
24 |
+
")\n"
|
25 |
]
|
26 |
},
|
27 |
{
|
28 |
"cell_type": "code",
|
29 |
+
"execution_count": 3,
|
30 |
"metadata": {},
|
31 |
"outputs": [],
|
32 |
"source": [
|
33 |
"# |export\n",
|
34 |
"# Drop footers\n",
|
35 |
+
"df = df.copy()[~df[\"Model\"].isna()]\n"
|
36 |
]
|
37 |
},
|
38 |
{
|
39 |
"cell_type": "code",
|
40 |
+
"execution_count": 4,
|
41 |
"metadata": {},
|
42 |
"outputs": [],
|
43 |
"source": [
|
44 |
"# |export\n",
|
45 |
"# Drop TBA models\n",
|
46 |
+
"df = df.copy()[df[\"Parameters \\n(B)\"] != \"TBA\"]\n"
|
47 |
]
|
48 |
},
|
49 |
{
|
50 |
"cell_type": "code",
|
51 |
+
"execution_count": 5,
|
52 |
"metadata": {},
|
53 |
"outputs": [
|
54 |
{
|
|
|
82 |
" <th>Announced\\n▼</th>\n",
|
83 |
" <th>Public?</th>\n",
|
84 |
" <th>Released</th>\n",
|
85 |
+
" <th>Paper / Repo</th>\n",
|
86 |
" <th>Notes</th>\n",
|
87 |
" </tr>\n",
|
88 |
" </thead>\n",
|
89 |
" <tbody>\n",
|
90 |
" <tr>\n",
|
91 |
+
" <th>3</th>\n",
|
92 |
+
" <td>Kosmos-1</td>\n",
|
93 |
" <td>Microsoft</td>\n",
|
94 |
" <td>NaN</td>\n",
|
95 |
" <td>1.6</td>\n",
|
|
|
99 |
" <td>Feb/2023</td>\n",
|
100 |
" <td>🔴</td>\n",
|
101 |
" <td>Feb/2023</td>\n",
|
102 |
+
" <td>https://arxiv.org/abs/2302.14045</td>\n",
|
103 |
" <td>Multimodal large language model (MLLM). Raven’...</td>\n",
|
104 |
" </tr>\n",
|
105 |
" <tr>\n",
|
106 |
+
" <th>4</th>\n",
|
107 |
" <td>LLaMA-65B</td>\n",
|
108 |
" <td>Meta AI</td>\n",
|
109 |
+
" <td>Weights leaked: https://github.com/facebookres...</td>\n",
|
110 |
" <td>65</td>\n",
|
111 |
" <td>1400</td>\n",
|
112 |
" <td>22:1</td>\n",
|
|
|
114 |
" <td>Feb/2023</td>\n",
|
115 |
" <td>🟡</td>\n",
|
116 |
" <td>Feb/2023</td>\n",
|
117 |
+
" <td>https://research.facebook.com/publications/lla...</td>\n",
|
118 |
" <td>Researchers only, noncommercial only. 'LLaMA-6...</td>\n",
|
119 |
" </tr>\n",
|
120 |
" <tr>\n",
|
121 |
+
" <th>5</th>\n",
|
122 |
" <td>MOSS</td>\n",
|
123 |
" <td>Fudan University</td>\n",
|
124 |
" <td>https://moss.fastnlp.top/</td>\n",
|
|
|
129 |
" <td>Feb/2023</td>\n",
|
130 |
" <td>🟢</td>\n",
|
131 |
" <td>Feb/2023</td>\n",
|
132 |
+
" <td>https://txsun1997.github.io/blogs/moss.html</td>\n",
|
133 |
" <td>Major bandwidth issues: https://www.reuters.co...</td>\n",
|
134 |
" </tr>\n",
|
135 |
" <tr>\n",
|
136 |
+
" <th>6</th>\n",
|
137 |
+
" <td>Palmyra</td>\n",
|
138 |
+
" <td>Writer</td>\n",
|
139 |
+
" <td>https://huggingface.co/models?search=palmyra</td>\n",
|
140 |
+
" <td>20</td>\n",
|
141 |
+
" <td>300</td>\n",
|
142 |
+
" <td>15:1</td>\n",
|
143 |
+
" <td>🌋</td>\n",
|
144 |
+
" <td>Feb/2023</td>\n",
|
145 |
+
" <td>🟢</td>\n",
|
146 |
+
" <td>Feb/2023</td>\n",
|
147 |
+
" <td>https://writer.com/blog/palmyra/</td>\n",
|
148 |
+
" <td>Only up to 5B available open-source 'trained o...</td>\n",
|
149 |
+
" </tr>\n",
|
150 |
+
" <tr>\n",
|
151 |
+
" <th>7</th>\n",
|
152 |
" <td>Luminous Supreme Control</td>\n",
|
153 |
" <td>Aleph Alpha</td>\n",
|
154 |
" <td>https://app.aleph-alpha.com/playground/completion</td>\n",
|
|
|
159 |
" <td>Feb/2023</td>\n",
|
160 |
" <td>🟢</td>\n",
|
161 |
" <td>Feb/2023</td>\n",
|
162 |
+
" <td>https://docs.aleph-alpha.com/docs/introduction...</td>\n",
|
163 |
" <td>‘Control’ means instruction tuned</td>\n",
|
164 |
" </tr>\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
165 |
" </tbody>\n",
|
166 |
"</table>\n",
|
167 |
"</div>"
|
168 |
],
|
169 |
"text/plain": [
|
170 |
" Model Lab \\\n",
|
171 |
+
"3 Kosmos-1 Microsoft \n",
|
172 |
+
"4 LLaMA-65B Meta AI \n",
|
173 |
+
"5 MOSS Fudan University \n",
|
174 |
+
"6 Palmyra Writer \n",
|
175 |
+
"7 Luminous Supreme Control Aleph Alpha \n",
|
176 |
"\n",
|
177 |
" Selected \\nplaygrounds Parameters \\n(B) \\\n",
|
178 |
+
"3 NaN 1.6 \n",
|
179 |
+
"4 Weights leaked: https://github.com/facebookres... 65 \n",
|
180 |
+
"5 https://moss.fastnlp.top/ 20 \n",
|
181 |
+
"6 https://huggingface.co/models?search=palmyra 20 \n",
|
182 |
+
"7 https://app.aleph-alpha.com/playground/completion 70 \n",
|
183 |
"\n",
|
184 |
" Tokens \\ntrained (B) Ratio T:P\\n(Chinchilla scaling) Training dataset \\\n",
|
185 |
+
"3 360 225:1 🆆 📚⬆ 🕸 🌋 \n",
|
186 |
+
"4 1400 22:1 🆆 📚⬆ 🕸 🌋 \n",
|
187 |
+
"5 430 22:1 🕸 🌋 \n",
|
188 |
+
"6 300 15:1 🌋 \n",
|
189 |
+
"7 NaN NaN 🆆 📚⬆ 🕸 👥 \n",
|
190 |
+
"\n",
|
191 |
+
" Announced\\n▼ Public? Released \\\n",
|
192 |
+
"3 Feb/2023 🔴 Feb/2023 \n",
|
193 |
+
"4 Feb/2023 🟡 Feb/2023 \n",
|
194 |
+
"5 Feb/2023 🟢 Feb/2023 \n",
|
195 |
+
"6 Feb/2023 🟢 Feb/2023 \n",
|
196 |
+
"7 Feb/2023 🟢 Feb/2023 \n",
|
197 |
"\n",
|
198 |
+
" Paper / Repo \\\n",
|
199 |
+
"3 https://arxiv.org/abs/2302.14045 \n",
|
200 |
+
"4 https://research.facebook.com/publications/lla... \n",
|
201 |
+
"5 https://txsun1997.github.io/blogs/moss.html \n",
|
202 |
+
"6 https://writer.com/blog/palmyra/ \n",
|
203 |
+
"7 https://docs.aleph-alpha.com/docs/introduction... \n",
|
204 |
"\n",
|
205 |
" Notes \n",
|
206 |
+
"3 Multimodal large language model (MLLM). Raven’... \n",
|
207 |
+
"4 Researchers only, noncommercial only. 'LLaMA-6... \n",
|
208 |
+
"5 Major bandwidth issues: https://www.reuters.co... \n",
|
209 |
+
"6 Only up to 5B available open-source 'trained o... \n",
|
210 |
+
"7 ‘Control’ means instruction tuned "
|
211 |
]
|
212 |
},
|
213 |
+
"execution_count": 5,
|
214 |
"metadata": {},
|
215 |
"output_type": "execute_result"
|
216 |
}
|
217 |
],
|
218 |
"source": [
|
219 |
+
"df.head()\n"
|
220 |
]
|
221 |
},
|
222 |
{
|
223 |
"cell_type": "code",
|
224 |
+
"execution_count": 6,
|
225 |
"metadata": {},
|
226 |
"outputs": [
|
227 |
{
|
|
|
255 |
" <th>Announced\\n▼</th>\n",
|
256 |
" <th>Public?</th>\n",
|
257 |
" <th>Released</th>\n",
|
258 |
+
" <th>Paper / Repo</th>\n",
|
259 |
" <th>Notes</th>\n",
|
260 |
" </tr>\n",
|
261 |
" </thead>\n",
|
262 |
" <tbody>\n",
|
263 |
" <tr>\n",
|
264 |
+
" <th>92</th>\n",
|
265 |
" <td>Meena</td>\n",
|
266 |
" <td>Google</td>\n",
|
267 |
" <td>NaN</td>\n",
|
|
|
272 |
" <td>Jan/2020</td>\n",
|
273 |
" <td>🔴</td>\n",
|
274 |
" <td>Jan/2020</td>\n",
|
275 |
+
" <td>https://arxiv.org/abs/2001.09977</td>\n",
|
276 |
" <td>Dialogue model. Trained 61B tokens for 164x ep...</td>\n",
|
277 |
" </tr>\n",
|
278 |
" <tr>\n",
|
279 |
+
" <th>93</th>\n",
|
280 |
" <td>RoBERTa</td>\n",
|
281 |
" <td>Meta AI</td>\n",
|
282 |
" <td>Hugging Face</td>\n",
|
|
|
287 |
" <td>Jul/2019</td>\n",
|
288 |
" <td>🟢</td>\n",
|
289 |
" <td>Jul/2019</td>\n",
|
290 |
+
" <td>https://arxiv.org/abs/1907.11692</td>\n",
|
291 |
" <td>See cite ROBERTA</td>\n",
|
292 |
" </tr>\n",
|
293 |
" <tr>\n",
|
294 |
+
" <th>94</th>\n",
|
295 |
" <td>GPT-2</td>\n",
|
296 |
" <td>OpenAI</td>\n",
|
297 |
" <td>Hugging Face</td>\n",
|
|
|
302 |
" <td>Feb/2019</td>\n",
|
303 |
" <td>🟢</td>\n",
|
304 |
" <td>Nov/2019</td>\n",
|
305 |
+
" <td>https://openai.com/blog/better-language-models/</td>\n",
|
306 |
" <td>Reddit outbound only</td>\n",
|
307 |
" </tr>\n",
|
308 |
" <tr>\n",
|
309 |
+
" <th>95</th>\n",
|
310 |
" <td>GPT-1</td>\n",
|
311 |
" <td>OpenAI</td>\n",
|
312 |
" <td>Hugging Face</td>\n",
|
|
|
317 |
" <td>Jun/2018</td>\n",
|
318 |
" <td>🟢</td>\n",
|
319 |
" <td>Jun/2018</td>\n",
|
320 |
+
" <td>https://openai.com/blog/language-unsupervised/</td>\n",
|
321 |
" <td>Books only</td>\n",
|
322 |
" </tr>\n",
|
323 |
" <tr>\n",
|
324 |
+
" <th>96</th>\n",
|
325 |
" <td>BERT</td>\n",
|
326 |
" <td>Google</td>\n",
|
327 |
" <td>Hugging Face</td>\n",
|
|
|
332 |
" <td>Oct/2018</td>\n",
|
333 |
" <td>🟢</td>\n",
|
334 |
" <td>Oct/2018</td>\n",
|
335 |
+
" <td>https://arxiv.org/abs/1810.04805</td>\n",
|
336 |
" <td>NaN</td>\n",
|
337 |
" </tr>\n",
|
338 |
" </tbody>\n",
|
|
|
341 |
],
|
342 |
"text/plain": [
|
343 |
" Model Lab Selected \\nplaygrounds Parameters \\n(B) \\\n",
|
344 |
+
"92 Meena Google NaN 2.6 \n",
|
345 |
+
"93 RoBERTa Meta AI Hugging Face 0.355 \n",
|
346 |
+
"94 GPT-2 OpenAI Hugging Face 1.5 \n",
|
347 |
+
"95 GPT-1 OpenAI Hugging Face 0.1 \n",
|
348 |
+
"96 BERT Google Hugging Face 0.3 \n",
|
349 |
"\n",
|
350 |
" Tokens \\ntrained (B) Ratio T:P\\n(Chinchilla scaling) Training dataset \\\n",
|
351 |
+
"92 10000 3,847:1 👥 🌋 \n",
|
352 |
+
"93 2200 6,198:1 🆆 📚 ⬆ 🕸 \n",
|
353 |
+
"94 10 7:1 ⬆ \n",
|
354 |
+
"95 NaN NaN 📚 \n",
|
355 |
+
"96 137 457:1 🆆 📚 \n",
|
356 |
+
"\n",
|
357 |
+
" Announced\\n▼ Public? Released \\\n",
|
358 |
+
"92 Jan/2020 🔴 Jan/2020 \n",
|
359 |
+
"93 Jul/2019 🟢 Jul/2019 \n",
|
360 |
+
"94 Feb/2019 🟢 Nov/2019 \n",
|
361 |
+
"95 Jun/2018 🟢 Jun/2018 \n",
|
362 |
+
"96 Oct/2018 🟢 Oct/2018 \n",
|
363 |
"\n",
|
364 |
+
" Paper / Repo \\\n",
|
365 |
+
"92 https://arxiv.org/abs/2001.09977 \n",
|
366 |
+
"93 https://arxiv.org/abs/1907.11692 \n",
|
367 |
+
"94 https://openai.com/blog/better-language-models/ \n",
|
368 |
+
"95 https://openai.com/blog/language-unsupervised/ \n",
|
369 |
+
"96 https://arxiv.org/abs/1810.04805 \n",
|
370 |
"\n",
|
371 |
" Notes \n",
|
372 |
+
"92 Dialogue model. Trained 61B tokens for 164x ep... \n",
|
373 |
+
"93 See cite ROBERTA \n",
|
374 |
+
"94 Reddit outbound only \n",
|
375 |
+
"95 Books only \n",
|
376 |
+
"96 NaN "
|
377 |
]
|
378 |
},
|
379 |
+
"execution_count": 6,
|
380 |
"metadata": {},
|
381 |
"output_type": "execute_result"
|
382 |
}
|
383 |
],
|
384 |
"source": [
|
385 |
+
"df.tail()\n"
|
386 |
]
|
387 |
},
|
388 |
{
|
389 |
"cell_type": "code",
|
390 |
+
"execution_count": 9,
|
391 |
+
"metadata": {},
|
392 |
+
"outputs": [],
|
393 |
+
"source": [
|
394 |
+
"# |export\n",
|
395 |
+
"def make_clickable_cell(cell):\n",
|
396 |
+
" return f'<a target=\"_blank\" href=\"{cell}\">{cell}</a>'\n"
|
397 |
+
]
|
398 |
+
},
|
399 |
+
{
|
400 |
+
"cell_type": "code",
|
401 |
+
"execution_count": 11,
|
402 |
+
"metadata": {},
|
403 |
+
"outputs": [],
|
404 |
+
"source": [
|
405 |
+
"# |export\n",
|
406 |
+
"df[\"Paper / Repo\"] = df[\"Paper / Repo\"].apply(make_clickable_cell)"
|
407 |
+
]
|
408 |
+
},
|
409 |
+
{
|
410 |
+
"cell_type": "code",
|
411 |
+
"execution_count": 12,
|
412 |
"metadata": {},
|
413 |
"outputs": [],
|
414 |
"source": [
|
415 |
"# |export\n",
|
416 |
"title = \"\"\"<h1 align=\"center\">The Large Language Models Landscape</h1>\"\"\"\n",
|
417 |
"description = \"\"\"Large Language Models (LLMs) today come in a variety architectures and capabilities. This interactive landscape provides a visual overview of the most important LLMs, including their training data, size, release date, and whether they are openly accessible or not. It also includes notes on each model to provide additional context. This landscape is derived from data compiled by Dr. Alan D. Thompson at [lifearchitect.ai](https://lifearchitect.ai).\n",
|
418 |
+
"\"\"\"\n"
|
419 |
]
|
420 |
},
|
421 |
{
|
422 |
"cell_type": "code",
|
423 |
+
"execution_count": 16,
|
424 |
"metadata": {},
|
425 |
"outputs": [
|
426 |
{
|
|
|
448 |
"data": {
|
449 |
"text/plain": []
|
450 |
},
|
451 |
+
"execution_count": 16,
|
452 |
"metadata": {},
|
453 |
"output_type": "execute_result"
|
454 |
}
|
|
|
458 |
"def value_func():\n",
|
459 |
" return df\n",
|
460 |
"\n",
|
461 |
+
"\n",
|
462 |
"with gr.Blocks() as demo:\n",
|
463 |
" gr.Markdown(title)\n",
|
464 |
" gr.Markdown(description)\n",
|
465 |
+
" gr.components.DataFrame(value=value_func)\n",
|
466 |
"\n",
|
467 |
+
"demo.launch()\n"
|
468 |
]
|
469 |
},
|
470 |
{
|
471 |
"cell_type": "code",
|
472 |
+
"execution_count": 14,
|
473 |
"metadata": {},
|
474 |
"outputs": [
|
475 |
{
|
476 |
"name": "stdout",
|
477 |
"output_type": "stream",
|
478 |
"text": [
|
479 |
+
"Closing server running on port: 7861\n"
|
480 |
]
|
481 |
}
|
482 |
],
|
483 |
"source": [
|
484 |
+
"demo.close()\n"
|
485 |
]
|
486 |
},
|
487 |
{
|
|
|
492 |
"source": [
|
493 |
"from nbdev.export import nb_export\n",
|
494 |
"\n",
|
495 |
+
"nb_export(\"app.ipynb\", lib_path=\".\", name=\"app\")\n"
|
496 |
]
|
497 |
},
|
498 |
{
|
app.py
CHANGED
@@ -1,35 +1,53 @@
|
|
1 |
# AUTOGENERATED! DO NOT EDIT! File to edit: app.ipynb.
|
2 |
|
3 |
# %% auto 0
|
4 |
-
__all__ = ['df', 'title', 'description', 'value_func']
|
5 |
|
6 |
# %% app.ipynb 0
|
7 |
import gradio as gr
|
8 |
import pandas as pd
|
9 |
|
|
|
10 |
# %% app.ipynb 1
|
11 |
-
df = pd.read_csv(
|
|
|
|
|
|
|
|
|
12 |
|
13 |
# %% app.ipynb 2
|
14 |
# Drop footers
|
15 |
df = df.copy()[~df["Model"].isna()]
|
16 |
|
|
|
17 |
# %% app.ipynb 3
|
18 |
# Drop TBA models
|
19 |
df = df.copy()[df["Parameters \n(B)"] != "TBA"]
|
20 |
|
|
|
21 |
# %% app.ipynb 6
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
title = """<h1 align="center">The Large Language Models Landscape</h1>"""
|
23 |
description = """Large Language Models (LLMs) today come in a variety architectures and capabilities. This interactive landscape provides a visual overview of the most important LLMs, including their training data, size, release date, and whether they are openly accessible or not. It also includes notes on each model to provide additional context. This landscape is derived from data compiled by Dr. Alan D. Thompson at [lifearchitect.ai](https://lifearchitect.ai).
|
24 |
"""
|
25 |
|
26 |
-
|
|
|
27 |
def value_func():
|
28 |
return df
|
29 |
|
|
|
30 |
with gr.Blocks() as demo:
|
31 |
gr.Markdown(title)
|
32 |
gr.Markdown(description)
|
33 |
-
gr.DataFrame(value=value_func)
|
34 |
|
35 |
demo.launch()
|
|
|
|
1 |
# AUTOGENERATED! DO NOT EDIT! File to edit: app.ipynb.
|
2 |
|
3 |
# %% auto 0
|
4 |
+
__all__ = ['df', 'title', 'description', 'make_clickable_cell', 'value_func']
|
5 |
|
6 |
# %% app.ipynb 0
|
7 |
import gradio as gr
|
8 |
import pandas as pd
|
9 |
|
10 |
+
|
11 |
# %% app.ipynb 1
|
12 |
+
df = pd.read_csv(
|
13 |
+
"https://docs.google.com/spreadsheets/d/e/2PACX-1vSC40sszorOjHfozmNqJT9lFiJhG94u3fbr3Ss_7fzcU3xqqJQuW1Ie_SNcWEB-uIsBi9NBUK7-ddet/pub?output=csv",
|
14 |
+
skiprows=1,
|
15 |
+
)
|
16 |
+
|
17 |
|
18 |
# %% app.ipynb 2
|
19 |
# Drop footers
|
20 |
df = df.copy()[~df["Model"].isna()]
|
21 |
|
22 |
+
|
23 |
# %% app.ipynb 3
|
24 |
# Drop TBA models
|
25 |
df = df.copy()[df["Parameters \n(B)"] != "TBA"]
|
26 |
|
27 |
+
|
28 |
# %% app.ipynb 6
|
29 |
+
def make_clickable_cell(cell):
|
30 |
+
return f'<a target="_blank" href="{cell}">{cell}</a>'
|
31 |
+
|
32 |
+
|
33 |
+
# %% app.ipynb 7
|
34 |
+
df["Paper / Repo"] = df["Paper / Repo"].apply(make_clickable_cell)
|
35 |
+
|
36 |
+
# %% app.ipynb 8
|
37 |
title = """<h1 align="center">The Large Language Models Landscape</h1>"""
|
38 |
description = """Large Language Models (LLMs) today come in a variety architectures and capabilities. This interactive landscape provides a visual overview of the most important LLMs, including their training data, size, release date, and whether they are openly accessible or not. It also includes notes on each model to provide additional context. This landscape is derived from data compiled by Dr. Alan D. Thompson at [lifearchitect.ai](https://lifearchitect.ai).
|
39 |
"""
|
40 |
|
41 |
+
|
42 |
+
# %% app.ipynb 9
|
43 |
def value_func():
|
44 |
return df
|
45 |
|
46 |
+
|
47 |
with gr.Blocks() as demo:
|
48 |
gr.Markdown(title)
|
49 |
gr.Markdown(description)
|
50 |
+
gr.components.DataFrame(value=value_func)
|
51 |
|
52 |
demo.launch()
|
53 |
+
|