lewtun HF staff committed on
Commit
06a86d3
·
1 Parent(s): ef3747e

Bump pandas / hfh

Browse files
Files changed (3) hide show
  1. Untitled.ipynb +266 -0
  2. app.py +4 -2
  3. requirements.txt +2 -2
Untitled.ipynb ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "ed2ddd96-57f3-452e-9d28-e44654edbb65",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "from huggingface_hub import DatasetFilter, list_datasets, HfApi, ModelFilter, DatasetSearchArguments\n",
11
+ "from pathlib import Path\n",
12
+ "from dotenv import load_dotenv\n",
13
+ "import os\n"
14
+ ]
15
+ },
16
+ {
17
+ "cell_type": "code",
18
+ "execution_count": 2,
19
+ "id": "c45ae63c-4e02-47e3-a3e9-895a7bc2702d",
20
+ "metadata": {},
21
+ "outputs": [],
22
+ "source": [
23
+ "if Path(\".env\").is_file():\n",
24
+ " load_dotenv(\".env\")\n",
25
+ "\n",
26
+ "auth_token = os.getenv(\"HF_HUB_TOKEN\")"
27
+ ]
28
+ },
29
+ {
30
+ "cell_type": "code",
31
+ "execution_count": 7,
32
+ "id": "23e088a3-276a-45bf-9373-4dfe934b5556",
33
+ "metadata": {},
34
+ "outputs": [],
35
+ "source": [
36
+ "filt = DatasetFilter(benchmark=\"raft\")\n",
37
+ "submissions = list_datasets(filter=filt, full=True, use_auth_token=auth_token)"
38
+ ]
39
+ },
40
+ {
41
+ "cell_type": "code",
42
+ "execution_count": 8,
43
+ "id": "641c4060",
44
+ "metadata": {},
45
+ "outputs": [
46
+ {
47
+ "name": "stdout",
48
+ "output_type": "stream",
49
+ "text": [
50
+ "\u001b[0;31mSignature:\u001b[0m\n",
51
+ "\u001b[0mlist_datasets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
52
+ "\u001b[0;34m\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
53
+ "\u001b[0;34m\u001b[0m \u001b[0mfilter\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mhuggingface_hub\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mutils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mendpoint_helpers\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDatasetFilter\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mIterable\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNoneType\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
54
+ "\u001b[0;34m\u001b[0m \u001b[0mauthor\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNoneType\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
55
+ "\u001b[0;34m\u001b[0m \u001b[0msearch\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNoneType\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
56
+ "\u001b[0;34m\u001b[0m \u001b[0msort\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mLiteral\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'lastModified'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNoneType\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
57
+ "\u001b[0;34m\u001b[0m \u001b[0mdirection\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mLiteral\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNoneType\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
58
+ "\u001b[0;34m\u001b[0m \u001b[0mlimit\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNoneType\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
59
+ "\u001b[0;34m\u001b[0m \u001b[0mcardData\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mbool\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNoneType\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
60
+ "\u001b[0;34m\u001b[0m \u001b[0mfull\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mbool\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNoneType\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
61
+ "\u001b[0;34m\u001b[0m \u001b[0muse_auth_token\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNoneType\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
62
+ "\u001b[0;34m\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mhuggingface_hub\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhf_api\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDatasetInfo\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
63
+ "\u001b[0;31mDocstring:\u001b[0m\n",
64
+ "Get the public list of all the datasets on huggingface.co\n",
65
+ "\n",
66
+ "Args:\n",
67
+ " filter ([`DatasetFilter`] or `str` or `Iterable`, *optional*):\n",
68
+ " A string or [`DatasetFilter`] which can be used to identify\n",
69
+ " datasets on the hub.\n",
70
+ " author (`str`, *optional*):\n",
71
+ " A string which identify the author of the returned models\n",
72
+ " search (`str`, *optional*):\n",
73
+ " A string that will be contained in the returned models.\n",
74
+ " sort (`Literal[\"lastModified\"]` or `str`, *optional*):\n",
75
+ " The key with which to sort the resulting datasets. Possible\n",
76
+ " values are the properties of the [`huggingface_hub.hf_api.DatasetInfo`] class.\n",
77
+ " direction (`Literal[-1]` or `int`, *optional*):\n",
78
+ " Direction in which to sort. The value `-1` sorts by descending\n",
79
+ " order while all other values sort by ascending order.\n",
80
+ " limit (`int`, *optional*):\n",
81
+ " The limit on the number of datasets fetched. Leaving this option\n",
82
+ " to `None` fetches all datasets.\n",
83
+ " cardData (`bool`, *optional*):\n",
84
+ " Whether to grab the metadata for the dataset as well. Can\n",
85
+ " contain useful information such as the PapersWithCode ID.\n",
86
+ " full (`bool`, *optional*):\n",
87
+ " Whether to fetch all dataset data, including the `lastModified`\n",
88
+ " and the `cardData`.\n",
89
+ " use_auth_token (`bool` or `str`, *optional*):\n",
90
+ " Whether to use the `auth_token` provided from the\n",
91
+ " `huggingface_hub` cli. If not logged in, a valid `auth_token`\n",
92
+ " can be passed in as a string.\n",
93
+ "\n",
94
+ "Example usage with the `filter` argument:\n",
95
+ "\n",
96
+ "```python\n",
97
+ ">>> from huggingface_hub import HfApi\n",
98
+ "\n",
99
+ ">>> api = HfApi()\n",
100
+ "\n",
101
+ ">>> # List all datasets\n",
102
+ ">>> api.list_datasets()\n",
103
+ "\n",
104
+ ">>> # Get all valid search arguments\n",
105
+ ">>> args = DatasetSearchArguments()\n",
106
+ "\n",
107
+ ">>> # List only the text classification datasets\n",
108
+ ">>> api.list_datasets(filter=\"task_categories:text-classification\")\n",
109
+ ">>> # Using the `DatasetFilter`\n",
110
+ ">>> filt = DatasetFilter(task_categories=\"text-classification\")\n",
111
+ ">>> # With `DatasetSearchArguments`\n",
112
+ ">>> filt = DatasetFilter(task=args.task_categories.text_classification)\n",
113
+ ">>> api.list_models(filter=filt)\n",
114
+ "\n",
115
+ ">>> # List only the datasets in russian for language modeling\n",
116
+ ">>> api.list_datasets(\n",
117
+ "... filter=(\"languages:ru\", \"task_ids:language-modeling\")\n",
118
+ "... )\n",
119
+ ">>> # Using the `DatasetFilter`\n",
120
+ ">>> filt = DatasetFilter(languages=\"ru\", task_ids=\"language-modeling\")\n",
121
+ ">>> # With `DatasetSearchArguments`\n",
122
+ ">>> filt = DatasetFilter(\n",
123
+ "... languages=args.languages.ru,\n",
124
+ "... task_ids=args.task_ids.language_modeling,\n",
125
+ "... )\n",
126
+ ">>> api.list_datasets(filter=filt)\n",
127
+ "```\n",
128
+ "\n",
129
+ "Example usage with the `search` argument:\n",
130
+ "\n",
131
+ "```python\n",
132
+ ">>> from huggingface_hub import HfApi\n",
133
+ "\n",
134
+ ">>> api = HfApi()\n",
135
+ "\n",
136
+ ">>> # List all datasets with \"text\" in their name\n",
137
+ ">>> api.list_datasets(search=\"text\")\n",
138
+ "\n",
139
+ ">>> # List all datasets with \"text\" in their name made by google\n",
140
+ ">>> api.list_datasets(search=\"text\", author=\"google\")\n",
141
+ "```\n",
142
+ "\u001b[0;31mFile:\u001b[0m ~/miniconda3/envs/raft-leaderboard/lib/python3.8/site-packages/huggingface_hub/hf_api.py\n",
143
+ "\u001b[0;31mType:\u001b[0m method\n"
144
+ ]
145
+ }
146
+ ],
147
+ "source": [
148
+ "?list_datasets"
149
+ ]
150
+ },
151
+ {
152
+ "cell_type": "code",
153
+ "execution_count": 9,
154
+ "id": "228750aa-6d92-4d26-971f-5248e056f54b",
155
+ "metadata": {},
156
+ "outputs": [
157
+ {
158
+ "data": {
159
+ "text/plain": [
160
+ "5"
161
+ ]
162
+ },
163
+ "execution_count": 9,
164
+ "metadata": {},
165
+ "output_type": "execute_result"
166
+ }
167
+ ],
168
+ "source": [
169
+ "len(submissions)"
170
+ ]
171
+ },
172
+ {
173
+ "cell_type": "code",
174
+ "execution_count": 11,
175
+ "id": "6dc34fa3-be44-4170-8daf-39f87aae5b34",
176
+ "metadata": {},
177
+ "outputs": [
178
+ {
179
+ "name": "stdout",
180
+ "output_type": "stream",
181
+ "text": [
182
+ "benchmark\n",
183
+ "type\n",
184
+ "submission_name\n"
185
+ ]
186
+ }
187
+ ],
188
+ "source": [
189
+ "for k,v in submissions[3].cardData.items():\n",
190
+ " print(k)"
191
+ ]
192
+ },
193
+ {
194
+ "cell_type": "code",
195
+ "execution_count": 12,
196
+ "id": "f4dd2dbc",
197
+ "metadata": {},
198
+ "outputs": [
199
+ {
200
+ "data": {
201
+ "text/plain": [
202
+ "DatasetInfo: {\n",
203
+ "\tid: moshew/my_raft\n",
204
+ "\tsha: 534086adc3aec801687316b3fe162e4231ab0a6b\n",
205
+ "\tlastModified: 2022-07-16T17:01:04.000Z\n",
206
+ "\ttags: ['benchmark:raft']\n",
207
+ "\tprivate: False\n",
208
+ "\tauthor: moshew\n",
209
+ "\tdescription: \n",
210
+ "\tcitation: @InProceedings{huggingface:dataset,\n",
211
+ "title = {A great new dataset},\n",
212
+ "author={huggingface, Inc.\n",
213
+ "},\n",
214
+ "year={2020}\n",
215
+ "}\n",
216
+ "\tcardData: {'benchmark': 'raft', 'type': 'prediction', 'submission_name': 'SetFit300'}\n",
217
+ "\tsiblings: None\n",
218
+ "\t_id: 621ffdd236468d709f183ac3\n",
219
+ "\tdisabled: False\n",
220
+ "\tgated: auto\n",
221
+ "\tgitalyUid: 0d29a8b3b8364fb2d86b3ad56d62ea4aaf13a5cf95884aa0381b966d79b045e1\n",
222
+ "\tlikes: 0\n",
223
+ "\tdownloads: 0\n",
224
+ "}"
225
+ ]
226
+ },
227
+ "execution_count": 12,
228
+ "metadata": {},
229
+ "output_type": "execute_result"
230
+ }
231
+ ],
232
+ "source": [
233
+ "submissions[0]"
234
+ ]
235
+ },
236
+ {
237
+ "cell_type": "code",
238
+ "execution_count": null,
239
+ "id": "27079f0b",
240
+ "metadata": {},
241
+ "outputs": [],
242
+ "source": []
243
+ }
244
+ ],
245
+ "metadata": {
246
+ "kernelspec": {
247
+ "display_name": "raft-leaderboard",
248
+ "language": "python",
249
+ "name": "python3"
250
+ },
251
+ "language_info": {
252
+ "codemirror_mode": {
253
+ "name": "ipython",
254
+ "version": 3
255
+ },
256
+ "file_extension": ".py",
257
+ "mimetype": "text/x-python",
258
+ "name": "python",
259
+ "nbconvert_exporter": "python",
260
+ "pygments_lexer": "ipython3",
261
+ "version": "3.8.15"
262
+ }
263
+ },
264
+ "nbformat": 4,
265
+ "nbformat_minor": 5
266
+ }
app.py CHANGED
@@ -21,7 +21,7 @@ FORMATTED_TASK_NAMES = sorted([" ".join(t.capitalize() for t in task.split("_"))
21
 
22
  def download_submissions():
23
  filt = DatasetFilter(benchmark="raft")
24
- all_submissions = list_datasets(filter=filt, cardData=True, use_auth_token=auth_token)
25
  submissions = []
26
 
27
  for dataset in all_submissions:
@@ -97,7 +97,9 @@ To submit to RAFT, follow the instruction posted on [this page](https://huggingf
97
  submissions = download_submissions()
98
  print(f"INFO - downloaded {len(submissions)} submissions")
99
  df = format_submissions(submissions)
100
- styler = df.style.set_precision(3).set_properties(**{"white-space": "pre-wrap", "text-align": "center"})
 
 
101
  # hack to remove index column: https://discuss.streamlit.io/t/questions-on-st-table/6878/3
102
  st.markdown(
103
  """
 
21
 
22
  def download_submissions():
23
  filt = DatasetFilter(benchmark="raft")
24
+ all_submissions = list_datasets(filter=filt, full=True, use_auth_token=auth_token)
25
  submissions = []
26
 
27
  for dataset in all_submissions:
 
97
  submissions = download_submissions()
98
  print(f"INFO - downloaded {len(submissions)} submissions")
99
  df = format_submissions(submissions)
100
+ styler = pd.io.formats.style.Styler(df, precision=3).set_properties(
101
+ **{"white-space": "pre-wrap", "text-align": "center"}
102
+ )
103
  # hack to remove index column: https://discuss.streamlit.io/t/questions-on-st-table/6878/3
104
  st.markdown(
105
  """
requirements.txt CHANGED
@@ -1,6 +1,6 @@
1
- pandas<=1.4
2
  python-dotenv
3
  protobuf~=3.19.0
4
- huggingface-hub==0.9.1
5
  datasets==2.8.0
6
  altair<5
 
1
+ pandas==2.0.3
2
  python-dotenv
3
  protobuf~=3.19.0
4
+ huggingface-hub==0.18.0
5
  datasets==2.8.0
6
  altair<5