File size: 32,577 Bytes
c30b770
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "import os\n",
    "import shutil\n",
    "import logging\n",
    "import pretty_errors\n",
    "\n",
    "import huggingface_hub\n",
    "from datasets import Dataset\n",
    "from datasets import load_dataset\n",
    "\n",
    "# Set up the logger\n",
    "logger = logging.getLogger('basic_logger')\n",
    "logger.setLevel(logging.INFO)\n",
    "\n",
    "# Set up the console handler with a simple format.\n",
    "# strftime directives (%Y, %H, ...) are only valid in `datefmt`; placed in the\n",
    "# %-style message format they make every record fail to format\n",
    "# (ValueError: unsupported format character 'Y'), so the timestamp is\n",
    "# emitted through %(asctime)s instead.\n",
    "console_handler = logging.StreamHandler()\n",
    "console_handler.setLevel(logging.INFO)\n",
    "formatter = logging.Formatter(\n",
    "    fmt='%(asctime)s - %(name)s - %(levelname)s - %(message)s',\n",
    "    datefmt='%Y-%m-%d %H:%M:%S',\n",
    ")\n",
    "console_handler.setFormatter(formatter)\n",
    "# getLogger() returns the same object on every run of this cell, so drop the\n",
    "# handlers left over from a previous run instead of stacking duplicates.\n",
    "logger.handlers.clear()\n",
    "logger.addHandler(console_handler)\n",
    "\n",
    "DS_NAME = \"amaye15/object-segmentation\"\n",
    "\n",
    "DATA_DIR = \"data\"\n",
    "p = os.path.join(os.getcwd(), DATA_DIR)\n",
    "\n",
    "# Start every run from an empty cache directory.\n",
    "if os.path.exists(p):\n",
    "    shutil.rmtree(p)\n",
    "\n",
    "\n",
    "os.mkdir(p)\n",
    "\n",
    "def get_data():\n",
    "    \"\"\"Yield each row of the `train` split of DS_NAME, loaded in streaming mode with `p` as cache_dir.\"\"\"\n",
    "    ds = load_dataset(DS_NAME, cache_dir=p, streaming=True)\n",
    "    yield from ds[\"train\"]\n",
    "\n",
    "#ds_processed = Dataset.from_generator(get_data)\n",
    "# ds_processed.push_to_hub(\"amaye15/tmp\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "from huggingface_hub import scan_cache_dir\n",
    "\n",
    "repo_info = scan_cache_dir().repos\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# TODO: an unfinished import statement (`from` with no module name) was left in this\n",
    "# cell; it is replaced by this note so that Run All no longer stops on a SyntaxError."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "from huggingface_hub import HfApi\n",
    "\n",
    "api = HfApi()\n",
    "\n",
    "# Fetch the git refs (branches, converts, tags) of the dataset repository.\n",
    "revisions = api.list_repo_refs(repo_id=DS_NAME, repo_type=\"dataset\")\n",
    "\n",
    "# `revisions` is a GitRefs object, not a list, so it cannot be indexed with [-1];\n",
    "# the head commit of a branch is the `target_commit` of its GitRefInfo entry.\n",
    "main_ref = next((ref for ref in revisions.branches if ref.name == \"main\"), None)\n",
    "latest_commit = main_ref.target_commit if main_ref is not None else None\n",
    "print(f\"Latest commit ID: {latest_commit}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "GitRefs(branches=[GitRefInfo(name='main', ref='refs/heads/main', target_commit='962a9a67307296a7abc7e94c2811c450970b80df')], converts=[GitRefInfo(name='duckdb', ref='refs/convert/duckdb', target_commit='72baa589701a6cbea2b7497931c7adf1daf42121'), GitRefInfo(name='parquet', ref='refs/convert/parquet', target_commit='c209a987d23de50a04ec9766e04dde2e4db7f5fb')], tags=[], pull_requests=None)"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "revisions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Summarise the cache rather than displaying the raw frozenset: the full repr runs to\n",
    "# tens of kilobytes and embeds absolute paths from the local machine.\n",
    "sorted(\n",
    "    (repo.repo_id, repo.repo_type, repo.size_on_disk)\n",
    "    for repo in scan_cache_dir().repos\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "facebook/sam2-hiera-base-plus\n",
      "caidas/swin2SR-realworld-sr-x4-64-bsrgan-psnr\n",
      "amaye15/receipts\n",
      "amaye15/DaViT-Florence-2-large-ft\n",
      "amaye15/tmp\n",
      "amaye15/Products-10k\n",
      "amaye15/invoices\n",
      "microsoft/Florence-2-large-ft\n",
      "microsoft/swinv2-base-patch4-window16-256\n",
      "thanhhau097/swin2SR-realworld-sr-x4-64-bsrgan-psnr\n",
      "amaye15/SwinV2-Base-Document-Classifier\n",
      "amaye15/DaViT\n",
      "facebook/sam2-hiera-large\n",
      "facebook/bart-large\n",
      "amaye15/NSFW\n"
     ]
    }
   ],
   "source": [
    "# Print the id of each repository found by the cache scan, one per line.\n",
    "for cached_repo in repo_info:\n",
    "    print(cached_repo.repo_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "825736a8246f4fb593f4847c5c2268b6",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading readme:   0%|          | 0.00/5.24k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "699d58320ad6465697460490bfffaf65",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Resolving data files:   0%|          | 0/38 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "ds = load_dataset(DS_NAME, cache_dir=p, streaming=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# `ds` is loaded with streaming=True, so its splits are IterableDataset objects, which\n",
    "# have no `cleanup_cache_files` method. Only call it when the split provides it.\n",
    "train_split = ds[\"train\"]\n",
    "if hasattr(train_split, \"cleanup_cache_files\"):\n",
    "    train_split.cleanup_cache_files()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "os.path.exists(os.path.join(os.getcwd(), \"data\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Dataset({\n",
       "    features: ['image', 'masked_image', 'mask'],\n",
       "    num_rows: 37\n",
       "})"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ds_processed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# `DatasetDict` has no `get_cache_files_size` attribute, so measure the cache\n",
    "# directory `p` (the cache_dir passed to load_dataset) on disk instead.\n",
    "# Symlinks are skipped so that a linked file is not counted twice.\n",
    "cache_size = 0\n",
    "for dir_path, _dir_names, file_names in os.walk(p):\n",
    "    for file_name in file_names:\n",
    "        file_path = os.path.join(dir_path, file_name)\n",
    "        if not os.path.islink(file_path):\n",
    "            cache_size += os.path.getsize(file_path)\n",
    "\n",
    "cache_size"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "env",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}