File size: 32,577 Bytes
c30b770 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 |
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"\n",
"import os\n",
"import shutil\n",
"import logging\n",
"import pretty_errors\n",
"\n",
"import huggingface_hub\n",
"from datasets import Dataset\n",
"from datasets import load_dataset\n",
"\n",
"# Set up the logger\n",
"logger = logging.getLogger('basic_logger')\n",
"logger.setLevel(logging.INFO)\n",
"\n",
"# Set up the console handler with a simple format\n",
"console_handler = logging.StreamHandler()\n",
"console_handler.setLevel(logging.INFO)\n",
"formatter = logging.Formatter(\n",
" '%Y-%m-%d %H:%M:%S - %(name)s - %(levelname)s - %(message)s'\n",
")\n",
"console_handler.setFormatter(formatter)\n",
"logger.addHandler(console_handler)\n",
"\n",
"DS_NAME = \"amaye15/object-segmentation\"\n",
"\n",
"DATA_DIR = \"data\"\n",
"p = os.path.join(os.getcwd(), DATA_DIR)\n",
"\n",
"if os.path.exists(p):\n",
" shutil.rmtree(p)\n",
"\n",
"\n",
"os.mkdir(p)\n",
"\n",
"def get_data():\n",
" ds = load_dataset(DS_NAME, cache_dir=p, streaming=True)\n",
" for row in ds[\"train\"]:\n",
" yield row\n",
"\n",
"#ds_processed = Dataset.from_generator(get_data)\n",
"# ds_processed.push_to_hub(\"amaye15/tmp\")"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"from huggingface_hub import scan_cache_dir\n",
"\n",
"repo_info = scan_cache_dir().repos\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from "
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"from huggingface_hub import HfApi\n",
"\n",
"api = HfApi()\n",
"\n",
"# Get the list of revisions for the dataset\n",
"revisions = api.list_repo_refs(repo_id=DS_NAME, repo_type=\"dataset\")\n",
"\n",
"# Check the latest commit\n",
"# latest_commit = revisions[-1].commit_id\n",
"# print(f\"Latest commit ID: {latest_commit}\")"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"GitRefs(branches=[GitRefInfo(name='main', ref='refs/heads/main', target_commit='962a9a67307296a7abc7e94c2811c450970b80df')], converts=[GitRefInfo(name='duckdb', ref='refs/convert/duckdb', target_commit='72baa589701a6cbea2b7497931c7adf1daf42121'), GitRefInfo(name='parquet', ref='refs/convert/parquet', target_commit='c209a987d23de50a04ec9766e04dde2e4db7f5fb')], tags=[], pull_requests=None)"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"revisions"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"frozenset({CachedRepoInfo(repo_id='amaye15/DaViT', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--DaViT'), size_on_disk=1677, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='a96d58f5ca3d0b138d8efe7618a860b10f8d986b', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--DaViT/snapshots/a96d58f5ca3d0b138d8efe7618a860b10f8d986b'), size_on_disk=1677, files=frozenset({CachedFileInfo(file_name='README.md', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--DaViT/snapshots/a96d58f5ca3d0b138d8efe7618a860b10f8d986b/README.md'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--DaViT/blobs/c928ad7cd8a9f9e48fc14780b84e5dd2ad6b1606'), size_on_disk=1677, blob_last_accessed=1722324257.4824574, blob_last_modified=1722324257.396636)}), refs=frozenset({'main'}), last_modified=1722324257.396636)}), last_accessed=1722324257.4824574, last_modified=1722324257.396636),\n",
" CachedRepoInfo(repo_id='amaye15/DaViT-Florence-2-large-ft', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--DaViT-Florence-2-large-ft'), size_on_disk=1834, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='4cc7068026aaeb388ba2b0826abae30d670de3fc', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--DaViT-Florence-2-large-ft/snapshots/4cc7068026aaeb388ba2b0826abae30d670de3fc'), size_on_disk=1834, files=frozenset({CachedFileInfo(file_name='README.md', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--DaViT-Florence-2-large-ft/snapshots/4cc7068026aaeb388ba2b0826abae30d670de3fc/README.md'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--DaViT-Florence-2-large-ft/blobs/ab9f4c4537bc89f3a4cb187db5d771be9242f09f'), size_on_disk=1834, blob_last_accessed=1722405977.6422648, blob_last_modified=1722405977.6310754)}), refs=frozenset({'main'}), last_modified=1722405977.6310754)}), last_accessed=1722405977.6422648, last_modified=1722405977.6310754),\n",
" CachedRepoInfo(repo_id='amaye15/NSFW', repo_type='dataset', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--NSFW'), size_on_disk=1240, nb_files=2, revisions=frozenset({CachedRevisionInfo(commit_hash='c76b1c300fb672189feb59f8faa1027b2d6956b3', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--NSFW/snapshots/c76b1c300fb672189feb59f8faa1027b2d6956b3'), size_on_disk=619, files=frozenset({CachedFileInfo(file_name='README.md', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--NSFW/snapshots/c76b1c300fb672189feb59f8faa1027b2d6956b3/README.md'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--NSFW/blobs/dc8bcda261a57d5275af975f1411afdadc094009'), size_on_disk=619, blob_last_accessed=1722723952.58199, blob_last_modified=1722723952.5701885)}), refs=frozenset(), last_modified=1722723952.5701885), CachedRevisionInfo(commit_hash='b5cfb52e5a260983c6e6f70c7b21574efce998b1', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--NSFW/snapshots/b5cfb52e5a260983c6e6f70c7b21574efce998b1'), size_on_disk=621, files=frozenset({CachedFileInfo(file_name='README.md', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--NSFW/snapshots/b5cfb52e5a260983c6e6f70c7b21574efce998b1/README.md'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--NSFW/blobs/5924f8b7af709a9f080759cac11ea6f1c976df5d'), size_on_disk=621, blob_last_accessed=1722768256.9392703, blob_last_modified=1722768256.9274719)}), refs=frozenset({'main'}), last_modified=1722768256.9274719)}), last_accessed=1722768256.9392703, last_modified=1722768256.9274719),\n",
" CachedRepoInfo(repo_id='amaye15/Products-10k', repo_type='dataset', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--Products-10k'), size_on_disk=620, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='05b2a7a7513a04c95c8fd8c4fb925cd9bc03397c', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--Products-10k/snapshots/05b2a7a7513a04c95c8fd8c4fb925cd9bc03397c'), size_on_disk=620, files=frozenset({CachedFileInfo(file_name='README.md', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--Products-10k/snapshots/05b2a7a7513a04c95c8fd8c4fb925cd9bc03397c/README.md'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--Products-10k/blobs/a71affed5a9687aeabd33f9aa94c9cde049eb533'), size_on_disk=620, blob_last_accessed=1723091983.5595, blob_last_modified=1723091983.547405)}), refs=frozenset({'main'}), last_modified=1723091983.547405)}), last_accessed=1723091983.5595, last_modified=1723091983.547405),\n",
" CachedRepoInfo(repo_id='amaye15/SwinV2-Base-Document-Classifier', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--SwinV2-Base-Document-Classifier'), size_on_disk=590, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='b0968577b56aec082d7cde1d2b04f68173b8e674', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--SwinV2-Base-Document-Classifier/snapshots/b0968577b56aec082d7cde1d2b04f68173b8e674'), size_on_disk=590, files=frozenset({CachedFileInfo(file_name='preprocessor_config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--SwinV2-Base-Document-Classifier/snapshots/b0968577b56aec082d7cde1d2b04f68173b8e674/preprocessor_config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--SwinV2-Base-Document-Classifier/blobs/86614921b04ad5b6e3d4ee5448f11efe6cc67917'), size_on_disk=590, blob_last_accessed=1722750225.5857947, blob_last_modified=1722750225.574735)}), refs=frozenset({'main'}), last_modified=1722750225.574735)}), last_accessed=1722750225.5857947, last_modified=1722750225.574735),\n",
" CachedRepoInfo(repo_id='amaye15/invoices', repo_type='dataset', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--invoices'), size_on_disk=618, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='f4e8d7dda1472da87125237182dc9f4d5fd860dc', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--invoices/snapshots/f4e8d7dda1472da87125237182dc9f4d5fd860dc'), size_on_disk=618, files=frozenset({CachedFileInfo(file_name='README.md', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--invoices/snapshots/f4e8d7dda1472da87125237182dc9f4d5fd860dc/README.md'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--invoices/blobs/10d6a19135e958a4856ebfbd82b130f571667b26'), size_on_disk=618, blob_last_accessed=1723087468.3128088, blob_last_modified=1723087468.3009398)}), refs=frozenset({'main'}), last_modified=1723087468.3009398)}), last_accessed=1723087468.3128088, last_modified=1723087468.3009398),\n",
" CachedRepoInfo(repo_id='amaye15/receipts', repo_type='dataset', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--receipts'), size_on_disk=617, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='7eaf60e64883eee2a744c1e00658967e0b61aab3', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--receipts/snapshots/7eaf60e64883eee2a744c1e00658967e0b61aab3'), size_on_disk=617, files=frozenset({CachedFileInfo(file_name='README.md', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--receipts/snapshots/7eaf60e64883eee2a744c1e00658967e0b61aab3/README.md'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--receipts/blobs/22cf712cf7551f2d2df0e6d87358a104fa485122'), size_on_disk=617, blob_last_accessed=1723085450.105201, blob_last_modified=1723085450.0932333)}), refs=frozenset({'main'}), last_modified=1723085450.0932333)}), last_accessed=1723085450.105201, last_modified=1723085450.0932333),\n",
" CachedRepoInfo(repo_id='amaye15/tmp', repo_type='dataset', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--tmp'), size_on_disk=372, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='b593656ae71cef84e90be18cf6bb29cdc74fd7ff', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--tmp/snapshots/b593656ae71cef84e90be18cf6bb29cdc74fd7ff'), size_on_disk=372, files=frozenset({CachedFileInfo(file_name='README.md', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--tmp/snapshots/b593656ae71cef84e90be18cf6bb29cdc74fd7ff/README.md'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--tmp/blobs/c274c17b952e2eba2a83b9255b334db02fd75125'), size_on_disk=372, blob_last_accessed=1724597074.5835145, blob_last_modified=1724597074.5719097)}), refs=frozenset({'main'}), last_modified=1724597074.5719097)}), last_accessed=1724597074.5835145, last_modified=1724597074.5719097),\n",
" CachedRepoInfo(repo_id='caidas/swin2SR-realworld-sr-x4-64-bsrgan-psnr', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--caidas--swin2SR-realworld-sr-x4-64-bsrgan-psnr'), size_on_disk=48461065, nb_files=3, revisions=frozenset({CachedRevisionInfo(commit_hash='bb13f02e45e88d00b6c202b3fbe6a181af144606', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--caidas--swin2SR-realworld-sr-x4-64-bsrgan-psnr/snapshots/bb13f02e45e88d00b6c202b3fbe6a181af144606'), size_on_disk=48461065, files=frozenset({CachedFileInfo(file_name='config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--caidas--swin2SR-realworld-sr-x4-64-bsrgan-psnr/snapshots/bb13f02e45e88d00b6c202b3fbe6a181af144606/config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--caidas--swin2SR-realworld-sr-x4-64-bsrgan-psnr/blobs/0a15b8aeffe63d67948215a81d191fd8190f16be'), size_on_disk=772, blob_last_accessed=1722954840.0557656, blob_last_modified=1722954840.043787), CachedFileInfo(file_name='preprocessor_config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--caidas--swin2SR-realworld-sr-x4-64-bsrgan-psnr/snapshots/bb13f02e45e88d00b6c202b3fbe6a181af144606/preprocessor_config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--caidas--swin2SR-realworld-sr-x4-64-bsrgan-psnr/blobs/539dbfb6265f0ece81a881579565e88b90668fc4'), size_on_disk=152, blob_last_accessed=1722954839.8127632, blob_last_modified=1722954839.8014247), CachedFileInfo(file_name='pytorch_model.bin', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--caidas--swin2SR-realworld-sr-x4-64-bsrgan-psnr/snapshots/bb13f02e45e88d00b6c202b3fbe6a181af144606/pytorch_model.bin'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--caidas--swin2SR-realworld-sr-x4-64-bsrgan-psnr/blobs/4a5f52a20932085557ed115f87c0ee8385e12f2719108c0dfd38c64aedea4710'), size_on_disk=48460141, blob_last_accessed=1722954848.1445184, blob_last_modified=1722954848.0298514)}), refs=frozenset({'main'}), last_modified=1722954848.0298514)}), last_accessed=1722954848.1445184, last_modified=1722954848.0298514),\n",
" CachedRepoInfo(repo_id='facebook/bart-large', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--bart-large'), size_on_disk=1628, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='cb48c1365bd826bd521f650dc2e0940aee54720c', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--bart-large/snapshots/cb48c1365bd826bd521f650dc2e0940aee54720c'), size_on_disk=1628, files=frozenset({CachedFileInfo(file_name='config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--bart-large/snapshots/cb48c1365bd826bd521f650dc2e0940aee54720c/config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--bart-large/blobs/79568cb2491a1a4da49f32fb723018158c222712'), size_on_disk=1628, blob_last_accessed=1722754758.8173473, blob_last_modified=1722754758.8058388)}), refs=frozenset({'main'}), last_modified=1722754758.8058388)}), last_accessed=1722754758.8173473, last_modified=1722754758.8058388),\n",
" CachedRepoInfo(repo_id='facebook/sam2-hiera-base-plus', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--sam2-hiera-base-plus'), size_on_disk=323493298, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='9bcec0ee2dcc1b6ae4b1674e2ed51ec71d2d31d9', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--sam2-hiera-base-plus/snapshots/9bcec0ee2dcc1b6ae4b1674e2ed51ec71d2d31d9'), size_on_disk=323493298, files=frozenset({CachedFileInfo(file_name='sam2_hiera_base_plus.pt', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--sam2-hiera-base-plus/snapshots/9bcec0ee2dcc1b6ae4b1674e2ed51ec71d2d31d9/sam2_hiera_base_plus.pt'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--sam2-hiera-base-plus/blobs/d0bb7f236400a49669ffdd1be617959a8b1d1065081789d7bbff88eded3a8071'), size_on_disk=323493298, blob_last_accessed=1723985664.6263692, blob_last_modified=1723985638.2220697)}), refs=frozenset({'main'}), last_modified=1723985638.2220697)}), last_accessed=1723985664.6263692, last_modified=1723985638.2220697),\n",
" CachedRepoInfo(repo_id='facebook/sam2-hiera-large', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--sam2-hiera-large'), size_on_disk=897952466, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='eba9be237c463eb950e64b65c223ad55c878c2ac', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--sam2-hiera-large/snapshots/eba9be237c463eb950e64b65c223ad55c878c2ac'), size_on_disk=897952466, files=frozenset({CachedFileInfo(file_name='sam2_hiera_large.pt', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--sam2-hiera-large/snapshots/eba9be237c463eb950e64b65c223ad55c878c2ac/sam2_hiera_large.pt'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--sam2-hiera-large/blobs/7442e4e9b732a508f80e141e7c2913437a3610ee0c77381a66658c3a445df87b'), size_on_disk=897952466, blob_last_accessed=1723985746.4751956, blob_last_modified=1723985745.5689125)}), refs=frozenset({'main'}), last_modified=1723985745.5689125)}), last_accessed=1723985746.4751956, last_modified=1723985745.5689125),\n",
" CachedRepoInfo(repo_id='microsoft/Florence-2-large-ft', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft'), size_on_disk=2647748, nb_files=8, revisions=frozenset({CachedRevisionInfo(commit_hash='bb44b80c15e943b1bf7cec6e076359cec6e40178', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178'), size_on_disk=2647748, files=frozenset({CachedFileInfo(file_name='processing_florence2.py', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178/processing_florence2.py'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/blobs/538110e8fd421258847d317cb62c40b9671d07a9'), size_on_disk=46372, blob_last_accessed=1722187335.8618798, blob_last_modified=1722187335.854595), CachedFileInfo(file_name='tokenizer_config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178/tokenizer_config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/blobs/44784bc58d4cb18d3549ad71e062efcf032d9ef5'), size_on_disk=34, blob_last_accessed=1722187335.5466971, blob_last_modified=1722187334.7324762), CachedFileInfo(file_name='config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178/config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/blobs/bff93d862796105c8cf1a0b3331ad3bec68aee91'), size_on_disk=2445, blob_last_accessed=1722186181.1469133, blob_last_modified=1722186180.799109), CachedFileInfo(file_name='vocab.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178/vocab.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/blobs/94a2f4fd50e976bda926c700291522ea1a79323f'), size_on_disk=1099884, blob_last_accessed=1722187336.2418828, blob_last_modified=1722187336.7108266), CachedFileInfo(file_name='modeling_florence2.py', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178/modeling_florence2.py'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/blobs/e5ee65134d1a5e98357f8d500c9b9af5f8c00a08'), size_on_disk=127219, blob_last_accessed=1722225017.2661808, blob_last_modified=1722225017.1880703), CachedFileInfo(file_name='configuration_florence2.py', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178/configuration_florence2.py'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/blobs/622f74997c5612ff68d0e55063714f291d159166'), size_on_disk=15125, blob_last_accessed=1722187334.9981484, blob_last_modified=1722187334.9932766), CachedFileInfo(file_name='tokenizer.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178/tokenizer.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/blobs/ad0bcbeb288f0d1373d88e0762e66357f55b8311'), size_on_disk=1355863, blob_last_accessed=1722187337.8523662, blob_last_modified=1722187337.4607415), CachedFileInfo(file_name='preprocessor_config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178/preprocessor_config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/blobs/85cd7be3568df661ad536b6ab20d59b08ba079ae'), size_on_disk=806, blob_last_accessed=1722187335.9961612, blob_last_modified=1722187335.4969347)}), refs=frozenset({'main'}), last_modified=1722225017.1880703)}), last_accessed=1722225017.2661808, last_modified=1722225017.1880703),\n",
" CachedRepoInfo(repo_id='microsoft/swinv2-base-patch4-window16-256', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--swinv2-base-patch4-window16-256'), size_on_disk=351904021, nb_files=3, revisions=frozenset({CachedRevisionInfo(commit_hash='628b75ababc4dad9f5bbabc1bf8bb612c4ab2f78', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--swinv2-base-patch4-window16-256/snapshots/628b75ababc4dad9f5bbabc1bf8bb612c4ab2f78'), size_on_disk=351904021, files=frozenset({CachedFileInfo(file_name='config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--swinv2-base-patch4-window16-256/snapshots/628b75ababc4dad9f5bbabc1bf8bb612c4ab2f78/config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--swinv2-base-patch4-window16-256/blobs/9f6070ac05bf6f561f789e8699a4dc387df58724'), size_on_disk=69910, blob_last_accessed=1722848474.6562126, blob_last_modified=1722848474.6428308), CachedFileInfo(file_name='preprocessor_config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--swinv2-base-patch4-window16-256/snapshots/628b75ababc4dad9f5bbabc1bf8bb612c4ab2f78/preprocessor_config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--swinv2-base-patch4-window16-256/blobs/fb816e3190d8ed24279c9975f45efeb660493c61'), size_on_disk=240, blob_last_accessed=1722847982.5025482, blob_last_modified=1722847982.4988532), CachedFileInfo(file_name='pytorch_model.bin', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--swinv2-base-patch4-window16-256/snapshots/628b75ababc4dad9f5bbabc1bf8bb612c4ab2f78/pytorch_model.bin'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--swinv2-base-patch4-window16-256/blobs/c9307c9aa168a730c370d472783ae8274408a059e95245e0d7fcf1a1d91cf9aa'), size_on_disk=351833871, blob_last_accessed=1723624967.1287704, blob_last_modified=1722848484.9202104)}), refs=frozenset({'main'}), last_modified=1722848484.9202104)}), last_accessed=1723624967.1287704, last_modified=1722848484.9202104),\n",
" CachedRepoInfo(repo_id='thanhhau097/swin2SR-realworld-sr-x4-64-bsrgan-psnr', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--thanhhau097--swin2SR-realworld-sr-x4-64-bsrgan-psnr'), size_on_disk=48456429, nb_files=3, revisions=frozenset({CachedRevisionInfo(commit_hash='e345b33f8e7e14b0dce731505234a8425412e343', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--thanhhau097--swin2SR-realworld-sr-x4-64-bsrgan-psnr/snapshots/e345b33f8e7e14b0dce731505234a8425412e343'), size_on_disk=48456429, files=frozenset({CachedFileInfo(file_name='config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--thanhhau097--swin2SR-realworld-sr-x4-64-bsrgan-psnr/snapshots/e345b33f8e7e14b0dce731505234a8425412e343/config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--thanhhau097--swin2SR-realworld-sr-x4-64-bsrgan-psnr/blobs/0a15b8aeffe63d67948215a81d191fd8190f16be'), size_on_disk=772, blob_last_accessed=1722954764.2667823, blob_last_modified=1722954764.2559414), CachedFileInfo(file_name='preprocessor_config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--thanhhau097--swin2SR-realworld-sr-x4-64-bsrgan-psnr/snapshots/e345b33f8e7e14b0dce731505234a8425412e343/preprocessor_config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--thanhhau097--swin2SR-realworld-sr-x4-64-bsrgan-psnr/blobs/539dbfb6265f0ece81a881579565e88b90668fc4'), size_on_disk=152, blob_last_accessed=1722954763.868174, blob_last_modified=1722954763.8569045), CachedFileInfo(file_name='pytorch_model.bin', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--thanhhau097--swin2SR-realworld-sr-x4-64-bsrgan-psnr/snapshots/e345b33f8e7e14b0dce731505234a8425412e343/pytorch_model.bin'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--thanhhau097--swin2SR-realworld-sr-x4-64-bsrgan-psnr/blobs/91b0a2ca989b9c4e5a91124f67f552741594fd1bd41e3114d65a316d36f45e60'), size_on_disk=48455505, blob_last_accessed=1722954768.0409808, blob_last_modified=1722954767.9221504)}), refs=frozenset({'main'}), last_modified=1722954767.9221504)}), last_accessed=1722954768.0409808, last_modified=1722954767.9221504)})"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scan_cache_dir().repos"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"facebook/sam2-hiera-base-plus\n",
"caidas/swin2SR-realworld-sr-x4-64-bsrgan-psnr\n",
"amaye15/receipts\n",
"amaye15/DaViT-Florence-2-large-ft\n",
"amaye15/tmp\n",
"amaye15/Products-10k\n",
"amaye15/invoices\n",
"microsoft/Florence-2-large-ft\n",
"microsoft/swinv2-base-patch4-window16-256\n",
"thanhhau097/swin2SR-realworld-sr-x4-64-bsrgan-psnr\n",
"amaye15/SwinV2-Base-Document-Classifier\n",
"amaye15/DaViT\n",
"facebook/sam2-hiera-large\n",
"facebook/bart-large\n",
"amaye15/NSFW\n"
]
}
],
"source": [
"for r in repo_info:\n",
" #if r.repo_n == DS_NAME:\n",
"\n",
" print(r.repo_id)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "825736a8246f4fb593f4847c5c2268b6",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading readme: 0%| | 0.00/5.24k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "699d58320ad6465697460490bfffaf65",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Resolving data files: 0%| | 0/38 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"ds = load_dataset(DS_NAME, cache_dir=p, streaming=True)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"ename": "AttributeError",
"evalue": "'IterableDataset' object has no attribute 'cleanup_cache_files'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[4], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mds\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtrain\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcleanup_cache_files\u001b[49m()\n",
"\u001b[0;31mAttributeError\u001b[0m: 'IterableDataset' object has no attribute 'cleanup_cache_files'"
]
}
],
"source": [
"ds[\"train\"].cleanup_cache_files()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"os.path.exists(os.path.join(os.getcwd(), \"data\"))"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Dataset({\n",
" features: ['image', 'masked_image', 'mask'],\n",
" num_rows: 37\n",
"})"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ds_processed"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"ename": "AttributeError",
"evalue": "type object 'DatasetDict' has no attribute 'get_cache_files_size'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[2], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdatasets\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DatasetDict\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# Get the cache size\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m cache_size \u001b[38;5;241m=\u001b[39m \u001b[43mDatasetDict\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_cache_files_size\u001b[49m()\n\u001b[1;32m 6\u001b[0m cache_size\n",
"\u001b[0;31mAttributeError\u001b[0m: type object 'DatasetDict' has no attribute 'get_cache_files_size'"
]
}
],
"source": [
"from datasets import DatasetDict\n",
"\n",
"# Get the cache size\n",
"cache_size = DatasetDict.get_cache_files_size()\n",
"\n",
"cache_size"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
|