{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import shutil\n",
    "import logging\n",
    "import pretty_errors\n",
    "\n",
    "import huggingface_hub\n",
    "from datasets import Dataset\n",
    "from datasets import load_dataset\n",
    "from huggingface_hub import HfApi\n",
    "from huggingface_hub import scan_cache_dir\n",
    "\n",
    "# Set up the logger\n",
    "logger = logging.getLogger('basic_logger')\n",
    "logger.setLevel(logging.INFO)\n",
    "\n",
    "# Drop handlers left over from earlier runs of this cell so that\n",
    "# re-running it does not duplicate every log line.\n",
    "logger.handlers.clear()\n",
    "\n",
    "# Set up the console handler with a simple format.\n",
    "# strftime codes (%Y, %H, ...) are only valid in `datefmt`; `fmt` takes\n",
    "# %(field)s record attributes, so the timestamp comes from %(asctime)s.\n",
    "console_handler = logging.StreamHandler()\n",
    "console_handler.setLevel(logging.INFO)\n",
    "formatter = logging.Formatter(\n",
    "    fmt='%(asctime)s - %(name)s - %(levelname)s - %(message)s',\n",
    "    datefmt='%Y-%m-%d %H:%M:%S',\n",
    ")\n",
    "console_handler.setFormatter(formatter)\n",
    "logger.addHandler(console_handler)\n",
    "\n",
    "DS_NAME = \"amaye15/object-segmentation\"\n",
    "\n",
    "DATA_DIR = \"data\"\n",
    "p = os.path.join(os.getcwd(), DATA_DIR)\n",
    "\n",
    "# Start from an empty cache directory on every run.\n",
    "# NOTE: this deletes everything currently stored under ./data.\n",
    "if os.path.exists(p):\n",
    "    shutil.rmtree(p)\n",
    "\n",
    "os.mkdir(p)\n",
    "\n",
    "\n",
    "def get_data():\n",
    "    \"\"\"Yield the rows of the 'train' split of DS_NAME one at a time.\n",
    "\n",
    "    The dataset is opened with streaming=True and `p` as its cache\n",
    "    directory, so rows are produced lazily while iterating.\n",
    "    \"\"\"\n",
    "    ds = load_dataset(DS_NAME, cache_dir=p, streaming=True)\n",
    "    yield from ds[\"train\"]\n",
    "\n",
    "\n",
    "# ds_processed = Dataset.from_generator(get_data)\n",
    "# ds_processed.push_to_hub(\"amaye15/tmp\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Repositories currently present in the local Hugging Face hub cache\n",
    "repo_info = scan_cache_dir().repos"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "api = HfApi()\n",
    "\n",
    "# Get the git refs (branches, converts, tags) of the dataset repo\n",
    "revisions = api.list_repo_refs(repo_id=DS_NAME, repo_type=\"dataset\")\n",
    "\n",
    "# Check the latest commit on the first listed branch\n",
    "# latest_commit = revisions.branches[0].target_commit\n",
    "# print(f\"Latest commit ID: {latest_commit}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
"outputs": [ { "data": { "text/plain": [ "GitRefs(branches=[GitRefInfo(name='main', ref='refs/heads/main', target_commit='962a9a67307296a7abc7e94c2811c450970b80df')], converts=[GitRefInfo(name='duckdb', ref='refs/convert/duckdb', target_commit='72baa589701a6cbea2b7497931c7adf1daf42121'), GitRefInfo(name='parquet', ref='refs/convert/parquet', target_commit='c209a987d23de50a04ec9766e04dde2e4db7f5fb')], tags=[], pull_requests=None)" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "revisions" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "frozenset({CachedRepoInfo(repo_id='amaye15/DaViT', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--DaViT'), size_on_disk=1677, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='a96d58f5ca3d0b138d8efe7618a860b10f8d986b', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--DaViT/snapshots/a96d58f5ca3d0b138d8efe7618a860b10f8d986b'), size_on_disk=1677, files=frozenset({CachedFileInfo(file_name='README.md', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--DaViT/snapshots/a96d58f5ca3d0b138d8efe7618a860b10f8d986b/README.md'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--DaViT/blobs/c928ad7cd8a9f9e48fc14780b84e5dd2ad6b1606'), size_on_disk=1677, blob_last_accessed=1722324257.4824574, blob_last_modified=1722324257.396636)}), refs=frozenset({'main'}), last_modified=1722324257.396636)}), last_accessed=1722324257.4824574, last_modified=1722324257.396636),\n", " CachedRepoInfo(repo_id='amaye15/DaViT-Florence-2-large-ft', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--DaViT-Florence-2-large-ft'), size_on_disk=1834, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='4cc7068026aaeb388ba2b0826abae30d670de3fc', 
snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--DaViT-Florence-2-large-ft/snapshots/4cc7068026aaeb388ba2b0826abae30d670de3fc'), size_on_disk=1834, files=frozenset({CachedFileInfo(file_name='README.md', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--DaViT-Florence-2-large-ft/snapshots/4cc7068026aaeb388ba2b0826abae30d670de3fc/README.md'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--DaViT-Florence-2-large-ft/blobs/ab9f4c4537bc89f3a4cb187db5d771be9242f09f'), size_on_disk=1834, blob_last_accessed=1722405977.6422648, blob_last_modified=1722405977.6310754)}), refs=frozenset({'main'}), last_modified=1722405977.6310754)}), last_accessed=1722405977.6422648, last_modified=1722405977.6310754),\n", " CachedRepoInfo(repo_id='amaye15/NSFW', repo_type='dataset', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--NSFW'), size_on_disk=1240, nb_files=2, revisions=frozenset({CachedRevisionInfo(commit_hash='c76b1c300fb672189feb59f8faa1027b2d6956b3', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--NSFW/snapshots/c76b1c300fb672189feb59f8faa1027b2d6956b3'), size_on_disk=619, files=frozenset({CachedFileInfo(file_name='README.md', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--NSFW/snapshots/c76b1c300fb672189feb59f8faa1027b2d6956b3/README.md'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--NSFW/blobs/dc8bcda261a57d5275af975f1411afdadc094009'), size_on_disk=619, blob_last_accessed=1722723952.58199, blob_last_modified=1722723952.5701885)}), refs=frozenset(), last_modified=1722723952.5701885), CachedRevisionInfo(commit_hash='b5cfb52e5a260983c6e6f70c7b21574efce998b1', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--NSFW/snapshots/b5cfb52e5a260983c6e6f70c7b21574efce998b1'), size_on_disk=621, 
files=frozenset({CachedFileInfo(file_name='README.md', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--NSFW/snapshots/b5cfb52e5a260983c6e6f70c7b21574efce998b1/README.md'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--NSFW/blobs/5924f8b7af709a9f080759cac11ea6f1c976df5d'), size_on_disk=621, blob_last_accessed=1722768256.9392703, blob_last_modified=1722768256.9274719)}), refs=frozenset({'main'}), last_modified=1722768256.9274719)}), last_accessed=1722768256.9392703, last_modified=1722768256.9274719),\n", " CachedRepoInfo(repo_id='amaye15/Products-10k', repo_type='dataset', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--Products-10k'), size_on_disk=620, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='05b2a7a7513a04c95c8fd8c4fb925cd9bc03397c', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--Products-10k/snapshots/05b2a7a7513a04c95c8fd8c4fb925cd9bc03397c'), size_on_disk=620, files=frozenset({CachedFileInfo(file_name='README.md', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--Products-10k/snapshots/05b2a7a7513a04c95c8fd8c4fb925cd9bc03397c/README.md'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--Products-10k/blobs/a71affed5a9687aeabd33f9aa94c9cde049eb533'), size_on_disk=620, blob_last_accessed=1723091983.5595, blob_last_modified=1723091983.547405)}), refs=frozenset({'main'}), last_modified=1723091983.547405)}), last_accessed=1723091983.5595, last_modified=1723091983.547405),\n", " CachedRepoInfo(repo_id='amaye15/SwinV2-Base-Document-Classifier', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--SwinV2-Base-Document-Classifier'), size_on_disk=590, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='b0968577b56aec082d7cde1d2b04f68173b8e674', 
snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--SwinV2-Base-Document-Classifier/snapshots/b0968577b56aec082d7cde1d2b04f68173b8e674'), size_on_disk=590, files=frozenset({CachedFileInfo(file_name='preprocessor_config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--SwinV2-Base-Document-Classifier/snapshots/b0968577b56aec082d7cde1d2b04f68173b8e674/preprocessor_config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--amaye15--SwinV2-Base-Document-Classifier/blobs/86614921b04ad5b6e3d4ee5448f11efe6cc67917'), size_on_disk=590, blob_last_accessed=1722750225.5857947, blob_last_modified=1722750225.574735)}), refs=frozenset({'main'}), last_modified=1722750225.574735)}), last_accessed=1722750225.5857947, last_modified=1722750225.574735),\n", " CachedRepoInfo(repo_id='amaye15/invoices', repo_type='dataset', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--invoices'), size_on_disk=618, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='f4e8d7dda1472da87125237182dc9f4d5fd860dc', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--invoices/snapshots/f4e8d7dda1472da87125237182dc9f4d5fd860dc'), size_on_disk=618, files=frozenset({CachedFileInfo(file_name='README.md', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--invoices/snapshots/f4e8d7dda1472da87125237182dc9f4d5fd860dc/README.md'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--invoices/blobs/10d6a19135e958a4856ebfbd82b130f571667b26'), size_on_disk=618, blob_last_accessed=1723087468.3128088, blob_last_modified=1723087468.3009398)}), refs=frozenset({'main'}), last_modified=1723087468.3009398)}), last_accessed=1723087468.3128088, last_modified=1723087468.3009398),\n", " CachedRepoInfo(repo_id='amaye15/receipts', repo_type='dataset', 
repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--receipts'), size_on_disk=617, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='7eaf60e64883eee2a744c1e00658967e0b61aab3', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--receipts/snapshots/7eaf60e64883eee2a744c1e00658967e0b61aab3'), size_on_disk=617, files=frozenset({CachedFileInfo(file_name='README.md', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--receipts/snapshots/7eaf60e64883eee2a744c1e00658967e0b61aab3/README.md'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--receipts/blobs/22cf712cf7551f2d2df0e6d87358a104fa485122'), size_on_disk=617, blob_last_accessed=1723085450.105201, blob_last_modified=1723085450.0932333)}), refs=frozenset({'main'}), last_modified=1723085450.0932333)}), last_accessed=1723085450.105201, last_modified=1723085450.0932333),\n", " CachedRepoInfo(repo_id='amaye15/tmp', repo_type='dataset', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--tmp'), size_on_disk=372, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='b593656ae71cef84e90be18cf6bb29cdc74fd7ff', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--tmp/snapshots/b593656ae71cef84e90be18cf6bb29cdc74fd7ff'), size_on_disk=372, files=frozenset({CachedFileInfo(file_name='README.md', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--tmp/snapshots/b593656ae71cef84e90be18cf6bb29cdc74fd7ff/README.md'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/datasets--amaye15--tmp/blobs/c274c17b952e2eba2a83b9255b334db02fd75125'), size_on_disk=372, blob_last_accessed=1724597074.5835145, blob_last_modified=1724597074.5719097)}), refs=frozenset({'main'}), last_modified=1724597074.5719097)}), last_accessed=1724597074.5835145, last_modified=1724597074.5719097),\n", " 
CachedRepoInfo(repo_id='caidas/swin2SR-realworld-sr-x4-64-bsrgan-psnr', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--caidas--swin2SR-realworld-sr-x4-64-bsrgan-psnr'), size_on_disk=48461065, nb_files=3, revisions=frozenset({CachedRevisionInfo(commit_hash='bb13f02e45e88d00b6c202b3fbe6a181af144606', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--caidas--swin2SR-realworld-sr-x4-64-bsrgan-psnr/snapshots/bb13f02e45e88d00b6c202b3fbe6a181af144606'), size_on_disk=48461065, files=frozenset({CachedFileInfo(file_name='config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--caidas--swin2SR-realworld-sr-x4-64-bsrgan-psnr/snapshots/bb13f02e45e88d00b6c202b3fbe6a181af144606/config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--caidas--swin2SR-realworld-sr-x4-64-bsrgan-psnr/blobs/0a15b8aeffe63d67948215a81d191fd8190f16be'), size_on_disk=772, blob_last_accessed=1722954840.0557656, blob_last_modified=1722954840.043787), CachedFileInfo(file_name='preprocessor_config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--caidas--swin2SR-realworld-sr-x4-64-bsrgan-psnr/snapshots/bb13f02e45e88d00b6c202b3fbe6a181af144606/preprocessor_config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--caidas--swin2SR-realworld-sr-x4-64-bsrgan-psnr/blobs/539dbfb6265f0ece81a881579565e88b90668fc4'), size_on_disk=152, blob_last_accessed=1722954839.8127632, blob_last_modified=1722954839.8014247), CachedFileInfo(file_name='pytorch_model.bin', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--caidas--swin2SR-realworld-sr-x4-64-bsrgan-psnr/snapshots/bb13f02e45e88d00b6c202b3fbe6a181af144606/pytorch_model.bin'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--caidas--swin2SR-realworld-sr-x4-64-bsrgan-psnr/blobs/4a5f52a20932085557ed115f87c0ee8385e12f2719108c0dfd38c64aedea4710'), 
size_on_disk=48460141, blob_last_accessed=1722954848.1445184, blob_last_modified=1722954848.0298514)}), refs=frozenset({'main'}), last_modified=1722954848.0298514)}), last_accessed=1722954848.1445184, last_modified=1722954848.0298514),\n", " CachedRepoInfo(repo_id='facebook/bart-large', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--bart-large'), size_on_disk=1628, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='cb48c1365bd826bd521f650dc2e0940aee54720c', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--bart-large/snapshots/cb48c1365bd826bd521f650dc2e0940aee54720c'), size_on_disk=1628, files=frozenset({CachedFileInfo(file_name='config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--bart-large/snapshots/cb48c1365bd826bd521f650dc2e0940aee54720c/config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--bart-large/blobs/79568cb2491a1a4da49f32fb723018158c222712'), size_on_disk=1628, blob_last_accessed=1722754758.8173473, blob_last_modified=1722754758.8058388)}), refs=frozenset({'main'}), last_modified=1722754758.8058388)}), last_accessed=1722754758.8173473, last_modified=1722754758.8058388),\n", " CachedRepoInfo(repo_id='facebook/sam2-hiera-base-plus', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--sam2-hiera-base-plus'), size_on_disk=323493298, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='9bcec0ee2dcc1b6ae4b1674e2ed51ec71d2d31d9', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--sam2-hiera-base-plus/snapshots/9bcec0ee2dcc1b6ae4b1674e2ed51ec71d2d31d9'), size_on_disk=323493298, files=frozenset({CachedFileInfo(file_name='sam2_hiera_base_plus.pt', 
file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--sam2-hiera-base-plus/snapshots/9bcec0ee2dcc1b6ae4b1674e2ed51ec71d2d31d9/sam2_hiera_base_plus.pt'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--sam2-hiera-base-plus/blobs/d0bb7f236400a49669ffdd1be617959a8b1d1065081789d7bbff88eded3a8071'), size_on_disk=323493298, blob_last_accessed=1723985664.6263692, blob_last_modified=1723985638.2220697)}), refs=frozenset({'main'}), last_modified=1723985638.2220697)}), last_accessed=1723985664.6263692, last_modified=1723985638.2220697),\n", " CachedRepoInfo(repo_id='facebook/sam2-hiera-large', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--sam2-hiera-large'), size_on_disk=897952466, nb_files=1, revisions=frozenset({CachedRevisionInfo(commit_hash='eba9be237c463eb950e64b65c223ad55c878c2ac', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--sam2-hiera-large/snapshots/eba9be237c463eb950e64b65c223ad55c878c2ac'), size_on_disk=897952466, files=frozenset({CachedFileInfo(file_name='sam2_hiera_large.pt', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--sam2-hiera-large/snapshots/eba9be237c463eb950e64b65c223ad55c878c2ac/sam2_hiera_large.pt'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--facebook--sam2-hiera-large/blobs/7442e4e9b732a508f80e141e7c2913437a3610ee0c77381a66658c3a445df87b'), size_on_disk=897952466, blob_last_accessed=1723985746.4751956, blob_last_modified=1723985745.5689125)}), refs=frozenset({'main'}), last_modified=1723985745.5689125)}), last_accessed=1723985746.4751956, last_modified=1723985745.5689125),\n", " CachedRepoInfo(repo_id='microsoft/Florence-2-large-ft', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft'), size_on_disk=2647748, nb_files=8, 
revisions=frozenset({CachedRevisionInfo(commit_hash='bb44b80c15e943b1bf7cec6e076359cec6e40178', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178'), size_on_disk=2647748, files=frozenset({CachedFileInfo(file_name='processing_florence2.py', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178/processing_florence2.py'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/blobs/538110e8fd421258847d317cb62c40b9671d07a9'), size_on_disk=46372, blob_last_accessed=1722187335.8618798, blob_last_modified=1722187335.854595), CachedFileInfo(file_name='tokenizer_config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178/tokenizer_config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/blobs/44784bc58d4cb18d3549ad71e062efcf032d9ef5'), size_on_disk=34, blob_last_accessed=1722187335.5466971, blob_last_modified=1722187334.7324762), CachedFileInfo(file_name='config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178/config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/blobs/bff93d862796105c8cf1a0b3331ad3bec68aee91'), size_on_disk=2445, blob_last_accessed=1722186181.1469133, blob_last_modified=1722186180.799109), CachedFileInfo(file_name='vocab.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178/vocab.json'), 
blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/blobs/94a2f4fd50e976bda926c700291522ea1a79323f'), size_on_disk=1099884, blob_last_accessed=1722187336.2418828, blob_last_modified=1722187336.7108266), CachedFileInfo(file_name='modeling_florence2.py', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178/modeling_florence2.py'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/blobs/e5ee65134d1a5e98357f8d500c9b9af5f8c00a08'), size_on_disk=127219, blob_last_accessed=1722225017.2661808, blob_last_modified=1722225017.1880703), CachedFileInfo(file_name='configuration_florence2.py', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178/configuration_florence2.py'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/blobs/622f74997c5612ff68d0e55063714f291d159166'), size_on_disk=15125, blob_last_accessed=1722187334.9981484, blob_last_modified=1722187334.9932766), CachedFileInfo(file_name='tokenizer.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178/tokenizer.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/blobs/ad0bcbeb288f0d1373d88e0762e66357f55b8311'), size_on_disk=1355863, blob_last_accessed=1722187337.8523662, blob_last_modified=1722187337.4607415), CachedFileInfo(file_name='preprocessor_config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/snapshots/bb44b80c15e943b1bf7cec6e076359cec6e40178/preprocessor_config.json'), 
blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--Florence-2-large-ft/blobs/85cd7be3568df661ad536b6ab20d59b08ba079ae'), size_on_disk=806, blob_last_accessed=1722187335.9961612, blob_last_modified=1722187335.4969347)}), refs=frozenset({'main'}), last_modified=1722225017.1880703)}), last_accessed=1722225017.2661808, last_modified=1722225017.1880703),\n", " CachedRepoInfo(repo_id='microsoft/swinv2-base-patch4-window16-256', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--swinv2-base-patch4-window16-256'), size_on_disk=351904021, nb_files=3, revisions=frozenset({CachedRevisionInfo(commit_hash='628b75ababc4dad9f5bbabc1bf8bb612c4ab2f78', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--swinv2-base-patch4-window16-256/snapshots/628b75ababc4dad9f5bbabc1bf8bb612c4ab2f78'), size_on_disk=351904021, files=frozenset({CachedFileInfo(file_name='config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--swinv2-base-patch4-window16-256/snapshots/628b75ababc4dad9f5bbabc1bf8bb612c4ab2f78/config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--swinv2-base-patch4-window16-256/blobs/9f6070ac05bf6f561f789e8699a4dc387df58724'), size_on_disk=69910, blob_last_accessed=1722848474.6562126, blob_last_modified=1722848474.6428308), CachedFileInfo(file_name='preprocessor_config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--swinv2-base-patch4-window16-256/snapshots/628b75ababc4dad9f5bbabc1bf8bb612c4ab2f78/preprocessor_config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--swinv2-base-patch4-window16-256/blobs/fb816e3190d8ed24279c9975f45efeb660493c61'), size_on_disk=240, blob_last_accessed=1722847982.5025482, blob_last_modified=1722847982.4988532), CachedFileInfo(file_name='pytorch_model.bin', 
file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--swinv2-base-patch4-window16-256/snapshots/628b75ababc4dad9f5bbabc1bf8bb612c4ab2f78/pytorch_model.bin'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--microsoft--swinv2-base-patch4-window16-256/blobs/c9307c9aa168a730c370d472783ae8274408a059e95245e0d7fcf1a1d91cf9aa'), size_on_disk=351833871, blob_last_accessed=1723624967.1287704, blob_last_modified=1722848484.9202104)}), refs=frozenset({'main'}), last_modified=1722848484.9202104)}), last_accessed=1723624967.1287704, last_modified=1722848484.9202104),\n", " CachedRepoInfo(repo_id='thanhhau097/swin2SR-realworld-sr-x4-64-bsrgan-psnr', repo_type='model', repo_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--thanhhau097--swin2SR-realworld-sr-x4-64-bsrgan-psnr'), size_on_disk=48456429, nb_files=3, revisions=frozenset({CachedRevisionInfo(commit_hash='e345b33f8e7e14b0dce731505234a8425412e343', snapshot_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--thanhhau097--swin2SR-realworld-sr-x4-64-bsrgan-psnr/snapshots/e345b33f8e7e14b0dce731505234a8425412e343'), size_on_disk=48456429, files=frozenset({CachedFileInfo(file_name='config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--thanhhau097--swin2SR-realworld-sr-x4-64-bsrgan-psnr/snapshots/e345b33f8e7e14b0dce731505234a8425412e343/config.json'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--thanhhau097--swin2SR-realworld-sr-x4-64-bsrgan-psnr/blobs/0a15b8aeffe63d67948215a81d191fd8190f16be'), size_on_disk=772, blob_last_accessed=1722954764.2667823, blob_last_modified=1722954764.2559414), CachedFileInfo(file_name='preprocessor_config.json', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--thanhhau097--swin2SR-realworld-sr-x4-64-bsrgan-psnr/snapshots/e345b33f8e7e14b0dce731505234a8425412e343/preprocessor_config.json'), 
blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--thanhhau097--swin2SR-realworld-sr-x4-64-bsrgan-psnr/blobs/539dbfb6265f0ece81a881579565e88b90668fc4'), size_on_disk=152, blob_last_accessed=1722954763.868174, blob_last_modified=1722954763.8569045), CachedFileInfo(file_name='pytorch_model.bin', file_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--thanhhau097--swin2SR-realworld-sr-x4-64-bsrgan-psnr/snapshots/e345b33f8e7e14b0dce731505234a8425412e343/pytorch_model.bin'), blob_path=PosixPath('/Users/andrewmayes/.cache/huggingface/hub/models--thanhhau097--swin2SR-realworld-sr-x4-64-bsrgan-psnr/blobs/91b0a2ca989b9c4e5a91124f67f552741594fd1bd41e3114d65a316d36f45e60'), size_on_disk=48455505, blob_last_accessed=1722954768.0409808, blob_last_modified=1722954767.9221504)}), refs=frozenset({'main'}), last_modified=1722954767.9221504)}), last_accessed=1722954768.0409808, last_modified=1722954767.9221504)})" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "scan_cache_dir().repos" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "facebook/sam2-hiera-base-plus\n", "caidas/swin2SR-realworld-sr-x4-64-bsrgan-psnr\n", "amaye15/receipts\n", "amaye15/DaViT-Florence-2-large-ft\n", "amaye15/tmp\n", "amaye15/Products-10k\n", "amaye15/invoices\n", "microsoft/Florence-2-large-ft\n", "microsoft/swinv2-base-patch4-window16-256\n", "thanhhau097/swin2SR-realworld-sr-x4-64-bsrgan-psnr\n", "amaye15/SwinV2-Base-Document-Classifier\n", "amaye15/DaViT\n", "facebook/sam2-hiera-large\n", "facebook/bart-large\n", "amaye15/NSFW\n" ] } ], "source": [ "for r in repo_info:\n", " #if r.repo_n == DS_NAME:\n", "\n", " print(r.repo_id)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "825736a8246f4fb593f4847c5c2268b6", "version_major": 2, 
"version_minor": 0 }, "text/plain": [ "Downloading readme: 0%| | 0.00/5.24k [00:00 1\u001b[0m \u001b[43mds\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtrain\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcleanup_cache_files\u001b[49m()\n", "\u001b[0;31mAttributeError\u001b[0m: 'IterableDataset' object has no attribute 'cleanup_cache_files'" ] } ], "source": [ "ds[\"train\"].cleanup_cache_files()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "os.path.exists(os.path.join(os.getcwd(), \"data\"))" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Dataset({\n", " features: ['image', 'masked_image', 'mask'],\n", " num_rows: 37\n", "})" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds_processed" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "ename": "AttributeError", "evalue": "type object 'DatasetDict' has no attribute 'get_cache_files_size'", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[2], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdatasets\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DatasetDict\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# Get the cache size\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m cache_size \u001b[38;5;241m=\u001b[39m \u001b[43mDatasetDict\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_cache_files_size\u001b[49m()\n\u001b[1;32m 6\u001b[0m cache_size\n", "\u001b[0;31mAttributeError\u001b[0m: type 
object 'DatasetDict' has no attribute 'get_cache_files_size'" ] } ], "source": [ "from datasets import DatasetDict\n", "\n", "# Get the cache size\n", "cache_size = DatasetDict.get_cache_files_size()\n", "\n", "cache_size" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "env", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.4" } }, "nbformat": 4, "nbformat_minor": 2 }