import os
import zipfile

# Running tallies for the archive scan. check_zip_contents() updates them
# through ``global`` declarations and the end-of-run report prints them.

# Number of ZIP archives handed to check_zip_contents().
total_files_count = 0

# Tally behind the "ZIP files containing all files and folders" report line.
contains_all_files_count = 0

# Number of archives lacking the expected folder or at least one expected file.
missing_files_count = 0

# One human-readable message per archive counted in missing_files_count.
missing_files_info = []


# Files every archive must provide inside its top-level folder.
_EXPECTED_FILES = (
    'config.json',
    'generation_config.json',
    'pytorch_model.bin',
    'source.spm',
    'target.spm',
    'tokenizer_config.json',
    'vocab.json',
)


def check_zip_contents(zip_file_path):
    """Check one ZIP archive for its expected folder and files.

    An archive named ``<name>.zip`` is expected to hold a top-level folder
    called ``<name>`` that contains every file in ``_EXPECTED_FILES``. The
    outcome is recorded in the module-level counters instead of being
    returned:

    * ``total_files_count`` is incremented for every call.
    * ``contains_all_files_count`` is incremented when nothing is missing.
    * ``missing_files_count`` is incremented and a message is appended to
      ``missing_files_info`` when the folder or any file is missing.

    Args:
        zip_file_path: Path of the ZIP archive to inspect.

    Returns:
        None.

    A file that is not a valid ZIP archive is reported on stdout and is
    counted only in ``total_files_count``.
    """
    global total_files_count, contains_all_files_count
    global missing_files_count, missing_files_info

    total_files_count += 1

    # Keep the try body down to the archive access itself. The success
    # counter must NOT be reset here, otherwise the final tally would only
    # ever reflect the last archive processed.
    try:
        with zipfile.ZipFile(zip_file_path, 'r') as zf:
            # A set gives O(1) membership tests; normpath drops the trailing
            # slash of directory entries and uses the platform separator,
            # matching os.path.join below.
            entries = {os.path.normpath(item) for item in zf.namelist()}
    except zipfile.BadZipFile as e:
        print(f"Error: {e}")
        print(f"File path: {zip_file_path}")
        return

    zip_file_name = os.path.splitext(os.path.basename(zip_file_path))[0]
    expected_folder = os.path.normpath(zip_file_name)

    # Explicit directory entries are optional in ZIP files, so the folder
    # also counts as present when any member lives underneath it.
    folder_prefix = expected_folder + os.sep
    folder_present = expected_folder in entries or any(
        entry.startswith(folder_prefix) for entry in entries
    )
    if not folder_present:
        missing_files_count += 1
        missing_files_info.append(f"{zip_file_name} does not contain the expected folder.\n")
        return

    missing_files = [
        expected_file
        for expected_file in _EXPECTED_FILES
        if os.path.join(expected_folder, expected_file) not in entries
    ]

    if not missing_files:
        contains_all_files_count += 1
    else:
        missing_files_count += 1
        missing_files_info.append(f"{zip_file_name} is missing the following files: {', '.join(missing_files)}\n")


# Driver: check every ZIP archive in the current working directory and
# print a summary of the results.
folder_path = os.getcwd()

# Match the extension case-insensitively, skip directories that merely end
# in ".zip" (opening one raises an OSError that nothing here handles), and
# sort so the report order is stable between runs.
zip_files = sorted(
    f for f in os.listdir(folder_path)
    if f.lower().endswith('.zip') and os.path.isfile(os.path.join(folder_path, f))
)

for zip_file in zip_files:
    zip_file_path = os.path.join(folder_path, zip_file)
    check_zip_contents(zip_file_path)

print(f"\nNumber of ZIP files containing all files and folders: {contains_all_files_count}")

# Each message already ends with a newline, so print() leaves a blank line
# between entries.
for info in missing_files_info:
    print(info)

print(f"\nProcessed {total_files_count} ZIP files")
# NOTE: this counts archives with a missing folder or file, not individual files.
print(f"Number of files missing: {missing_files_count}")