|
import glob |
|
import os |
|
import shutil |
|
from sklearn.model_selection import train_test_split |
|
|
|
|
|
# Input dataset layout: PNG images under <DATA_DIR>/images/ and PNG masks
# under <DATA_DIR>/labels/.
DATA_DIR = "/raid/DFUC24/datasets/dfuc2022/clean_thr0/"
IMAGE_DIR = os.path.join(DATA_DIR, "images/")
MASK_DIR = os.path.join(DATA_DIR, "labels/")

# Both listings are sorted so that the i-th image is paired with the i-th mask.
# NOTE(review): this pairing relies on images and masks sorting into the same
# order (presumably they share file names) -- confirm against the dataset.
image_files = sorted(glob.glob(os.path.join(IMAGE_DIR, '*.png')))
mask_files = sorted(glob.glob(os.path.join(MASK_DIR, '*.png')))

# Fail early with a message that names the directories; otherwise the same
# conditions only surface as a less specific ValueError inside train_test_split.
if not image_files:
    raise ValueError(f"No '*.png' images found in {IMAGE_DIR}")
if len(image_files) != len(mask_files):
    raise ValueError(
        f"Image/mask count mismatch: {len(image_files)} images in {IMAGE_DIR} "
        f"vs {len(mask_files)} masks in {MASK_DIR}"
    )

# Hold out 15% of the image/mask pairs for validation. Passing both lists in
# one call keeps each image with its mask; the fixed random_state makes the
# shuffle reproducible across runs.
train_image_files, valid_image_files, train_mask_files, valid_mask_files = train_test_split(
    image_files, mask_files, test_size=0.15, random_state=42
)
|
|
|
|
|
# Root of the split dataset. Each of images/ and labels/ gets a train/ and a
# valid/ sub-directory; exist_ok=True makes re-running the script harmless.
SEP_DIR = "/raid/DFUC24/datasets/dfuc2022/sep/"
for _split_subdir in ('images/train', 'images/valid', 'labels/train', 'labels/valid'):
    os.makedirs(os.path.join(SEP_DIR, _split_subdir), exist_ok=True)
|
|
|
|
|
def copy_files(files, destination_dir):
    """Copy every path in *files* into *destination_dir*, keeping base names.

    The destination directory (including missing parents) is created when it
    does not exist yet, so the function no longer depends on the caller
    having prepared it. A file already present under the same name is
    overwritten, which is what ``shutil.copy`` does.

    Args:
        files: Iterable of source file paths.
        destination_dir: Directory that receives the copies. An empty string
            means the current working directory.

    Returns:
        None.
    """
    # os.makedirs("") raises, while an empty destination is otherwise valid
    # (os.path.join("", name) == name), so only create a non-empty path.
    if destination_dir:
        os.makedirs(destination_dir, exist_ok=True)

    for file in files:
        file_name = os.path.basename(file)
        dest_path = os.path.join(destination_dir, file_name)
        shutil.copy(file, dest_path)
|
|
|
|
|
# Copy each split into its sub-directory of SEP_DIR, in the order:
# train images, valid images, train masks, valid masks.
for _split_files, _target_subdir in (
    (train_image_files, 'images/train'),
    (valid_image_files, 'images/valid'),
    (train_mask_files, 'labels/train'),
    (valid_mask_files, 'labels/valid'),
):
    copy_files(_split_files, os.path.join(SEP_DIR, _target_subdir))

print("DFUC22 dataset have been split and files have been copied successfully.")