BBracke
/

DFUC24_baselines

Model card Files Files and versions Community

DFUC24_baselines / DFUC22_train_validation_split.py

BBracke

init

42b3dbb verified 6 months ago

raw

history blame

1.95 kB

	import glob
	import os
	import shutil
	from sklearn.model_selection import train_test_split

	# Source folders of the DFUC2022 dataset: images and their masks
	DATA_DIR = "/raid/DFUC24/datasets/dfuc2022/clean_thr0/"
	IMAGE_DIR = os.path.join(DATA_DIR, "images/")
	MASK_DIR = os.path.join(DATA_DIR, "labels/")

	# Gather the PNG paths of both folders and put each list in sorted order
	image_files = glob.glob(os.path.join(IMAGE_DIR, '*.png'))
	image_files.sort()
	mask_files = glob.glob(os.path.join(MASK_DIR, '*.png'))
	mask_files.sort()

	# Hold out 15% of the samples for validation. Both lists are split with the
	# same indices, and the fixed random_state keeps the split reproducible.
	# NOTE(review): the pairing relies on the two sorted lists lining up
	# index-by-index - confirm every image has a mask that sorts to the same slot.
	(
	    train_image_files,
	    valid_image_files,
	    train_mask_files,
	    valid_mask_files,
	) = train_test_split(image_files, mask_files, test_size=0.15, random_state=42)

	# Prepare the output tree: <SEP_DIR>/{images,labels}/{train,valid}
	SEP_DIR = "/raid/DFUC24/datasets/dfuc2022/sep/"
	for _kind in ('images', 'labels'):
	    for _subset in ('train', 'valid'):
	        os.makedirs(os.path.join(SEP_DIR, _kind + '/' + _subset), exist_ok=True)

	# Function to copy files to the respective directories
	def copy_files(files, destination_dir):
	    """Copy every file in ``files`` into ``destination_dir``.

	    Each file keeps its base name; an existing file with the same name in
	    the destination is overwritten. The destination directory is created
	    (including missing parents) when it does not exist yet, so the function
	    does not depend on the caller having prepared it.

	    Args:
	        files: Iterable of source file paths.
	        destination_dir: Directory that receives the copies. An empty string
	            means the current working directory.

	    Returns:
	        None.
	    """
	    # os.makedirs("") raises, while an empty destination is a valid way to
	    # target the current directory - only create non-empty destinations.
	    if destination_dir:
	        os.makedirs(destination_dir, exist_ok=True)

	    for file in files:
	        # Rebuild the path explicitly so the copy always lands inside
	        # destination_dir under the source's base name.
	        dest_path = os.path.join(destination_dir, os.path.basename(file))
	        shutil.copy(file, dest_path)

	# Send each subset to its folder below SEP_DIR, in the order:
	# train images, valid images, train masks, valid masks
	for _subset_files, _subdir in (
	    (train_image_files, 'images/train'),
	    (valid_image_files, 'images/valid'),
	    (train_mask_files, 'labels/train'),
	    (valid_mask_files, 'labels/valid'),
	):
	    copy_files(_subset_files, os.path.join(SEP_DIR, _subdir))

	# Reached only when every copy above finished without raising
	print("DFUC22 dataset have been split and files have been copied successfully.")