"""Command-line entry point that builds an AugmentedDataset and saves it."""

import argparse
import os  # imported explicitly; previously only reachable via the wildcard import

from src.dataset.GoodDataset import *  # noqa: F401,F403 -- provides AugmentedDataset

# Accepted spellings for the boolean-valued ``--random`` option.  The empty
# string is kept as "false" because, with the former ``type=bool`` converter,
# it was the only value that evaluated to False.
_TRUE_STRINGS = frozenset({"1", "true", "t", "yes", "y", "on"})
_FALSE_STRINGS = frozenset({"", "0", "false", "f", "no", "n", "off"})


def _str2bool(value):
    """Convert a command-line string into a real boolean.

    ``argparse`` with ``type=bool`` simply calls ``bool(text)``, so every
    non-empty string -- including ``"False"`` -- becomes ``True``.  This
    converter interprets the text instead.

    Args:
        value: The raw argument text (or an actual bool, returned as is).

    Returns:
        The boolean the text stands for.

    Raises:
        argparse.ArgumentTypeError: If the text is not a recognised boolean
            spelling; argparse reports this as a normal usage error.
    """
    if isinstance(value, bool):
        return value
    normalized = value.strip().lower()
    if normalized in _TRUE_STRINGS:
        return True
    if normalized in _FALSE_STRINGS:
        return False
    raise argparse.ArgumentTypeError(
        f"expected a boolean value (true/false), got {value!r}"
    )


def main(config):
    """Build the dataset from the parsed arguments and save it.

    Args:
        config: Namespace object containing the script arguments. The
            attributes read are ``size``, ``random``, ``seed``, ``full``
            and ``output``.
    """
    # Initialize the dataset
    dataset = AugmentedDataset()

    # The return value is intentionally not bound: the original code never
    # used it.  NOTE(review): presumably the fetched samples are retained on
    # ``dataset`` so that ``save`` can write them -- confirm in GoodDataset.
    dataset.fetch_positive_samples_parallel(
        num_samples=config.size,
        random=config.random,
        seed=config.seed,
        full=config.full,
    )
    dataset.save(config.output)


if __name__ == "__main__":
    # Imported here (not at module top) to keep the original behaviour of only
    # loading the project root when the file is executed as a script.
    from src.utils.io_utils import PROJECT_ROOT

    # Parse command-line arguments
    parser = argparse.ArgumentParser(
        description="Generate and save a dataset based on the given configuration."
    )
    parser.add_argument(
        "-s", "--size", type=int, default=10,
        help="Number of samples to generate.",
    )
    parser.add_argument(
        "-r", "--random", type=_str2bool, default=True,
        help="Whether to sample randomly (true/false).",
    )
    parser.add_argument(
        "--seed", type=int, default=42,
        help="Random seed for reproducibility.",
    )
    parser.add_argument(
        "--full", action="store_true",
        help="Boolean flag to indicate full dataset mode.",
    )
    parser.add_argument(
        "-o", "--output", type=str,
        default=os.path.join(PROJECT_ROOT, "data/dataset.pkl"),
        help="Output file path to save the dataset to.",
    )

    # Parse the arguments and pass to the main function
    config = parser.parse_args()
    main(config)