# Author: soeren — "take all datasets" (commit a36c062, ~1.05 kB)
import pickle
import datasets
import os
import umap
#force restart
if __name__ == "__main__":
    # Build (or reuse) a local pickle cache of the full speech-commands
    # dataset as a pandas DataFrame, so repeated runs skip the download.
    cache_file = "dataset_cache.pkl"

    if os.path.exists(cache_file):
        # Cache hit: unpickle the DataFrame saved by a previous run.
        # NOTE: the original bound this to `dataset` while the cache-miss
        # branch pickled `df`; unified to `df` so both paths yield the
        # same name and type (a pandas DataFrame).
        with open(cache_file, "rb") as file:
            df = pickle.load(file)
        print("Dataset loaded from cache.")
    else:
        # Cache miss: download all three splits and merge them into one
        # dataset (order: train, validation, test).
        ds_train = datasets.load_dataset("renumics/speech_commands_enriched", split="train")
        ds_test = datasets.load_dataset("renumics/speech_commands_enriched", split="test")
        ds_validation = datasets.load_dataset("renumics/speech_commands_enriched", split="validation")
        joined_dataset_enrichment = datasets.concatenate_datasets([ds_train, ds_validation, ds_test])
        print("Dataset loaded using datasets.load_dataset().")

        # Convert to a DataFrame and persist it for future runs.
        df = joined_dataset_enrichment.to_pandas()
        with open(cache_file, "wb") as file:
            pickle.dump(df, file)
        print("Dataset saved to cache.")