"""Fine-tune ``facebook/wav2vec2-base-960h`` with a CTC head and save the result.

The script loads the pretrained model and processor, converts each dataset
example into model inputs (``input_values``) and token-id targets
(``labels``), trains with ``Trainer``, and writes the model and processor to
``./sst_finetuned``.
"""
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, Trainer, TrainingArguments
from datasets import load_dataset

# Download model
model_name = "facebook/wav2vec2-base-960h"
model = Wav2Vec2ForCTC.from_pretrained(model_name)
processor = Wav2Vec2Processor.from_pretrained(model_name)

# Load dataset (replace with your dataset)
dataset = load_dataset("librispeech_asr", "clean", split="train.100")  # Example dataset


# Preprocess function
def preprocess_function(examples):
    """Convert one dataset example into model inputs and CTC targets.

    Args:
        examples: A single (non-batched) example with an ``"audio"`` entry
            providing ``"array"`` and ``"sampling_rate"``, and a ``"text"``
            transcription.

    Returns:
        A dict with ``"input_values"`` (features extracted from the waveform)
        and ``"labels"`` (token ids of the transcription). Both are left
        unpadded; padding is applied per batch by ``ctc_data_collator``.
    """
    audio = examples["audio"]
    # Call the feature extractor and tokenizer directly instead of going
    # through the deprecated ``processor.as_target_processor()`` context.
    inputs = processor.feature_extractor(
        audio["array"], sampling_rate=audio["sampling_rate"]
    )
    labels = processor.tokenizer(examples["text"])
    return {
        "input_values": inputs["input_values"][0],
        "labels": labels["input_ids"],
    }


def ctc_data_collator(features):
    """Pad a list of preprocessed examples into one training batch.

    Audio inputs and label sequences have different lengths and different
    padding rules, so each is padded separately. Label padding positions are
    replaced with -100 so they are excluded from the CTC loss.

    Args:
        features: List of dicts as produced by ``preprocess_function``.

    Returns:
        The padded batch of model inputs with an added ``"labels"`` tensor.
    """
    input_features = [{"input_values": f["input_values"]} for f in features]
    label_features = [{"input_ids": f["labels"]} for f in features]

    batch = processor.feature_extractor.pad(
        input_features, padding=True, return_tensors="pt"
    )
    label_batch = processor.tokenizer.pad(
        label_features, padding=True, return_tensors="pt"
    )

    # Mask out padded label positions so they do not contribute to the loss.
    batch["labels"] = label_batch["input_ids"].masked_fill(
        label_batch["attention_mask"].ne(1), -100
    )
    return batch


train_dataset = dataset.map(preprocess_function, remove_columns=dataset.column_names)

# Training arguments
training_args = TrainingArguments(
    output_dir="./sst_finetuned",
    per_device_train_batch_size=8,
    num_train_epochs=3,
    save_steps=500,
    logging_steps=10,
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    # Variable-length audio/label sequences must be padded per batch.
    data_collator=ctc_data_collator,
)

# Fine-tune
trainer.train()

# Save fine-tuned model
trainer.save_model("./sst_finetuned")
processor.save_pretrained("./sst_finetuned")
print("SST model fine-tuned and saved to './sst_finetuned'. Upload to models/sst_model in your Space.")