Yhhxhfh committed on
Commit
7e26346
1 Parent(s): a9bd469

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -14,9 +14,9 @@ tokenizer = GPT2Tokenizer.from_pretrained(model_name)
14
  model = GPT2LMHeadModel.from_pretrained(model_name)
15
 
16
  # Cargar datasets y mantener todo en RAM
17
- dataset_humanizado = load_dataset('daily_dialog', split='train')
18
- dataset_codigo = load_dataset('code_search_net', split='train')
19
- dataset_prompts = load_dataset('openai_humaneval', split='train')
20
 
21
  combined_dataset = concatenate_datasets([
22
  dataset_humanizado,
@@ -31,7 +31,7 @@ def tokenize_function(examples):
31
  tokenized_dataset = combined_dataset.map(tokenize_function, batched=True)
32
 
33
  training_args = TrainingArguments(
34
- output_dir='./results', # Puede ser usado para guardar resultados, pero no es necesario en RAM
35
  per_device_train_batch_size=100,
36
  per_device_eval_batch_size=100,
37
  num_train_epochs=0,
@@ -68,4 +68,4 @@ def run_training():
68
  run_training()
69
 
70
  import shutil
71
- shutil.rmtree('./results', ignore_errors=True) # Limpiar si es necesario, aunque puede no ser requerido si todo está en RAM
 
14
  model = GPT2LMHeadModel.from_pretrained(model_name)
15
 
16
  # Cargar datasets y mantener todo en RAM
17
+ dataset_humanizado = load_dataset('daily_dialog', split='train', trust_remote_code=True)
18
+ dataset_codigo = load_dataset('code_search_net', split='train', trust_remote_code=True)
19
+ dataset_prompts = load_dataset('openai_humaneval', split='train', trust_remote_code=True)
20
 
21
  combined_dataset = concatenate_datasets([
22
  dataset_humanizado,
 
31
  tokenized_dataset = combined_dataset.map(tokenize_function, batched=True)
32
 
33
  training_args = TrainingArguments(
34
+ output_dir='./results',
35
  per_device_train_batch_size=100,
36
  per_device_eval_batch_size=100,
37
  num_train_epochs=0,
 
68
  run_training()
69
 
70
  import shutil
71
+ shutil.rmtree('./results', ignore_errors=True)