Update app.py
app.py
CHANGED
@@ -14,9 +14,9 @@ tokenizer = GPT2Tokenizer.from_pretrained(model_name)
 model = GPT2LMHeadModel.from_pretrained(model_name)
 
 # Load the datasets and keep everything in RAM
-dataset_humanizado = load_dataset('daily_dialog', split='train')
-dataset_codigo = load_dataset('code_search_net', split='train')
-dataset_prompts = load_dataset('openai_humaneval', split='train')
+dataset_humanizado = load_dataset('daily_dialog', split='train', trust_remote_code=True)
+dataset_codigo = load_dataset('code_search_net', split='train', trust_remote_code=True)
+dataset_prompts = load_dataset('openai_humaneval', split='train', trust_remote_code=True)
 
 combined_dataset = concatenate_datasets([
     dataset_humanizado,
@@ -31,7 +31,7 @@ def tokenize_function(examples):
 tokenized_dataset = combined_dataset.map(tokenize_function, batched=True)
 
 training_args = TrainingArguments(
-    output_dir='./results',
+    output_dir='./results',
     per_device_train_batch_size=100,
     per_device_eval_batch_size=100,
     num_train_epochs=0,
@@ -68,4 +68,4 @@ def run_training():
 run_training()
 
 import shutil
-shutil.rmtree('./results', ignore_errors=True)
+shutil.rmtree('./results', ignore_errors=True)
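The substantive change in this commit is the trust_remote_code=True argument. 'daily_dialog' and 'code_search_net' are distributed with a Python loading script rather than plain data files, and recent releases of the Hugging Face datasets library refuse to execute such scripts unless the caller opts in explicitly. A minimal sketch of the guarded calls; the version behavior described in the comments is our understanding of the library, not something stated in the commit:

from datasets import load_dataset

# 'daily_dialog' and 'code_search_net' ship a loading script instead of
# plain data files. Recent `datasets` releases raise an error for such
# datasets unless the script is explicitly trusted, which is what this
# commit opts into.
dataset_humanizado = load_dataset('daily_dialog', split='train',
                                  trust_remote_code=True)
dataset_codigo = load_dataset('code_search_net', split='train',
                              trust_remote_code=True)

Note that the newest major releases of datasets drop support for script-based datasets altogether, so pinning an older library version may also be necessary; treat that as an assumption to verify against the installed release.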
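One caveat the diff does not surface: concatenate_datasets requires all of its inputs to share the same features, and the three datasets here expose different column schemas ('dialog' for daily_dialog, 'func_code_string' among others for code_search_net, 'prompt' and 'canonical_solution' for openai_humaneval). A minimal sketch of normalizing them to a shared 'text' column before concatenation; the mapping below is our illustration, not part of the commit:

from datasets import concatenate_datasets

def to_text(dataset, build):
    # Reduce each dataset to a single 'text' column so the schemas match.
    return dataset.map(lambda ex: {'text': build(ex)},
                       remove_columns=dataset.column_names)

combined_dataset = concatenate_datasets([
    to_text(dataset_humanizado, lambda ex: ' '.join(ex['dialog'])),
    to_text(dataset_codigo, lambda ex: ex['func_code_string']),
    to_text(dataset_prompts, lambda ex: ex['prompt'] + ex['canonical_solution']),
])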