Update app.py
Browse files
app.py
CHANGED
@@ -9,9 +9,11 @@ with open("train.json", "r", encoding="utf-8") as f:
|
|
9 |
train_data = json.load(f)
|
10 |
|
11 |
# ✅ 2. Pārveido datus Hugging Face dataset formātā
|
12 |
-
dataset = Dataset.from_list(
|
|
|
|
|
13 |
|
14 |
-
# ✅ 3. Sadala datus treniņam un testam (80%
|
15 |
train_test_split = dataset.train_test_split(test_size=0.2)
|
16 |
train_dataset = train_test_split["train"]
|
17 |
eval_dataset = train_test_split["test"]
|
@@ -36,27 +38,27 @@ eval_dataset = eval_dataset.map(preprocess_data, batched=True)
|
|
36 |
# ✅ 6. Definē treniņa parametrus
|
37 |
training_args = TrainingArguments(
|
38 |
output_dir="./trained_model",
|
39 |
-
evaluation_strategy="epoch",
|
40 |
-
save_strategy="epoch",
|
41 |
logging_strategy="epoch",
|
42 |
learning_rate=5e-5,
|
43 |
per_device_train_batch_size=4,
|
44 |
per_device_eval_batch_size=4,
|
45 |
-
num_train_epochs=3,
|
46 |
weight_decay=0.01,
|
47 |
-
push_to_hub=False # ✅ Pārvērties
|
48 |
)
|
49 |
|
50 |
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)
|
51 |
|
52 |
-
# ✅ 7.
|
53 |
trainer = Trainer(
|
54 |
model=model,
|
55 |
args=training_args,
|
56 |
train_dataset=train_dataset,
|
57 |
-
eval_dataset=eval_dataset,
|
58 |
tokenizer=tokenizer,
|
59 |
-
data_collator=data_collator
|
60 |
)
|
61 |
|
62 |
# ✅ 8. Sāc AI apmācību
|
@@ -70,7 +72,8 @@ tokenizer.save_pretrained("./trained_model")
|
|
70 |
results = trainer.evaluate()
|
71 |
print("📊 Testēšanas rezultāti:", results)
|
72 |
|
73 |
-
# ✅ 11. Ja vēlies
|
74 |
# (Pirms tam jāpieslēdzas Hugging Face ar `notebook_login()`)
|
75 |
-
trainer.push_to_hub("lietotajsvards/elektro-ai")
|
|
|
76 |
|
|
|
9 |
train_data = json.load(f)
|
10 |
|
11 |
# ✅ 2. Pārveido datus Hugging Face dataset formātā
|
12 |
+
dataset = Dataset.from_list(
|
13 |
+
[{"input_text": d["jautājums"], "target_text": d["atbilde"]} for d in train_data]
|
14 |
+
)
|
15 |
|
16 |
+
# ✅ 3. Sadala datus treniņam un testam (80% treniņam, 20% testam)
|
17 |
train_test_split = dataset.train_test_split(test_size=0.2)
|
18 |
train_dataset = train_test_split["train"]
|
19 |
eval_dataset = train_test_split["test"]
|
|
|
38 |
# ✅ 6. Definē treniņa parametrus
|
39 |
training_args = TrainingArguments(
|
40 |
output_dir="./trained_model",
|
41 |
+
evaluation_strategy="epoch",
|
42 |
+
save_strategy="epoch",
|
43 |
logging_strategy="epoch",
|
44 |
learning_rate=5e-5,
|
45 |
per_device_train_batch_size=4,
|
46 |
per_device_eval_batch_size=4,
|
47 |
+
num_train_epochs=3,
|
48 |
weight_decay=0.01,
|
49 |
+
push_to_hub=False, # ✅ Pārvērties True, ja vēlies augšupielādēt Hugging Face Hub
|
50 |
)
|
51 |
|
52 |
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)
|
53 |
|
54 |
+
# ✅ 7. Izveido `Trainer` objektu
|
55 |
trainer = Trainer(
|
56 |
model=model,
|
57 |
args=training_args,
|
58 |
train_dataset=train_dataset,
|
59 |
+
eval_dataset=eval_dataset,
|
60 |
tokenizer=tokenizer,
|
61 |
+
data_collator=data_collator,
|
62 |
)
|
63 |
|
64 |
# ✅ 8. Sāc AI apmācību
|
|
|
72 |
results = trainer.evaluate()
|
73 |
print("📊 Testēšanas rezultāti:", results)
|
74 |
|
75 |
+
# ✅ 11. Ja vēlies augšupielādēt AI uz Hugging Face Hub
|
76 |
# (Pirms tam jāpieslēdzas Hugging Face ar `notebook_login()`)
|
77 |
+
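# trainer.push_to_hub()  # repozitoriju "lietotajsvards/elektro-ai" norādi ar hub_model_id iekš TrainingArguments; push_to_hub() pirmais arguments ir commit ziņojums, nevis repozitorija nosaukums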
|
78 |
+
|
79 |
|