# AIenergy / app.py
# janushex's picture
# Update app.py
# fb5fcd3 verified
import torch
import json
import os
from transformers import T5Tokenizer, T5ForConditionalGeneration, Trainer, TrainingArguments, DataCollatorForSeq2Seq
from datasets import Dataset
# ✅ 1. Load the training data
with open("train.json", mode="r", encoding="utf-8") as f:
    train_data = json.load(f)

# ✅ 2. Convert the data to the Hugging Face dataset format
# Each record's "jautājums" field becomes the model input text and its
# "atbilde" field becomes the target text.
qa_pairs = [
    {"input_text": d["jautājums"], "target_text": d["atbilde"]}
    for d in train_data
]
dataset = Dataset.from_list(qa_pairs)

# ✅ 3. Split the data for training and testing (80% train, 20% test)
train_test_split = dataset.train_test_split(test_size=0.2)
train_dataset, eval_dataset = train_test_split["train"], train_test_split["test"]

# ✅ 4. Choose the model (`mT5-small` or another one)
model_name = "google/mt5-small"

# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# NOTE(review): the checkpoint is an mT5 model but is loaded through the T5
# classes -- confirm this is intended (transformers also ships MT5-specific
# classes).
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)
model = model.to(device)
# ✅ 5. Prepare the data for training
def preprocess_data(examples):
    """Tokenize a batch of input/target texts for seq2seq training.

    Args:
        examples: Mapping with an ``"input_text"`` entry and a
            ``"target_text"`` entry, each passed straight to the
            module-level ``tokenizer``.

    Returns:
        The tokenizer output for the input texts, with the token ids of the
        target texts stored under the ``"labels"`` key.
    """
    # Inputs and targets share the same length limit and truncation setting.
    tokenize_options = {"max_length": 128, "truncation": True}

    encoded_inputs = tokenizer(examples["input_text"], **tokenize_options)
    encoded_targets = tokenizer(examples["target_text"], **tokenize_options)

    # The target token ids are what the model is trained to produce.
    encoded_inputs["labels"] = encoded_targets["input_ids"]
    return encoded_inputs
# Tokenize both splits batch-wise (training split first, then evaluation split).
train_dataset, eval_dataset = (
    split.map(preprocess_data, batched=True)
    for split in (train_dataset, eval_dataset)
)

# The collator receives both the tokenizer and the model.
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

# ✅ 6. Define the training parameters
# NOTE(review): newer transformers releases may rename `evaluation_strategy`
# to `eval_strategy` -- confirm against the installed version.
OUTPUT_DIR = "./trained_model"
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=3,
    learning_rate=5e-5,
    weight_decay=0.01,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    # Evaluate, checkpoint and log once per epoch.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",
    push_to_hub=False,  # ✅ Set to True to upload to the Hugging Face Hub
)

# ✅ 7. Create the `Trainer`
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

# ✅ 8. Start the training
trainer.train()

# ✅ 9. Save the trained model together with its tokenizer
for artifact in (model, tokenizer):
    artifact.save_pretrained(OUTPUT_DIR)

# ✅ 10. Evaluate the model after training
results = trainer.evaluate()
print("📊 Testēšanas rezultāti:", results)

# ✅ 11. Optional: upload the model to the Hugging Face Hub
# (Log in to Hugging Face first with `notebook_login()`.)
# trainer.push_to_hub("lietotajsvards/elektro-ai")