janushex committed on
Commit
fb5fcd3
·
verified ·
1 Parent(s): 6e4ad28

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -11
app.py CHANGED
@@ -9,9 +9,11 @@ with open("train.json", "r", encoding="utf-8") as f:
9
  train_data = json.load(f)
10
 
11
  # ✅ 2. Pārveido datus Hugging Face dataset formātā
12
- dataset = Dataset.from_list([{"input_text": d["question"], "target_text": d["answer"]} for d in train_data])
 
 
13
 
14
- # ✅ 3. Sadala datus treniņam un testam (80% apmācībai, 20% testam)
15
  train_test_split = dataset.train_test_split(test_size=0.2)
16
  train_dataset = train_test_split["train"]
17
  eval_dataset = train_test_split["test"]
@@ -36,27 +38,27 @@ eval_dataset = eval_dataset.map(preprocess_data, batched=True)
36
  # ✅ 6. Definē treniņa parametrus
37
  training_args = TrainingArguments(
38
  output_dir="./trained_model",
39
- evaluation_strategy="epoch", # ✅ Veic novērtēšanu pēc katras epohas
40
- save_strategy="epoch", # ✅ Saglabā modeli pēc katras epohas
41
  logging_strategy="epoch",
42
  learning_rate=5e-5,
43
  per_device_train_batch_size=4,
44
  per_device_eval_batch_size=4,
45
- num_train_epochs=3, # ✅ Cik reizes AI mācīsies
46
  weight_decay=0.01,
47
- push_to_hub=False # ✅ Pārvērties uz True, ja vēlies augšupielādēt Hugging Face Hub
48
  )
49
 
50
  data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)
51
 
52
- # ✅ 7. Izveido `Trainer` klasi
53
  trainer = Trainer(
54
  model=model,
55
  args=training_args,
56
  train_dataset=train_dataset,
57
- eval_dataset=eval_dataset, # ✅ Testēšanas datu kopa
58
  tokenizer=tokenizer,
59
- data_collator=data_collator
60
  )
61
 
62
  # ✅ 8. Sāc AI apmācību
@@ -70,7 +72,8 @@ tokenizer.save_pretrained("./trained_model")
70
  results = trainer.evaluate()
71
  print("📊 Testēšanas rezultāti:", results)
72
 
73
- # ✅ 11. Ja vēlies, augšupielādē AI uz Hugging Face Hub
74
  # (Pirms tam jāpieslēdzas Hugging Face ar `notebook_login()`)
75
- trainer.push_to_hub("lietotajsvards/elektro-ai")
 
76
 
 
9
  train_data = json.load(f)
10
 
11
  # ✅ 2. Pārveido datus Hugging Face dataset formātā
12
+ dataset = Dataset.from_list(
13
+ [{"input_text": d["jautājums"], "target_text": d["atbilde"]} for d in train_data]
14
+ )
15
 
16
+ # ✅ 3. Sadala datus treniņam un testam (80% treniņam, 20% testam)
17
  train_test_split = dataset.train_test_split(test_size=0.2)
18
  train_dataset = train_test_split["train"]
19
  eval_dataset = train_test_split["test"]
 
38
  # ✅ 6. Definē treniņa parametrus
39
  training_args = TrainingArguments(
40
  output_dir="./trained_model",
41
+ evaluation_strategy="epoch",
42
+ save_strategy="epoch",
43
  logging_strategy="epoch",
44
  learning_rate=5e-5,
45
  per_device_train_batch_size=4,
46
  per_device_eval_batch_size=4,
47
+ num_train_epochs=3,
48
  weight_decay=0.01,
49
+ push_to_hub=False, # ✅ Pārvērties True, ja vēlies augšupielādēt Hugging Face Hub
50
  )
51
 
52
  data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)
53
 
54
+ # ✅ 7. Izveidot `Trainer` klasi
55
  trainer = Trainer(
56
  model=model,
57
  args=training_args,
58
  train_dataset=train_dataset,
59
+ eval_dataset=eval_dataset,
60
  tokenizer=tokenizer,
61
+ data_collator=data_collator,
62
  )
63
 
64
  # ✅ 8. Sāc AI apmācību
 
72
  results = trainer.evaluate()
73
  print("📊 Testēšanas rezultāti:", results)
74
 
75
+ # ✅ 11. Ja vēlies augšupielādēt AI uz Hugging Face Hub
76
  # (Pirms tam jāpieslēdzas Hugging Face ar `notebook_login()`)
77
+ # trainer.push_to_hub("lietotajsvards/elektro-ai")
78
+
79