dejanseo committed on
Commit
8edab9a
1 Parent(s): 16c36c0

Upload 6 files

Files changed (6)
  1. config.json +44 -0
  2. model.safetensors +3 -0
  3. spiece.model +3 -0
  4. tokenizer.json +0 -0
  5. tokenizer_config.json +1 -0
  6. train.py +93 -0
config.json ADDED
@@ -0,0 +1,44 @@
+ {
+   "_name_or_path": "albert-base-v2",
+   "architectures": [
+     "AlbertForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0,
+   "bos_token_id": 2,
+   "classifier_dropout_prob": 0.1,
+   "down_scale_factor": 1,
+   "embedding_size": 128,
+   "eos_token_id": 3,
+   "gap_size": 0,
+   "hidden_act": "gelu_new",
+   "hidden_dropout_prob": 0,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "LABEL_0",
+     "1": "LABEL_1",
+     "2": "LABEL_2"
+   },
+   "initializer_range": 0.02,
+   "inner_group_num": 1,
+   "intermediate_size": 3072,
+   "label2id": {
+     "LABEL_0": 0,
+     "LABEL_1": 1,
+     "LABEL_2": 2
+   },
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "albert",
+   "net_structure_type": 0,
+   "num_attention_heads": 12,
+   "num_hidden_groups": 1,
+   "num_hidden_layers": 12,
+   "num_memory_blocks": 0,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "problem_type": "single_label_classification",
+   "torch_dtype": "float32",
+   "transformers_version": "4.42.0.dev0",
+   "type_vocab_size": 2,
+   "vocab_size": 30000
+ }
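
The config above describes a stock albert-base-v2 encoder with a three-way, single-label classification head whose classes keep the generic names LABEL_0 through LABEL_2. A minimal inference sketch, assuming a local directory containing the files from this commit (the path and example sentence below are placeholders, not part of the upload):

import torch
from transformers import AlbertTokenizer, AlbertForSequenceClassification

# Placeholder path: a local checkout holding config.json, model.safetensors,
# spiece.model and the tokenizer files from this commit.
model_dir = "./"

tokenizer = AlbertTokenizer.from_pretrained(model_dir)
model = AlbertForSequenceClassification.from_pretrained(model_dir)
model.eval()

inputs = tokenizer("Example sentence to classify.", return_tensors="pt",
                   truncation=True, max_length=512)
with torch.no_grad():
    logits = model(**inputs).logits
pred = int(logits.argmax(dim=-1))
print(pred, model.config.id2label[pred])  # class index and its LABEL_* name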
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8000a8d12b39ac9177adf5dbf2b532020795034f652684803573c766f94f2621
+ size 46746988
spiece.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fefb02b667a6c5c2fe27602d28e5fb3428f66ab89c7d6f388e7c8d44a02d0336
+ size 760289
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"model_max_length": 512}
train.py ADDED
@@ -0,0 +1,93 @@
+ import torch
+ from transformers import AlbertTokenizer, AlbertForSequenceClassification, Trainer, TrainingArguments
+ from datasets import load_dataset
+ import evaluate
+ import wandb
+ import numpy as np
+
+ # Initialize WandB
+ wandb.init(entity="dejan", project="good-vibes")
+
+ # Adjustable parameters
+ model_name = "albert-base-v2"
+ batch_size = 32
+ epochs = 10
+ learning_rate = 2e-5
+ gradient_clip_value = 1.0
+ warmup_steps = 500
+
+ # Load tokenizer and model
+ tokenizer = AlbertTokenizer.from_pretrained(model_name)
+ model = AlbertForSequenceClassification.from_pretrained(model_name, num_labels=3)
+
+ # Load dataset
+ dataset = load_dataset('csv', data_files={'train': 'sentences.csv'})
+ dataset = dataset['train'].train_test_split(test_size=0.1)
+
+ # Preprocess the data
+ def preprocess_function(examples):
+     return tokenizer(examples['text'], padding='max_length', truncation=True)
+
+ encoded_dataset = dataset.map(preprocess_function, batched=True)
+ encoded_dataset = encoded_dataset.rename_column("label", "labels")
+
+ # Define metrics
+ accuracy_metric = evaluate.load("accuracy")
+ f1_metric = evaluate.load("f1")
+ precision_metric = evaluate.load("precision")
+ recall_metric = evaluate.load("recall")
+
+ def compute_metrics(eval_pred):
+     logits, labels = eval_pred
+     predictions = np.argmax(logits, axis=-1)
+     accuracy = accuracy_metric.compute(predictions=predictions, references=labels)
+     f1 = f1_metric.compute(predictions=predictions, references=labels, average='weighted')
+     precision = precision_metric.compute(predictions=predictions, references=labels, average='weighted')
+     recall = recall_metric.compute(predictions=predictions, references=labels, average='weighted')
+     return {
+         "accuracy": accuracy["accuracy"],
+         "f1": f1["f1"],
+         "precision": precision["precision"],
+         "recall": recall["recall"]
+     }
+
+ # Training arguments
+ training_args = TrainingArguments(
+     output_dir="./results",
+     evaluation_strategy="epoch",
+     save_strategy="epoch",
+     learning_rate=learning_rate,
+     per_device_train_batch_size=batch_size,
+     per_device_eval_batch_size=batch_size,
+     num_train_epochs=epochs,
+     weight_decay=0.01,
+     logging_dir="./logs",
+     logging_steps=10,
+     load_best_model_at_end=True,
+     metric_for_best_model="accuracy",  # Use accuracy to define the best model
+     greater_is_better=True,  # Set to True if higher metric value is better
+     gradient_accumulation_steps=2,
+     fp16=True,
+     report_to="wandb",
+     run_name="albert-finetuning",
+     warmup_steps=warmup_steps,
+     max_grad_norm=gradient_clip_value  # Correct parameter for gradient clipping
+ )
+
+ # Trainer
+ trainer = Trainer(
+     model=model,
+     args=training_args,
+     train_dataset=encoded_dataset['train'],
+     eval_dataset=encoded_dataset['test'],
+     compute_metrics=compute_metrics
+ )
+
+ # Train the model
+ trainer.train()
+
+ # Save the model
+ trainer.save_model("fine-tuned-albert-base-v2")
+
+ # Finish WandB run
+ wandb.finish()
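
Note that train.py writes only the model weights and config via trainer.save_model(); since no tokenizer is passed to the Trainer, the tokenizer files in this commit (spiece.model, tokenizer.json, tokenizer_config.json) are not produced by that call. A plausible follow-up step, not part of the script as uploaded, would be to save the tokenizer into the same directory so it loads as a complete checkpoint:

# Hypothetical extra step: write the tokenizer next to the saved model so the
# directory can be loaded end-to-end with from_pretrained().
tokenizer.save_pretrained("fine-tuned-albert-base-v2")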