# Setup

In [17]:
from datetime import datetime
import numpy as np
import torch
from torch import nn
from transformers import BertTokenizer, BertModel
from huggingface_hub import (
    PyTorchModelHubMixin,
    notebook_login,
    ModelCard,
    ModelCardData,
    EvalResult,
)
from datasets import DatasetDict, load_dataset
from torch.utils.data import Dataset, DataLoader
from statsmodels.stats.proportion import proportion_confint

In [2]:
# Authenticate this notebook with the Hugging Face Hub; the upload cell at the end of the
# notebook relies on it for its push_to_hub calls.
# NOTE(review): new_session=False presumably reuses a token that is already saved instead
# of prompting again — confirm against the huggingface_hub login documentation.
notebook_login(new_session=False)

# Functions

In [12]:
def my_print(x):
    """Print ``x`` prefixed with the current local time (``YYYY-MM-DD HH:MM:SS``)."""
    stamp = f"{datetime.now():%Y-%m-%d %H:%M:%S}"
    print(stamp, x)


def model_metrics(model, dataloader):
    """Compute the mean cross-entropy loss and the accuracy of ``model`` over ``dataloader``.

    Args:
        model: Module called as ``model(input_ids, attention_mask)`` that returns
            logits with the class scores along dimension 1.
        dataloader: Iterable of batches; each batch is a mapping with the keys
            ``"input_ids"``, ``"attention_mask"`` and ``"labels"`` holding tensors.

    Returns:
        ``(avg_loss, avg_acc)`` as Python floats. ``avg_loss`` is the mean of the
        per-batch losses and ``avg_acc`` is the fraction of correctly predicted
        examples. Both are ``nan`` when the dataloader yields no batches.

    The model is evaluated in eval mode under ``torch.no_grad()`` and is returned to
    whichever mode (train or eval) it was in when the function was called.
    """
    criterion = nn.CrossEntropyLoss()

    # Send batches to wherever the model lives instead of relying on a notebook-global
    # ``device``; a parameter-free model leaves the tensors where they already are.
    first_param = next(model.parameters(), None)
    target_device = None if first_param is None else first_param.device

    def _place(tensor):
        return tensor if target_device is None else tensor.to(target_device)

    was_training = model.training
    model.eval()

    total_loss = 0.0
    total_correct = 0
    total_length = 0
    num_batches = 0
    try:
        with torch.no_grad():
            for batch in dataloader:
                input_ids = _place(batch["input_ids"])
                attention_mask = _place(batch["attention_mask"])
                labels = _place(batch["labels"])

                outputs = model(input_ids, attention_mask)
                loss = criterion(outputs, labels)
                predictions = torch.argmax(outputs, dim=1)

                total_loss += loss.item()
                total_correct += int((predictions == labels).sum().item())
                total_length += int(labels.shape[0])
                # Count batches ourselves so iterables without __len__ also work.
                num_batches += 1
    finally:
        # Restore the caller's mode rather than unconditionally switching to train.
        model.train(was_training)

    if num_batches == 0 or total_length == 0:
        return float("nan"), float("nan")

    avg_loss = total_loss / num_batches
    avg_acc = total_correct / total_length
    return float(avg_loss), float(avg_acc)


def print_model_status(epoch, num_epochs, model, train_dataloader, test_dataloader):
    """Evaluate ``model`` on both dataloaders, log a one-line summary, and return the metrics.

    Args:
        epoch: Zero-based epoch index; it is shown as ``epoch + 1`` in the log line.
        num_epochs: Total number of epochs, shown after the slash in the log line.
        model: Model handed to ``model_metrics``.
        train_dataloader: Batches used for the "Train" figures.
        test_dataloader: Batches used for the "Test" figures.

    Returns:
        Dict with the keys ``train_loss``, ``train_acc``, ``test_loss`` and ``test_acc``.
    """
    tr_loss, tr_acc = model_metrics(model, train_dataloader)
    te_loss, te_acc = model_metrics(model, test_dataloader)

    summary = "; and ".join(
        (
            f"Loss: Train {tr_loss:0.3f}, Test {te_loss:0.3f}",
            f"Acc: Train {tr_acc:0.3f}, Test {te_acc:0.3f}",
        )
    )
    my_print(f"Epoch {epoch+1:2}/{num_epochs} done. {summary}")

    return {
        "train_loss": tr_loss,
        "train_acc": tr_acc,
        "test_loss": te_loss,
        "test_acc": te_acc,
    }


class BertClassifier(nn.Module, PyTorchModelHubMixin):
    """Pretrained BERT encoder followed by dropout and a linear classification layer.

    Args:
        num_labels: Number of output classes of the linear layer.
        bert_variety: Checkpoint name handed to ``BertModel.from_pretrained``.
    """

    def __init__(self, num_labels=8, bert_variety="bert-base-uncased"):
        super().__init__()
        encoder = BertModel.from_pretrained(bert_variety)
        self.bert = encoder
        # Expose the encoder's config on the wrapper and record the label count on it.
        self.config = encoder.config
        self.config.num_labels = num_labels
        self.dropout = nn.Dropout(p=0.05)
        # Size the head from the pooler's output width so it fits the loaded checkpoint.
        pooled_width = encoder.pooler.dense.out_features
        self.classifier = nn.Linear(pooled_width, num_labels)

    def forward(self, input_ids, attention_mask):
        """Return the classification logits for a batch of token ids and attention masks."""
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        return self.classifier(self.dropout(encoded.pooler_output))


class TextDataset(Dataset):
    """Map-style dataset that tokenizes all texts up front and serves one example per index.

    Args:
        texts: Texts passed to ``tokenizer`` in a single call.
        labels: One label per text; the first element of each label (e.g. the first
            character of a string) is converted with ``int`` to get the class id.
        tokenizer: Callable invoked with truncation, padding, ``max_length`` and
            ``return_tensors="pt"``; its result must offer ``.items()``.
        max_length: Maximum length forwarded to the tokenizer.
    """

    def __init__(self, texts, labels, tokenizer, max_length=256):
        self.texts = texts
        self.encodings = tokenizer(
            texts,
            max_length=max_length,
            truncation=True,
            padding=True,
            return_tensors="pt",
        )
        class_ids = []
        for raw_label in labels:
            class_ids.append(int(raw_label[0]))
        self.labels = torch.tensor(class_ids)

    def __getitem__(self, idx):
        """Return the encoded fields for example ``idx`` plus its label under ``"labels"``."""
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = values[idx]
        sample["labels"] = self.labels[idx]
        return sample

    def __len__(self) -> int:
        """Number of examples, taken from the label tensor."""
        return len(self.labels)


def train_model(model, train_dataloader, test_dataloader, device, num_epochs):
    """Fine-tune ``model`` with AdamW (lr 2e-5) and cross-entropy loss.

    The model is evaluated once before training (logged as epoch 0) and again after
    every epoch via ``print_model_status``.

    Args:
        model: Module called as ``model(input_ids, attention_mask)`` returning logits.
        train_dataloader: Batches with ``"input_ids"``, ``"attention_mask"`` and
            ``"labels"`` tensors, used for the optimisation steps.
        test_dataloader: Batches of the same form, used only for evaluation.
        device: Device each training batch is moved to before the forward pass.
        num_epochs: Number of passes over ``train_dataloader``.

    Returns:
        The metrics dict of the most recent evaluation: the one after the final
        epoch, or the pre-training baseline when ``num_epochs`` is 0.
    """
    optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
    criterion = nn.CrossEntropyLoss()
    model.train()

    # Baseline before any weight update. Keeping the result means ``metrics`` is
    # always bound, even when the epoch loop below never runs.
    metrics = print_model_status(
        -1, num_epochs, model, train_dataloader, test_dataloader
    )
    for epoch in range(num_epochs):
        # Re-assert train mode: the evaluation helper switches the model to eval mode.
        model.train()
        for batch in train_dataloader:
            optimizer.zero_grad()

            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            outputs = model(input_ids, attention_mask)
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()
        metrics = print_model_status(
            epoch, num_epochs, model, train_dataloader, test_dataloader
        )
    return metrics

In [13]:
# Pick the compute device for the notebook: Apple MPS first, then CUDA, else CPU.
device = torch.device(
    "mps"
    if torch.backends.mps.is_available()
    else "cuda"
    if torch.cuda.is_available()
    else "cpu"
)
if device.type == "mps":
    # Release cached MPS memory before starting new work.
    torch.mps.empty_cache()

In [14]:
def run_training(
    max_dataset_size=16 * 200,
    bert_variety="bert-base-uncased",
    max_length=256,
    num_epochs=3,
    batch_size=32,
):
    """Load the challenge dataset, fine-tune a ``BertClassifier`` and report its metrics.

    Args:
        max_dataset_size: Maximum number of rows kept from each of the train and
            test splits, or the string ``"full"`` to keep everything.
        bert_variety: Checkpoint name used for both the tokenizer and the model.
        max_length: Maximum length passed to the tokenizer.
        num_epochs: Number of training epochs.
        batch_size: Batch size of both dataloaders.

    Returns:
        ``(model, tokenizer, training_regime, metrics)`` where ``training_regime`` is
        a dict echoing the arguments and ``metrics`` is what ``train_model`` returned.
    """
    training_regime = {
        "max_dataset_size": max_dataset_size,
        "bert_variety": bert_variety,
        "max_length": max_length,
        "num_epochs": num_epochs,
        "batch_size": batch_size,
    }

    # Fixed 80/20 split with a fixed seed so every run evaluates on the same rows.
    hf_dataset = load_dataset("quotaclimat/frugalaichallenge-text-train")
    splits = hf_dataset["train"].train_test_split(test_size=0.2, seed=42)
    train_split, test_split = splits["train"], splits["test"]

    # The cap is compared against the size of the unsplit data and, when it applies,
    # truncates both splits.
    should_truncate = max_dataset_size != "full" and max_dataset_size < len(
        hf_dataset["train"]
    )
    if should_truncate:
        train_split = train_split[:max_dataset_size]
        test_split = test_split[:max_dataset_size]

    tokenizer = BertTokenizer.from_pretrained(bert_variety, max_length=max_length)
    model = BertClassifier(bert_variety=bert_variety)

    # Same preference order as the notebook-level device cell: MPS, then CUDA, then CPU.
    device = torch.device(
        "mps"
        if torch.backends.mps.is_available()
        else "cuda"
        if torch.cuda.is_available()
        else "cpu"
    )
    if device.type == "mps":
        torch.mps.empty_cache()
    model.to(device)

    # Tokenize both splits first, then wrap them in dataloaders.
    text_dataset_train, text_dataset_test = (
        TextDataset(
            split["quote"],
            split["label"],
            tokenizer=tokenizer,
            max_length=max_length,
        )
        for split in (train_split, test_split)
    )
    # Only the training batches are shuffled.
    dataloader_train = DataLoader(
        text_dataset_train, shuffle=True, batch_size=batch_size
    )
    dataloader_test = DataLoader(
        text_dataset_test, shuffle=False, batch_size=batch_size
    )

    metrics = train_model(
        model, dataloader_train, dataloader_test, device, num_epochs=num_epochs
    )
    return model, tokenizer, training_regime, metrics

# Exploration

In [15]:
# Checkpoint id passed to run_training as `bert_variety`; it also ends up as the model
# card's base model and in the card summary.
base_model_repo = "google/bert_uncased_L-12_H-768_A-12"
# Name used for the model card and as the repo name in the push_to_hub calls.
model_and_repo_name = "frugal-ai-text-bert-base"

## Check if runs

In [16]:
# Quick end-to-end check on a capped dataset (1600 rows per split) with a shorter
# max_length, before committing to a full training run.
model, tokenizer, regime, metrics = run_training(
    max_dataset_size=16 * 100,
    bert_variety=base_model_repo,
    max_length=128,
    num_epochs=3,
    batch_size=16,
)

4872 1219
8 8


KeyboardInterrupt: 

In [None]:
# Hand-written probes; the trailing comment on each line is the label we expect.
test_text = [
    "This was a great experience!",  # 0_not_relevant
    "My favorite hike is Laguna de los Tres.",  # 0_not_relevant
    "Crops will grow great in Finland if it's warmer there.",  # 3_not_bad
    "Climate change is fake.",  # 1_not_happening
    "The apparent warming is caused by solar cycles.",  # 2_not_human
    "Solar panels emit bad vibes.",  # 4_solutions_harmful_unnecessary
    "All those so-called scientists are Democrats.",  # 6_proponents_biased
]

model.eval()
test_encoding = tokenizer(
    test_text,
    max_length=256,
    truncation=True,
    padding=True,
    return_tensors="pt",
)
test_input_ids = test_encoding["input_ids"].to(device)
test_attention_mask = test_encoding["attention_mask"].to(device)

# Inference only, so skip gradient tracking for the forward pass.
with torch.no_grad():
    outputs = model(test_input_ids, test_attention_mask)

predictions = outputs.argmax(dim=1)
my_print(f"Predictions: {predictions}")

## Hyperparameters

Overall top performance per model. Machine: bert-base is using an Nvidia 1xL40S, no inference-time cleverness attempted.

[accidentally cheating bert-base by training on full dataset](https://huggingface.co./datasets/frugal-ai-challenge/public-leaderboard-text/blob/main/submissions/Nonnormalizable_20250117_220350.json):\
acc 0.954, energy 0.736 Wh

[bert-base some hp tuning](https://huggingface.co./datasets/frugal-ai-challenge/public-leaderboard-text/blob/main/submissions/Nonnormalizable_20250120_231350.json):\
acc 0.707, energy 0.803 Wh

Added normal data loader, batch size 32. Moved to Nvidia T4 small.

bert-tiny\
acc 0.618, energy 0.079 Wh

bert-mini\
acc 0.650, energy 0.129 Wh

bert-small\
acc 0.656, energy 0.256 Wh

bert-medium\
acc 0.645, energy 0.273 Wh

bert-base\
acc 0.691, energy 1.053 Wh

In [23]:
# Binomial confidence interval (Jeffreys method) for an observed accuracy.
# NOTE(review): 1219 is presumably the size of the held-out 20% split — confirm.
nobs = 1219
# Matches the bert-small accuracy listed in the notes above.
acc = 0.656
proportion_confint(
    count=int(nobs * acc),  # approximate number of correct predictions (truncated by int)
    nobs=nobs,
    method="jeffreys",
)

(0.6284344081642794, 0.6817389605903139)

Looking at bert-tiny.
Scanning max_length and batch_size with num_epochs set to 3, looks like we want 256 and 16. That gets us\
`2025-01-21 10:18:56 Epoch 3/3 done. Loss: Train 1.368, Test 1.432; and Acc: Train 0.499, Test 0.477`.

Then looking at num_epochs, we saturate test set performance at 15 (~3 minutes), giving e.g.\
`2025-01-21 10:38:30 Epoch 15/20 done. Loss: Train 0.553, Test 1.157; and Acc: Train 0.833, Test 0.595`

For bert-mini, just looking at num_epochs, we choose 8\
`2025-01-22 10:56:12 Epoch  8/20 done. Loss: Train 0.305, Test 1.090; and Acc: Train 0.920, Test 0.646`

For bert-small, 4\
`2025-01-22 11:39:41 Epoch  4/15 done. Loss: Train 0.301, Test 0.978; and Acc: Train 0.920, Test 0.664`

For bert-medium, 4\
`2025-01-22 12:09:51 Epoch  4/10 done. Loss: Train 0.294, Test 1.020; and Acc: Train 0.922, Test 0.660`

For bert-base, 3 does happen to be correct, just checking for completeness\
`2025-01-22 12:59:10 Epoch  3/7 done. Loss: Train 0.156, Test 0.930; and Acc: Train 0.964, Test 0.703`

In [9]:
# Settings shared by the final training run; num_epochs is supplied separately at the call.
static_hyperparams = {
    "max_dataset_size": "full",
    "bert_variety": base_model_repo,
    "max_length": 256,
    "batch_size": 16,
}

In [10]:
# Final training run with the shared hyperparameters; the returned regime and metrics
# are written into the model card below.
model, tokenizer, training_regime, testing_metrics = run_training(
    **static_hyperparams,
    num_epochs=3,
)

2025-01-22 13:21:10 Epoch  0/3 done. Loss: Train 2.088, Test 2.085; and Acc: Train 0.137, Test 0.135
2025-01-22 13:26:50 Epoch  1/3 done. Loss: Train 0.780, Test 1.012; and Acc: Train 0.747, Test 0.648
2025-01-22 13:32:30 Epoch  2/3 done. Loss: Train 0.346, Test 0.890; and Acc: Train 0.904, Test 0.689
2025-01-22 13:38:14 Epoch  3/3 done. Loss: Train 0.167, Test 0.968; and Acc: Train 0.959, Test 0.691


# Model to upload

In [11]:
# Metadata block of the model card (rendered as the YAML header of the README).
card_data = ModelCardData(
    model_name=model_and_repo_name,
    base_model=static_hyperparams["bert_variety"],
    license="apache-2.0",
    language=["en"],
    datasets=["QuotaClimat/frugalaichallenge-text-train"],
    tags=["model_hub_mixin", "pytorch_model_hub_mixin", "climate"],
    pipeline_tag="text-classification",
)
# Card body from the default template; the training regime and the final metrics of
# the run above are embedded so the published card documents how the model was made.
card = ModelCard.from_template(
    card_data,
    model_summary=f"Classify text into 8 categories of climate misinformation using {base_model_repo}.",
    model_description="Fine-tuned BERT for classifying climate information as part of the Frugal AI Challenge, for submission to https://huggingface.co./frugal-ai-challenge and scoring on accuracy and efficiency. Trained on only the non-evaluation 80% of the data, so its (non-cheating) score will be lower.",
    developers="Andre Bach",
    funded_by="N/A",
    shared_by="Andre Bach",
    model_type="Text classification",
    repo=model_and_repo_name,
    training_regime=training_regime,
    testing_metrics=testing_metrics,
)
# To preview before uploading, run print(card_data.to_yaml()) or print(card) in a scratch cell.

In [12]:
# Freeze the artifacts to publish under their own names, so a later re-run of a
# training cell (which rebinds `model` / `tokenizer`) does not change what gets uploaded
# unless this cell is re-run too.
model_final = model
tokenizer_final = tokenizer

In [13]:
# Hand-written probes for the model about to be uploaded; the trailing comment on each
# line is the label we expect.
test_text = [
    "This was a great experience!",  # 0_not_relevant
    "My favorite hike is Laguna de los Tres.",  # 0_not_relevant
    "Crops will grow great in Finland if it's warmer there.",  # 3_not_bad
    "Climate change is fake.",  # 1_not_happening
    "The apparent warming is caused by solar cycles.",  # 2_not_human
    "Solar panels emit bad vibes.",  # 4_solutions_harmful_unnecessary
    "All those so-called scientists are Democrats.",  # 6_proponents_biased
]

model_final.eval()
test_encoding = tokenizer_final(
    test_text,
    max_length=256,
    truncation=True,
    padding=True,
    return_tensors="pt",
)
test_input_ids = test_encoding["input_ids"].to(device)
test_attention_mask = test_encoding["attention_mask"].to(device)

# Inference only, so skip gradient tracking for the forward pass.
with torch.no_grad():
    outputs = model_final(test_input_ids, test_attention_mask)

predictions = outputs.argmax(dim=1)
my_print(f"Predictions: {predictions}")

2025-01-22 13:38:14 Predictions: tensor([0, 0, 3, 1, 2, 4, 6], device='mps:0')


In [14]:
# Upload the model weights, the tokenizer files, the encoder config and the model card.
model_final.push_to_hub(model_and_repo_name)
tokenizer_final.push_to_hub(model_and_repo_name)
# NOTE(review): this pushes the BERT config object stored on the wrapper; confirm it
# does not overwrite a config file already written by the model upload above.
model_final.config.push_to_hub(model_and_repo_name)
# NOTE(review): only this call spells out the "Nonnormalizable/" namespace; the calls
# above presumably resolve to the same repo through the logged-in user — confirm.
card.push_to_hub(f"Nonnormalizable/{model_and_repo_name}")

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co./Nonnormalizable/frugal-ai-text-bert-base/commit/46ba6471d612d348636c07c47f57d90dd14c9f74', commit_message='Upload README.md with huggingface_hub', commit_description='', oid='46ba6471d612d348636c07c47f57d90dd14c9f74', pr_url=None, repo_url=RepoUrl('https://huggingface.co./Nonnormalizable/frugal-ai-text-bert-base', endpoint='https://huggingface.co.', repo_type='model', repo_id='Nonnormalizable/frugal-ai-text-bert-base'), pr_revision=None, pr_num=None)