|
import os |
|
from uuid import uuid4 |
|
|
|
from datasets import load_dataset |
|
|
|
from autotrain.dataset import AutoTrainDataset |
|
from autotrain.project import Project |
|
|
|
|
|
# Run configuration for this example script.

# Fresh random suffix generated on every run; it is embedded in PROJECT_NAME.
RANDOM_ID = str(uuid4())

# Dataset identifier passed to `load_dataset` in the script body.
DATASET = "amazon_reviews_multi"

# Project name handed to AutoTrainDataset.
PROJECT_NAME = f"amazon_reviews_multi_{RANDOM_ID}"

# Task identifier, shared by the dataset and by every job configuration.
TASK = "text_multi_class_classification"

# Value passed as `hub_model` when the Project is constructed.
MODEL = "bert-base-uncased"
|
|
|
# Credentials are read from the environment at import time. Indexing
# `os.environ` directly (rather than `.get`) makes a missing variable fail
# immediately with a KeyError that names the variable.
USERNAME = os.environ["AUTOTRAIN_USERNAME"]
TOKEN = os.environ["HF_TOKEN"]
|
|
|
|
|
if __name__ == "__main__":
    # Load the "en" configuration of the dataset and pick the two splits
    # that are used below.
    dataset = load_dataset(DATASET, "en")
    train = dataset["train"]
    # NOTE(review): the validation data is taken from the "test" split --
    # confirm this is intentional.
    validation = dataset["test"]

    # AutoTrainDataset is given pandas DataFrames (wrapped in lists below).
    train_df = train.to_pandas()
    validation_df = validation.to_pandas()

    dset = AutoTrainDataset(
        train_data=[train_df],
        valid_data=[validation_df],
        task=TASK,
        token=TOKEN,
        project_name=PROJECT_NAME,
        username=USERNAME,
        # Maps the "text" / "label" roles to the dataset's own column names.
        column_mapping={"text": "review_body", "label": "stars"},
        # Presumably disables automatic train/valid splitting, since explicit
        # validation data is supplied -- TODO confirm.
        percent_valid=None,
    )
    dset.prepare()

    # Three job configurations for the same task; they differ in learning
    # rate, optimizer and scheduler.
    job1 = {
        "task": TASK,
        "learning_rate": 1e-5,
        "optimizer": "adamw_torch",
        "scheduler": "linear",
        "epochs": 5,
    }

    job2 = {
        "task": TASK,
        "learning_rate": 3e-5,
        "optimizer": "adamw_torch",
        "scheduler": "cosine",
        "epochs": 5,
    }

    job3 = {
        "task": TASK,
        "learning_rate": 5e-5,
        "optimizer": "sgd",
        "scheduler": "cosine",
        "epochs": 5,
    }

    jobs = [job1, job2, job3]

    # Create the project from the prepared dataset and the job list, then
    # approve it using the id returned by `create()`.
    project = Project(dataset=dset, hub_model=MODEL, job_params=jobs)
    project_id = project.create()
    project.approve(project_id)
|
|