# stable_diffusion_lora / autotrain-advanced / examples / text_classification_multiclass.py
# Hemanth-thunder's picture
# End of training
# c0551d3
import os
from uuid import uuid4
from datasets import load_dataset
from autotrain.dataset import AutoTrainDataset
from autotrain.project import Project
# Fresh UUID string per run; embedded in PROJECT_NAME below so each run gets
# a distinct project name.
RANDOM_ID = str(uuid4())
# Dataset identifier passed to `load_dataset` (with the "en" config) in the
# script body.
DATASET = "amazon_reviews_multi"
PROJECT_NAME = f"amazon_reviews_multi_{RANDOM_ID}"
# Task identifier given to AutoTrainDataset and repeated in every job's params.
TASK = "text_multi_class_classification"
# Model identifier passed as `hub_model` when the Project is constructed.
MODEL = "bert-base-uncased"
# Credentials are read from the environment; indexing `os.environ` raises
# KeyError as soon as the module is loaded if either variable is unset.
USERNAME = os.environ["AUTOTRAIN_USERNAME"]
TOKEN = os.environ["HF_TOKEN"]
if __name__ == "__main__":
    # Load the English configuration and turn the two splits used here into
    # pandas dataframes. The "test" split serves as the validation data.
    raw_splits = load_dataset(DATASET, "en")
    train_frame = raw_splits["train"].to_pandas()
    valid_frame = raw_splits["test"].to_pandas()

    # Wrap the dataframes in the structure AutoTrain expects.
    # `percent_valid` is None -- presumably because an explicit validation
    # frame is supplied; confirm against the AutoTrainDataset docs.
    autotrain_dataset = AutoTrainDataset(
        train_data=[train_frame],
        valid_data=[valid_frame],
        task=TASK,
        token=TOKEN,
        project_name=PROJECT_NAME,
        username=USERNAME,
        column_mapping={"text": "review_body", "label": "stars"},
        percent_valid=None,
    )
    autotrain_dataset.prepare()

    # To list every parameter available for a task:
    #
    #   from autotrain.params import Params
    #   params = Params(task=TASK, training_type="hub_model").get()
    #   print(params)
    #
    # Each row below becomes one training job:
    # (learning_rate, optimizer, scheduler). All jobs train for 5 epochs.
    job_settings = (
        (1e-5, "adamw_torch", "linear"),
        (3e-5, "adamw_torch", "cosine"),
        (5e-5, "sgd", "cosine"),
    )
    jobs = [
        {
            "task": TASK,
            "learning_rate": learning_rate,
            "optimizer": optimizer,
            "scheduler": scheduler,
            "epochs": 5,
        }
        for learning_rate, optimizer, scheduler in job_settings
    ]

    # Create the project with all jobs, then approve it using the returned id.
    project = Project(dataset=autotrain_dataset, hub_model=MODEL, job_params=jobs)
    created_project_id = project.create()
    project.approve(created_project_id)