Upload 5 files

Files changed (5)

model_config.yaml ADDED Viewed

# Model configuration added by this commit: a text-classification model
# config named `distilbert_text_classification` with three output labels.
# NOTE(review): the hyperparameter keys further down (dim, hidden_dim,
# n_heads, n_layers, qa_dropout, tie_weights_, ...) look like the field names
# of Hugging Face's DistilBertConfig; presumably they carry the same
# meanings here. Confirm against the code that loads this file.
+name: distilbert_text_classification
+config_type: model
+task: text_classification
# Label set. num_labels equals the number of id2label entries (3). The ids
# are quoted, so they load as strings rather than integers.
+num_labels: 3
+id2label:
+  '0': negative
+  '1': positive
+  '2': neutral
# Architecture / regularisation hyperparameters. hidden_dim is 4 x dim
# (3072 = 4 * 768) and dim is divisible by n_heads (768 / 12 = 64).
# max_position_embeddings (512) equals max_length in
# preprocessor/tokenizer_config.yaml from the same commit.
+activation: gelu
+attention_dropout: 0.1
+dim: 768
+dropout: 0.1
+hidden_dim: 3072
+initializer_range: 0.02
+max_position_embeddings: 512
+n_heads: 12
+n_layers: 6
+output_past: true
# pad_token_id agrees with pad_token_id: 0 in the tokenizer config.
+pad_token_id: 0
# NOTE(review): qa_dropout presumably only matters for a question-answering
# head and is unused for text classification. Verify before relying on it.
+qa_dropout: 0.1
+tie_weights_: true
# NOTE(review): the tokenizer's train_config in this commit lists
# vocab_size: 30000, while this file says 42000. tokenizer.json is not
# rendered in the diff, so confirm 42000 matches the shipped vocabulary.
+vocab_size: 42000

preprocessor/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

preprocessor/tokenizer_config.yaml ADDED Viewed

# Tokenizer (preprocessor) configuration added by this commit, named
# `wordpiece_tokenizer`. pretrained_path references `hezar-ai/bert-base-fa`.
+name: wordpiece_tokenizer
+config_type: preprocessor
+pretrained_path: hezar-ai/bert-base-fa
# Truncation settings. max_length (512) equals max_position_embeddings in
# model_config.yaml from the same commit.
+max_length: 512
+truncation_strategy: longest_first
+truncation_direction: right
+stride: 0
# Padding settings. Both padding and truncation are configured on the right.
+padding_strategy: longest
+padding_direction: right
# NOTE(review): 0 presumably means "no multiple-of constraint" rather than a
# literal multiple of zero. Confirm how the consumer interprets it.
+pad_to_multiple_of: 0
# pad_token_id agrees with pad_token_id: 0 in model_config.yaml.
+pad_token_id: 0
# Bracketed tokens are quoted because a plain scalar starting with `[` would
# be parsed as a YAML flow sequence.
+pad_token: '[PAD]'
+pad_token_type_id: 0
+unk_token: '[UNK]'
# pad_token and unk_token above both appear in this list.
# NOTE(review): '[PAD]' is the fourth entry here, yet pad_token_id is 0, so
# list order presumably does not define token ids. Verify against
# tokenizer.json.
+special_tokens:
+- '[UNK]'
+- '[SEP]'
+- '[CLS]'
+- '[PAD]'
+- '[MASK]'
# Quoted because an unquoted value starting with `#` would be read as a
# comment.
+wordpieces_prefix: '##'
# Nested settings under train_config; it repeats name and config_type from
# the top level. Presumably only used when training the tokenizer from
# scratch. TODO confirm.
# NOTE(review): vocab_size here (30000) differs from vocab_size: 42000 in
# model_config.yaml.
# initial_alphabet is an explicit empty list, not null.
+train_config:
+  name: wordpiece_tokenizer
+  config_type: preprocessor
+  vocab_size: 30000
+  min_frequency: 2
+  limit_alphabet: 1000
+  initial_alphabet: []
+  show_progress: true

train/dataset_config.yaml ADDED Viewed

# Dataset configuration added by this commit for the text_classification
# task. path and tokenizer_path are repository-style identifiers;
# tokenizer_path equals pretrained_path in
# preprocessor/tokenizer_config.yaml.
+name: text_classification
+config_type: dataset
+task: text_classification
+path: hezar-ai/sentiment_digikala_snappfood
+tokenizer_path: hezar-ai/bert-base-fa
# NOTE(review): presumably the names of the dataset columns holding the
# target label and the input text. Confirm against the dataset schema.
+label_field: label
+text_field: text
# Same id -> label mapping as model_config.yaml (0 negative, 1 positive,
# 2 neutral). The ids are quoted, so they load as strings.
+id2label:
+  '0': negative
+  '1': positive
+  '2': neutral
# Exact inverse of id2label.
# NOTE(review): the values are quoted, so they load as the strings '0', '1',
# '2' rather than integers, while num_labels below is a plain integer.
# Confirm the consumer expects string ids here.
+label2id:
+  negative: '0'
+  positive: '1'
+  neutral: '2'
# Equals the number of entries in id2label / label2id.
+num_labels: 3

train/train_config.yaml ADDED Viewed

# Training-run configuration added by this commit.
# NOTE(review): this file is named `bert_text_classification` and
# initialises weights from `hezar-ai/bert-base-fa`, whereas
# model_config.yaml in the same commit is named
# `distilbert_text_classification` with n_layers: 6. Confirm that the
# name / init checkpoint recorded here are the ones actually used.
+name: bert_text_classification
+config_type: train
+device: cuda
+init_weights_from: hezar-ai/bert-base-fa
+seed: 42
+batch_size: 8
# A single metric, f1, configured with task: multiclass. This is in line
# with the three-label setup in the model and dataset configs.
+metrics:
+  f1:
+    task: multiclass
+num_train_epochs: 10
# Relative path with a trailing slash.
# NOTE(review): presumably resolved against the process working directory.
# Confirm.
+checkpoints_dir: checkpoints/