bert model

#17

by richasinha12 - opened Feb 16, 2023

←

Files changed (8) hide show

README.md CHANGED Viewed

@@ -131,7 +131,6 @@ This model reaches an accuracy of 91.3 on the dev set (for comparison, Bert bert
 - **Parent Model:** For more details about DistilBERT, we encourage users to check out [this model card](https://huggingface.co/distilbert-base-uncased).
 - **Resources for more information:**
     - [Model Documentation](https://huggingface.co/docs/transformers/main/en/model_doc/distilbert#transformers.DistilBertForSequenceClassification)
-    - [DistilBERT paper](https://arxiv.org/abs/1910.01108)
 ## How to Get Started With the Model
@@ -141,8 +140,8 @@ Example of single-label classification:
 import torch
 from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
-tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
-model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
 inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
 with torch.no_grad():

 - **Parent Model:** For more details about DistilBERT, we encourage users to check out [this model card](https://huggingface.co/distilbert-base-uncased).
 - **Resources for more information:**
     - [Model Documentation](https://huggingface.co/docs/transformers/main/en/model_doc/distilbert#transformers.DistilBertForSequenceClassification)
 ## How to Get Started With the Model
 import torch
 from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
+tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
+model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased")
 inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
 with torch.no_grad():

onnx/added_tokens.json DELETED Viewed

@@ -1,7 +0,0 @@
-{
-  "[CLS]": 101,
-  "[MASK]": 103,
-  "[PAD]": 0,
-  "[SEP]": 102,
-  "[UNK]": 100
-}

onnx/config.json DELETED Viewed

@@ -1,33 +0,0 @@
-{
-  "_name_or_path": "distilbert-base-uncased-finetuned-sst-2-english",
-  "activation": "gelu",
-  "architectures": [
-    "DistilBertForSequenceClassification"
-  ],
-  "attention_dropout": 0.1,
-  "dim": 768,
-  "dropout": 0.1,
-  "finetuning_task": "sst-2",
-  "hidden_dim": 3072,
-  "id2label": {
-    "0": "NEGATIVE",
-    "1": "POSITIVE"
-  },
-  "initializer_range": 0.02,
-  "label2id": {
-    "NEGATIVE": 0,
-    "POSITIVE": 1
-  },
-  "max_position_embeddings": 512,
-  "model_type": "distilbert",
-  "n_heads": 12,
-  "n_layers": 6,
-  "output_past": true,
-  "pad_token_id": 0,
-  "qa_dropout": 0.1,
-  "seq_classif_dropout": 0.2,
-  "sinusoidal_pos_embds": false,
-  "tie_weights_": true,
-  "transformers_version": "4.34.0",
-  "vocab_size": 30522
-}

onnx/model.onnx DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:252cf7048af94a1599019fef35961b2bd3d6db13df0b0a4b032b92baeae31939
-size 267955711

onnx/special_tokens_map.json DELETED Viewed

@@ -1,7 +0,0 @@
-{
-  "cls_token": "[CLS]",
-  "mask_token": "[MASK]",
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
-  "unk_token": "[UNK]"
-}

onnx/tokenizer.json DELETED Viewed

The diff for this file is too large to render. See raw diff

onnx/tokenizer_config.json DELETED Viewed

@@ -1,58 +0,0 @@
-{
-  "added_tokens_decoder": {
-    "0": {
-      "content": "[PAD]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "100": {
-      "content": "[UNK]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "101": {
-      "content": "[CLS]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "102": {
-      "content": "[SEP]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "103": {
-      "content": "[MASK]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    }
-  },
-  "additional_special_tokens": [],
-  "clean_up_tokenization_spaces": true,
-  "cls_token": "[CLS]",
-  "do_basic_tokenize": true,
-  "do_lower_case": true,
-  "mask_token": "[MASK]",
-  "model_max_length": 512,
-  "never_split": null,
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
-  "strip_accents": null,
-  "tokenize_chinese_chars": true,
-  "tokenizer_class": "DistilBertTokenizer",
-  "unk_token": "[UNK]"
-}

onnx/vocab.txt DELETED Viewed

The diff for this file is too large to render. See raw diff