initial commit

Files changed (10) hide show

README.md +43 -0
model/dict.txt +0 -0
model/gpt2-merges.txt +0 -0
model/gpt2-vocab.json +0 -0
model/merges.txt +0 -0
model/special_tokens_map.json +23 -0
model/tokenizer_config.json +31 -0
model/vocab.json +0 -0
run.sh +2 -0
run_model.py +75 -0

README.md ADDED Viewed

	@@ -0,0 +1,43 @@

+---
+tags:
+- opt_metasq
+---
+# This repo lets you run the following checkpoint using facebookresearch/metaseq.
+Do the following:
+## 1. Install PyTorch
+```
+pip3 install torch==1.10.1+cu113 torchvision==0.11.2+cu113 torchaudio==0.10.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
+```
+## 2. Install Megatron
+```
+git clone https://github.com/patrickvonplaten/Megatron-LM.git
+cd Megatron-LM
+pip3 install six regex
+pip3 install -e .
+```
+## 3. Install fairscale
+```
+git clone https://github.com/facebookresearch/fairscale.git
+cd fairscale
+git checkout prefetch_fsdp_params_simple
+pip3 install -e .
+```
+## 4. Install metaseq
+```
+git clone https://github.com/patrickvonplaten/metaseq.git
+cd metaseq
+pip3 install -e .
+```
+## 5. Clone this repo (click top right on "How to clone")
+## 6. Run the following:
+```bash
+cd <path/to/cloned/repo>
+bash run.sh
+```

model/dict.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model/gpt2-merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model/gpt2-vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

model/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "bos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

model/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "bos_token": {
+    "__type": "AddedToken",
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "__type": "AddedToken",
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "errors": "replace",
+  "pad_token": null,
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": {
+    "__type": "AddedToken",
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

model/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run.sh ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ #!/usr/bin/env bash
2	+ CUDA_VISIBLE_DEVICES="0" torchrun run_model.py --pipeline-model-parallel-size 1 --tensor-model-parallel-size 1

run_model.py ADDED Viewed

	@@ -0,0 +1,75 @@

+#!/usr/bin/env python3
+import os
+from transformers import AutoTokenizer, GPT2Tokenizer
+#from megatron.initialize import initialize_megatron
+from metaseq import checkpoint_utils
+from transformers import OPTForCausalLM
+import torch
+path = "./model"
+hf_path = "/home/patrick/facebook/opt-125m"
+vocab_file = os.path.join(path, "gpt2-vocab.json")
+merges_file = os.path.join(path, "gpt2-merges.txt")
+tokenizer = GPT2Tokenizer(vocab_file, merges_file)
+tokenizer.save_pretrained(path)
+checkpoint = checkpoint_utils.load_model_ensemble_and_task(
+    [os.path.join(path, "restored.pt")],
+    arg_overrides={
+        "vocab_filename": vocab_file,
+        "merges_filename": merges_file,
+    }
+)
+model = checkpoint[0][0].eval()
+model = model
+hf_model = OPTForCausalLM.from_pretrained(hf_path)
+# forward passes
+def single_batch_forward_logits(prompts):
+    input_ids = tokenizer(prompts, return_tensors="pt").input_ids
+    input_ids = torch.cat([torch.tensor([[0]]), input_ids], dim=-1)
+    input_ids = input_ids
+    with torch.no_grad():
+        logits = model(input_ids)[0]
+    return logits
+# forward hf
+def forward_hf(prompts):
+    input_ids = tokenizer(prompts, return_tensors="pt").input_ids
+    input_ids = torch.cat([torch.tensor([[0]]), input_ids], dim=-1)
+    input_ids = input_ids
+    with torch.no_grad():
+        logits = hf_model(input_ids)[0]
+    return logits
+prompts = [
+   "Today is a beautiful day and I want to",
+   "In the city of",
+   "Paris is the capital of France and",
+   "Computers and mobile phones have taken",
+]
+print("Next word generation")
+for prompt in prompts:
+    print("-------------")
+    print(f"Prompt: {prompt}...\n")
+    logits_fsq = single_batch_forward_logits(prompt)
+    pred_next_token = torch.argmax(logits_fsq[0, -1], -1)
+    next_token = tokenizer.convert_ids_to_tokens([pred_next_token])
+    next_token = next_token[0].replace("Ġ", "")
+    print(f"Next word: {next_token}")
+    print("-------------")
+    logits = forward_hf(prompt)
+    pred_next_token = torch.argmax(logits[0, -1], -1)
+    next_token = tokenizer.convert_ids_to_tokens([pred_next_token])
+    next_token = next_token[0].replace("Ġ", "")
+    print(f"Next word: {next_token}")
+    print("-------------")
+print("Is equal:", torch.allclose(logits_fsq.cpu(), logits.cpu(), atol=1e-3))