Arthur committed on
Commit
03d48b6
·
1 Parent(s): dd765f4

initial commit

Browse files
README.md ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - opt_metasq
4
+ ---
5
+
6
+ # This repo lets you run the following checkpoint using facebookresearch/metaseq.
7
+
8
+ Do the following:
9
+
10
+ ## 1. Install PyTorch
11
+ ```
12
+ pip3 install torch==1.10.1+cu113 torchvision==0.11.2+cu113 torchaudio==0.10.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
13
+ ```
14
+
15
+ ## 2. Install Megatron
16
+ ```
17
+ git clone https://github.com/patrickvonplaten/Megatron-LM.git
18
+ cd Megatron-LM
19
+ pip3 install six regex
20
+ pip3 install -e .
21
+ ```
22
+ ## 3. Install fairscale
23
+ ```
24
+ git clone https://github.com/facebookresearch/fairscale.git
25
+ cd fairscale
26
+ git checkout prefetch_fsdp_params_simple
27
+ pip3 install -e .
28
+ ```
29
+ ## 4. Install metaseq
30
+ ```
31
+ git clone https://github.com/patrickvonplaten/metaseq.git
32
+ cd metaseq
33
+ pip3 install -e .
34
+ ```
35
+
36
+ ## 5. Clone this repo (click "How to clone" at the top right)
37
+
38
+ ## 6. Run the following:
39
+
40
+ ```bash
41
+ cd <path/to/cloned/repo>
42
+ bash run.sh
43
+ ```
model/dict.txt ADDED
The diff for this file is too large to render. See raw diff
 
model/gpt2-merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model/gpt2-vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
model/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model/special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "unk_token": {
17
+ "content": "<|endoftext|>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
model/tokenizer_config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "bos_token": {
5
+ "__type": "AddedToken",
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "eos_token": {
13
+ "__type": "AddedToken",
14
+ "content": "<|endoftext|>",
15
+ "lstrip": false,
16
+ "normalized": true,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "errors": "replace",
21
+ "pad_token": null,
22
+ "tokenizer_class": "GPT2Tokenizer",
23
+ "unk_token": {
24
+ "__type": "AddedToken",
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": true,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
model/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
run.sh ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ #!/usr/bin/env bash
2
+ CUDA_VISIBLE_DEVICES="0" torchrun run_model.py --pipeline-model-parallel-size 1 --tensor-model-parallel-size 1
run_model.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import os
3
+ from transformers import AutoTokenizer, GPT2Tokenizer
4
+ #from megatron.initialize import initialize_megatron
5
+ from metaseq import checkpoint_utils
6
+ from transformers import OPTForCausalLM
7
+ import torch
8
+
9
+ path = "./model"
10
+ hf_path = "/home/patrick/facebook/opt-125m"
11
+
12
+
13
+ vocab_file = os.path.join(path, "gpt2-vocab.json")
14
+ merges_file = os.path.join(path, "gpt2-merges.txt")
15
+
16
+ tokenizer = GPT2Tokenizer(vocab_file, merges_file)
17
+ tokenizer.save_pretrained(path)
18
+
19
+ checkpoint = checkpoint_utils.load_model_ensemble_and_task(
20
+ [os.path.join(path, "restored.pt")],
21
+ arg_overrides={
22
+ "vocab_filename": vocab_file,
23
+ "merges_filename": merges_file,
24
+ }
25
+ )
26
+
27
+ model = checkpoint[0][0].eval()
28
+ model = model
29
+
30
+ hf_model = OPTForCausalLM.from_pretrained(hf_path)
31
+
32
+ # forward passes
33
+ def single_batch_forward_logits(prompts):
34
+ input_ids = tokenizer(prompts, return_tensors="pt").input_ids
35
+ input_ids = torch.cat([torch.tensor([[0]]), input_ids], dim=-1)
36
+ input_ids = input_ids
37
+ with torch.no_grad():
38
+ logits = model(input_ids)[0]
39
+ return logits
40
+
41
+ # forward hf
42
+ def forward_hf(prompts):
43
+ input_ids = tokenizer(prompts, return_tensors="pt").input_ids
44
+ input_ids = torch.cat([torch.tensor([[0]]), input_ids], dim=-1)
45
+ input_ids = input_ids
46
+ with torch.no_grad():
47
+ logits = hf_model(input_ids)[0]
48
+ return logits
49
+
50
+ prompts = [
51
+ "Today is a beautiful day and I want to",
52
+ "In the city of",
53
+ "Paris is the capital of France and",
54
+ "Computers and mobile phones have taken",
55
+ ]
56
+
57
+ print("Next word generation")
58
+ for prompt in prompts:
59
+ print("-------------")
60
+ print(f"Prompt: {prompt}...\n")
61
+ logits_fsq = single_batch_forward_logits(prompt)
62
+ pred_next_token = torch.argmax(logits_fsq[0, -1], -1)
63
+ next_token = tokenizer.convert_ids_to_tokens([pred_next_token])
64
+ next_token = next_token[0].replace("Ġ", "")
65
+ print(f"Next word: {next_token}")
66
+ print("-------------")
67
+ logits = forward_hf(prompt)
68
+ pred_next_token = torch.argmax(logits[0, -1], -1)
69
+ next_token = tokenizer.convert_ids_to_tokens([pred_next_token])
70
+ next_token = next_token[0].replace("Ġ", "")
71
+ print(f"Next word: {next_token}")
72
+ print("-------------")
73
+
74
+
75
+ print("Is equal:", torch.allclose(logits_fsq.cpu(), logits.cpu(), atol=1e-3))