model
Browse files
- scripts/TRAIN.md +22 -4
scripts/TRAIN.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
# Train
|
2 |
|
3 |
-
##
|
4 |
|
5 |
```bash
|
6 |
cd scripts
|
@@ -9,14 +9,32 @@ source venv/bin/activate
|
|
9 |
pip install -U -r requirements.in
|
10 |
```
|
11 |
|
12 |
-
## Tokenizer
|
13 |
-
|
14 |
```bash
|
15 |
python -B train_tokenizer.py
|
16 |
```
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
## Model
|
19 |
|
20 |
```bash
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
```
|
|
|
1 |
# Train
|
2 |
|
3 |
+
## Tokenizer
|
4 |
|
5 |
```bash
|
6 |
cd scripts
|
|
|
9 |
pip install -U -r requirements.in
|
10 |
```
|
11 |
|
|
|
|
|
12 |
```bash
|
13 |
python -B train_tokenizer.py
|
14 |
```
|
15 |
|
16 |
+
## Dataset
|
17 |
+
|
18 |
+
```bash
|
19 |
+
cd scripts
|
20 |
+
python -m venv venv-lit
|
21 |
+
source venv-lit/bin/activate
|
22 |
+
pip install -U -r requirements-lit.in
|
23 |
+
```
|
24 |
+
|
25 |
+
```bash
|
26 |
+
python -B prepare_pretrain_dataset.py
|
27 |
+
```
|
28 |
+
|
29 |
## Model
|
30 |
|
31 |
```bash
|
32 |
+
cd scripts
|
33 |
+
python -m venv venv-lit
|
34 |
+
source venv-lit/bin/activate
|
35 |
+
pip install -U -r requirements-lit.in
|
36 |
+
```
|
37 |
+
|
38 |
+
```bash
|
39 |
+
litgpt pretrain --data LitData --data.data_path "../data/" --config ./model.yaml
|
40 |
```
|