mtasic85 committed on
Commit
d2eb966
1 Parent(s): bf45302
Files changed (1) hide show
  1. scripts/TRAIN.md +22 -4
scripts/TRAIN.md CHANGED
@@ -1,6 +1,6 @@
1
  # Train
2
 
3
- ## Environment
4
 
5
  ```bash
6
  cd scripts
@@ -9,14 +9,32 @@ source venv/bin/activate
9
  pip install -U -r requirements.in
10
  ```
11
 
12
- ## Tokenizer
13
-
14
  ```bash
15
  python -B train_tokenizer.py
16
  ```
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  ## Model
19
 
20
  ```bash
21
- python -B train_model.py
 
 
 
 
 
 
 
22
  ```
 
1
  # Train
2
 
3
+ ## Tokenizer
4
 
5
  ```bash
6
  cd scripts
 
9
  pip install -U -r requirements.in
10
  ```
11
 
 
 
12
  ```bash
13
  python -B train_tokenizer.py
14
  ```
15
 
16
+ ## Dataset
17
+
18
+ ```bash
19
+ cd scripts
20
+ python -m venv venv-lit
21
+ source venv-lit/bin/activate
22
+ pip install -U -r requirements-lit.in
23
+ ```
24
+
25
+ ```bash
26
+ python -B prepare_pretrain_dataset.py
27
+ ```
28
+
29
  ## Model
30
 
31
  ```bash
32
+ cd scripts
33
+ python -m venv venv-lit
34
+ source venv-lit/bin/activate
35
+ pip install -U -r requirements-lit.in
36
+ ```
37
+
38
+ ```bash
39
+ litgpt pretrain --data LitData --data.data_path "../data/" --config ./model.yaml
40
  ```