add github
Browse files
- README.md +44 -3
- README_CN.md +43 -2
- images/vllm_web_demo.png +0 -0
- images/web_demo.png +0 -0
README.md
CHANGED
@@ -36,7 +36,7 @@
|
|
36 |
|
37 |
**Github:** [https://github.com/seanzhang-zhichen/llama3-chinese](https://github.com/seanzhang-zhichen/llama3-chinese)
|
38 |
|
39 |
-
![DEMO](./images/vllm_web_demo.png)
|
40 |
|
41 |
|
42 |
## Download Model
|
@@ -63,7 +63,6 @@ git clone https://www.modelscope.cn/LLM-Research/Meta-Llama-3-8B.git
|
|
63 |
```bash
|
64 |
git lfs install
|
65 |
git clone https://www.modelscope.cn/seanzhang/Llama3-Chinese-Lora.git
|
66 |
-
|
67 |
```
|
68 |
|
69 |
**From HuggingFace**
|
@@ -96,6 +95,48 @@ git lfs install
|
|
96 |
git clone https://huggingface.co/zhichen/Llama3-Chinese
|
97 |
```
|
98 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
|
100 |
## VLLM WEB DEMO
|
101 |
|
@@ -131,7 +172,7 @@ If you used Llama3-Chinese in your research, cite it in the following format:
|
|
131 |
```latex
|
132 |
@misc{Llama3-Chinese,
|
133 |
title={Llama3-Chinese},
|
134 |
-
author={Zhichen Zhang},
|
135 |
year={2024},
|
136 |
howpublished={\url{https://github.com/seanzhang-zhichen/llama3-chinese}},
|
137 |
}
|
|
|
36 |
|
37 |
**Github:** [https://github.com/seanzhang-zhichen/llama3-chinese](https://github.com/seanzhang-zhichen/llama3-chinese)
|
38 |
|
39 |
+
![DEMO](./images/web_demo.png)
|
40 |
|
41 |
|
42 |
## Download Model
|
|
|
63 |
```bash
|
64 |
git lfs install
|
65 |
git clone https://www.modelscope.cn/seanzhang/Llama3-Chinese-Lora.git
|
|
|
66 |
```
|
67 |
|
68 |
**From HuggingFace**
|
|
|
95 |
git clone https://huggingface.co/zhichen/Llama3-Chinese
|
96 |
```
|
97 |
|
98 |
+
## Inference
|
99 |
+
|
100 |
+
```python
|
101 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
102 |
+
|
103 |
+
model_id = "zhichen/Llama3-Chinese"
|
104 |
+
|
105 |
+
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
106 |
+
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map="auto")
|
107 |
+
|
108 |
+
messages = [
|
109 |
+
{"role": "system", "content": "You are a helpful assistant."},
|
110 |
+
{"role": "user", "content": "你好"},
|
111 |
+
]
|
112 |
+
|
113 |
+
input_ids = tokenizer.apply_chat_template(
|
114 |
+
messages, add_generation_prompt=True, return_tensors="pt"
|
115 |
+
).to(model.device)
|
116 |
+
|
117 |
+
outputs = model.generate(
|
118 |
+
input_ids,
|
119 |
+
max_new_tokens=2048,
|
120 |
+
do_sample=True,
|
121 |
+
temperature=0.7,
|
122 |
+
top_p=0.95,
|
123 |
+
)
|
124 |
+
response = outputs[0][input_ids.shape[-1]:]
|
125 |
+
print(tokenizer.decode(response, skip_special_tokens=True))
|
126 |
+
```
|
127 |
+
|
128 |
+
## CLI DEMO
|
129 |
+
|
130 |
+
```bash
|
131 |
+
python cli_demo.py --model_path zhichen/Llama3-Chinese
|
132 |
+
```
|
133 |
+
|
134 |
+
## WEB DEMO
|
135 |
+
|
136 |
+
```bash
|
137 |
+
python web_demo.py --model_path zhichen/Llama3-Chinese
|
138 |
+
```
|
139 |
+
|
140 |
|
141 |
## VLLM WEB DEMO
|
142 |
|
|
|
172 |
```latex
|
173 |
@misc{Llama3-Chinese,
|
174 |
title={Llama3-Chinese},
|
175 |
+
author={Zhichen Zhang and Xin LU and Long Chen},
|
176 |
year={2024},
|
177 |
howpublished={\url{https://github.com/seanzhang-zhichen/llama3-chinese}},
|
178 |
}
|
README_CN.md
CHANGED
@@ -37,7 +37,7 @@
|
|
37 |
|
38 |
**Github:** [https://github.com/seanzhang-zhichen/llama3-chinese](https://github.com/seanzhang-zhichen/llama3-chinese)
|
39 |
|
40 |
-
![DEMO](./images/vllm_web_demo.png)
|
41 |
|
42 |
|
43 |
## 模型下载
|
@@ -96,6 +96,47 @@ git clone https://huggingface.co/zhichen/Llama3-Chinese
|
|
96 |
```
|
97 |
|
98 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
|
100 |
|
101 |
## vllm web 推理
|
@@ -133,7 +174,7 @@ Llama3-Chinese项目代码的授权协议为 [The Apache License 2.0](./LICENSE)
|
|
133 |
```latex
|
134 |
@misc{Llama3-Chinese,
|
135 |
title={Llama3-Chinese},
|
136 |
-
author={Zhichen Zhang},
|
137 |
year={2024},
|
138 |
howpublished={\url{https://github.com/seanzhang-zhichen/llama3-chinese}},
|
139 |
}
|
|
|
37 |
|
38 |
**Github:** [https://github.com/seanzhang-zhichen/llama3-chinese](https://github.com/seanzhang-zhichen/llama3-chinese)
|
39 |
|
40 |
+
![DEMO](./images/web_demo.png)
|
41 |
|
42 |
|
43 |
## 模型下载
|
|
|
96 |
```
|
97 |
|
98 |
|
99 |
+
## 推理
|
100 |
+
|
101 |
+
```python
|
102 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
103 |
+
|
104 |
+
model_id = "zhichen/Llama3-Chinese"
|
105 |
+
|
106 |
+
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
107 |
+
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map="auto")
|
108 |
+
|
109 |
+
messages = [
|
110 |
+
{"role": "system", "content": "You are a helpful assistant."},
|
111 |
+
{"role": "user", "content": "你好"},
|
112 |
+
]
|
113 |
+
|
114 |
+
input_ids = tokenizer.apply_chat_template(
|
115 |
+
messages, add_generation_prompt=True, return_tensors="pt"
|
116 |
+
).to(model.device)
|
117 |
+
|
118 |
+
outputs = model.generate(
|
119 |
+
input_ids,
|
120 |
+
max_new_tokens=2048,
|
121 |
+
do_sample=True,
|
122 |
+
temperature=0.7,
|
123 |
+
top_p=0.95,
|
124 |
+
)
|
125 |
+
response = outputs[0][input_ids.shape[-1]:]
|
126 |
+
print(tokenizer.decode(response, skip_special_tokens=True))
|
127 |
+
```
|
128 |
+
|
129 |
+
## 命令行推理
|
130 |
+
|
131 |
+
```bash
|
132 |
+
python cli_demo.py --model_path zhichen/Llama3-Chinese
|
133 |
+
```
|
134 |
+
|
135 |
+
## web推理
|
136 |
+
|
137 |
+
```bash
|
138 |
+
python web_demo.py --model_path zhichen/Llama3-Chinese
|
139 |
+
```
|
140 |
|
141 |
|
142 |
## vllm web 推理
|
|
|
174 |
```latex
|
175 |
@misc{Llama3-Chinese,
|
176 |
title={Llama3-Chinese},
|
177 |
+
author={Zhichen Zhang and Xin LU and Long Chen},
|
178 |
year={2024},
|
179 |
howpublished={\url{https://github.com/seanzhang-zhichen/llama3-chinese}},
|
180 |
}
|
images/vllm_web_demo.png
DELETED
Binary file (542 kB)
|
|
images/web_demo.png
ADDED