Update README.md
README.md CHANGED
@@ -113,6 +113,46 @@ response = outputs[0][input_ids.shape[-1]:]
print(tokenizer.decode(response, skip_special_tokens=True))
```

### Use with vLLM

[vllm-project/vllm](https://github.com/vllm-project/vllm)

```python
from vllm import LLM, SamplingParams

model_id = "haqishen/Llama-3-8B-Japanese-Instruct"

llm = LLM(
    model=model_id,
    trust_remote_code=True,
    tensor_parallel_size=2,  # shard the model across 2 GPUs
)
tokenizer = llm.get_tokenizer()

messages = [
    # "You are a pirate chatbot who always replies in pirate speak!"
    {"role": "system", "content": "あなたは、常に海賊の言葉で返事する海賊チャットボットです!"},
    # "Please introduce yourself."
    {"role": "user", "content": "自己紹介してください"},
]

# Render the chat template to a plain prompt string; vLLM tokenizes it internally.
conversations = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

outputs = llm.generate(
    [conversations],
    SamplingParams(
        temperature=0.6,
        top_p=0.9,
        max_tokens=1024,
        # Stop on the standard EOS token and on Llama 3's <|eot_id|> turn terminator.
        stop_token_ids=[tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|eot_id|>")],
    ),
)
print(outputs[0].outputs[0].text.strip())
```
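Note that `tensor_parallel_size=2` assumes two visible GPUs; on a single GPU, set it to 1 or omit the argument (it defaults to 1).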
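Not part of this commit, but since vLLM batches prompts natively, the same `llm` and `tokenizer` can serve several conversations in one `generate` call. A minimal sketch, reusing the objects from the example above; the questions are purely illustrative:

```python
# Sketch: batched generation (assumes `llm` and `tokenizer` from the example above).
questions = [
    "日本の首都はどこですか?",        # "What is the capital of Japan?"
    "富士山について教えてください",    # "Tell me about Mt. Fuji."
]
prompts = [
    tokenizer.apply_chat_template(
        [{"role": "user", "content": q}],
        tokenize=False,
        add_generation_prompt=True,
    )
    for q in questions
]
params = SamplingParams(temperature=0.6, top_p=0.9, max_tokens=1024)
outputs = llm.generate(prompts, params)  # one call; vLLM schedules all prompts together
for output in outputs:
    print(output.outputs[0].text.strip())
```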
## Examples

```