Update README.md
Browse files
README.md
CHANGED
@@ -48,6 +48,23 @@ print(tokenizer.decode(outputs[0], skip_special_tokens=True))
|
|
48 |
# rating:sfw, rating:general, 1girl, ahoge, braid, closed eyes, collared dress, dress, flower, full body, hair flower, hair ornament, long hair, night, night sky, outdoors, parted lips, pink flower, pink hair, short sleeves, sky, solo, straight hair, sunflower, very long hair, white flower
|
49 |
```
|
50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
#### Flash attention (optional)
|
52 |
|
53 |
Using flash attention can optimize computations, but it is currently only compatible with Linux.
|
@@ -86,11 +103,16 @@ ort_model = ORTModelForCausalLM.from_pretrained(MODEL_NAME)
|
|
86 |
# quantized version
|
87 |
# ort_model = ORTModelForCausalLM.from_pretrained(MODEL_NAME, file_name="model_quantized.onnx")
|
88 |
|
89 |
-
|
90 |
-
|
|
|
|
|
|
|
|
|
|
|
91 |
|
92 |
with torch.no_grad():
|
93 |
-
outputs =
|
94 |
|
95 |
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
|
96 |
```
|
|
|
48 |
# rating:sfw, rating:general, 1girl, ahoge, braid, closed eyes, collared dress, dress, flower, full body, hair flower, hair ornament, long hair, night, night sky, outdoors, parted lips, pink flower, pink hair, short sleeves, sky, solo, straight hair, sunflower, very long hair, white flower
|
49 |
```
|
50 |
|
51 |
+
You can use `tokenizer.apply_chat_template` to simplify construction of prompts:
|
52 |
+
|
53 |
+
```py
|
54 |
+
inputs = tokenizer.apply_chat_template({
|
55 |
+
"rating": "rating:sfw, rating:general",
|
56 |
+
"copyright": "original",
|
57 |
+
"character": "",
|
58 |
+
"general": "1girl",
|
59 |
+
"length": "<|long|>"
|
60 |
+
}, tokenize=True) # tokenize=False to preview prompt
|
61 |
+
# same as input_ids of "<|bos|><rating>rating:sfw, rating:general</rating><copyright>original</copyright><character></character><general><|long|>1girl<|input_end|>"
|
62 |
+
with torch.no_grad():
|
63 |
+
outputs = model.generate(inputs, generation_config=generation_config)
|
64 |
+
```
|
65 |
+
|
66 |
+
See the [chat templating documentation](https://huggingface.co/docs/transformers/main/en/chat_templating) for more details about `apply_chat_template`.
|
67 |
+
|
68 |
#### Flash attention (optional)
|
69 |
|
70 |
Using flash attention can optimize computations, but it is currently only compatible with Linux.
|
|
|
103 |
# quantized version
|
104 |
# ort_model = ORTModelForCausalLM.from_pretrained(MODEL_NAME, file_name="model_quantized.onnx")
|
105 |
|
106 |
+
inputs = tokenizer.apply_chat_template({
|
107 |
+
"rating": "rating:sfw, rating:general",
|
108 |
+
"copyright": "original",
|
109 |
+
"character": "",
|
110 |
+
"general": "1girl",
|
111 |
+
"length": "<|long|>"
|
112 |
+
}, tokenize=True)
|
113 |
|
114 |
with torch.no_grad():
|
115 |
+
outputs = ort_model.generate(inputs, generation_config=generation_config)
|
116 |
|
117 |
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
|
118 |
```
|