andrescodas
committed on
Commit
·
fafcaf0
1
Parent(s):
c5f603a
update the example for "Inference with Hugging Face library"
Browse files
README.md
CHANGED
@@ -101,37 +101,47 @@ analysis is needed to assess potential harm or bias in the proposed application.
|
|
101 |
**Inference with Hugging Face library**
|
102 |
|
103 |
```python
|
104 |
-
import transformers
|
105 |
import torch
|
|
|
106 |
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
|
|
|
|
111 |
|
|
|
|
|
112 |
tokenizer = transformers.AutoTokenizer.from_pretrained(
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
use_fast=False,
|
117 |
-
add_special_tokens=False,
|
118 |
-
)
|
119 |
|
120 |
system_message = "You are Orca, an AI language model created by Microsoft. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior."
|
121 |
-
user_message = "
|
122 |
|
123 |
-
# We use Chat Markup Language https://github.com/MicrosoftDocs/azure-docs/blob/main/articles/ai-services/openai/includes/chat-markup-language.md#working-with-chat-markup-language-chatml
|
124 |
-
prompt =
|
125 |
|
126 |
inputs = tokenizer(prompt, return_tensors='pt')
|
127 |
-
|
|
|
128 |
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
133 |
|
134 |
-
print(
|
135 |
```
|
136 |
|
137 |
|
|
|
101 |
**Inference with Hugging Face library**
|
102 |
|
103 |
```python
|
|
|
104 |
import torch
|
105 |
+
import transformers
|
106 |
|
107 |
+
if torch.cuda.is_available():
|
108 |
+
torch.set_default_device("cuda")
|
109 |
+
else:
|
110 |
+
torch.set_default_device("cpu")
|
111 |
+
|
112 |
+
model = transformers.AutoModelForCausalLM.from_pretrained("microsoft/Orca-2-7b", device_map='auto')
|
113 |
|
114 |
+
# https://github.com/huggingface/transformers/issues/27132
|
115 |
+
# Please use the slow tokenizer, since the fast and slow tokenizers produce different tokens
|
116 |
tokenizer = transformers.AutoTokenizer.from_pretrained(
|
117 |
+
"microsoft/Orca-2-7b",
|
118 |
+
use_fast=False,
|
119 |
+
)
|
|
|
|
|
|
|
120 |
|
121 |
system_message = "You are Orca, an AI language model created by Microsoft. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior."
|
122 |
+
user_message = "How can you determine if a restaurant is popular among locals or mainly attracts tourists, and why might this information be useful?"
|
123 |
|
124 |
+
# We use Chat Markup Language https://github.com/MicrosoftDocs/azure-docs/blob/main/articles/ai-services/openai/includes/chat-markup-language.md#working-with-chat-markup-language-chatml
|
125 |
+
prompt = f"<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{user_message}<|im_end|>\n<|im_start|>assistant"
|
126 |
|
127 |
inputs = tokenizer(prompt, return_tensors='pt')
|
128 |
+
output_ids = model.generate(inputs["input_ids"],)
|
129 |
+
answer = tokenizer.batch_decode(output_ids)[0]
|
130 |
|
131 |
+
print(answer)
|
132 |
+
|
133 |
+
# This example continues by showing how to add a second-turn user message to the conversation
|
134 |
+
second_turn_user_message = "Give me a list of the key points of your first answer."
|
135 |
+
|
136 |
+
# We set add_special_tokens=False because we don't want to automatically add a bos_token between messages
|
137 |
+
second_turn_message_in_markup = f"\n<|im_start|>user\n{second_turn_user_message}<|im_end|>\n<|im_start|>assistant"
|
138 |
+
second_turn_tokens = tokenizer(second_turn_message_in_markup, return_tensors='pt', add_special_tokens=False)
|
139 |
+
second_turn_input = torch.cat([output_ids, second_turn_tokens['input_ids']], dim=1)
|
140 |
+
|
141 |
+
output_ids_2 = model.generate(second_turn_input,)
|
142 |
+
second_turn_answer = tokenizer.batch_decode(output_ids_2)[0]
|
143 |
|
144 |
+
print(second_turn_answer)
|
145 |
```
|
146 |
|
147 |
|