xu song
committed on
Commit
·
98b5498
1
Parent(s):
46ea35b
update
Browse files- models/cpp_qwen2.py +21 -13
models/cpp_qwen2.py
CHANGED
@@ -64,6 +64,12 @@ class Qwen2Simulator(Simulator):
|
|
64 |
f"n_threads={self.llm.n_threads}, n_ctx={self.llm.n_ctx}, "
|
65 |
f"env[CACHE]={os.environ.get('CACHE', None)}")
|
66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
self.generation_kwargs = dict(
|
68 |
temperature=config.DEFAULT_TEMPERATURE,
|
69 |
top_p=config.DEFAULT_TOP_P,
|
@@ -71,28 +77,32 @@ class Qwen2Simulator(Simulator):
|
|
71 |
max_tokens=config.DEFAULT_MAX_TOKENS,
|
72 |
repeat_penalty=1.1,
|
73 |
# qwen2-0.5b-chat sometimes finishes generating without emitting <|im_end|> and continues straight into <|im_start|>
|
74 |
-
stop=
|
75 |
-
"<|im_end|>",
|
76 |
-
"<|im_start|>",
|
77 |
-
"<|endoftext|>",
|
78 |
-
],
|
79 |
)
|
80 |
|
|
|
|
|
|
|
81 |
def tokenize(self, text):
|
82 |
return self.llm.tokenize(text.encode("utf-8"))
|
83 |
|
|
|
|
|
|
|
|
|
|
|
84 |
|
85 |
def generate(self, history, stream=True):
|
86 |
if history[-1]['role'] in ["user"]:
|
87 |
-
start_tokens = self.
|
88 |
elif history[-1]['role'] in ["assistant", "system"]:
|
89 |
-
start_tokens = self.
|
90 |
|
91 |
input_ids = []
|
92 |
for message in history:
|
93 |
-
if "tokens" not in message:
|
94 |
message["tokens"] = self.tokenize(message["content"])
|
95 |
-
input_ids += self.tokenize(f"<|im_start|>{message['role']}\n") \
|
96 |
+ message["tokens"] \
|
97 |
+ self.tokenize("<|im_end|>\n")
|
98 |
input_ids += start_tokens
|
@@ -120,7 +130,6 @@ class Qwen2Simulator(Simulator):
|
|
120 |
print(f'finish_reason with text: {stream["choices"][0]["text"]}')
|
121 |
|
122 |
|
123 |
-
|
124 |
bot = Qwen2Simulator()
|
125 |
|
126 |
if __name__ == "__main__":
|
@@ -131,11 +140,10 @@ if __name__ == "__main__":
|
|
131 |
for generated_text, generated_tokens in bot.generate(messages, stream=True):
|
132 |
print(generated_text, generated_tokens)
|
133 |
|
134 |
-
|
135 |
for i in range(3):
|
136 |
-
messages.append(
|
|
|
137 |
print("######## requesting", messages)
|
138 |
for generated_text, generated_tokens in bot.generate(messages, stream=True):
|
139 |
pass
|
140 |
# print(generated_text, all_tokens)
|
141 |
-
|
|
|
64 |
f"n_threads={self.llm.n_threads}, n_ctx={self.llm.n_ctx}, "
|
65 |
f"env[CACHE]={os.environ.get('CACHE', None)}")
|
66 |
|
67 |
+
self.stop_words = [
|
68 |
+
"<|im_end|>",
|
69 |
+
"<|im_start|>",
|
70 |
+
"<|endoftext|>",
|
71 |
+
]
|
72 |
+
self.stop_tokens = self.tokenize(self.stop_words)
|
73 |
self.generation_kwargs = dict(
|
74 |
temperature=config.DEFAULT_TEMPERATURE,
|
75 |
top_p=config.DEFAULT_TOP_P,
|
|
|
77 |
max_tokens=config.DEFAULT_MAX_TOKENS,
|
78 |
repeat_penalty=1.1,
|
79 |
# qwen2-0.5b-chat sometimes finishes generating without emitting <|im_end|> and continues straight into <|im_start|>
|
80 |
+
stop=self.stop_words,
|
|
|
|
|
|
|
|
|
81 |
)
|
82 |
|
83 |
+
self.user_start_tokens = self.tokenize("<|im_start|>user\n")
|
84 |
+
self.assistant_start_tokens = self.tokenize("<|im_start|>assistant\n")
|
85 |
+
|
86 |
def tokenize(self, text):
    """Tokenize text with the wrapped model's tokenizer.

    Args:
        text: A single string, or a list/tuple of strings. The constructor
            passes the whole ``stop_words`` list here, which previously
            failed because a list has no ``.encode``.

    Returns:
        Whatever ``self.llm.tokenize`` returns for a single string. For a
        list/tuple, the per-string results concatenated in order into one
        flat list.
    """
    if isinstance(text, (list, tuple)):
        tokens = []
        for piece in text:
            tokens += self.llm.tokenize(piece.encode("utf-8"))
        return tokens
    # NOTE(review): whether markers such as "<|im_end|>" map to a single
    # special-token id depends on the backend tokenizer's options - confirm.
    return self.llm.tokenize(text.encode("utf-8"))
|
88 |
|
89 |
+
def _strip_stoptokens(self, tokens):
|
90 |
+
while tokens and tokens[0] in self.stop_tokens:
|
91 |
+
tokens.pop(0)
|
92 |
+
while tokens and tokens[-1] in self.stop_tokens:
|
93 |
+
tokens.pop()
|
94 |
|
95 |
def generate(self, history, stream=True):
|
96 |
if history[-1]['role'] in ["user"]:
|
97 |
+
start_tokens = self.assistant_start_tokens
|
98 |
elif history[-1]['role'] in ["assistant", "system"]:
|
99 |
+
start_tokens = self.user_start_tokens
|
100 |
|
101 |
input_ids = []
|
102 |
for message in history:
|
103 |
+
if "tokens" not in message: # tokens
|
104 |
message["tokens"] = self.tokenize(message["content"])
|
105 |
+
input_ids += self._strip_stoptokens(self.tokenize(f"<|im_start|>{message['role']}\n")) \
|
106 |
+ message["tokens"] \
|
107 |
+ self.tokenize("<|im_end|>\n")
|
108 |
input_ids += start_tokens
|
|
|
130 |
print(f'finish_reason with text: {stream["choices"][0]["text"]}')
|
131 |
|
132 |
|
|
|
133 |
bot = Qwen2Simulator()
|
134 |
|
135 |
if __name__ == "__main__":
|
|
|
140 |
for generated_text, generated_tokens in bot.generate(messages, stream=True):
|
141 |
print(generated_text, generated_tokens)
|
142 |
|
|
|
143 |
for i in range(3):
|
144 |
+
messages.append(
|
145 |
+
{"role": "user" if i % 2 == 0 else "assistant", "content": generated_text, "tokens": generated_tokens})
|
146 |
print("######## requesting", messages)
|
147 |
for generated_text, generated_tokens in bot.generate(messages, stream=True):
|
148 |
pass
|
149 |
# print(generated_text, all_tokens)
|
|