xu song committed on
Commit
98b5498
·
1 Parent(s): 46ea35b
Files changed (1) hide show
  1. models/cpp_qwen2.py +21 -13
models/cpp_qwen2.py CHANGED
@@ -64,6 +64,12 @@ class Qwen2Simulator(Simulator):
64
  f"n_threads={self.llm.n_threads}, n_ctx={self.llm.n_ctx}, "
65
  f"env[CACHE]={os.environ.get('CACHE', None)}")
66
 
 
 
 
 
 
 
67
  self.generation_kwargs = dict(
68
  temperature=config.DEFAULT_TEMPERATURE,
69
  top_p=config.DEFAULT_TOP_P,
@@ -71,28 +77,32 @@ class Qwen2Simulator(Simulator):
71
  max_tokens=config.DEFAULT_MAX_TOKENS,
72
  repeat_penalty=1.1,
73
  # qwen2-0.5b-chat 有时内容生成结束没有<|im_end|>,直接跟 <|im_start|>
74
- stop=[
75
- "<|im_end|>",
76
- "<|im_start|>",
77
- "<|endoftext|>",
78
- ],
79
  )
80
 
 
 
 
81
  def tokenize(self, text):
82
  return self.llm.tokenize(text.encode("utf-8"))
83
 
 
 
 
 
 
84
 
85
  def generate(self, history, stream=True):
86
  if history[-1]['role'] in ["user"]:
87
- start_tokens = self.tokenize("<|im_start|>assistant\n")
88
  elif history[-1]['role'] in ["assistant", "system"]:
89
- start_tokens = self.tokenize("<|im_start|>user\n")
90
 
91
  input_ids = []
92
  for message in history:
93
- if "tokens" not in message:
94
  message["tokens"] = self.tokenize(message["content"])
95
- input_ids += self.tokenize(f"<|im_start|>{message['role']}\n") \
96
  + message["tokens"] \
97
  + self.tokenize("<|im_end|>\n")
98
  input_ids += start_tokens
@@ -120,7 +130,6 @@ class Qwen2Simulator(Simulator):
120
  print(f'finish_reason with text: {stream["choices"][0]["text"]}')
121
 
122
 
123
-
124
  bot = Qwen2Simulator()
125
 
126
  if __name__ == "__main__":
@@ -131,11 +140,10 @@ if __name__ == "__main__":
131
  for generated_text, generated_tokens in bot.generate(messages, stream=True):
132
  print(generated_text, generated_tokens)
133
 
134
-
135
  for i in range(3):
136
- messages.append({"role": "user" if i % 2 == 0 else "assistant", "content": generated_text, "tokens": generated_tokens})
 
137
  print("######## requesting", messages)
138
  for generated_text, generated_tokens in bot.generate(messages, stream=True):
139
  pass
140
  # print(generated_text, all_tokens)
141
-
 
64
  f"n_threads={self.llm.n_threads}, n_ctx={self.llm.n_ctx}, "
65
  f"env[CACHE]={os.environ.get('CACHE', None)}")
66
 
67
+ self.stop_words = [
68
+ "<|im_end|>",
69
+ "<|im_start|>",
70
+ "<|endoftext|>",
71
+ ]
72
+ self.stop_tokens = self.tokenize(self.stop_words)
73
  self.generation_kwargs = dict(
74
  temperature=config.DEFAULT_TEMPERATURE,
75
  top_p=config.DEFAULT_TOP_P,
 
77
  max_tokens=config.DEFAULT_MAX_TOKENS,
78
  repeat_penalty=1.1,
79
  # qwen2-0.5b-chat 有时内容生成结束没有<|im_end|>,直接跟 <|im_start|>
80
+ stop=self.stop_words,
 
 
 
 
81
  )
82
 
83
+ self.user_start_tokens = self.tokenize("<|im_start|>user\n")
84
+ self.assistant_start_tokens = self.tokenize("<|im_start|>assistant\n")
85
+
86
  def tokenize(self, text):
87
  return self.llm.tokenize(text.encode("utf-8"))
88
 
89
+ def _strip_stoptokens(self, tokens):
90
+ while tokens and tokens[0] in self.stop_tokens:
91
+ tokens.pop(0)
92
+ while tokens and tokens[-1] in self.stop_tokens:
93
+ tokens.pop()
94
 
95
  def generate(self, history, stream=True):
96
  if history[-1]['role'] in ["user"]:
97
+ start_tokens = self.assistant_start_tokens
98
  elif history[-1]['role'] in ["assistant", "system"]:
99
+ start_tokens = self.user_start_tokens
100
 
101
  input_ids = []
102
  for message in history:
103
+ if "tokens" not in message: # tokens
104
  message["tokens"] = self.tokenize(message["content"])
105
+ input_ids += self._strip_stoptokens(self.tokenize(f"<|im_start|>{message['role']}\n")) \
106
  + message["tokens"] \
107
  + self.tokenize("<|im_end|>\n")
108
  input_ids += start_tokens
 
130
  print(f'finish_reason with text: {stream["choices"][0]["text"]}')
131
 
132
 
 
133
  bot = Qwen2Simulator()
134
 
135
  if __name__ == "__main__":
 
140
  for generated_text, generated_tokens in bot.generate(messages, stream=True):
141
  print(generated_text, generated_tokens)
142
 
 
143
  for i in range(3):
144
+ messages.append(
145
+ {"role": "user" if i % 2 == 0 else "assistant", "content": generated_text, "tokens": generated_tokens})
146
  print("######## requesting", messages)
147
  for generated_text, generated_tokens in bot.generate(messages, stream=True):
148
  pass
149
  # print(generated_text, all_tokens)