simonJJJ committed on
Commit
d95cdef
1 Parent(s): 0ff4c6e
Files changed (1) hide show
  1. tokenization_qwen.py +4 -0
tokenization_qwen.py CHANGED
@@ -366,9 +366,13 @@ class QWenTokenizer(PreTrainedTokenizer):
366
 
367
  def from_list_format(self, list_format: List[Dict]):
368
  text = ''
 
369
  for ele in list_format:
370
  if 'image' in ele:
 
 
371
  text += self.image_start_tag + ele['image'] + self.image_end_tag
 
372
  elif 'text' in ele:
373
  text += ele['text']
374
  elif 'box' in ele:
 
366
 
367
  def from_list_format(self, list_format: List[Dict]):
368
  text = ''
369
+ num_images = 0
370
  for ele in list_format:
371
  if 'image' in ele:
372
+ num_images += 1
373
+ text += f'Picture {num_images}:'
374
  text += self.image_start_tag + ele['image'] + self.image_end_tag
375
+ text += '\n'
376
  elif 'text' in ele:
377
  text += ele['text']
378
  elif 'box' in ele: