kevin36524 committed on
Commit
b973cbb
·
verified ·
1 Parent(s): 7c62b77

Update export_qwen2_wc.py

Browse files
Files changed (1) hide show
  1. export_qwen2_wc.py +13 -87
export_qwen2_wc.py CHANGED
@@ -207,7 +207,7 @@ def generate() -> None:
207
  if is_first_run:
208
  model_inp = input_ids
209
  is_first_run = False
210
- print(f"KEVINDEBUG model_inp: {model_inp} causal_mask: {causal_mask}")
211
  output = torch_model(model_inp, causal_mask) # Start with a sub-sequence that long so need multiple previous when size only very low larger later same past arg a so try keeping right padded!
212
 
213
  # Get the most likely token IDs
@@ -215,7 +215,7 @@ def generate() -> None:
215
 
216
  # Append the generated token IDs to the output tensor
217
  output_tokens = torch.cat((output_tokens, output_ids[:, -1, None]), dim=-1)
218
- print(f"KEVINDEBUG output_tokens: {output_tokens}")
219
 
220
  # Update the causal mask
221
  causal_mask = torch.ones((1, 1, 1, output_tokens.shape[-1] + 1), dtype=torch.float32)
@@ -269,91 +269,17 @@ def export() -> None:
269
  mlmodel_fp16.save("StatefulQwen2_0_5_BInstructFP16.mlpackage")
270
 
271
  # Block-wise quantize model weights to int4
272
- op_config = ct.optimize.coreml.OpLinearQuantizerConfig(
273
- mode="linear_symmetric",
274
- dtype="int4",
275
- granularity="per_block",
276
- block_size=32,
277
- )
278
- config = ct.optimize.coreml.OptimizationConfig(global_config=op_config)
279
- mlmodel_int4 = ct.optimize.coreml.linear_quantize_weights(mlmodel_fp16, config=config)
280
- mlmodel_int4._spec.description.metadata.userDefined.update({METADATA_TOKENIZER: MODEL_ID})
281
- mlmodel_int4.save("StatefulQwen2_0_5_BInstructInt4.mlpackage")
282
 
283
 
284
  if __name__ == "__main__":
285
- generate()
286
-
287
- ###
288
- #KEVINDEBUG model_inp: tensor([[ 8420, 374, 264, 32794, 911, 60249]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1.]]]])
289
- #KEVINDEBUG output_tokens: tensor([[ 8420, 374, 264, 32794, 911, 60249, 11]])
290
- #KEVINDEBUG model_inp: tensor([[11]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1.]]]])
291
- #KEVINDEBUG output_tokens: tensor([[ 8420, 374, 264, 32794, 911, 60249, 11, 17689]])
292
- #KEVINDEBUG model_inp: tensor([[17689]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1.]]]])
293
- #KEVINDEBUG output_tokens: tensor([[ 8420, 374, 264, 32794, 911, 60249, 11, 17689, 13]])
294
- #KEVINDEBUG model_inp: tensor([[13]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]]]])
295
- #KEVINDEBUG output_tokens: tensor([[ 8420, 374, 264, 32794, 911, 60249, 11, 17689, 13, 1084]])
296
- #KEVINDEBUG model_inp: tensor([[1084]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]]]])
297
- #KEVINDEBUG output_tokens: tensor([[ 8420, 374, 264, 32794, 911, 60249, 11, 17689, 13, 1084,
298
- # 374]])
299
- #KEVINDEBUG model_inp: tensor([[374]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]]]])
300
- #KEVINDEBUG output_tokens: tensor([[ 8420, 374, 264, 32794, 911, 60249, 11, 17689, 13, 1084,
301
- # 374, 264]])
302
- #KEVINDEBUG model_inp: tensor([[264]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]]]])
303
- #KEVINDEBUG output_tokens: tensor([[ 8420, 374, 264, 32794, 911, 60249, 11, 17689, 13, 1084,
304
- # 374, 264, 32794]])
305
- #KEVINDEBUG model_inp: tensor([[32794]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]]]])
306
- #KEVINDEBUG output_tokens: tensor([[ 8420, 374, 264, 32794, 911, 60249, 11, 17689, 13, 1084,
307
- # 374, 264, 32794, 911]])
308
- #KEVINDEBUG model_inp: tensor([[911]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]]]])
309
- #KEVINDEBUG output_tokens: tensor([[ 8420, 374, 264, 32794, 911, 60249, 11, 17689, 13, 1084,
310
- # 374, 264, 32794, 911, 279]])
311
- #KEVINDEBUG model_inp: tensor([[279]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]]]])
312
- #KEVINDEBUG output_tokens: tensor([[ 8420, 374, 264, 32794, 911, 60249, 11, 17689, 13, 1084,
313
- # 374, 264, 32794, 911, 279, 3283]])
314
- #KEVINDEBUG model_inp: tensor([[3283]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]]]])
315
- #KEVINDEBUG output_tokens: tensor([[ 8420, 374, 264, 32794, 911, 60249, 11, 17689, 13, 1084,
316
- # 374, 264, 32794, 911, 279, 3283, 315]])
317
- #KEVINDEBUG model_inp: tensor([[315]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
318
- # 1.]]]])
319
- #KEVINDEBUG output_tokens: tensor([[ 8420, 374, 264, 32794, 911, 60249, 11, 17689, 13, 1084,
320
- # 374, 264, 32794, 911, 279, 3283, 315, 60249]])
321
- #KEVINDEBUG model_inp: tensor([[60249]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
322
- # 1., 1.]]]])
323
- #KEVINDEBUG output_tokens: tensor([[ 8420, 374, 264, 32794, 911, 60249, 11, 17689, 13, 1084,
324
- # 374, 264, 32794, 911, 279, 3283, 315, 60249, 11]])
325
- #KEVINDEBUG model_inp: tensor([[11]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
326
- # 1., 1., 1.]]]])
327
- #KEVINDEBUG output_tokens: tensor([[ 8420, 374, 264, 32794, 911, 60249, 11, 17689, 13, 1084,
328
- # 374, 264, 32794, 911, 279, 3283, 315, 60249, 11, 17689]])
329
- #KEVINDEBUG model_inp: tensor([[17689]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
330
- # 1., 1., 1., 1.]]]])
331
- #KEVINDEBUG output_tokens: tensor([[ 8420, 374, 264, 32794, 911, 60249, 11, 17689, 13, 1084,
332
- # 374, 264, 32794, 911, 279, 3283, 315, 60249, 11, 17689,
333
- # 13]])
334
- #KEVINDEBUG model_inp: tensor([[13]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
335
- # 1., 1., 1., 1., 1.]]]])
336
- #KEVINDEBUG output_tokens: tensor([[ 8420, 374, 264, 32794, 911, 60249, 11, 17689, 13, 1084,
337
- # 374, 264, 32794, 911, 279, 3283, 315, 60249, 11, 17689,
338
- # 13, 576]])
339
- #KEVINDEBUG model_inp: tensor([[576]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
340
- # 1., 1., 1., 1., 1., 1.]]]])
341
- #KEVINDEBUG output_tokens: tensor([[ 8420, 374, 264, 32794, 911, 60249, 11, 17689, 13, 1084,
342
- # 374, 264, 32794, 911, 279, 3283, 315, 60249, 11, 17689,
343
- # 13, 576, 32794]])
344
- #KEVINDEBUG model_inp: tensor([[32794]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
345
- # 1., 1., 1., 1., 1., 1., 1.]]]])
346
- #KEVINDEBUG output_tokens: tensor([[ 8420, 374, 264, 32794, 911, 60249, 11, 17689, 13, 1084,
347
- # 374, 264, 32794, 911, 279, 3283, 315, 60249, 11, 17689,
348
- # 13, 576, 32794, 374]])
349
- #KEVINDEBUG model_inp: tensor([[374]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
350
- # 1., 1., 1., 1., 1., 1., 1., 1.]]]])
351
- #KEVINDEBUG output_tokens: tensor([[ 8420, 374, 264, 32794, 911, 60249, 11, 17689, 13, 1084,
352
- # 374, 264, 32794, 911, 279, 3283, 315, 60249, 11, 17689,
353
- # 13, 576, 32794, 374, 5326]])
354
- #KEVINDEBUG model_inp: tensor([[5326]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
355
- # 1., 1., 1., 1., 1., 1., 1., 1., 1.]]]])
356
- #KEVINDEBUG output_tokens: tensor([[ 8420, 374, 264, 32794, 911, 60249, 11, 17689, 13, 1084,
357
- # 374, 264, 32794, 911, 279, 3283, 315, 60249, 11, 17689,
358
- # 13, 576, 32794, 374, 5326, 304]])
359
- #input : Here is a poem about Valencia output: Here is a poem about Valencia, Spain. It is a poem about the city of Valencia, Spain. The poem is written in
 
207
  if is_first_run:
208
  model_inp = input_ids
209
  is_first_run = False
210
+ #print(f"KEVINDEBUG model_inp: {model_inp} causal_mask: {causal_mask}")
211
  output = torch_model(model_inp, causal_mask) # Start with a sub-sequence that long so need multiple previous when size only very low larger later same past arg a so try keeping right padded!
212
 
213
  # Get the most likely token IDs
 
215
 
216
  # Append the generated token IDs to the output tensor
217
  output_tokens = torch.cat((output_tokens, output_ids[:, -1, None]), dim=-1)
218
+ #print(f"KEVINDEBUG output_tokens: {output_tokens}")
219
 
220
  # Update the causal mask
221
  causal_mask = torch.ones((1, 1, 1, output_tokens.shape[-1] + 1), dtype=torch.float32)
 
269
  mlmodel_fp16.save("StatefulQwen2_0_5_BInstructFP16.mlpackage")
270
 
271
  # Block-wise quantize model weights to int4
272
+ # op_config = ct.optimize.coreml.OpLinearQuantizerConfig(
273
+ # mode="linear_symmetric",
274
+ # dtype="int4",
275
+ # granularity="per_block",
276
+ # block_size=32,
277
+ # )
278
+ # config = ct.optimize.coreml.OptimizationConfig(global_config=op_config)
279
+ # mlmodel_int4 = ct.optimize.coreml.linear_quantize_weights(mlmodel_fp16, config=config)
280
+ # mlmodel_int4._spec.description.metadata.userDefined.update({METADATA_TOKENIZER: MODEL_ID})
281
+ # mlmodel_int4.save("StatefulQwen2_0_5_BInstructInt4.mlpackage")
282
 
283
 
284
  if __name__ == "__main__":
285
+ export()