kevin36524
/

qwen2_0_5_instruct_int4_coreml

Core ML

Model card Files Files and versions Community

kevin36524 committed on Aug 9, 2024

Commit

7c62b77

verified ·

1 Parent(s): 934151d

Upload export_qwen2_wc.py with huggingface_hub

Browse files

Files changed (1) hide show

export_qwen2_wc.py +78 -83

export_qwen2_wc.py CHANGED Viewed

@@ -180,12 +180,20 @@ def generate() -> None:
     torch_model = StatefulQwen2ForCausalLM(MODEL_ID, max_context_size=max_context_size)
     torch_model.eval()
-    input_ids: torch.Tensor = torch.tensor([[7985, 264, 32794, 911, 60249]], dtype=torch.int32)
     causal_mask: torch.Tensor = torch.ones((1, 1, 1, input_ids.shape[-1] + 1), dtype=torch.float32)
     # Set the output length
     output_length = 20
     # Initialize the output tensor
     output_tokens = input_ids
@@ -195,9 +203,12 @@ def generate() -> None:
         #torch_model.kv_cache.past_seen_tokens = causal_mask.shape[-1] - output_tokens.shape[-1]
         # Get the model output
-        model_inp = output_tokens[:, -20:]
         print(f"KEVINDEBUG model_inp: {model_inp} causal_mask: {causal_mask}")
-        output = torch_model(output_tokens[:, -20:], causal_mask)  # Start with a sub-squence that long so need multiple previous when size only very lwo larger later same past arg a so try keeping right padded!
         # Get the most likely token IDs
         output_ids = torch.argmax(output, dim=-1)
@@ -209,9 +220,6 @@ def generate() -> None:
         # Update the causal mask
         causal_mask = torch.ones((1, 1, 1, output_tokens.shape[-1] + 1), dtype=torch.float32)
-    # Decode output tokens using the tokenizer
-    from transformers import AutoTokenizer
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
     decoded_output = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
     print(f"input : {tokenizer.decode(input_ids[0])} output: {decoded_output}")
@@ -277,88 +285,75 @@ if __name__ == "__main__":
     generate()
 ###
-#(venv) kevin36524@instance-20240808-212842:~$ python export_qwen2_wc.py
-#Failed to load _MLModelProxy: No module named 'coremltools.libcoremlpython'
-#KEVINDEBUG model_inp: tensor([[ 7985,   264, 32794,   911, 60249]], dtype=torch.int32) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1.]]]])
-#KEVINDEBUG output_tokens: tensor([[ 7985,   264, 32794,   911, 60249,    11]])
-#KEVINDEBUG model_inp: tensor([[ 7985,   264, 32794,   911, 60249,    11]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1.]]]])
-#KEVINDEBUG output_tokens: tensor([[ 7985,   264, 32794,   911, 60249,    11, 17689]])
-#KEVINDEBUG model_inp: tensor([[ 7985,   264, 32794,   911, 60249,    11, 17689]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1.]]]])
-#KEVINDEBUG output_tokens: tensor([[ 7985,   264, 32794,   911, 60249,    11, 17689,    11]])
-#KEVINDEBUG model_inp: tensor([[ 7985,   264, 32794,   911, 60249,    11, 17689,    11]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1.]]]])
-#KEVINDEBUG output_tokens: tensor([[ 7985,   264, 32794,   911, 60249,    11, 17689,    11, 21080]])
-#KEVINDEBUG model_inp: tensor([[ 7985,   264, 32794,   911, 60249,    11, 17689,    11, 21080]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]]]])
-#KEVINDEBUG output_tokens: tensor([[ 7985,   264, 32794,   911, 60249,    11, 17689,    11, 21080,   389]])
-#KEVINDEBUG model_inp: tensor([[ 7985,   264, 32794,   911, 60249,    11, 17689,    11, 21080,   389]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]]]])
-#KEVINDEBUG output_tokens: tensor([[ 7985,   264, 32794,   911, 60249,    11, 17689,    11, 21080,   389,
-#          1181]])
-#KEVINDEBUG model_inp: tensor([[ 7985,   264, 32794,   911, 60249,    11, 17689,    11, 21080,   389,
-#          1181]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]]]])
-#KEVINDEBUG output_tokens: tensor([[ 7985,   264, 32794,   911, 60249,    11, 17689,    11, 21080,   389,
-#          1181, 17646]])
-#KEVINDEBUG model_inp: tensor([[ 7985,   264, 32794,   911, 60249,    11, 17689,    11, 21080,   389,
-#          1181, 17646]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]]]])
-#KEVINDEBUG output_tokens: tensor([[ 7985,   264, 32794,   911, 60249,    11, 17689,    11, 21080,   389,
-#          1181, 17646,    11]])
-#KEVINDEBUG model_inp: tensor([[ 7985,   264, 32794,   911, 60249,    11, 17689,    11, 21080,   389,
-#          1181, 17646,    11]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]]]])
-#KEVINDEBUG output_tokens: tensor([[ 7985,   264, 32794,   911, 60249,    11, 17689,    11, 21080,   389,
-#          1181, 17646,    11,  7674]])
-#KEVINDEBUG model_inp: tensor([[ 7985,   264, 32794,   911, 60249,    11, 17689,    11, 21080,   389,
-#          1181, 17646,    11,  7674]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]]]])
-#KEVINDEBUG output_tokens: tensor([[ 7985,   264, 32794,   911, 60249,    11, 17689,    11, 21080,   389,
-#          1181, 17646,    11,  7674,    11]])
-#KEVINDEBUG model_inp: tensor([[ 7985,   264, 32794,   911, 60249,    11, 17689,    11, 21080,   389,
-#          1181, 17646,    11,  7674,    11]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]]]])
-#KEVINDEBUG output_tokens: tensor([[ 7985,   264, 32794,   911, 60249,    11, 17689,    11, 21080,   389,
-#          1181, 17646,    11,  7674,    11,   323]])
-#KEVINDEBUG model_inp: tensor([[ 7985,   264, 32794,   911, 60249,    11, 17689,    11, 21080,   389,
-#          1181, 17646,    11,  7674,    11,   323]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]]]])
-#KEVINDEBUG output_tokens: tensor([[ 7985,   264, 32794,   911, 60249,    11, 17689,    11, 21080,   389,
-#          1181, 17646,    11,  7674,    11,   323, 35005]])
-#KEVINDEBUG model_inp: tensor([[ 7985,   264, 32794,   911, 60249,    11, 17689,    11, 21080,   389,
-#          1181, 17646,    11,  7674,    11,   323, 35005]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
 #           1.]]]])
-#KEVINDEBUG output_tokens: tensor([[ 7985,   264, 32794,   911, 60249,    11, 17689,    11, 21080,   389,
-#          1181, 17646,    11,  7674,    11,   323, 35005,    13]])
-#KEVINDEBUG model_inp: tensor([[ 7985,   264, 32794,   911, 60249,    11, 17689,    11, 21080,   389,
-#          1181, 17646,    11,  7674,    11,   323, 35005,    13]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
 #           1., 1.]]]])
-#KEVINDEBUG output_tokens: tensor([[ 7985,   264, 32794,   911, 60249,    11, 17689,    11, 21080,   389,
-#          1181, 17646,    11,  7674,    11,   323, 35005,    13,  5443]])
-#KEVINDEBUG model_inp: tensor([[ 7985,   264, 32794,   911, 60249,    11, 17689,    11, 21080,   389,
-#          1181, 17646,    11,  7674,    11,   323, 35005,    13,  5443]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
 #           1., 1., 1.]]]])
-#KEVINDEBUG output_tokens: tensor([[ 7985,   264, 32794,   911, 60249,    11, 17689,    11, 21080,   389,
-#          1181, 17646,    11,  7674,    11,   323, 35005,    13,  5443, 42020]])
-#KEVINDEBUG model_inp: tensor([[ 7985,   264, 32794,   911, 60249,    11, 17689,    11, 21080,   389,
-#          1181, 17646,    11,  7674,    11,   323, 35005,    13,  5443, 42020]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
 #           1., 1., 1., 1.]]]])
-#KEVINDEBUG output_tokens: tensor([[ 7985,   264, 32794,   911, 60249,    11, 17689,    11, 21080,   389,
-#          1181, 17646,    11,  7674,    11,   323, 35005,    13,  5443, 42020,
-#         40445]])
-#KEVINDEBUG model_inp: tensor([[  264, 32794,   911, 60249,    11, 17689,    11, 21080,   389,  1181,
-#         17646,    11,  7674,    11,   323, 35005,    13,  5443, 42020, 40445]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
 #           1., 1., 1., 1., 1.]]]])
-#KEVINDEBUG output_tokens: tensor([[ 7985,   264, 32794,   911, 60249,    11, 17689,    11, 21080,   389,
-#          1181, 17646,    11,  7674,    11,   323, 35005,    13,  5443, 42020,
-#         40445,   323]])
-#KEVINDEBUG model_inp: tensor([[32794,   911, 60249,    11, 17689,    11, 21080,   389,  1181, 17646,
-#            11,  7674,    11,   323, 35005,    13,  5443, 42020, 40445,   323]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
 #           1., 1., 1., 1., 1., 1.]]]])
-#KEVINDEBUG output_tokens: tensor([[ 7985,   264, 32794,   911, 60249,    11, 17689,    11, 21080,   389,
-#          1181, 17646,    11,  7674,    11,   323, 35005,    13,  5443, 42020,
-#         40445,   323, 32976]])
-#KEVINDEBUG model_inp: tensor([[  911, 60249,    11, 17689,    11, 21080,   389,  1181, 17646,    11,
-#          7674,    11,   323, 35005,    13,  5443, 42020, 40445,   323, 32976]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
 #           1., 1., 1., 1., 1., 1., 1.]]]])
-#KEVINDEBUG output_tokens: tensor([[ 7985,   264, 32794,   911, 60249,    11, 17689,    11, 21080,   389,
-#          1181, 17646,    11,  7674,    11,   323, 35005,    13,  5443, 42020,
-#         40445,   323, 32976,  7987]])
-#KEVINDEBUG model_inp: tensor([[60249,    11, 17689,    11, 21080,   389,  1181, 17646,    11,  7674,
-#            11,   323, 35005,    13,  5443, 42020, 40445,   323, 32976,  7987]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
 #           1., 1., 1., 1., 1., 1., 1., 1.]]]])
-#KEVINDEBUG output_tokens: tensor([[ 7985,   264, 32794,   911, 60249,    11, 17689,    11, 21080,   389,
-#          1181, 17646,    11,  7674,    11,   323, 35005,    13,  5443, 42020,
-#         40445,   323, 32976,  7987,    11]])
-#input : Write a poem about Valencia output: Write a poem about Valencia, Spain, focusing on its architecture, culture, and cuisine. Use vivid imagery and vibrant colors,

     torch_model = StatefulQwen2ForCausalLM(MODEL_ID, max_context_size=max_context_size)
     torch_model.eval()
+    # Decode output tokens using the tokenizer
+    from transformers import AutoTokenizer
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+    #initial_prompt = "Write a christmas Carol"
+    initial_prompt = "Here is a poem about Valencia"
+    input_ids = tokenizer(initial_prompt, return_tensors='pt').input_ids
     causal_mask: torch.Tensor = torch.ones((1, 1, 1, input_ids.shape[-1] + 1), dtype=torch.float32)
     # Set the output length
     output_length = 20
+    is_first_run = True
     # Initialize the output tensor
     output_tokens = input_ids
         #torch_model.kv_cache.past_seen_tokens = causal_mask.shape[-1] - output_tokens.shape[-1]
         # Get the model output
+        model_inp = output_tokens[:, -1:]
+        if is_first_run:
+            model_inp = input_ids
+            is_first_run = False
         print(f"KEVINDEBUG model_inp: {model_inp} causal_mask: {causal_mask}")
+        output = torch_model(model_inp, causal_mask)  # Start with a sub-sequence that long so need multiple previous when size only very lwo larger later same past arg a so try keeping right padded!
         # Get the most likely token IDs
         output_ids = torch.argmax(output, dim=-1)
         # Update the causal mask
         causal_mask = torch.ones((1, 1, 1, output_tokens.shape[-1] + 1), dtype=torch.float32)
     decoded_output = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
     print(f"input : {tokenizer.decode(input_ids[0])} output: {decoded_output}")
     generate()
 ###
+#KEVINDEBUG model_inp: tensor([[ 8420,   374,   264, 32794,   911, 60249]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1.]]]])
+#KEVINDEBUG output_tokens: tensor([[ 8420,   374,   264, 32794,   911, 60249,    11]])
+#KEVINDEBUG model_inp: tensor([[11]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1.]]]])
+#KEVINDEBUG output_tokens: tensor([[ 8420,   374,   264, 32794,   911, 60249,    11, 17689]])
+#KEVINDEBUG model_inp: tensor([[17689]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1.]]]])
+#KEVINDEBUG output_tokens: tensor([[ 8420,   374,   264, 32794,   911, 60249,    11, 17689,    13]])
+#KEVINDEBUG model_inp: tensor([[13]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]]]])
+#KEVINDEBUG output_tokens: tensor([[ 8420,   374,   264, 32794,   911, 60249,    11, 17689,    13,  1084]])
+#KEVINDEBUG model_inp: tensor([[1084]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]]]])
+#KEVINDEBUG output_tokens: tensor([[ 8420,   374,   264, 32794,   911, 60249,    11, 17689,    13,  1084,
+#           374]])
+#KEVINDEBUG model_inp: tensor([[374]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]]]])
+#KEVINDEBUG output_tokens: tensor([[ 8420,   374,   264, 32794,   911, 60249,    11, 17689,    13,  1084,
+#           374,   264]])
+#KEVINDEBUG model_inp: tensor([[264]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]]]])
+#KEVINDEBUG output_tokens: tensor([[ 8420,   374,   264, 32794,   911, 60249,    11, 17689,    13,  1084,
+#           374,   264, 32794]])
+#KEVINDEBUG model_inp: tensor([[32794]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]]]])
+#KEVINDEBUG output_tokens: tensor([[ 8420,   374,   264, 32794,   911, 60249,    11, 17689,    13,  1084,
+#           374,   264, 32794,   911]])
+#KEVINDEBUG model_inp: tensor([[911]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]]]])
+#KEVINDEBUG output_tokens: tensor([[ 8420,   374,   264, 32794,   911, 60249,    11, 17689,    13,  1084,
+#           374,   264, 32794,   911,   279]])
+#KEVINDEBUG model_inp: tensor([[279]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]]]])
+#KEVINDEBUG output_tokens: tensor([[ 8420,   374,   264, 32794,   911, 60249,    11, 17689,    13,  1084,
+#           374,   264, 32794,   911,   279,  3283]])
+#KEVINDEBUG model_inp: tensor([[3283]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]]]])
+#KEVINDEBUG output_tokens: tensor([[ 8420,   374,   264, 32794,   911, 60249,    11, 17689,    13,  1084,
+#           374,   264, 32794,   911,   279,  3283,   315]])
+#KEVINDEBUG model_inp: tensor([[315]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
 #           1.]]]])
+#KEVINDEBUG output_tokens: tensor([[ 8420,   374,   264, 32794,   911, 60249,    11, 17689,    13,  1084,
+#           374,   264, 32794,   911,   279,  3283,   315, 60249]])
+#KEVINDEBUG model_inp: tensor([[60249]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
 #           1., 1.]]]])
+#KEVINDEBUG output_tokens: tensor([[ 8420,   374,   264, 32794,   911, 60249,    11, 17689,    13,  1084,
+#           374,   264, 32794,   911,   279,  3283,   315, 60249,    11]])
+#KEVINDEBUG model_inp: tensor([[11]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
 #           1., 1., 1.]]]])
+#KEVINDEBUG output_tokens: tensor([[ 8420,   374,   264, 32794,   911, 60249,    11, 17689,    13,  1084,
+#           374,   264, 32794,   911,   279,  3283,   315, 60249,    11, 17689]])
+#KEVINDEBUG model_inp: tensor([[17689]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
 #           1., 1., 1., 1.]]]])
+#KEVINDEBUG output_tokens: tensor([[ 8420,   374,   264, 32794,   911, 60249,    11, 17689,    13,  1084,
+#           374,   264, 32794,   911,   279,  3283,   315, 60249,    11, 17689,
+#            13]])
+#KEVINDEBUG model_inp: tensor([[13]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
 #           1., 1., 1., 1., 1.]]]])
+#KEVINDEBUG output_tokens: tensor([[ 8420,   374,   264, 32794,   911, 60249,    11, 17689,    13,  1084,
+#           374,   264, 32794,   911,   279,  3283,   315, 60249,    11, 17689,
+#            13,   576]])
+#KEVINDEBUG model_inp: tensor([[576]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
 #           1., 1., 1., 1., 1., 1.]]]])
+#KEVINDEBUG output_tokens: tensor([[ 8420,   374,   264, 32794,   911, 60249,    11, 17689,    13,  1084,
+#           374,   264, 32794,   911,   279,  3283,   315, 60249,    11, 17689,
+#            13,   576, 32794]])
+#KEVINDEBUG model_inp: tensor([[32794]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
 #           1., 1., 1., 1., 1., 1., 1.]]]])
+#KEVINDEBUG output_tokens: tensor([[ 8420,   374,   264, 32794,   911, 60249,    11, 17689,    13,  1084,
+#           374,   264, 32794,   911,   279,  3283,   315, 60249,    11, 17689,
+#            13,   576, 32794,   374]])
+#KEVINDEBUG model_inp: tensor([[374]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
 #           1., 1., 1., 1., 1., 1., 1., 1.]]]])
+#KEVINDEBUG output_tokens: tensor([[ 8420,   374,   264, 32794,   911, 60249,    11, 17689,    13,  1084,
+#           374,   264, 32794,   911,   279,  3283,   315, 60249,    11, 17689,
+#            13,   576, 32794,   374,  5326]])
+#KEVINDEBUG model_inp: tensor([[5326]]) causal_mask: tensor([[[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
+#           1., 1., 1., 1., 1., 1., 1., 1., 1.]]]])
+#KEVINDEBUG output_tokens: tensor([[ 8420,   374,   264, 32794,   911, 60249,    11, 17689,    13,  1084,
+#           374,   264, 32794,   911,   279,  3283,   315, 60249,    11, 17689,
+#            13,   576, 32794,   374,  5326,   304]])
+#input : Here is a poem about Valencia output: Here is a poem about Valencia, Spain. It is a poem about the city of Valencia, Spain. The poem is written in