Can you share the conversion code?

#7
by Q-bert - opened

I couldn't write the conversion code myself, but you did. Could you please share it with me?

Not sure if this is still useful, but sure - here's the script I wrote to convert this originally.

import copy
import os
import safetensors.torch
import glob
import json


def transform_st(path: str, out_dir: str):
    """Rewrite one safetensors shard with renamed layer-norm keys.

    Loads the tensors stored at ``path``, renames every key containing
    ``.ln1.`` to use ``.input_layernorm.`` and every key containing
    ``.ln2.`` to use ``.post_attention_layernorm.``, then saves the result
    in ``out_dir`` under the same file name as the input.

    Args:
        path: Path of the safetensors file to read.
        out_dir: Directory the converted file is written to.
    """
    renames = (
        (".ln1.", ".input_layernorm."),
        (".ln2.", ".post_attention_layernorm."),
    )
    tensors = safetensors.torch.load_file(path)
    # Snapshot the key names first: the dict is mutated inside the loop.
    for original_name in tuple(tensors):
        renamed = original_name
        for needle, replacement in renames:
            renamed = renamed.replace(needle, replacement)
        if renamed == original_name:
            continue
        tensors[renamed] = tensors.pop(original_name)
    destination = os.path.join(out_dir, os.path.basename(path))
    safetensors.torch.save_file(tensors, destination, metadata={"format": "pt"})


def process_model(path: str, out_path: str):
    """Convert every shard of a sharded safetensors checkpoint plus its index.

    Each ``model-*.safetensors`` file found in ``path`` is passed through
    ``transform_st`` and written to ``out_path``. Afterwards
    ``model.safetensors.index.json`` is read from ``path`` and written to
    ``out_path`` with the same ``.ln1.`` / ``.ln2.`` key renaming applied to
    the keys of its ``weight_map``.

    Args:
        path: Directory holding the source shards and the index file.
        out_path: Directory that receives the converted files; it is created
            if it does not exist yet.

    Raises:
        FileNotFoundError: If the index file is missing from ``path``.
        KeyError: If the index lacks a ``metadata`` or ``weight_map`` entry.
    """
    index_name = "model.safetensors.index.json"

    # Make sure the destination exists before any shard or index is written.
    os.makedirs(out_path, exist_ok=True)

    for p in glob.glob(os.path.join(path, "model-*.safetensors")):
        transform_st(p, out_path)

    # The mode and encoding are arguments of open(), not path components of
    # os.path.join(); the join must be closed before they are passed.
    with open(os.path.join(path, index_name), "r", encoding="utf-8") as fd:
        index_data = json.load(fd)

    new_index = {"metadata": copy.copy(index_data["metadata"]), "weight_map": {}}
    for key, shard_name in index_data["weight_map"].items():
        new_key = key.replace(".ln1.", ".input_layernorm.").replace(
            ".ln2.", ".post_attention_layernorm."
        )
        new_index["weight_map"][new_key] = shard_name

    with open(os.path.join(out_path, index_name), "w", encoding="utf-8") as fd:
        json.dump(new_index, fd)


# Run the conversion: read the checkpoint in /workspace/Yi-34B and write the
# renamed copy to /workspace/Yi-34B-Llama.
process_model("/workspace/Yi-34B", "/workspace/Yi-34B-Llama")

Not sure if this is still useful, but sure - here's the script I wrote to convert this originally.

import copy
import os
import safetensors.torch
import glob
import json


def transform_st(path: str, out_dir: str):
    """Rewrite one safetensors shard with renamed layer-norm keys.

    Loads the tensors stored at ``path``, renames every key containing
    ``.ln1.`` to use ``.input_layernorm.`` and every key containing
    ``.ln2.`` to use ``.post_attention_layernorm.``, then saves the result
    in ``out_dir`` under the same file name as the input.

    Args:
        path: Path of the safetensors file to read.
        out_dir: Directory the converted file is written to.
    """
    renames = (
        (".ln1.", ".input_layernorm."),
        (".ln2.", ".post_attention_layernorm."),
    )
    tensors = safetensors.torch.load_file(path)
    # Snapshot the key names first: the dict is mutated inside the loop.
    for original_name in tuple(tensors):
        renamed = original_name
        for needle, replacement in renames:
            renamed = renamed.replace(needle, replacement)
        if renamed == original_name:
            continue
        tensors[renamed] = tensors.pop(original_name)
    destination = os.path.join(out_dir, os.path.basename(path))
    safetensors.torch.save_file(tensors, destination, metadata={"format": "pt"})


def process_model(path: str, out_path: str):
    """Convert every shard of a sharded safetensors checkpoint plus its index.

    Each ``model-*.safetensors`` file found in ``path`` is passed through
    ``transform_st`` and written to ``out_path``. Afterwards
    ``model.safetensors.index.json`` is read from ``path`` and written to
    ``out_path`` with the same ``.ln1.`` / ``.ln2.`` key renaming applied to
    the keys of its ``weight_map``.

    Args:
        path: Directory holding the source shards and the index file.
        out_path: Directory that receives the converted files; it is created
            if it does not exist yet.

    Raises:
        FileNotFoundError: If the index file is missing from ``path``.
        KeyError: If the index lacks a ``metadata`` or ``weight_map`` entry.
    """
    index_name = "model.safetensors.index.json"

    # Make sure the destination exists before any shard or index is written.
    os.makedirs(out_path, exist_ok=True)

    for p in glob.glob(os.path.join(path, "model-*.safetensors")):
        transform_st(p, out_path)

    # The mode and encoding are arguments of open(), not path components of
    # os.path.join(); the join must be closed before they are passed.
    with open(os.path.join(path, index_name), "r", encoding="utf-8") as fd:
        index_data = json.load(fd)

    new_index = {"metadata": copy.copy(index_data["metadata"]), "weight_map": {}}
    for key, shard_name in index_data["weight_map"].items():
        new_key = key.replace(".ln1.", ".input_layernorm.").replace(
            ".ln2.", ".post_attention_layernorm."
        )
        new_index["weight_map"][new_key] = shard_name

    with open(os.path.join(out_path, index_name), "w", encoding="utf-8") as fd:
        json.dump(new_index, fd)


# Run the conversion: read the checkpoint in /workspace/Yi-34B and write the
# renamed copy to /workspace/Yi-34B-Llama.
process_model("/workspace/Yi-34B", "/workspace/Yi-34B-Llama")

Can we do the same for this model? It is almost exactly the same as the stablelm llama.

https://huggingface.co/stabilityai/stablelm-zephyr-3b

Can we do the same for this model? It is almost exactly the same as the stablelm llama.

https://huggingface.co/stabilityai/stablelm-zephyr-3b

StableLM has a bias term in their norm layers, which Llama does not. You could strip them out and try to use the rest of the weights with LlamaForCausalLM, but it's pretty likely it won't be coherent without fine-tuning.

Hey, I'm pretty sure there's a typo in that code, FWIW. I don't know python, so it took me a while to override the presumption that I had no idea what was going on and figure out that:

@@ -27,7 +27,7 @@
     for p in glob.glob(os.path.join(path, "model-*.safetensors")):
         transform_st(p, out_path)
 
-    with open(os.path.join(path, "model.safetensors.index.json", "r")) as fd:
+    with open(os.path.join(path, "model.safetensors.index.json"), "r") as fd:
         index_data = json.load(fd)
 
     new_index = {"metadata": copy.copy(index_data["metadata"]), "weight_map": {}}
@@ -38,7 +38,7 @@
         new_index["weight_map"][new_key] = index_data["weight_map"][key]
 
     with open(
-        os.path.join(out_path, "model.safetensors.index.json", "w", encoding="utf-8")
+        os.path.join(out_path, "model.safetensors.index.json"), "w", encoding="utf-8"
     ) as fd:
         json.dump(new_index, fd)

You're killin' me over here! ;-)

Or did I somehow miss something? All I know for sure is that the thing isn't trying to e.g. append 'w' to the path now, heh.
But really I'm leaving this to ask about how to handle the tokenizer. I'd like to convert some yi-based models to use the llama tokenizer so that I don't have to use trust_remote_code, because KoboldAI doesn't support that. Thanks.

Umm, well, that's supposed to be a unified diff. That bullet point is the +. I don't have time to figure out how HF parses all of this.
So umm here u can use this too lol: https://pastebin.com/qweZcERP

Sign up or log in to comment