File size: 5,255 Bytes
796d506
 
 
ad03828
796d506
 
 
86b9c38
796d506
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ad03828
 
796d506
 
 
 
 
ad03828
3cc265e
796d506
73a53d1
 
796d506
ad03828
 
 
 
 
 
796d506
 
 
ad03828
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import pathlib
import tempfile
from typing import Generator

import gradio as gr
import torch
import yaml
from gradio_logsview import LogsView

has_gpu = torch.cuda.is_available()

cli = "mergekit-yaml config.yaml merge --copy-tokenizer" + (
    " --cuda --low-cpu-memory"
    if has_gpu
    else " --allow-crimes --out-shard-size 1B --lazy-unpickle"
)

print(cli)

## This Space is heavily inspired by LazyMergeKit by Maxime Labonne
## https://colab.research.google.com/drive/1obulZ1ROXHjYLn6PPZJwRR6GzgQogxxb


MARKDOWN_DESCRIPTION = """
# mergekit-gui

The fastest way to perform a model merge πŸ”₯

Specify a YAML configuration file (see examples below) and a HF token and this app will perform the merge and upload the merged model to your user profile.
"""

MARKDOWN_ARTICLE = """
___

## Merge Configuration

[Mergekit](https://github.com/arcee-ai/mergekit) configurations are YAML documents specifying the operations to perform in order to produce your merged model.
Below are the primary elements of a configuration file:

- `merge_method`: Specifies the method to use for merging models. See [Merge Methods](https://github.com/arcee-ai/mergekit#merge-methods) for a list.
- `slices`: Defines slices of layers from different models to be used. This field is mutually exclusive with `models`.
- `models`: Defines entire models to be used for merging. This field is mutually exclusive with `slices`.
- `base_model`: Specifies the base model used in some merging methods.
- `parameters`: Holds various parameters such as weights and densities, which can also be specified at different levels of the configuration.
- `dtype`: Specifies the data type used for the merging operation.
- `tokenizer_source`: Determines how to construct a tokenizer for the merged model.

## Merge Methods

A quick overview of the currently supported merge methods:

| Method                                                                                       | `merge_method` value | Multi-Model | Uses base model |
| -------------------------------------------------------------------------------------------- | -------------------- | ----------- | --------------- |
| Linear ([Model Soups](https://arxiv.org/abs/2203.05482))                                     | `linear`             | βœ…          | ❌              |
| SLERP                                                                                        | `slerp`              | ❌          | βœ…              |
| [Task Arithmetic](https://arxiv.org/abs/2212.04089)                                          | `task_arithmetic`    | βœ…          | βœ…              |
| [TIES](https://arxiv.org/abs/2306.01708)                                                     | `ties`               | βœ…          | βœ…              |
| [DARE](https://arxiv.org/abs/2311.03099) [TIES](https://arxiv.org/abs/2306.01708)            | `dare_ties`          | βœ…          | βœ…              |
| [DARE](https://arxiv.org/abs/2311.03099) [Task Arithmetic](https://arxiv.org/abs/2212.04089) | `dare_linear`        | βœ…          | βœ…              |
| Passthrough                                                                                  | `passthrough`        | ❌          | ❌              |
| [Model Stock](https://arxiv.org/abs/2403.19522)                                              | `model_stock`        | βœ…          | βœ…              |

"""

examples = [[f.name, f.read_text()] for f in pathlib.Path("examples").glob("*.yml")]


def merge(
    example_filename: str, yaml_config: str, hf_token: str, repo_name: str
) -> Generator[str, None, None]:
    if not yaml_config:
        raise gr.Error("Empty yaml, pick an example below")
    try:
        _ = yaml.safe_load(yaml_config)
    except Exception as e:
        raise gr.Error(f"Invalid yaml {e}")

    with tempfile.TemporaryDirectory() as tmpdirname:
        tmpdir = pathlib.Path(tmpdirname)
        with open(tmpdir / "config.yaml", "w", encoding="utf-8") as f:
            f.write(yaml_config)

        yield from LogsView.run_process(f"cd {tmpdir} && {cli}".split())

        ## TODO(implement upload at the end of the merge, and display the repo URL)


with gr.Blocks() as demo:
    gr.Markdown(MARKDOWN_DESCRIPTION)

    with gr.Row():
        filename = gr.Textbox(visible=False, label="filename")
        config = gr.Code(
            language="yaml",
            lines=10,
            label="config.yaml",
        )
        with gr.Column():
            token = gr.Textbox(
                lines=1,
                label="HF Write Token",
                info="https://hf.co/settings/token",
                type="password",
                placeholder="optional, will not upload merge if empty (dry-run)",
            )
            repo_name = gr.Textbox(
                lines=1,
                label="Repo name",
                placeholder="optional, will create a random name if empty",
            )
    button = gr.Button("Merge", variant="primary")
    logs = LogsView()
    gr.Examples(examples, label="Examples", inputs=[filename, config], outputs=[logs])
    gr.Markdown(MARKDOWN_ARTICLE)

    button.click(fn=merge, inputs=[filename, config, token, repo_name], outputs=[logs])

demo.queue(default_concurrency_limit=1).launch()