File size: 2,625 Bytes
c557532
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import gradio as gr
from quantizer_gr import quantize_gr, get_model_class

# Custom stylesheet passed to gr.Blocks(css=...). One entry per rule:
#   .title          - heading Markdown (elem_classes="title")
#   .info           - token hint Markdown (elem_classes="info")
#   .block.result   - output Markdown panel (elem_classes="result")
#   .desc [...]     - floats elements whose src ends in '#float' inside a
#                     "desc" container; no component visible in this file
#                     uses that class.
_css_rules = (
    ".title { font-size: 3em; align-items: center; text-align: center; }",
    ".info { align-items: center; text-align: center; }",
    ".block.result { margin: 1em 0; padding: 1em; box-shadow: 0 0 3px 3px #664422, 0 0 3px 2px #664422 inset; border-radius: 6px; background: #665544; }",
    ".desc [src$='#float'] { float: right; margin: 20px; }",
)
# Rules are separated (and the whole sheet is wrapped) by blank lines.
css = "\n\n".join(("", *_css_rules, ""))

# Build the Gradio UI. Components are declared in layout order; the event
# wiring at the bottom connects them to `quantize_gr`.
# delete_cache=(60, 3600) is Gradio's (frequency, age) pair in seconds for
# purging cached files (see the gr.Blocks documentation).
with gr.Blocks(theme="NoCrypt/miku@>=1.2.2", fill_width=True, css=css, delete_cache=(60, 3600)) as demo:
    # Query the available model classes once so the Radio's choices and its
    # default value come from the same list.
    model_classes = get_model_class()
    with gr.Column():
        gr.Markdown("# Quantizer Alpha (Does not work in CPU space)", elem_classes="title")
        with gr.Group():
            with gr.Row():
                repo_id = gr.Textbox(label="Repo ID", placeholder="author/model", value="", lines=1)
                with gr.Column():
                    # The write token is a secret: mask it on screen instead of
                    # echoing it in clear text.
                    hf_token = gr.Textbox(label="Your HF write token", placeholder="hf_...", value="", max_lines=1, type="password")
                    # Link host fixed: the original URL had a stray "." after
                    # "huggingface.co".
                    gr.Markdown("Your token is available at [hf.co/settings/tokens](https://huggingface.co/settings/tokens).", elem_classes="info")
            with gr.Row():
                newrepo_id = gr.Textbox(label="Upload repo ID", placeholder="yourid/newrepo", value="", max_lines=1)
                is_private = gr.Checkbox(label="Create private repo", value=True)
                is_overwrite = gr.Checkbox(label="Overwrite repo", value=False)
            with gr.Accordion("Advanced", open=False):
                with gr.Row():
                    qtype = gr.Radio(label="Quantization algorithm", choices=["nf4"], value="nf4")
                    dtype = gr.Radio(label="Computation data type", choices=["fp16", "fp32", "bf16", "fp8", "default"], value="bf16")
                    mclass = gr.Radio(label="Model class", choices=model_classes, value=model_classes[0])
        run_button = gr.Button(value="Run", variant="primary")
        with gr.Group():
            # Invisible component used purely as state: it is passed into
            # quantize_gr and overwritten by its first return value.
            # NOTE(review): presumably accumulates uploaded repo URLs across
            # runs - confirm in quantizer_gr.
            uploaded_urls = gr.CheckboxGroup(visible=False, choices=[], value=[]) # hidden
            urls_md = gr.Markdown("<br><br>", elem_classes="result", visible=True)
            clear_button = gr.Button(value="Clear Output", variant="secondary")
    gr.DuplicateButton(value="Duplicate Space")

    # Run: call quantize_gr with the form values; it returns the new hidden
    # state and the Markdown shown in the result panel.
    gr.on(
        triggers=[run_button.click],
        fn=quantize_gr,
        inputs=[repo_id, hf_token, uploaded_urls, newrepo_id, is_private, is_overwrite, dtype, qtype, mclass],
        outputs=[uploaded_urls, urls_md],
    )
    # Clear: reset the hidden state and the result panel to their initial
    # values; runs outside the queue and is hidden from the API page.
    clear_button.click(lambda: ([], "<br><br>"), None, [uploaded_urls, urls_md], queue=False, show_api=False)

# Enable the request queue, then start the server. Blocks.queue() returns
# the Blocks instance itself, so the two calls can be chained.
demo.queue().launch()