pcuenq HF staff committed on
Commit
3b6bdba
·
1 Parent(s): 84cdc38

Add VB's app

Browse files
Files changed (4) hide show
  1. .gitattributes +1 -0
  2. app.py +136 -4
  3. error.png +3 -0
  4. llama.png +3 -0
.gitattributes CHANGED
@@ -19,6 +19,7 @@
19
  *.pb filter=lfs diff=lfs merge=lfs -text
20
  *.pickle filter=lfs diff=lfs merge=lfs -text
21
  *.pkl filter=lfs diff=lfs merge=lfs -text
 
22
  *.pt filter=lfs diff=lfs merge=lfs -text
23
  *.pth filter=lfs diff=lfs merge=lfs -text
24
  *.rar filter=lfs diff=lfs merge=lfs -text
 
19
  *.pb filter=lfs diff=lfs merge=lfs -text
20
  *.pickle filter=lfs diff=lfs merge=lfs -text
21
  *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.png filter=lfs diff=lfs merge=lfs -text
23
  *.pt filter=lfs diff=lfs merge=lfs -text
24
  *.pth filter=lfs diff=lfs merge=lfs -text
25
  *.rar filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -1,7 +1,139 @@
 
 
 
 
1
  import gradio as gr
2
 
3
- def greet(name):
4
- return "Hello " + name + "!"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch(server_name="0.0.0.0")
 
1
+ import os
2
+ import shutil
3
+ import subprocess
4
+
5
  import gradio as gr
6
 
7
+ from huggingface_hub import create_repo, HfApi
8
+ from huggingface_hub import snapshot_download
9
+ from huggingface_hub import whoami
10
+ from huggingface_hub import ModelCard
11
+
12
+ from textwrap import dedent
13
+
14
# Architecture names that are converted with llama.cpp's ``convert.py``;
# every other architecture goes through ``convert-hf-to-gguf.py``.
LLAMA_LIKE_ARCHS = ["MistralForCausalLM", "LlamaForCausalLM"]


def script_to_use(model_id, api):
    """Pick the llama.cpp conversion script for a Hub model.

    Args:
        model_id: Repo ID passed to ``api.model_info``.
        api: Object exposing ``model_info(model_id)``; the returned info
            must have a ``config`` attribute (a mapping or ``None``).

    Returns:
        ``"convert.py"`` when the first entry of the config's
        ``"architectures"`` list is in ``LLAMA_LIKE_ARCHS``,
        ``"convert-hf-to-gguf.py"`` for any other architecture, or ``None``
        when the config or its ``"architectures"`` entry is missing or empty.
    """
    info = api.model_info(model_id)
    if info.config is None:
        return None

    architectures = info.config.get("architectures")
    # ``not`` covers both a missing key and an empty list; indexing an empty
    # list below would otherwise raise IndexError.
    if not architectures:
        return None

    if architectures[0] in LLAMA_LIKE_ARCHS:
        return "convert.py"
    return "convert-hf-to-gguf.py"
25
+
26
def _run_checked(command, failure_prefix):
    """Run ``command`` (an argument list) without a shell; raise on failure.

    Raises:
        Exception: ``"<failure_prefix>: <stderr>"`` when the process exits
            with a non-zero return code.
    """
    # text=True so stderr is interpolated as readable text, not a bytes repr.
    result = subprocess.run(command, capture_output=True, text=True)
    if result.returncode != 0:
        raise Exception(f"{failure_prefix}: {result.stderr}")


def process_model(model_id, q_method, hf_token):
    """Download a Hub model, convert and quantise it to GGUF, and upload it.

    Steps: snapshot-download ``model_id`` into a local directory named after
    the last path component of the ID, convert it to an fp16 file with a
    llama.cpp script, quantise it with ``./llama.cpp/quantize``, create the
    repo ``<model_name>-<q_method>-GGUF``, and upload the quantised file plus
    a generated model card. The local directory is removed afterwards whether
    or not the run succeeded.

    Args:
        model_id: Hub repo ID of the model to convert.
        q_method: Quantisation type passed to ``quantize``.
        hf_token: Hugging Face token used for the Hub API calls.

    Returns:
        A ``(message, image_path)`` tuple: an HTML link to the new repo and
        ``"llama.png"`` on success, or ``"Error: ..."`` and ``"error.png"``
        on any failure. Exceptions are reported through the return value,
        not raised.
    """
    model_id = (model_id or "").strip()
    model_name = model_id.split("/")[-1]

    # model_name is used as a working directory that is recursively deleted
    # in ``finally`` and is passed as a command-line argument, so reject
    # names that would delete the wrong directory or be parsed as an option.
    if model_name in ("", ".", "..", "llama.cpp") or model_name.startswith("-"):
        return (f"Error: invalid model ID {model_id!r}", "error.png")

    fp16 = f"{model_name}/{model_name.lower()}.fp16.bin"

    try:
        api = HfApi(token=hf_token)

        # Pass the token so private/gated repos the user can access download
        # too; an empty string is normalised to None.
        snapshot_download(
            repo_id=model_id,
            local_dir=model_name,
            local_dir_use_symlinks=False,
            token=hf_token or None,
        )
        print("Model downloaded successully!")

        conversion_script = script_to_use(model_id, api)
        if conversion_script is None:
            raise Exception(
                f"Could not determine the model architecture of {model_id}"
            )
        # Argument lists (no shell) so user-supplied text is never
        # interpreted by a shell.
        _run_checked(
            [
                "python",
                f"llama.cpp/{conversion_script}",
                model_name,
                "--outtype",
                "f16",
                "--outfile",
                fp16,
            ],
            "Error converting to fp16",
        )
        print("Model converted to fp16 successully!")

        qtype = f"{model_name}/{model_name.lower()}.{q_method.upper()}.gguf"
        gguf_name = qtype.split("/")[-1]
        _run_checked(
            ["./llama.cpp/quantize", fp16, qtype, q_method],
            "Error quantizing",
        )
        print("Quantised successfully!")

        # Create empty repo
        new_repo_url = api.create_repo(
            repo_id=f"{model_name}-{q_method}-GGUF", exist_ok=True
        )
        new_repo_id = new_repo_url.repo_id
        print("Repo created successfully!", new_repo_url)

        # Start from the source model's card, tag it, and replace its text.
        card = ModelCard.load(model_id, token=hf_token or None)
        card.data.tags = (
            ["llama-cpp"]
            if card.data.tags is None
            else card.data.tags + ["llama-cpp"]
        )
        card.text = dedent(
            f"""
            # {new_repo_id}
            This model was converted to GGUF format from [`{model_id}`](https://huggingface.co/{model_id}) using llama.cpp.
            Refer to the [original model card](https://huggingface.co/{model_id}) for more details on the model.
            ## Use with llama.cpp

            ```bash
            brew install ggerganov/ggerganov/llama.cpp
            ```

            ```bash
            llama-cli --hf-repo {new_repo_id} --model {gguf_name} -p "The meaning to life and the universe is "
            ```

            ```bash
            llama-server --hf-repo {new_repo_id} --model {gguf_name} -c 2048
            ```
            """
        )
        readme_path = os.path.join(model_name, "README-new.md")
        card.save(readme_path)

        api.upload_file(
            path_or_fileobj=qtype,
            path_in_repo=gguf_name,
            repo_id=new_repo_id,
        )

        api.upload_file(
            path_or_fileobj=readme_path,
            path_in_repo="README.md",
            repo_id=new_repo_id,
        )
        print("Uploaded successfully!")

        return (
            f'Find your repo <a href=\'{new_repo_url}\' target="_blank" style="text-decoration:underline">here</a>',
            "llama.png",
        )
    except Exception as e:
        # Top-level UI boundary: report the failure instead of raising.
        return (f"Error: {e}", "error.png")
    finally:
        shutil.rmtree(model_name, ignore_errors=True)
        print("Folder cleaned up successfully!")
101
+
102
+
103
# Create Gradio interface: three inputs (model ID, quantisation method,
# write token) feeding process_model, which returns a message and an image.
iface = gr.Interface(
    fn=process_model,
    inputs=[
        gr.Textbox(
            lines=1,
            label="Hub Model ID",
            info="Model repo ID",
            placeholder="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
            value="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        ),
        gr.Dropdown(
            ["Q2_K", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0"],
            label="Quantization Method",
            info="GGML quantisation type",
            value="Q4_K_M",
            filterable=False,
        ),
        gr.Textbox(
            lines=1,
            label="HF Write Token",
            # Same token-settings page the description and article link to.
            info="https://hf.co/settings/tokens",
            type="password",
        ),
    ],
    outputs=[
        gr.Markdown(label="output"),
        gr.Image(show_label=False),
    ],
    title="Create your own GGUF Quants!",
    description="Create GGUF quants from any Hugging Face repository! You need to specify a write token obtained in https://hf.co/settings/tokens.",
    article="<p>Find your write token at <a href='https://huggingface.co/settings/tokens' target='_blank'>token settings</a></p>",
)

# Launch the interface, listening on all network interfaces
iface.launch(server_name="0.0.0.0", debug=True)
error.png ADDED

Git LFS Details

  • SHA256: de04fcbc70f41e4735ab169480b74eb4e90d76f50d6977a19d04e444cdb0937e
  • Pointer size: 131 Bytes
  • Size of remote file: 740 kB
llama.png ADDED

Git LFS Details

  • SHA256: a287a47ae4c6f87a363471130be4c916948664792a7a8efbca1bdaaf8d016ebc
  • Pointer size: 132 Bytes
  • Size of remote file: 1.8 MB