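"""Ollamafy: a Gradio Space that downloads a Hugging Face model, converts it to an
fp16 GGUF with llama.cpp, quantizes it via `ollama create`, and pushes the result to
the Ollama Library under the account configured in OLLAMA_USERNAME."""
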
import os
import shutil
import subprocess
import signal

os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
import gradio as gr

from huggingface_hub import create_repo, HfApi
from huggingface_hub import snapshot_download
from huggingface_hub import whoami
from huggingface_hub import ModelCard

from gradio_huggingfacehub_search import HuggingfaceHubSearch
from apscheduler.schedulers.background import BackgroundScheduler
from textwrap import dedent

HF_TOKEN = os.environ.get("HF_TOKEN")
OLLAMA_USERNAME = os.environ.get("OLLAMA_USERNAME", "").lower()
HOME = os.path.expanduser("~")  # the Ollama keypair lives under the Space's home directory
ollama_pubkey = open(f"{HOME}/.ollama/id_ed25519.pub", "r")

def ollamafy_model(model_id, ollama_q_method, latest, maintainer, oauth_token: gr.OAuthToken | None):
    # The Gradio interface passes (model_id, ollama_q_method, latest, maintainer) plus the OAuth token.
    if oauth_token is None or oauth_token.token is None:
        raise ValueError("You must be logged in to use Ollamafy")
        
    model_name = model_id.split('/')[-1]
    fp16 = f"{model_name}-fp16.gguf"

    try:
        api = HfApi(token=oauth_token.token)
        dl_pattern = ["*.md", "*.json", "*.model"]
        pattern = (
            "*.safetensors"
            if any(
                file.path.endswith(".safetensors")
                for file in api.list_repo_tree(
                    repo_id=model_id,
                    recursive=True,
                )
            )
            else "*.bin"
        )
        dl_pattern += [pattern]
        
        if not os.path.isfile(fp16):
            api.snapshot_download(repo_id=model_id, local_dir=model_name, local_dir_use_symlinks=False, allow_patterns=dl_pattern)
            print("Model downloaded successfully!")
            print(f"Current working directory: {os.getcwd()}")
            print(f"Model directory contents: {os.listdir(model_name)}")
    
            conversion_script = "convert_hf_to_gguf.py"
            fp16_conversion = f"python llama.cpp/{conversion_script} {model_name} --outtype f16 --outfile {fp16}"
            result = subprocess.run(fp16_conversion, shell=True, capture_output=True)
            print(result)
            if result.returncode != 0:
                raise Exception(f"Error converting to fp16: {result.stderr}")
            print("Model converted to fp16 successfully!")
            print(f"Converted model path: {fp16}")
            
            # HfApi().delete_repo(repo_id=model_id)  # would target the *source* repo on the Hub; local cleanup happens in `finally`

        ### Ollamafy ###
        if os.path.isfile(fp16):
            model_maintainer = model_id.split('/')[-2]
            ollama_model_name = model_maintainer.lower() + '_' + model_name.lower()
            # Push under the bare model name when the Hugging Face maintainer also owns the
            # Ollama namespace; otherwise prefix the original maintainer to avoid collisions.
            ollama_library_name = model_name.lower() if maintainer else ollama_model_name
            ollama_modelfile_name = model_name + '_modelfile'

            # The Modelfile only needs a FROM directive pointing at the converted GGUF;
            # quantization itself is handled by `ollama create -q` below.
            with open(ollama_modelfile_name, "w") as ollama_modelfile:
                ollama_modelfile.write(f"FROM {fp16}")
            print(f"Wrote {ollama_modelfile_name} pointing at {fp16}")

            if ollama_q_method == "FP16":
                ollama_conversion = f"ollama create -f {ollama_modelfile_name} {OLLAMA_USERNAME}/{ollama_library_name}:{ollama_q_method.lower()}"
            else:
                ollama_conversion = f"ollama create -q {ollama_q_method} -f {ollama_modelfile_name} {OLLAMA_USERNAME}/{ollama_library_name}:{ollama_q_method.lower()}"
                
            ollama_conversion_result = subprocess.run(ollama_conversion, shell=True, capture_output=True)
            print(ollama_conversion_result)
            if ollama_conversion_result.returncode != 0:
                raise Exception(f"Error converting to Ollama: {ollama_conversion_result.stderr}")
            else: 
                print("Model converted to Ollama successfully!")
            
            # Push the freshly created tag to the Ollama Library.
            ollama_push = f"ollama push {OLLAMA_USERNAME}/{ollama_library_name}:{ollama_q_method.lower()}"
            ollama_rm = f"ollama rm {OLLAMA_USERNAME}/{ollama_library_name}:{ollama_q_method.lower()}"
                
            ollama_push_result = subprocess.run(ollama_push, shell=True, capture_output=True)
            print(ollama_push_result)
            if ollama_push_result.returncode != 0:
                raise Exception(f"Error pushing to Ollama: {ollama_push_result.stderr}")      
            else: 
                print("Model pushed to Ollama library successfully!")
            
            # Removing the local quantized tag is deferred until after the optional
            # :latest handling below, so `ollama cp` still has a source tag to copy from.
          
    
            if latest:
                # Tag the quantized model as :latest as well before pushing it.
                ollama_copy = f"ollama cp {OLLAMA_USERNAME}/{ollama_library_name}:{ollama_q_method.lower()} {OLLAMA_USERNAME}/{ollama_library_name}:latest"
                ollama_copy_result = subprocess.run(ollama_copy, shell=True, capture_output=True)
                print(ollama_copy_result)
                if ollama_copy_result.returncode != 0:
                    raise Exception(f"Error copying to :latest: {ollama_copy_result.stderr}")
                print("Model copied to :latest successfully!")
                
                ollama_push_latest = f"ollama push {OLLAMA_USERNAME}/{ollama_library_name}:latest"
                ollama_rm_latest = f"ollama rm {OLLAMA_USERNAME}/{ollama_library_name}:latest"
                    
                ollama_push_latest_result = subprocess.run(ollama_push_latest, shell=True, capture_output=True)
                print(ollama_push_latest_result)
                if ollama_push_latest_result.returncode != 0:
                    raise Exception(f"Error pushing to Ollama: {ollama_push_latest_result.stderr}")
                else:
                    print("Model pushed to Ollama library successfully!")

                ollama_rm_latest_result = subprocess.run(ollama_rm_latest, shell=True, capture_output=True)
                print(ollama_rm_latest_result)
                if ollama_rm_latest_result.returncode != 0:
                    raise Exception(f"Error removing from Ollama: {ollama_rm_latest_result.stderr}")
                else:
                    print("Local :latest copy removed successfully!")

            # With the optional :latest tag handled, drop the local quantized tag.
            ollama_rm_result = subprocess.run(ollama_rm, shell=True, capture_output=True)
            print(ollama_rm_result)
            if ollama_rm_result.returncode != 0:
                raise Exception(f"Error removing model from local Ollama: {ollama_rm_result.stderr}")
            print("Local Ollama copies removed successfully!")

            return (f"Pushed {OLLAMA_USERNAME}/{ollama_library_name}:{ollama_q_method.lower()} to the Ollama Library!", None)

    except Exception as e:
        return (f"Error: {e}", "error.png")
    finally:
        shutil.rmtree(model_name, ignore_errors=True)
        print("Folder cleaned up successfully!")

        
css="""/* Custom CSS to allow scrolling */
.gradio-container {overflow-y: auto;}
"""
# Create Gradio interface
with gr.Blocks(css=css) as demo:
    gr.Markdown("You must be logged in to use Ollamafy.")
    gr.Markdown(ollama_pubkey.read().rstrip())
    ollama_pubkey.close()
    gr.LoginButton(min_width=250)

    model_id = HuggingfaceHubSearch(
        label="Hub Model ID",
        placeholder="Search for model id on Huggingface",
        search_type="model",
    )

    ollama_q_method = gr.Dropdown(
        ["FP16", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_1", "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_1", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0"],
        label="Ollama Quantization Method",
        info="Chose which quantization will created and exported to the Ollama Library",
        value="FP16"
    )

    latest = gr.Checkbox(
        value=False,
        label="Latest",
        info="Push Model to the Ollama Library with the :latest tag"
    )
    
    maintainer = gr.Checkbox(
        value=False,
        label="Maintainer",
        info="This is your original repository on both Hugging Face and Ollama. "
             "DO NOT USE unless you use the same username on both platforms."
    )
    ollama_username = gr.Textbox(
        label="Ollama.com Library Username",
        info="Your username on the Ollama.com library",
    )
    
    iface = gr.Interface(
        fn=ollamafy_model,
        inputs=[
            model_id,
            ollama_q_method,
            latest,
            maintainer
        ],
        outputs=[
            gr.Markdown(label="output"),
            gr.Image(show_label=False),
        ],
        title="Create your own Ollama Models and Push them to the Ollama Library, blazingly fast ⚡!",
        description="Sampled from https://huggingface.co./spaces/ggml-org/gguf-my-repo and https://huggingface.co./spaces/gingdev/ollama-server",
        api_name=False
    )
    
#username = whoami(oauth_token.token)["name"]
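# Periodically factory-reboot the Space (every 21600 seconds = 6 hours).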
def restart_space():
    HfApi().restart_space(repo_id="unclemusclez/ollamafy", token=HF_TOKEN, factory_reboot=True)

scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=21600)
scheduler.start()

# Launch the interface
demo.queue(default_concurrency_limit=1, max_size=5).launch(debug=True, show_api=False)