import os
import shutil
import subprocess
import signal
os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
import gradio as gr
from huggingface_hub import create_repo, HfApi
from huggingface_hub import snapshot_download
from huggingface_hub import whoami
from huggingface_hub import ModelCard
from gradio_huggingfacehub_search import HuggingfaceHubSearch
from apscheduler.schedulers.background import BackgroundScheduler
from textwrap import dedent
HOME = os.environ.get("HOME")
# token = os.environ.get("HF_TOKEN")
library_username = os.environ.get("OLLAMA_USERNAME").lower()
ollama_pubkey = open(f"{HOME}/.ollama/id_ed25519.pub", "r")
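# Pushes to the Ollama Library are signed with the ed25519 key pair under ~/.ollama;
# the public key is surfaced in the UI below so users can register it with their
# ollama.com account before attempting a push.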
def ollamafy_model(login, account, model_id, ollama_library_username, ollama_q_method, latest, maintainer, oauth_token: gr.OAuthToken | None = None):
if oauth_token is None or oauth_token.token is None:
raise ValueError("You must be logged in to use Ollamafy")
# username = whoami(oauth_token.token)["name"]
model_name = model_id.split('/')[-1]
fp16 = f"{model_name}-fp16.gguf"
try:
api = HfApi(token=oauth_token.token)
dl_pattern = ["*.md", "*.json", "*.model"]
pattern = (
"*.safetensors"
if any(
file.path.endswith(".safetensors")
for file in api.list_repo_tree(
repo_id=model_id,
recursive=True,
)
)
else "*.bin"
)
dl_pattern += [pattern]
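# Download only what the GGUF converter needs: config/tokenizer files plus either the
# .safetensors weights or, if none exist, the legacy .bin weights, never both.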
if not os.path.isfile(fp16):
api.snapshot_download(repo_id=model_id, local_dir=model_name, local_dir_use_symlinks=False, allow_patterns=dl_pattern)
print("Model downloaded successfully!")
print(f"Current working directory: {os.getcwd()}")
print(f"Model directory contents: {os.listdir(model_name)}")
conversion_script = "convert_hf_to_gguf.py"
fp16_conversion = f"python llama.cpp/{conversion_script} {model_name} --outtype f16 --outfile {fp16}"
result = subprocess.run(fp16_conversion, shell=True, capture_output=True)
print(result)
if result.returncode != 0:
raise Exception(f"Error converting to fp16: {result.stderr}")
print("Model converted to fp16 successfully!")
print(f"Converted model path: {fp16}")
HfApi().delete_repo(repo_id=model_id)
### Ollamafy ###
model_maintainer = model_id.split('/')[-2]
ollama_model_name = model_maintainer.lower() + '_' + model_name.lower()
ollama_modelfile_name = model_name + '_modelfile'
# model_path = f"{HOME}/.cache/huggingface/hub/{model_id}"
ollama_modelfile = open(ollama_modelfile_name, "w")
# The Modelfile only needs a FROM line pointing at the converted GGUF (the fp16 output above)
ollama_modelfile.write(f"FROM {fp16}")
ollama_modelfile.close()
print(f"Modelfile written for {fp16}")
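# For reference, an Ollama Modelfile can also set sampling parameters, e.g. (sketch):
#   FROM ./model-fp16.gguf
#   PARAMETER temperature 0.7
# Only the FROM line is required for a plain import.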
# for ollama_q_method in ollama_q_methods:
if ollama_q_method == "FP16":
ollama_conversion = f"ollama create -f {ollama_modelfile_name} {library_username}/{ollama_model_name}:{ollama_q_method.lower()}"
else:
ollama_conversion = f"ollama create -q {ollama_q_method} -f {ollama_modelfile_name} {library_username}/{ollama_model_name}:{ollama_q_method.lower()}"
ollama_conversion_result = subprocess.run(ollama_conversion, shell=True, capture_output=True)
print(ollama_conversion_result)
if ollama_conversion_result.returncode != 0:
raise Exception(f"Error converting to Ollama: {ollama_conversion_result.stderr}")
else:
print("Model converted to Ollama successfully!")
if maintainer:
ollama_push = f"ollama push {library_username}/{model_name}:{q_method.lower()}"
ollama_rm = f"ollama rm {library_username}/{model_name}:{q_method.lower()}"
else:
ollama_push = f"ollama push {library_username}/{ollama_model_name}:{q_method.lower()}"
ollama_rm = f"ollama rm {library_username}/{ollama_model_name}:{q_method.lower()}"
ollama_push_result = subprocess.run(ollama_push, shell=True, capture_output=True)
print(ollama_push_result)
if ollama_push_result.returncode != 0:
raise Exception(f"Error pushing to Ollama: {ollama_push_result.stderr}")
else:
print("Model pushed to Ollama library successfully!")
ollama_rm_result = subprocess.run(ollama_rm, shell=True, capture_output=True)
print(ollama_rm_result)
if ollama_rm_result.returncode != 0:
raise Exception(f"Error removing local Ollama model: {ollama_rm_result.stderr}")
else:
print("Local Ollama model removed successfully!")
if latest:
ollama_copy = f"ollama cp {library_username}/{model_id.lower()}:{q_method.lower()} {library_username}/{model_id.lower()}:latest"
ollama_copy_result = subprocess.run(ollama_copy, shell=True, capture_output=True)
print(ollama_copy_result)
if ollama_copy_result.returncode != 0:
raise Exception(f"Error converting to Ollama: {ollama_push_result.stderr}")
print("Model pushed to Ollama library successfully!")
if maintainer:
ollama_push_latest = f"ollama push {library_username}/{model_name}:latest"
ollama_rm_latest = f"ollama rm {library_username}/{model_name}:latest"
else:
ollama_push_latest = f"ollama push {library_username}/{ollama_model_name}:latest"
ollama_rm_latest = f"ollama rm {library_username}/{ollama_model_name}:latest"
ollama_push_latest_result = subprocess.run(ollama_push_latest, shell=True, capture_output=True)
print(ollama_push_latest_result)
if ollama_push_latest_result.returncode != 0:
raise Exception(f"Error pushing to Ollama: {ollama_push_latest_result.stderr}")
else:
print("Model pushed to Ollama library with the :latest tag successfully!")
ollama_rm_latest_result = subprocess.run(ollama_rm_latest, shell=True, capture_output=True)
print(ollama_rm_latest_result)
if ollama_rm_latest_result.returncode != 0:
raise Exception(f"Error removing local :latest model: {ollama_rm_latest_result.stderr}")
else:
print("Local :latest model removed successfully!")
except Exception as e:
return (f"Error: {e}", "error.png")
finally:
shutil.rmtree(model_name, ignore_errors=True)
print("Folder cleaned up successfully!")
css="""/* Custom CSS to allow scrolling */
.gradio-container {overflow-y: auto;}
"""
# Create Gradio interface
with gr.Blocks(css=css) as demo:
login = gr.LoginButton(
min_width=250,
)
model_id = HuggingfaceHubSearch(
label="Hugging Face Hub Model ID",
placeholder="Search for model id on Huggingface",
search_type="model",
)
ollama_q_method = gr.Dropdown(
["FP16", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_1", "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_1", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0"],
label="Ollama Quantization Method",
info="Chose which quantization will created and exported to the Ollama Library.",
value="FP16"
)
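# These dropdown values mirror the llama.cpp quantization types accepted by `ollama create -q`
# (legacy Q4_0/Q4_1/Q5_0/Q5_1/Q8_0 plus the K-quant variants); FP16 means no re-quantization.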
latest = gr.Checkbox(
value=False,
label="Latest",
info="Push Model to the Ollama Library with the :latest tag."
)
ollama_library_username = gr.Textbox(
label="Ollama Library Username",
info="Input your username from Ollama to push this model to their Library.",
)
account = gr.Code(
ollama_pubkey.read().rstrip(),
label="Ollama SSH pubkey",
# info="Copy this and paste it into your Ollama profile.",
)
maintainer = gr.Checkbox(
value=False,
label="Maintainer",
info="Use this option is your original repository on both Hugging Face and Ollama."
)
iface = gr.Interface(
fn=ollamafy_model,
inputs=[
login,
account,
model_id,
ollama_library_username,
ollama_q_method,
latest,
maintainer
],
outputs=[
gr.Markdown(label="output"),
gr.Image(show_label=False),
],
title="Ollamafy",
description="Import Hugging Face Models to Ollama and Push them to the Ollama Library 🦙 \n\n Sampled from: \n\n - https://huggingface.co./spaces/ggml-org/gguf-my-repo \n\n - https://huggingface.co./spaces/gingdev/ollama-server",
api_name=False
)
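# Note: the oauth_token parameter of ollamafy_model is not listed in `inputs`; Gradio injects
# gr.OAuthToken-typed parameters automatically once the user has signed in via the LoginButton.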
def restart_space():
ollama_pubkey.close()
# HfApi().restart_space(repo_id="unclemusclez/ollamafy", token=HF_TOKEN, library_username=OLLAMA_USERNAME, factory_reboot=True)
HfApi().restart_space(repo_id="unclemusclez/ollamafy", factory_reboot=True)
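# Restart the Space every 21600 seconds (6 hours) so leftover downloads and converted GGUFs
# on the ephemeral disk are cleared out between runs.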
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=21600)
scheduler.start()
# Launch the interface
demo.queue(default_concurrency_limit=1, max_size=5).launch(debug=True, show_api=False) |