import os
import shutil
import subprocess
import signal

os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
import gradio as gr

from huggingface_hub import create_repo, HfApi
from huggingface_hub import snapshot_download
from huggingface_hub import whoami
from huggingface_hub import ModelCard

from gradio_huggingfacehub_search import HuggingfaceHubSearch
from apscheduler.schedulers.background import BackgroundScheduler
from textwrap import dedent
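# Ollamafy: convert a Hugging Face checkpoint to an fp16 GGUF with llama.cpp, import it
# into Ollama (optionally quantizing on import), and push the result to the Ollama library.
# The Space expects HOME and OLLAMA_USERNAME in the environment and an Ollama signing key
# at ~/.ollama/id_ed25519.pub.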

HOME = os.environ.get("HOME")
# token = os.environ.get("HF_TOKEN")
library_username = os.environ.get("OLLAMA_USERNAME", "").lower()
ollama_pubkey = open(f"{HOME}/.ollama/id_ed25519.pub", "r")

def ollamafy_model(oauth_token: gr.OAuthToken | None, account, model_id, ollama_library_username, ollama_q_method, latest, maintainer):
    if oauth_token is None or oauth_token.token is None:
        raise ValueError("You must be logged in to use Ollamafy")
    # Push under the Ollama username supplied in the UI, falling back to OLLAMA_USERNAME.
    ollama_username = ollama_library_username.lower() if ollama_library_username else library_username
    # username = whoami(oauth_token.token)["name"]
    model_name = model_id.split('/')[-1]
    fp16 = f"{model_name}-fp16.gguf"

    try:
        api = HfApi(token=oauth_token.token)   
        dl_pattern = ["*.md", "*.json", "*.model"]
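        # Besides the config/tokenizer files, download either the safetensors shards
        # (preferred) or the legacy PyTorch .bin shards, whichever the repo contains.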
        pattern = (
            "*.safetensors"
            if any(
                file.path.endswith(".safetensors")
                for file in api.list_repo_tree(
                    repo_id=model_id,
                    recursive=True,
                )
            )
            else "*.bin"
        )
        dl_pattern += [pattern]
        
        if not os.path.isfile(fp16):
            api.snapshot_download(repo_id=model_id, local_dir=model_name, local_dir_use_symlinks=False, allow_patterns=dl_pattern)
            print("Model downloaded successfully!")
            print(f"Current working directory: {os.getcwd()}")
            print(f"Model directory contents: {os.listdir(model_name)}")
    
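            # Convert the downloaded checkpoint to an fp16 GGUF with llama.cpp's
            # convert_hf_to_gguf.py (assumes a llama.cpp checkout at ./llama.cpp).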
            conversion_script = "convert_hf_to_gguf.py"
            fp16_conversion = f"python llama.cpp/{conversion_script} {model_name} --outtype f16 --outfile {fp16}"
            result = subprocess.run(fp16_conversion, shell=True, capture_output=True)
            print(result)
            if result.returncode != 0:
                raise Exception(f"Error converting to fp16: {result.stderr}")
            print("Model converted to fp16 successfully!")
            print(f"Converted model path: {fp16}")
            
            # Deleting the source repository on the Hub would be destructive, so the call
            # is left disabled; the local snapshot is cleaned up in the finally block.
            # HfApi().delete_repo(repo_id=model_id)

        ### Ollamafy ###
        model_maintainer = model_id.split('/')[-2]
        # Maintainers publish under the bare model name; otherwise prefix with the upstream maintainer.
        ollama_model_name = model_name.lower() if maintainer else f"{model_maintainer.lower()}_{model_name.lower()}"
        ollama_modelfile_name = model_name + '_modelfile'
        # model_path = f"{HOME}/.cache/huggingface/hub/{model_id}"
      
        # An Ollama Modelfile needs a FROM line pointing at the GGUF to import.
        with open(ollama_modelfile_name, "w") as ollama_modelfile:
            ollama_modelfile.write(f"FROM {fp16}")

        print(f"Wrote {ollama_modelfile_name} pointing at {fp16}")

        # for ollama_q_method in ollama_q_methods:
        # "ollama create" imports the GGUF referenced by the Modelfile; for any target
        # other than FP16 the -q flag quantizes the model at import time.
        if ollama_q_method == "FP16":
            ollama_conversion = f"ollama create -f {ollama_modelfile_name} {ollama_username}/{ollama_model_name}:{ollama_q_method.lower()}"
        else:
            ollama_conversion = f"ollama create -q {ollama_q_method} -f {ollama_modelfile_name} {ollama_username}/{ollama_model_name}:{ollama_q_method.lower()}"
            
        ollama_conversion_result = subprocess.run(ollama_conversion, shell=True, capture_output=True)
        print(ollama_conversion_result)
        if ollama_conversion_result.returncode != 0:
            raise Exception(f"Error converting to Ollama: {ollama_conversion_result.stderr}")
        else: 
            print("Model converted to Ollama successfully!")
        
        ollama_push = f"ollama push {ollama_username}/{ollama_model_name}:{ollama_q_method.lower()}"
        ollama_push_result = subprocess.run(ollama_push, shell=True, capture_output=True)
        print(ollama_push_result)
        if ollama_push_result.returncode != 0:
            raise Exception(f"Error pushing to Ollama: {ollama_push_result.stderr}")
        else:
            print("Model pushed to Ollama library successfully!")

        if latest:
            # Copy the freshly created quantization to the :latest tag and push that as well.
            ollama_copy = f"ollama cp {ollama_username}/{ollama_model_name}:{ollama_q_method.lower()} {ollama_username}/{ollama_model_name}:latest"
            ollama_copy_result = subprocess.run(ollama_copy, shell=True, capture_output=True)
            print(ollama_copy_result)
            if ollama_copy_result.returncode != 0:
                raise Exception(f"Error copying Ollama model: {ollama_copy_result.stderr}")
            print("Model copied to :latest successfully!")

            ollama_push_latest = f"ollama push {ollama_username}/{ollama_model_name}:latest"
            ollama_push_latest_result = subprocess.run(ollama_push_latest, shell=True, capture_output=True)
            print(ollama_push_latest_result)
            if ollama_push_latest_result.returncode != 0:
                raise Exception(f"Error pushing to Ollama: {ollama_push_latest_result.stderr}")
            else:
                print("Model pushed to Ollama library successfully!")

            ollama_rm_latest = f"ollama rm {ollama_username}/{ollama_model_name}:latest"
            ollama_rm_latest_result = subprocess.run(ollama_rm_latest, shell=True, capture_output=True)
            print(ollama_rm_latest_result)
            if ollama_rm_latest_result.returncode != 0:
                raise Exception(f"Error removing local Ollama model: {ollama_rm_latest_result.stderr}")
            else:
                print("Local :latest copy removed successfully!")

        # Remove the local quantized model only after the optional :latest copy has been made.
        ollama_rm = f"ollama rm {ollama_username}/{ollama_model_name}:{ollama_q_method.lower()}"
        ollama_rm_result = subprocess.run(ollama_rm, shell=True, capture_output=True)
        print(ollama_rm_result)
        if ollama_rm_result.returncode != 0:
            raise Exception(f"Error removing local Ollama model: {ollama_rm_result.stderr}")
        else:
            print("Local model removed successfully!")

        return (f"Pushed {ollama_username}/{ollama_model_name}:{ollama_q_method.lower()} to the Ollama library!", None)
    

    except Exception as e:
        return (f"Error: {e}", "error.png")
    finally:
        shutil.rmtree(model_name, ignore_errors=True)
        print("Folder cleaned up successfully!")

        
css="""/* Custom CSS to allow scrolling */
.gradio-container {overflow-y: auto;}
"""
# Create Gradio interface
with gr.Blocks(css=css) as demo:
    login = gr.LoginButton(
        min_width=250,
    )
    
    model_id = HuggingfaceHubSearch(
        label="Hugging Face Hub Model ID",
        placeholder="Search for model id on Huggingface",
        search_type="model",
    )

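    # GGUF quantization presets accepted by "ollama create -q"; FP16 imports the
    # converted model without further quantization.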
    ollama_q_method = gr.Dropdown(
        ["FP16", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_1", "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_1", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0"],
        label="Ollama Quantization Method",
        info="Chose which quantization will created and exported to the Ollama Library.",
        value="FP16"
    )

    latest = gr.Checkbox(
        value=False,
        label="Latest",
        info="Push Model to the Ollama Library with the :latest tag."
    )

    ollama_library_username = gr.Textbox(
        label="Ollama Library Username",
        info="Input your username from Ollama to push this model to their Library.",
    )
        
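    # "ollama push" authenticates with the Space's Ollama key, so this public key must be
    # added to the target account on ollama.com before a push will succeed.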
    account = gr.Code(
        ollama_pubkey.read().rstrip(),
        label="Ollama SSH pubkey",
        # info="Copy this and paste it into your Ollama profile.",
    )
            
    maintainer = gr.Checkbox(
        value=False,
        label="Maintainer",
        info="Use this option is your original repository on both Hugging Face and Ollama."
    )
    
    iface = gr.Interface(
        fn=ollamafy_model,
        inputs=[
            # oauth_token is injected automatically from the LoginButton session.
            account,
            model_id,
            ollama_library_username,
            ollama_q_method,
            latest,
            maintainer
        ],
        outputs=[
            gr.Markdown(label="output"),
            gr.Image(show_label=False),
        ],
        title="Ollamafy",
        description="Import Hugging Face Models to Ollama and Push them to the Ollama Library 🦙 \n\n Sampled from: \n\n - https://huggingface.co./spaces/ggml-org/gguf-my-repo \n\n - https://huggingface.co./spaces/gingdev/ollama-server",
        api_name=False
    )
    
def restart_space():
    ollama_pubkey.close()
    # HfApi().restart_space(repo_id="unclemusclez/ollamafy", token=HF_TOKEN, library_username=OLLAMA_USERNAME, factory_reboot=True)
    HfApi().restart_space(repo_id="unclemusclez/ollamafy", factory_reboot=True)

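# Schedule a factory reboot of the Space every 6 hours (21,600 seconds).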
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=21600)
scheduler.start()

# Launch the interface
demo.queue(default_concurrency_limit=1, max_size=5).launch(debug=True, show_api=False)