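"""Ollamafy: a Gradio Space that downloads a Hugging Face model, converts it to an
fp16 GGUF with llama.cpp, quantizes it via `ollama create`, and pushes the result to
the Ollama Library under the account configured in OLLAMA_USERNAME."""
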
import os
import shutil
import subprocess
import signal

os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
import gradio as gr

from huggingface_hub import create_repo, HfApi
from huggingface_hub import snapshot_download
from huggingface_hub import whoami
from huggingface_hub import ModelCard

from gradio_huggingfacehub_search import HuggingfaceHubSearch
from apscheduler.schedulers.background import BackgroundScheduler
from textwrap import dedent

HF_TOKEN = os.environ.get("HF_TOKEN")
OLLAMA_USERNAME = os.environ.get("OLLAMA_USERNAME", "").lower()
HOME = os.path.expanduser("~")  # the Ollama keypair lives under the Space's home directory
ollama_pubkey = open(f"{HOME}/.ollama/id_ed25519.pub", "r")

def ollamafy_model(model_id, ollama_q_method, latest, maintainer, oauth_token: gr.OAuthToken | None):
    # The Gradio interface passes (model_id, ollama_q_method, latest, maintainer) plus the OAuth token.
    if oauth_token is None or oauth_token.token is None:
        raise ValueError("You must be logged in to use Ollamafy")
        
    model_name = model_id.split('/')[-1]
    fp16 = f"{model_name}-fp16.gguf"

    try:
        api = HfApi(token=oauth_token.token)
        dl_pattern = ["*.md", "*.json", "*.model"]
        pattern = (
            "*.safetensors"
            if any(
                file.path.endswith(".safetensors")
                for file in api.list_repo_tree(
                    repo_id=model_id,
                    recursive=True,
                )
            )
            else "*.bin"
        )
        dl_pattern += [pattern]
        
        if not os.path.isfile(fp16):
            api.snapshot_download(repo_id=model_id, local_dir=model_name, local_dir_use_symlinks=False, allow_patterns=dl_pattern)
            print("Model downloaded successfully!")
            print(f"Current working directory: {os.getcwd()}")
            print(f"Model directory contents: {os.listdir(model_name)}")
    
            conversion_script = "convert_hf_to_gguf.py"
            fp16_conversion = f"python llama.cpp/{conversion_script} {model_name} --outtype f16 --outfile {fp16}"
            result = subprocess.run(fp16_conversion, shell=True, capture_output=True)
            print(result)
            if result.returncode != 0:
                raise Exception(f"Error converting to fp16: {result.stderr}")
            print("Model converted to fp16 successfully!")
            print(f"Converted model path: {fp16}")
            
            # HfApi().delete_repo(repo_id=model_id)  # would target the *source* repo on the Hub; local cleanup happens in `finally`

        ### Ollamafy ###
        if os.path.isfile(fp16):
            model_maintainer = model_id.split('/')[-2]
            ollama_model_name = model_maintainer.lower() + '_' + model_name.lower()
            # Push under the bare model name when the Hugging Face maintainer also owns the
            # Ollama namespace; otherwise prefix the original maintainer to avoid collisions.
            ollama_library_name = model_name.lower() if maintainer else ollama_model_name
            ollama_modelfile_name = model_name + '_modelfile'

            # The Modelfile only needs a FROM directive pointing at the converted GGUF;
            # quantization itself is handled by `ollama create -q` below.
            with open(ollama_modelfile_name, "w") as ollama_modelfile:
                ollama_modelfile.write(f"FROM {fp16}")
            print(f"Wrote {ollama_modelfile_name} pointing at {fp16}")

            if ollama_q_method == "FP16":
                ollama_conversion = f"ollama create -f {ollama_modelfile_name} {OLLAMA_USERNAME}/{ollama_library_name}:{ollama_q_method.lower()}"
            else:
                ollama_conversion = f"ollama create -q {ollama_q_method} -f {ollama_modelfile_name} {OLLAMA_USERNAME}/{ollama_library_name}:{ollama_q_method.lower()}"
                
            ollama_conversion_result = subprocess.run(ollama_conversion, shell=True, capture_output=True)
            print(ollama_conversion_result)
            if ollama_conversion_result.returncode != 0:
                raise Exception(f"Error converting to Ollama: {ollama_conversion_result.stderr}")
            else: 
                print("Model converted to Ollama successfully!")
            
            # Push the freshly created tag to the Ollama Library.
            ollama_push = f"ollama push {OLLAMA_USERNAME}/{ollama_library_name}:{ollama_q_method.lower()}"
            ollama_rm = f"ollama rm {OLLAMA_USERNAME}/{ollama_library_name}:{ollama_q_method.lower()}"
                
            ollama_push_result = subprocess.run(ollama_push, shell=True, capture_output=True)
            print(ollama_push_result)
            if ollama_push_result.returncode != 0:
                raise Exception(f"Error pushing to Ollama: {ollama_push_result.stderr}")      
            else: 
                print("Model pushed to Ollama library successfully!")
            
            # Removing the local quantized tag is deferred until after the optional
            # :latest handling below, so `ollama cp` still has a source tag to copy from.
          
    
            if latest:
                # Tag the quantized model as :latest as well before pushing it.
                ollama_copy = f"ollama cp {OLLAMA_USERNAME}/{ollama_library_name}:{ollama_q_method.lower()} {OLLAMA_USERNAME}/{ollama_library_name}:latest"
                ollama_copy_result = subprocess.run(ollama_copy, shell=True, capture_output=True)
                print(ollama_copy_result)
                if ollama_copy_result.returncode != 0:
                    raise Exception(f"Error copying to :latest: {ollama_copy_result.stderr}")
                print("Model copied to :latest successfully!")
                
                ollama_push_latest = f"ollama push {OLLAMA_USERNAME}/{ollama_library_name}:latest"
                ollama_rm_latest = f"ollama rm {OLLAMA_USERNAME}/{ollama_library_name}:latest"
                    
                ollama_push_latest_result = subprocess.run(ollama_push_latest, shell=True, capture_output=True)
                print(ollama_push_latest_result)
                if ollama_push_latest_result.returncode != 0:
                    raise Exception(f"Error pushing to Ollama: {ollama_push_latest_result.stderr}")
                else:
                    print("Model pushed to Ollama library successfully!")

                ollama_rm_latest_result = subprocess.run(ollama_rm_latest, shell=True, capture_output=True)
                print(ollama_rm_latest_result)
                if ollama_rm_latest_result.returncode != 0:
                    raise Exception(f"Error removing from Ollama: {ollama_rm_latest_result.stderr}")
                else:
                    print("Local :latest copy removed successfully!")

            # With the optional :latest tag handled, drop the local quantized tag.
            ollama_rm_result = subprocess.run(ollama_rm, shell=True, capture_output=True)
            print(ollama_rm_result)
            if ollama_rm_result.returncode != 0:
                raise Exception(f"Error removing model from local Ollama: {ollama_rm_result.stderr}")
            print("Local Ollama copies removed successfully!")

            return (f"Pushed {OLLAMA_USERNAME}/{ollama_library_name}:{ollama_q_method.lower()} to the Ollama Library!", None)

    except Exception as e:
        return (f"Error: {e}", "error.png")
    finally:
        shutil.rmtree(model_name, ignore_errors=True)
        print("Folder cleaned up successfully!")

        
css="""/* Custom CSS to allow scrolling */
.gradio-container {overflow-y: auto;}
"""
# Create Gradio interface
with gr.Blocks(css=css) as demo:
    gr.Markdown("You must be logged in to use Ollamafy.")
    gr.Markdown(ollama_pubkey.read().rstrip())
    ollama_pubkey.close()
    gr.LoginButton(min_width=250)

    model_id = HuggingfaceHubSearch(
        label="Hub Model ID",
        placeholder="Search for model id on Huggingface",
        search_type="model",
    )

    ollama_q_method = gr.Dropdown(
        ["FP16", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_1", "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_1", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0"],
        label="Ollama Quantization Method",
        info="Chose which quantization will created and exported to the Ollama Library",
        value="FP16"
    )

    latest = gr.Checkbox(
        value=False,
        label="Latest",
        info="Push Model to the Ollama Library with the :latest tag"
    )
    
    maintainer = gr.Checkbox(
        value=False,
        label="Maintainer",
        info="This is your original repository on both Hugging Face and Ollama. "
             "DO NOT USE unless you use the same username on both platforms."
    )
    ollama_username = gr.Textbox(
        label="Ollama.com Library Username",
        info="Your username on the Ollama.com library",
    )
    
    iface = gr.Interface(
        fn=ollamafy_model,
        inputs=[
            model_id,
            ollama_q_method,
            latest,
            maintainer
        ],
        outputs=[
            gr.Markdown(label="output"),
            gr.Image(show_label=False),
        ],
        title="Create your own Ollama Models and Push them to the Ollama Library, blazingly fast ⚡!",
        description="Sampled from https://huggingface.co./spaces/ggml-org/gguf-my-repo and https://huggingface.co./spaces/gingdev/ollama-server",
        api_name=False
    )
    
#username = whoami(oauth_token.token)["name"]
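# Periodically factory-reboot the Space (every 21600 seconds = 6 hours).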
def restart_space():
    HfApi().restart_space(repo_id="unclemusclez/ollamafy", token=HF_TOKEN, factory_reboot=True)

scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=21600)
scheduler.start()

# Launch the interface
demo.queue(default_concurrency_limit=1, max_size=5).launch(debug=True, show_api=False)