import os
import shutil
import subprocess
import signal
os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
import gradio as gr

from huggingface_hub import create_repo, HfApi
from huggingface_hub import snapshot_download
from huggingface_hub import whoami
from huggingface_hub import ModelCard

from gradio_huggingfacehub_search import HuggingfaceHubSearch

from apscheduler.schedulers.background import BackgroundScheduler

from textwrap import dedent

# Hugging Face token read from the environment; None when the variable is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Ollama account namespace read from the environment; None when the variable is unset.
OLLAMA_USERNAME = os.environ.get("OLLAMA_USERNAME")
# Public half of this host's Ollama key pair. Read inside a context manager so
# the file handle is closed as soon as the key has been loaded.
with open("/home/ollamafy/.ollama/id_ed25519.pub", "r") as _pubkey_file:
    ollama_pubkey = _pubkey_file.read().rstrip()
print(ollama_pubkey)


def _run_ollama(args, action):
    """Run ``ollama`` with *args* and fail loudly on a non-zero exit status.

    Args:
        args: Arguments for the ``ollama`` executable, one list item each.
        action: Short description of the step, used in the error message.

    Returns:
        The ``subprocess.CompletedProcess`` of the finished command.

    Raises:
        RuntimeError: If the command exits with a non-zero return code; the
            message carries the command's captured stderr.
    """
    # An argument list (shell=False) keeps user-supplied model ids and tags
    # from being interpreted by a shell.
    result = subprocess.run(["ollama", *args], capture_output=True, text=True)
    print(result)
    if result.returncode != 0:
        raise RuntimeError(f"Error {action}: {result.stderr}")
    return result


def process_model(model_id, q_method, latest, maintainer, oauth_token: gr.OAuthToken | None):
    """Download a Hub model, create an Ollama model from it and push it.

    Args:
        model_id: Hub repository id in ``owner/name`` form.
        q_method: Value used as the tag of the created Ollama model.
        latest: When truthy, also copy the model to the ``:latest`` tag and
            push that tag.
        maintainer: When truthy, the Ollama repository is named after the
            model alone; otherwise it is named ``<owner>_<name>``.
        oauth_token: OAuth token of the logged-in user, or None.

    Returns:
        A ``(message, image)`` tuple for the two interface outputs. On failure
        the message starts with ``Error:`` and the image is ``"error.png"``;
        on success the image is None.

    Raises:
        ValueError: If no user is logged in.
    """
    # The token object itself is None when nobody is logged in, so check it
    # before touching its attributes.
    if oauth_token is None or oauth_token.token is None:
        raise ValueError("You must be logged in to use Ollamafy")

    # Both parts end up in file-system paths and command arguments, so refuse
    # anything that is not a plain "owner/name" pair (notably "." and "..",
    # which would make the cleanup below delete the wrong directory).
    parts = (model_id or "").split("/")
    if len(parts) != 2 or any(part in ("", ".", "..") for part in parts):
        return (f"Error: '{model_id}' is not a valid model id (expected 'owner/name')", "error.png")
    model_maintainer, model_name = parts

    # Decide the Ollama repository once so that create, push and cp all refer
    # to the same model.
    repo_name = model_name if maintainer else f"{model_maintainer}_{model_name}"
    ollama_repo = f"{OLLAMA_USERNAME}/{repo_name}"
    # NOTE(review): q_method is only used as the tag here; nothing passes it to
    # ollama as a quantization setting, and the empty dropdown choice yields a
    # trailing ":" - confirm both are intended.
    tagged_model = f"{ollama_repo}:{q_method}"
    model_file = model_name + '_modelfile'

    try:
        api = HfApi(token=oauth_token.token)

        # Prefer safetensors weights when the repository has any, else *.bin.
        has_safetensors = any(
            file.path.endswith(".safetensors")
            for file in api.list_repo_tree(repo_id=model_id, recursive=True)
        )
        # Build the list in one go: "list += str" would add every character of
        # the pattern (including a bare "*") as its own pattern.
        dl_pattern = [
            "*.md",
            "*.json",
            "*.model",
            "*.safetensors" if has_safetensors else "*.bin",
        ]

        api.snapshot_download(repo_id=model_id, local_dir=model_name, local_dir_use_symlinks=False, allow_patterns=dl_pattern)
        print("Model downloaded successfully!")
        print(f"Current working directory: {os.getcwd()}")
        print(f"Model directory contents: {os.listdir(model_name)}")

        # Close (and thereby flush) the Modelfile before ollama reads it.
        # NOTE(review): FROM names the Hub id while the snapshot is stored in
        # ./<model_name> - confirm which one ollama is meant to resolve.
        with open(model_file, "w") as modelfile:
            modelfile.write(f'FROM "{model_id}"')

        _run_ollama(["create", "-f", model_file, tagged_model], "converting to Ollama")
        print("Model converted to Ollama successfully!")

        _run_ollama(["push", tagged_model], "pushing to Ollama")
        print("Model pushed to Ollama library successfully!")

        if latest:
            latest_model = f"{ollama_repo}:latest"
            _run_ollama(["cp", tagged_model, latest_model], "copying to the :latest tag")
            print("Model copied to the :latest tag successfully!")
            _run_ollama(["push", latest_model], "pushing the :latest tag to Ollama")
            print("Model pushed to Ollama library successfully!")

        # The interface has two outputs, so the success path must return a
        # pair as well; None leaves the image output empty.
        return (f"Model pushed to the Ollama library as `{tagged_model}`", None)
    except Exception as e:
        return (f"Error: {e}", "error.png")
    finally:
        shutil.rmtree(model_name, ignore_errors=True)
        try:
            os.remove(model_file)
        except OSError:
            # Best-effort cleanup: the Modelfile may never have been written.
            pass
        print("Folder cleaned up successfully!")

        
# Extra stylesheet handed to gr.Blocks below.
css="""/* Custom CSS to allow scrolling */
.gradio-container {overflow-y: auto;}
"""
# Create Gradio interface
with gr.Blocks(css=css) as demo:
    gr.Markdown("You must be logged in to use Ollamafy.")
    # Show this host's Ollama public key on the page.
    gr.Markdown(ollama_pubkey)
    gr.LoginButton(min_width=250)

    model_id = HuggingfaceHubSearch(
        label="Hub Model ID",
        placeholder="Search for model id on Huggingface",
        search_type="model",
    )

    # The selected value is passed to process_model as q_method; the first
    # (empty) choice is the default.
    q_method = gr.Dropdown(
        ["", "Q3_k_s", "Q3_k_m", "Q3_k_l", "Q4_0", "Q4_1", "Q4_k_s", "Q4_k_m", "Q5_0", "Q5_1", "Q5_k_s", "Q5_k_m", "Q6_k","Q8_0"],
        label="Quantization Method",
        info="Ollama Quantization Types",
        value="",
        filterable=False,
        visible=True
    )
    latest = gr.Checkbox(
        value=False,
        label="Latest",
        info="Copy Model to Ollama Library with the :latest tag"
    )
    maintainer = gr.Checkbox(
        value=False,
        label="Maintainer",
        info="This is your original repository on both Hugging Face and Ollama. (DO NOT USE!!!)"
    )
    # Wire the inputs above to process_model; its two return values fill the
    # Markdown and Image outputs in that order.
    iface = gr.Interface(
        fn=process_model,
        inputs=[
            model_id,
            q_method,
            latest,
            maintainer
        ],
        outputs=[
            gr.Markdown(label="output"),
            gr.Image(show_label=False),
        ],
        title="Create your own Ollama Models and Push them to the Ollama Library, blazingly fast ⚡!",
        description=ollama_pubkey,
        api_name=False
    )
    
#username = whoami(oauth_token.token)["name"]
def restart_space():
    """Request a factory reboot of the ``unclemusclez/ollamafy`` Space using ``HF_TOKEN``."""
    hub = HfApi()
    hub.restart_space(
        repo_id="unclemusclez/ollamafy",
        token=HF_TOKEN,
        factory_reboot=True,
    )

# Call restart_space from a background scheduler on a fixed interval:
# 6 * 60 * 60 seconds = 21600 seconds (six hours).
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, trigger="interval", seconds=6 * 60 * 60)
scheduler.start()

# Launch the interface
(
    demo
    .queue(default_concurrency_limit=1, max_size=5)
    .launch(debug=True, show_api=False)
)