Update app.py
app.py CHANGED
@@ -2,6 +2,7 @@ import os
 import threading
 import time
 import subprocess
+import spaces
 
 OLLAMA = os.path.expanduser("~/ollama")
 
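The one-line change in this hunk, `import spaces`, pulls in the Hugging Face ZeroGPU helper: on a ZeroGPU Space the GPU is attached only while a function decorated with `@spaces.GPU()` is executing, which is why the new `launch()` further down carries that decorator. A minimal sketch of the pattern (the function name and body here are placeholders, not part of this commit):

```python
import spaces

@spaces.GPU(duration=60)  # hold a ZeroGPU device for up to ~60 s per call
def warm_up():
    # GPU-dependent work (e.g. starting and querying a model server)
    # must happen inside a @spaces.GPU-decorated call.
    print("GPU attached for the duration of this call")
```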
@@ -10,20 +11,23 @@ if not os.path.exists(OLLAMA):
 os.chmod(OLLAMA, 0o755)
 
 def ollama_service_thread():
-    …
+    global process
+    process = subprocess.Popen("~/ollama serve", shell=True, start_new_session=True)
+    process.wait()
+
 OLLAMA_SERVICE_THREAD = threading.Thread(target=ollama_service_thread)
-OLLAMA_SERVICE_THREAD.start()
+# OLLAMA_SERVICE_THREAD.start()
 
-…
+def terminate():
+    if process:
+        os.killpg(os.getpgid(process.pid), signal.SIGTERM)
+    OLLAMA_SERVICE_THREAD.join()
 
 # Uncomment and modify the model to what you want locally
 # model = "moondream"
-model = os.environ.get("MODEL")
-
-subprocess.run(f"~/ollama pull {model}", shell=True)
+# model = os.environ.get("MODEL")
+
+# subprocess.run(f"~/ollama pull {model}", shell=True)
 
 import copy
 import gradio as gr
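The hunk above starts `ollama serve` in a background thread and tears it down with `os.killpg`; note that `signal` is not imported anywhere in the visible diff, and `killpg` only targets the server cleanly when it runs in its own process group. A self-contained sketch of that lifecycle, assuming the ollama binary sits at `~/ollama`:

```python
import os
import signal
import subprocess
import threading

process = None

def ollama_service_thread():
    global process
    # start_new_session=True gives the shell (and ollama) its own
    # process group, so killpg() below cannot hit this Python process.
    process = subprocess.Popen(
        os.path.expanduser("~/ollama") + " serve",
        shell=True,
        start_new_session=True,
    )
    process.wait()

service_thread = threading.Thread(target=ollama_service_thread)
service_thread.start()

def terminate():
    if process is not None:
        os.killpg(os.getpgid(process.pid), signal.SIGTERM)
    service_thread.join()
```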
@@ -38,7 +42,9 @@ DESCRIPTION = f"""
 <center>
 <p>Feel free to test models with ollama.
 <br>
-…
+Enter <em>/pull model_name</em> to pull a model.
+<br>
+Enter <em>/list</em> to list the available models.
 </p>
 </center>
 """
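The `/pull` and `/list` commands advertised in this description map onto the `ollama` Python client, which `ollama_func` below dispatches to. A quick sketch of the two calls (assuming the `ollama` package and a running server; the exact shape of `list()`'s result varies between ollama-python versions):

```python
import ollama

# Download a model; blocks until the pull completes.
ollama.pull("qwen2:0.5b")

# Enumerate locally available models.
for m in ollama.list()["models"]:
    print(m["name"])
```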
@@ -54,46 +60,78 @@ h3 {
     text-align: center;
 }
 """
-def …
+INIT_SIGN = ""
+
+def init():
+    global INIT_SIGN
+    OLLAMA_SERVICE_THREAD.start()
+    print("Giving ollama serve a moment")
+    time.sleep(10)
+    INIT_SIGN = "FINISHED"
+
+def ollama_func(command):
+    c1, _, c2 = command.partition(" ")
+    function_map = {
+        "/init": init,
+        "/pull": lambda: ollama.pull(c2),
+        "/list": ollama.list,
+        "/bye": terminate,
+    }
+    if c1 in function_map:
+        function_map[c1]()
+    else:
+        print("No supported command.")
+
+@spaces.GPU()
+def launch():
+    OLLAMA_SERVICE_THREAD.start()
+    print("Giving ollama serve a moment")
+    time.sleep(10)
+
+def stream_chat(message: str, history: list, model: str, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
+
+    if message.startswith("/"):
+        ollama_func(message)
+    else:
+        if INIT_SIGN:
+            yield "Please enter /init to initialize Ollama"
+            return
+        else:
+            launch()
+        conversation = []
+        for prompt, answer in history:
+            conversation.extend([
+                {"role": "user", "content": prompt},
+                {"role": "assistant", "content": answer},
+            ])
+        conversation.append({"role": "user", "content": message})
+
+        print(f"Conversation is -\n{conversation}")
+
+        response = client.chat(
+            model=model,
+            messages=conversation,
+            stream=True,
+            options={
+                'num_predict': max_new_tokens,
+                'temperature': temperature,
+                'top_p': top_p,
+                'top_k': top_k,
+                'repeat_penalty': penalty,
+                'low_vram': True,
+            },
+        )
+
+        buffer = ""
+        for chunk in response:
+            buffer += chunk["message"]["content"]
+            yield buffer
+
+        terminate()
+
+
+chatbot = gr.Chatbot(height=600, placeholder=DESCRIPTION)
 
 with gr.Blocks(css=CSS, theme="soft") as demo:
     gr.HTML(TITLE)
-    gr.HTML(DESCRIPTION)
     gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
     gr.ChatInterface(
         fn=stream_chat,
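`client` is not defined in this hunk; presumably app.py builds an `ollama.Client()` in an unchanged section. A self-contained sketch of the streaming call `stream_chat` makes, with the same `options` keys (all standard Ollama generation options; the model name and values are examples):

```python
import ollama

client = ollama.Client()  # assumption: app.py creates one like this

response = client.chat(
    model="qwen2:0.5b",
    messages=[{"role": "user", "content": "Hello!"}],
    stream=True,
    options={
        "num_predict": 1024,    # cap on generated tokens
        "temperature": 0.8,
        "top_p": 0.8,
        "top_k": 20,
        "repeat_penalty": 1.0,
        "low_vram": True,
    },
)

# With stream=True the call returns an iterator of partial messages,
# so the server has to stay alive until this loop finishes.
for chunk in response:
    print(chunk["message"]["content"], end="", flush=True)
```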
@@ -101,6 +139,11 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
         fill_height=True,
         additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
         additional_inputs=[
+            gr.Textbox(
+                value="qwen2:0.5b",
+                label="Model",
+                render=False,
+            ),
             gr.Slider(
                 minimum=0,
                 maximum=1,
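The new `gr.Textbox` becomes the first entry in `additional_inputs`, which matters because `gr.ChatInterface` passes those values to `fn` positionally after `(message, history)`; the textbox therefore feeds `stream_chat`'s `model` parameter, with the sliders following in list order. A minimal sketch of that wiring (the single slider is a placeholder, not the Space's full parameter set):

```python
import gradio as gr

def stream_chat(message, history, model, temperature):
    # additional_inputs arrive in list order after (message, history)
    return f"[{model} @ T={temperature}] {message}"

demo = gr.ChatInterface(
    fn=stream_chat,
    additional_inputs=[
        gr.Textbox(value="qwen2:0.5b", label="Model"),
        gr.Slider(minimum=0, maximum=1, value=0.8, label="Temperature"),
    ],
)

if __name__ == "__main__":
    demo.launch()
```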