vilarin committed (verified)
Commit 7eeaa8f · 1 Parent(s): a9fe0e7

Update app.py

Files changed (1):
  app.py  +88 -45
app.py CHANGED
@@ -2,6 +2,7 @@ import os
 import threading
 import time
 import subprocess
+import spaces
 
 OLLAMA = os.path.expanduser("~/ollama")
 
@@ -10,20 +11,23 @@ if not os.path.exists(OLLAMA):
     os.chmod(OLLAMA, 0o755)
 
 def ollama_service_thread():
-    subprocess.run("~/ollama serve", shell=True)
-
+    global process
+    process = subprocess.Popen("~/ollama serve", shell=True)
+    process.wait()
+
 OLLAMA_SERVICE_THREAD = threading.Thread(target=ollama_service_thread)
-OLLAMA_SERVICE_THREAD.start()
+# OLLAMA_SERVICE_THREAD.start()
 
-print("Giving ollama serve a moment")
-time.sleep(10)
+def terminate():
+    if process:
+        os.killpg(os.getpgid(process.pid), signal.SIGTERM)
+    OLLAMA_SERVICE_THREAD.join()
 
 # Uncomment and modify the model to what you want locally
 # model = "moondream"
-model = os.environ.get("MODEL")
-
-subprocess.run(f"~/ollama pull {model}", shell=True)
+# model = os.environ.get("MODEL")
 
+# subprocess.run(f"~/ollama pull {model}", shell=True)
 
 import copy
 import gradio as gr
@@ -38,7 +42,9 @@ DESCRIPTION = f"""
 <center>
 <p>Feel free to test models with ollama.
 <br>
-Easy to modify and running models you want.
+Input <em>/pull model_name</em> to pull model.
+<br>
+Input <em>/list</em> to get model list.
 </p>
 </center>
 """
@@ -54,46 +60,78 @@ h3 {
     text-align: center;
 }
 """
-
-
-def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
+INIT_SIGN = ""
+
+def init():
+    OLLAMA_SERVICE_THREAD.start()
+    print("Giving ollama serve a moment")
+    time.sleep(10)
+    INIT_SIGN = "FINISHED"
+
+def ollama_func(command):
+    c1, c2 = command.split(" ")
+    function_map = {
+        "/init": init(),
+        "/pull": ollama.pull(c2),
+        "/list": ollama.list(),
+        "/bye": terminate(),
+    }
+    if c1 in function_map:
+        function_map[c1]
+    else:
+        print("No supported command.")
+
+@spaces.GPU()
+def launch():
+    OLLAMA_SERVICE_THREAD.start()
+    print("Giving ollama serve a moment")
+    time.sleep(10)
+
+def stream_chat(message: str, history: list, model: str, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
 
-    conversation = []
-    for prompt, answer in history:
-        conversation.extend([
-            {"role": "user", "content": prompt},
-            {"role": "assistant", "content": answer},
-        ])
-    conversation.append({"role": "user", "content": message})
-
-    print(f"Conversation is -\n{conversation}")
-
-    response = client.chat(
-        model=model,
-        messages=conversation,
-        stream=True,
-        options={
-            'num_predict': max_new_tokens,
-            'temperature': temperature,
-            'top_p': top_p,
-            'top_k': top_k,
-            'repeat_penalty': penalty,
-            'low_vram': True,
-        },
-    )
-
-    buffer = ""
-    for chunk in response:
-        buffer += chunk["message"]["content"]
-        yield buffer
-
-
-
-chatbot = gr.Chatbot(height=600)
+    if message.startswith("/"):
+        ollama_func(message)
+    else:
+        if INIT_SIGN:
+            return "Please Enter /init to initialize Ollama"
+        else:
+            launch()
+        conversation = []
+        for prompt, answer in history:
+            conversation.extend([
+                {"role": "user", "content": prompt},
+                {"role": "assistant", "content": answer},
+            ])
+        conversation.append({"role": "user", "content": message})
+
+        print(f"Conversation is -\n{conversation}")
+
+        response = client.chat(
+            model=model,
+            messages=conversation,
+            stream=True,
+            options={
+                'num_predict': max_new_tokens,
+                'temperature': temperature,
+                'top_p': top_p,
+                'top_k': top_k,
+                'repeat_penalty': penalty,
+                'low_vram': True,
+            },
        )
+
+        terminate()
+
+        buffer = ""
+        for chunk in response:
+            buffer += chunk["message"]["content"]
+            yield buffer
+
+
+chatbot = gr.Chatbot(height=600, placeholder=DESCRIPTION)
 
 with gr.Blocks(css=CSS, theme="soft") as demo:
     gr.HTML(TITLE)
-    gr.HTML(DESCRIPTION)
     gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
     gr.ChatInterface(
         fn=stream_chat,
@@ -101,6 +139,11 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
         fill_height=True,
         additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
         additional_inputs=[
+            gr.Textbox(
+                value="qwen2:0.5b",
+                label="Model",
+                render=False,
+            )
             gr.Slider(
                 minimum=0,
                 maximum=1,
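
Note (not part of the commit): the new chat commands (/init, /pull, /list, /bye) are routed through a dict inside ollama_func. Below is a minimal, standalone sketch of that dispatch pattern, with each handler wrapped in a lambda so only the entered command's handler actually runs; init and terminate are stubbed here, and the ollama client calls are the same ones the diff above already uses.

# Illustrative sketch only, not the committed code.
import ollama

def init() -> str:
    # Stand-in for the real init() that starts `ollama serve`.
    return "ollama serve started"

def terminate() -> str:
    # Stand-in for the real terminate() that stops `ollama serve`.
    return "ollama serve stopped"

def ollama_func(command: str) -> str:
    # Split into the command itself and an optional argument.
    parts = command.split(" ", 1)
    cmd = parts[0]
    arg = parts[1] if len(parts) > 1 else ""
    function_map = {
        "/init": lambda: init(),
        "/pull": lambda: ollama.pull(arg),  # arg is a model name, e.g. "qwen2:0.5b"
        "/list": lambda: ollama.list(),
        "/bye": lambda: terminate(),
    }
    if cmd in function_map:
        return str(function_map[cmd]())  # run only the matching handler
    return "No supported command."

# Usage: ollama_func("/list") or ollama_func("/pull qwen2:0.5b")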