OpenSourceRonin committed
Commit 5c539b4
1 Parent(s): d1789cc

Update app.py

Files changed (1)
  1. app.py +9 -110
app.py CHANGED
@@ -1,10 +1,6 @@
 import spaces
 import os
 import threading
-from collections import deque
-
-import plotly.graph_objs as go
-import pynvml
 
 import gradio as gr
 from huggingface_hub import snapshot_download
@@ -30,100 +26,6 @@ models = [
     },
 ]
 
-# Queues for storing historical data (saving the last 100 GPU utilization and memory usage values)
-gpu_util_history = deque(maxlen=100)
-mem_usage_history = deque(maxlen=100)
-
-
-def initialize_nvml():
-    """
-    Initialize NVML (NVIDIA Management Library).
-    """
-    pynvml.nvmlInit()
-
-
-def get_gpu_info():
-    """
-    Get GPU utilization and memory usage information.
-
-    Returns:
-        dict: A dictionary containing GPU utilization and memory usage information.
-    """
-    handle = pynvml.nvmlDeviceGetHandleByIndex(0)  # Assuming a single GPU setup
-    utilization = pynvml.nvmlDeviceGetUtilizationRates(handle)
-    memory = pynvml.nvmlDeviceGetMemoryInfo(handle)
-
-    gpu_info = {
-        'gpu_util': utilization.gpu,
-        'mem_used': memory.used / 1024**2,  # Convert bytes to MiB
-        'mem_total': memory.total / 1024**2,  # Convert bytes to MiB
-        'mem_percent': (memory.used / memory.total) * 100
-    }
-    return gpu_info
-
-
-def _update_charts(chart_height: int = 200) -> go.Figure:
-    """
-    Update the GPU utilization and memory usage charts.
-
-    Args:
-        chart_height (int, optional): used to set the height of the chart. Defaults to 200.
-
-    Returns:
-        plotly.graph_objs.Figure: The updated figure containing the GPU and memory usage charts.
-    """
-    # obtain GPU information
-    gpu_info = get_gpu_info()
-
-    # record the latest GPU utilization and memory usage values
-    gpu_util = round(gpu_info.get('gpu_util', 0), 1)
-    mem_used = round(gpu_info.get('mem_used', 0) / 1024, 2)  # Convert MiB to GiB
-    gpu_util_history.append(gpu_util)
-    mem_usage_history.append(mem_used)
-
-    # create GPU utilization line chart
-    gpu_trace = go.Scatter(
-        y=list(gpu_util_history),
-        mode='lines+markers',
-        text=list(gpu_util_history),
-        line=dict(shape='spline', color='blue'),  # Make the line smooth and set color
-        yaxis='y1'  # Link to y-axis 1
-    )
-
-    # create memory usage line chart
-    mem_trace = go.Scatter(
-        y=list(mem_usage_history),
-        mode='lines+markers',
-        text=list(mem_usage_history),
-        line=dict(shape='spline', color='red'),  # Make the line smooth and set color
-        yaxis='y2'  # Link to y-axis 2
-    )
-
-    # set the layout of the chart
-    layout = go.Layout(
-        xaxis=dict(title=None, showticklabels=False, ticks=''),
-        yaxis=dict(
-            title='GPU Utilization (%)',
-            range=[-5, 110],
-            titlefont=dict(color='blue'),
-            tickfont=dict(color='blue'),
-        ),
-        yaxis2=dict(title='Memory Usage (GiB)',
-                    range=[0, max(24,
-                                  max(mem_usage_history) + 1)],
-                    titlefont=dict(color='red'),
-                    tickfont=dict(color='red'),
-                    overlaying='y',
-                    side='right'),
-        height=chart_height,  # set the height of the chart
-        margin=dict(l=10, r=10, t=0, b=0),  # set the margin of the chart
-        showlegend=False  # disable the legend
-    )
-
-    fig = go.Figure(data=[gpu_trace, mem_trace], layout=layout)
-    return fig
-
-
 def initialize_history():
     """
     Initializes the GPU utilization and memory usage history.
@@ -134,13 +36,6 @@ def initialize_history():
     mem_usage_history.append(round(gpu_info.get('mem_percent', 0), 1))
 
 
-def enable_gpu_info():
-    pynvml.nvmlInit()
-
-
-def disable_gpu_info():
-    pynvml.nvmlShutdown()
-
 model_choices = [f"{model['name']} ({model['bits']})" for model in models]
 display_to_model = {f"{model['name']} ({model['bits']})": model['name'] for model in models}
 
@@ -159,7 +54,8 @@ def download_models_in_background():
 download_thread = threading.Thread(target=download_models_in_background)
 download_thread.start()
 
-loaded_models = {}
+loaded_model = None
+loaded_model_name = None
 
 @spaces.GPU
 def respond(
@@ -173,12 +69,16 @@ def respond(
 ):
     model_name = display_to_model[selected_model_display_label]
 
+    global loaded_model
+    global loaded_model_name
+
     # Check if the model is already loaded
-    if model_name not in loaded_models:
+    if model_name is not loaded_model_name:
         # Load and store the model in the cache
-        loaded_models[model_name] = get_chat_loop_generator(model_name)
+        loaded_model = get_chat_loop_generator(model_name)
+        loaded_model_name = model_name
 
-    chat_completion = loaded_models[model_name]
+    chat_completion = loaded_model
 
     messages = [{"role": "system", "content": system_message}]
 
@@ -240,4 +140,3 @@ with gr.Blocks(fill_height=True) as demo:
 if __name__ == "__main__":
     share = os.getenv("SHARE_LINK", None) in ["1", "true", "True"]
     demo.launch(share=share)
-# disable_gpu_info()
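
The substance of this commit is the cache change: the old loaded_models dict kept every model that had ever been selected resident in GPU memory, while the new code keeps a single generator and reloads whenever a different model is requested. Two caveats are worth noting. First, "if model_name is not loaded_model_name:" tests string identity rather than equality, so a name that compares equal but is a distinct object would trigger a needless reload; != is the robust comparison. Second, initialize_history() still appends to the mem_usage_history deque deleted by this commit, so it would raise a NameError if called. The sketch below shows the single-slot cache pattern with the equality comparison; load_model and get_cached_model are hypothetical stand-ins for get_chat_loop_generator and the inline logic in respond(), and the model names are placeholders.

from typing import Callable, Optional

# Single-slot cache: at most one chat generator stays resident at a time.
loaded_model: Optional[Callable] = None
loaded_model_name: Optional[str] = None


def load_model(model_name: str) -> Callable:
    # Hypothetical stand-in for get_chat_loop_generator(model_name).
    return lambda messages, **kwargs: iter(())


def get_cached_model(model_name: str) -> Callable:
    # Reload only when the requested name differs from the cached one.
    # Equality (!=) is used here rather than identity (is not).
    global loaded_model, loaded_model_name
    if model_name != loaded_model_name:
        loaded_model = load_model(model_name)  # the previous model is dropped
        loaded_model_name = model_name
    return loaded_model


# Repeated requests for the same name reuse the cached generator.
gen_a = get_cached_model("model-a (2bit)")
gen_b = get_cached_model("model-a (2bit)")
assert gen_a is gen_b

Evicting on switch bounds GPU memory to a single model at the cost of a reload on every selection change, which is the trade-off this commit makes relative to the old dict cache.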
 