1 | # -*- coding: utf-8 -*- |
||
2 | # |
||
3 | # This file is part of Glances. |
||
4 | # |
||
5 | # Copyright (C) 2018 Kirby Banman <[email protected]> |
||
6 | # |
||
7 | # Glances is free software; you can redistribute it and/or modify |
||
8 | # it under the terms of the GNU Lesser General Public License as published by |
||
9 | # the Free Software Foundation, either version 3 of the License, or |
||
10 | # (at your option) any later version. |
||
11 | # |
||
12 | # Glances is distributed in the hope that it will be useful, |
||
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
15 | # GNU Lesser General Public License for more details. |
||
16 | # |
||
17 | # You should have received a copy of the GNU Lesser General Public License |
||
18 | # along with this program. If not, see <http://www.gnu.org/licenses/>. |
||
19 | |||
20 | """GPU plugin (limited to NVIDIA chipsets).""" |
||
21 | |||
22 | from glances.compat import nativestr |
||
23 | from glances.logger import logger |
||
24 | from glances.plugins.glances_plugin import GlancesPlugin |
||
25 | |||
try:
    import pynvml
except Exception as e:
    import_error_tag = True
    # Display a debug message if the pynvml import fails; the plugin is
    # then disabled instead of crashing Glances at startup.
    logger.warning("Missing Python Lib ({}), Nvidia GPU plugin is disabled".format(e))
else:
    import_error_tag = False
||
34 | |||
# Stats that can be historised (when the --enable-history tag is set).
# Each entry describes one curve: its stat name, label and Y-axis unit.
items_history_list = [
    {'name': 'proc', 'description': 'GPU processor', 'y_unit': '%'},
    {'name': 'mem', 'description': 'Memory consumption', 'y_unit': '%'},
]
||
43 | |||
44 | |||
class Plugin(GlancesPlugin):
    """Glances GPU plugin (limited to NVIDIA chipsets).

    stats is a list of dictionaries with one entry per GPU:
    {'key': 'gpu_id', 'gpu_id': <int>, 'name': <str>,
     'mem': <float % or None>, 'proc': <float % or None>}
    """

    def __init__(self, args=None):
        """Init the plugin."""
        super(Plugin, self).__init__(args=args,
                                     stats_init_value=[])

        # Init the NVidia API
        self.init_nvidia()

        # We want to display the stat in the curse interface
        self.display_curse = True

    def init_nvidia(self):
        """Init the NVIDIA API.

        Set self.nvml_ready (and return it): True if the NVML library
        could be initialized and device handles grabbed, else False.
        """
        if import_error_tag:
            # pynvml could not be imported: do not even try to init the
            # API (the previous code fell through and relied on a
            # NameError being caught below).
            self.nvml_ready = False
            return self.nvml_ready

        try:
            pynvml.nvmlInit()
            self.device_handles = get_device_handles()
            self.nvml_ready = True
        except Exception:
            # NVML init can fail for many reasons (no driver, no GPU...)
            logger.debug("pynvml could not be initialized.")
            self.nvml_ready = False

        return self.nvml_ready

    def get_key(self):
        """Return the key of the list."""
        return 'gpu_id'

    @GlancesPlugin._check_decorator
    @GlancesPlugin._log_result_decorator
    def update(self):
        """Update the GPU stats.

        Return the list of per-GPU stat dicts (empty when NVML is not
        available or the input method is not supported).
        """
        # Init new stats
        stats = self.get_init_value()

        if not self.nvml_ready:
            # NVML is not available: keep the previous (initial) stats
            return self.stats

        if self.input_method == 'local':
            stats = self.get_device_stats()
        elif self.input_method == 'snmp':
            # GPU stats are not available through SNMP
            pass

        # Update the stats
        self.stats = stats

        return self.stats

    def update_views(self):
        """Update stats views (decorations for the curses interface)."""
        # Call the father's method
        super(Plugin, self).update_views()

        # Add specifics informations
        # Alert: compute a per-GPU decoration for the proc and mem stats
        for i in self.stats:
            # Init the views for the current GPU
            self.views[i[self.get_key()]] = {'proc': {}, 'mem': {}}
            # Processor alert
            if 'proc' in i:
                alert = self.get_alert(i['proc'], header='proc')
                self.views[i[self.get_key()]]['proc']['decoration'] = alert
            # Memory alert
            if 'mem' in i:
                alert = self.get_alert(i['mem'], header='mem')
                self.views[i[self.get_key()]]['mem']['decoration'] = alert

        return True

    def msg_curse(self, args=None, max_width=None):
        """Return the dict to display in the curse interface."""
        # Init the return message
        ret = []

        # Only process if stats exist, not empty (issue #871) and plugin not disabled
        # (an empty stats list is already falsy, no extra == [] test needed)
        if not self.stats or self.is_disable():
            return ret

        # Check if all GPU have the same name
        same_name = all(s['name'] == self.stats[0]['name'] for s in self.stats)

        # gpu_stats contain the first GPU in the list
        gpu_stats = self.stats[0]

        # Header
        header = ''
        if len(self.stats) > 1:
            header += '{} '.format(len(self.stats))
        if same_name:
            header += '{} {}'.format('GPU', gpu_stats['name'])
        else:
            header += '{}'.format('GPU')
        msg = header[:17]
        ret.append(self.curse_add_line(msg, "TITLE"))

        # Build the string message
        if len(self.stats) == 1 or args.meangpu:
            # GPU stat summary or mono GPU
            # New line
            ret.append(self.curse_new_line())
            # GPU PROC
            try:
                mean_proc = sum(s['proc'] for s in self.stats if s is not None) / len(self.stats)
            except TypeError:
                # At least one GPU reported proc=None
                mean_proc_msg = '{:>4}'.format('N/A')
            else:
                mean_proc_msg = '{:>3.0f}%'.format(mean_proc)
            if len(self.stats) > 1:
                msg = '{:13}'.format('proc mean:')
            else:
                msg = '{:13}'.format('proc:')
            ret.append(self.curse_add_line(msg))
            ret.append(self.curse_add_line(
                mean_proc_msg, self.get_views(item=gpu_stats[self.get_key()],
                                              key='proc',
                                              option='decoration')))
            # New line
            ret.append(self.curse_new_line())
            # GPU MEM
            try:
                mean_mem = sum(s['mem'] for s in self.stats if s is not None) / len(self.stats)
            except TypeError:
                # At least one GPU reported mem=None
                mean_mem_msg = '{:>4}'.format('N/A')
            else:
                mean_mem_msg = '{:>3.0f}%'.format(mean_mem)
            if len(self.stats) > 1:
                msg = '{:13}'.format('mem mean:')
            else:
                msg = '{:13}'.format('mem:')
            ret.append(self.curse_add_line(msg))
            ret.append(self.curse_add_line(
                mean_mem_msg, self.get_views(item=gpu_stats[self.get_key()],
                                             key='mem',
                                             option='decoration')))
        else:
            # Multi GPU
            for gpu_stats in self.stats:
                # New line
                ret.append(self.curse_new_line())
                # GPU ID + PROC + MEM
                id_msg = '{}'.format(gpu_stats['gpu_id'])
                try:
                    proc_msg = '{:>3.0f}%'.format(gpu_stats['proc'])
                except (TypeError, ValueError):
                    # Formatting None raises TypeError (previously only
                    # ValueError was caught, so a None stat crashed here)
                    proc_msg = '{:>4}'.format('N/A')
                try:
                    mem_msg = '{:>3.0f}%'.format(gpu_stats['mem'])
                except (TypeError, ValueError):
                    mem_msg = '{:>4}'.format('N/A')
                msg = '{}: {} mem: {}'.format(id_msg, proc_msg, mem_msg)
                ret.append(self.curse_add_line(msg))

        return ret

    def get_device_stats(self):
        """Get GPU stats (one dict per detected device)."""
        stats = []

        for index, device_handle in enumerate(self.device_handles):
            device_stats = {}
            # Dictionnary key is the GPU_ID
            device_stats['key'] = self.get_key()
            # GPU id (for multiple GPU, start at 0)
            device_stats['gpu_id'] = index
            # GPU name
            device_stats['name'] = get_device_name(device_handle)
            # Memory consumption in % (not available on all GPU)
            device_stats['mem'] = get_mem(device_handle)
            # Processor consumption in %
            device_stats['proc'] = get_proc(device_handle)
            stats.append(device_stats)

        return stats

    def exit(self):
        """Overwrite the exit method to close the GPU API."""
        if self.nvml_ready:
            try:
                pynvml.nvmlShutdown()
            except Exception as e:
                logger.debug("pynvml failed to shutdown correctly ({})".format(e))

        # Call the father exit method
        super(Plugin, self).exit()
||
249 | |||
250 | |||
def get_device_handles():
    """Get a list of NVML device handles, one per device.

    Can throw NVMLError.
    """
    handles = []
    for device_index in range(pynvml.nvmlDeviceGetCount()):
        handles.append(pynvml.nvmlDeviceGetHandleByIndex(device_index))
    return handles
||
257 | |||
258 | |||
def get_device_name(device_handle):
    """Get GPU device name.

    Return the generic name "NVIDIA" if the NVML call fails.
    """
    try:
        return nativestr(pynvml.nvmlDeviceGetName(device_handle))
    except pynvml.NVMLError:
        # Bug fix: the exception class was misspelled 'NVMlError', so any
        # NVML failure raised AttributeError instead of using the fallback
        return "NVIDIA"
||
265 | |||
266 | |||
def get_mem(device_handle):
    """Get GPU device memory consumption in percent.

    Return None if the stat is not available for this device.
    """
    try:
        memory = pynvml.nvmlDeviceGetMemoryInfo(device_handle)
    except pynvml.NVMLError:
        return None
    return 100.0 * memory.used / memory.total
||
274 | |||
275 | |||
def get_proc(device_handle):
    """Get GPU processor consumption in percent.

    Return None if the stat is not available for this device.
    """
    try:
        utilization = pynvml.nvmlDeviceGetUtilizationRates(device_handle)
    except pynvml.NVMLError:
        return None
    return utilization.gpu
||