1 | # -*- coding: utf-8 -*- |
||
2 | # |
||
3 | # This file is part of Glances. |
||
4 | # |
||
5 | # Copyright (C) 2018 Kirby Banman <[email protected]> |
||
6 | # |
||
7 | # Glances is free software; you can redistribute it and/or modify |
||
8 | # it under the terms of the GNU Lesser General Public License as published by |
||
9 | # the Free Software Foundation, either version 3 of the License, or |
||
10 | # (at your option) any later version. |
||
11 | # |
||
12 | # Glances is distributed in the hope that it will be useful, |
||
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
15 | # GNU Lesser General Public License for more details. |
||
16 | # |
||
17 | # You should have received a copy of the GNU Lesser General Public License |
||
18 | # along with this program. If not, see <http://www.gnu.org/licenses/>. |
||
19 | |||
20 | """GPU plugin (limited to NVIDIA chipsets).""" |
||
21 | |||
22 | from glances.compat import nativestr |
||
0 ignored issues
–
show
introduced
by
Loading history...
|
|||
23 | from glances.logger import logger |
||
0 ignored issues
–
show
|
|||
24 | from glances.plugins.glances_plugin import GlancesPlugin |
||
0 ignored issues
–
show
|
|||
25 | |||
# pynvml is an optional dependency: record whether its import failed so the
# plugin can check the flag later instead of failing when this module loads.
import_error_tag = False
try:
    import pynvml
except Exception as import_err:
    import_error_tag = True
    # Tell the user why the Nvidia GPU plugin is disabled
    logger.warning("Missing Python Lib ({}), Nvidia GPU plugin is disabled".format(import_err))
||
0 ignored issues
–
show
The name
import_error_tag does not conform to the constant naming conventions ((([A-Z_][A-Z0-9_]*)|(__.*__))$ ).
This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site.
Loading history...
|
|||
34 | |||
# History items list: every entry declared here is historised when the
# --enable-history tag is set.
items_history_list = [
    {'name': 'proc', 'description': 'GPU processor', 'y_unit': '%'},
    {'name': 'mem', 'description': 'Memory consumption', 'y_unit': '%'},
]
||
43 | |||
44 | |||
class Plugin(GlancesPlugin):
    """Glances GPU plugin (limited to NVIDIA chipsets).

    stats is a list of dictionaries with one entry per GPU. Each entry is
    built by get_device_stats() and holds the keys 'key', 'gpu_id', 'name',
    'mem' and 'proc'; 'mem' and 'proc' are None when NVML raised an error
    while reading them.
    """

    def __init__(self, args=None, config=None):
        """Init the plugin."""
        super(Plugin, self).__init__(args=args,
                                     config=config,
                                     stats_init_value=[])

        # Init the NVidia API
        self.init_nvidia()

        # We want to display the stat in the curse interface
        self.display_curse = True

    def init_nvidia(self):
        """Init the NVIDIA API.

        Sets self.nvml_ready and self.device_handles.

        :return: True if NVML was initialised and the device handles were
                 collected, False otherwise.
        """
        # Always define both attributes, so that the other methods can rely
        # on them even when the initialisation fails.
        self.nvml_ready = False
        self.device_handles = []

        if import_error_tag:
            # pynvml could not be imported: there is nothing to initialise.
            return self.nvml_ready

        try:
            pynvml.nvmlInit()
            self.device_handles = get_device_handles()
            self.nvml_ready = True
        except Exception:
            logger.debug("pynvml could not be initialized.")
            self.device_handles = []
            self.nvml_ready = False

        return self.nvml_ready

    def get_key(self):
        """Return the key of the list."""
        return 'gpu_id'

    @GlancesPlugin._check_decorator
    @GlancesPlugin._log_result_decorator
    def update(self):
        """Update the GPU stats.

        :return: self.stats, left untouched when NVML is not ready.
        """
        # Init new stats
        stats = self.get_init_value()

        if not self.nvml_ready:
            return self.stats

        if self.input_method == 'local':
            stats = self.get_device_stats()
        elif self.input_method == 'snmp':
            # not available
            pass

        # Update the stats
        self.stats = stats

        return self.stats

    def update_views(self):
        """Update stats views.

        :return: True
        """
        # Call the father's method
        super(Plugin, self).update_views()

        # Add specifics informations
        # Alert
        for gpu in self.stats:
            gpu_id = gpu[self.get_key()]
            # Init the views for the current GPU
            self.views[gpu_id] = {'proc': {}, 'mem': {}}
            # Processor alert
            if 'proc' in gpu:
                alert = self.get_alert(gpu['proc'], header='proc')
                self.views[gpu_id]['proc']['decoration'] = alert
            # Memory alert
            if 'mem' in gpu:
                alert = self.get_alert(gpu['mem'], header='mem')
                self.views[gpu_id]['mem']['decoration'] = alert

        return True

    @staticmethod
    def _gpu_percent_msg(value):
        """Format value as a right-aligned percentage, or ' N/A' if it cannot be formatted."""
        try:
            return '{:>3.0f}%'.format(value)
        except (ValueError, TypeError):
            # Formatting None with a float format spec raises TypeError
            # (not ValueError), so both exceptions must be handled here.
            return '{:>4}'.format('N/A')

    def _gpu_mean(self, key):
        """Return the mean of stat `key` over all GPUs, or None if any value is not a number."""
        try:
            return sum(gpu[key] for gpu in self.stats) / len(self.stats)
        except TypeError:
            # At least one GPU reported None for this stat
            return None

    def msg_curse(self, args=None, max_width=None):
        """Return the list of lines to display in the curse interface.

        :param args: parsed arguments; only the 'meangpu' attribute is read.
                     When it is missing (or args is None) the per-GPU view is
                     used for multiple GPUs.
        :param max_width: not used by this plugin.
        """
        # Init the return message
        ret = []

        # Only process if stats exist, not empty (issue #871) and plugin not disabled
        if not self.stats or self.is_disable():
            return ret

        gpu_count = len(self.stats)

        # first_gpu is the first GPU in the list
        first_gpu = self.stats[0]

        # Check if all GPU have the same name
        same_name = all(s['name'] == first_gpu['name'] for s in self.stats)

        # Header
        header = ''
        if gpu_count > 1:
            header += '{} '.format(gpu_count)
        if same_name:
            header += '{} {}'.format('GPU', first_gpu['name'])
        else:
            header += '{}'.format('GPU')
        msg = header[:17]
        ret.append(self.curse_add_line(msg, "TITLE"))

        # Build the string message
        if gpu_count == 1 or getattr(args, 'meangpu', False):
            # GPU stat summary or mono GPU
            # Both lines are decorated with the views of the first GPU
            for key in ('proc', 'mem'):
                # New line
                ret.append(self.curse_new_line())
                mean_msg = self._gpu_percent_msg(self._gpu_mean(key))
                if gpu_count > 1:
                    msg = '{:13}'.format('{} mean:'.format(key))
                else:
                    msg = '{:13}'.format('{}:'.format(key))
                ret.append(self.curse_add_line(msg))
                ret.append(self.curse_add_line(
                    mean_msg, self.get_views(item=first_gpu[self.get_key()],
                                             key=key,
                                             option='decoration')))
        else:
            # Multi GPU
            for gpu in self.stats:
                # New line
                ret.append(self.curse_new_line())
                # GPU ID + PROC + MEM
                id_msg = '{}'.format(gpu['gpu_id'])
                proc_msg = self._gpu_percent_msg(gpu['proc'])
                mem_msg = self._gpu_percent_msg(gpu['mem'])
                msg = '{}: {} mem: {}'.format(id_msg, proc_msg, mem_msg)
                ret.append(self.curse_add_line(msg))

        return ret

    def get_device_stats(self):
        """Get GPU stats.

        :return: a list with one dictionary per handle in self.device_handles.
        """
        stats = []

        for index, device_handle in enumerate(self.device_handles):
            device_stats = {}
            # Dictionnary key is the GPU_ID
            device_stats['key'] = self.get_key()
            # GPU id (for multiple GPU, start at 0)
            device_stats['gpu_id'] = index
            # GPU name
            device_stats['name'] = get_device_name(device_handle)
            # Memory consumption in % (not available on all GPU)
            device_stats['mem'] = get_mem(device_handle)
            # Processor consumption in %
            device_stats['proc'] = get_proc(device_handle)
            stats.append(device_stats)

        return stats

    def exit(self):
        """Overwrite the exit method to close the GPU API."""
        if self.nvml_ready:
            try:
                pynvml.nvmlShutdown()
            except Exception as err:
                logger.debug("pynvml failed to shutdown correctly ({})".format(err))

        # Call the father exit method
        super(Plugin, self).exit()
||
250 | |||
251 | |||
def get_device_handles():
    """Return the NVML device handles as a list, one handle per device.

    Can throw NVMLError.
    """
    device_count = pynvml.nvmlDeviceGetCount()
    handles = []
    for device_index in range(device_count):
        handles.append(pynvml.nvmlDeviceGetHandleByIndex(device_index))
    return handles
||
0 ignored issues
–
show
|
|||
258 | |||
259 | |||
def get_device_name(device_handle):
    """Get GPU device name.

    :param device_handle: NVML device handle, as returned by
                          pynvml.nvmlDeviceGetHandleByIndex().
    :return: the device name converted with nativestr(), or the generic
             "NVIDIA" label when NVML raises an error.
    """
    try:
        return nativestr(pynvml.nvmlDeviceGetName(device_handle))
    except pynvml.NVMLError:
        # The exception class is NVMLError (upper-case "L"), as used by the
        # other helpers of this module; a misspelled attribute name here
        # would raise AttributeError while handling the NVML failure.
        return "NVIDIA"
||
266 | |||
267 | |||
def get_mem(device_handle):
    """Return the memory consumption of the GPU device in percent.

    None is returned when NVML raises an error while reading the memory
    information.
    """
    try:
        info = pynvml.nvmlDeviceGetMemoryInfo(device_handle)
    except pynvml.NVMLError:
        return None
    used_percent = info.used * 100.0 / info.total
    return used_percent
||
275 | |||
276 | |||
def get_proc(device_handle):
    """Return the processor consumption of the GPU device in percent.

    The value is the `gpu` field of the NVML utilization rates; None is
    returned when NVML raises an error.
    """
    try:
        utilization = pynvml.nvmlDeviceGetUtilizationRates(device_handle)
    except pynvml.NVMLError:
        return None
    return utilization.gpu
||
283 |