glances.plugins.glances_gpu   B
last analyzed

Complexity

Total Complexity 51

Size/Duplication

Total Lines 345
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 179
dl 0
loc 345
rs 7.92
c 0
b 0
f 0
wmc 51

8 Methods

Rating   Name   Duplication   Size   Complexity  
A Plugin.__init__() 0 9 1
A Plugin.init_nvidia() 0 14 3
A Plugin.get_key() 0 3 1
F Plugin.msg_curse() 0 111 21
A Plugin.get_device_stats() 0 23 2
A Plugin.update_views() 0 24 5
A Plugin.exit() 0 10 3
A Plugin.update() 0 55 4

6 Functions

Rating   Name   Duplication   Size   Complexity  
A get_mem() 0 7 2
A get_temperature() 0 6 2
A get_fan_speed() 0 6 2
A get_proc() 0 6 2
A get_device_handles() 0 6 1
A get_device_name() 0 6 2

How to fix   Complexity   

Complexity

Complex classes like glances.plugins.glances_gpu often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# -*- coding: utf-8 -*-
2
#
3
# This file is part of Glances.
4
#
5
# Copyright (C) 2020 Kirby Banman <[email protected]>
6
#
7
# SPDX-License-Identifier: LGPL-3.0-only
8
#
9
10
"""GPU plugin (limited to NVIDIA chipsets)."""
11
12
from glances.compat import nativestr, to_fahrenheit
13
from glances.logger import logger
14
from glances.plugins.glances_plugin import GlancesPlugin
15
16
# In Glances 3.1.4 or higher, we use the py3nvml lib (see issue #1523)
# The plugin is considered unusable until the NVML binding has been imported.
import_error_tag = True
try:
    import py3nvml.py3nvml as pynvml
except Exception as e:
    # Any failure while loading the binding disables the plugin
    logger.warning("Missing Python Lib ({}), Nvidia GPU plugin is disabled".format(e))
else:
    import_error_tag = False
25
26
# Stats listed here are historised when the --enable-history tag is set.
# Each entry gives the stat name, a human readable description and the
# unit used for the y axis.
items_history_list = [
    dict(name='proc', description='GPU processor', y_unit='%'),
    dict(name='mem', description='Memory consumption', y_unit='%'),
]
32
33
34
class Plugin(GlancesPlugin):
    """Glances GPU plugin (limited to NVIDIA chipsets).

    stats is a list of dictionaries with one entry per GPU. Each entry holds
    the keys 'key', 'gpu_id', 'name', 'mem', 'proc', 'temperature' and
    'fan_speed'; a value is None when NVML could not report it.
    """

    # Stats shown in the summary display: (stat key, mono GPU label, multi GPU label)
    _SUMMARY_FIELDS = (
        ('proc', 'proc:', 'proc mean:'),
        ('mem', 'mem:', 'mem mean:'),
        ('temperature', 'temperature:', 'temp mean:'),
    )

    def __init__(self, args=None, config=None):
        """Init the plugin."""
        super(Plugin, self).__init__(args=args, config=config, stats_init_value=[])

        # Init the Nvidia API
        self.init_nvidia()

        # We want to display the stat in the curse interface
        self.display_curse = True

    def init_nvidia(self):
        """Init the NVIDIA API.

        Sets self.nvml_ready and self.device_handles.

        :return: True if NVML is initialized and the device handles were
                 retrieved, False otherwise.
        """
        # Safe defaults: the attributes exist whatever happens below
        self.device_handles = []
        self.nvml_ready = False

        if import_error_tag:
            # The binding is not importable: pynvml is not even defined, so
            # there is nothing to initialize (already reported at import time)
            return self.nvml_ready

        try:
            pynvml.nvmlInit()
            self.device_handles = get_device_handles()
            self.nvml_ready = True
        except Exception:
            logger.debug("pynvml could not be initialized.")
            self.nvml_ready = False

        return self.nvml_ready

    def get_key(self):
        """Return the key of the list."""
        return 'gpu_id'

    @GlancesPlugin._check_decorator
    @GlancesPlugin._log_result_decorator
    def update(self):
        """Update the GPU stats.

        :return: the current stats (list of dict, one per GPU).
        """
        # Init new stats
        stats = self.get_init_value()

        if not self.nvml_ready:
            # NVML is not available: leave the stats untouched.
            # To test on a computer without GPU, assign a fake list to
            # self.stats here, for example:
            # self.stats = [
            #     {"key": "gpu_id", "gpu_id": 0, "name": "Fake GeForce GTX1",
            #      "mem": 5.79, "proc": 4, "temperature": 26, "fan_speed": 30},
            #     {"key": "gpu_id", "gpu_id": 1, "name": "Fake GeForce GTX2",
            #      "mem": 15, "proc": 8, "temperature": 65, "fan_speed": 75},
            # ]
            return self.stats

        if self.input_method == 'local':
            stats = self.get_device_stats()
        elif self.input_method == 'snmp':
            # not available
            pass

        # Update the stats
        self.stats = stats

        return self.stats

    def update_views(self):
        """Update stats views."""
        # Call the father's method
        super(Plugin, self).update_views()

        # Add specifics information: one alert decoration per GPU and per stat
        for gpu in self.stats:
            # Init the views for the current GPU
            gpu_views = {'proc': {}, 'mem': {}, 'temperature': {}}
            self.views[gpu[self.get_key()]] = gpu_views
            # Processor, memory and temperature alerts (in that order)
            for field in ('proc', 'mem', 'temperature'):
                if field in gpu:
                    gpu_views[field]['decoration'] = self.get_alert(gpu[field], header=field)

        return True

    def _mean_stat(self, key):
        """Return the mean of the stat `key` over the GPUs reporting a value.

        GPUs for which the value is None (not supported by the device) are
        ignored. Return None if no GPU reports a usable value.
        """
        values = [gpu[key] for gpu in self.stats if gpu is not None and gpu.get(key) is not None]
        if not values:
            return None
        try:
            return sum(values) / float(len(values))
        except TypeError:
            # Non numeric value: nothing meaningful to display
            return None

    @staticmethod
    def _format_percent(value):
        """Return `value` formatted as a percentage, or 'N/A' if it is not a number."""
        try:
            return '{:>3.0f}%'.format(value)
        except (ValueError, TypeError):
            return '{:>4}'.format('N/A')

    def msg_curse(self, args=None, max_width=None):
        """Return the dict to display in the curse interface.

        :param args: command line arguments; only the optional 'meangpu' and
                     'fahrenheit' attributes are read (both default to False
                     when args is None or does not define them).
        :param max_width: not used by this plugin.
        :return: list of curse lines (empty if there is nothing to display).
        """
        # Init the return message
        ret = []

        # Only process if stats exist, not empty (issue #871) and plugin not disabled
        if not self.stats or self.is_disabled():
            return ret

        # args is optional: read the options defensively
        mean_gpu = getattr(args, 'meangpu', False)
        fahrenheit = getattr(args, 'fahrenheit', False)

        gpu_count = len(self.stats)
        # first_gpu is the first GPU in the list
        first_gpu = self.stats[0]

        # Check if all GPU have the same name
        same_name = all(s['name'] == first_gpu['name'] for s in self.stats)

        # Header
        header = ''
        if gpu_count > 1:
            header += '{} '.format(gpu_count)
        if same_name:
            header += '{} {}'.format('GPU', first_gpu['name'])
        else:
            header += '{}'.format('GPU')
        ret.append(self.curse_add_line(header[:17], "TITLE"))

        # Build the string message
        if gpu_count == 1 or mean_gpu:
            # GPU stat summary or mono GPU
            # The decoration is the one computed for the first GPU
            view_item = first_gpu[self.get_key()]
            for key, mono_label, mean_label in self._SUMMARY_FIELDS:
                # New line
                ret.append(self.curse_new_line())
                mean_value = self._mean_stat(key)
                if key != 'temperature':
                    value_msg = self._format_percent(mean_value)
                elif mean_value is None:
                    value_msg = '{:>4}'.format('N/A')
                else:
                    unit = 'C'
                    if fahrenheit:
                        mean_value = to_fahrenheit(mean_value)
                        unit = 'F'
                    value_msg = '{:>3.0f}{}'.format(mean_value, unit)
                label = mean_label if gpu_count > 1 else mono_label
                ret.append(self.curse_add_line('{:13}'.format(label)))
                ret.append(
                    self.curse_add_line(value_msg, self.get_views(item=view_item, key=key, option='decoration'))
                )
        else:
            # Multi GPU
            # Temperature is not displayed in this mode...
            for gpu in self.stats:
                # New line
                ret.append(self.curse_new_line())
                # GPU ID + PROC + MEM
                msg = '{}: {} mem: {}'.format(
                    '{}'.format(gpu['gpu_id']),
                    self._format_percent(gpu['proc']),
                    self._format_percent(gpu['mem']),
                )
                ret.append(self.curse_add_line(msg))

        return ret

    def get_device_stats(self):
        """Get GPU stats.

        :return: list of dict, one per device handle, in handle order.
        """
        stats = []

        for index, device_handle in enumerate(self.device_handles):
            stats.append(
                {
                    # Dictionary key is the GPU_ID
                    'key': self.get_key(),
                    # GPU id (for multiple GPU, start at 0)
                    'gpu_id': index,
                    # GPU name
                    'name': get_device_name(device_handle),
                    # Memory consumption in % (not available on all GPU)
                    'mem': get_mem(device_handle),
                    # Processor consumption in %
                    'proc': get_proc(device_handle),
                    # Processor temperature in Celsius
                    'temperature': get_temperature(device_handle),
                    # Fan speed in %
                    'fan_speed': get_fan_speed(device_handle),
                }
            )

        return stats

    def exit(self):
        """Overwrite the exit method to close the GPU API."""
        if self.nvml_ready:
            try:
                pynvml.nvmlShutdown()
            except Exception as e:
                # Best effort: a failing shutdown must not prevent Glances to exit
                logger.debug("pynvml failed to shutdown correctly ({})".format(e))

        # Call the father exit method
        super(Plugin, self).exit()
296
297
298
def get_device_handles():
    """Return the NVML device handles, one per device, ordered by device index.

    Can throw NVMLError.
    """
    device_count = pynvml.nvmlDeviceGetCount()
    handles = []
    for device_index in range(device_count):
        handles.append(pynvml.nvmlDeviceGetHandleByIndex(device_index))
    return handles
304
305
306
def get_device_name(device_handle):
    """Return the name of the GPU device ("NVIDIA" if NVML raises an error)."""
    try:
        device_name = nativestr(pynvml.nvmlDeviceGetName(device_handle))
    except pynvml.NVMLError:
        # Fall back to a generic name
        device_name = "NVIDIA"
    return device_name
312
313
314
def get_mem(device_handle):
    """Get GPU device memory consumption in percent.

    :param device_handle: NVML handle of the device.
    :return: used memory as a percentage of the total memory (float), or
             None if NVML raises an error or reports no total memory.
    """
    try:
        memory_info = pynvml.nvmlDeviceGetMemoryInfo(device_handle)
    except pynvml.NVMLError:
        return None
    if not memory_info.total:
        # No total memory reported: a ratio can not be computed
        # (avoid a ZeroDivisionError, which the caller does not expect)
        return None
    return memory_info.used * 100.0 / memory_info.total
321
322
323
def get_proc(device_handle):
    """Return the GPU utilization rate of the device (None if NVML raises an error)."""
    try:
        gpu_rate = pynvml.nvmlDeviceGetUtilizationRates(device_handle).gpu
    except pynvml.NVMLError:
        gpu_rate = None
    return gpu_rate
329
330
331
def get_temperature(device_handle):
    """Return the GPU sensor temperature of the device (None if NVML raises an error)."""
    try:
        temperature = pynvml.nvmlDeviceGetTemperature(device_handle, pynvml.NVML_TEMPERATURE_GPU)
    except pynvml.NVMLError:
        temperature = None
    return temperature
337
338
339
def get_fan_speed(device_handle):
    """Return the fan speed of the device (None if NVML raises an error)."""
    try:
        fan_speed = pynvml.nvmlDeviceGetFanSpeed(device_handle)
    except pynvml.NVMLError:
        fan_speed = None
    return fan_speed
345