Test Failed
Push — develop ( 504450...f0e8ef )
by Nicolas
03:16
created

glances/plugins/glances_gpu.py (1 issue)

1
# -*- coding: utf-8 -*-
2
#
3
# This file is part of Glances.
4
#
5
# Copyright (C) 2018 Kirby Banman <[email protected]>
6
#
7
# Glances is free software; you can redistribute it and/or modify
8
# it under the terms of the GNU Lesser General Public License as published by
9
# the Free Software Foundation, either version 3 of the License, or
10
# (at your option) any later version.
11
#
12
# Glances is distributed in the hope that it will be useful,
13
# but WITHOUT ANY WARRANTY; without even the implied warranty of
14
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
# GNU Lesser General Public License for more details.
16
#
17
# You should have received a copy of the GNU Lesser General Public License
18
# along with this program. If not, see <http://www.gnu.org/licenses/>.
19
20
"""GPU plugin (limited to NVIDIA chipsets)."""
21
22
from glances.compat import nativestr
23
from glances.logger import logger
24
from glances.plugins.glances_plugin import GlancesPlugin
25
26
try:
27
    import pynvml
28
except Exception as e:
29
    import_error_tag = True
30
    # Log a warning if the import fails (the plugin is then disabled)
31
    logger.warning("Missing Python Lib ({}), Nvidia GPU plugin is disabled".format(e))
32
else:
33
    import_error_tag = False
34
35
# Items kept in the history.
# Every entry of this list is historised when the --enable-history tag is set.
items_history_list = [
    {
        'name': 'proc',
        'description': 'GPU processor',
        'y_unit': '%',
    },
    {
        'name': 'mem',
        'description': 'Memory consumption',
        'y_unit': '%',
    },
]
43
44
45
class Plugin(GlancesPlugin):
    """Glances GPU plugin (limited to NVIDIA chipsets).

    stats is a list of dictionaries with one entry per GPU. Each entry is
    built by get_device_stats() and holds the keys 'key', 'gpu_id', 'name',
    'mem' and 'proc'.
    """

    def __init__(self, args=None):
        """Init the plugin."""
        super(Plugin, self).__init__(args=args,
                                     stats_init_value=[])

        # Init the NVidia API
        self.init_nvidia()

        # We want to display the stat in the curse interface
        self.display_curse = True

    def init_nvidia(self):
        """Init the NVIDIA API.

        Set self.nvml_ready (and self.device_handles) and return
        self.nvml_ready: True if NVML was initialised, False otherwise.
        """
        # Always defined, so get_device_stats() can not hit a missing attribute
        self.device_handles = []

        if import_error_tag:
            # pynvml could not be imported: do not even try to call it
            self.nvml_ready = False
            return self.nvml_ready

        try:
            pynvml.nvmlInit()
            self.device_handles = get_device_handles()
            self.nvml_ready = True
        except Exception:
            logger.debug("pynvml could not be initialized.")
            self.nvml_ready = False

        return self.nvml_ready

    def get_key(self):
        """Return the key of the list."""
        return 'gpu_id'

    @GlancesPlugin._check_decorator
    @GlancesPlugin._log_result_decorator
    def update(self):
        """Update the GPU stats.

        Return self.stats. The stats are left untouched if NVML is not ready.
        """
        # Init new stats
        stats = self.get_init_value()

        # !!! JUST FOR TEST (because i did not have any NVidia GPU... :()
        # self.stats = [{"key": "gpu_id", "mem": None, "proc": 60,
        #                "gpu_id": 0, "name": "GeForce GTX 560 Ti"}]
        # self.stats = [{"key": "gpu_id", "mem": 10, "proc": 60,
        #                "gpu_id": 0, "name": "GeForce GTX 560 Ti"}]
        # self.stats = [{"key": "gpu_id", "mem": 48.64645, "proc": 60.73,
        #                "gpu_id": 0, "name": "GeForce GTX 560 Ti"},
        #               {"key": "gpu_id", "mem": 70.743, "proc": 80.28,
        #                "gpu_id": 1, "name": "GeForce GTX 560 Ti"},
        #               {"key": "gpu_id", "mem": 0, "proc": 0,
        #                "gpu_id": 2, "name": "GeForce GTX 560 Ti"}]
        # self.stats = [{"key": "gpu_id", "mem": 48.64645, "proc": 60.73,
        #                "gpu_id": 0, "name": "GeForce GTX 560 Ti"},
        #               {"key": "gpu_id", "mem": None, "proc": 80.28,
        #                "gpu_id": 1, "name": "GeForce GTX 560 Ti"},
        #               {"key": "gpu_id", "mem": 0, "proc": 0,
        #                "gpu_id": 2, "name": "ANOTHER GPU"}]
        # !!! TO BE COMMENTED

        if not self.nvml_ready:
            return self.stats

        if self.input_method == 'local':
            stats = self.get_device_stats()
        elif self.input_method == 'snmp':
            # not available
            pass

        # Update the stats
        self.stats = stats

        return self.stats

    def update_views(self):
        """Update stats views."""
        # Call the father's method
        super(Plugin, self).update_views()

        # Add specific information
        # Alert
        for gpu in self.stats:
            gpu_key = gpu[self.get_key()]
            # Init the views for the current GPU
            self.views[gpu_key] = {'proc': {}, 'mem': {}}
            # Processor alert
            if 'proc' in gpu:
                alert = self.get_alert(gpu['proc'], header='proc')
                self.views[gpu_key]['proc']['decoration'] = alert
            # Memory alert
            if 'mem' in gpu:
                alert = self.get_alert(gpu['mem'], header='mem')
                self.views[gpu_key]['mem']['decoration'] = alert

        return True

    @staticmethod
    def _percent_msg(value):
        """Return value formatted as a percentage, 'N/A' if not available."""
        try:
            return '{:>3.0f}%'.format(value)
        except (ValueError, TypeError):
            # A None value (stat not available) raises TypeError on Python 3
            # and ValueError on Python 2
            return '{:>4}'.format('N/A')

    def _mean_msg(self, key):
        """Return the mean of stats[key] over all GPU as a percentage.

        Return 'N/A' as soon as one GPU does not provide the value (None).
        """
        try:
            mean = sum(s[key] for s in self.stats) / len(self.stats)
        except TypeError:
            return '{:>4}'.format('N/A')
        return '{:>3.0f}%'.format(mean)

    def msg_curse(self, args=None, max_width=None):
        """Return the list of lines to display in the curse interface.

        A single GPU, or several GPU with args.meangpu set, is displayed as
        a summary (mean of proc and mem). Otherwise one line per GPU is
        displayed. An empty list is returned if there is nothing to display.
        """
        # Init the return message
        ret = []

        # Only process if stats exist, not empty (issue #871)
        # and plugin not disabled
        if not self.stats or self.is_disable():
            return ret

        # gpu_stats contain the first GPU in the list
        gpu_stats = self.stats[0]

        # Check if all GPU have the same name
        same_name = all(s['name'] == gpu_stats['name'] for s in self.stats)

        multi_gpu = len(self.stats) > 1

        # Header
        header = ''
        if multi_gpu:
            header += '{} '.format(len(self.stats))
        if same_name:
            header += '{} {}'.format('GPU', gpu_stats['name'])
        else:
            header += '{}'.format('GPU')
        msg = header[:17]
        ret.append(self.curse_add_line(msg, "TITLE"))

        # Build the string message
        # args may be None (default value): in this case no mean is requested
        if not multi_gpu or getattr(args, 'meangpu', False):
            # GPU stat summary or mono GPU
            # The decoration is the one of the first GPU
            for key in ('proc', 'mem'):
                # New line
                ret.append(self.curse_new_line())
                if multi_gpu:
                    label = '{} mean:'.format(key)
                else:
                    label = '{}:'.format(key)
                ret.append(self.curse_add_line('{:13}'.format(label)))
                ret.append(self.curse_add_line(
                    self._mean_msg(key),
                    self.get_views(item=gpu_stats[self.get_key()],
                                   key=key,
                                   option='decoration')))
        else:
            # Multi GPU
            for gpu_stats in self.stats:
                # New line
                ret.append(self.curse_new_line())
                # GPU ID + PROC + MEM
                id_msg = '{}'.format(gpu_stats['gpu_id'])
                proc_msg = self._percent_msg(gpu_stats['proc'])
                mem_msg = self._percent_msg(gpu_stats['mem'])
                msg = '{}: {} mem: {}'.format(id_msg, proc_msg, mem_msg)
                ret.append(self.curse_add_line(msg))

        return ret

    def get_device_stats(self):
        """Get GPU stats.

        Return a list with one dictionary per handle in self.device_handles.
        """
        stats = []

        for index, device_handle in enumerate(self.device_handles):
            device_stats = {}
            # Dictionary key is the GPU_ID
            device_stats['key'] = self.get_key()
            # GPU id (for multiple GPU, start at 0)
            device_stats['gpu_id'] = index
            # GPU name
            device_stats['name'] = get_device_name(device_handle)
            # Memory consumption in % (not available on all GPU)
            device_stats['mem'] = get_mem(device_handle)
            # Processor consumption in %
            device_stats['proc'] = get_proc(device_handle)
            stats.append(device_stats)

        return stats

    def exit(self):
        """Overwrite the exit method to close the GPU API."""
        if self.nvml_ready:
            try:
                pynvml.nvmlShutdown()
            except Exception as e:
                logger.debug(
                    "pynvml failed to shutdown correctly ({})".format(e))

        # Call the father exit method
        super(Plugin, self).exit()
249
250
251
def get_device_handles():
    """Return the NVML device handles as a list (one item per device).

    The devices are requested by index, from 0 to the device count.
    Can throw NVMLError.
    """
    handles = []
    for index in range(pynvml.nvmlDeviceGetCount()):
        handles.append(pynvml.nvmlDeviceGetHandleByIndex(index))
    return handles
257
258
259
def get_device_name(device_handle):
    """Get GPU device name.

    Return the name given by NVML (converted with nativestr) or the
    generic "NVIDIA" string if NVML raises an NVMLError.
    """
    try:
        return nativestr(pynvml.nvmlDeviceGetName(device_handle))
    except pynvml.NVMLError:
        # The exception class is NVMLError (upper case L), as used by the
        # other getters of this module
        return "NVIDIA"
265
266
267
def get_mem(device_handle):
    """Return the memory used by the GPU device, in percent of the total.

    Return None if NVML raises an NVMLError.
    """
    try:
        info = pynvml.nvmlDeviceGetMemoryInfo(device_handle)
        used_percent = info.used * 100.0 / info.total
    except pynvml.NVMLError:
        return None
    return used_percent
274
275
276
def get_proc(device_handle):
    """Return the GPU utilisation rate of the device (the 'gpu' field).

    Return None if NVML raises an NVMLError.
    """
    try:
        rates = pynvml.nvmlDeviceGetUtilizationRates(device_handle)
        return rates.gpu
    except pynvml.NVMLError:
        return None
282