|
1
|
|
|
# -*- coding: utf-8 -*- |
|
2
|
|
|
# |
|
3
|
|
|
# This file is part of Glances. |
|
4
|
|
|
# |
|
5
|
|
|
# SPDX-FileCopyrightText: 2024 Nicolas Hennion <[email protected]> |
|
6
|
|
|
# |
|
7
|
|
|
# SPDX-License-Identifier: LGPL-3.0-only |
|
8
|
|
|
# |
|
9
|
|
|
|
|
10
|
|
|
"""NVidia Extension unit for Glances' GPU plugin.""" |
|
11
|
|
|
|
|
12
|
|
|
from glances.logger import logger |
|
13
|
|
|
from glances.globals import nativestr |
|
14
|
|
|
# Flag consulted by NvidiaGPU.__init__: True when pynvml cannot be imported.
import_nvidia_error_tag = False
try:
    import pynvml
except Exception as e:
    # Any import failure disables the Nvidia GPU plugin; log the reason as a warning.
    import_nvidia_error_tag = True
    logger.warning("Missing Python Lib ({}), Nvidia GPU plugin is disabled".format(e))
|
22
|
|
|
|
|
23
|
|
|
|
|
24
|
|
|
class NvidiaGPU:
    """GPU card class.

    Wraps the NVML session used to query the Nvidia cards.

    ``device_handles`` holds one NVML handle per detected card. It is an empty
    list when pynvml could not be imported, when NVML could not be initialized
    or when no card was found.
    """

    def __init__(self):
        """Init Nvidia GPU card class.

        NVML failures are logged at debug level and leave ``device_handles``
        empty instead of propagating.
        """
        self.device_handles = []
        if import_nvidia_error_tag:
            # pynvml is not importable: nothing to initialize.
            return

        try:
            pynvml.nvmlInit()
        except Exception:
            logger.debug("pynvml could not be initialized.")
            return

        try:
            self.device_handles = get_device_handles()
        except Exception:
            logger.debug("pynvml could not be initialized.")
            self.device_handles = []

        if not self.device_handles:
            # exit() only shuts NVML down when handles exist, so an
            # initialized session without any handle has to be released here,
            # otherwise the nvmlInit() above is never paired with a shutdown.
            self._shutdown()

    def _shutdown(self):
        """Shut NVML down, logging (not raising) any failure."""
        try:
            pynvml.nvmlShutdown()
        except Exception as e:
            logger.debug("pynvml failed to shutdown correctly ({})".format(e))

    def exit(self):
        """Close NVidia GPU class.

        Does nothing when no device handle was obtained.
        """
        if self.device_handles:
            self._shutdown()

    def get_device_stats(self):
        """Get Nvidia GPU stats.

        Returns:
            A list with one dict per device handle, in handle order. Each dict
            contains 'key', 'gpu_id', 'name', 'mem', 'proc', 'temperature' and
            'fan_speed'. The list is empty when there is no device handle.
        """
        return [
            {
                # Dictionary key is the GPU_ID
                'key': 'gpu_id',
                # GPU id (for multiple GPU, start at 0)
                'gpu_id': f'nvidia{index}',
                # GPU name
                'name': get_device_name(device_handle),
                # Memory consumption in % (not available on all GPU)
                'mem': get_mem(device_handle),
                # Processor consumption in %
                'proc': get_proc(device_handle),
                # Processor temperature in °C
                'temperature': get_temperature(device_handle),
                # Fan speed in %
                'fan_speed': get_fan_speed(device_handle),
            }
            for index, device_handle in enumerate(self.device_handles)
        ]
|
70
|
|
|
|
|
71
|
|
|
|
|
72
|
|
|
def get_device_handles():
    """Return the NVML device handles, one per device, ordered by index.

    NVML errors are not caught here, so this can throw NVMLError.
    """
    device_count = pynvml.nvmlDeviceGetCount()
    handles = []
    for device_index in range(device_count):
        handles.append(pynvml.nvmlDeviceGetHandleByIndex(device_index))
    return handles
|
78
|
|
|
|
|
79
|
|
|
|
|
80
|
|
|
def get_device_name(device_handle):
    """Return the GPU device name, or "NVIDIA" when NVML cannot provide it."""
    try:
        raw_name = pynvml.nvmlDeviceGetName(device_handle)
    except pynvml.NVMLError:
        return "NVIDIA"
    # Convert whatever NVML returned through the project's nativestr helper.
    return nativestr(raw_name)
|
86
|
|
|
|
|
87
|
|
|
|
|
88
|
|
|
def get_mem(device_handle):
    """Get GPU device memory consumption in percent.

    Returns:
        ``used * 100.0 / total`` from the NVML memory info, or None when NVML
        raises an NVMLError or reports a total of zero.
    """
    try:
        memory_info = pynvml.nvmlDeviceGetMemoryInfo(device_handle)
    except pynvml.NVMLError:
        return None
    if not memory_info.total:
        # A zero total would raise ZeroDivisionError and break the whole stats
        # refresh; treat it like the other "not available" cases.
        return None
    return memory_info.used * 100.0 / memory_info.total
|
95
|
|
|
|
|
96
|
|
|
|
|
97
|
|
|
def get_proc(device_handle):
    """Get GPU device processor consumption in percent.

    Returns the ``gpu`` field of the NVML utilization rates, or None when
    NVML raises an NVMLError.
    """
    try:
        utilization = pynvml.nvmlDeviceGetUtilizationRates(device_handle)
    except pynvml.NVMLError:
        return None
    return utilization.gpu
|
103
|
|
|
|
|
104
|
|
|
|
|
105
|
|
|
def get_temperature(device_handle):
    """Get GPU device temperature in Celsius.

    Reads the NVML_TEMPERATURE_GPU sensor; returns None when NVML raises an
    NVMLError.
    """
    try:
        temperature = pynvml.nvmlDeviceGetTemperature(device_handle, pynvml.NVML_TEMPERATURE_GPU)
    except pynvml.NVMLError:
        temperature = None
    return temperature
|
111
|
|
|
|
|
112
|
|
|
|
|
113
|
|
|
def get_fan_speed(device_handle):
    """Get GPU device fan speed in percent, or None when NVML raises an NVMLError."""
    try:
        fan_speed = pynvml.nvmlDeviceGetFanSpeed(device_handle)
    except pynvml.NVMLError:
        fan_speed = None
    return fan_speed
|
119
|
|
|
|