Issues (49)

glances/plugins/gpu/cards/nvidia.py (1 issue)

1
#
2
# This file is part of Glances.
3
#
4
# SPDX-FileCopyrightText: 2024 Nicolas Hennion <[email protected]>
5
#
6
# SPDX-License-Identifier: LGPL-3.0-only
7
#
8
9
"""NVidia Extension unit for Glances' GPU plugin."""
10
11
from glances.globals import nativestr
12
from glances.logger import logger
13
14
try:
15
    import pynvml
16
except Exception as e:
17
    nvidia_gpu_enable = False
18
    # pynvml is optional: any import failure disables the plugin and is logged as a warning
19
    logger.warning(f"Missing Python Lib ({e}), Nvidia GPU plugin is disabled")
20
else:
21
    nvidia_gpu_enable = True
22
23
24
class NvidiaGPU:
    """GPU card class for Nvidia devices, backed by the pynvml bindings.

    When pynvml is unavailable or NVML cannot be initialised, the instance
    stays usable: it holds no device handle and reports no stats.
    """

    def __init__(self):
        """Init Nvidia GPU card class.

        Initialise NVML and collect one device handle per GPU. Any failure
        leaves ``device_handles`` empty instead of raising.
        """
        self.device_handles = []
        # Set as soon as nvmlInit() succeeds so that exit() can release NVML
        # even when no device handle could be collected afterwards.
        self._nvml_initialized = False

        if not nvidia_gpu_enable:
            return

        try:
            pynvml.nvmlInit()
            self._nvml_initialized = True
            self.device_handles = get_device_list()
        except Exception:
            logger.debug("pynvml could not be initialized.")

    def exit(self):
        """Close NVidia GPU class.

        Shut NVML down when this instance initialised it or still holds
        device handles. Shutdown errors are logged at debug level and are
        never propagated to the caller.
        """
        # Holding handles remains a trigger (historical behaviour); getattr
        # covers instances that were built without running __init__.
        nvml_in_use = getattr(self, '_nvml_initialized', False) or self.device_handles
        if not nvml_in_use:
            return

        try:
            pynvml.nvmlShutdown()
        except Exception as e:
            logger.debug(f"pynvml failed to shutdown correctly ({e})")
        finally:
            self._nvml_initialized = False

    def get_device_stats(self):
        """Get Nvidia GPU stats.

        Return a list with one dict per device handle, in handle order.
        Each dict contains:

        - ``key``: name of the field used as dictionary key (``'gpu_id'``)
        - ``gpu_id``: ``nvidia<index>`` (for multiple GPU, start at 0)
        - ``name``: GPU name
        - ``mem``: memory consumption in % (not available on all GPU)
        - ``proc``: processor consumption in %
        - ``temperature``: processor temperature in °C
        - ``fan_speed``: fan speed in %
        """
        return [
            {
                'key': 'gpu_id',
                'gpu_id': f'nvidia{index}',
                'name': get_device_name(device_handle),
                'mem': get_mem(device_handle),
                'proc': get_proc(device_handle),
                'temperature': get_temperature(device_handle),
                'fan_speed': get_fan_speed(device_handle),
            }
            for index, device_handle in enumerate(self.device_handles)
        ]
70
71
72
def get_device_list():
    """Return the NVML device handles, one per device, ordered by index.

    NVML errors are not handled here: they can propagate as NVMLError.
    """
    handles = []
    device_count = pynvml.nvmlDeviceGetCount()
    for device_index in range(device_count):
        handles.append(pynvml.nvmlDeviceGetHandleByIndex(device_index))
    return handles
78
79
80
def get_device_name(device_handle):
    """Return the GPU device name.

    Fall back to the generic "NVIDIA" label when NVML raises an error.
    """
    try:
        raw_name = pynvml.nvmlDeviceGetName(device_handle)
        device_name = nativestr(raw_name)
    except pynvml.NVMLError:
        device_name = "NVIDIA"
    return device_name
86
87
88
def get_mem(device_handle):
    """Get GPU device memory consumption in percent.

    Return None when the value is not available: either NVML raised an
    error, or the device reports a total memory of 0 (the ratio is then
    undefined and would otherwise raise ZeroDivisionError).
    """
    try:
        memory_info = pynvml.nvmlDeviceGetMemoryInfo(device_handle)
    except pynvml.NVMLError:
        return None

    # Guard the division: only NVMLError is handled above.
    if not memory_info.total:
        return None
    return memory_info.used * 100.0 / memory_info.total
95
96
97
def get_proc(device_handle):
    """Return the GPU device processor consumption in percent.

    Return None when NVML raises an error.
    """
    try:
        utilization = pynvml.nvmlDeviceGetUtilizationRates(device_handle)
        return utilization.gpu
    except pynvml.NVMLError:
        return None
103
104
105
def get_temperature(device_handle):
    """Return the GPU device temperature in Celsius.

    Return None when NVML raises an error.
    """
    try:
        temperature = pynvml.nvmlDeviceGetTemperature(device_handle, pynvml.NVML_TEMPERATURE_GPU)
    except pynvml.NVMLError:
        temperature = None
    return temperature
111
112
113
def get_fan_speed(device_handle):
    """Return the GPU device fan speed in percent.

    Return None when NVML raises an error.
    """
    try:
        fan_speed = pynvml.nvmlDeviceGetFanSpeed(device_handle)
    except pynvml.NVMLError:
        fan_speed = None
    return fan_speed
119