Issues (51)

glances/plugins/gpu/cards/nvidia.py (1 issue)

1
#
2
# This file is part of Glances.
3
#
4
# SPDX-FileCopyrightText: 2024 Nicolas Hennion <[email protected]>
5
#
6
# SPDX-License-Identifier: LGPL-3.0-only
7
#
8
9
"""NVidia Extension unit for Glances' GPU plugin."""
10
11
import os
12
import sys
13
14
from glances.globals import nativestr
15
from glances.logger import logger
16
17
# Name of the NVML shared library probed on non-Windows platforms
NVML_LIB = 'libnvidia-ml.so.1'

# Pessimistic default: the flag is only switched on when both the NVML shared
# library and the pynvml Python binding could be loaded
nvidia_gpu_enable = False

try:
    # Probe the NVML shared library first, so that pynvml is not imported
    # when the library is not installed
    from ctypes import CDLL

    if sys.platform.startswith("win"):
        try:
            # First candidate: nvml.dll under the Windows System32 directory
            CDLL(os.path.join(os.getenv("WINDIR", "C:/Windows"), "System32/nvml.dll"))
        except OSError:
            # Second candidate: the NVSMI directory under Program Files
            CDLL(os.path.join(os.getenv("ProgramFiles", "C:/Program Files"), "NVIDIA Corporation/NVSMI/nvml.dll"))
    else:
        CDLL(NVML_LIB)
    import pynvml
except OSError:
    # NVML_LIB not found (NVidia driver not installed)
    logger.debug(f"NVML Shared Library ({NVML_LIB}) not Found, Nvidia GPU plugin is disabled")
except Exception as e:
    # Any other failure (typically pynvml itself cannot be imported)
    logger.debug(f"Missing Python Lib ({e}), Nvidia GPU plugin is disabled")
else:
    nvidia_gpu_enable = True
41
42
43
class NvidiaGPU:
    """GPU card class for NVidia devices, backed by pynvml.

    ``device_handles`` holds one NVML handle per detected device. It is an
    empty list when the NVidia support is disabled or when NVML could not be
    initialized or queried.
    """

    def __init__(self):
        """Init Nvidia GPU card class.

        Failures are logged at debug level and never propagated: the instance
        is then left with an empty ``device_handles`` list.
        """
        self.device_handles = []
        if not nvidia_gpu_enable:
            return

        try:
            pynvml.nvmlInit()
        except Exception as e:
            logger.debug(f"pynvml could not be initialized ({e})")
            return

        try:
            self.device_handles = get_device_list()
        except Exception as e:
            logger.debug(f"pynvml could not list the GPU devices ({e})")

        if not self.device_handles:
            # exit() only shuts NVML down when handles exist, so release the
            # library right now instead of leaving it initialized forever.
            self._shutdown()

    def _shutdown(self):
        """Shut NVML down, logging (not raising) any failure."""
        try:
            pynvml.nvmlShutdown()
        except Exception as e:
            logger.debug(f"pynvml failed to shutdown correctly ({e})")

    def exit(self):
        """Close NVidia GPU class (shut NVML down if devices were opened)."""
        if self.device_handles:
            self._shutdown()

    def get_device_stats(self):
        """Get Nvidia GPU stats.

        Returns a list with one dict per device handle. Each metric value is
        whatever the matching module-level getter returns for that handle.
        """
        return [
            {
                # Dictionary key is the GPU_ID
                'key': 'gpu_id',
                # GPU id (for multiple GPU, start at 0)
                'gpu_id': f'nvidia{index}',
                # GPU name
                'name': get_device_name(device_handle),
                # Memory consumption in % (not available on all GPU)
                'mem': get_mem(device_handle),
                # Processor consumption in %
                'proc': get_proc(device_handle),
                # Processor temperature in °C
                'temperature': get_temperature(device_handle),
                # Fan speed in %
                'fan_speed': get_fan_speed(device_handle),
            }
            for index, device_handle in enumerate(self.device_handles)
        ]
89
90
91
def get_device_list():
    """Return the NVML device handles, one per device reported by NVML.

    No exception is handled here: an NVMLError raised by pynvml propagates
    to the caller.
    """
    handles = []
    for device_index in range(pynvml.nvmlDeviceGetCount()):
        handles.append(pynvml.nvmlDeviceGetHandleByIndex(device_index))
    return handles
97
98
99
def get_device_name(device_handle):
    """Return the GPU device name as a native string.

    Falls back to the generic "NVIDIA" label when NVML raises an error.
    """
    try:
        raw_name = pynvml.nvmlDeviceGetName(device_handle)
        return nativestr(raw_name)
    except pynvml.NVMLError:
        return "NVIDIA"
105
106
107
def get_mem(device_handle):
    """Get GPU device memory consumption in percent.

    Returns None when NVML raises an error for this device or when the
    reported total memory is zero (no percentage can be computed).
    """
    try:
        memory_info = pynvml.nvmlDeviceGetMemoryInfo(device_handle)
    except pynvml.NVMLError:
        return None

    if not memory_info.total:
        # Guard against ZeroDivisionError: treat it like an unavailable metric
        return None
    return memory_info.used * 100.0 / memory_info.total
114
115
116
def get_proc(device_handle):
    """Get GPU device processor consumption in percent.

    The value is the ``gpu`` field of the NVML utilization rates.
    Returns None when NVML raises an error for this device.
    """
    try:
        utilization = pynvml.nvmlDeviceGetUtilizationRates(device_handle)
    except pynvml.NVMLError:
        return None
    return utilization.gpu
122
123
124
def get_temperature(device_handle):
    """Get GPU device temperature in Celsius.

    Queries the NVML_TEMPERATURE_GPU sensor.
    Returns None when NVML raises an error for this device.
    """
    try:
        temperature = pynvml.nvmlDeviceGetTemperature(device_handle, pynvml.NVML_TEMPERATURE_GPU)
    except pynvml.NVMLError:
        temperature = None
    return temperature
130
131
132
def get_fan_speed(device_handle):
    """Get GPU device fan speed in percent.

    Returns None when NVML raises an error for this device.
    """
    try:
        fan_speed = pynvml.nvmlDeviceGetFanSpeed(device_handle)
    except pynvml.NVMLError:
        fan_speed = None
    return fan_speed
138