Proc.proc_net_snmp() - Code Metrics - Inspection of "added ~sane defauls to proc reader, made log forma..." - s4z/plumd - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( fa474b...faefc5 )

by Kenny

created 2016-08-12 23:39 UTC

Proc.proc_net_snmp() F

↳ Parent: Proc

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes	3
Bugs	2	Features	1

Metric	Value
cc	10
c	3
b	2
f	1
dl	0
loc	49
rs	3.7894

How to fix Complexity

# -*- coding: utf-8 -*-

__author__ = 'Kenny Freeman'
__email__ = '[email protected]'
__license__ = "ISCL"
__docformat__ = 'reStructuredText'

import re
import os
import time
import os.path
import traceback
import multiprocessing
from collections import deque

import plumd
import plumd.plugins
from plumd.calc import Differential
from plumd.util import get_file_map, get_file_map_list, get_file_list, get_file


## todo: switch from list with pop(0) to deque
class Proc(plumd.plugins.Reader):
    """Plugin to measure various kernel metrics from /proc."""
    # Default configuration values; registered on the plugin config in
    # __init__ via self.config.defaults(). The skip_* lists hold metric names
    # that the corresponding proc_* reader leaves out of its result.
    # NOTE: 'collissions' is spelled as in the original because it is part of
    # the emitted metric name.
    defaults = {
        'poll.interval': 10,
        'proc_path': '/proc',
        'skip_proc_stat': ['btime'],
        'skip_proc_meminfo': ["Active(anon)","Active(file)",
            "AnonHugePages","AnonPages","Bounce","DirectMap2M","CommitLimit",
            "DirectMap4k","HugePages_Free","Hugepagesize",
            "HugePages_Rsvd","HugePages_Surp","HugePages_Total",
            "Inactive","Inactive(anon)","Inactive(file)",
            "KernelStack","NFS_Unstable","PageTables",
            "Shmem","Slab","SReclaimable","SUnreclaim",
            "SwapCached","SwapFree","SwapTotal","Writeback","WritebackTmp"],
        'skip_proc_net_snmp': ["Icmp.InAddrMaskReps","Icmp.InAddrMasks",
            "Icmp.InTimestampReps","Icmp.InTimestamps","Icmp.OutAddrMaskReps",
            "Icmp.OutAddrMasks","Icmp.OutTimestampReps","Icmp.OutTimestamps",
            "Ip.DefaultTTL","Ip.Forwarding","Tcp.MaxConn","Tcp.RtoAlgorithm",
            "Tcp.RtoMax","Tcp.RtoMin"],
        'skip_proc_net_sockstat': [],
        'skip_proc_net_netstat': ["TcpExt.ArpFilter","TcpExt.BusyPollRxPackets",
            "TcpExt.DelayedACKLocked","TcpExt.DelayedACKLost",
            "TcpExt.DelayedACKs","TcpExt.EmbryonicRsts",
            "TcpExt.IPReversePathFilter","TcpExt.LockDroppedIcmps",
            "TcpExt.OfoPruned","TcpExt.OutOfWindowIcmps",
            "TcpExt.PAWSActive","TcpExt.PAWSEstab", "TcpExt.PAWSPassive",
            "TcpExt.PruneCalled","TcpExt.RcvPruned",
            "TcpExt.TCPACKSkippedChallenge","TcpExt.TCPACKSkippedFinWait2",
            "TcpExt.TCPACKSkippedPAWS","TcpExt.TCPACKSkippedSeq",
            "TcpExt.TCPACKSkippedSynRecv","TcpExt.TCPACKSkippedTimeWait",
            "TcpExt.TCPAutoCorking","TcpExt.TCPChallengeACK",
            "TcpExt.TCPDirectCopyFromBacklog","TcpExt.TCPDirectCopyFromPrequeue",
            "TcpExt.TCPDSACKIgnoredNoUndo","TcpExt.TCPDSACKIgnoredOld",
            "TcpExt.TCPDSACKOfoRecv","TcpExt.TCPDSACKOfoSent",
            "TcpExt.TCPDSACKOldSent","TcpExt.TCPDSACKRecv","TcpExt.TCPDSACKUndo",
            "TcpExt.TCPFACKReorder","TcpExt.TCPFromZeroWindowAdv",
            "TcpExt.TCPFullUndo","TcpExt.TCPHPAcks","TcpExt.TCPHPHits",
            "TcpExt.TCPHPHitsToUser","TcpExt.TCPHystartDelayCwnd",
            "TcpExt.TCPHystartDelayDetect","TcpExt.TCPHystartTrainCwnd",
            "TcpExt.TCPHystartTrainDetect","TcpExt.TCPLossFailures",
            "TcpExt.TCPLossProbeRecovery","TcpExt.TCPLossProbes",
            "TcpExt.TCPLossUndo","TcpExt.TCPLostRetransmit",
            "TcpExt.TCPMD5NotFound","TcpExt.TCPMD5Unexpected",
            "TcpExt.TCPMTUPFail","TcpExt.TCPMTUPSuccess","TcpExt.TCPOFODrop",
            "TcpExt.TCPOFOMerge","TcpExt.TCPOFOQueue","TcpExt.TCPOrigDataSent",
            "TcpExt.TCPPartialUndo","TcpExt.TCPPrequeued",
            "TcpExt.TCPPrequeueDropped","TcpExt.TCPPureAcks",
            "TcpExt.TCPRcvCoalesce","TcpExt.TCPRcvCollapsed",
            "TcpExt.TCPRenoFailures","TcpExt.TCPRenoRecovery",
            "TcpExt.TCPRenoRecoveryFail","TcpExt.TCPRenoReorder",
            "TcpExt.TCPRetransFail","TcpExt.TCPSACKDiscard",
            "TcpExt.TCPSackFailures","TcpExt.TCPSackMerged",
            "TcpExt.TCPSackRecovery","TcpExt.TCPSackRecoveryFail",
            "TcpExt.TCPSACKReneging","TcpExt.TCPSACKReorder",
            "TcpExt.TCPSackShifted","TcpExt.TCPSackShiftFallback",
            "TcpExt.TCPSchedulerFailed","TcpExt.TCPSpuriousRTOs",
            "TcpExt.TCPSpuriousRtxHostQueues","TcpExt.TCPSYNChallenge",
            "TcpExt.TCPToZeroWindowAdv","TcpExt.TCPTSReorder",
            "TcpExt.TCPWantZeroWindowAdv","TcpExt.TCPWinProbe",
            "TcpExt.TCPKeepAlive", "TcpExt.TCPFastOpenCookieReqd",
            "IpExt.InNoECTPkts", "IpExt.InCEPkts"],
        'cpu_metrics': ["user", "nice", "system", "idle", "iowait", "irq",
                        "softirq","steal", "guest", "guest_nice"],
        'per_cpu': False,
        # regular expressions are raw strings so that "\d" is not treated as
        # an (invalid) string escape; the resulting values are unchanged
        'diskstats_dev_re': r"dm-\d",
        'diskstats_cols': ["r", "r_merge", "r_sector", "r_time", "w", "w_merge",
                           "w_sector", "w_time", "io_inprog", "io_time",
                           "io_weighted_time"],
        'net_dev_re': r"(virbr\d+)|(vnet\d+)",
        'net_dev_cols': ["rx_bytes", "rx_pkt", "rx_errs", "rx_drop",
                         "rx_fifo_errs", "rx_frame_errs", "rx_compressed",
                         "rx_mcast", "tx_bytes", "tx_pkt", "tx_errs", "tx_drop",
                         "tx_fifo_errs", "collissions", "carrier",
                         "tx_compressed"],
        'net_snmp_items': ["Ip:", "Icmp:", "IcmpMsg:", "Tcp:", "Udp:",
                           "UdpLite:"]
    }

    def __init__(self, log, config):
        """Set up the /proc reader plugin.

        Registers the class level defaults on the plugin configuration,
        creates the differential calculator used for rate metrics, records
        the system page size and pre-compiles the two device filter regular
        expressions.

        :param log: A logger
        :type log: logging.RootLogger
        :param config: a plumd.config.Conf configuration helper instance.
        :type config: plumd.config.Conf
        """
        super(Proc, self).__init__(log, config)
        # register defaults before reading any value back out of the config
        # (presumably self.config is the config object passed in - confirm
        # against plumd.plugins.Reader)
        self.config.defaults(Proc.defaults)
        self.calc = Differential()
        self.page_size = os.sysconf("SC_PAGESIZE")
        self.proc_path = config.get('proc_path')
        disk_pattern = config.get('diskstats_dev_re')
        self.diskstats_dev_re = re.compile(disk_pattern)
        net_pattern = config.get('net_dev_re')
        self.net_dev_re = re.compile(net_pattern)


    def poll(self):
        """Collect every /proc metric group into a single result set.

        Each reader below returns one plumd.Result; they are called in the
        listed order and added to the returned set.

        :rtype: ResultSet
        """
        readers = (
            self.proc_stat,
            self.proc_meminfo,
            self.proc_loadavg,
            self.proc_swap,
            self.proc_uptime,
            self.proc_diskstats,
            self.proc_net_dev,
            self.proc_net_snmp,
            self.proc_net_sockstat,
            self.proc_net_netstat,
        )
        results = plumd.ResultSet([])
        for reader in readers:
            results.add(reader())
        return results


    def proc_stat_cpu_percent(self, key, val, ts):
        """Return cpu utilization metrics in percentage.

        Every value is reported as its share of the sum of all values in the
        row. Values are paired positionally with the names configured in
        'cpu_metrics'; surplus names or surplus values are ignored.

        The input is no longer consumed (the previous implementation drained
        ``val`` with popleft()), and a row whose values sum to zero yields an
        empty list instead of raising ZeroDivisionError.

        :param key: The metric name (eg. cpu, cpu0, cpu1, etc)
        :type key: str
        :param val: A deque populated with the metric values from stat
        :type val: deque
        :param ts: Unused; accepted so the signature matches proc_stat_cpu.
        :rtype: list
        """
        values = [float(i) for i in val]
        total = sum(values)
        if not total:
            # empty or all-zero row: there is nothing to apportion
            return []
        names = self.config.get('cpu_metrics')
        return [
            plumd.Float("{0}.{1}".format(key, name), value / total * 100.00)
            for name, value in zip(names, values)
        ]


    def proc_stat_cpu(self, key, val, ts):
        """Return the per second change of each cpu metric's share of the row.

        Each value is converted to a percentage of the row total and then
        passed through the differential calculator.

        The calculator is now keyed on the full metric name (eg. "cpu.user").
        Previously every column of a row was recorded under the bare ``key``,
        so consecutive columns overwrote each other's previous sample and the
        rates were computed between unrelated metrics.

        NOTE(review): the original docstring described the output as raw
        USER_HZ / jiffies values, which does not match the percentage
        computation below - confirm which is intended.

        :param key: The metric name (eg. cpu, cpu0, cpu1, etc)
        :type key: str
        :param val: A deque populated with the metric values from stat
        :type val: deque
        :param ts: Timestamp of the sample, passed to the calculator.
        :rtype: list
        """
        ret = []
        values = [float(i) for i in val]
        total = sum(values)
        if not total:
            # empty or all-zero row: avoid dividing by zero
            return ret
        names = self.config.get('cpu_metrics')
        for name, value in zip(names, values):
            mstr = "{0}.{1}".format(key, name)
            percent_val = value / total * 100.00
            # key the differential on mstr so each metric keeps its own history
            mval = self.calc.per_second(mstr, percent_val, ts)
            ret.append(plumd.Float(mstr, mval))
        return ret


    def proc_stat(self):
        """Return cpu utilization and process metrics from proc file stat.

        The aggregate "cpu" row is always reported as percentages; rows for
        individual cpus ("cpu0", "cpu1", ...) are only reported when the
        'per_cpu' configuration value is set. For every other row the first
        value is reported as a per second rate. Names listed in
        'skip_proc_stat' are left out.

        :rtype: plumd.Result
        """
        skip = self.config.get('skip_proc_stat')
        per_cpu = self.config.get('per_cpu')
        result = plumd.Result("stat")
        fname = "{0}/stat".format(self.proc_path)
        rows = get_file_map(fname, 0, 0)
        ts = time.time()
        for name, fields in rows.items():
            if fields is None:
                self.log.error("proc_stat: null value for {0}".format(name))
                continue
            if name in skip:
                continue
            if name.startswith("cpu"):
                # "cpu" itself is the aggregate row, anything longer is a core
                if name == "cpu" or per_cpu:
                    percentages = self.proc_stat_cpu_percent(name, fields, ts)
                    result.add_list(percentages)
                continue
            rate = self.calc.per_second(name, float(fields[0]), ts)
            result.add(plumd.Int(name, rate))
        return result


    def proc_meminfo(self):
        """Return memory utilization metrics from proc file meminfo.

        The first value of every row is reported under the row name with any
        ":" removed. Names listed in 'skip_proc_meminfo' are left out.

        :rtype: plumd.Result
        """
        skip = self.config.get('skip_proc_meminfo')
        result = plumd.Result("mem")
        fname = "{0}/meminfo".format(self.proc_path)
        rows = get_file_map(fname, 0, 0)
        for raw_name, fields in rows.items():
            name = raw_name.replace(":", "")
            if fields is None:
                self.log.error("proc_meminfo: null value for {0}".format(name))
            elif name not in skip:
                result.add(plumd.Int(name, fields[0]))
        return result


    def proc_loadavg(self):
        """Return 1, 5 and 15 minute load averages from proc file loadavg.

        The first three whitespace separated fields of the file are reported
        as "1", "5" and "15". An empty result is returned when the file can
        not be read or holds fewer than three fields.

        :rtype: plumd.Result
        """
        result = plumd.Result("loadavg")
        fname = "{0}/loadavg".format(self.proc_path)
        try:
            fields = get_file(fname).split()
        except Exception as e:
            tb = traceback.format_exc()
            self.log.error("proc_loadavg: exception: {0} : {1}".format(e, tb))
            return result
        if len(fields) < 3:
            return result
        # zip stops after the three labels, so extra fields are ignored
        for label, value in zip(("1", "5", "15"), fields):
            result.add(plumd.Float(label, value))
        return result


    def proc_swap(self):
        """Return swap file usage metrics from proc file swaps.

        For every swap entry the used and total size are reported as found
        in the file, plus the derived free amount (size - used). Metric
        names are prefixed with the basename of the swap file or device.

        The header line is always discarded, also when it is the only line
        in the file. Previously a header-only file left the header in place
        and it was parsed as a swap entry, failing on the numeric
        conversion. Entries that do not have exactly five columns or whose
        size/used columns are not numeric are logged and skipped.

        :rtype: plumd.Result
        """
        result = plumd.Result("swap")
        fname = "{0}/swaps".format(self.proc_path)
        # dat holds the lines of fname
        dat = get_file_list(fname)
        # header: file, type, size, used, priority
        if len(dat) > 0:
            dat.popleft()
        for entry in dat:
            if not entry:
                continue
            try:
                sfname, stype, ssize, sused, sprio = entry.split()
                # validate the numbers here so a bad line is logged, not fatal
                sfree = float(ssize) - float(sused)
            except ValueError as e:
                tb = traceback.format_exc()
                self.log.error("proc_swap: exception: {0}: {1}".format(e, tb))
                continue
            sname = os.path.basename(sfname)
            result.add(plumd.Float("{0}.used".format(sname), sused))
            result.add(plumd.Float("{0}.size".format(sname), ssize))
            result.add(plumd.Float("{0}.free".format(sname), sfree))
        return result


    def proc_uptime(self):
        """Return uptime metrics from proc file uptime.

        Reports the two values found in the file as "up" and "idle", plus
        "idle_percent": idle relative to up, as a percentage, divided by the
        cpu count. An empty result is returned when the file can not be read
        or does not hold exactly two fields.

        :rtype: plumd.Result
        """
        result = plumd.Result("uptime")
        fname = "{0}/uptime".format(self.proc_path)
        try:
            fields = get_file(fname).split()
            up, idle = fields
        except Exception as e:
            tb = traceback.format_exc()
            self.log.error("proc_uptime: exception: {0}: {1}".format(e, tb))
            return result
        idle_ratio = float(idle) / float(up)
        pidle = idle_ratio * 100 / multiprocessing.cpu_count()
        for label, value in (("up", up), ("idle", idle),
                             ("idle_percent", pidle)):
            result.add(plumd.Float(label, value))
        return result


    def proc_diskstats(self):
        """Return disk io metrics from proc file diskstats.

        Devices whose name matches the 'diskstats_dev_re' configuration
        value are skipped. For the remaining devices the leading values of
        each row are paired with the names in 'diskstats_cols' and reported
        as per second rates.

        Rows with more than 13 values are now accepted, with the extra
        trailing values ignored, so kernels that append statistics columns
        to the file are no longer rejected on every poll. Rows with fewer
        than 13 values are still logged and skipped.

        NOTE(review): which columns end up at the front of each row depends
        on get_file_map(fname, 2, 0), which is not visible here - confirm
        that the leading values line up with 'diskstats_cols'.

        :rtype: plumd.Result
        """
        # times in ms
        cols = self.config.get('diskstats_cols')
        result = plumd.Result("diskstats")
        fname = "{0}/diskstats".format(self.proc_path)
        try:
            dat = get_file_map(fname, 2, 0)
        except Exception as e:
            tb = traceback.format_exc()
            self.log.error("proc_diskstats: exception: {0}: {1}".format(e, tb))
            return result
        ts = time.time()
        for key, val in dat.items():
            if self.diskstats_dev_re.match(key):
                continue
            # 13 is the minimum row length; longer rows only add columns
            if len(val) < 13:
                self.log.error("proc_diskstats: invalid entry: {0}".format(val))
                continue
            for mname, raw in zip(cols, val):
                mstr = "{0}.{1}".format(key, mname)
                dval = self.calc.per_second(mstr, int(raw), ts)
                result.add(plumd.Int(mstr, dval))
        return result


    def proc_net_dev(self):
        """Return network interface metrics from proc file net/dev.

        Interfaces whose name matches the regular expression in the
        configuration value 'net_dev_re' are skipped. Rows holding fewer
        values than there are names in 'net_dev_cols' are silently ignored.
        All reported values are per second rates.

        :rtype: plumd.Result
        """
        cols = self.config.get('net_dev_cols')
        result = plumd.Result("net")
        fname = "{0}/net/dev".format(self.proc_path)
        try:
            rows = get_file_map(fname, 0, 0)
        except Exception as e:
            tb = traceback.format_exc()
            self.log.error("proc_net_dev: exception: {0}: {1}".format(e, tb))
            return result
        ts = time.time()
        for raw_name, fields in rows.items():
            iface = raw_name.replace(":", "")
            if self.net_dev_re.match(iface):
                continue
            if len(fields) < len(cols):
                # too short to be an interface row
                continue
            # the row is at least as long as cols, so every name gets a value
            for mname, raw in zip(cols, fields):
                mstr = "{0}.{1}".format(iface, mname)
                rate = self.calc.per_second(mstr, int(raw), ts)
                result.add(plumd.Int(mstr, rate))
        return result


    def proc_net_snmp(self):
        """Return network protocol metrics from proc file net/snmp.

        The file is processed as pairs of lines: a header line
        ("<section>: name name ...") followed by a value line carrying the
        same "<section>:" prefix. Values are reported as per second rates
        under "<section>.<name>".

        Add entries to the configuration value 'skip_proc_net_snmp' to skip
        metrics.

        The configuration value 'net_snmp_items' lists the sections to
        report. Sections are matched by the name found in the file rather
        than by position, so a section that is missing from the file (eg.
        on older kernels) or one that is not listed no longer causes all
        following sections to be rejected. The order of 'net_snmp_items' no
        longer matters.

        :rtype: plumd.Result
        """
        skip = self.config.get('skip_proc_net_snmp')
        items = self.config.get('net_snmp_items')
        result = plumd.Result("net_snmp")
        fname = "{0}/net/snmp".format(self.proc_path)
        # dat holds the lines of fname (deque-like: popleft and indexing)
        dat = get_file_list(fname)
        ts = time.time()

        while len(dat) >= 2:
            header = dat.popleft().split()
            if len(header) < 2:
                # blank or malformed line: try the next line as a header
                continue
            values = dat[0].split()
            if not values or values[0] != header[0]:
                # the next line is not this header's value line; leave it
                # queued so it is examined as a header on the next pass
                self.log.error("proc_net_snmp: invalid entry: {0}: {1}".format(header, values))
                continue
            dat.popleft()
            if header[0] not in items:
                continue
            try:
                vals = [int(i) for i in values[1:]]
            except ValueError as e:
                tb = traceback.format_exc()
                self.log.error("proc_net_snmp: exception: {0}: {1}".format(e, tb))
                continue
            # first header token is the section name eg. Ip:, Icmp:, etc
            mheader = header[0].replace(":", "")
            for mname, mval in zip(header[1:], vals):
                mstr = "{0}.{1}".format(mheader, mname)
                if mstr in skip:
                    continue
                dval = self.calc.per_second(mstr, mval, ts)
                result.add(plumd.Int(mstr, dval))
        return result


    def proc_net_sockstat(self):
        """Return network socket metrics from proc file net/sockstat.

        Each row of the file is a section name followed by alternating
        metric names and values; every pair is reported as
        "<section>.<metric>". Sections listed in 'skip_proc_net_sockstat'
        are left out.

        The configured limits from sys/net/ipv4/tcp_mem (min, pressure, max)
        and sys/net/ipv4/tcp_max_orphans are reported as well.
        "TCP.mem_max" now reports the third tcp_mem value; previously it
        repeated the pressure value.

        Note: sockstat.TCP.mem is measured in pages, you can get the system page
        size from os.sysconf("SC_PAGESIZE")

        Note: FRAG: ip fragmentation related

        :rtype: plumd.Result
        """
        skip = self.config.get('skip_proc_net_sockstat')
        result = plumd.Result("sockstat")
        fname = "{0}/net/sockstat".format(self.proc_path)
        # sys/net/ipv4/tcp_mem format: min, pressure, max
        fname_limits = "{0}/sys/net/ipv4/tcp_mem".format(self.proc_path)
        # orphan limit: /proc/sys/net/ipv4/tcp_max_orphans
        fname_orph = "{0}/sys/net/ipv4/tcp_max_orphans".format(self.proc_path)
        dat = get_file_map_list(fname, 0, 0)
        # each entry is a key: [metric, val, metric, val]
        for key, val in dat.items():
            if len(val) < 2:
                continue
            mstr = key.replace(":", "")
            if mstr in skip:
                continue
            mnames = val[::2]
            mvals = [int(i) for i in val[1::2]]
            if len(mnames) != len(mvals):
                # odd number of tokens: a name without a value
                self.log.error("proc_net_sockstat: invalid entry: {0}".format(mnames))
                continue
            for mname, mval in zip(mnames, mvals):
                metric = "{0}.{1}".format(mstr, mname)
                result.add(plumd.Int(metric, mval))
        # also record configured tcp mem limits
        limits = get_file(fname_limits).split()
        if len(limits) == 3:
            # eg. for alerting/dashboard on pages allocated vs max values
            result.add(plumd.Int("TCP.mem_min", limits[0]))
            result.add(plumd.Int("TCP.mem_pressure", limits[1]))
            result.add(plumd.Int("TCP.mem_max", limits[2]))
        orphan_max = get_file(fname_orph)
        result.add(plumd.Int("TCP.orphan_max", orphan_max))
        return result


    def proc_net_netstat(self):
        """Return detailed network statistics from proc file net/netstat.

        The file is processed as pairs of lines: a header line
        ("<section>: name name ...") followed by a value line. Values are
        reported as per second rates under "<section>.<name>".

        Names and values are paired before the skip list is applied.
        Previously a skipped metric did not consume its value, so every
        metric after a skipped one was reported with the value of an earlier
        column.

        A value line holding a non integer token is logged and the pair is
        skipped.

        Note: add entries to the configuration value 'skip_proc_net_netstat' to
        skip metric names (eg. 'TcpExt.TCPMTUPSuccess'). Defaults should be
        reasonable however.

        Note: ECT1Pkts and ECT0Pkts relate to ECT congestion notifications.

        :rtype: plumd.Result
        """
        skip = self.config.get('skip_proc_net_netstat')
        result = plumd.Result("netstat")
        fname = "{0}/net/netstat".format(self.proc_path)
        # dat holds the lines of fname
        dat = get_file_list(fname)
        ts = time.time()
        while len(dat) > 1:
            headers = dat.popleft().split()
            if len(headers) < 1:
                # an empty line ends processing, as before
                break
            try:
                mvals = [int(i) for i in dat.popleft().split()[1:]]
            except ValueError as e:
                tb = traceback.format_exc()
                self.log.error("proc_net_netstat: exception: {0}: {1}".format(e, tb))
                continue
            mstr = headers[0].replace(":", "")
            mnames = headers[1:]
            if len(mnames) != len(mvals):
                self.log.error("proc_net_netstat: invalid entry: {0}".format(mnames))
                continue
            # pair first, filter second: keeps names and values aligned
            for mname, mval in zip(mnames, mvals):
                metric = "{0}.{1}".format(mstr, mname)
                if metric in skip:
                    continue
                dval = self.calc.per_second(metric, mval, ts)
                result.add(plumd.Int(metric, dval))
        return result


1		# -- coding: utf-8 --
2
3		__author__ = 'Kenny Freeman'
4		__email__ = '[email protected]'
5		__license__ = "ISCL"
6		__docformat__ = 'reStructuredText'
7
8		import re
9		import os
10		import time
11		import os.path
12		import traceback
13		import multiprocessing
14		from collections import deque
15
16		import plumd
17		import plumd.plugins
18		from plumd.calc import Differential
19		from plumd.util import get_file_map, get_file_map_list, get_file_list, get_file
20
21
22		## todo: switch from list with pop(0) to deque
23		class Proc(plumd.plugins.Reader):
24		"""Plugin to measure various kernel metrics from /proc."""
25		defaults = {
26		'poll.interval': 10,
27		'proc_path': '/proc',
28		'skip_proc_stat': ['btime'],
29		'skip_proc_meminfo': ["Active(anon)","Active(file)",
30		"AnonHugePages","AnonPages","Bounce","DirectMap2M","CommitLimit",
31		"DirectMap4k","HugePages_Free","Hugepagesize",
32		"HugePages_Rsvd","HugePages_Surp","HugePages_Total",
33		"Inactive","Inactive(anon)","Inactive(file)",
34		"KernelStack","NFS_Unstable","PageTables",
35		"Shmem","Slab","SReclaimable","SUnreclaim",
36		"SwapCached","SwapFree","SwapTotal","Writeback","WritebackTmp"],
37		'skip_proc_net_snmp': ["Icmp.InAddrMaskReps","Icmp.InAddrMasks",
38		"Icmp.InTimestampReps","Icmp.InTimestamps","Icmp.OutAddrMaskReps",
39		"Icmp.OutAddrMasks","Icmp.OutTimestampReps","Icmp.OutTimestamps",
40		"Ip.DefaultTTL","Ip.Forwarding","Tcp.MaxConn","Tcp.RtoAlgorithm",
41		"Tcp.RtoMax","Tcp.RtoMin"],
42		'skip_proc_net_sockstat': [],
43		'skip_proc_net_netstat': ["TcpExt.ArpFilter","TcpExt.BusyPollRxPackets",
44		"TcpExt.DelayedACKLocked","TcpExt.DelayedACKLost",
45		"TcpExt.DelayedACKs","TcpExt.EmbryonicRsts",
46		"TcpExt.IPReversePathFilter","TcpExt.LockDroppedIcmps",
47		"TcpExt.OfoPruned","TcpExt.OutOfWindowIcmps",
48		"TcpExt.PAWSActive","TcpExt.PAWSEstab", "TcpExt.PAWSPassive",
49		"TcpExt.PruneCalled","TcpExt.RcvPruned",
50		"TcpExt.TCPACKSkippedChallenge","TcpExt.TCPACKSkippedFinWait2",
51		"TcpExt.TCPACKSkippedPAWS","TcpExt.TCPACKSkippedSeq",
52		"TcpExt.TCPACKSkippedSynRecv","TcpExt.TCPACKSkippedTimeWait",
53		"TcpExt.TCPAutoCorking","TcpExt.TCPChallengeACK",
54		"TcpExt.TCPDirectCopyFromBacklog","TcpExt.TCPDirectCopyFromPrequeue",
55		"TcpExt.TCPDSACKIgnoredNoUndo","TcpExt.TCPDSACKIgnoredOld",
56		"TcpExt.TCPDSACKOfoRecv","TcpExt.TCPDSACKOfoSent",
57		"TcpExt.TCPDSACKOldSent","TcpExt.TCPDSACKRecv","TcpExt.TCPDSACKUndo",
58		"TcpExt.TCPFACKReorder","TcpExt.TCPFromZeroWindowAdv",
59		"TcpExt.TCPFullUndo","TcpExt.TCPHPAcks","TcpExt.TCPHPHits",
60		"TcpExt.TCPHPHitsToUser","TcpExt.TCPHystartDelayCwnd",
61		"TcpExt.TCPHystartDelayDetect","TcpExt.TCPHystartTrainCwnd",
62		"TcpExt.TCPHystartTrainDetect","TcpExt.TCPLossFailures",
63		"TcpExt.TCPLossProbeRecovery","TcpExt.TCPLossProbes",
64		"TcpExt.TCPLossUndo","TcpExt.TCPLostRetransmit",
65		"TcpExt.TCPMD5NotFound","TcpExt.TCPMD5Unexpected",
66		"TcpExt.TCPMTUPFail","TcpExt.TCPMTUPSuccess","TcpExt.TCPOFODrop",
67		"TcpExt.TCPOFOMerge","TcpExt.TCPOFOQueue","TcpExt.TCPOrigDataSent",
68		"TcpExt.TCPPartialUndo","TcpExt.TCPPrequeued",
69		"TcpExt.TCPPrequeueDropped","TcpExt.TCPPureAcks",
70		"TcpExt.TCPRcvCoalesce","TcpExt.TCPRcvCollapsed",
71		"TcpExt.TCPRenoFailures","TcpExt.TCPRenoRecovery",
72		"TcpExt.TCPRenoRecoveryFail","TcpExt.TCPRenoReorder",
73		"TcpExt.TCPRetransFail","TcpExt.TCPSACKDiscard",
74		"TcpExt.TCPSackFailures","TcpExt.TCPSackMerged",
75		"TcpExt.TCPSackRecovery","TcpExt.TCPSackRecoveryFail",
76		"TcpExt.TCPSACKReneging","TcpExt.TCPSACKReorder",
77		"TcpExt.TCPSackShifted","TcpExt.TCPSackShiftFallback",
78		"TcpExt.TCPSchedulerFailed","TcpExt.TCPSpuriousRTOs",
79		"TcpExt.TCPSpuriousRtxHostQueues","TcpExt.TCPSYNChallenge",
80		"TcpExt.TCPToZeroWindowAdv","TcpExt.TCPTSReorder",
81		"TcpExt.TCPWantZeroWindowAdv","TcpExt.TCPWinProbe",
82		"TcpExt.TCPKeepAlive", "TcpExt.TCPFastOpenCookieReqd",
83		"IpExt.InNoECTPkts", "IpExt.InCEPkts"],
84		'cpu_metrics': ["user", "nice", "system", "idle", "iowait", "irq",
85		"softirq","steal", "guest", "guest_nice"],
86		'per_cpu': False,
87		'diskstats_dev_re': "dm-\d",
88		'diskstats_cols': ["r", "r_merge", "r_sector", "r_time", "w", "w_merge",
89		"w_sector", "w_time", "io_inprog", "io_time",
90		"io_weighted_time"],
91		'net_dev_re': "(virbr\d+)\|(vnet\d+)",
92		'net_dev_cols': ["rx_bytes", "rx_pkt", "rx_errs", "rx_drop",
93		"rx_fifo_errs", "rx_frame_errs", "rx_compressed",
94		"rx_mcast", "tx_bytes", "tx_pkt", "tx_errs", "tx_drop",
95		"tx_fifo_errs", "collissions", "carrier",
96		"tx_compressed"],
97		'net_snmp_items': ["Ip:", "Icmp:", "IcmpMsg:", "Tcp:", "Udp:",
98		"UdpLite:"]
99		}
100
101		def __init__(self, log, config):
102		"""Plugin to measure various kernel metrics from /proc.
103
104		:param log: A logger
105		:type log: logging.RootLogger
106		:param config: a plumd.config.Conf configuration helper instance.
107		:type config: plumd.config.Conf
108		"""
109		super(Proc, self).__init__(log, config)
110		self.config.defaults(Proc.defaults)
111		self.calc = Differential()
112		self.proc_path = config.get('proc_path')
113		self.page_size = os.sysconf("SC_PAGESIZE")
114		self.diskstats_dev_re = re.compile(config.get('diskstats_dev_re'))
115		self.net_dev_re = re.compile(config.get('net_dev_re'))
116
117
118		def poll(self):
119		"""Poll for kernel metrics under /proc.
120
121		:rtype: ResultSet
122		"""
123		ret = plumd.ResultSet([])
124		ret.add(self.proc_stat())
125		ret.add(self.proc_meminfo())
126		ret.add(self.proc_loadavg())
127		ret.add(self.proc_swap())
128		ret.add(self.proc_uptime())
129		ret.add(self.proc_diskstats())
130		ret.add(self.proc_net_dev())
131		ret.add(self.proc_net_snmp())
132		ret.add(self.proc_net_sockstat())
133		ret.add(self.proc_net_netstat())
134		return ret
135
136
137	View Code Duplication	def proc_stat_cpu_percent(self, key, val, ts):
		1 ignored issue – show Duplication introduced 2016-08-10 15:03 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
138		"""Return cpu utilization metrics in percentage.
139
140		:param key: The metric name (eg. cpu, cpu0, cpu1, etc)
141		:type key: str
142		:param val: A deque populated with the metric values from stat
143		:type val: deque
144		:rtype: list
145		"""
146		ret = []
147		total = sum([ float(i) for i in val])
148		cpu = self.config.get('cpu_metrics')
149		for map_val in cpu:
150		if len(val) < 1:
151		break
152		metric_val = float(val.popleft())
153		mstr = "{0}.{1}".format(key, map_val)
154		percent_val = metric_val / total * 100.00
155		ret.append(plumd.Float(mstr, percent_val))
156		return ret
157
158
159	View Code Duplication	def proc_stat_cpu(self, key, val, ts):
		1 ignored issue – show Duplication introduced 2016-08-10 15:03 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
160		"""Return cpu utilization metrics in USER_HZ or Jiffies
161		(most likely units of 100Hz intervals ie. 100ms intervals).
162
163		:param key: The metric name (eg. cpu, cpu0, cpu1, etc)
164		:type key: str
165		:param val: A deque populated with the metric values from stat
166		:type val: deque
167		:rtype: list
168		"""
169		ret = []
170		total = sum([ float(i) for i in val])
171		cpu = self.config.get('cpu_metrics')
172		for map_val in cpu:
173		if len(val) < 1:
174		break
175		metric_val = float(val.popleft())
176		mstr = "{0}.{1}".format(key, map_val)
177		percent_val = float(metric_val / total) * 100.00
178		mval = self.calc.per_second(key, percent_val, ts)
179		ret.append(plumd.Float(mstr, mval))
180		return ret
181
182
183		def proc_stat(self):
184		"""Return cpu utilization and process metrics from proc file stat.
185
186		:rtype: plumd.Result
187		"""
188		skip = self.config.get('skip_proc_stat')
189		per_cpu = self.config.get('per_cpu')
190		result = plumd.Result("stat")
191		fname = "{0}/stat".format(self.proc_path)
192		# read and process /proc/stat
193		dat = get_file_map(fname, 0, 0)
194		ts = time.time()
195		# parse
196		for key, val in dat.items():
197		# cpu is the only special metric
198		if val is None:
199		self.log.error("proc_stat: null value for {0}".format(key))
200		continue
201		elif key in skip:
202		continue
203		elif key == "cpu":
204		result.add_list(self.proc_stat_cpu_percent(key, val, ts))
205		elif key.startswith("cpu"):
206		if not per_cpu:
207		continue
208		result.add_list(self.proc_stat_cpu_percent(key, val, ts))
209		else:
210		mval = self.calc.per_second(key, float(val[0]), ts)
211		result.add(plumd.Int(key, mval))
212		return result
213
214
215		def proc_meminfo(self):
216		"""Return memory utilization metrics from proc file mem.
217
218		:rtype: plumd.Result
219		"""
220		skip = self.config.get('skip_proc_meminfo')
221		result = plumd.Result("mem")
222		fname = "{0}/meminfo".format(self.proc_path)
223		# read and process /proc/stat
224		dat = get_file_map(fname, 0, 0)
225		ts = time.time()
226		# parse
227		for key, val in dat.items():
228		mstr = key.replace(":", "")
229		# cpu is the only special metric
230		if val is None:
231		self.log.error("proc_meminfo: null value for {0}".format(mstr))
232		continue
233		elif mstr in skip:
234		continue
235		else:
236		#mval = dcalc.per_second(key, float(val[0]), ts)
237		result.add(plumd.Int(mstr, val[0]))
238		return result
239
240
241		def proc_loadavg(self):
242		"""Return 1, 5 and 15 minute load averages from proc file loadavg.
243
244		:rtype: plumd.Result
245		"""
246		result = plumd.Result("loadavg")
247		fname = "{0}/loadavg".format(self.proc_path)
248		dat = []
249		# read and process /proc/stat
250		try:
251		dat = get_file(fname).split()
252		except Exception as e:
253		tb = traceback.format_exc()
254		self.log.error("proc_loadavg: exception: {0} : {1}".format(e, tb))
255		return result
256		if len(dat) >= 3:
257		result.add(plumd.Float("1", dat[0]))
258		result.add(plumd.Float("5", dat[1]))
259		result.add(plumd.Float("15", dat[2]))
260		return result
261
262
def proc_swap(self):
    """Return swap usage metrics from the proc file swaps.

    Every swap area listed after the header line yields three metrics named
    after the basename of the swap file/device: ``<name>.used``,
    ``<name>.size`` and ``<name>.free`` (size minus used). Lines that do
    not have exactly five fields, or whose size/used fields are not
    numeric, are logged and skipped.

    :rtype: plumd.Result
    """
    result = plumd.Result("swap")
    fname = "{0}/swaps".format(self.proc_path)
    # dat holds the lines of fname (popleft is used, so presumably a deque)
    dat = get_file_list(fname)
    # header: file, type, size, used, priority - always drop it so that it
    # can never be parsed as a swap entry
    if dat:
        dat.popleft()
    for entry in dat:
        if not entry:
            continue
        try:
            sfname, stype, ssize, sused, sprio = entry.split()
            # convert inside the try: a non-numeric field is logged and the
            # entry skipped instead of aborting the whole read
            sfree = float(ssize) - float(sused)
        except Exception as e:
            tb = traceback.format_exc()
            self.log.error("proc_swap: exception: {0}: {1}".format(e, tb))
            continue
        sname = os.path.basename(sfname)
        result.add(plumd.Float("{0}.used".format(sname), sused))
        result.add(plumd.Float("{0}.size".format(sname), ssize))
        result.add(plumd.Float("{0}.free".format(sname), sfree))
    return result
295
296
def proc_uptime(self):
    """Return uptime metrics from the proc file uptime.

    Reports ``up`` and ``idle`` as read from the file, plus
    ``idle_percent``: idle as a percentage of up, divided by the cpu count
    reported by multiprocessing.cpu_count(). ``idle_percent`` is omitted
    when the reported uptime is not greater than zero. If the file cannot
    be read or parsed an empty result is returned.

    :rtype: plumd.Result
    """
    result = plumd.Result("uptime")
    fname = "{0}/uptime".format(self.proc_path)
    try:
        up, idle = get_file(fname).split()
        # convert inside the try so malformed values are logged rather than
        # raising out of the reader
        up_secs = float(up)
        idle_secs = float(idle)
    except Exception as e:
        tb = traceback.format_exc()
        self.log.error("proc_uptime: exception: {0}: {1}".format(e, tb))
        return result
    result.add(plumd.Float("up", up))
    result.add(plumd.Float("idle", idle))
    # guard against ZeroDivisionError when the file reports an uptime of 0
    if up_secs > 0:
        pidle = idle_secs / up_secs * 100 / multiprocessing.cpu_count()
        result.add(plumd.Float("idle_percent", pidle))
    return result
317
318
def proc_diskstats(self):
    """Return disk io metrics from the proc file diskstats.

    Devices whose name matches ``self.diskstats_dev_re`` are skipped. For
    every other device the leading fields of its row are paired, in order,
    with the names in the configuration value 'diskstats_cols' and reported
    as per second rates named ``<device>.<column>``.

    Rows with more than 13 fields are accepted: newer kernels append
    extra columns (discard and flush statistics) after the original ones,
    so the leading fields keep their meaning.

    :rtype: plumd.Result
    """
    # times in ms
    cols = self.config.get('diskstats_cols')
    result = plumd.Result("diskstats")
    fname = "{0}/diskstats".format(self.proc_path)
    # read and process the diskstats file, keyed on column 2
    try:
        dat = get_file_map(fname, 2, 0)
    except Exception as e:
        tb = traceback.format_exc()
        self.log.error("proc_diskstats: exception: {0}: {1}".format(e, tb))
        return result
    ts = time.time()
    for key, val in dat.items():
        if self.diskstats_dev_re.match(key):
            continue
        # NOTE(review): 13 is presumably the field count left once the
        # device name is used as the key - confirm against get_file_map.
        # Only short rows are invalid; longer rows come from newer kernels.
        if len(val) < 13:
            self.log.error("proc_diskstats: invalid entry: {0}".format(val))
            continue
        # zip stops at the shorter sequence, so a cols list longer than the
        # row can no longer pop from an empty container
        for mname, raw in zip(cols, val):
            mval = int(raw)
            mstr = "{0}.{1}".format(key, mname)
            dval = self.calc.per_second(mstr, mval, ts)
            result.add(plumd.Int(mstr, dval))
    return result
349
350
def proc_net_dev(self):
    """Return network interface metrics from the proc file net/dev.

    Interfaces whose name (with any ":" removed) matches
    ``self.net_dev_re`` are skipped, as are rows holding fewer values than
    there are names in the configuration value 'net_dev_cols'. The
    remaining values are paired in order with those names and reported as
    per second rates named ``<interface>.<column>``.

    :rtype: plumd.Result
    """
    columns = self.config.get('net_dev_cols')
    metrics = plumd.Result("net")
    path = "{0}/net/dev".format(self.proc_path)
    try:
        ifaces = get_file_map(path, 0, 0)
    except Exception as exc:
        trace = traceback.format_exc()
        self.log.error("proc_net_dev: exception: {0}: {1}".format(exc, trace))
        return metrics
    now = time.time()
    for raw_name, counters in ifaces.items():
        iface = raw_name.replace(":", "")
        # ignore filtered interfaces and rows that are too short to map
        # onto the configured columns
        if self.net_dev_re.match(iface) or len(counters) < len(columns):
            continue
        for column in columns:
            if not counters:
                break
            name = "{0}.{1}".format(iface, column)
            rate = self.calc.per_second(name, int(counters.popleft()), now)
            metrics.add(plumd.Int(name, rate))
    return metrics
386
387
def proc_net_snmp(self):
    """Return network protocol metrics from the proc file net/snmp.

    The file is read as consecutive pairs of lines: a header line
    (``<label> name name ...``) followed by a value line
    (``<label> value value ...``). Pairs are indexed by their label so a
    section that is present in the file but not configured does not
    affect the sections that follow it.

    Add entries to the configuration value 'skip_proc_net_snmp' to skip
    metrics.

    The configuration value 'net_snmp_items' lists the section labels to
    report, written exactly as they appear at the start of the header line.
    Configured sections missing from the file are ignored (older kernels
    may not have all items).

    :rtype: plumd.Result
    """
    skip = self.config.get('skip_proc_net_snmp')
    items = self.config.get('net_snmp_items')
    result = plumd.Result("net_snmp")
    fname = "{0}/net/snmp".format(self.proc_path)
    # lines holds the lines of fname (popleft is used, so presumably a deque)
    lines = get_file_list(fname)
    ts = time.time()

    # index each header/value pair of lines by the section label
    sections = {}
    while len(lines) >= 2:
        # first line is a list of: <label> followed by the metric names
        header = lines.popleft().split()
        # second line is a list of: <label> followed by the metric values
        fields = lines.popleft().split()
        if len(header) < 2 or not fields or header[0] != fields[0]:
            self.log.error("proc_net_snmp: invalid entry: {0}: {1}".format(header, fields))
            continue
        try:
            values = [int(i) for i in fields[1:]]
        except Exception as e:
            tb = traceback.format_exc()
            self.log.error("proc_net_snmp: exception: {0}: {1}".format(e, tb))
            continue
        sections[header[0]] = (header[1:], values)

    # report the configured sections in their configured order
    for item in items:
        if item not in sections:
            # older kernels may not have all items
            continue
        mnames, mvals = sections[item]
        # the label is the name of the section eg. Ip, Icmp, etc
        mheader = item.replace(":", "")
        # pair each name with its value before filtering so that skipped
        # metrics never shift the remaining values
        for mname, mval in zip(mnames, mvals):
            mstr = "{0}.{1}".format(mheader, mname)
            if mstr in skip:
                continue
            dval = self.calc.per_second(mstr, mval, ts)
            result.add(plumd.Int(mstr, dval))
    return result
437
438
def proc_net_sockstat(self):
    """Return network socket metrics from the proc file net/sockstat.

    Each line of the file is treated as ``<label> name value name value``
    and reported as ``<label>.<name>``. Labels listed in the configuration
    value 'skip_proc_net_sockstat' are skipped. The configured tcp memory
    limits (``TCP.mem_min``, ``TCP.mem_pressure``, ``TCP.mem_max``) and the
    orphan limit (``TCP.orphan_max``) are reported as well.

    Note: sockstat.TCP.mem is measured in pages, you can get the system page
    size from os.sysconf("SC_PAGESIZE")

    Note: FRAG: ip fragmentation related

    :rtype: plumd.Result
    """
    skip = self.config.get('skip_proc_net_sockstat')
    result = plumd.Result("sockstat")
    fname = "{0}/net/sockstat".format(self.proc_path)
    # sys/net/ipv4/tcp_mem format: min, pressure, max
    fname_limits = "{0}/sys/net/ipv4/tcp_mem".format(self.proc_path)
    # orphan limit: /proc/sys/net/ipv4/tcp_max_orphans
    fname_orph = "{0}/sys/net/ipv4/tcp_max_orphans".format(self.proc_path)
    # each entry is a key: [metric, val, metric, val]
    dat = get_file_map_list(fname, 0, 0)
    for key, val in dat.items():
        if len(val) < 2:
            continue
        mstr = key.replace(":", "")
        if mstr in skip:
            continue
        mnames = val[::2]
        mvals = [int(i) for i in val[1::2]]
        # an odd number of fields leaves a name without a value
        if len(mnames) != len(mvals):
            self.log.error("proc_net_sockstat: invalid entry: {0}".format(mnames))
            continue
        for mname, mval in zip(mnames, mvals):
            metric = "{0}.{1}".format(mstr, mname)
            result.add(plumd.Int(metric, mval))
    # also record configured tcp mem limits
    limits = get_file(fname_limits).split()
    if len(limits) == 3:
        # eg. for alerting/dashboard on pages allocated vs max values
        result.add(plumd.Int("TCP.mem_min", limits[0]))
        result.add(plumd.Int("TCP.mem_pressure", limits[1]))
        # max is the third field, not a repeat of the pressure value
        result.add(plumd.Int("TCP.mem_max", limits[2]))
    orphan_max = get_file(fname_orph)
    result.add(plumd.Int("TCP.orphan_max", orphan_max))
    return result
485
486
def proc_net_netstat(self):
    """Return detailed network statistics from the proc file net/netstat.

    The file is read as consecutive pairs of lines: a header line
    (``<label> name name ...``) followed by a value line
    (``<label> value value ...``). Each value is reported as a per second
    rate named ``<label>.<name>`` with any ":" removed from the label.

    Note: add entries to the configuration value 'skip_proc_net_netstat' to
    skip metric names (eg. 'TcpExt.TCPMTUPSuccess'). Defaults should be
    reasonable however.

    Note: ECT1Pkts and ECT0Pkts relate to ECT congestion notifications.

    :rtype: plumd.Result
    """
    skip = self.config.get('skip_proc_net_netstat')
    result = plumd.Result("netstat")
    fname = "{0}/net/netstat".format(self.proc_path)
    # dat holds the lines of fname (popleft is used, so presumably a deque)
    dat = get_file_list(fname)
    ts = time.time()
    while len(dat) > 1:
        headers = dat.popleft().split()
        # a blank line means there is nothing left to parse
        if not headers:
            break
        mvals = [int(i) for i in dat.popleft().split()[1:]]
        mstr = headers[0].replace(":", "")
        mnames = headers[1:]
        if len(mnames) != len(mvals):
            self.log.error("proc_net_netstat: invalid entry: {0}".format(mnames))
            continue
        # pair names with values before filtering: a skipped metric must
        # still consume its value or every following metric in the line
        # would be reported with its neighbour's value
        for mname, mval in zip(mnames, mvals):
            metric = "{0}.{1}".format(mstr, mname)
            if metric in skip:
                continue
            dval = self.calc.per_second(metric, mval, ts)
            result.add(plumd.Int(metric, dval))
    return result
523

s4z / plumd

Push — master ( fa474b...faefc5 )

Proc.proc_net_snmp() F

Complexity

Size

Duplication

Importance

How to fix Complexity

Complexity

Duplication Side-by-Side

Filter issues like