Proc.proc_conntrack() - Code Metrics - Inspection of "added python3 support, added conntract reader, doc..." - s4z/plumd - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( f161ca...d2dc5c )

by Kenny

created 2016-08-13 11:40 UTC

Proc.proc_conntrack() B

↳ Parent: Proc

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes	1
Bugs	0	Features	1

Metric	Value
cc	3
dl	0
loc	28
rs	8.8571
c	1
b	0
f	1

# -*- coding: utf-8 -*-

__author__ = 'Kenny Freeman'
__email__ = '[email protected]'
__license__ = "ISCL"
__docformat__ = 'reStructuredText'

import re
import os
import time
import os.path
import traceback
import multiprocessing
from collections import deque

import plumd
import plumd.plugins
from plumd.calc import Differential
from plumd.util import get_file_map, get_file_map_list, get_file_list, get_file


## todo: switch from list with pop(0) to deque
class Proc(plumd.plugins.Reader):
    """Plugin to measure various kernel metrics from /proc."""
    # Default configuration values; registered on the plugin configuration
    # in __init__ via self.config.defaults(Proc.defaults).
    defaults = {
        'poll.interval': 10,
        'proc_path': '/proc',
        # names of collector methods (eg. 'proc_swap') that should not run
        'skip_funcs': [],
        'skip_proc_stat': ['btime'],
        'skip_proc_meminfo': ["Active(anon)","Active(file)",
            "AnonHugePages","AnonPages","Bounce","DirectMap2M","CommitLimit",
            "DirectMap4k","HugePages_Free","Hugepagesize",
            "HugePages_Rsvd","HugePages_Surp","HugePages_Total",
            "Inactive","Inactive(anon)","Inactive(file)",
            "KernelStack","NFS_Unstable","PageTables",
            "Shmem","Slab","SReclaimable","SUnreclaim",
            "SwapCached","SwapFree","SwapTotal","Writeback","WritebackTmp"],
        'skip_proc_net_snmp': ["Icmp.InAddrMaskReps","Icmp.InAddrMasks",
            "Icmp.InTimestampReps","Icmp.InTimestamps","Icmp.OutAddrMaskReps",
            "Icmp.OutAddrMasks","Icmp.OutTimestampReps","Icmp.OutTimestamps",
            "Ip.DefaultTTL","Ip.Forwarding","Tcp.MaxConn","Tcp.RtoAlgorithm",
            "Tcp.RtoMax","Tcp.RtoMin"],
        'skip_proc_net_sockstat': [],
        'skip_proc_net_netstat': ["TcpExt.ArpFilter","TcpExt.BusyPollRxPackets",
            "TcpExt.DelayedACKLocked","TcpExt.DelayedACKLost",
            "TcpExt.DelayedACKs","TcpExt.EmbryonicRsts",
            "TcpExt.IPReversePathFilter","TcpExt.LockDroppedIcmps",
            "TcpExt.OfoPruned","TcpExt.OutOfWindowIcmps",
            "TcpExt.PAWSActive","TcpExt.PAWSEstab", "TcpExt.PAWSPassive",
            "TcpExt.PruneCalled","TcpExt.RcvPruned",
            "TcpExt.TCPACKSkippedChallenge","TcpExt.TCPACKSkippedFinWait2",
            "TcpExt.TCPACKSkippedPAWS","TcpExt.TCPACKSkippedSeq",
            "TcpExt.TCPACKSkippedSynRecv","TcpExt.TCPACKSkippedTimeWait",
            "TcpExt.TCPAutoCorking","TcpExt.TCPChallengeACK",
            "TcpExt.TCPDirectCopyFromBacklog","TcpExt.TCPDirectCopyFromPrequeue",
            "TcpExt.TCPDSACKIgnoredNoUndo","TcpExt.TCPDSACKIgnoredOld",
            "TcpExt.TCPDSACKOfoRecv","TcpExt.TCPDSACKOfoSent",
            "TcpExt.TCPDSACKOldSent","TcpExt.TCPDSACKRecv","TcpExt.TCPDSACKUndo",
            "TcpExt.TCPFACKReorder","TcpExt.TCPFromZeroWindowAdv",
            "TcpExt.TCPFullUndo","TcpExt.TCPHPAcks","TcpExt.TCPHPHits",
            "TcpExt.TCPHPHitsToUser","TcpExt.TCPHystartDelayCwnd",
            "TcpExt.TCPHystartDelayDetect","TcpExt.TCPHystartTrainCwnd",
            "TcpExt.TCPHystartTrainDetect","TcpExt.TCPLossFailures",
            "TcpExt.TCPLossProbeRecovery","TcpExt.TCPLossProbes",
            "TcpExt.TCPLossUndo","TcpExt.TCPLostRetransmit",
            "TcpExt.TCPMD5NotFound","TcpExt.TCPMD5Unexpected",
            "TcpExt.TCPMTUPFail","TcpExt.TCPMTUPSuccess","TcpExt.TCPOFODrop",
            "TcpExt.TCPOFOMerge","TcpExt.TCPOFOQueue","TcpExt.TCPOrigDataSent",
            "TcpExt.TCPPartialUndo","TcpExt.TCPPrequeued",
            "TcpExt.TCPPrequeueDropped","TcpExt.TCPPureAcks",
            "TcpExt.TCPRcvCoalesce","TcpExt.TCPRcvCollapsed",
            "TcpExt.TCPRenoFailures","TcpExt.TCPRenoRecovery",
            "TcpExt.TCPRenoRecoveryFail","TcpExt.TCPRenoReorder",
            "TcpExt.TCPRetransFail","TcpExt.TCPSACKDiscard",
            "TcpExt.TCPSackFailures","TcpExt.TCPSackMerged",
            "TcpExt.TCPSackRecovery","TcpExt.TCPSackRecoveryFail",
            "TcpExt.TCPSACKReneging","TcpExt.TCPSACKReorder",
            "TcpExt.TCPSackShifted","TcpExt.TCPSackShiftFallback",
            "TcpExt.TCPSchedulerFailed","TcpExt.TCPSpuriousRTOs",
            "TcpExt.TCPSpuriousRtxHostQueues","TcpExt.TCPSYNChallenge",
            "TcpExt.TCPToZeroWindowAdv","TcpExt.TCPTSReorder",
            "TcpExt.TCPWantZeroWindowAdv","TcpExt.TCPWinProbe",
            "TcpExt.TCPKeepAlive", "TcpExt.TCPFastOpenCookieReqd",
            "IpExt.InNoECTPkts", "IpExt.InCEPkts"],
        'cpu_metrics': ["user", "nice", "system", "idle", "iowait", "irq",
                        "softirq","steal", "guest", "guest_nice"],
        'per_cpu': False,
        # regular expressions are raw strings so that "\d" reaches the re
        # module untouched instead of being an invalid string escape;
        # devices whose name matches are skipped by proc_diskstats
        'diskstats_dev_re': r"dm-\d",
        'diskstats_cols': ["r", "r_merge", "r_sector", "r_time", "w", "w_merge",
                           "w_sector", "w_time", "io_inprog", "io_time",
                           "io_weighted_time"],
        # interfaces whose name matches are skipped by proc_net_dev
        'net_dev_re': r"(virbr\d+)|(vnet\d+)",
        'net_dev_cols': ["rx_bytes", "rx_pkt", "rx_errs", "rx_drop",
                         "rx_fifo_errs", "rx_frame_errs", "rx_compressed",
                         "rx_mcast", "tx_bytes", "tx_pkt", "tx_errs", "tx_drop",
                         "tx_fifo_errs", "collissions", "carrier",
                         "tx_compressed"],
        'net_snmp_items': ["Ip:", "Icmp:", "IcmpMsg:", "Tcp:", "Udp:",
                           "UdpLite:"]
    }

    def __init__(self, log, config):
        """Set up the /proc reader and choose which collectors are enabled.

        Registers the plugin defaults on the configuration, compiles the
        device name filters and builds self.funcs, the list of collector
        methods that poll() calls. A collector is left out when its method
        name appears in the 'skip_funcs' configuration value.

        :param log: A logger
        :type log: logging.RootLogger
        :param config: a plumd.config.Conf configuration helper instance.
        :type config: plumd.config.Conf
        """
        super(Proc, self).__init__(log, config)
        self.config.defaults(Proc.defaults)
        self.calc = Differential()
        self.proc_path = config.get('proc_path')
        self.page_size = os.sysconf("SC_PAGESIZE")
        self.diskstats_dev_re = re.compile(config.get('diskstats_dev_re'))
        self.net_dev_re = re.compile(config.get('net_dev_re'))
        # every available collector, in the order poll() runs them
        collectors = (
            self.proc_stat,
            self.proc_swap,
            self.proc_meminfo,
            self.proc_loadavg,
            self.proc_uptime,
            self.proc_net_dev,
            self.proc_diskstats,
            self.proc_net_snmp,
            self.proc_net_netstat,
            self.proc_conntrack,
            self.proc_net_sockstat,
        )
        # collectors can be disabled by method name via configuration
        disabled = self.config.get('skip_funcs')
        self.funcs = [fn for fn in collectors if fn.__name__ not in disabled]


    def poll(self):
        """Run every enabled collector and gather what each one returns.

        :rtype: ResultSet
        """
        results = plumd.ResultSet([])
        for collector in self.funcs:
            collected = collector()
            results.add(collected)
        return results


    def proc_stat_cpu_percent(self, key, val, ts):
        """Return cpu utilization metrics in percentage.

        Each value in val is paired, in order, with a name from the
        'cpu_metrics' configuration value and reported as its share of the
        sum of all values in val. Surplus names or surplus values are
        ignored. val is only read, it is not modified.

        NOTE(review): the percentages are computed from the raw values as
        read, not from a difference between two polls - confirm that this
        is the intended meaning of "utilization".

        :param key: The metric name (eg. cpu, cpu0, cpu1, etc)
        :type key: str
        :param val: A deque populated with the metric values from stat
        :type val: deque
        :param ts: The time the values were read; not used by this method
        :type ts: float
        :rtype: list
        """
        fields = [float(i) for i in val]
        total = sum(fields)
        names = self.config.get('cpu_metrics')
        ret = []
        for name, field in zip(names, fields):
            mstr = "{0}.{1}".format(key, name)
            # a line whose values sum to zero would otherwise raise
            # ZeroDivisionError and abort the whole poll
            percent_val = field / total * 100.00 if total else 0.0
            ret.append(plumd.Float(mstr, percent_val))
        return ret


    def proc_stat_cpu(self, key, val, ts):
        """Return cpu utilization metrics in USER_HZ or Jiffies
        (most likely units of 100Hz intervals ie. 100ms intervals).

        Each value in val is paired, in order, with a name from the
        'cpu_metrics' configuration value and passed through
        self.calc.per_second so the reported value is a rate. Surplus names
        or surplus values are ignored. val is only read, it is not modified.

        The rate is tracked under the full metric name (eg. cpu0.user) so
        that the fields of one cpu do not share, and overwrite, a single
        history entry.

        :param key: The metric name (eg. cpu, cpu0, cpu1, etc)
        :type key: str
        :param val: A deque populated with the metric values from stat
        :type val: deque
        :param ts: The time the values were read
        :type ts: float
        :rtype: list
        """
        names = self.config.get('cpu_metrics')
        ret = []
        for name, field in zip(names, val):
            mstr = "{0}.{1}".format(key, name)
            mval = self.calc.per_second(mstr, float(field), ts)
            ret.append(plumd.Float(mstr, mval))
        return ret


    def proc_stat(self):
        """Collect cpu percentages and per-second counters from proc file stat.

        Keys listed in the 'skip_proc_stat' configuration value are ignored.
        The aggregate "cpu" entry is always reported; the other entries
        starting with "cpu" are reported only when 'per_cpu' is enabled.
        For every remaining key the first value is reported as a
        per-second rate.

        :rtype: plumd.Result
        """
        skip = self.config.get('skip_proc_stat')
        per_cpu = self.config.get('per_cpu')
        result = plumd.Result("stat")
        fname = "{0}/stat".format(self.proc_path)
        entries = get_file_map(fname, 0, 0)
        ts = time.time()
        for name, fields in entries.items():
            if fields is None:
                self.log.error("proc_stat: null value for {0}".format(name))
                continue
            if name in skip:
                continue
            if name.startswith("cpu"):
                # cpu lines are the only ones carrying several values
                if name == "cpu" or per_cpu:
                    cpu_metrics = self.proc_stat_cpu_percent(name, fields, ts)
                    result.add_list(cpu_metrics)
                continue
            rate = self.calc.per_second(name, float(fields[0]), ts)
            result.add(plumd.Int(name, rate))
        return result


    def proc_meminfo(self):
        """Return memory utilization metrics from proc file meminfo.

        The trailing ":" is removed from each key to form the metric name.
        Names listed in the 'skip_proc_meminfo' configuration value are
        ignored. The first value of each remaining entry is reported as is
        (no rate calculation).

        :rtype: plumd.Result
        """
        skip = self.config.get('skip_proc_meminfo')
        result = plumd.Result("mem")
        fname = "{0}/meminfo".format(self.proc_path)
        # read and process the meminfo file
        dat = get_file_map(fname, 0, 0)
        for key, val in dat.items():
            mstr = key.replace(":", "")
            if val is None:
                self.log.error("proc_meminfo: null value for {0}".format(mstr))
                continue
            if mstr in skip:
                continue
            result.add(plumd.Int(mstr, val[0]))
        return result


    def proc_loadavg(self):
        """Report the first three fields of proc file loadavg.

        The fields are published under the names "1", "5" and "15" (the
        1, 5 and 15 minute load averages). Nothing is reported when the
        file cannot be read or holds fewer than three fields.

        :rtype: plumd.Result
        """
        result = plumd.Result("loadavg")
        fname = "{0}/loadavg".format(self.proc_path)
        try:
            fields = get_file(fname).split()
        except Exception as exc:
            trace = traceback.format_exc()
            self.log.error("proc_loadavg: exception: {0} : {1}".format(exc, trace))
            return result
        if len(fields) < 3:
            return result
        # zip stops after the three labels, any further fields are ignored
        for label, value in zip(("1", "5", "15"), fields):
            result.add(plumd.Float(label, value))
        return result


    def proc_swap(self):
        """Return swap file usage metrics from proc file swaps.

        For every swap entry three metrics are reported, named after the
        base name of the swap file or device: <name>.used, <name>.size and
        <name>.free (size minus used). Lines that do not split into exactly
        five fields, or whose size/used fields are not numeric, are logged
        and skipped.

        :rtype: plumd.Result
        """
        result = plumd.Result("swap")
        fname = "{0}/swaps".format(self.proc_path)
        # read the swaps file as a sequence of lines
        dat = get_file_list(fname)
        # header: file, type, size, used, priority
        # drop it whenever there is a first line - a file holding nothing
        # but the header must not be parsed as a swap entry
        if dat:
            dat.popleft()
        for entry in dat:
            if not entry:
                continue
            try:
                sfname, _stype, ssize, sused, _sprio = entry.split()
                # convert inside the try so a non numeric field is logged
                # and skipped instead of aborting the poll
                sfree = float(ssize) - float(sused)
            except Exception as e:
                tb = traceback.format_exc()
                self.log.error("proc_swap: exception: {0}: {1}".format(e, tb))
                continue
            sname = os.path.basename(sfname)
            mstr = "{0}.used".format(sname)
            result.add(plumd.Float(mstr, sused))
            mstr = "{0}.size".format(sname)
            result.add(plumd.Float(mstr, ssize))
            mstr = "{0}.free".format(sname)
            result.add(plumd.Float(mstr, sfree))
        return result


    def proc_uptime(self):
        """Return uptime and idle time from proc file uptime.

        Reports "up" and "idle" as read from the file, plus "idle_percent":
        idle divided by up, as a percentage, divided by the cpu count.
        Nothing is reported when the file cannot be read, does not hold
        exactly two fields, or the fields are not numeric.

        :rtype: plumd.Result
        """
        result = plumd.Result("uptime")
        fname = "{0}/uptime".format(self.proc_path)
        try:
            up, idle = get_file(fname).split()
            # convert inside the try so malformed content is logged rather
            # than raised out of the poll
            up_secs = float(up)
            idle_secs = float(idle)
        except Exception as e:
            tb = traceback.format_exc()
            self.log.error("proc_uptime: exception: {0}: {1}".format(e, tb))
            return result
        # NOTE(review): dividing by the cpu count presumably accounts for
        # idle time being accumulated across all cpus - confirm
        if up_secs:
            pidle = idle_secs / up_secs * 100 / multiprocessing.cpu_count()
        else:
            # an uptime of zero would otherwise raise ZeroDivisionError
            pidle = 0.0
        result.add(plumd.Float("up", up))
        result.add(plumd.Float("idle", idle))
        result.add(plumd.Float("idle_percent", pidle))
        return result


    def proc_diskstats(self):
        """Collect per-device io rates from proc file diskstats.

        Devices whose name matches the 'diskstats_dev_re' configuration
        value are skipped. For every other device the leading values are
        paired, in order, with the names in 'diskstats_cols' and reported
        as per-second rates named <device>.<column>.

        NOTE(review): entries are only accepted when they hold exactly 13
        values - verify against what get_file_map returns for the kernels
        in use.

        :rtype: plumd.Result
        """
        cols = self.config.get('diskstats_cols')
        result = plumd.Result("diskstats")
        fname = "{0}/diskstats".format(self.proc_path)
        try:
            devices = get_file_map(fname, 2, 0)
        except Exception as exc:
            trace = traceback.format_exc()
            self.log.error("proc_diskstats: exception: {0}: {1}".format(exc, trace))
            return result
        ts = time.time()
        for dev, fields in devices.items():
            if self.diskstats_dev_re.match(dev):
                continue
            if len(fields) != 13:
                self.log.error("proc_diskstats: invalid entry: {0}".format(fields))
                continue
            for col in cols:
                raw = int(fields.popleft())
                metric = "{0}.{1}".format(dev, col)
                rate = self.calc.per_second(metric, raw, ts)
                result.add(plumd.Int(metric, rate))
        return result


    def proc_net_dev(self):
        """Collect per-interface traffic rates from proc file net/dev.

        Interfaces whose name matches the regular expression in the
        configuration value 'net_dev_re' are skipped, as are lines with
        fewer values than there are names in 'net_dev_cols'. The values are
        paired, in order, with those names and reported as per-second rates
        named <interface>.<column>.

        :rtype: plumd.Result
        """
        cols = self.config.get('net_dev_cols')
        result = plumd.Result("net")
        fname = "{0}/net/dev".format(self.proc_path)
        try:
            interfaces = get_file_map(fname, 0, 0)
        except Exception as exc:
            trace = traceback.format_exc()
            self.log.error("proc_net_dev: exception: {0}: {1}".format(exc, trace))
            return result
        ts = time.time()
        for raw_name, fields in interfaces.items():
            iface = raw_name.replace(":", "")
            if self.net_dev_re.match(iface):
                continue
            # too few values to cover every column - skipped without logging
            if len(fields) < len(cols):
                continue
            for col in cols:
                if not fields:
                    break
                raw = int(fields.popleft())
                metric = "{0}.{1}".format(iface, col)
                rate = self.calc.per_second(metric, raw, ts)
                result.add(plumd.Int(metric, rate))
        return result


    def proc_net_snmp(self):
        """Collect network protocol rates from proc file net/snmp.

        The file is consumed two lines at a time: a line of names followed
        by a line of values. The configuration value 'net_snmp_items' lists
        the expected first token of each name line, in file order; a pair
        whose name line does not start with the expected item is logged and
        dropped. Metrics named in 'skip_proc_net_snmp' are not reported.

        :rtype: plumd.Result
        """
        skip = self.config.get('skip_proc_net_snmp')
        items = self.config.get('net_snmp_items')
        result = plumd.Result("net_snmp")
        fname = "{0}/net/snmp".format(self.proc_path)
        # lines is a deque of the lines in fname
        lines = get_file_list(fname)
        ts = time.time()

        for expected in items:
            # older kernels may not have all items
            if len(lines) < 2:
                break
            try:
                # name line: "<protocol>: name name ..."
                names = deque(lines.popleft().split())
                # value line: "<protocol>: value value ..."
                values = deque([int(tok) for tok in lines.popleft().split()[1:]])
            except Exception as exc:
                trace = traceback.format_exc()
                self.log.error("proc_net_snmp: exception: {0}: {1}".format(exc, trace))
                continue
            if len(names) < 2 or names[0] != expected:
                self.log.error("proc_net_snmp: invalid entry: {0}: {1}".format(names, expected))
                continue
            # the first token names the protocol eg. Ip, Icmp, etc
            protocol = names.popleft().replace(":", "")
            # pairing stops at whichever of names/values runs out first
            for name, value in zip(names, values):
                metric = "{0}.{1}".format(protocol, name)
                if metric in skip:
                    continue
                rate = self.calc.per_second(metric, value, ts)
                result.add(plumd.Int(metric, rate))
        return result


    def proc_net_sockstat(self):
        """Return network socket metrics from proc file net/sockstat.

        Each line of the file is a key followed by alternating metric names
        and values; every pair is reported as <key>.<name>. Keys listed in
        the 'skip_proc_net_sockstat' configuration value are ignored.

        The configured limits are reported as well: TCP.mem_min,
        TCP.mem_pressure and TCP.mem_max from sys/net/ipv4/tcp_mem and
        TCP.orphan_max from sys/net/ipv4/tcp_max_orphans. If either file
        cannot be read the error is logged and the socket metrics gathered
        so far are still returned.

        Note: sockstat.TCP.mem is measured in pages, you can get the system page
        size from os.sysconf("SC_PAGESIZE")

        Note: FRAG: ip fragmentation related

        :rtype: plumd.Result
        """
        skip = self.config.get('skip_proc_net_sockstat')
        result = plumd.Result("sockstat")
        fname = "{0}/net/sockstat".format(self.proc_path)
        # sys/net/ipv4/tcp_mem format: min, pressure, max
        fname_limits = "{0}/sys/net/ipv4/tcp_mem".format(self.proc_path)
        # orphan limit: /proc/sys/net/ipv4/tcp_max_orphans
        fname_orph = "{0}/sys/net/ipv4/tcp_max_orphans".format(self.proc_path)
        dat = get_file_map_list(fname, 0, 0)
        # each entry is a key: [metric, val, metric, val]
        for key, val in dat.items():
            if len(val) < 2:
                continue
            mstr = key.replace(":", "")
            if mstr in skip:
                continue
            mnames = val[::2]
            mvals = [int(i) for i in val[1::2]]
            if len(mnames) != len(mvals):
                self.log.error("proc_net_sockstat: invalid entry: {0}".format(mnames))
                continue
            for mname, mval in zip(mnames, mvals):
                metric = "{0}.{1}".format(mstr, mname)
                result.add(plumd.Int(metric, mval))
        # also record configured tcp mem limits
        try:
            limits = get_file(fname_limits).split()
            orphan_max = get_file(fname_orph)
        except Exception as e:
            tb = traceback.format_exc()
            self.log.error("proc_net_sockstat: exception: {0}: {1}".format(e, tb))
            return result
        if len(limits) == 3:
            # eg. for alerting/dashboard on pages allocated vs max values
            result.add(plumd.Int("TCP.mem_min", limits[0]))
            result.add(plumd.Int("TCP.mem_pressure", limits[1]))
            # max is the third field; the second one is the pressure value
            result.add(plumd.Int("TCP.mem_max", limits[2]))
        result.add(plumd.Int("TCP.orphan_max", orphan_max))
        return result


    def proc_net_netstat(self):
        """Return detailed network statistics from proc file net/netstat.

        The file is consumed two lines at a time: a line of names followed
        by a line of values. Each value is reported as a per-second rate
        named <group>.<name>, where group is the first token of the name
        line without its ":". A pair whose name and value counts differ is
        logged and dropped.

        Note: add entries to the configuration value 'skip_proc_net_netstat' to
        skip metric names (eg. 'TcpExt.TCPMTUPSuccess'). Defaults should be
        reasonable however.

        Note: ECT1Pkts and ECT0Pkts relate to ECT congestion notifications.

        :rtype: plumd.Result
        """
        skip = self.config.get('skip_proc_net_netstat')
        result = plumd.Result("netstat")
        fname = "{0}/net/netstat".format(self.proc_path)
        # dat is a deque of the lines in fname
        dat = get_file_list(fname)
        ts = time.time()
        while len(dat) > 1:
            headers = deque(dat.popleft().split())
            if len(headers) < 1:
                break
            mvals = [int(i) for i in dat.popleft().split()[1:]]
            mstr = headers.popleft().replace(":", "")
            if len(headers) != len(mvals):
                self.log.error("proc_net_netstat: invalid entry: {0}".format(headers))
                continue
            # walk names and values in lockstep so that a skipped metric
            # also discards its own value; otherwise every later value
            # would be reported under the wrong name
            for mname, mval in zip(headers, mvals):
                metric = "{0}.{1}".format(mstr, mname)
                if metric in skip:
                    continue
                dval = self.calc.per_second(metric, mval, ts)
                result.add(plumd.Int(metric, dval))
        return result



    def proc_conntrack(self):
        """Report the current and maximum conntrack counts.

        The nf_conntrack files are used when the count file exists,
        otherwise the ip_conntrack files under sys/net/ipv4/netfilter are
        tried. The values are reported as "cur" and "max"; if either file
        cannot be read the error is logged and nothing is reported.

        :rtype: plumd.Result
        """
        result = plumd.Result("conntrack")
        root = self.proc_path
        fname_cnt = "{0}/sys/net/netfilter/nf_conntrack_count".format(root)
        fname_max = "{0}/sys/net/nf_conntrack_max".format(root)
        if not os.path.isfile(fname_cnt):
            # try these - either no iptables or older kernel
            fname_cnt = "{0}/sys/net/ipv4/netfilter/ip_conntrack_count".format(root)
            fname_max = "{0}/sys/net/ipv4/netfilter/ip_conntrack_max".format(root)
        try:
            current = get_file(fname_cnt)
            maximum = get_file(fname_max)
        except Exception as exc:
            trace = traceback.format_exc()
            self.log.error("proc_conntrack: exception: {0}: {1}".format(exc, trace))
            return result
        for name, value in (("cur", current), ("max", maximum)):
            result.add(plumd.Int(name, value))
        return result


1		# -*- coding: utf-8 -*-
2
3		__author__ = 'Kenny Freeman'
4		__email__ = '[email protected]'
5		__license__ = "ISCL"
6		__docformat__ = 'reStructuredText'
7
8		import re
9		import os
10		import time
11		import os.path
12		import traceback
13		import multiprocessing
14		from collections import deque
15
16		import plumd
17		import plumd.plugins
18		from plumd.calc import Differential
19		from plumd.util import get_file_map, get_file_map_list, get_file_list, get_file
20
21
22		## todo: switch from list with pop(0) to deque
23		class Proc(plumd.plugins.Reader):
24		"""Plugin to measure various kernel metrics from /proc."""
25		defaults = {
26		'poll.interval': 10,
27		'proc_path': '/proc',
28		'skip_funcs': [],
29		'skip_proc_stat': ['btime'],
30		'skip_proc_meminfo': ["Active(anon)","Active(file)",
31		"AnonHugePages","AnonPages","Bounce","DirectMap2M","CommitLimit",
32		"DirectMap4k","HugePages_Free","Hugepagesize",
33		"HugePages_Rsvd","HugePages_Surp","HugePages_Total",
34		"Inactive","Inactive(anon)","Inactive(file)",
35		"KernelStack","NFS_Unstable","PageTables",
36		"Shmem","Slab","SReclaimable","SUnreclaim",
37		"SwapCached","SwapFree","SwapTotal","Writeback","WritebackTmp"],
38		'skip_proc_net_snmp': ["Icmp.InAddrMaskReps","Icmp.InAddrMasks",
39		"Icmp.InTimestampReps","Icmp.InTimestamps","Icmp.OutAddrMaskReps",
40		"Icmp.OutAddrMasks","Icmp.OutTimestampReps","Icmp.OutTimestamps",
41		"Ip.DefaultTTL","Ip.Forwarding","Tcp.MaxConn","Tcp.RtoAlgorithm",
42		"Tcp.RtoMax","Tcp.RtoMin"],
43		'skip_proc_net_sockstat': [],
44		'skip_proc_net_netstat': ["TcpExt.ArpFilter","TcpExt.BusyPollRxPackets",
45		"TcpExt.DelayedACKLocked","TcpExt.DelayedACKLost",
46		"TcpExt.DelayedACKs","TcpExt.EmbryonicRsts",
47		"TcpExt.IPReversePathFilter","TcpExt.LockDroppedIcmps",
48		"TcpExt.OfoPruned","TcpExt.OutOfWindowIcmps",
49		"TcpExt.PAWSActive","TcpExt.PAWSEstab", "TcpExt.PAWSPassive",
50		"TcpExt.PruneCalled","TcpExt.RcvPruned",
51		"TcpExt.TCPACKSkippedChallenge","TcpExt.TCPACKSkippedFinWait2",
52		"TcpExt.TCPACKSkippedPAWS","TcpExt.TCPACKSkippedSeq",
53		"TcpExt.TCPACKSkippedSynRecv","TcpExt.TCPACKSkippedTimeWait",
54		"TcpExt.TCPAutoCorking","TcpExt.TCPChallengeACK",
55		"TcpExt.TCPDirectCopyFromBacklog","TcpExt.TCPDirectCopyFromPrequeue",
56		"TcpExt.TCPDSACKIgnoredNoUndo","TcpExt.TCPDSACKIgnoredOld",
57		"TcpExt.TCPDSACKOfoRecv","TcpExt.TCPDSACKOfoSent",
58		"TcpExt.TCPDSACKOldSent","TcpExt.TCPDSACKRecv","TcpExt.TCPDSACKUndo",
59		"TcpExt.TCPFACKReorder","TcpExt.TCPFromZeroWindowAdv",
60		"TcpExt.TCPFullUndo","TcpExt.TCPHPAcks","TcpExt.TCPHPHits",
61		"TcpExt.TCPHPHitsToUser","TcpExt.TCPHystartDelayCwnd",
62		"TcpExt.TCPHystartDelayDetect","TcpExt.TCPHystartTrainCwnd",
63		"TcpExt.TCPHystartTrainDetect","TcpExt.TCPLossFailures",
64		"TcpExt.TCPLossProbeRecovery","TcpExt.TCPLossProbes",
65		"TcpExt.TCPLossUndo","TcpExt.TCPLostRetransmit",
66		"TcpExt.TCPMD5NotFound","TcpExt.TCPMD5Unexpected",
67		"TcpExt.TCPMTUPFail","TcpExt.TCPMTUPSuccess","TcpExt.TCPOFODrop",
68		"TcpExt.TCPOFOMerge","TcpExt.TCPOFOQueue","TcpExt.TCPOrigDataSent",
69		"TcpExt.TCPPartialUndo","TcpExt.TCPPrequeued",
70		"TcpExt.TCPPrequeueDropped","TcpExt.TCPPureAcks",
71		"TcpExt.TCPRcvCoalesce","TcpExt.TCPRcvCollapsed",
72		"TcpExt.TCPRenoFailures","TcpExt.TCPRenoRecovery",
73		"TcpExt.TCPRenoRecoveryFail","TcpExt.TCPRenoReorder",
74		"TcpExt.TCPRetransFail","TcpExt.TCPSACKDiscard",
75		"TcpExt.TCPSackFailures","TcpExt.TCPSackMerged",
76		"TcpExt.TCPSackRecovery","TcpExt.TCPSackRecoveryFail",
77		"TcpExt.TCPSACKReneging","TcpExt.TCPSACKReorder",
78		"TcpExt.TCPSackShifted","TcpExt.TCPSackShiftFallback",
79		"TcpExt.TCPSchedulerFailed","TcpExt.TCPSpuriousRTOs",
80		"TcpExt.TCPSpuriousRtxHostQueues","TcpExt.TCPSYNChallenge",
81		"TcpExt.TCPToZeroWindowAdv","TcpExt.TCPTSReorder",
82		"TcpExt.TCPWantZeroWindowAdv","TcpExt.TCPWinProbe",
83		"TcpExt.TCPKeepAlive", "TcpExt.TCPFastOpenCookieReqd",
84		"IpExt.InNoECTPkts", "IpExt.InCEPkts"],
85		'cpu_metrics': ["user", "nice", "system", "idle", "iowait", "irq",
86		"softirq","steal", "guest", "guest_nice"],
87		'per_cpu': False,
88		'diskstats_dev_re': "dm-\d",
89		'diskstats_cols': ["r", "r_merge", "r_sector", "r_time", "w", "w_merge",
90		"w_sector", "w_time", "io_inprog", "io_time",
91		"io_weighted_time"],
92		'net_dev_re': "(virbr\d+)|(vnet\d+)",
93		'net_dev_cols': ["rx_bytes", "rx_pkt", "rx_errs", "rx_drop",
94		"rx_fifo_errs", "rx_frame_errs", "rx_compressed",
95		"rx_mcast", "tx_bytes", "tx_pkt", "tx_errs", "tx_drop",
96		"tx_fifo_errs", "collissions", "carrier",
97		"tx_compressed"],
98		'net_snmp_items': ["Ip:", "Icmp:", "IcmpMsg:", "Tcp:", "Udp:",
99		"UdpLite:"]
100		}
101
102		def __init__(self, log, config):
103		"""Plugin to measure various kernel metrics from /proc.
104
105		:param log: A logger
106		:type log: logging.RootLogger
107		:param config: a plumd.config.Conf configuration helper instance.
108		:type config: plumd.config.Conf
109		"""
110		super(Proc, self).__init__(log, config)
111		self.config.defaults(Proc.defaults)
112		self.calc = Differential()
113		self.proc_path = config.get('proc_path')
114		self.page_size = os.sysconf("SC_PAGESIZE")
115		self.diskstats_dev_re = re.compile(config.get('diskstats_dev_re'))
116		self.net_dev_re = re.compile(config.get('net_dev_re'))
117		## allow disabling of each collector via configuration
118		all_functions = [self.proc_stat, self.proc_swap, self.proc_meminfo,
119		self.proc_loadavg, self.proc_uptime,
120		self.proc_net_dev, self.proc_diskstats,
121		self.proc_net_snmp, self.proc_net_netstat,
122		self.proc_conntrack, self.proc_net_sockstat]
123		skip_functions = self.config.get('skip_funcs')
124		self.funcs = []
125		for func in all_functions:
126		if func.__name__ in skip_functions:
127		continue
128		self.funcs.append(func)
129
130
131		def poll(self):
132		"""Poll for kernel metrics under /proc.
133
134		:rtype: ResultSet
135		"""
136		ret = plumd.ResultSet([])
137	View Code Duplication	for func in self.funcs:
		1 ignored issue – show Duplication introduced 2016-08-10 15:03 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
138		ret.add(func())
139		return ret
140
141
142		def proc_stat_cpu_percent(self, key, val, ts):
143		"""Return cpu utilization metrics in percentage.
144
145		:param key: The metric name (eg. cpu, cpu0, cpu1, etc)
146		:type key: str
147		:param val: A deque populated with the metric values from stat
148		:type val: deque
149		:rtype: list
150		"""
151		ret = []
152		total = sum([ float(i) for i in val])
153		cpu = self.config.get('cpu_metrics')
154		for map_val in cpu:
155		if len(val) < 1:
156		break
157		metric_val = float(val.popleft())
158		mstr = "{0}.{1}".format(key, map_val)
159	View Code Duplication	percent_val = metric_val / total * 100.00
		1 ignored issue – show Duplication introduced 2016-08-10 15:03 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
160		ret.append(plumd.Float(mstr, percent_val))
161		return ret
162
163
164		def proc_stat_cpu(self, key, val, ts):
165		"""Return cpu utilization metrics in USER_HZ or Jiffies
166		(most likely units of 100Hz intervals ie. 100ms intervals).
167
168		:param key: The metric name (eg. cpu, cpu0, cpu1, etc)
169		:type key: str
170		:param val: A deque populated with the metric values from stat
171		:type val: deque
172		:rtype: list
173		"""
174		ret = []
175		total = sum([ float(i) for i in val])
176		cpu = self.config.get('cpu_metrics')
177		for map_val in cpu:
178		if len(val) < 1:
179		break
180		metric_val = float(val.popleft())
181		mstr = "{0}.{1}".format(key, map_val)
182		percent_val = float(metric_val / total) * 100.00
183		mval = self.calc.per_second(key, percent_val, ts)
184		ret.append(plumd.Float(mstr, mval))
185		return ret
186
187
188		def proc_stat(self):
189		"""Return cpu utilization and process metrics from proc file stat.
190
191		:rtype: plumd.Result
192		"""
193		skip = self.config.get('skip_proc_stat')
194		per_cpu = self.config.get('per_cpu')
195		result = plumd.Result("stat")
196		fname = "{0}/stat".format(self.proc_path)
197		# read and process /proc/stat
198		dat = get_file_map(fname, 0, 0)
199		ts = time.time()
200		# parse
201		for key, val in dat.items():
202		# cpu is the only special metric
203		if val is None:
204		self.log.error("proc_stat: null value for {0}".format(key))
205		continue
206		elif key in skip:
207		continue
208		elif key == "cpu":
209		result.add_list(self.proc_stat_cpu_percent(key, val, ts))
210		elif key.startswith("cpu"):
211		if not per_cpu:
212		continue
213		result.add_list(self.proc_stat_cpu_percent(key, val, ts))
214		else:
215		mval = self.calc.per_second(key, float(val[0]), ts)
216		result.add(plumd.Int(key, mval))
217		return result
218
219
def proc_meminfo(self):
    """Collect memory utilization metrics from the proc meminfo file.

    Colons are stripped from the entry names, names listed in the
    'skip_proc_meminfo' configuration value are dropped and the first
    value of each remaining entry is recorded unchanged.

    :rtype: plumd.Result
    """
    ignored = self.config.get('skip_proc_meminfo')
    result = plumd.Result("mem")
    entries = get_file_map("{0}/meminfo".format(self.proc_path), 0, 0)
    for raw_name, fields in entries.items():
        name = raw_name.replace(":", "")
        if fields is None:
            self.log.error("proc_meminfo: null value for {0}".format(name))
            continue
        if name in ignored:
            continue
        # recorded as read - no rate calculation is applied here
        result.add(plumd.Int(name, fields[0]))
    return result
244
245
def proc_loadavg(self):
    """Collect the 1, 5 and 15 minute load averages from proc loadavg.

    A failure while reading the file is logged and yields an empty result;
    a file with fewer than three fields also yields an empty result.

    :rtype: plumd.Result
    """
    result = plumd.Result("loadavg")
    path = "{0}/loadavg".format(self.proc_path)
    try:
        fields = get_file(path).split()
    except Exception as e:
        tb = traceback.format_exc()
        self.log.error("proc_loadavg: exception: {0} : {1}".format(e, tb))
        return result
    if len(fields) < 3:
        return result
    # only the first three fields are recorded, under the names 1, 5 and 15
    for label, value in zip(("1", "5", "15"), fields):
        result.add(plumd.Float(label, value))
    return result
266
267
def proc_swap(self):
    """Collect swap usage metrics from the proc swaps file.

    For every non-empty line after the header a used, size and free metric
    is recorded, named after the basename of the first field. Lines that
    do not split into exactly five fields are logged and skipped.

    :rtype: plumd.Result
    """
    result = plumd.Result("swap")
    lines = get_file_list("{0}/swaps".format(self.proc_path))
    # nothing to report unless there is at least one line after the header
    if len(lines) <= 1:
        return result
    # header: file, type, size, used, priority
    lines.popleft()
    for line in lines:
        if not line:
            continue
        try:
            path, _kind, size, used, _prio = line.split()
        except Exception as e:
            tb = traceback.format_exc()
            self.log.error("proc_swap: exception: {0}: {1}".format(e, tb))
            continue
        label = os.path.basename(path)
        result.add(plumd.Float("{0}.used".format(label), used))
        result.add(plumd.Float("{0}.size".format(label), size))
        free = float(size) - float(used)
        result.add(plumd.Float("{0}.free".format(label), free))
    return result
300
301
def proc_uptime(self):
    """Collect uptime and idle time from the proc uptime file.

    Records the "up" and "idle" values as read, plus "idle_percent": idle
    as a percentage of up, divided by multiprocessing.cpu_count(). A
    failure while reading or splitting the file is logged and yields an
    empty result.

    :rtype: plumd.Result
    """
    result = plumd.Result("uptime")
    path = "{0}/uptime".format(self.proc_path)
    try:
        up, idle = get_file(path).split()
    except Exception as e:
        tb = traceback.format_exc()
        self.log.error("proc_uptime: exception: {0}: {1}".format(e, tb))
        return result
    idle_ratio = float(idle) / float(up)
    pidle = idle_ratio * 100 / multiprocessing.cpu_count()
    for label, value in (("up", up), ("idle", idle), ("idle_percent", pidle)):
        result.add(plumd.Float(label, value))
    return result
322
323
def proc_diskstats(self):
    """Return disk io metrics from proc file diskstats.

    Devices whose name matches self.diskstats_dev_re are skipped. For the
    remaining devices the leading columns are paired, in order, with the
    names from the 'diskstats_cols' configuration value and recorded as
    per second rates via self.calc.per_second.

    Newer kernels append extra columns to each line; those trailing
    columns are ignored rather than causing the device to be rejected.

    :rtype: plumd.Result
    """
    # minimum number of columns per device entry
    # NOTE(review): presumably every column except the device name - confirm
    # against get_file_map
    min_cols = 13
    # times in ms
    cols = self.config.get('diskstats_cols')
    result = plumd.Result("diskstats")
    fname = "{0}/diskstats".format(self.proc_path)
    try:
        dat = get_file_map(fname, 2, 0)
    except Exception as e:
        tb = traceback.format_exc()
        self.log.error("proc_diskstats: exception: {0}: {1}".format(e, tb))
        return result
    ts = time.time()
    for key, val in dat.items():
        if self.diskstats_dev_re.match(key):
            continue
        # accept longer entries: kernels only ever append new columns
        if val is None or len(val) < min_cols:
            self.log.error("proc_diskstats: invalid entry: {0}".format(val))
            continue
        # zip stops at the shorter of the two, so a cols list longer than
        # the entry can not raise
        for mname, raw in zip(cols, val):
            mstr = "{0}.{1}".format(key, mname)
            dval = self.calc.per_second(mstr, int(raw), ts)
            result.add(plumd.Int(mstr, dval))
    return result
354
355
def proc_net_dev(self):
    """Collect network interface metrics from the proc net/dev file.

    Interfaces whose name matches the regular expression built from the
    configuration value 'net_dev_re' are skipped, as are entries with
    fewer values than there are names in 'net_dev_cols'. The remaining
    values are paired in order with the 'net_dev_cols' names and recorded
    as per second rates.

    :rtype: plumd.Result
    """
    cols = self.config.get('net_dev_cols')
    result = plumd.Result("net")
    path = "{0}/net/dev".format(self.proc_path)
    try:
        entries = get_file_map(path, 0, 0)
    except Exception as e:
        tb = traceback.format_exc()
        self.log.error("proc_net_dev: exception: {0}: {1}".format(e, tb))
        return result
    ts = time.time()
    for raw_name, counters in entries.items():
        iface = raw_name.replace(":", "")
        if self.net_dev_re.match(iface):
            continue
        # silently ignore lines that are too short (eg. the header lines)
        if len(counters) < len(cols):
            continue
        for col, raw in zip(cols, counters):
            metric = "{0}.{1}".format(iface, col)
            rate = self.calc.per_second(metric, int(raw), ts)
            result.add(plumd.Int(metric, rate))
    return result
391
392
def proc_net_snmp(self):
    """Collect network protocol metrics from the proc net/snmp file.

    The file is consumed two lines at a time: a line of metric names
    followed by a line of values, both prefixed with the protocol name.

    The configuration value 'net_snmp_items' must list the protocol
    prefixes in the same order as they appear in the file; a pair whose
    prefix differs from the expected item is logged and dropped.

    Metrics named in the configuration value 'skip_proc_net_snmp' are not
    recorded.

    :rtype: plumd.Result
    """
    ignored = self.config.get('skip_proc_net_snmp')
    expected_items = self.config.get('net_snmp_items')
    result = plumd.Result("net_snmp")
    lines = get_file_list("{0}/net/snmp".format(self.proc_path))
    ts = time.time()

    for expected in expected_items:
        # older kernels may not have all items
        if len(lines) < 2:
            break
        try:
            # names line: "<proto>: name name ..."
            names = deque(lines.popleft().split())
            # values line: "<proto>: value value ..." - drop the prefix
            values = [int(i) for i in lines.popleft().split()[1:]]
        except Exception as e:
            tb = traceback.format_exc()
            self.log.error("proc_net_snmp: exception: {0}: {1}".format(e, tb))
            continue
        if len(names) < 2 or names[0] != expected:
            self.log.error("proc_net_snmp: invalid entry: {0}: {1}".format(names, expected))
            continue
        # the first name is the protocol prefix eg. Ip, Icmp, etc
        proto = names.popleft().replace(":", "")
        # names and values are paired up before the skip test so skipped
        # metrics still consume their value
        for name, value in zip(names, values):
            metric = "{0}.{1}".format(proto, name)
            if metric in ignored:
                continue
            rate = self.calc.per_second(metric, value, ts)
            result.add(plumd.Int(metric, rate))
    return result
442
443
def proc_net_sockstat(self):
    """Return network socket metrics from proc file net/sockstat.

    Each sockstat line is a section name followed by alternating metric
    names and values; every pair is recorded as <section>.<metric>.
    Sections named in the configuration value 'skip_proc_net_sockstat'
    are skipped.

    The limits read from sys/net/ipv4/tcp_mem (min, pressure, max) and
    sys/net/ipv4/tcp_max_orphans are recorded as well.

    Note: sockstat.TCP.mem is measured in pages, you can get the system page
    size from os.sysconf("SC_PAGESIZE")

    Note: FRAG: ip fragmentation related

    :rtype: plumd.Result
    """
    skip = self.config.get('skip_proc_net_sockstat')
    result = plumd.Result("sockstat")
    fname = "{0}/net/sockstat".format(self.proc_path)
    # sys/net/ipv4/tcp_mem format: min, pressure, max
    fname_limits = "{0}/sys/net/ipv4/tcp_mem".format(self.proc_path)
    # orphan limit: /proc/sys/net/ipv4/tcp_max_orphans
    fname_orph = "{0}/sys/net/ipv4/tcp_max_orphans".format(self.proc_path)
    # each entry is a key: [metric, val, metric, val]
    dat = get_file_map_list(fname, 0, 0)
    for key, val in dat.items():
        if len(val) < 2:
            continue
        mstr = key.replace(":", "")
        if mstr in skip:
            continue
        mnames = val[::2]
        mvals = [int(i) for i in val[1::2]]
        # an odd number of fields means a name without a value
        if len(mnames) != len(mvals):
            self.log.error("proc_net_sockstat: invalid entry: {0}".format(mnames))
            continue
        for mname, mval in zip(mnames, mvals):
            metric = "{0}.{1}".format(mstr, mname)
            result.add(plumd.Int(metric, mval))
    # also record configured tcp mem limits
    limits = get_file(fname_limits).split()
    if len(limits) == 3:
        # eg. for alerting/dashboard on pages allocated vs max values
        result.add(plumd.Int("TCP.mem_min", limits[0]))
        result.add(plumd.Int("TCP.mem_pressure", limits[1]))
        # max is the third field (the pressure value was reported here before)
        result.add(plumd.Int("TCP.mem_max", limits[2]))
    orphan_max = get_file(fname_orph)
    result.add(plumd.Int("TCP.orphan_max", orphan_max))
    return result
490
491
def proc_net_netstat(self):
    """Return detailed network statistics from proc file net/netstat.

    The file is consumed two lines at a time: a line of metric names
    followed by a line of values, both prefixed with the section name.
    Every value is recorded as a per second rate named <section>.<metric>.

    Note: add entries to the configuration value 'skip_proc_net_netstat' to
    skip metric names (eg. 'TcpExt.TCPMTUPSuccess'). Defaults should be
    reasonable however.

    Note: ECT1Pkts and ECT0Pkts relate to ECT congestion notifications.

    :rtype: plumd.Result
    """
    skip = self.config.get('skip_proc_net_netstat')
    result = plumd.Result("netstat")
    fname = "{0}/net/netstat".format(self.proc_path)
    # read and process - dat is a list of lines from fname
    dat = get_file_list(fname)
    ts = time.time()
    while len(dat) > 1:
        headers = deque(dat.popleft().split())
        if len(dat) < 1 or len(headers) < 1:
            break
        # drop the leading section name from the values line
        mvals = [int(i) for i in dat.popleft().split()[1:]]
        mstr = headers.popleft().replace(":", "")
        if len(headers) != len(mvals):
            self.log.error("proc_net_netstat: invalid entry: {0}".format(headers))
            continue
        # pair every name with its value before testing the skip list: a
        # skipped metric must still consume its value, otherwise all the
        # following metrics of the section are reported with shifted values
        for mname, mval in zip(headers, mvals):
            metric = "{0}.{1}".format(mstr, mname)
            if metric in skip:
                continue
            dval = self.calc.per_second(metric, mval, ts)
            result.add(plumd.Int(metric, dval))
    return result
528
529
530
def proc_conntrack(self):
    """Collect the current conntrack entry count and the configured max.

    The nf_conntrack files are read when the nf_conntrack_count file
    exists, otherwise the ip_conntrack files under sys/net/ipv4/netfilter
    are used. A failure while reading either file is logged and yields an
    empty result.

    :rtype: plumd.Result
    """
    result = plumd.Result("conntrack")
    base = self.proc_path
    count_path = "{0}/sys/net/netfilter/nf_conntrack_count".format(base)
    max_path = "{0}/sys/net/nf_conntrack_max".format(base)
    if not os.path.isfile(count_path):
        # try these - either no iptables or older kernel
        legacy = "{0}/sys/net/ipv4/netfilter/ip_conntrack_count"
        count_path = legacy.format(base)
        legacy = "{0}/sys/net/ipv4/netfilter/ip_conntrack_max"
        max_path = legacy.format(base)
    try:
        current = get_file(count_path)
        limit = get_file(max_path)
    except Exception as e:
        tb = traceback.format_exc()
        self.log.error("proc_conntrack: exception: {0}: {1}".format(e, tb))
        return result
    result.add(plumd.Int("cur", current))
    result.add(plumd.Int("max", limit))
    return result
559

s4z / plumd

Push — master ( f161ca...d2dc5c )

Proc.proc_conntrack() B

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like