org_fedora_oscap.content_handling - Code Metrics - Inspection of "Merge pull request #147 from jan-cerny/scapcontent..." - OpenSCAP/oscap-anaconda-addon - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — rhel8-branch ( b8bdaf...ff1c74 )

by Matěj

created 2021-06-09 15:25 UTC

org_fedora_oscap.content_handling A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	143
Duplicated Lines	0 %

Importance

Changes

Metric	Value
wmc	26
eloc	68
dl	0
loc	143
rs	10
c	0
b	0
f	0

2 Functions

Rating	Name	Duplication	Size	Complexity
F	explore_content_files()	0	55	15
A	parse_HTML_from_content()	0	13	1

5 Methods

Rating	Name	Size	Complexity
A	ParseHTMLContent.handle_data()	2	1
A	ParseHTMLContent.handle_endtag()	5	3
A	ParseHTMLContent.handle_starttag()	7	4
A	ParseHTMLContent.__init__()	3	1
A	ParseHTMLContent.get_content()	2	1

#
# Copyright (C) 2013  Red Hat, Inc.
#
# This copyrighted material is made available to anyone wishing to use,
# modify, copy, or redistribute it subject to the terms and conditions of
# the GNU General Public License v.2, or (at your option) any later version.
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY expressed or implied, including the implied warranties of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.  You should have received a copy of the
# GNU General Public License along with this program; if not, write to the
# Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.  Any Red Hat trademarks that are incorporated in the
# source code or documentation are not subject to the GNU General Public
# License and may only be used or replicated with the express permission of
# Red Hat, Inc.
#
# Red Hat Author(s): Vratislav Podzimek <[email protected]>
#

"""
Module with various classes for SCAP content processing and retrieving data
from it.

"""

import os.path

from collections import namedtuple
from pyanaconda.core.util import execReadlines
try:
    from html.parser import HTMLParser
except ImportError:
    from HTMLParser import HTMLParser

import logging
log = logging.getLogger("anaconda")


class ParseHTMLContent(HTMLParser):
    """Minimal HTML-to-text converter for markup embedded in content.

    The text found between tags is accumulated in ``self.content`` with
    its surrounding whitespace stripped. List and line-break tags from
    the ``html:`` namespace are turned into newlines; every other tag is
    dropped without producing any output.
    """

    # tags whose opening adds a newline to the output
    _NEWLINE_ON_OPEN = ("html:ul", "html:li", "html:br")
    # tags whose closing adds a newline to the output
    _NEWLINE_ON_CLOSE = ("html:ul", "html:li")

    def __init__(self):
        HTMLParser.__init__(self)
        # plain-text result built up while the input is being parsed
        self.content = ""

    def handle_starttag(self, tag, attrs):
        """Add a newline when a list, list item or line break starts."""
        if tag in self._NEWLINE_ON_OPEN:
            self.content = self.content + "\n"

    def handle_endtag(self, tag):
        """Add a newline when a list or list item ends."""
        if tag in self._NEWLINE_ON_CLOSE:
            self.content = self.content + "\n"

    def handle_data(self, data):
        """Append the text between tags, stripped of outer whitespace."""
        stripped = data.strip()
        self.content = self.content + stripped

    def get_content(self):
        """Return the text accumulated so far."""
        return self.content


def parse_HTML_from_content(content):
    """This is a very simple HTML to text parser.

    HTML tags will be removed while trying to maintain readability
    of content.

    :param content: content whose HTML tags will be parsed
    :return: content without HTML tags
    """

    parser = ParseHTMLContent()
    parser.feed(content)
    # feed() alone may keep the tail of the input buffered, e.g. when the
    # content ends with text such as "AT&T" where the parser still waits
    # for a possible character reference to be completed. close() forces
    # the buffered data to be processed so the end of the content is not
    # silently dropped.
    parser.close()
    return parser.get_content()


# Record of the content files picked from a set of candidate paths: the XCCDF
# file (or data stream), the CPE dictionary and the tailoring file.
# pylint: disable-msg=C0103
ContentFiles = namedtuple("ContentFiles", "xccdf cpe tailoring")


def explore_content_files(fpaths):
    """
    Select the content files to use from a list of candidate file paths.

    Every path is classified by the "Document type:" line found in the
    output of ``oscap info``. The first path of each type is kept; the only
    exception is that a data stream takes over the XCCDF slot from
    a standalone benchmark that was picked earlier.

    :param fpaths: a list of file paths to search for content files in
    :type fpaths: [str]
    :return: ContentFiles instance holding the paths chosen for the XCCDF
        file, CPE dictionary and tailoring file, with "" in place of each
        item that was not found
    :rtype: ContentFiles

    """

    def get_doc_type(file_path):
        # "unknown" is reported when no "Document type:" line is seen
        detected = "unknown"
        try:
            for output_line in execReadlines("oscap", ["info", file_path]):
                if not output_line.startswith("Document type:"):
                    continue
                # keep only what follows the first colon
                detected = output_line.partition(":")[2].strip()
                break
        except OSError:
            # 'oscap info' exited with a non-zero exit code -> unknown doc
            # type
            pass
        message = "OSCAP addon: Identified {file_path} as {content_type}"
        log.info(message.format(file_path=file_path, content_type=detected))
        return detected

    chosen_xccdf = ""
    chosen_cpe = ""
    chosen_tailoring = ""
    have_data_stream = False

    for candidate in fpaths:
        kind = get_doc_type(candidate)
        if not kind:
            continue

        if kind == "Source Data Stream":
            # a data stream wins over a standalone XCCDF picked earlier
            if not (chosen_xccdf and have_data_stream):
                chosen_xccdf = candidate
                have_data_stream = True
        elif kind == "XCCDF Checklist":
            if not chosen_xccdf:
                chosen_xccdf = candidate
        elif kind == "CPE Dictionary":
            if not chosen_cpe:
                chosen_cpe = candidate
        elif kind == "XCCDF Tailoring":
            if not chosen_tailoring:
                chosen_tailoring = candidate

    # TODO: raise exception if no xccdf_file is found?
    return ContentFiles(chosen_xccdf, chosen_cpe, chosen_tailoring)


1			#
2			# Copyright (C) 2013 Red Hat, Inc.
3			#
4			# This copyrighted material is made available to anyone wishing to use,
5			# modify, copy, or redistribute it subject to the terms and conditions of
6			# the GNU General Public License v.2, or (at your option) any later version.
7			# This program is distributed in the hope that it will be useful, but WITHOUT
8			# ANY WARRANTY expressed or implied, including the implied warranties of
9			# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
10			# Public License for more details. You should have received a copy of the
11			# GNU General Public License along with this program; if not, write to the
12			# Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
13			# 02110-1301, USA. Any Red Hat trademarks that are incorporated in the
14			# source code or documentation are not subject to the GNU General Public
15			# License and may only be used or replicated with the express permission of
16			# Red Hat, Inc.
17			#
18			# Red Hat Author(s): Vratislav Podzimek <[email protected]>
19			#
20
21			"""
22			Module with various classes for SCAP content processing and retrieving data
23			from it.
24
25			"""
26
27			import os.path
28
29			from collections import namedtuple
30			from pyanaconda.core.util import execReadlines
31			try:
32			from html.parser import HTMLParser
33			except ImportError:
34			from HTMLParser import HTMLParser
35
36			import logging
37			log = logging.getLogger("anaconda")
38
39
40			class ParseHTMLContent(HTMLParser):
41			"""Parser class for HTML tags within content"""
42
43			def __init__(self):
44			HTMLParser.__init__(self)
45			self.content = ""
46
47			def handle_starttag(self, tag, attrs):
48			if tag == "html:ul":
49			self.content += "\n"
50			elif tag == "html:li":
51			self.content += "\n"
52			elif tag == "html:br":
53			self.content += "\n"
54
55			def handle_endtag(self, tag):
56			if tag == "html:ul":
57			self.content += "\n"
58			elif tag == "html:li":
59			self.content += "\n"
60
61			def handle_data(self, data):
62			self.content += data.strip()
63
64			def get_content(self):
65			return self.content
66
67
68			def parse_HTML_from_content(content):
69			"""This is a very simple HTML to text parser.
70
71			HTML tags will be removed while trying to maintain readability
72			of content.
73
74			:param content: content whose HTML tags will be parsed
75			:return: content without HTML tags
76			"""
77
78			parser = ParseHTMLContent()
79			parser.feed(content)
80			return parser.get_content()
81
82
83			# namedtuple class for info about content files found
84			# pylint: disable-msg=C0103
85			ContentFiles = namedtuple("ContentFiles", ["xccdf", "cpe", "tailoring"])
86
87
88			def explore_content_files(fpaths):
89			"""
90			Function for finding content files in a list of file paths. SIMPLY PICKS
91			THE FIRST USABLE CONTENT FILE OF A PARTICULAR TYPE AND JUST PREFERS DATA
92			STREAMS OVER STANDALONE BENCHMARKS.
93
94			:param fpaths: a list of file paths to search for content files in
95			:type fpaths: [str]
96			:return: ContentFiles instance containing the file names of the XCCDF file,
97			CPE dictionary and tailoring file or "" in place of those items
98			if not found
99			:rtype: ContentFiles
100
101			"""
102
103			def get_doc_type(file_path):
104			content_type = "unknown"
105			try:
106			for line in execReadlines("oscap", ["info", file_path]):
107			if line.startswith("Document type:"):
108			_prefix, _sep, type_info = line.partition(":")
109			content_type = type_info.strip()
110			break
111			except OSError:
112			# 'oscap info' exited with a non-zero exit code -> unknown doc
113			# type
114			pass
115			log.info("OSCAP addon: Identified {file_path} as {content_type}"
116			.format(file_path=file_path, content_type=content_type))
117			return content_type
118
119			xccdf_file = ""
120			cpe_file = ""
121			tailoring_file = ""
122			found_ds = False
123
124			for fpath in fpaths:
125			doc_type = get_doc_type(fpath)
126			if not doc_type:
127			continue
128
129			# prefer DS over standalone XCCDF
130			if doc_type == "Source Data Stream" and (not xccdf_file or not found_ds):
131			xccdf_file = fpath
132			found_ds = True
133			elif doc_type == "XCCDF Checklist" and not xccdf_file:
134			xccdf_file = fpath
135			elif doc_type == "CPE Dictionary" and not cpe_file:
136			cpe_file = fpath
137			elif doc_type == "XCCDF Tailoring" and not tailoring_file:
138			tailoring_file = fpath
139
140			# TODO: raise exception if no xccdf_file is found?
141			files = ContentFiles(xccdf_file, cpe_file, tailoring_file)
142			return files
143

OpenSCAP / oscap-anaconda-addon

Push — rhel8-branch ( b8bdaf...ff1c74 )

org_fedora_oscap.content_handling A

Complexity

Size/Duplication

Importance

2 Functions

5 Methods

Duplication Side-by-Side

Filter issues like