org_fedora_oscap.content_handling - Code Metrics - Inspection of "Merge rhel9 branch into master" - OpenSCAP/oscap-anaconda-addon - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Pull Request — master (#178)

by Matěj

created 2021-11-18 16:57 UTC

org_fedora_oscap.content_handling A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	179
Duplicated Lines	0 %

Importance

Changes

Metric	Value
eloc	86
dl	0
loc	179
rs	10
c	0
b	0
f	0
wmc	30

5 Methods

Rating	Name	Size	Complexity
A	ParseHTMLContent.handle_data()	2	1
A	ParseHTMLContent.handle_endtag()	5	3
A	ParseHTMLContent.handle_starttag()	7	4
A	ParseHTMLContent.__init__()	3	1
A	ParseHTMLContent.get_content()	2	1

4 Functions

Rating	Name	Size	Complexity
A	parse_HTML_from_content()	13	1
A	identify_files()	3	1
B	get_doc_type()	21	6
D	explore_content_files()	38	12

#
# Copyright (C) 2013  Red Hat, Inc.
#
# This copyrighted material is made available to anyone wishing to use,
# modify, copy, or redistribute it subject to the terms and conditions of
# the GNU General Public License v.2, or (at your option) any later version.
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY expressed or implied, including the implied warranties of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.  You should have received a copy of the
# GNU General Public License along with this program; if not, write to the
# Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.  Any Red Hat trademarks that are incorporated in the
# source code or documentation are not subject to the GNU General Public
# License and may only be used or replicated with the express permission of
# Red Hat, Inc.
#
# Red Hat Author(s): Vratislav Podzimek <[email protected]>
#

"""
Module with various classes for SCAP content processing and retrieving data
from it.

"""

import os.path

from collections import namedtuple
import multiprocessing

from pyanaconda.core.util import execReadlines
try:
    from html.parser import HTMLParser
except ImportError:
    from HTMLParser import HTMLParser

import logging
log = logging.getLogger("anaconda")


# Human-readable SCAP document type names, keyed by a short symbolic
# identifier.
CONTENT_TYPES = {
    "DATASTREAM": "Source Data Stream",
    "XCCDF_CHECKLIST": "XCCDF Checklist",
    "OVAL": "OVAL Definitions",
    "CPE_DICT": "CPE Dictionary",
    "TAILORING": "XCCDF Tailoring",
}


class ContentHandlingError(Exception):
    """Raised when handling of SCAP content fails."""


class ContentCheckError(ContentHandlingError):
    """Raised when a check of the content (integrity, ...) fails."""


class ParseHTMLContent(HTMLParser):
    """HTML parser that collects plain text from the markup fed to it.

    Text found between tags is appended to ``content`` with its surrounding
    whitespace stripped. Selected ``html:``-prefixed tags append a newline
    so that lists and line breaks stay readable.
    """

    # Tags that append a newline when they are opened.
    _NEWLINE_ON_START = frozenset(("html:ul", "html:li", "html:br"))
    # Tags that append a newline when they are closed.
    _NEWLINE_ON_END = frozenset(("html:ul", "html:li"))

    def __init__(self):
        super().__init__()
        # Text accumulated so far.
        self.content = ""

    def handle_starttag(self, tag, attrs):
        """Append a newline for an opening list, list item or break tag."""
        if tag in self._NEWLINE_ON_START:
            self.content += "\n"

    def handle_endtag(self, tag):
        """Append a newline for a closing list or list item tag."""
        if tag in self._NEWLINE_ON_END:
            self.content += "\n"

    def handle_data(self, data):
        """Append text data with leading/trailing whitespace removed."""
        stripped = data.strip()
        self.content += stripped

    def get_content(self):
        """Return the text accumulated so far."""
        return self.content


def parse_HTML_from_content(content):
    """This is a very simple HTML to text parser.

    HTML tags will be removed while trying to maintain readability
    of content.

    :param content: content whose HTML tags will be parsed
    :return: content without HTML tags
    """

    parser = ParseHTMLContent()
    parser.feed(content)
    # feed() may keep trailing text buffered while it waits for more input
    # (e.g. text ending in an unterminated "&..." that could still become a
    # character reference). close() signals end of input so that the
    # remaining text is processed instead of being silently dropped.
    parser.close()
    return parser.get_content()


# Record type describing the content files that were found, with one field
# each for the XCCDF file, the CPE dictionary and the tailoring file.
# pylint: disable-msg=C0103
ContentFiles = namedtuple("ContentFiles", ("xccdf", "cpe", "tailoring"))


def identify_files(fpaths):
    """Map each given file path to its document type.

    :param fpaths: iterable of file paths to examine
    :return: dictionary with the paths as keys and the values returned
        by get_doc_type() for them as values
    :rtype: dict
    """
    doc_types = {}
    for fpath in fpaths:
        doc_types[fpath] = get_doc_type(fpath)
    return doc_types


def get_doc_type(file_path):
    """Determine the document type of a file using 'oscap info'.

    The output of 'oscap info <file_path>' is scanned for the first line
    starting with "Document type:" and the text following the colon is
    used as the type. Errors are not propagated to the caller.

    :param file_path: path of the file to examine
    :return: the reported document type or "unknown" if it could not be
        determined
    :rtype: str
    """
    content_type = "unknown"
    try:
        for output_line in execReadlines("oscap", ["info", file_path]):
            if not output_line.startswith("Document type:"):
                continue
            # everything after the first colon is the type itself
            content_type = output_line.partition(":")[2].strip()
            break
    except (OSError, UnicodeDecodeError):
        # OSError: 'oscap info' exited with a non-zero exit code -> unknown
        # doc type.
        # UnicodeDecodeError: 'oscap info' supplied weird output, which
        # happens when it tries to explain why it can't examine e.g. a JPG.
        pass
    except Exception as e:
        log.warning(f"OSCAP addon: Unexpected error when looking at {file_path}: {str(e)}")
    log.info("OSCAP addon: Identified {file_path} as {content_type}"
             .format(file_path=file_path, content_type=content_type))
    return content_type


def explore_content_files(fpaths):
    """
    Pick content files out of a list of file paths.

    For every kind of content the first suitable file wins. The only
    exception is that a data stream is preferred over a standalone XCCDF
    benchmark: the first data stream found replaces a previously chosen
    XCCDF checklist.

    :param fpaths: a list of file paths to search for content files in
    :type fpaths: [str]
    :return: ContentFiles instance containing the file names of the XCCDF file,
        CPE dictionary and tailoring file or "" in place of those items
        if not found
    :rtype: ContentFiles

    """
    chosen_xccdf = ""
    chosen_cpe = ""
    chosen_tailoring = ""
    have_datastream = False

    for candidate in fpaths:
        kind = get_doc_type(candidate)
        if not kind:
            continue

        if kind == "Source Data Stream":
            # prefer DS over standalone XCCDF
            if not (chosen_xccdf and have_datastream):
                chosen_xccdf = candidate
                have_datastream = True
        elif kind == "XCCDF Checklist":
            if not chosen_xccdf:
                chosen_xccdf = candidate
        elif kind == "CPE Dictionary":
            if not chosen_cpe:
                chosen_cpe = candidate
        elif kind == "XCCDF Tailoring":
            if not chosen_tailoring:
                chosen_tailoring = candidate

    # TODO: raise exception if no xccdf_file is found?
    return ContentFiles(chosen_xccdf, chosen_cpe, chosen_tailoring)


1			#
2			# Copyright (C) 2013 Red Hat, Inc.
3			#
4			# This copyrighted material is made available to anyone wishing to use,
5			# modify, copy, or redistribute it subject to the terms and conditions of
6			# the GNU General Public License v.2, or (at your option) any later version.
7			# This program is distributed in the hope that it will be useful, but WITHOUT
8			# ANY WARRANTY expressed or implied, including the implied warranties of
9			# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
10			# Public License for more details. You should have received a copy of the
11			# GNU General Public License along with this program; if not, write to the
12			# Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
13			# 02110-1301, USA. Any Red Hat trademarks that are incorporated in the
14			# source code or documentation are not subject to the GNU General Public
15			# License and may only be used or replicated with the express permission of
16			# Red Hat, Inc.
17			#
18			# Red Hat Author(s): Vratislav Podzimek <[email protected]>
19			#
20
21			"""
22			Module with various classes for SCAP content processing and retrieving data
23			from it.
24
25			"""
26
27			import os.path
28
29			from collections import namedtuple
30			import multiprocessing
31
32			from pyanaconda.core.util import execReadlines
33			try:
34			from html.parser import HTMLParser
35			except ImportError:
36			from HTMLParser import HTMLParser
37
38			import logging
39			log = logging.getLogger("anaconda")
40
41
42			CONTENT_TYPES = dict(
43			DATASTREAM="Source Data Stream",
44			XCCDF_CHECKLIST="XCCDF Checklist",
45			OVAL="OVAL Definitions",
46			CPE_DICT="CPE Dictionary",
47			TAILORING="XCCDF Tailoring",
48			)
49
50
51			class ContentHandlingError(Exception):
52			"""Exception class for errors related to SCAP content handling."""
53
54			pass
55
56
57			class ContentCheckError(ContentHandlingError):
58			"""
59			Exception class for errors related to content (integrity,...) checking.
60			"""
61
62			pass
63
64
65			class ParseHTMLContent(HTMLParser):
66			"""Parser class for HTML tags within content"""
67
68			def __init__(self):
69			HTMLParser.__init__(self)
70			self.content = ""
71
72			def handle_starttag(self, tag, attrs):
73			if tag == "html:ul":
74			self.content += "\n"
75			elif tag == "html:li":
76			self.content += "\n"
77			elif tag == "html:br":
78			self.content += "\n"
79
80			def handle_endtag(self, tag):
81			if tag == "html:ul":
82			self.content += "\n"
83			elif tag == "html:li":
84			self.content += "\n"
85
86			def handle_data(self, data):
87			self.content += data.strip()
88
89			def get_content(self):
90			return self.content
91
92
93			def parse_HTML_from_content(content):
94			"""This is a very simple HTML to text parser.
95
96			HTML tags will be removed while trying to maintain readability
97			of content.
98
99			:param content: content whose HTML tags will be parsed
100			:return: content without HTML tags
101			"""
102
103			parser = ParseHTMLContent()
104			parser.feed(content)
105			return parser.get_content()
106
107
108			# namedtuple class for info about content files found
109			# pylint: disable-msg=C0103
110			ContentFiles = namedtuple("ContentFiles", ["xccdf", "cpe", "tailoring"])
111
112
113			def identify_files(fpaths):
114			result = {path: get_doc_type(path) for path in fpaths}
115			return result
116
117
118			def get_doc_type(file_path):
119			content_type = "unknown"
120			try:
121			for line in execReadlines("oscap", ["info", file_path]):
122			if line.startswith("Document type:"):
123			_prefix, _sep, type_info = line.partition(":")
124			content_type = type_info.strip()
125			break
126			except OSError:
127			# 'oscap info' exited with a non-zero exit code -> unknown doc
128			# type
129			pass
130			except UnicodeDecodeError:
131			# 'oscap info' supplied weird output, which happens when it tries
132			# to explain why it can't examine e.g. a JPG.
133			pass
134			except Exception as e:
135			log.warning(f"OSCAP addon: Unexpected error when looking at {file_path}: {str(e)}")
136			log.info("OSCAP addon: Identified {file_path} as {content_type}"
137			.format(file_path=file_path, content_type=content_type))
138			return content_type
139
140
141			def explore_content_files(fpaths):
142			"""
143			Function for finding content files in a list of file paths. SIMPLY PICKS
144			THE FIRST USABLE CONTENT FILE OF A PARTICULAR TYPE AND JUST PREFERS DATA
145			STREAMS OVER STANDALONE BENCHMARKS.
146
147			:param fpaths: a list of file paths to search for content files in
148			:type fpaths: [str]
149			:return: ContentFiles instance containing the file names of the XCCDF file,
150			CPE dictionary and tailoring file or "" in place of those items
151			if not found
152			:rtype: ContentFiles
153
154			"""
155			xccdf_file = ""
156			cpe_file = ""
157			tailoring_file = ""
158			found_ds = False
159
160			for fpath in fpaths:
161			doc_type = get_doc_type(fpath)
162			if not doc_type:
163			continue
164
165			# prefer DS over standalone XCCDF
166			if doc_type == "Source Data Stream" and (not xccdf_file or not found_ds):
167			xccdf_file = fpath
168			found_ds = True
169			elif doc_type == "XCCDF Checklist" and not xccdf_file:
170			xccdf_file = fpath
171			elif doc_type == "CPE Dictionary" and not cpe_file:
172			cpe_file = fpath
173			elif doc_type == "XCCDF Tailoring" and not tailoring_file:
174			tailoring_file = fpath
175
176			# TODO: raise exception if no xccdf_file is found?
177			files = ContentFiles(xccdf_file, cpe_file, tailoring_file)
178			return files
179

OpenSCAP / oscap-anaconda-addon

Pull Request — master (#178)

org_fedora_oscap.content_handling A

Complexity

Size/Duplication

Importance

5 Methods

4 Functions

Duplication Side-by-Side

Filter issues like