Completed
Push — rhel8-branch ( b8bdaf...ff1c74 )
by Matěj
25s queued 21s
created

org_fedora_oscap.content_handling   A

Complexity

Total Complexity 26

Size/Duplication

Total Lines 143
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 26
eloc 68
dl 0
loc 143
rs 10
c 0
b 0
f 0

2 Functions

Rating   Name   Duplication   Size   Complexity  
F explore_content_files() 0 55 15
A parse_HTML_from_content() 0 13 1

5 Methods

Rating   Name   Duplication   Size   Complexity  
A ParseHTMLContent.handle_data() 0 2 1
A ParseHTMLContent.handle_endtag() 0 5 3
A ParseHTMLContent.handle_starttag() 0 7 4
A ParseHTMLContent.__init__() 0 3 1
A ParseHTMLContent.get_content() 0 2 1
1
#
2
# Copyright (C) 2013  Red Hat, Inc.
3
#
4
# This copyrighted material is made available to anyone wishing to use,
5
# modify, copy, or redistribute it subject to the terms and conditions of
6
# the GNU General Public License v.2, or (at your option) any later version.
7
# This program is distributed in the hope that it will be useful, but WITHOUT
8
# ANY WARRANTY expressed or implied, including the implied warranties of
9
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
10
# Public License for more details.  You should have received a copy of the
11
# GNU General Public License along with this program; if not, write to the
12
# Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
13
# 02110-1301, USA.  Any Red Hat trademarks that are incorporated in the
14
# source code or documentation are not subject to the GNU General Public
15
# License and may only be used or replicated with the express permission of
16
# Red Hat, Inc.
17
#
18
# Red Hat Author(s): Vratislav Podzimek <[email protected]>
19
#
20
21
"""
22
Module with various classes for SCAP content processing and retrieving data
23
from it.
24
25
"""
26
27
import os.path
28
29
from collections import namedtuple
30
from pyanaconda.core.util import execReadlines
31
try:
32
    from html.parser import HTMLParser
33
except ImportError:
34
    from HTMLParser import HTMLParser
35
36
import logging
37
log = logging.getLogger("anaconda")
38
39
40
class ParseHTMLContent(HTMLParser):
    """Parser class for HTML tags within content.

    Accumulates the text of the fed markup in ``self.content``. The list
    and line-break tags (``html:ul``, ``html:li``, ``html:br``) are turned
    into newlines, every other tag is dropped.
    """

    # tags producing a newline when they are opened
    _NEWLINE_ON_START = ("html:ul", "html:li", "html:br")
    # tags producing a newline when they are closed
    _NEWLINE_ON_END = ("html:ul", "html:li")

    def __init__(self):
        # explicit base-class call so that it works with whichever
        # HTMLParser implementation got imported at the top of the module
        HTMLParser.__init__(self)
        self.content = ""

    def handle_starttag(self, tag, attrs):
        """Add a newline for an opening list/line-break tag.

        :param tag: name of the tag being opened
        :param attrs: attributes of the tag (ignored)
        """
        if tag in self._NEWLINE_ON_START:
            self.content += "\n"

    def handle_endtag(self, tag):
        """Add a newline for a closing list tag.

        :param tag: name of the tag being closed
        """
        if tag in self._NEWLINE_ON_END:
            self.content += "\n"

    def handle_data(self, data):
        """Append a chunk of text found between tags.

        :param data: the text chunk
        """
        # leading and trailing whitespace of every chunk is dropped
        self.content += data.strip()

    def get_content(self):
        """Return the text gathered so far.

        :return: text with the HTML tags removed
        """
        return self.content


def parse_HTML_from_content(content):
    """This is a very simple HTML to text parser.

    HTML tags will be removed while trying to maintain readability
    of content.

    :param content: content whose HTML tags will be parsed
    :return: content without HTML tags
    """

    parser = ParseHTMLContent()
    parser.feed(content)
    # feed() may keep the tail of the input buffered (e.g. text ending with
    # an unterminated '&' such as "R&D") waiting for more data; close()
    # forces the parser to process it so that no text gets lost
    parser.close()
    return parser.get_content()
81
82
83
# namedtuple class for info about content files found
84
# pylint: disable-msg=C0103
85
# record with the paths of the XCCDF file, the CPE dictionary and the
# tailoring file (in this order)
ContentFiles = namedtuple("ContentFiles", "xccdf cpe tailoring")
86
87
88
def explore_content_files(fpaths):
    """
    Look for content files among the given file paths.

    The first usable file of every type wins; the only exception is that
    a source data stream takes precedence over a standalone XCCDF
    benchmark, even if the benchmark was seen earlier.

    :param fpaths: a list of file paths to search for content files in
    :type fpaths: [str]
    :return: ContentFiles instance containing the file names of the XCCDF file,
        CPE dictionary and tailoring file or "" in place of those items
        if not found
    :rtype: ContentFiles

    """

    def identify(path):
        # ask 'oscap info' about the file and pick the value of the
        # "Document type:" line from its output
        detected = "unknown"
        try:
            for output_line in execReadlines("oscap", ["info", path]):
                if not output_line.startswith("Document type:"):
                    continue
                detected = output_line.partition(":")[2].strip()
                break
        except OSError:
            # 'oscap info' exited with a non-zero exit code -> the document
            # type stays unknown
            pass
        log.info("OSCAP addon: Identified {file_path} as {content_type}"
                 .format(file_path=path, content_type=detected))
        return detected

    chosen_xccdf = ""
    chosen_cpe = ""
    chosen_tailoring = ""
    have_data_stream = False

    for candidate in fpaths:
        kind = identify(candidate)
        if not kind:
            continue

        if kind == "Source Data Stream":
            # a data stream replaces a standalone XCCDF chosen earlier,
            # but never another data stream
            if not (chosen_xccdf and have_data_stream):
                chosen_xccdf = candidate
                have_data_stream = True
        elif kind == "XCCDF Checklist":
            if not chosen_xccdf:
                chosen_xccdf = candidate
        elif kind == "CPE Dictionary":
            if not chosen_cpe:
                chosen_cpe = candidate
        elif kind == "XCCDF Tailoring":
            if not chosen_tailoring:
                chosen_tailoring = candidate

    # TODO: raise exception if no xccdf_file is found?
    return ContentFiles(chosen_xccdf, chosen_cpe, chosen_tailoring)
143