|
1
|
|
|
# |
|
2
|
|
|
# Copyright (C) 2013 Red Hat, Inc. |
|
3
|
|
|
# |
|
4
|
|
|
# This copyrighted material is made available to anyone wishing to use, |
|
5
|
|
|
# modify, copy, or redistribute it subject to the terms and conditions of |
|
6
|
|
|
# the GNU General Public License v.2, or (at your option) any later version. |
|
7
|
|
|
# This program is distributed in the hope that it will be useful, but WITHOUT |
|
8
|
|
|
# ANY WARRANTY expressed or implied, including the implied warranties of |
|
9
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General |
|
10
|
|
|
# Public License for more details. You should have received a copy of the |
|
11
|
|
|
# GNU General Public License along with this program; if not, write to the |
|
12
|
|
|
# Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
|
13
|
|
|
# 02110-1301, USA. Any Red Hat trademarks that are incorporated in the |
|
14
|
|
|
# source code or documentation are not subject to the GNU General Public |
|
15
|
|
|
# License and may only be used or replicated with the express permission of |
|
16
|
|
|
# Red Hat, Inc. |
|
17
|
|
|
# |
|
18
|
|
|
# Red Hat Author(s): Vratislav Podzimek <[email protected]> |
|
19
|
|
|
# |
|
20
|
|
|
|
|
21
|
|
|
""" |
|
22
|
|
|
Module with various classes for SCAP content processing and retrieving data |
|
23
|
|
|
from it. |
|
24
|
|
|
|
|
25
|
|
|
""" |
|
26
|
|
|
|
|
27
|
|
|
import os.path |
|
28
|
|
|
|
|
29
|
|
|
from collections import namedtuple |
|
30
|
|
|
from pyanaconda.core.util import execReadlines |
|
31
|
|
|
try: |
|
32
|
|
|
from html.parser import HTMLParser |
|
33
|
|
|
except ImportError: |
|
34
|
|
|
from HTMLParser import HTMLParser |
|
35
|
|
|
|
|
36
|
|
|
import logging |
|
37
|
|
|
log = logging.getLogger("anaconda") |
|
38
|
|
|
|
|
39
|
|
|
|
|
40
|
|
|
class ParseHTMLContent(HTMLParser):
    """HTML parser that collects the text of the fed content.

    Text is accumulated in the ``content`` attribute; a line break is
    inserted at the tags listed in the two tuples below.
    """

    # opening tags that put a line break into the collected text
    _BREAK_ON_START = ("html:ul", "html:li", "html:br")
    # closing tags that put a line break into the collected text
    _BREAK_ON_END = ("html:ul", "html:li")

    def __init__(self):
        # explicit base-class call (not super()) as the Python 2 HTMLParser
        # fallback import is still supported by this module
        HTMLParser.__init__(self)
        self.content = ""

    def handle_starttag(self, tag, attrs):
        """Append a line break for list, list item and break opening tags."""
        if tag in self._BREAK_ON_START:
            self.content += "\n"

    def handle_endtag(self, tag):
        """Append a line break for list and list item closing tags."""
        if tag in self._BREAK_ON_END:
            self.content += "\n"

    def handle_data(self, data):
        """Append the text found between tags, stripped of outer whitespace."""
        self.content += data.strip()

    def get_content(self):
        """Return the text collected so far."""
        return self.content
|
66
|
|
|
|
|
67
|
|
|
|
|
68
|
|
|
def parse_HTML_from_content(content):
    """This is a very simple HTML to text parser.

    HTML tags will be removed while trying to maintain readability
    of content.

    :param content: content whose HTML tags will be parsed
    :return: content without HTML tags
    """

    parser = ParseHTMLContent()
    parser.feed(content)
    # feed() alone may keep the tail of the input buffered while waiting for
    # more data (e.g. trailing text with a bare '&' or an unterminated tag);
    # close() forces the parser to process everything it has been given, so
    # no text is silently dropped from the result
    parser.close()
    return parser.get_content()
|
81
|
|
|
|
|
82
|
|
|
|
|
83
|
|
|
# Record type holding the paths of the content files that were found: the
# XCCDF file, the CPE dictionary and the tailoring file (in this order)
# pylint: disable-msg=C0103
ContentFiles = namedtuple("ContentFiles", "xccdf cpe tailoring")
|
86
|
|
|
|
|
87
|
|
|
|
|
88
|
|
|
def explore_content_files(fpaths):
    """
    Choose the content files to use from a list of file paths. For every
    kind of content the first matching file wins; the only exception is that
    a source data stream replaces a previously chosen standalone XCCDF
    checklist.

    :param fpaths: a list of file paths to search for content files in
    :type fpaths: [str]
    :return: ContentFiles instance containing the file names of the XCCDF file,
             CPE dictionary and tailoring file or "" in place of those items
             if not found
    :rtype: ContentFiles

    """

    def detect_type(path):
        """Return the document type 'oscap info' reports for the file.

        "unknown" is returned if no "Document type:" line is seen or if
        running the tool raises OSError.
        """
        detected = "unknown"
        try:
            for output_line in execReadlines("oscap", ["info", path]):
                if not output_line.startswith("Document type:"):
                    continue
                # everything after the first colon is the type itself
                detected = output_line.partition(":")[2].strip()
                break
        except OSError:
            # 'oscap info' exited with a non-zero exit code -> unknown doc
            # type
            pass
        log.info("OSCAP addon: Identified {file_path} as {content_type}"
                 .format(file_path=path, content_type=detected))
        return detected

    xccdf = ""
    cpe = ""
    tailoring = ""
    have_data_stream = False

    for candidate in fpaths:
        kind = detect_type(candidate)
        if not kind:
            continue

        # a data stream takes precedence over a standalone XCCDF checklist,
        # but only the first data stream is used
        if kind == "Source Data Stream" and not (xccdf and have_data_stream):
            xccdf, have_data_stream = candidate, True
        elif kind == "XCCDF Checklist" and not xccdf:
            xccdf = candidate
        elif kind == "CPE Dictionary" and not cpe:
            cpe = candidate
        elif kind == "XCCDF Tailoring" and not tailoring:
            tailoring = candidate

    # TODO: raise exception if no xccdf_file is found?
    return ContentFiles(xccdf, cpe, tailoring)
|
143
|
|
|
|