Passed
Pull Request — master (#178)
by Matěj
01:34 queued 14s
created

org_fedora_oscap.content_discovery.clear_all()   A

Complexity

Conditions 1

Size

Total Lines 17
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 13
nop 1
dl 0
loc 17
rs 9.75
c 0
b 0
f 0
1
import threading
2
import logging
3
import pathlib
4
import shutil
5
from glob import glob
6
from typing import List
7
8
from pyanaconda.core import constants
9
from pyanaconda.threading import threadMgr
10
from pykickstart.errors import KickstartValueError
11
12
from org_fedora_oscap import data_fetch, utils
13
from org_fedora_oscap import common
14
from org_fedora_oscap import content_handling
15
from org_fedora_oscap import rule_handling
16
17
from org_fedora_oscap.common import _
18
19
log = logging.getLogger("anaconda")
20
21
22
def is_network(scheme):
    """
    Tell whether a URL scheme starts with one of the network prefixes.

    Args:
        scheme: The scheme part of a URL, i.e. the text before "://".

    Returns:
        True if the scheme starts with any item
        of data_fetch.NET_URL_PREFIXES, False otherwise.
    """
    for known_prefix in data_fetch.NET_URL_PREFIXES:
        if scheme.startswith(known_prefix):
            return True
    return False
26
27
28
def clear_all(data):
    """
    Reset the attributes of the addon data object in place.

    All string-valued settings are set to an empty string, the rule data
    are replaced by a fresh rule_handling.RuleData instance
    and the dry run flag is switched off.

    Args:
        data: The object whose attributes are going to be reset.
    """
    string_settings = (
        "content_type",
        "content_url",
        "datastream_id",
        "xccdf_id",
        "profile_id",
        "content_path",
        "cpe_path",
        "tailoring_path",
        "fingerprint",
        "certificates",
    )
    for setting_name in string_settings:
        setattr(data, setting_name, "")

    # internal values
    data.rule_data = rule_handling.RuleData()
    data.dry_run = False
45
46
47
class ContentBringer:
    """
    Fetch content into CONTENT_DOWNLOAD_LOCATION and analyze what was obtained.

    The typical flow is fetch_content() followed by finish_content_fetch(),
    which returns an ObtainedContent instance. That instance can be then
    passed to use_downloaded_content() to update the addon data.

    The now_fetching_or_processing flag, guarded by activity_lock,
    is raised when a fetch starts and lowered when finish_content_fetch()
    completes or when starting the fetch fails.
    """
    CONTENT_DOWNLOAD_LOCATION = pathlib.Path(common.INSTALLATION_CONTENT_DIR)
    DEFAULT_SSG_DATA_STREAM_PATH = f"{common.SSG_DIR}/{common.SSG_CONTENT}"

    def __init__(self, addon_data):
        """
        Args:
            addon_data: Object holding the addon settings. This class reads
                its content_url, content_path and tailoring_path attributes,
                and writes content_path and tailoring_path.
        """
        self.content_uri_scheme = ""
        self.content_uri_path = ""
        self.fetched_content = ""

        self.activity_lock = threading.Lock()
        self.now_fetching_or_processing = False

        self.CONTENT_DOWNLOAD_LOCATION.mkdir(parents=True, exist_ok=True)

        self._addon_data = addon_data

    def get_content_type(self, url):
        """
        Classify the content according to the suffix of its URL or filename.

        Returns:
            "rpm", "archive" (suffix is one of common.SUPPORTED_ARCHIVES),
            or "file" for anything else.
        """
        if url.endswith(".rpm"):
            return "rpm"
        if any(url.endswith(arch_type) for arch_type in common.SUPPORTED_ARCHIVES):
            return "archive"
        return "file"

    @property
    def content_uri(self):
        """The URI assembled from the stored scheme and path."""
        return self.content_uri_scheme + "://" + self.content_uri_path

    @content_uri.setter
    def content_uri(self, uri):
        # Split on the first "://" only, the path may contain it as well.
        # A URI without "://" makes the unpacking raise ValueError.
        scheme, path = uri.split("://", 1)
        self.content_uri_path = path
        self.content_uri_scheme = scheme

    def fetch_content(self, what_if_fail, ca_certs_path=""):
        """
        Initiate fetch of the content into an appropriate directory

        The download directory is removed and created again
        before the fetch starts, so leftovers of previous fetches are lost.

        Args:
            what_if_fail: Callback accepting exception as an argument that
                should handle them in the calling layer.
            ca_certs_path: Path to the HTTPS certificate file

        Returns:
            Name of the fetching thread, or None if the fetch was not started.

        Raises:
            ValueError: If the content URL of the addon data lacks "://".
        """
        self.content_uri = self._addon_data.content_url
        shutil.rmtree(self.CONTENT_DOWNLOAD_LOCATION, ignore_errors=True)
        self.CONTENT_DOWNLOAD_LOCATION.mkdir(parents=True, exist_ok=True)
        fetching_thread_name = self._fetch_files(
            self.content_uri_scheme, self.content_uri_path,
            self.CONTENT_DOWNLOAD_LOCATION, ca_certs_path, what_if_fail)
        return fetching_thread_name

    def _fetch_files(self, scheme, path, destdir, ca_certs_path, what_if_fail):
        """
        Start the fetch unless another fetch or processing is in progress.

        Exceptions raised when starting the fetch are not propagated,
        they are passed to the what_if_fail callback instead.

        Returns:
            Name of the fetching thread, or None if another activity
            is in progress, or if the fetch failed to start.
        """
        with self.activity_lock:
            if self.now_fetching_or_processing:
                msg = "OSCAP Addon: Strange, it seems that we are already fetching something."
                # Logger.warn is a deprecated alias of Logger.warning
                log.warning(msg)
                return
            self.now_fetching_or_processing = True

        fetching_thread_name = None
        try:
            fetching_thread_name = self._start_actual_fetch(scheme, path, destdir, ca_certs_path)
        except Exception as exc:
            # Nothing is being fetched after all, so allow subsequent attempts.
            with self.activity_lock:
                self.now_fetching_or_processing = False
            what_if_fail(exc)

        # We are not finished yet with the fetch
        return fetching_thread_name

    def _start_actual_fetch(self, scheme, path, destdir, ca_certs_path):
        """
        Work out the destination filename and start the fetch.

        Args:
            scheme: Scheme of the URL, i.e. the part before "://"
            path: The rest of the URL that follows "://"
            destdir: Directory (pathlib.Path) where the fetched file is saved
                under the basename taken from the URL.
            ca_certs_path: Path to the HTTPS certificate file,
                passed on only when the scheme is a network one.

        Returns:
            The value returned by the respective data_fetch function.

        Raises:
            KickstartValueError: If the path contains no slash,
                or if it ends with a slash, so there is no basename.
        """
        fetching_thread_name = None
        url = scheme + "://" + path

        if "/" not in path:
            msg = f"Missing the path component of the '{url}' URL"
            raise KickstartValueError(msg)
        basename = path.rsplit("/", 1)[1]
        if not basename:
            msg = f"Unable to deduce basename from the '{url}' URL"
            raise KickstartValueError(msg)

        dest = destdir / basename

        if is_network(scheme):
            fetching_thread_name = data_fetch.wait_and_fetch_net_data(
                url,
                dest,
                ca_certs_path
            )
        else:  # invalid schemes are handled down the road
            fetching_thread_name = data_fetch.fetch_local_data(
                url,
                dest,
            )
        return fetching_thread_name

    def finish_content_fetch(self, fetching_thread_name, fingerprint, report_callback, dest_filename,
                             what_if_fail):
        """
        Finish any ongoing fetch and analyze what has been fetched.

        After the fetch is completed, it verifies fetched content if applicable,
        analyzes it and compiles into an instance of ObtainedContent.

        Args:
            fetching_thread_name: Name of the fetching thread
                or None if we are only after the analysis
            fingerprint: A checksum for downloaded file verification
            report_callback: Means for the method to send user-relevant messages outside
            dest_filename: The target of the fetch operation. Can be falsy -
                in this case there is no content filename defined
            what_if_fail: Callback accepting exception as an argument
                that should handle them in the calling layer.

        Returns:
            Instance of ObtainedContent if everything went well, or None.
        """
        try:
            content = self._finish_actual_fetch(fetching_thread_name, fingerprint, report_callback, dest_filename)
        except Exception as exc:
            what_if_fail(exc)
            content = None
        finally:
            # The activity is over regardless of the outcome.
            with self.activity_lock:
                self.now_fetching_or_processing = False

        return content

    def _verify_fingerprint(self, dest_filename, fingerprint=""):
        """
        Compare the fingerprint of a file with the expected one.

        The check is skipped if the expected fingerprint is falsy.

        Raises:
            content_handling.ContentCheckError: If the fingerprints differ.
        """
        if not fingerprint:
            log.info("OSCAP Addon: No fingerprint provided, skipping integrity check")
            return

        hash_obj = utils.get_hashing_algorithm(fingerprint)
        digest = utils.get_file_fingerprint(dest_filename,
                                            hash_obj)
        if digest != fingerprint:
            log.error(
                "OSCAP Addon: "
                f"File {dest_filename} failed integrity check - assumed a "
                f"{hash_obj.name} hash and '{fingerprint}', got '{digest}'"
            )
            # Translate the constant template and interpolate afterwards -
            # an already interpolated string can't match any catalogue entry.
            msg = _(
                "OSCAP Addon: Integrity check of the content failed - "
                "{hash_name} hash didn't match"
            ).format(hash_name=hash_obj.name)
            raise content_handling.ContentCheckError(msg)
        log.info(f"Integrity check passed using {hash_obj.name} hash")

    def _finish_actual_fetch(self, wait_for, fingerprint, report_callback, dest_filename):
        """
        Wait for the fetching thread, verify the fetched file and analyze the files.

        Args:
            wait_for: Name of the thread to wait for. None means that
                nothing has been fetched during this run.
            fingerprint: Expected fingerprint of dest_filename,
                the verification is skipped if either of those is falsy.
            report_callback: Not used by this method.
            dest_filename: The target of the fetch operation, can be falsy.

        Returns:
            Instance of ObtainedContent that describes the available files.
        """
        if wait_for:
            log.info(f"OSCAP Addon: Waiting for thread {wait_for}")
            threadMgr.wait(wait_for)
            log.info(f"OSCAP Addon: Finished waiting for thread {wait_for}")
        actually_fetched_content = wait_for is not None

        if fingerprint and dest_filename:
            self._verify_fingerprint(dest_filename, fingerprint)

        fpaths = self._gather_available_files(actually_fetched_content, dest_filename)

        structured_content = ObtainedContent(self.CONTENT_DOWNLOAD_LOCATION)
        content_type = self.get_content_type(str(dest_filename))
        log.info("OSCAP Addon: started to look at the content")
        if content_type in ("archive", "rpm"):
            structured_content.add_content_archive(dest_filename)

        labelled_files = content_handling.identify_files(fpaths)
        for fname, label in labelled_files.items():
            structured_content.add_file(fname, label)

        if fingerprint and dest_filename:
            structured_content.record_verification(dest_filename)

        log.info("OSCAP Addon: finished looking at the content")
        return structured_content

    def _gather_available_files(self, actually_fetched_content, dest_filename):
        """
        Collect paths of files that are available for the analysis.

        Args:
            actually_fetched_content: Whether a fetch took place in this run.
            dest_filename: The target of the fetch operation, can be falsy
                if nothing has been fetched.

        Returns:
            List of file paths as strings, or whatever common.extract_data
            returns if the fetched file is an archive or an RPM.

        Raises:
            common.ExtractionError: If the extraction of the archive failed.
            common.OSCAPaddonError: If the content type is not recognized.
        """
        fpaths = []
        if not actually_fetched_content:
            if not dest_filename:  # using scap-security-guide
                fpaths = [self.DEFAULT_SSG_DATA_STREAM_PATH]
            else:  # Using downloaded XCCDF/OVAL/DS/tailoring
                found_paths = self.CONTENT_DOWNLOAD_LOCATION.rglob("*")
                fpaths = [str(p) for p in found_paths if p.is_file()]
        else:
            dest_filename = pathlib.Path(dest_filename)
            # RPM is an archive at this phase
            content_type = self.get_content_type(str(dest_filename))
            if content_type in ("archive", "rpm"):
                try:
                    fpaths = common.extract_data(
                        str(dest_filename),
                        str(dest_filename.parent)
                    )
                except common.ExtractionError as err:
                    msg = f"Failed to extract the '{dest_filename}' archive: {str(err)}"
                    log.error("OSCAP Addon: " + msg)
                    raise

            elif content_type == "file":
                fpaths = [str(dest_filename)]
            else:
                raise common.OSCAPaddonError("Unsupported content type")
        return fpaths

    def use_downloaded_content(self, content):
        """
        Store paths of the preferred content and tailoring in the addon data.

        The paths are stored relative to the root of the content.
        The tailoring path is left untouched if the content has no tailoring.

        Args:
            content: Instance of ObtainedContent

        Raises:
            content_handling.ContentHandlingError: If the expected content
                or tailoring isn't available, or if no usable content is found.
        """
        preferred_content = self.get_preferred_content(content)

        # We know that we have ended up with a datastream-like content,
        # but if we can't convert an archive to a datastream.
        # self._addon_data.content_type = "datastream"
        self._addon_data.content_path = str(preferred_content.relative_to(content.root))

        preferred_tailoring = self.get_preferred_tailoring(content)
        if preferred_tailoring:
            self._addon_data.tailoring_path = str(preferred_tailoring.relative_to(content.root))

    def use_system_content(self, content=None):
        """
        Reset the addon data and point them to the content of scap-security-guide.

        Args:
            content: Not used by this method.
        """
        clear_all(self._addon_data)
        self._addon_data.content_type = "scap-security-guide"
        self._addon_data.content_path = common.get_ssg_path()

    def get_preferred_content(self, content):
        """
        Get the content file that should be used.

        If the addon data define a content path, that file is looked up
        in the content, otherwise the content selects its main file.

        Args:
            content: Instance of ObtainedContent

        Returns:
            Path to the preferred content file.
        """
        if self._addon_data.content_path:
            return content.find_expected_usable_content(self._addon_data.content_path)
        return content.select_main_usable_content()

    def get_preferred_tailoring(self, content):
        """
        Get the tailoring file that should be used.

        Args:
            content: Instance of ObtainedContent

        Returns:
            Path to the tailoring of the content,
            or None if the content has no tailoring and none is expected.

        Raises:
            content_handling.ContentHandlingError: If the addon data define
                a tailoring path, and the content either has no tailoring,
                or its tailoring is a different file.
        """
        tailoring_path = self._addon_data.tailoring_path
        if tailoring_path:
            available_tailoring = content.tailoring
            # The content may come without any tailoring at all.
            tailoring_matches = (
                available_tailoring is not None
                and tailoring_path == str(available_tailoring.relative_to(content.root))
            )
            if not tailoring_matches:
                msg = f"Expected a tailoring {tailoring_path}, but it couldn't be found"
                raise content_handling.ContentHandlingError(msg)
        return content.tailoring
282
283
284
class ObtainedContent:
    """
    This class aims to assist the discovery of the gathered files -
    the addon can download files directly, or they can be extracted from an archive.
    The class enables user to quickly understand what is available,
    and whether the current set of contents is usable for further processing.
    """
    def __init__(self, root):
        """
        Args:
            root: Directory that the relative content paths are resolved against.
        """
        self.labelled_files = dict()
        self.datastream = None  # type: pathlib.Path
        self.xccdf = None  # type: pathlib.Path
        self.ovals = []  # type: List[pathlib.Path]
        self.tailoring = None  # type: pathlib.Path
        self.archive = None  # type: pathlib.Path
        self.verified = None  # type: pathlib.Path
        self.root = pathlib.Path(root)

    def record_verification(self, path):
        """
        Declare a file as verified (typically by means of a checksum)

        The file has to be already known to this object,
        which is checked by means of an assertion.
        """
        path = pathlib.Path(path)
        assert path in self.labelled_files
        self.verified = path

    def add_content_archive(self, fname):
        """
        If files come from an archive, record this information using this function.
        """
        path = pathlib.Path(fname)
        # The archive itself is tracked among files, but it carries no label.
        self.labelled_files[path] = None
        self.archive = path

    def _assign_content_type(self, attribute_name, new_value):
        """
        Set an attribute that can hold only one file.

        Raises:
            content_handling.ContentHandlingError: If the attribute
                already holds a different file.
        """
        old_value = getattr(self, attribute_name)
        if old_value and old_value != new_value:
            msg = (
                f"When dealing with {attribute_name}, "
                f"there was already the {old_value.name} when setting the new {new_value.name}")
            raise content_handling.ContentHandlingError(msg)
        setattr(self, attribute_name, new_value)

    def add_file(self, fname, label=None):
        """
        Register a file and remember its role according to the label.

        Args:
            fname: Path to the file
            label: One of values of content_handling.CONTENT_TYPES.
                If it is falsy, it is determined
                by content_handling.identify_files.

        Raises:
            content_handling.ContentHandlingError: If a different tailoring,
                datastream or XCCDF file has been already registered.
        """
        if not label:
            label = content_handling.identify_files([fname])[fname]
        path = pathlib.Path(fname)
        if label == content_handling.CONTENT_TYPES["TAILORING"]:
            self._assign_content_type("tailoring", path)
        elif label == content_handling.CONTENT_TYPES["DATASTREAM"]:
            self._assign_content_type("datastream", path)
        elif label == content_handling.CONTENT_TYPES["OVAL"]:
            self.ovals.append(path)
        elif label == content_handling.CONTENT_TYPES["XCCDF_CHECKLIST"]:
            self._assign_content_type("xccdf", path)
        self.labelled_files[path] = label

    def _datastream_content(self):
        """Return the datastream path if it is set and the file exists, else None."""
        if not self.datastream:
            return None
        if not self.datastream.exists():
            return None
        return self.datastream

    def _xccdf_content(self):
        """
        Return the XCCDF path if it is usable, else None.

        Usable means that the XCCDF file exists,
        and that at least one of registered OVAL files exists as well.
        """
        if not self.xccdf or not self.ovals:
            return None
        some_ovals_exist = any(path.exists() for path in self.ovals)
        if not (self.xccdf.exists() and some_ovals_exist):
            return None
        return self.xccdf

    def find_expected_usable_content(self, relative_expected_content_path):
        """
        Get the path to the expected content if it is usable.

        Args:
            relative_expected_content_path: Path to the content
                relative to the root.

        Returns:
            The path to the content with the root prepended.

        Raises:
            content_handling.ContentHandlingError: If the path is neither
                the usable datastream nor the usable XCCDF file.
        """
        content_path = self.root / relative_expected_content_path
        eligible_main_content = (self._datastream_content(), self._xccdf_content())

        if content_path in eligible_main_content:
            return content_path

        if not content_path.exists():
            msg = f"Couldn't find '{content_path}' among the available content"
        else:
            msg = (
                f"File '{content_path}' is not a valid datastream "
                "or a valid XCCDF of a XCCDF-OVAL file tuple")
        raise content_handling.ContentHandlingError(msg)

    def select_main_usable_content(self):
        """
        Get the path to the main content, the datastream takes precedence over XCCDF.

        Raises:
            content_handling.ContentHandlingError: If there is neither
                a usable datastream nor a usable XCCDF file.
        """
        # Probe the filesystem once per candidate, and return exactly
        # what has been checked. Paths are always truthy, None is not.
        main_content = self._datastream_content() or self._xccdf_content()
        if main_content is None:
            msg = (
                "Couldn't find a valid datastream or a valid XCCDF-OVAL file tuple "
                "among the available content")
            raise content_handling.ContentHandlingError(msg)
        return main_content
380