Passed
Pull Request — master (#219)
by Matěj
01:19
created

ContentBringer.reduce_files()   A

Complexity

Conditions 5

Size

Total Lines 13
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 5
eloc 11
nop 4
dl 0
loc 13
rs 9.3333
c 0
b 0
f 0
1
import threading
2
import logging
3
import pathlib
4
import shutil
5
import os
6
from glob import glob
7
from typing import List
8
9
from pyanaconda.core import constants
10
from pyanaconda.threading import threadMgr
11
from pykickstart.errors import KickstartValueError
12
13
from org_fedora_oscap import data_fetch, utils
14
from org_fedora_oscap import common
15
from org_fedora_oscap import content_handling
16
from org_fedora_oscap.content_handling import CONTENT_TYPES
17
from org_fedora_oscap import rule_handling
18
19
from org_fedora_oscap.common import _
20
21
log = logging.getLogger("anaconda")
22
23
24
def is_network(scheme):
    """
    Tell whether the given URL scheme is one of the network schemes.

    Args:
        scheme: The scheme part of an URL (the text before "://").

    Returns:
        True if the scheme starts with any of data_fetch.NET_URL_PREFIXES,
        False otherwise.
    """
    for net_prefix in data_fetch.NET_URL_PREFIXES:
        if scheme.startswith(net_prefix):
            return True
    return False
28
29
30
def clear_all(data):
    """
    Reset all content-related attributes of the addon data object.

    The string attributes are set to empty strings, rule_data is replaced
    by a fresh rule_handling.RuleData instance and dry_run is switched off.

    Args:
        data: The addon data object that gets reset in place.
    """
    # Kept in the order in which the attributes were originally assigned.
    blank_attributes = (
        "content_type",
        "content_url",
        "datastream_id",
        "xccdf_id",
        "profile_id",
        "content_path",
        "cpe_path",
        "tailoring_path",
        "fingerprint",
        "certificates",
    )
    for attribute in blank_attributes:
        setattr(data, attribute, "")

    # internal values
    data.rule_data = rule_handling.RuleData()
    data.dry_run = False
47
48
49
class ContentBringer:
    """
    Fetch the security content and compile it into an ObtainedContent.

    The class downloads (or copies) the content referenced by the addon data
    into CONTENT_DOWNLOAD_LOCATION, optionally verifies its fingerprint,
    extracts archives, labels the discovered files and narrows them down
    according to the paths the addon data asks for.
    """
    CONTENT_DOWNLOAD_LOCATION = pathlib.Path(common.INSTALLATION_CONTENT_DIR)
    DEFAULT_SSG_DATA_STREAM_PATH = f"{common.SSG_DIR}/{common.SSG_CONTENT}"

    def __init__(self, addon_data):
        """
        Args:
            addon_data: The addon data object; its content_url, content_path,
                tailoring_path and cpe_path attributes are read, and
                content_path, tailoring_path and content_type may be written.
        """
        self.content_uri_scheme = ""
        self.content_uri_path = ""
        self.fetched_content = ""

        # The lock guards the now_fetching_or_processing flag.
        self.activity_lock = threading.Lock()
        self.now_fetching_or_processing = False

        self.CONTENT_DOWNLOAD_LOCATION.mkdir(parents=True, exist_ok=True)

        self._addon_data = addon_data

    def get_content_type(self, url):
        """
        Classify the content by the suffix of its URL or file name.

        Returns:
            "rpm", "archive" (suffix listed in common.SUPPORTED_ARCHIVES)
            or "file" for anything else.
        """
        if url.endswith(".rpm"):
            return "rpm"
        if any(url.endswith(arch_type) for arch_type in common.SUPPORTED_ARCHIVES):
            return "archive"
        return "file"

    @property
    def content_uri(self):
        """The content URI assembled from the stored scheme and path."""
        return self.content_uri_scheme + "://" + self.content_uri_path

    @content_uri.setter
    def content_uri(self, uri):
        """
        Split the URI at the first "://" into the scheme and the path.

        Raises:
            ValueError: If the URI doesn't contain the "://" separator.
        """
        scheme, separator, path = uri.partition("://")
        if not separator:
            # Same exception type as the failed tuple unpacking used to
            # produce, but with a message that names the offending value.
            msg = f"Expected an URI in the 'scheme://path' form, got '{uri}'"
            raise ValueError(msg)
        self.content_uri_path = path
        self.content_uri_scheme = scheme

    def fetch_content(self, what_if_fail, ca_certs_path=""):
        """
        Initiate fetch of the content into an appropriate directory

        The download directory is wiped and recreated before the fetch starts.

        Args:
            what_if_fail: Callback accepting exception as an argument that
                should handle them in the calling layer.
            ca_certs_path: Path to the HTTPS certificate file

        Returns:
            Whatever _fetch_files returns - the name of the fetching thread,
            or None if no fetch has been started.
        """
        self.content_uri = self._addon_data.content_url
        shutil.rmtree(self.CONTENT_DOWNLOAD_LOCATION, ignore_errors=True)
        self.CONTENT_DOWNLOAD_LOCATION.mkdir(parents=True, exist_ok=True)
        fetching_thread_name = self._fetch_files(
            self.content_uri_scheme, self.content_uri_path,
            self.CONTENT_DOWNLOAD_LOCATION, ca_certs_path, what_if_fail)
        return fetching_thread_name

    def _fetch_files(self, scheme, path, destdir, ca_certs_path, what_if_fail):
        """
        Start the fetch unless another fetch or processing is in progress.

        Returns:
            The value returned by _start_actual_fetch, or None if a fetch
            is already in progress or if starting the fetch failed
            (the exception is handed over to what_if_fail in that case).
        """
        with self.activity_lock:
            if self.now_fetching_or_processing:
                msg = "OSCAP Addon: Strange, it seems that we are already fetching something."
                log.warning(msg)
                return None
            self.now_fetching_or_processing = True

        fetching_thread_name = None
        try:
            fetching_thread_name = self._start_actual_fetch(scheme, path, destdir, ca_certs_path)
        except Exception as exc:
            # The fetch never started, so nobody else would clear the flag.
            with self.activity_lock:
                self.now_fetching_or_processing = False
            what_if_fail(exc)

        # We are not finished yet with the fetch
        return fetching_thread_name

    def _start_actual_fetch(self, scheme, path, destdir, ca_certs_path):
        """
        Dispatch the fetch to the network or to the local data fetcher.

        The destination is destdir joined with the last component of path.

        Raises:
            KickstartValueError: If the path has no "/" or ends with it,
                so no file name can be deduced.
        """
        url = scheme + "://" + path

        if "/" not in path:
            msg = f"Missing the path component of the '{url}' URL"
            raise KickstartValueError(msg)
        basename = path.rsplit("/", 1)[1]
        if not basename:
            msg = f"Unable to deduce basename from the '{url}' URL"
            raise KickstartValueError(msg)

        dest = destdir / basename

        if is_network(scheme):
            return data_fetch.wait_and_fetch_net_data(url, dest, ca_certs_path)
        # invalid schemes are handled down the road
        return data_fetch.fetch_local_data(url, dest)

    def finish_content_fetch(self, fetching_thread_name, fingerprint, report_callback, dest_filename,
                             what_if_fail):
        """
        Finish any ongoing fetch and analyze what has been fetched.

        After the fetch is completed, it verifies the fetched content if applicable,
        analyzes it and compiles it into an instance of ObtainedContent.

        Args:
            fetching_thread_name: Name of the fetching thread
                or None if we are only after the analysis
            fingerprint: A checksum for downloaded file verification
            report_callback: Means for the method to send user-relevant messages outside
            dest_filename: The target of the fetch operation. Can be falsy -
                in this case there is no content filename defined
            what_if_fail: Callback accepting exception as an argument
                that should handle them in the calling layer.

        Returns:
            Instance of ObtainedContent if everything went well, or None.
        """
        try:
            content = self._finish_actual_fetch(fetching_thread_name, fingerprint, report_callback, dest_filename)
        except Exception as exc:
            what_if_fail(exc)
            content = None
        finally:
            # Whatever the outcome, the fetch/processing cycle is over.
            with self.activity_lock:
                self.now_fetching_or_processing = False

        return content

    def _verify_fingerprint(self, dest_filename, fingerprint=""):
        """
        Compare the fingerprint of the file with the expected one.

        Nothing is checked if the expected fingerprint is falsy.

        Raises:
            content_handling.ContentCheckError: If the fingerprints differ.
        """
        if not fingerprint:
            log.info("OSCAP Addon: No fingerprint provided, skipping integrity check")
            return

        hash_obj = utils.get_hashing_algorithm(fingerprint)
        digest = utils.get_file_fingerprint(dest_filename,
                                            hash_obj)
        if digest != fingerprint:
            log.error(
                "OSCAP Addon: "
                f"File {dest_filename} failed integrity check - assumed a "
                f"{hash_obj.name} hash and '{fingerprint}', got '{digest}'"
            )
            # The translation has to be looked up using the constant template,
            # an already interpolated f-string never matches a catalog entry.
            msg = _(
                "OSCAP Addon: Integrity check of the content failed - "
                "{hash_name} hash didn't match"
            ).format(hash_name=hash_obj.name)
            raise content_handling.ContentCheckError(msg)
        log.info(f"Integrity check passed using {hash_obj.name} hash")

    def filter_discovered_content(self, labelled_files):
        """
        Narrow the labelled files down using the paths set in the addon data.

        For each of content_path, tailoring_path and cpe_path that is set,
        other files of the respective categories are dropped.

        Args:
            labelled_files: Mapping of relative file paths to their labels.

        Returns:
            The reduced mapping.
        """
        expectations = (
            ("content_path", (CONTENT_TYPES["DATASTREAM"], CONTENT_TYPES["XCCDF_CHECKLIST"])),
            ("tailoring_path", (CONTENT_TYPES["TAILORING"], )),
            ("cpe_path", (CONTENT_TYPES["CPE_DICT"], )),
        )
        for attribute_name, categories in expectations:
            expected_path = getattr(self._addon_data, attribute_name)
            if expected_path:
                labelled_files = self.reduce_files(labelled_files, expected_path, categories)

        return labelled_files

    def reduce_files(self, labelled_files, expected_path, categories):
        """
        Drop files of the given categories except for the expected one.

        Args:
            labelled_files: Mapping of file paths to their labels.
            expected_path: The path that has to be present in the mapping.
            categories: Labels of which only the expected path may remain.

        Returns:
            New dictionary without the competing files; files with labels
            outside of the categories are kept.

        Raises:
            RuntimeError: If the expected path is not among the files.
        """
        if expected_path not in labelled_files:
            msg = (
                f"Expected a file {expected_path} to be part of the supplied content, "
                f"but it was not the case, got only {list(labelled_files)}"
            )
            raise RuntimeError(msg)
        return {
            path: label
            for path, label in labelled_files.items()
            if label not in categories or path == expected_path
        }

    def _finish_actual_fetch(self, wait_for, fingerprint, report_callback, dest_filename):
        """
        Wait for the fetch, verify the result and build the ObtainedContent.

        Args:
            wait_for: Name of the thread to wait for; falsy values skip
                the waiting.
            fingerprint: Expected fingerprint of dest_filename, can be falsy.
            report_callback: Not used by this method.
            dest_filename: The fetched file, can be falsy.

        Returns:
            Instance of ObtainedContent with the discovered files added.
        """
        if wait_for:
            log.info(f"OSCAP Addon: Waiting for thread {wait_for}")
            threadMgr.wait(wait_for)
            log.info(f"OSCAP Addon: Finished waiting for thread {wait_for}")
        actually_fetched_content = wait_for is not None

        if fingerprint and dest_filename:
            self._verify_fingerprint(dest_filename, fingerprint)

        fpaths = self._gather_available_files(actually_fetched_content, dest_filename)

        structured_content = ObtainedContent(self.CONTENT_DOWNLOAD_LOCATION)
        content_type = self.get_content_type(str(dest_filename))
        log.info("OSCAP Addon: started to look at the content")
        if content_type in ("archive", "rpm"):
            structured_content.add_content_archive(dest_filename)

        labelled_filenames = content_handling.identify_files(fpaths)
        # The filtering works with paths relative to the download location.
        labelled_relative_filenames = {
            os.path.relpath(path, self.CONTENT_DOWNLOAD_LOCATION): label
            for path, label in labelled_filenames.items()}
        labelled_relative_filenames = self.filter_discovered_content(labelled_relative_filenames)

        for rel_fname, label in labelled_relative_filenames.items():
            fname = self.CONTENT_DOWNLOAD_LOCATION / rel_fname
            structured_content.add_file(str(fname), label)

        if fingerprint and dest_filename:
            structured_content.record_verification(dest_filename)

        log.info("OSCAP Addon: finished looking at the content")
        return structured_content

    def _gather_available_files(self, actually_fetched_content, dest_filename):
        """
        List the files that are available for the analysis.

        Args:
            actually_fetched_content: Whether a fetch has just taken place.
            dest_filename: The target of the fetch, can be falsy.

        Returns:
            List of file paths as strings when nothing was fetched or when
            a single file was fetched; for archives the result of
            common.extract_data is returned as it is.

        Raises:
            common.ExtractionError: If the archive extraction fails.
        """
        fpaths = []
        if not actually_fetched_content:
            if not dest_filename:  # using scap-security-guide
                fpaths = [self.DEFAULT_SSG_DATA_STREAM_PATH]
            else:  # Using downloaded XCCDF/OVAL/DS/tailoring
                found_paths = pathlib.Path(self.CONTENT_DOWNLOAD_LOCATION).rglob("*")
                fpaths = [str(p) for p in found_paths if p.is_file()]
        else:
            dest_filename = pathlib.Path(dest_filename)
            # RPM is an archive at this phase
            content_type = self.get_content_type(str(dest_filename))
            if content_type in ("archive", "rpm"):
                try:
                    fpaths = common.extract_data(
                        str(dest_filename),
                        str(dest_filename.parent)
                    )
                except common.ExtractionError as err:
                    msg = f"Failed to extract the '{dest_filename}' archive: {str(err)}"
                    log.error("OSCAP Addon: " + msg)
                    raise

            elif content_type == "file":
                fpaths = [str(dest_filename)]
            else:
                raise common.OSCAPaddonError("Unsupported content type")
        return fpaths

    def use_downloaded_content(self, content):
        """
        Point the addon data to the preferred files of the obtained content.

        Args:
            content: Instance of ObtainedContent.
        """
        preferred_content = self.get_preferred_content(content)

        # We know that we have ended up with a datastream-like content,
        # but the content type is not switched to "datastream" here -
        # presumably because an archive can't be converted to a datastream.
        self._addon_data.content_path = str(preferred_content.relative_to(content.root))

        preferred_tailoring = self.get_preferred_tailoring(content)
        if content.tailoring:
            self._addon_data.tailoring_path = str(preferred_tailoring.relative_to(content.root))

    def use_system_content(self, content=None):
        """
        Reset the addon data and make it use the scap-security-guide content.

        Args:
            content: Not used by this method.
        """
        clear_all(self._addon_data)
        self._addon_data.content_type = "scap-security-guide"
        self._addon_data.content_path = common.get_ssg_path()

    def get_preferred_content(self, content):
        """
        Return the content file requested by the addon data, or the main
        usable content if the addon data doesn't specify the content path.
        """
        if self._addon_data.content_path:
            return content.find_expected_usable_content(self._addon_data.content_path)
        return content.select_main_usable_content()

    def get_preferred_tailoring(self, content):
        """
        Return the tailoring of the content after checking it is the expected one.

        Returns:
            content.tailoring, which may be None if the addon data
            doesn't request any tailoring.

        Raises:
            content_handling.ContentHandlingError: If the addon data requests
                a tailoring, and the content has no tailoring or a different one.
        """
        tailoring_path = self._addon_data.tailoring_path
        found_tailoring = content.tailoring
        if tailoring_path:
            # No tailoring at all has to end up as a handled error as well,
            # not as an AttributeError raised on None.
            if (not found_tailoring
                    or tailoring_path != str(found_tailoring.relative_to(content.root))):
                msg = f"Expected a tailoring {tailoring_path}, but it couldn't be found"
                raise content_handling.ContentHandlingError(msg)
        return found_tailoring
322
323
324
class ObtainedContent:
    """
    This class aims to assist the discovery of the gathered files -
    the addon can download files directly, or they can be extracted from an archive.
    The class enables the user to quickly understand what is available,
    and whether the current set of contents is usable for further processing.
    """
    def __init__(self, root):
        """
        Args:
            root: Directory that relative content paths are resolved against.
        """
        self.labelled_files = dict()
        # The single-file attributes stay None until such a file is recorded.
        self.datastream = None  # type: pathlib.Path
        self.xccdf = None  # type: pathlib.Path
        self.ovals = []  # type: List[pathlib.Path]
        self.tailoring = None  # type: pathlib.Path
        self.archive = None  # type: pathlib.Path
        self.verified = None  # type: pathlib.Path
        self.root = pathlib.Path(root)

    def record_verification(self, path):
        """
        Declare a file as verified (typically by means of a checksum)

        The file has to be among the labelled files already.
        """
        path = pathlib.Path(path)
        assert path in self.labelled_files, \
            f"Can't record verification of an unknown file '{path}'"
        self.verified = path

    def add_content_archive(self, fname):
        """
        If files come from an archive, record this information using this function.

        The archive is stored among the labelled files with None as its label.
        """
        path = pathlib.Path(fname)
        self.labelled_files[path] = None
        self.archive = path

    def _assign_content_type(self, attribute_name, new_value):
        """
        Set a single-file attribute, refusing to replace a different file.

        Raises:
            content_handling.ContentHandlingError: If the attribute already
                holds a value that differs from the new one.
        """
        old_value = getattr(self, attribute_name)
        if old_value and old_value != new_value:
            msg = (
                f"When dealing with {attribute_name}, "
                f"there was already the {old_value.name} when setting the new {new_value.name}")
            raise content_handling.ContentHandlingError(msg)
        setattr(self, attribute_name, new_value)

    def add_file(self, fname, label=None):
        """
        Record a file and assign it to the attribute matching its label.

        Args:
            fname: Path to the file.
            label: One of the content_handling.CONTENT_TYPES values;
                content_handling.identify_files is consulted if it is falsy.
        """
        if not label:
            label = content_handling.identify_files([fname])[fname]
        path = pathlib.Path(fname)
        if label == content_handling.CONTENT_TYPES["TAILORING"]:
            self._assign_content_type("tailoring", path)
        elif label == content_handling.CONTENT_TYPES["DATASTREAM"]:
            self._assign_content_type("datastream", path)
        elif label == content_handling.CONTENT_TYPES["OVAL"]:
            self.ovals.append(path)
        elif label == content_handling.CONTENT_TYPES["XCCDF_CHECKLIST"]:
            self._assign_content_type("xccdf", path)
        # Files with any other label are only recorded.
        self.labelled_files[path] = label

    def _datastream_content(self):
        """Return the datastream if it is known and exists, None otherwise."""
        if not self.datastream:
            return None
        if not self.datastream.exists():
            return None
        return self.datastream

    def _xccdf_content(self):
        """
        Return the XCCDF file if it exists along with at least
        one existing OVAL file, None otherwise.
        """
        if not self.xccdf or not self.ovals:
            return None
        some_ovals_exist = any(path.exists() for path in self.ovals)
        if not (self.xccdf.exists() and some_ovals_exist):
            return None
        return self.xccdf

    def find_expected_usable_content(self, relative_expected_content_path):
        """
        Return the expected content file if it is usable as the main content.

        Args:
            relative_expected_content_path: Path of the file relative to the root.

        Raises:
            content_handling.ContentHandlingError: If the file doesn't exist,
                or if it is neither the usable datastream nor the usable XCCDF.
        """
        content_path = self.root / relative_expected_content_path
        eligible_main_content = (self._datastream_content(), self._xccdf_content())

        if content_path in eligible_main_content:
            return content_path

        if not content_path.exists():
            msg = f"Couldn't find '{content_path}' among the available content"
        else:
            msg = (
                f"File '{content_path}' is not a valid datastream "
                "or a valid XCCDF of a XCCDF-OVAL file tuple")
        raise content_handling.ContentHandlingError(msg)

    def select_main_usable_content(self):
        """
        Return the usable datastream, or the usable XCCDF as the second choice.

        Raises:
            content_handling.ContentHandlingError: If neither is available.
        """
        # Each candidate is evaluated just once - the checks touch the
        # filesystem, so a repeated call could give a different answer.
        main_content = self._datastream_content() or self._xccdf_content()
        if main_content:
            return main_content
        msg = (
            "Couldn't find a valid datastream or a valid XCCDF-OVAL file tuple "
            "among the available content")
        raise content_handling.ContentHandlingError(msg)
420