org_fedora_oscap.data_fetch._curl_fetch()   F
last analyzed

Complexity

Conditions 18

Size

Total Lines 86
Code Lines 50

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 50
dl 0
loc 86
rs 1.2
c 0
b 0
f 0
cc 18
nop 3

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

Complexity

Complex classes like org_fedora_oscap.data_fetch._curl_fetch() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
"""
2
Module for fetching files via HTTP and FTP. Directly or over SSL (HTTPS) with
3
server certificate validation.
4
5
"""
6
7
import re
8
import os
9
import os.path
10
import pycurl
11
12
from pyanaconda.core.configuration.anaconda import conf
13
from pyanaconda.core import constants
14
from pyanaconda.threading import threadMgr, AnacondaThread
15
from pyanaconda.modules.common.constants.services import NETWORK
16
17
from org_fedora_oscap import common
18
from org_fedora_oscap.common import _
19
from org_fedora_oscap import utils
20
21
import logging
22
log = logging.getLogger("anaconda")
23
24
25
# everything else should be private
26
__all__ = ["fetch_data", "can_fetch_from"]
27
28
# URL schemes whose fetching requires a network connection
NET_URL_PREFIXES = ("http", "https", "ftp")

# URL schemes that may be fetched without a network connection
LOCAL_URL_PREFIXES = ("file",)

# TODO: needs improvements
# Each pattern captures the scheme (group 1) and everything after "://"
# (group 2).
HTTP_URL_RE_STR = r"(https?)://(.*)"
FTP_URL_RE_STR = r"(ftp)://(.*)"
FILE_URL_RE_STR = r"(file)://(.*)"

# compiled counterparts of the pattern strings above
HTTP_URL_RE = re.compile(HTTP_URL_RE_STR)
FTP_URL_RE = re.compile(FTP_URL_RE_STR)
FILE_URL_RE = re.compile(FILE_URL_RE_STR)
43
44
45
class DataFetchError(common.OSCAPaddonError):
    """Common base of all the exceptions defined in this module."""
49
50
51
class CertificateValidationError(DataFetchError):
    """Raised for errors related to certificate validation."""
55
56
57
class WrongRequestError(DataFetchError):
    """Raised when a wrong combination of parameters is passed."""
61
62
63
class UnknownURLformatError(DataFetchError):
    """Raised when the given URL is invalid."""
67
68
69
class FetchError(DataFetchError):
    """Raised when fetching the data fails, usually due to I/O errors."""
76
77
78
def fetch_local_data(url, out_file):
    """
    Start a thread that fetches data locally.

    The thread runs fetch_data with the given URL and output file and with
    no CA certificates path (None is passed).

    :see: org_fedora_oscap.data_fetch.fetch_data
    :return: the name of the thread running fetch_data
    :rtype: str

    """
    thread_name = common.THREAD_FETCH_DATA
    worker = AnacondaThread(
        name=thread_name,
        target=fetch_data,
        args=(url, out_file, None),
        fatal=False,
    )

    # hand the thread over to the thread manager (registers and runs it)
    threadMgr.add(worker)

    return thread_name
96
97
98
def wait_and_fetch_net_data(url, out_file, ca_certs_path=None):
    """
    Wait for the network connection and then start a thread that fetches
    data over the network.

    :see: org_fedora_oscap.data_fetch.fetch_data
    :raise OSCAPaddonNetworkError: if the network is not connected once the
                                   waiting is over
    :return: the name of the thread running fetch_data
    :rtype: str

    """

    # a thread trying to establish a network connection may still be
    # running; if so, block until NM is done connecting
    pending_nm = threadMgr.get(constants.THREAD_WAIT_FOR_CONNECTING_NM)
    if pending_nm:
        pending_nm.join()

    if not NETWORK.get_proxy().Connected:
        raise common.OSCAPaddonNetworkError(_("Network connection needed to fetch data."))

    log.info(f"Fetching data from {url}")

    thread_name = common.THREAD_FETCH_DATA
    worker = AnacondaThread(
        name=thread_name,
        target=fetch_data,
        args=(url, out_file, ca_certs_path),
        fatal=False,
    )

    # hand the thread over to the thread manager (registers and runs it)
    threadMgr.add(worker)

    return thread_name
129
130
131
def can_fetch_from(url):
    """
    Tell whether the fetch_data function understands the type of the given
    URL or not.

    Only the beginning of the URL is compared with the known prefixes
    (NET_URL_PREFIXES and LOCAL_URL_PREFIXES); the rest of the URL is not
    validated here.

    :param url: URL
    :type url: str
    :return: whether the type of the URL is supported or not
    :rtype: bool

    """
    # str.startswith accepts a tuple of prefixes and is true if any matches
    return url.startswith(NET_URL_PREFIXES + LOCAL_URL_PREFIXES)
144
145
146
def fetch_data(url, out_file, ca_certs_path=None):
    """
    Download the data found at the given URL into out_file. For URLs
    starting with https://, ca_certs_path may point to a PEM file with the
    CA certificate chain used to validate the server certificate.

    :param url: URL of the data
    :type url: str
    :param out_file: path to the output file
    :type out_file: str
    :param ca_certs_path: path to a PEM file with CA certificate chain
    :type ca_certs_path: str
    :raise UnknownURLformatError: if the type of the URL is not supported
    :raise WrongRequestError: if a wrong combination of arguments is passed
                              (ca_certs_path file path given and url starting with
                              http://) or arguments don't have required format
    :raise CertificateValidationError: if server certificate validation fails
    :raise FetchError: if data fetching fails (usually due to I/O errors)

    """

    # the directory that should hold out_file is created first, even
    # before the URL is checked
    utils.ensure_dir_exists(os.path.dirname(out_file))

    if not can_fetch_from(url):
        raise UnknownURLformatError(
            "Cannot fetch data from '%s': unknown URL format" % url
        )

    _curl_fetch(url, out_file, ca_certs_path)
    log.info(f"Data fetch from {url} completed")
176
177
178
def _parse_fetch_url(url):
    """
    Validate the URL against the pattern for its scheme.

    :param url: URL to validate
    :type url: str
    :return: the protocol and the URL to fetch; an ftp URL containing no '@'
             gets anonymous login credentials inserted
    :rtype: (str, str)
    :raise WrongRequestError: if the URL doesn't match the expected pattern

    """
    if url.startswith("ftp"):
        url_re, url_re_str = FTP_URL_RE, FTP_URL_RE_STR
    elif url.startswith("file"):
        url_re, url_re_str = FILE_URL_RE, FILE_URL_RE_STR
    else:
        url_re, url_re_str = HTTP_URL_RE, HTTP_URL_RE_STR

    match = url_re.match(url)
    if not match:
        raise WrongRequestError("Wrong url not matching '%s'" % url_re_str)

    # the first group contains the protocol, the second one the rest
    protocol, path = match.groups()
    if protocol == "ftp" and '@' not in path:
        # no user:pass given -> use anonymous login to the FTP server
        url = protocol + "://anonymous:@" + path

    return protocol, url


def _curl_fetch(url, out_file, ca_certs_path=None):
    """
    Function that fetches data and writes it out to the given file path. If a
    path to the file with CA certificates is given and the url starts with
    'https', the server certificate is validated.

    :param url: url of the data; has to start with 'http://', 'https://',
                'ftp://' or 'file://'
    :type url: str
    :param out_file: path to the output file
    :type out_file: str
    :param ca_certs_path: path to the file with CA certificates for server
                     certificate validation
    :type ca_certs_path: str
    :raise WrongRequestError: if a wrong combination of arguments is passed
                              (ca_certs_path file path given and url not
                              using https) or arguments don't have required
                              format
    :raise CertificateValidationError: if server certificate validation fails
    :raise FetchError: if data fetching fails (usually due to I/O errors)

    """

    protocol, url = _parse_fetch_url(url)

    if not out_file:
        raise WrongRequestError("out_file cannot be an empty string")

    if ca_certs_path and protocol != "https":
        msg = "Cannot verify server certificate when using plain HTTP"
        raise WrongRequestError(msg)

    curl = pycurl.Curl()
    try:
        curl.setopt(pycurl.URL, url)

        # a CA file together with a non-https protocol was rejected above,
        # so the protocol is known to be https here
        if ca_certs_path:
            # the strictest verification
            curl.setopt(pycurl.SSL_VERIFYHOST, 2)
            curl.setopt(pycurl.SSL_VERIFYPEER, 1)
            curl.setopt(pycurl.CAINFO, ca_certs_path)

        # may be turned off by flags (specified on command line, take
        # precedence)
        if not conf.payload.verify_ssl:
            log.warning("Disabling SSL verification due to the noverifyssl flag")
            curl.setopt(pycurl.SSL_VERIFYHOST, 0)
            curl.setopt(pycurl.SSL_VERIFYPEER, 0)

        try:
            with open(out_file, "wb") as fobj:
                curl.setopt(pycurl.WRITEDATA, fobj)
                curl.perform()
        except pycurl.error as err:
            # first arg is the error code
            if err.args[0] == pycurl.E_SSL_CACERT:
                msg = "Failed to connect to server and validate its "\
                      "certificate: %s" % err
                raise CertificateValidationError(msg) from err
            msg = "Failed to fetch data: %s" % err
            raise FetchError(msg) from err

        if protocol in ("http", "https"):
            # has to be queried before the handle gets closed
            return_code = curl.getinfo(pycurl.HTTP_CODE)
            if 400 <= return_code < 600:
                # translate the template first and fill the code in
                # afterwards; an already interpolated string can never be
                # found in the message catalog
                msg = _(
                    "Failed to fetch data - the request returned HTTP error code {return_code}"
                ).format(return_code=return_code)
                raise FetchError(msg)
    finally:
        # release the curl handle on both the success and the error paths
        curl.close()
264