Passed
Push — master ( 5927b1...ccc1cd )
by Christophe
01:43 queued 38s
created

aiscalator.core.utils   B

Complexity

Total Complexity 49

Size/Duplication

Total Lines 375
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 49
eloc 139
dl 0
loc 375
rs 8.48
c 0
b 0
f 0

11 Functions

Rating   Name   Duplication   Size   Complexity  
A notebook_file() 0 21 2
A log_info() 0 6 2
A check_notebook() 0 30 4
A check_notebook_dir() 0 21 5
A subprocess_run() 0 39 4
A data_file() 0 14 1
D copy_replace() 0 45 13
A find() 0 24 3
A format_file_content() 0 25 3
A sha256() 0 10 3
A wait_for_jupyter_lab() 0 44 4

3 Methods

Rating   Name   Duplication   Size   Complexity  
A BackgroundThreadRunner.__init__() 0 9 2
A BackgroundThreadRunner.process() 0 3 1
A BackgroundThreadRunner.run() 0 7 2

How to fix   Complexity   

Complexity

Complex classes like aiscalator.core.utils often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# -*- coding: utf-8 -*-
2
# Apache Software License 2.0
3
#
4
# Copyright (c) 2018, Christophe Duong
5
#
6
# Licensed under the Apache License, Version 2.0 (the "License");
7
# you may not use this file except in compliance with the License.
8
# You may obtain a copy of the License at
9
#
10
# http://www.apache.org/licenses/LICENSE-2.0
11
#
12
# Unless required by applicable law or agreed to in writing, software
13
# distributed under the License is distributed on an "AS IS" BASIS,
14
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
# See the License for the specific language governing permissions and
16
# limitations under the License.
17
"""
18
Various Utility functions
19
"""
20
import hashlib
21
import logging
22
import os
23
import re
24
import webbrowser
25
from pathlib import Path
26
from shlex import quote
27
from subprocess import PIPE  # nosec
28
from subprocess import STDOUT
29
from subprocess import Popen
30
from threading import Thread
31
from time import sleep
32
33
from aiscalator.core.log_regex_analyzer import LogRegexAnalyzer
34
35
36
def data_file(path):
    """
    Resolve a resource file shipped alongside this module.

    Parameters
    ----------
    path : path
        location of the resource, relative to the directory that
        contains this module

    Returns
    -------
        absolute path to the resource data file
    """
    module_dir = os.path.dirname(__file__)
    absolute_dir = os.path.abspath(module_dir)
    return os.path.join(absolute_dir, path)
50
51
52
def find(collection, item, field='name'):
    """
    Return the first element of a collection whose ``field`` entry
    equals ``item``.

    Parameters
    ----------
    collection : Set
        Collection of subscriptable objects (each is read as
        ``element[field]``)
    item
        value that the field must be equal to
    field : string
        Name of the field to look up in each element

    Returns
    -------
    object
        The first matching element, or None when nothing matches
    """
    matches = (
        candidate for candidate in collection
        if candidate[field] == item
    )
    return next(matches, None)
76
77
78
def _compile_rules(pattern, replace_value):
    """Pair each pattern with its replacement as (compiled regex, value)."""
    if isinstance(pattern, str):
        pattern = [pattern]
    if isinstance(replace_value, str):
        replace_value = [replace_value]
    if not (replace_value and pattern):
        # Nothing to substitute: the copy is done verbatim.
        return []
    if len(replace_value) != len(pattern):
        raise ValueError("Invalid parameters: pattern and replace_value"
                         " have different sizes.")
    return [
        (re.compile(regex, re.IGNORECASE), value)
        for regex, value in zip(pattern, replace_value)
    ]


def copy_replace(src, dst, pattern=None, replace_value=None):
    """
    Copies a file from src to dst replacing pattern by replace_value

    Patterns are regular expressions matched case-insensitively and
    applied to each line in the order they are given.

    Parameters
    ----------
    src : string
        Path to the source filename to copy from; any other object is
        used as an already opened, line-iterable file and is left open
    dst : string
        Path to the output filename to copy to; any other object is
        used as an already opened, writable file and is left open
    pattern
        Pattern or list of Patterns to replace inside the src file
    replace_value
        Value or list of Values to replace by in the dst file

    Raises
    ------
    ValueError
        If pattern and replace_value do not have the same number
        of entries

    """
    # Validate before touching the filesystem so that an invalid call
    # neither creates nor truncates dst.
    rules = _compile_rules(pattern, replace_value)
    file1 = open(src, 'r') if isinstance(src, str) else src
    try:
        file2 = open(dst, 'w') if isinstance(dst, str) else dst
        try:
            for line in file1:
                for regex, value in rules:
                    line = regex.sub(value, line)
                file2.write(line)
        finally:
            # Only close what this function opened itself.
            if isinstance(dst, str):
                file2.close()
    finally:
        if isinstance(src, str):
            file1.close()
123
124
125
def log_info(pipe):
    """
    Default logging function: sends every line read from ``pipe`` to
    this module's logger at debug level until ``readline`` returns
    ``b''``, then returns True.
    """
    logger = logging.getLogger(__name__)
    line = pipe.readline()
    while line != b'':
        logger.debug(line)
        line = pipe.readline()
    return True
131
132
133
class BackgroundThreadRunner():
    """
    Launches a command and hands its output to a logging callback
    from a worker thread.

    The subprocess and the worker thread are both started by the
    constructor.

    Attributes
    ----------
    _process :
        Process object of the command running in the background
    _log_function : function(stream -> bool)
        callback function to log the output of the command
    _no_redirect : bool
        when True the subprocess is started without piping its output,
        so the callback is never invoked
    _worker : Thread
        Thread object
    """
    def __init__(self, command, log_function, no_redirect=False):
        self._no_redirect = no_redirect
        # Without redirection the child keeps the parent's streams;
        # otherwise stderr is merged into a piped stdout.
        streams = {} if no_redirect else {'stdout': PIPE, 'stderr': STDOUT}
        self._process = Popen(command, **streams)  # nosec
        self._log_function = log_function
        self._worker = Thread(name='worker', target=self.run)
        self._worker.start()

    def run(self):
        """
        Thread body: passes the process output stream to the logging
        callback, unless output is not redirected.

        """
        if self._no_redirect:
            return
        self._log_function(self._process.stdout)

    def process(self):
        """Returns the process object."""
        return self._process
171
172
173
def subprocess_run(command, log_function=log_info,
                   no_redirect=False, wait=True):
    """
    Run command in a subprocess while redirecting output to log_function.

    The subprocess either runs synchronously or in the background
    depending on the wait parameter.

    Parameters
    ----------
    command : List
        Command to run in the subprocess
    log_function : function
        Callback function to log the output of the subprocess
    no_redirect : bool
        when True the subprocess output is not piped and log_function
        is not called
    wait : bool
        Whether the subprocess should be run synchronously or in
        the background

    Returns
    -------
    int
        return code of the subprocess (when wait is True)
    BackgroundThreadRunner
        the thread running in the background (when wait is False)
    """
    if not wait:
        return BackgroundThreadRunner(command, log_function, no_redirect)
    if no_redirect:
        child = Popen(command, shell=False)  # nosec
        return child.wait()
    child = Popen(command,
                  stdout=PIPE,
                  stderr=STDOUT,
                  shell=False)  # nosec
    # The pipe is closed as soon as the callback has finished with it.
    with child.stdout as output:
        log_function(output)
    return child.wait()
212
213
214
def format_file_content(content, prefix="", suffix=""):
    """
    Reformat the content of a file line by line, adding prefix and suffix
    strings.

    Each line (including its trailing newline, if any) is shell-quoted
    with ``shlex.quote`` before being wrapped.

    Parameters
    ----------
    content : str
        path to the file to reformat its content
    prefix : str
        add to each line this prefix string
    suffix : str
        add to each line this suffix string
    Returns
    -------
    str
        Formatted content of the file
    """
    with open(content, "r") as file:
        # TODO handle comments
        # TODO check validity of the line for extra security
        # join builds the result in one pass instead of repeated +=
        return "".join(prefix + quote(line) + suffix for line in file)
239
240
241
def sha256(file: str):
    """
    Reads a file content and returns its sha256 hash.

    The file is read in fixed-size binary chunks so that memory use
    stays bounded even for files that contain no newline bytes.

    Parameters
    ----------
    file : str
        path to the file to hash

    Returns
    -------
    str
        hexadecimal sha256 digest of the file content

    """
    sha = hashlib.sha256()
    with open(file, "rb") as content:
        # read() returns b"" at end of file, which stops the iteration
        for chunk in iter(lambda: content.read(65536), b""):
            sha.update(chunk)
    return sha.hexdigest()
251
252
253
def wait_for_jupyter_lab(commands, logger, notebook, port, folder):
    """
    Starts jupyter lab and wait for it to start, returning the url it's
    running from.

    The command is started in the background and its output is scanned
    for the jupyter token. The token is checked up to 5 times, sleeping
    2 seconds before each check. On success the url is also opened in
    a web browser.

    Parameters
    ----------
    commands: list
        List of commands to run to start the process
    logger : logging.Logger
        Logger object
    notebook : str
        path to the notebook
    port :
        port on which the jupyter lab is listening
    folder : str
        path in the container to reach the notebook

    Returns
    -------
    str
        url from which it is serving the jupyter lab, or an empty
        string when no token was found in time
    """
    log = LogRegexAnalyzer(b'.*http://.*:8888/.token=([a-zA-Z0-9]+)(\r)?\n')
    logger.info("Running...: %s", " ".join(commands))
    subprocess_run(commands, log_function=log.grep_logs, wait=False)
    # Attempts are numbered from 1 so the warning reads (1/5)..(5/5)
    for attempt in range(1, 6):
        sleep(2)
        if log.artifact():
            break
        msg = "docker run does not seem to be up yet..."
        msg += " retrying (%s/5)"
        logger.warning(msg, attempt)
    if log.artifact():
        # TODO handle url better (not always localhost?)
        url = ("http://localhost:" + str(port) +
               "/lab/tree/" + folder + "/" +
               notebook + "?token=" +
               log.artifact())
        logger.info("%s is up and running.", url)
        # TODO --no-browser option
        webbrowser.open(url)
        return url
    return ""
297
298
299
def check_notebook(code_path, from_format="py:percent"):
    """
    Make sure a notebook file exists at ``code_path``.

    Nothing is done when the path already exists. Otherwise the
    packaged notebook template is copied to ``code_path`` and, if the
    associated .py file exists, the notebook is regenerated from it
    with jupytext.

    Parameters
    ----------
    code_path : str
        path to the notebook to check
    from_format : str
        jupytext format of the .py input file

    """
    if os.path.exists(code_path):
        return
    parent_dir = os.path.dirname(code_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    template = data_file("../config/template/notebook.json")
    copy_replace(template, code_path)
    ipynb_path, py_path = notebook_file(code_path)
    if not os.path.isfile(py_path):
        return
    jupytext_command = [
        "jupytext", "--from", from_format, "--to", "ipynb",
        py_path, "-o", ipynb_path
    ]
    subprocess_run(jupytext_command)
    # touch notebook.py so jupytext doesn't complain when
    # opening in the jupyter lab when the py is behind the
    # ipynb in modification time
    Path(py_path).touch()
329
330
331
def check_notebook_dir(code_path, from_format="py:percent"):
    """
    Check a folder and generate all notebook files that might
    be required in that folder.

    ``code_path`` itself is checked first, then every other .py or
    .ipynb file found in the same folder.

    Parameters
    ----------
    code_path : str
        path to a file in the folder
    from_format : str
        jupytext format of potential .py files

    """
    check_notebook(code_path, from_format)
    code_path_dir = os.path.dirname(code_path)
    # dirname is '' for a bare file name and os.listdir('') raises
    # FileNotFoundError, so list the current directory in that case.
    # Joining with '' keeps names comparable with code_path.
    for file in os.listdir(code_path_dir or os.curdir):
        file = os.path.join(code_path_dir, file)
        if file != code_path and file.endswith((".py", ".ipynb")):
            notebook, _ = notebook_file(file)
            check_notebook(notebook, from_format)
352
353
354
def notebook_file(code_path):
    """
    Derive the ipynb and py file names that correspond to a path.

    When the path contains a dot, the extension of its last component
    is dropped before the two suffixes are appended.

    Parameters
    ----------
    code_path : str
        path to a file

    Returns
    -------
    (str, str)
        tuple of 2 paths to ipynb and py files

    """
    stem = code_path
    if '.' in code_path:
        folder, filename = os.path.split(code_path)
        bare_name, _ = os.path.splitext(filename)
        stem = os.path.join(folder, bare_name)
    return tuple(stem + extension for extension in ('.ipynb', '.py'))
375