Passed
Push — master ( 722b81...0abd85 )
by Christophe
02:12 queued 01:03
created

aiscalator.core.utils.copy_replace()   D

Complexity

Conditions 13

Size

Total Lines 45
Code Lines 25

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 25
dl 0
loc 45
rs 4.2
c 0
b 0
f 0
cc 13
nop 4

How to fix   Complexity   

Complexity

Complex classes like aiscalator.core.utils.copy_replace() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# -*- coding: utf-8 -*-
2
# Apache Software License 2.0
3
#
4
# Copyright (c) 2018, Christophe Duong
5
#
6
# Licensed under the Apache License, Version 2.0 (the "License");
7
# you may not use this file except in compliance with the License.
8
# You may obtain a copy of the License at
9
#
10
# http://www.apache.org/licenses/LICENSE-2.0
11
#
12
# Unless required by applicable law or agreed to in writing, software
13
# distributed under the License is distributed on an "AS IS" BASIS,
14
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
# See the License for the specific language governing permissions and
16
# limitations under the License.
17
"""
18
Various Utility functions
19
"""
20
import hashlib
21
import logging
22
import os
23
import re
24
import webbrowser
25
from shlex import quote
26
from subprocess import PIPE  # nosec
27
from subprocess import STDOUT
28
from subprocess import Popen
29
from threading import Thread
30
from time import sleep
31
32
from aiscalator.core.log_regex_analyzer import LogRegexAnalyzer
33
34
35
def data_file(path):
    """
    Resolve a resource data file packaged along with the code.

    The given path is joined onto the absolute directory that contains
    this module.

    Parameters
    ----------
    path : path
        path to the resource file, relative to this module's directory

    Returns
    -------
        absolute path to the resource data file
    """
    module_dir = os.path.dirname(__file__)
    absolute_module_dir = os.path.abspath(module_dir)
    return os.path.join(absolute_module_dir, path)
49
50
51
def find(collection, item, field='name'):
    """
    Look up the first element of a collection whose field equals
    a given value.

    Parameters
    ----------
    collection : Set
        Collection of subscriptable objects (each is read as
        ``element[field]``)
    item
        value of the item that we are looking for
    field : string
        Name of the field from the object to inspect

    Returns
    -------
    object
        First element whose field matches item, or None when no
        element of the collection matches
    """
    matching = (
        candidate for candidate in collection
        if candidate[field] == item
    )
    return next(matching, None)
75
76
77
def copy_replace(src, dst, pattern=None, replace_value=None):
    """
    Copies a file from src to dst replacing pattern by replace_value

    The arguments are validated and the regular expressions compiled
    before any file is opened, so an invalid call never truncates dst.
    Files opened by this function are always closed, even when the copy
    fails; file objects handed in by the caller are left open.

    Parameters
    ----------
    src : string or file object
        Path to the source filename to copy from, or an already opened
        readable text file object
    dst : string or file object
        Path to the output filename to copy to, or an already opened
        writable text file object
    pattern
        Pattern (string) or list of Patterns to replace inside the src
        file; they are compiled as case-insensitive regular expressions
    replace_value
        Value (string) or list of Values to replace by in the dst file,
        paired by position with pattern

    Raises
    ------
    ValueError
        if pattern and replace_value do not have the same number of
        elements

    """
    if isinstance(pattern, str):
        pattern = [pattern]
    if isinstance(replace_value, str):
        replace_value = [replace_value]

    # Replacements only apply when both sides are provided; otherwise
    # the file is copied verbatim.
    rules = []
    if replace_value and pattern:
        if len(replace_value) != len(pattern):
            raise ValueError("Invalid parameters: pattern and replace_value"
                             " have different sizes.")
        rules = [
            (re.compile(regex, re.IGNORECASE), value)
            for regex, value in zip(pattern, replace_value)
        ]

    # Track only the handles opened here: those are ours to close.
    opened = []
    try:
        if isinstance(src, str):
            file1 = open(src, 'r')
            opened.append(file1)
        else:
            file1 = src
        if isinstance(dst, str):
            file2 = open(dst, 'w')
            opened.append(file2)
        else:
            file2 = dst
        for line in file1:
            for regex, value in rules:
                line = regex.sub(value, line)
            file2.write(line)
    finally:
        for handle in opened:
            handle.close()
122
123
124
def log_info(pipe):
    """
    Default logging function: forwards every line read from pipe to
    this module's logger at debug level until an empty bytes line
    (``b''``) is read, then returns True.
    """
    logger = logging.getLogger(__name__)
    while True:
        line = pipe.readline()
        if line == b'':
            break
        logger.debug(line)
    return True
130
131
132
class BackgroundThreadRunner():
    """
    Starts a command in a subprocess and, from a worker thread, hands
    its output stream to a logging callback.

    ...

    Attributes
    ----------
    _process :
        Process object of the command running in the background
    _log_function : function(stream -> bool)
        callback function to log the output of the command
    _no_redirect : bool
        when True the subprocess keeps its own STDOUT and STDERR and
        the logging callback is never invoked
    _worker : Thread
        Thread object
    """
    def __init__(self, command, log_function, no_redirect=False):
        self._no_redirect = no_redirect
        # Capture the output (stderr merged into stdout) only when it
        # has to be forwarded to the logging callback.
        popen_options = (
            {} if no_redirect
            else {'stdout': PIPE, 'stderr': STDOUT}
        )
        self._process = Popen(command, **popen_options)  # nosec
        self._log_function = log_function
        self._worker = Thread(name='worker', target=self.run)
        self._worker.start()

    def run(self):
        """
        Body of the worker thread: passes the process output stream to
        the logging callback, unless redirection is disabled.

        """
        if self._no_redirect:
            return
        self._log_function(self._process.stdout)

    def process(self):
        """Returns the process object."""
        return self._process
170
171
172
def subprocess_run(command, log_function=log_info,
                   no_redirect=False, wait=True):
    """
    Run command in a subprocess while redirecting output to log_function.

    The subprocess either runs synchronously or in the background
    depending on the wait parameter.

    Parameters
    ----------
    command : List
        Command to run in the subprocess
    log_function : function
        Callback function to log the output of the subprocess
    no_redirect : bool
        when True the subprocess keeps its own STDOUT and STDERR and
        log_function is not called
    wait : bool
        Whether the subprocess should be run synchronously or in
        the background

    Returns
    -------
    int
        return code of the subprocess (when wait is True)
    BackgroundThreadRunner
        the thread running in the background (when wait is False)
    """
    if not wait:
        return BackgroundThreadRunner(command, log_function, no_redirect)

    if no_redirect:
        return Popen(command, shell=False).wait()  # nosec

    process = Popen(command,
                    stdout=PIPE,
                    stderr=STDOUT,
                    shell=False)  # nosec
    # The output pipe is closed as soon as the callback is done with it.
    with process.stdout:
        log_function(process.stdout)
    return process.wait()
211
212
213
def format_file_content(content, prefix="", suffix=""):
    """
    Reformat the content of a file line by line, adding prefix and suffix
    strings.

    Each line is passed through ``shlex.quote`` as read from the file,
    i.e. including its trailing newline when it has one, and is then
    wrapped between prefix and suffix.

    Parameters
    ----------
    content : str
        path to the file to reformat its content
    prefix : str
        add to each line this prefix string
    suffix : str
        add to each line this suffix string

    Returns
    -------
    str
        Formatted content of the file (empty string for an empty file)
    """
    with open(content, "r") as file:
        # TODO handle comments
        # TODO check validity of the line for extra security
        # join() builds the result in one pass instead of repeated
        # string concatenation.
        return "".join(
            prefix + quote(line) + suffix
            for line in file
        )
238
239
240
def sha256(file: str):
    """
    Reads a file content and returns its sha256 hash.

    The file is read in binary mode by fixed-size chunks so that memory
    use stays bounded even for large files that contain no newline
    bytes.

    Parameters
    ----------
    file : str
        path to the file to hash

    Returns
    -------
    str
        hexadecimal sha256 digest of the file content
    """
    chunk_size = 65536
    sha = hashlib.sha256()
    with open(file, "rb") as content:
        # read() returns b"" at end of file, which stops the iteration
        for chunk in iter(lambda: content.read(chunk_size), b""):
            sha.update(chunk)
    return sha.hexdigest()
250
251
252
def wait_for_jupyter_lab(commands, logger, notebook, port, folder):
    """
    Starts commands in the background, waits for a Jupyter Lab token to
    show up in their output and opens the notebook in a web browser.

    The output is checked up to 5 times, 2 seconds apart, for a line
    matching ``http://.*:8888/.token=<token>``.

    Parameters
    ----------
    commands : List
        Command to run in the background subprocess
    logger
        logger object used to report progress
    notebook : str
        name of the notebook, appended to the url path
    port
        port used to build the ``http://localhost:<port>`` url
    folder : str
        folder, under ``/lab/tree/work/``, containing the notebook

    Returns
    -------
    str
        url that was opened in the browser, or an empty string if no
        token was found in the output in time

    """
    log = LogRegexAnalyzer(b'http://.*:8888/.token=([a-zA-Z0-9]+)\n')
    logger.info("Running...: %s", " ".join(commands))
    subprocess_run(commands, log_function=log.grep_logs, wait=False)
    for i in range(5):
        sleep(2)
        if log.artifact():
            break
        msg = "docker run does not seem to be up yet..."
        msg += " retrying (%s/5)"
        # attempts are reported 1-based: (1/5) ... (5/5)
        logger.warning(msg, i + 1)
    # NOTE(review): artifact() is concatenated with str below, so it is
    # assumed to return the token as a str - confirm in LogRegexAnalyzer.
    token = log.artifact()
    if token:
        # TODO handle url better (not always localhost?)
        url = ("http://localhost:" + str(port) +
               "/lab/tree/work/" + folder + "/" +
               notebook + "?token=" +
               token)
        logger.info("%s is up and running.", url)
        # TODO --no-browser option
        webbrowser.open(url)
        return url
    return ""
287