_prepare_docker_image_env()   F
last analyzed

Complexity

Conditions 15

Size

Total Lines 55
Code Lines 35

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 35
dl 0
loc 55
rs 2.9998
c 0
b 0
f 0
cc 15
nop 1

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

Complexity

Complex functions like aiscalator.jupyter.command._prepare_docker_image_env() often do a lot of different things. To break such a function down, we need to identify a cohesive component within it. A common approach to finding such a component is to look for variables or statements that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# -*- coding: utf-8 -*-
2
# Apache Software License 2.0
3
#
4
# Copyright (c) 2018, Christophe Duong
5
#
6
# Licensed under the Apache License, Version 2.0 (the "License");
7
# you may not use this file except in compliance with the License.
8
# You may obtain a copy of the License at
9
#
10
# http://www.apache.org/licenses/LICENSE-2.0
11
#
12
# Unless required by applicable law or agreed to in writing, software
13
# distributed under the License is distributed on an "AS IS" BASIS,
14
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
# See the License for the specific language governing permissions and
16
# limitations under the License.
17
"""
18
Implementations of commands for Jupyter
19
"""
20
import datetime
21
import logging
22
import os.path
23
import sys
24
from os import makedirs
25
26
from aiscalator.core.config import AiscalatorConfig
27
from aiscalator.core.config import convert_to_format
28
from aiscalator.core.log_regex_analyzer import LogRegexAnalyzer
29
from aiscalator.core.utils import check_notebook_dir
30
from aiscalator.core.utils import copy_replace
31
from aiscalator.core.utils import data_file
32
from aiscalator.core.utils import notebook_file
33
from aiscalator.core.utils import subprocess_run
34
from aiscalator.core.utils import wait_for_jupyter_lab
35
from aiscalator.jupyter.docker_image import build
36
37
38
def _prepare_docker_env(conf: AiscalatorConfig, program, reason):
    """
    Assembles the list of commands to execute a docker run call

    When calling "docker run ...", this function also adds a set of
    additional parameters to mount the proper volumes and expose the
    correct environment for the call in the docker image mapped to the
    host directories. This is done so only some specific data and code
    folders are accessible within the docker image.

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for the step
    program : List
        the rest of the commands to execute as part of
        the docker run call
    reason : str
        suffix appended to the step container name to build the
        value passed to the "--name" option

    Returns
    -------
    List
        The full Array of Strings representing the commands to execute
        in the docker run call
    """
    logger = logging.getLogger(__name__)
    commands = [
        "docker", "run", "--name", conf.step_container_name() + "_" + reason,
        "--rm"
    ]
    # Only env files that actually exist on the host are forwarded.
    for env in conf.user_env_file(conf.step_field("task.env")):
        if os.path.isfile(env):
            commands += ["--env-file", env]
    commands += _prepare_docker_image_env(conf)

    # Build the "<code format>:<jupytext format>" descriptor, falling back
    # to "py:percent" for whichever part is not configured.
    code_path = conf.step_file_path('task.code_path')
    if conf.has_step_field('task.code_format'):
        from_format = conf.step_field('task.code_format')
    else:
        from_format = "py"
    from_format += ':'
    if conf.has_step_field('task.jupytext_format'):
        from_format += conf.step_field('task.jupytext_format')
    else:
        from_format += "percent"
    notebook, _ = notebook_file(code_path)
    check_notebook_dir(logger, notebook, from_format)
    commands += [
        "--mount", "type=bind,source=" + os.path.dirname(notebook) +
        ",target=/home/jovyan/work/notebook/",
    ]
    commands += _prepare_task_env(conf)

    if conf.has_step_field("task.execution_dir_path"):
        execution_dir_path = conf.step_file_path('task.execution_dir_path')
        # Mount only when the path resolved to something usable: a falsy
        # value would either fail the string concatenation or produce a
        # bind mount with an empty source.
        if execution_dir_path:
            makedirs(execution_dir_path, exist_ok=True)
            commands += [
                "--mount", "type=bind,source=" +
                execution_dir_path +
                ",target=/home/jovyan/work/notebook_run/"
            ]
    commands += program
    return commands
99
100
101
def _prepare_docker_image_env(conf: AiscalatorConfig):
    """
    Assemble the list of volumes to mount specific to
    building the docker image

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for the step

    Returns
    -------
    list
        list of commands to bind those volumes, followed by the
        content of "docker_image.docker_extra_options" when that
        field is defined
    """
    work_dir = "/home/jovyan/work/"
    commands = []

    # The configuration file itself is mounted under its own base name.
    config_path = conf.config_path()
    if config_path is not None:
        commands += [
            "--mount",
            "type=bind,source=" + os.path.realpath(config_path) +
            ",target=" + work_dir + os.path.basename(config_path),
        ]

    # (configuration field, file name inside the container).
    # The order determines the order of the generated mount options.
    optional_files = (
        ("docker_image.apt_repository_path", "apt_repository.txt"),
        ("docker_image.apt_package_path", "apt_packages.txt"),
        ("docker_image.requirements_path", "requirements.txt"),
        ("docker_image.lab_extension_path", "lab_extensions.txt"),
    )
    for field, target_name in optional_files:
        if not conf.has_step_field(field):
            continue
        host_file = conf.step_file_path(field)
        # Skip fields that do not resolve to an existing regular file.
        if host_file and os.path.isfile(host_file):
            commands += [
                "--mount", "type=bind,source=" + host_file +
                ",target=" + work_dir + target_name,
            ]

    # allow to pass a list of extra options like ["--network", "bridge"]
    if conf.has_step_field("docker_image.docker_extra_options"):
        commands += conf.step_field("docker_image.docker_extra_options")
    return commands
156
157
158
def _prepare_task_env(conf: AiscalatorConfig):
    """
    Build the mount options that are specific to running the task:
    module sources, input data (read-only) and output data.

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for the step

    Returns
    -------
    list
        list of commands to bind those volumes; empty when the
        configuration has no root directory
    """
    if not conf.root_dir():
        return []

    mounts = []
    mounts.extend(
        _mount_path(conf, "task.modules_src_path",
                    "/home/jovyan/work/modules/")
    )
    # Input data is exposed read-only inside the container.
    mounts.extend(
        _mount_path(conf, "task.input_data_path",
                    "/home/jovyan/work/data/input/",
                    readonly=True)
    )
    # Output folders are created on the host before being mounted.
    mounts.extend(
        _mount_path(conf, "task.output_data_path",
                    "/home/jovyan/work/data/output/",
                    make_dirs=True)
    )
    return mounts
184
185
186
def _mount_path(conf: AiscalatorConfig, field, target_path,
                readonly=False, make_dirs=False):
    """
    Return commands to mount path from list field into the
    docker image when running.

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for the step
    field : str
        the field in the configuration step that contains the path
    target_path : str
        where to mount them inside the container
    readonly : bool
        flag to mount the path as read-only
    make_dirs : bool
        flag to create the folder on the host before mounting if
        it doesn't exist.

    Returns
    -------
    list
        commands to mount all the paths from the field

    """
    commands = []
    if not conf.has_step_field(field):
        return commands

    # Loop-invariant pieces are computed once.
    root_dir = conf.root_dir()
    mount_suffix = ",readonly" if readonly else ""

    for value in conf.step_field(field):
        # TODO handle URL
        # Each entry is iterated and indexed like a mapping: the key is
        # joined to target_path, the value is appended to the root dir.
        # NOTE(review): presumably {container sub-path: host path relative
        # to the root dir} -- confirm against the configuration schema.
        for i in value:
            host_path = root_dir + value[i]
            if make_dirs:
                makedirs(os.path.realpath(host_path), exist_ok=True)
            # Paths missing on the host are silently skipped.
            if os.path.exists(host_path):
                commands += [
                    "--mount",
                    "type=bind,source=" +
                    os.path.realpath(host_path) +
                    ",target=" + os.path.join(target_path, i) +
                    mount_suffix
                ]
    return commands
229
230
231
def jupyter_run(conf: AiscalatorConfig, prepare_only=False,
                param=None, param_raw=None):
    """
    Executes the step in browserless mode using papermill

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for the step
    prepare_only : bool
        Indicates if papermill should replace the parameters of the
        notebook only or it should execute all the cells too
    param : list
        list of tuples of parameters, each forwarded as
        "-p <first item> <second item>"
    param_raw : list
        list of tuples of raw parameters, each forwarded as
        "-r <first item> <second item>"

    Returns
    -------
    string
        the path to the output notebook resulting from the execution
        of this step

    Raises
    ------
    Exception
        when the docker image could not be built

    Notes
    -----
    Calls ``sys.exit`` with the status code of the docker command
    when that status code is non-zero.
    """
    logger = logging.getLogger(__name__)
    conf.validate_config()
    docker_image = build(conf)
    if not docker_image:
        raise Exception("Failed to build docker image")

    # Path of the notebook as seen from inside the container.
    notebook, _ = notebook_file(conf.step_file_path('task.code_path'))
    notebook = os.path.join("/home/jovyan/work/notebook/",
                            os.path.basename(notebook))
    notebook_output = conf.step_notebook_output_path(notebook)

    # add timestamp to name to handle multiple concurrent runs
    reason = ("run_" + conf.step_name() + "_"
              + datetime.datetime.now().strftime('%Y%m%d_%H%M%S'))
    commands = _prepare_docker_env(conf, [
        docker_image, "bash", "start-papermill.sh",
        "papermill",
        notebook, notebook_output
    ], reason)

    if prepare_only:
        commands.append("--prepare-only")
    parameters = conf.step_extract_parameters()
    if parameters:
        commands += parameters
    for parameter in param or ():
        commands += ["-p", parameter[0], parameter[1]]
    for raw_parameter in param_raw or ():
        commands += ["-r", raw_parameter[0], raw_parameter[1]]

    log = LogRegexAnalyzer()
    logger.info("Running...: %s", " ".join(commands))
    returncode = subprocess_run(commands, log_function=log.grep_logs)
    if returncode:
        # Lazy %-style arguments: formatting is left to the logger.
        logger.error("Run was not successful, returned status code is: %s",
                     returncode)
        sys.exit(returncode)
    return os.path.join(conf.step_file_path('task.execution_dir_path'),
                        os.path.basename(notebook_output))
286
287
288
def jupyter_edit(conf: AiscalatorConfig, param=None, param_raw=None):
    """
    Launch a Jupyter Lab container set up for editing the selected step.

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for the step
    param : list
        list of tuples of parameters
    param_raw : list
        list of tuples of raw parameters

    Returns
    -------
    string
        Url of the running jupyter lab

    Raises
    ------
    Exception
        when the docker image could not be built
    """
    logger = logging.getLogger(__name__)
    conf.validate_config()
    docker_image = build(conf)
    if not docker_image:
        raise Exception("Failed to build docker image")

    # TODO: shutdown other jupyter lab still running
    notebook_path, _ = notebook_file(conf.step_field('task.code_path'))
    notebook_name = os.path.basename(notebook_path)

    # Any parameter to inject triggers a prepare-only papermill pass
    # before the lab is started.
    needs_prepare = conf.step_extract_parameters() or param or param_raw
    if needs_prepare:
        jupyter_run(conf, prepare_only=True,
                    param=param,
                    param_raw=param_raw)

    program = [
        # TODO: improve port publishing
        "-p", "10000:8888",
        "-p", "4040:4040",
        docker_image, "start.sh",
        'jupyter', 'lab'
    ]
    commands = _prepare_docker_env(conf, program, "edit")
    return wait_for_jupyter_lab(commands, logger, notebook_name,
                                10000, "work/notebook")
326
327
328
def jupyter_new(name, path, output_format="hocon"):
    """
    Starts a Jupyter Lab environment configured to edit a brand new step

    The step configuration and notebook are created from the bundled
    templates, empty dependency list files are created next to them,
    and the new step is then opened with jupyter_edit.

    Parameters
    ----------
    name : str
        name of the new step
    path : str
        path to where the new step files should be created
    output_format : str
        the format of the new configuration file to produce

    Returns
    -------
    string
        Url of the running jupyter lab
    """
    step_dir = os.path.join(path, name)

    step_file = os.path.join(path, name, name) + '.conf'
    if os.path.dirname(step_file):
        makedirs(os.path.dirname(step_file), exist_ok=True)
    copy_replace(data_file("../config/template/step.conf"), step_file,
                 pattern="Untitled", replace_value=name)
    if output_format != 'hocon':
        converted_file = os.path.join(path, name, name) + '.' + output_format
        step_file = convert_to_format(step_file, output=converted_file,
                                      output_format=output_format)

    notebook = os.path.join(path, name, 'notebook', name) + '.ipynb'
    if os.path.dirname(notebook):
        makedirs(os.path.dirname(notebook), exist_ok=True)
    copy_replace(data_file("../config/template/notebook.json"), notebook)

    # Append mode creates the file when missing and leaves any existing
    # content untouched.
    for file_name in ("apt_repository.txt", "apt_packages.txt",
                      "requirements.txt", "lab_extensions.txt"):
        with open(os.path.join(step_dir, file_name), 'a'):
            pass

    # Hand the lab url back to the caller, as the docstring promises.
    return jupyter_edit(AiscalatorConfig(config=step_file,
                                         step_selection=name))
366