# -*- coding: utf-8 -*-
# Apache Software License 2.0
#
# Copyright (c) 2018, Christophe Duong
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Implementations of commands for Airflow
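
Typical usage (illustrative sketch: the AiscalatorConfig() constructor call
below is an assumption, see aiscalator.core.config for its exact signature,
and "airflow list_dags" assumes the Airflow 1.x CLI inside the container)::

    conf = AiscalatorConfig()
    airflow_setup(conf, config_home="/tmp/aiscalator_home", workspace=[])
    airflow_up(conf)
    # run an airflow CLI subcommand inside the webserver container
    airflow_cmd(conf, service="webserver", cmd=["airflow", "list_dags"])
    airflow_down(conf)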
"""
import logging
import re
from grp import getgrgid
from os import listdir
from os import makedirs
from os import remove
from os import stat
from os import symlink
from os.path import abspath
from os.path import basename
from os.path import dirname
from os.path import exists
from os.path import isfile
from os.path import islink
from os.path import join
from os.path import realpath
from tempfile import TemporaryDirectory

from aiscalator.core import utils
from aiscalator.core.config import AiscalatorConfig
from aiscalator.core.log_regex_analyzer import LogRegexAnalyzer


def _docker_compose(conf: AiscalatorConfig,
                    extra_commands: list):
    """
    Run the docker-compose command

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for the application
    extra_commands : list
        list of sub-commands to run in docker-compose

    """
    logger = logging.getLogger(__name__)
    conf.validate_config()
    dockerfile = join(conf.app_config_home(), "config",
                      conf.airflow_docker_compose_file())
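    # note: despite its name, "dockerfile" holds the path to the
    # docker-compose YAML file under the application config folder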
    commands = ["docker-compose"]
    # Prepare a temp folder to run the command from
    with TemporaryDirectory(prefix="aiscalator_") as tmp:
        with open(join(tmp, ".env"), mode="w") as env_file:
            # concatenate all the env files into one
            for env in conf.user_env_file():
                if isfile(env):
                    with open(env, mode="r") as file:
                        for line in file:
                            env_file.write(line)
        utils.copy_replace(join(tmp, ".env"),
                           join(dirname(dockerfile), ".env"))
    commands += ["-f", dockerfile] + extra_commands
    logger.info("Running...: %s", " ".join(commands))
    utils.subprocess_run(commands, no_redirect=True)


def airflow_setup(conf: AiscalatorConfig,
                  config_home: str,
                  workspace: list,
                  append: bool = True):
    """
    Set up the airflow configuration files and environment

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for the application
    config_home : str
        path to the configuration home directory
    workspace : list
        List of paths to directories to mount as volumes
        into the airflow workers to use as workspaces
    append : bool
        flag telling whether the workspace paths should be appended
        to the list already in the config or replace it.

    """
    logger = logging.getLogger(__name__)
    conf.validate_config()
    if config_home:
        makedirs(config_home, exist_ok=True)
        conf.redefine_app_config_home(config_home)
    ws_path = "airflow.setup.workspace_paths"
    if conf.app_config_has(ws_path):
        if append:
            workspace += conf.app_config()[ws_path]
    conf.redefine_airflow_workspaces(workspace)
    image = 'latest'
    if _docker_compose_grep(conf):
        image = _airflow_docker_build(conf)
        if not image:
            raise Exception("Failed to build docker image")
    src = utils.data_file("../config/docker/airflow/config/")
    dst = join(conf.app_config_home(), "config")
    logger.info("Generating a new configuration folder for aiscalator:\n\t%s",
                dst)
    makedirs(dst, exist_ok=True)
    makedirs(join(conf.app_config_home(), "dags"), exist_ok=True)
    makedirs(join(conf.app_config_home(), "pgdata"), exist_ok=True)
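    # The pattern/value pairs below drive utils.copy_replace on the template
    # config files: the "# - workspace #" placeholder is replaced by the
    # generated volume mount lines and the default aiscalator/airflow:latest
    # image is replaced by the tag that was just built.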
    pattern = [
        r"(\s+)# - workspace #",
        "aiscalator/airflow:latest",
    ]
    workspace = []
    makedirs(join(conf.app_config_home(),
                  "workspace"), exist_ok=True)
    for line in conf.app_config()[ws_path]:
        host_src, container_dst = _split_workspace_string(conf, line)
        workspace += [r"\1- " + host_src + ':' + container_dst]
    workspace += [r"\1# - workspace #"]
    value = [
        "\n".join(workspace),
        "aiscalator/airflow:" + image,
    ]
    for file in listdir(src):
        utils.copy_replace(join(src, file),
                           join(dst, file),
                           pattern=pattern,
                           replace_value=value)


def airflow_up(conf: AiscalatorConfig):
    """
    Starts an airflow environment

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for the application

    """
    if _docker_compose_grep(conf):
        if not _airflow_docker_build(conf):
            raise Exception("Failed to build docker image")
    _docker_compose(conf, ["up", "-d"])


def _docker_compose_grep(conf: AiscalatorConfig):
    """
    Checks if the docker-compose file is using the
    aiscalator/airflow docker image. If so, we need to
    make sure that image is properly built and available.

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for the application

    Returns
    -------
    bool
        True if the aiscalator/airflow docker image is
        needed and should be built.
    """
    docker_compose_file = join(conf.app_config_home(), "config",
                               conf.airflow_docker_compose_file())
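    # match compose service definitions such as
    # "    image: aiscalator/airflow:latest"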
    pattern = re.compile(r"\s+image:\s+aiscalator/airflow")
    try:
        with open(docker_compose_file, "r") as file:
            for line in file:
                if re.match(pattern, line):
                    # docker compose needs the image
                    return True
    except FileNotFoundError:
        # no docker compose, default will need the image
        return True
    return False


def _airflow_docker_build(conf: AiscalatorConfig):
    """Build the aiscalator/airflow image and return its tag."""
    logger = logging.getLogger(__name__)
    # TODO get airflow dockerfile from conf?
    conf.app_config_home()
    dockerfile_dir = utils.data_file("../config/docker/airflow")
    # TODO customize dockerfile with apt_packages, requirements etc
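    # The host's docker group id/name are passed as build args, presumably so
    # processes inside the image can access /var/run/docker.sock.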
    docker_gid, docker_group = _find_docker_gid_group()
    commands = [
        "docker", "build",
        "--build-arg", "DOCKER_GID=" + str(docker_gid),
        "--build-arg", "DOCKER_GROUP=" + str(docker_group),
        "--rm", "-t", "aiscalator/airflow:latest",
        dockerfile_dir
    ]
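    # capture the new image id from docker's "Successfully built <id>" output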
    log = LogRegexAnalyzer(b'Successfully built ([a-zA-Z0-9]+)\n')
    logger.info("Running...: %s", " ".join(commands))
    utils.subprocess_run(commands, log_function=log.grep_logs)
    result = log.artifact()
    if result:
        # tag the image built with the sha256 of the dockerfile
        tag = utils.sha256(join(dockerfile_dir, 'Dockerfile'))[:12]
        commands = [
            "docker", "tag", result, "aiscalator/airflow:" + tag
        ]
        logger.info("Running...: %s", " ".join(commands))
        utils.subprocess_run(commands)
        return tag
    return None


def _find_docker_gid_group():
    """Returns the group ID and name owning the /var/run/docker.sock file"""
    stat_info = stat('/var/run/docker.sock')
    if stat_info:
        gid = stat_info.st_gid
        return gid, getgrgid(gid)[0]
    return None, None


def airflow_down(conf: AiscalatorConfig):
    """
    Stop an airflow environment

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for the application

    """
    _docker_compose(conf, ["down"])


def airflow_cmd(conf: AiscalatorConfig, service="webserver", cmd=None):
    """
    Execute an airflow subcommand

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for the application
    service : string
        service name of the container where to run the command
    cmd : list
        subcommands to run
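
    Example (illustrative, assumes the Airflow 1.x CLI in the container)::

        airflow_cmd(conf, service="webserver", cmd=["airflow", "list_dags"])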
    """
    commands = [
        "run", "--rm", service,
    ]
    if cmd is not None:
        commands += cmd
    else:
        commands += ["airflow"]
    _docker_compose(conf, commands)


def airflow_edit(conf: AiscalatorConfig):
    """
    Starts a Jupyter Lab environment to edit the airflow DAG files

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for the application

    """
    logger = logging.getLogger(__name__)
    conf.validate_config()
    docker_image = _airflow_docker_build(conf)
    if docker_image:
        # TODO: shutdown other jupyter lab still running
        port = 10001
        notebook = basename(conf.dag_field('definition.code_path'))
        commands = _prepare_docker_env(conf, [
            "aiscalator/airflow:" + docker_image, 'jupyter', 'lab'
        ], port)
        return utils.wait_for_jupyter_lab(commands, logger, notebook,
                                          port, "dags")
    raise Exception("Failed to build docker image")


def _prepare_docker_env(conf: AiscalatorConfig, program, port):
    """
    Assembles the list of commands to execute a docker run call

    When calling "docker run ...", this function also adds a set of
    additional parameters to mount the proper volumes and expose the
    correct environment for the call in the docker image.

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for the step
    program : list
        the rest of the commands to execute as part of
        the docker run call
    port : int
        port on the host to publish the container's port 8888 to

    Returns
    -------
    list
        The full list of strings representing the commands to execute
        in the docker run call
    """
    commands = [
        "docker", "run", "--name", conf.dag_container_name(), "--rm",
        # TODO improve port publishing
        "-p", str(port) + ":8888",
    ]
    for env in conf.user_env_file():
        if isfile(env):
            commands += ["--env-file", env]
    code_path = conf.dag_file_path('definition.code_path')
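    # mount the directory containing the DAG definition code as the
    # container's dags folder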
    commands += [
        "--mount", "type=bind,source=" + dirname(code_path) +
        ",target=/usr/local/airflow/work/dags/",
    ]
    ws_path = "airflow.setup.workspace_paths"
    if conf.app_config_has(ws_path):
        makedirs(join(conf.app_config_home(),
                      "workspace"), exist_ok=True)
        for folder in conf.app_config()[ws_path]:
            src, dst = _split_workspace_string(conf, folder)
            commands += [
                "--mount", "type=bind,source=" + src +
                ",target=" + dst
            ]
    commands += program
    return commands


def _split_workspace_string(conf: AiscalatorConfig, workspace):
    """
    Interprets the workspace string and splits it into src and dst
    paths:
    - The src is a path on the host machine.
    - The dst is a path in the container.
    In case the workspace string doesn't specify both paths
    separated by a ':', this function will automatically mount it
    in the $app_config_home_directory/workspace/ folder, creating a
    symbolic link with the same basename as the workspace.
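
    For example (illustrative paths), "/data/project:/usr/local/airflow/work/project"
    is split into (src, dst) as given, whereas a bare "/data/project" gets a
    symbolic link under <app_config_home>/workspace/ and is mounted as
    /usr/local/airflow/work/workspace/project.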

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for the step
    workspace : str
        the workspace string to interpret

    Returns
    -------
    (str, str)
        A tuple with both src and dst paths
    """
    logger = logging.getLogger(__name__)
    root_dir = conf.app_config_home()
    if workspace.strip():
        if ':' in workspace:
            src = abspath(workspace.split(':')[0])
            if not src.startswith('/'):
                src = abspath(join(root_dir, src))
            dst = workspace.split(':')[1]
            if not dst.startswith('/'):
                dst = abspath(join(root_dir, dst))
        else:
            src = abspath(workspace)
            if not src.startswith('/'):
                src = abspath(join(root_dir, src))
            dst = join("workspace", basename(workspace.strip('/')))
            link = join(root_dir, dst)
            if realpath(src) != realpath(link):
                if exists(link) and islink(link):
                    logger.warning("Removing an existing symbolic"
                                   " link %s -> %s",
                                   link, realpath(link))
                    remove(link)
                if not exists(link):
                    logger.info("Creating a symbolic link %s -> %s", link, src)
                    symlink(src, link)
            dst = "/usr/local/airflow/work/" + dst
        return src, dst
    return None, None


def airflow_push(conf: AiscalatorConfig):
    """
    Push to an airflow environment (not implemented yet)

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for the application

    """
    # TODO to implement
    logging.error("Not implemented yet %s", conf.app_config_home())