|
1
|
|
|
""" |
|
2
|
|
|
This module provides functionality required for managing endpoint objects in |
|
3
|
|
|
TabPy. It provides a way to download endpoint files from a remote location |
|
4
|
|
|
and then properly clean up the local endpoint files on update/remove of endpoint |
|
5
|
|
|
objects. |
|
6
|
|
|
|
|
7
|
|
|
The local temporary files for TabPy are by default located at |
|
8
|
|
|
/tmp/query_objects |
|
9
|
|
|
|
|
10
|
|
|
""" |
|
11
|
1 |
|
import logging |
|
12
|
1 |
|
import os |
|
13
|
1 |
|
import shutil |
|
14
|
1 |
|
from re import compile as _compile |
|
15
|
|
|
|
|
16
|
|
|
|
|
17
|
1 |
|
# Endpoint names may contain ASCII letters, digits, hyphens, underscores and
# whitespace only.
# NOTE(review): ``\s`` also admits tabs and newlines, although the error
# message raised below only mentions spaces -- confirm this is intended.
_name_checker = _compile(r"^[a-zA-Z0-9-_\s]+$")


def _check_endpoint_name(name, logger=logging.getLogger(__name__)):
    """Validate an endpoint name, logging and raising when it is invalid.

    A valid name is a non-empty ``str`` that matches ``_name_checker``.
    No reserved-name check is performed by this function.

    Parameters
    ----------
    name : str
        Candidate endpoint name.
    logger : logging.Logger, optional
        Logger that receives a CRITICAL record carrying the error message
        right before the exception is raised.

    Raises
    ------
    TypeError
        If ``name`` is not a ``str``.
    ValueError
        If ``name`` is empty or contains a character outside the allowed set.
    """
    # Pick the failure (if any) first so that logging and raising happen in
    # exactly one place.
    if not isinstance(name, str):
        error_type, msg = TypeError, "Endpoint name must be a string"
    elif name == "":
        error_type, msg = ValueError, "Endpoint name cannot be empty"
    elif not _name_checker.match(name):
        error_type = ValueError
        msg = (
            "Endpoint name can only contain: a-z, A-Z, 0-9,"
            " underscore, hyphens and spaces."
        )
    else:
        return

    logger.log(logging.CRITICAL, msg)
    raise error_type(msg)
|
40
|
|
|
|
|
41
|
|
|
|
|
42
|
1 |
|
def grab_files(directory):
    """
    Generator that recursively yields the path of every file in a directory.

    Parameters
    ----------
    directory : str
        Path of the directory to scan.

    Yields
    ------
    str
        ``directory`` joined with the relative location of each entry for
        which ``os.path.isfile`` is true, including files in nested
        sub-directories. Entries appear in the order ``os.listdir`` reports
        them.

    Notes
    -----
    Nothing is yielded when ``directory`` is not an existing directory.
    Entries that are neither a directory nor a file are skipped.
    """
    if not os.path.isdir(directory):
        return

    for name in os.listdir(directory):
        full_path = os.path.join(directory, name)
        if os.path.isdir(full_path):
            # Descend into the sub-directory and forward everything it yields.
            yield from grab_files(full_path)
        elif os.path.isfile(full_path):
            yield full_path
|
56
|
|
|
|
|
57
|
|
|
|
|
58
|
1 |
|
def cleanup_endpoint_files(
    name, query_path, logger=logging.getLogger(__name__), retain_versions=None
):
    """
    Cleanup the disk space a certain endpoint uses.

    Parameters
    ----------
    name : str
        The endpoint name. It is validated with ``_check_endpoint_name``
        before anything is touched on disk.

    query_path : str
        Directory under which the endpoint folder lives; the files handled
        here are those in ``os.path.join(query_path, name)``.

    logger : logging.Logger, optional
        Logger used for the validation errors and for the INFO record that
        lists the retained folders.

    retain_versions : iterable, optional
        If given and non-empty, then every sub-folder of the endpoint folder
        is removed except the ones named ``str(version)`` for each version in
        this iterable; entries directly inside the endpoint folder that are
        not directories are left in place. Otherwise the whole endpoint
        folder is removed.

    Raises
    ------
    TypeError, ValueError
        Propagated from ``_check_endpoint_name`` when ``name`` is invalid.
    """
    _check_endpoint_name(name, logger=logger)
    local_dir = os.path.join(query_path, name)

    # nothing to clean, this is true for state file path where we load
    # Query Object directly from the state path instead of downloading
    # to temporary location
    if not os.path.exists(local_dir):
        return

    if not retain_versions:
        shutil.rmtree(local_dir)
        return

    retain_folders = [
        os.path.join(local_dir, str(version)) for version in retain_versions
    ]
    # Lazy %-formatting renders the same message as before but only when the
    # record is actually emitted.
    logger.log(logging.INFO, "Retain folders: %s", retain_folders)

    # Set lookup instead of scanning the list once per directory entry.
    folders_to_keep = frozenset(retain_folders)
    for file_or_dir in os.listdir(local_dir):
        candidate_dir = os.path.join(local_dir, file_or_dir)
        if candidate_dir not in folders_to_keep and os.path.isdir(candidate_dir):
            shutil.rmtree(candidate_dir)
|
95
|
|
|
|