"""
This module provides functionality required for managing endpoint objects in
TabPy. It provides a way to download endpoint files from a remote location
and then properly clean up the local endpoint files on update/remove of
endpoint objects.

By default, the local temporary files for TabPy are located at
/tmp/query_objects
"""
11
|
1 |
|
import logging |
12
|
1 |
|
import os |
13
|
1 |
|
import shutil |
14
|
1 |
|
from re import compile as _compile |
15
|
|
|
|
16
|
|
|
|
17
|
1 |
|
# Pattern every endpoint name has to satisfy: one or more characters taken
# from letters, digits, hyphen, underscore and whitespace.
_name_checker = _compile(r"^[a-zA-Z0-9-_\s]+$")


def _check_endpoint_name(name, logger=logging.getLogger(__name__)):
    """Validate an endpoint name.

    The name has to be a non-empty string that matches ``_name_checker``.
    On failure the message is logged at CRITICAL level through ``logger``
    and then raised.

    Raises
    ------
    TypeError
        If ``name`` is not a string.
    ValueError
        If ``name`` is empty or contains characters outside the allowed set.
    """
    if not isinstance(name, str):
        failure = TypeError("Endpoint name must be a string")
    elif name == "":
        failure = ValueError("Endpoint name cannot be empty")
    elif _name_checker.match(name) is None:
        failure = ValueError(
            "Endpoint name can only contain: a-z, A-Z, 0-9,"
            " underscore, hyphens and spaces."
        )
    else:
        # Name is acceptable, nothing to report.
        return

    # Single exit for every rejection: log first, then raise the same text.
    logger.log(logging.CRITICAL, str(failure))
    raise failure
40
|
|
|
|
41
|
|
|
|
42
|
1 |
|
def grab_files(directory):
    """
    Generator that returns all files in a directory.

    Sub-directories are walked recursively, in the order ``os.listdir``
    reports them. Each yielded value is ``directory`` joined with the
    relative location of a regular file, so the paths are absolute only
    if ``directory`` is absolute.

    Nothing is yielded when ``directory`` is not an existing directory.
    Entries that are neither a directory nor a regular file are skipped.
    """
    if not os.path.isdir(directory):
        return

    for name in os.listdir(directory):
        full_path = os.path.join(directory, name)
        if os.path.isdir(full_path):
            # Delegate to the nested generator instead of re-yielding by hand.
            yield from grab_files(full_path)
        elif os.path.isfile(full_path):
            yield full_path
56
|
|
|
|
57
|
|
|
|
58
|
1 |
|
def cleanup_endpoint_files(
    name, query_path, logger=logging.getLogger(__name__), retain_versions=None
):
    """
    Cleanup the disk space a certain endpoint uses.

    Parameters
    ----------
    name : str
        The endpoint name. It is validated with ``_check_endpoint_name``
        before anything is touched on disk.

    query_path : str
        Directory holding the endpoint folders; the files of this endpoint
        are looked up in ``os.path.join(query_path, name)``.

    logger : logging.Logger, optional
        Logger used for name validation errors and for reporting which
        folders are retained.

    retain_versions : iterable, optional
        Versions to keep. Each item is converted with ``str()`` to get the
        name of a folder inside the endpoint folder. If given and not
        empty, every other sub-folder of the endpoint folder is removed
        (plain files directly inside the endpoint folder are left alone);
        otherwise the whole endpoint folder is removed.

    Raises
    ------
    TypeError, ValueError
        Propagated from ``_check_endpoint_name`` for an invalid name.
    """
    _check_endpoint_name(name, logger=logger)
    local_dir = os.path.join(query_path, name)

    # nothing to clean, this is true for state file path where we load
    # Query Object directly from the state path instead of downloading
    # to temporary location
    if not os.path.exists(local_dir):
        return

    if not retain_versions:
        shutil.rmtree(local_dir)
        return

    retain_folders = [
        os.path.join(local_dir, str(version)) for version in retain_versions
    ]
    logger.log(logging.INFO, f"Retain folders: {retain_folders}")

    # Build the lookup once; the list above is kept only so the log
    # message keeps its format.
    retained = set(retain_folders)
    for file_or_dir in os.listdir(local_dir):
        candidate_dir = os.path.join(local_dir, file_or_dir)
        if os.path.isdir(candidate_dir) and candidate_dir not in retained:
            shutil.rmtree(candidate_dir)
95
|
|
|
|