1
|
|
|
from __future__ import annotations |
|
|
|
|
2
|
|
|
|
3
|
|
|
import os |
4
|
|
|
from collections import Set |
|
|
|
|
5
|
|
|
from dataclasses import dataclass |
6
|
|
|
from pathlib import Path |
7
|
|
|
from typing import Type, TypeVar, Any, Mapping, Optional, Collection, Union |
8
|
|
|
|
9
|
|
|
import orjson |
|
|
|
|
10
|
|
|
from chembl_webresource_client.settings import Settings as ChemblSettings |
|
|
|
|
11
|
|
|
from mandos.model.utils.resources import MandosResources |
12
|
|
|
from pocketutils.core.dot_dict import NestedDotDict |
|
|
|
|
13
|
|
|
from pocketutils.core.query_utils import QueryExecutor |
|
|
|
|
14
|
|
|
from pocketutils.core.exceptions import DirDoesNotExistError, FileDoesNotExistError, ConfigError |
|
|
|
|
15
|
|
|
from pocketutils.tools.common_tools import CommonTools |
|
|
|
|
16
|
|
|
from suretime import Suretime |
|
|
|
|
17
|
|
|
|
18
|
|
|
from mandos.model.utils.setup import logger |
|
|
|
|
19
|
|
|
|
20
|
|
|
# Packaged default settings, read once at import time from the bundled
# ``default_settings.json`` resource and parsed with orjson.
_default_settings_text = MandosResources.path("default_settings.json").read_text(
    encoding="utf8"
)
defaults: Mapping[str, Any] = orjson.loads(_default_settings_text)

# Generic type variable used by the typed lookup helper inside ``Settings.load``.
T = TypeVar("T")
|
|
|
|
24
|
|
|
|
25
|
|
|
|
26
|
|
|
class Globals:
    """
    Process-wide values computed once, when this class body is executed.

    Attributes:
        chembl_settings: The shared ``chembl_webresource_client`` settings instance.
        cwd: Working directory at import time.
        where_am_i_installed: Directory three levels above this file.
        is_in_ci: Parsed value of the ``IS_IN_CI`` environment variable.
        mandos_path: Mandos home directory. Under CI this is the test cache
            below ``where_am_i_installed``; otherwise it is the value of the
            ``MANDOS_HOME`` environment variable (matched case-insensitively),
            falling back to ``~/.mandos``.
        settings_path: ``settings.toml`` inside ``mandos_path``.
        disable_chembl: Parsed value of ``MANDOS_NO_CHEMBL``.
        disable_pubchem: Parsed value of ``MANDOS_NO_PUBCHEM``.
    """

    chembl_settings = ChemblSettings.Instance()
    cwd = os.getcwd()
    where_am_i_installed = Path(__file__).parent.parent.parent
    is_in_ci = CommonTools.parse_bool(os.environ.get("IS_IN_CI", "false"))
    if is_in_ci:
        mandos_path = where_am_i_installed / "tests" / "resources" / ".mandos-cache"
    else:
        _default_mandos_home = Path.home() / ".mandos"
        # Keys are lower-cased so the lookup is case-insensitive; the lookup key
        # must therefore be lower-case too. (The previous upper-case key could
        # never match, so MANDOS_HOME was silently ignored.)
        env_vars = {k.lower(): v for k, v in os.environ.items()}
        mandos_path = Path(env_vars.get("mandos_home", _default_mandos_home))
    settings_path = mandos_path / "settings.toml"
    disable_chembl = CommonTools.parse_bool(os.environ.get("MANDOS_NO_CHEMBL", "false"))
    disable_pubchem = CommonTools.parse_bool(os.environ.get("MANDOS_NO_PUBCHEM", "false"))
40
|
|
|
|
41
|
|
|
|
42
|
|
|
@dataclass(frozen=True, repr=True)
class Settings:
    """
    Immutable bundle of Mandos configuration values.

    Instances are normally created through :meth:`from_file`, :meth:`empty`,
    or :meth:`load`, which look every key up in the supplied data and pass the
    corresponding entry of the module-level ``defaults`` mapping as fallback.
    All ``*_cache_path`` properties are sub-directories of ``cache_path``.
    """

    is_testing: bool
    ntp_continent: str
    cache_path: Path
    cache_gzip: bool
    chembl_expire_sec: int
    chembl_n_tries: int
    chembl_timeout_sec: int
    chembl_backoff_factor: float
    chembl_query_delay_min: float
    chembl_query_delay_max: float
    chembl_fast_save: bool
    pubchem_expire_sec: int
    pubchem_n_tries: int
    pubchem_timeout_sec: float
    pubchem_backoff_factor: float
    pubchem_query_delay_min: float
    pubchem_query_delay_max: float
    hmdb_expire_sec: int
    hmdb_timeout_sec: float
    hmdb_backoff_factor: float
    hmdb_query_delay_min: float
    hmdb_query_delay_max: float
    taxon_expire_sec: int
    archive_filename_suffix: str
    default_table_suffix: str
    selenium_driver: str
    selenium_driver_path: Optional[Path]

    def __post_init__(self):
        # Currently a no-op.
        pass

    @property
    def all_cache_paths(self) -> Set[Path]:
        """Return the set of per-source cache directories."""
        return {
            self.chembl_cache_path,
            self.pubchem_cache_path,
            self.g2p_cache_path,
            self.hmdb_cache_path,
            self.taxonomy_cache_path,
        }

    @property
    def driver_path(self) -> Path:
        """Return ``cache_path / "driver"``."""
        return self.cache_path / "driver"

    @property
    def chembl_cache_path(self) -> Path:
        """Return ``cache_path / "chembl"``."""
        return self.cache_path / "chembl"

    @property
    def chembl_scrape_path(self) -> Path:
        """Return the ``scrape`` directory inside the ChEMBL cache."""
        return self.chembl_cache_path / "scrape"

    @property
    def pubchem_cache_path(self) -> Path:
        """Return ``cache_path / "pubchem"``."""
        return self.cache_path / "pubchem"

    @property
    def g2p_cache_path(self) -> Path:
        """Return ``cache_path / "g2p"``."""
        return self.cache_path / "g2p"

    @property
    def hmdb_cache_path(self) -> Path:
        """Return ``cache_path / "hmdb"``."""
        return self.cache_path / "hmdb"

    @property
    def taxonomy_cache_path(self) -> Path:
        """Return ``cache_path / "taxonomy"``."""
        return self.cache_path / "taxonomy"

    @classmethod
    def from_file(cls, path: Path) -> Settings:
        """Read the TOML file at ``path`` and build a ``Settings`` from it."""
        return cls.load(NestedDotDict.read_toml(path))

    @classmethod
    def empty(cls) -> Settings:
        """Build a ``Settings`` from an empty mapping, i.e. from the defaults only."""
        return cls.load(NestedDotDict({}))

    @classmethod
    def load(cls, data: NestedDotDict) -> Settings:
        """
        Build a ``Settings`` from ``data``.

        Each key is fetched with ``data.get_as``; the matching entry of the
        module-level ``defaults`` mapping is passed as the fallback value.

        Raises:
            ConfigError: If ``data.get_as`` raises ``TypeError`` for a key.
                The original error is attached as ``__cause__``.
        """

        def get(s: str, t: Type[T]) -> T:
            try:
                return data.get_as(s, t, defaults[s])
            except TypeError as e:
                # Chain the cause so the underlying conversion failure stays visible.
                raise ConfigError(
                    f"Key {s}={data.get(s), defaults[s]} is not of type {t}"
                ) from e

        _continent = Suretime.Types.NtpContinents.of
        return cls(
            is_testing=get("is_testing", bool),
            ntp_continent=get("continent_code", _continent),
            cache_path=Path(get("cache.path", str)).expanduser(),
            cache_gzip=get("cache.gzip", bool),
            chembl_expire_sec=get("query.chembl.expire_sec", int),
            chembl_n_tries=get("query.chembl.n_tries", int),
            chembl_fast_save=get("query.chembl.fast_save", bool),
            chembl_timeout_sec=get("query.chembl.timeout_sec", int),
            chembl_backoff_factor=get("query.chembl.backoff_factor", float),
            # NOTE(review): min and max delay are both read from the single
            # ``delay_sec`` key for every source -- confirm this is intended.
            chembl_query_delay_min=get("query.chembl.delay_sec", float),
            chembl_query_delay_max=get("query.chembl.delay_sec", float),
            pubchem_expire_sec=get("query.pubchem.expire_sec", int),
            pubchem_timeout_sec=get("query.pubchem.timeout_sec", int),
            pubchem_backoff_factor=get("query.pubchem.backoff_factor", float),
            pubchem_query_delay_min=get("query.pubchem.delay_sec", float),
            pubchem_query_delay_max=get("query.pubchem.delay_sec", float),
            pubchem_n_tries=get("query.pubchem.n_tries", int),
            hmdb_expire_sec=get("query.hmdb.expire_sec", int),
            hmdb_timeout_sec=get("query.hmdb.timeout_sec", int),
            hmdb_backoff_factor=get("query.hmdb.backoff_factor", float),
            hmdb_query_delay_min=get("query.hmdb.delay_sec", float),
            hmdb_query_delay_max=get("query.hmdb.delay_sec", float),
            taxon_expire_sec=get("query.taxa.expire_sec", int),
            archive_filename_suffix=get("cache.archive_filename_suffix", str),
            default_table_suffix=get("default_table_suffix", str),
            selenium_driver=get("selenium_driver", str).title(),
            selenium_driver_path=get("selenium_driver_path", Path),
        )

    @classmethod
    def defaults(cls) -> Mapping[str, Any]:
        """Return a shallow copy of the module-level ``defaults`` mapping."""
        # Inside a method body the bare name resolves to the module global,
        # not to this classmethod.
        return dict(defaults)

    def configure(self):
        """
        Copy the ChEMBL-related values onto ``Globals.chembl_settings``.

        Does nothing when ``Globals.disable_chembl`` is true.
        """
        if not Globals.disable_chembl:
            instance = Globals.chembl_settings
            instance.CACHING = True
            instance.CACHE_NAME = str(self.chembl_cache_path / "chembl.sqlite")
            instance.TOTAL_RETRIES = self.chembl_n_tries
            instance.FAST_SAVE = self.chembl_fast_save
            instance.TIMEOUT = self.chembl_timeout_sec
            instance.BACKOFF_FACTOR = self.chembl_backoff_factor
            instance.CACHE_EXPIRE = self.chembl_expire_sec

    @classmethod
    def set_path_for_selenium(cls) -> None:
        """
        Append the driver directory, the resources directory, and the install
        directory to ``PATH`` (in that order).
        """
        cls.add_to_path(
            [MANDOS_SETTINGS.driver_path, MandosResources.dir(), Globals.where_am_i_installed]
        )

    @classmethod
    def add_to_path(cls, paths: Collection[Union[None, str, Path]]) -> None:
        """
        Append directories to the ``PATH`` environment variable.

        ``None`` entries are skipped and duplicates are dropped. The remaining
        entries are appended in the order given, so earlier entries take
        precedence over later ones during executable lookup.

        Args:
            paths: Directories to append; ``None`` entries are ignored.

        Raises:
            DirDoesNotExistError: If an entry exists but is neither a
                directory nor a mount point.
        """
        # dict.fromkeys de-duplicates while keeping the caller's order;
        # a set would make the resulting PATH order arbitrary.
        unique = list(dict.fromkeys(Path(p) for p in paths if p is not None))
        for path in unique:
            if path.exists() and not path.is_dir() and not path.is_mount():
                raise DirDoesNotExistError(f"Path {path} is not a directory or mount")
        joined = os.pathsep.join(dict.fromkeys(str(p) for p in unique))
        if joined:
            # PATH may be unset; avoid a KeyError and a leading separator.
            current = os.environ.get("PATH", "")
            os.environ["PATH"] = (current + os.pathsep + joined) if current else joined
        logger.debug(f"Added to PATH: {joined}")
194
|
|
|
|
195
|
|
|
|
196
|
|
|
# Build the module-wide settings object: defaults only when no settings file
# exists at ``Globals.settings_path``, otherwise the contents of that file.
if not Globals.settings_path.exists():
    MANDOS_SETTINGS = Settings.empty()
    logger.info(f"Using defaults (no file at {Globals.settings_path})")
else:
    MANDOS_SETTINGS = Settings.from_file(Globals.settings_path)
    logger.info(f"Read settings at {Globals.settings_path}")

# Apply the loaded values to the ChEMBL client (no-op when ChEMBL is disabled).
MANDOS_SETTINGS.configure()
logger.debug(f"Setting ChEMBL cache to {MANDOS_SETTINGS.chembl_cache_path}")
204
|
|
|
|
205
|
|
|
|
206
|
|
|
class QueryExecutors:
    """
    One ``QueryExecutor`` per remote data source.

    Each executor is constructed from that source's minimum and maximum query
    delay, as configured in ``MANDOS_SETTINGS``.
    """

    chembl = QueryExecutor(
        MANDOS_SETTINGS.chembl_query_delay_min, MANDOS_SETTINGS.chembl_query_delay_max
    )
    pubchem = QueryExecutor(
        MANDOS_SETTINGS.pubchem_query_delay_min, MANDOS_SETTINGS.pubchem_query_delay_max
    )
    # Use the HMDB delays here; this previously reused the PubChem values,
    # so the ``query.hmdb.delay_sec`` setting had no effect.
    hmdb = QueryExecutor(
        MANDOS_SETTINGS.hmdb_query_delay_min, MANDOS_SETTINGS.hmdb_query_delay_max
    )


# Module-level alias for the executor namespace.
QUERY_EXECUTORS = QueryExecutors


__all__ = ["MANDOS_SETTINGS", "QUERY_EXECUTORS"]
222
|
|
|
|