1
|
|
|
from __future__ import annotations |
|
|
|
|
2
|
|
|
|
3
|
|
|
import os |
4
|
|
|
from dataclasses import dataclass |
5
|
|
|
from pathlib import Path |
6
|
|
|
|
7
|
|
|
from chembl_webresource_client.settings import Settings as ChemblSettings |
|
|
|
|
8
|
|
|
from pocketutils.core.dot_dict import NestedDotDict |
|
|
|
|
9
|
|
|
from pocketutils.tools.common_tools import CommonTools |
|
|
|
|
10
|
|
|
|
11
|
|
|
from mandos import logger |
12
|
|
|
|
13
|
|
|
# Number of seconds in a 365-day year; used below as the default cache expiry.
ONE_YEAR = 365 * 24 * 60 * 60
14
|
|
|
|
15
|
|
|
|
16
|
|
|
class Globals:
    """
    Process-wide values that are computed once, while this class body executes at import time.

    Environment variables read here:

    - ``IS_IN_CI``: parsed with ``CommonTools.parse_bool`` (default ``"false"``).
      When true, ``mandos_path`` points at ``tests/resources/.mandos-cache``,
      resolved three directory levels above this file.
    - ``MANDOS_HOME``: matched case-insensitively; overrides the default
      ``~/.mandos`` directory when not running in CI.
    - ``MANDOS_NO_CHEMBL`` / ``MANDOS_NO_PUBCHEM``: parsed with
      ``CommonTools.parse_bool`` (default ``"false"``).
    """

    # Shared settings object of the ChEMBL web-resource client.
    chembl_settings = ChemblSettings.Instance()
    is_in_ci = CommonTools.parse_bool(os.environ.get("IS_IN_CI", "false"))
    if is_in_ci:
        mandos_path = Path(__file__).parent.parent.parent / "tests" / "resources" / ".mandos-cache"
    else:
        # The variable names are lowercased so the match is case-insensitive,
        # which means the key being looked up has to be lowercase as well;
        # an uppercase key could never match and the variable would be ignored.
        mandos_path = Path(
            {k.lower(): v for k, v in os.environ.items()}.get(
                "mandos_home", Path.home() / ".mandos"
            )
        )
    # Locations derived from the base directory chosen above.
    settings_path = mandos_path / "settings.toml"
    chembl_cache = mandos_path / "chembl"
    taxonomy_cache = mandos_path / "taxonomy"
    # Opt-out switches; within this file only ``disable_chembl`` is consulted.
    disable_chembl = CommonTools.parse_bool(os.environ.get("MANDOS_NO_CHEMBL", "false"))
    disable_pubchem = CommonTools.parse_bool(os.environ.get("MANDOS_NO_PUBCHEM", "false"))
32
|
|
|
|
33
|
|
|
|
34
|
|
|
@dataclass(frozen=True, repr=True)
class Settings:
    """
    Immutable bundle of mandos configuration values.

    Instances are normally built with :meth:`from_file`, :meth:`empty`, or
    :meth:`load`, which read each field from a dotted key under ``mandos.``
    and fall back to a built-in default when the key is absent.
    The ``*_cache_path`` properties are subdirectories of ``cache_path``.
    """

    is_testing: bool
    cache_path: Path
    cache_gzip: bool
    chembl_expire_sec: int
    chembl_n_retries: int
    chembl_timeout_sec: int
    chembl_backoff_factor: float
    chembl_query_delay_min: float
    chembl_query_delay_max: float
    chembl_fast_save: bool
    pubchem_expire_sec: int
    pubchem_n_retries: int
    pubchem_timeout_sec: float
    pubchem_backoff_factor: float
    pubchem_query_delay_min: float
    pubchem_query_delay_max: float
    pubchem_use_parent: bool
    taxonomy_filename_format: str

    @property
    def chembl_cache_path(self) -> Path:
        """Return the ``chembl`` subdirectory of ``cache_path``."""
        return self.cache_path / "chembl"

    @property
    def pubchem_cache_path(self) -> Path:
        """Return the ``pubchem`` subdirectory of ``cache_path``."""
        return self.cache_path / "pubchem"

    @property
    def hmdb_cache_path(self) -> Path:
        """Return the ``hmdb`` subdirectory of ``cache_path``."""
        return self.cache_path / "hmdb"

    @property
    def taxonomy_cache_path(self) -> Path:
        """Return the ``taxonomy`` subdirectory of ``cache_path``."""
        return self.cache_path / "taxonomy"

    @property
    def match_cache_path(self) -> Path:
        """Return the ``match`` subdirectory of ``cache_path``."""
        return self.cache_path / "match"

    @classmethod
    def from_file(cls, path: Path) -> Settings:
        """Read the TOML file at ``path`` and build settings from it via :meth:`load`."""
        return cls.load(NestedDotDict.read_toml(path))

    @classmethod
    def empty(cls) -> Settings:
        """Build settings from an empty mapping, i.e. using the defaults in :meth:`load`."""
        return cls.load(NestedDotDict({}))

    @classmethod
    def load(cls, data: NestedDotDict) -> Settings:
        """
        Build settings from ``data``, reading each field with ``data.get_as(key, type, default)``.

        Args:
            data: Nested mapping addressed with dotted keys such as ``mandos.cache.path``.

        Returns:
            A new ``Settings`` instance; ``cache_path`` has ``~`` expanded.
        """
        # (original marker: 117571 -- its meaning is not evident from this file)
        # The ChEMBL backoff factor used to be read only from a key containing
        # "pubchem_". Prefer the correctly named key, but keep honoring the
        # legacy one so existing settings files behave as before.
        legacy_chembl_backoff = data.get_as(
            "mandos.query.chembl.pubchem_backoff_factor", float, 2
        )
        return cls(
            is_testing=data.get_as("mandos.is_testing", bool, False),
            cache_path=data.get_as("mandos.cache.path", Path, Globals.mandos_path).expanduser(),
            # Explicit default so the field always holds a real bool.
            # NOTE(review): assumes get_as yields None for a missing key without
            # a default, so False keeps the same truthiness -- confirm.
            cache_gzip=data.get_as("mandos.cache.gzip", bool, False),
            chembl_expire_sec=data.get_as("mandos.query.chembl.expire_sec", int, ONE_YEAR),
            chembl_n_retries=data.get_as("mandos.query.chembl.n_retries", int, 1),
            chembl_fast_save=data.get_as("mandos.query.chembl.fast_save", bool, True),
            chembl_timeout_sec=data.get_as("mandos.query.chembl.timeout_sec", int, 1),
            chembl_backoff_factor=data.get_as(
                "mandos.query.chembl.backoff_factor", float, legacy_chembl_backoff
            ),
            # Min and max delay are both read from the single ``delay_sec`` key.
            chembl_query_delay_min=data.get_as("mandos.query.chembl.delay_sec", float, 0.25),
            chembl_query_delay_max=data.get_as("mandos.query.chembl.delay_sec", float, 0.25),
            pubchem_expire_sec=data.get_as("mandos.query.pubchem.expire_sec", int, ONE_YEAR),
            # NOTE(review): the field is annotated float but parsed as int -- confirm intent.
            pubchem_timeout_sec=data.get_as("mandos.query.pubchem.timeout_sec", int, 1),
            pubchem_backoff_factor=data.get_as(
                "mandos.query.pubchem.pubchem_backoff_factor", float, 2
            ),
            pubchem_query_delay_min=data.get_as("mandos.query.pubchem.delay_sec", float, 0.25),
            pubchem_query_delay_max=data.get_as("mandos.query.pubchem.delay_sec", float, 0.25),
            pubchem_n_retries=data.get_as("mandos.query.pubchem.n_retries", int, 1),
            pubchem_use_parent=data.get_as("mandos.query.pubchem.use_parent", bool, True),
            taxonomy_filename_format=data.get_as(
                "mandos.cache.taxonomy_filename_format", str, "{}.tsv.gz"
            ),
        )

    def configure(self) -> None:
        """
        Apply these settings to the ChEMBL client and create the cache directories.

        Unless ``Globals.disable_chembl`` is set, the ChEMBL-related fields are
        copied onto ``Globals.chembl_settings``. The five cache directories are
        then created (including parents) if they do not already exist.
        """
        if not Globals.disable_chembl:
            instance = Globals.chembl_settings
            instance.CACHING = True
            if not Globals.is_in_ci:  # not sure if this is needed
                instance.CACHE_NAME = str(self.chembl_cache_path / "chembl.sqlite")
            instance.TOTAL_RETRIES = self.chembl_n_retries
            instance.FAST_SAVE = self.chembl_fast_save
            instance.TIMEOUT = self.chembl_timeout_sec
            instance.BACKOFF_FACTOR = self.chembl_backoff_factor
            instance.CACHE_EXPIRE = self.chembl_expire_sec
        self.chembl_cache_path.mkdir(exist_ok=True, parents=True)
        self.pubchem_cache_path.mkdir(exist_ok=True, parents=True)
        self.hmdb_cache_path.mkdir(exist_ok=True, parents=True)
        self.taxonomy_cache_path.mkdir(exist_ok=True, parents=True)
        self.match_cache_path.mkdir(exist_ok=True, parents=True)
132
|
|
|
|
133
|
|
|
|
134
|
|
|
# Build the module-wide settings object: defaults when no settings file
# exists, otherwise whatever the file at ``Globals.settings_path`` provides.
if not Globals.settings_path.exists():
    MANDOS_SETTINGS = Settings.empty()
    logger.info(f"Using default settings (no file at {Globals.settings_path})")
else:
    MANDOS_SETTINGS = Settings.from_file(Globals.settings_path)
    logger.info(f"Read settings at {Globals.settings_path}")

# Push the values into the ChEMBL client and create the cache directories.
MANDOS_SETTINGS.configure()
logger.debug(f"Setting ChEMBL cache to {MANDOS_SETTINGS.chembl_cache_path}")


__all__ = ["MANDOS_SETTINGS"]
145
|
|
|
|