Passed
Push — main ( 4b9dc0...1b55d1 )
by Douglas
06:16 queued 02:32
created

mandos.model.settings.Settings.configure_chembl()   A

Complexity

Conditions 2

Size

Total Lines 13
Code Lines 12

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 12
nop 1
dl 0
loc 13
rs 9.8
c 0
b 0
f 0
1
from __future__ import annotations
0 ignored issues
show
introduced by
Missing module docstring
Loading history...
2
3
import dataclasses
4
import os
5
from dataclasses import dataclass
6
from pathlib import Path
7
from typing import AbstractSet, Any, Collection, Mapping, Optional, Type, TypeVar, Union
8
9
from pocketutils.core.dot_dict import NestedDotDict
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.dot_dict'
Loading history...
10
from pocketutils.core.exceptions import ConfigError, DirDoesNotExistError, XValueError
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.exceptions'
Loading history...
11
from pocketutils.core.query_utils import QueryExecutor
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.query_utils'
Loading history...
12
from pocketutils.tools.common_tools import CommonTools
0 ignored issues
show
introduced by
Unable to import 'pocketutils.tools.common_tools'
Loading history...
13
from pocketutils.tools.sys_tools import SystemTools
0 ignored issues
show
introduced by
Unable to import 'pocketutils.tools.sys_tools'
Loading history...
14
from suretime import Suretime
0 ignored issues
show
introduced by
Unable to import 'suretime'
Loading history...
15
from typeddfs import FileFormat, FrozeDict
0 ignored issues
show
introduced by
Unable to import 'typeddfs'
Loading history...
16
17
from mandos.model.utils.globals import Globals
18
from mandos.model.utils.setup import LOG_SETUP, MandosResources, logger
19
20
# Default value for every settings key, read from the packaged "default_settings.json"
# via MandosResources.json_dict and wrapped in a FrozeDict.
# Settings.load looks up its fallback for each key here.
defaults: Mapping[str, Any] = FrozeDict(MandosResources.json_dict("default_settings.json"))
21
# Multiplier applied to each configured ``delay_sec`` in Settings.load
# to derive the matching ``*_query_delay_max`` value (the delay itself is the minimum).
max_coeff = 1.1
0 ignored issues
show
Coding Style Naming introduced by
Constant name "max_coeff" doesn't conform to UPPER_CASE naming style ('([^\\W\\da-z][^\\Wa-z]*|__.*__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
22
# Type of a single setting value; used by the typed ``get`` helper inside Settings.load.
T = TypeVar("T")
0 ignored issues
show
Coding Style Naming introduced by
Class name "T" doesn't conform to PascalCase naming style ('[^\\W\\da-z][^\\W_]+$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
23
24
25
@dataclass(frozen=True, repr=True)
class Settings:
    """
    Immutable application settings.

    Instances are normally created through :meth:`load`, :meth:`from_file`, or
    :meth:`empty`, which fall back to the module-level ``defaults`` mapping for
    any key that is not supplied. The field order below is part of the generated
    constructor signature and must be kept stable.
    """

    # --- general / search ---
    is_testing: bool
    ntp_continent: str
    table_suffix: str
    log_suffix: str
    # --- cache ---
    cache_path: Path
    cache_gzip: bool
    save_every: int
    sanitize_paths: bool
    # --- ChEMBL ---
    chembl_expire_sec: int
    chembl_n_tries: int
    chembl_timeout_sec: int
    chembl_backoff_factor: float
    chembl_query_delay_min: float
    chembl_query_delay_max: float
    chembl_fast_save: bool
    # --- PubChem ---
    pubchem_expire_sec: int
    pubchem_n_tries: int
    pubchem_timeout_sec: float
    pubchem_backoff_factor: float
    pubchem_query_delay_min: float
    pubchem_query_delay_max: float
    # --- HMDB ---
    hmdb_expire_sec: int
    hmdb_timeout_sec: float
    hmdb_backoff_factor: float
    hmdb_query_delay_min: float
    hmdb_query_delay_max: float
    # --- misc ---
    taxon_expire_sec: int
    archive_filename_suffix: str
    selenium_driver: str
    selenium_driver_path: Optional[Path]
    log_signals: bool
    log_exit: bool

    @property
    def as_dict(self) -> Mapping[str, Any]:
        """Return all fields as a plain dict (via ``dataclasses.asdict``)."""
        return dataclasses.asdict(self)

    @property
    def all_cache_paths(self) -> AbstractSet[Path]:
        """Return the set of per-source cache directories under ``cache_path``."""
        return {
            self.chembl_cache_path,
            self.pubchem_cache_path,
            self.g2p_cache_path,
            self.hmdb_cache_path,
            self.taxonomy_cache_path,
        }

    @property
    def driver_path(self) -> Path:
        """Return ``cache_path / "driver"``."""
        return self.cache_path / "driver"

    @property
    def chembl_cache_path(self) -> Path:
        """Return ``cache_path / "chembl"``."""
        return self.cache_path / "chembl"

    @property
    def chembl_scrape_path(self) -> Path:
        """Return the ``"scrape"`` subdirectory of the ChEMBL cache directory."""
        return self.chembl_cache_path / "scrape"

    @property
    def pubchem_cache_path(self) -> Path:
        """Return ``cache_path / "pubchem"``."""
        return self.cache_path / "pubchem"

    @property
    def g2p_cache_path(self) -> Path:
        """Return ``cache_path / "g2p"``."""
        return self.cache_path / "g2p"

    @property
    def hmdb_cache_path(self) -> Path:
        """Return ``cache_path / "hmdb"``."""
        return self.cache_path / "hmdb"

    @property
    def taxonomy_cache_path(self) -> Path:
        """Return ``cache_path / "taxonomy"``."""
        return self.cache_path / "taxonomy"

    @classmethod
    def from_file(cls, path: Path) -> Settings:
        """Read the TOML file at ``path`` and build settings from it with :meth:`load`."""
        return cls.load(NestedDotDict.read_toml(path))

    @classmethod
    def empty(cls) -> Settings:
        """Build settings from an empty mapping, i.e. using only the defaults."""
        return cls.load(NestedDotDict({}))

    def __post_init__(self) -> None:
        """
        Validate the freshly constructed instance.

        Raises:
            XValueError: If any ``int`` or ``float`` field is negative.
                The three suffix checks are delegated to ``FileFormat.from_suffix``
                and ``LOG_SETUP.guess_file_sink_info``; presumably those raise on
                an unrecognized suffix -- their results are not used here.
        """
        # called only for their validation side effect
        FileFormat.from_suffix(self.table_suffix)
        FileFormat.from_suffix(self.archive_filename_suffix)
        LOG_SETUP.guess_file_sink_info(self.log_suffix)
        for k, v in self.as_dict.items():
            # this happens to work for now -- we have none that can be < 0
            if isinstance(v, (int, float)) and v < 0:
                raise XValueError(f"{k} = {v} < 0")

    @classmethod
    def load(cls, data: NestedDotDict) -> Settings:
        """
        Build settings from ``data``, using ``defaults`` for any missing key.

        Args:
            data: Nested settings addressed by dotted keys such as ``cache.path``

        Returns:
            A new, validated instance

        Raises:
            ConfigError: If ``data.get_as`` raises ``TypeError`` for a key
            AssertionError: If ``defaults`` contains keys that were never read here
        """
        # every key that gets read is removed; whatever remains is unknown to this class
        unused_defaults = dict(defaults)

        def get(s: str, t: Type[T]) -> T:
            # tolerate repeats: the same key may be requested more than once
            unused_defaults.pop(s, None)
            try:
                return data.get_as(s, t, defaults[s])
            except TypeError as err:
                # keep the original TypeError attached as the cause
                raise ConfigError(
                    f"Key {s}={data.get(s), defaults[s]} is not of type {t}"
                ) from err

        _continent = Suretime.Types.NtpContinents.of
        _selenium_path = get("query.selenium_driver_path", Path)
        if _selenium_path is not None:
            _selenium_path = _selenium_path.expanduser()
        # each delay is read once; it is the minimum, and max_coeff scales it to the maximum
        chembl_delay = get("query.chembl.delay_sec", float)
        pubchem_delay = get("query.pubchem.delay_sec", float)
        hmdb_delay = get("query.hmdb.delay_sec", float)
        # a distinct name, so that the ``data`` argument (used by ``get``) is not rebound
        settings = cls(
            is_testing=get("is_testing", bool),
            ntp_continent=get("search.ntp_continent_code", _continent),
            table_suffix=get("search.default_table_suffix", str),
            log_suffix=get("search.default_log_suffix", str),
            save_every=get("search.save_every", int),
            sanitize_paths=get("search.sanitize_paths", bool),
            cache_path=Path(get("cache.path", str)).expanduser(),
            chembl_expire_sec=get("cache.chembl.expire_sec", int),
            pubchem_expire_sec=get("cache.pubchem.expire_sec", int),
            taxon_expire_sec=get("cache.taxa.expire_sec", int),
            cache_gzip=get("cache.gzip", bool),
            archive_filename_suffix=get("cache.archive_filename_suffix", str),
            chembl_n_tries=get("query.chembl.n_tries", int),
            chembl_fast_save=get("query.chembl.fast_save", bool),
            chembl_timeout_sec=get("query.chembl.timeout_sec", int),
            chembl_backoff_factor=get("query.chembl.backoff_factor", float),
            chembl_query_delay_min=chembl_delay,
            chembl_query_delay_max=chembl_delay * max_coeff,
            pubchem_timeout_sec=get("query.pubchem.timeout_sec", int),
            hmdb_expire_sec=get("cache.hmdb.expire_sec", int),
            pubchem_backoff_factor=get("query.pubchem.backoff_factor", float),
            pubchem_query_delay_min=pubchem_delay,
            pubchem_query_delay_max=pubchem_delay * max_coeff,
            pubchem_n_tries=get("query.pubchem.n_tries", int),
            hmdb_timeout_sec=get("query.hmdb.timeout_sec", int),
            hmdb_backoff_factor=get("query.hmdb.backoff_factor", float),
            hmdb_query_delay_min=hmdb_delay,
            hmdb_query_delay_max=hmdb_delay * max_coeff,
            selenium_driver=get("query.selenium_driver", str).title(),
            selenium_driver_path=_selenium_path,
            log_signals=get("cli.log_signals", bool),
            log_exit=get("cli.log_exit", bool),
        )
        # we got all the required fields
        # make sure we don't have extra keys in defaults
        if unused_defaults:
            raise AssertionError(
                f"There are {len(unused_defaults)} extra defaults"
                f" in {defaults}: {unused_defaults}"
            )
        return settings

    @classmethod
    def defaults(cls) -> Mapping[str, Any]:
        """Return the module-level ``defaults`` mapping."""
        # inside a method body the bare name resolves to the module global,
        # not to this classmethod of the same name
        return defaults

    def configure(self) -> None:
        """
        Enable exit and/or signal tracing according to ``log_exit`` and ``log_signals``.

        Each enabled option passes a writer built from ``logger.trace`` to the
        corresponding ``SystemTools.trace_*`` function.
        """
        if self.log_exit:
            SystemTools.trace_exit(CommonTools.make_writer(logger.trace))
        if self.log_signals:
            SystemTools.trace_signals(CommonTools.make_writer(logger.trace))

    def configure_chembl(self) -> None:
        """
        Copy the ChEMBL settings onto the object returned by ``ChemblSettings.Instance()``.

        Nothing is changed when ``Globals.disable_chembl`` is truthy
        (the client module is still imported).
        """
        # function-scope import: the client package is only imported when this is called
        from chembl_webresource_client.settings import Settings as ChemblSettings

        if not Globals.disable_chembl:
            instance = ChemblSettings.Instance()
            instance.CACHING = True
            instance.CACHE_NAME = str(self.chembl_cache_path.resolve() / "chembl.sqlite")
            logger.debug(f"ChEMBL cache is at {instance.CACHE_NAME}")
            instance.TOTAL_RETRIES = self.chembl_n_tries
            instance.FAST_SAVE = self.chembl_fast_save
            instance.TIMEOUT = self.chembl_timeout_sec
            instance.BACKOFF_FACTOR = self.chembl_backoff_factor
            instance.CACHE_EXPIRE = self.chembl_expire_sec

    @classmethod
    def set_path_for_selenium(cls) -> None:
        """
        Append the driver, resource, and install directories to ``PATH``.

        Note that this reads the module-level ``SETTINGS`` instance, not ``cls``.
        """
        cls.add_to_path([SETTINGS.driver_path, MandosResources.dir(), Globals.where_am_i_installed])

    @classmethod
    def add_to_path(cls, paths: Collection[Union[None, str, Path]]) -> None:
        """
        Append directories to the ``PATH`` environment variable of this process.

        ``None`` entries are skipped and duplicates are dropped; the remaining
        entries are appended in the order given.

        Args:
            paths: Directories to append; each may be a ``str``, a ``Path``, or ``None``

        Raises:
            DirDoesNotExistError: If an entry exists but is neither a directory nor a mount
        """
        # dict.fromkeys de-duplicates while preserving first-seen order,
        # so the resulting PATH is deterministic (a set is not)
        unique = list(dict.fromkeys(Path(p) for p in paths if p is not None))
        for path in unique:
            if path.exists() and not path.is_dir() and not path.is_mount():
                raise DirDoesNotExistError(f"Path {path} is not a directory or mount")
        if not unique:
            # nothing to add, so leave PATH alone and don't claim otherwise in the log
            return
        addition = os.pathsep.join(str(p) for p in unique)
        current = os.environ.get("PATH", "")
        # avoid a KeyError when PATH is unset and a leading empty entry when it is empty
        os.environ["PATH"] = current + os.pathsep + addition if current else addition
        logger.debug(f"Added to PATH: {addition}")
223
224
225
# Build the process-wide SETTINGS at import time: from the file at
# Globals.settings_path when it exists, otherwise from the defaults alone.
if not Globals.settings_path.exists():
    SETTINGS = Settings.empty()
    logger.success(f"Using defaults (no file at {Globals.settings_path})")
else:
    SETTINGS = Settings.from_file(Globals.settings_path)
    logger.success(f"Read settings at {Globals.settings_path}")
# Apply the log_exit / log_signals options immediately.
SETTINGS.configure()
232
233
234
class QueryExecutors:
    """
    Namespace holding one ``QueryExecutor`` each for ChEMBL, PubChem, and HMDB.

    Each executor is created once, when this class body runs at import time,
    from the matching minimum and maximum query delay in ``SETTINGS``.
    """

    chembl = QueryExecutor(
        SETTINGS.chembl_query_delay_min,
        SETTINGS.chembl_query_delay_max,
    )
    pubchem = QueryExecutor(
        SETTINGS.pubchem_query_delay_min,
        SETTINGS.pubchem_query_delay_max,
    )
    hmdb = QueryExecutor(
        SETTINGS.hmdb_query_delay_min,
        SETTINGS.hmdb_query_delay_max,
    )
238
239
240
# Exported alias (listed in __all__); it refers to the QueryExecutors class itself, not an instance.
QUERY_EXECUTORS = QueryExecutors
0 ignored issues
show
Coding Style Naming introduced by
Class name "QUERY_EXECUTORS" doesn't conform to PascalCase naming style ('[^\\W\\da-z][^\\W_]+$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
241
242
243
__all__ = ["SETTINGS", "QUERY_EXECUTORS"]
244