Passed
Push — dependabot/pip/sphinx-rtd-them... ( 5d7d96...5d0a9b )
by
unknown
01:56
created

MultiSearch.final_checksum_path()   A

Complexity

Conditions 1

Size

Total Lines 3
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 3
nop 1
dl 0
loc 3
rs 10
c 0
b 0
f 0
"""
Runner.
"""
5
from __future__ import annotations
6
7
from dataclasses import dataclass
8
from pathlib import Path
9
from typing import Any, Mapping, MutableMapping, Optional, Sequence, Type, Union
10
11
import pandas as pd
0 ignored issues
show
introduced by
Unable to import 'pandas'
Loading history...
12
import typer
0 ignored issues
show
introduced by
Unable to import 'typer'
Loading history...
13
from pocketutils.core.exceptions import PathExistsError, IllegalStateError
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.exceptions'
Loading history...
14
from typeddfs import TypedDfs
0 ignored issues
show
introduced by
Unable to import 'typeddfs'
Loading history...
15
from typeddfs.abs_dfs import AbsDf
0 ignored issues
show
introduced by
Unable to import 'typeddfs.abs_dfs'
Loading history...
16
from typeddfs.checksums import Checksums
0 ignored issues
show
introduced by
Unable to import 'typeddfs.checksums'
Loading history...
17
from typeddfs.utils import Utils
0 ignored issues
show
introduced by
Unable to import 'typeddfs.utils'
Loading history...
18
19
from mandos.entry._arg_utils import EntryUtils
20
from mandos.entry.abstract_entries import Entry
21
from mandos.entry.api_singletons import Apis
22
from mandos.entry.entry_commands import Entries
23
from mandos.model.hit_dfs import HitDf
24
from mandos.model.settings import SETTINGS
25
from mandos.model.utils.fancy_logger import LogSinkInfo
26
from mandos.model.utils.reflection_utils import InjectionError
27
from mandos.model.utils.setup import logger
28
29
# Typer application object for this module.
cli = typer.Typer()
# Install the default API singletons before aliasing them below.
Apis.set_default()
Chembl, Pubchem = Apis.Chembl, Apis.Pubchem

# Lookup table from each entry's command name (``Entry.cmd()``) to its class.
EntriesByCmd: MutableMapping[str, Type[Entry]] = {e.cmd(): e for e in Entries}

# these are not permitted in individual searches
forbidden_keys = {"to", "no_setup"}
37
38
# Table written by ``MultiSearch.write_docs``: one row per search with the
# string columns key, search, source, desc and args, plus a reserved
# ``category`` column.
# NOTE(review): ``strict()`` / ``secure()`` semantics come from typeddfs — confirm there.
SearchExplainDf = (
    TypedDfs.typed("SearchExplainDf")
    .require("key", "search", "source", dtype=str)
    .require("desc", "args", dtype=str)
    .reserve("category", dtype=str)
    .strict()
    .secure()
    .build()
)
46
47
48
def _no_duplicate_keys(self: AbsDf) -> Optional[str]:
    """
    Verification hook: check that every value in the ``key`` column is unique.

    Args:
        self: The data frame being verified; must have a ``key`` column.

    Returns:
        An error message listing the repeated keys (sorted), or ``None`` if
        all keys are unique.
    """
    keys = self["key"]
    # ``duplicated()`` flags the second and later occurrences of each value.
    # The previous groupby/count/to_dict chain always produced an empty dict
    # (no value columns were left to count), so duplicates were never reported.
    bad = sorted({str(k) for k in keys[keys.duplicated()]})
    if bad:
        return f"Duplicate keys: {', '.join(bad)}"
    return None
54
55
56
def _no_illegal_cols(self: AbsDf) -> Optional[str]:
    """
    Verification hook: reject frames that contain a ``to`` or ``path`` column.

    Args:
        self: The data frame being verified.

    Returns:
        An error message naming the offending columns, or ``None`` if neither
        column is present.
    """
    # A list (not a set) keeps the order of names in the message stable.
    illegal = [c for c in ("to", "path") if c in self.columns]
    if illegal:
        return f"Illegal keys {', '.join(illegal)}"
    return None
61
62
63
# Config table consumed by ``MultiSearch``: one row per search, with required
# string columns ``key`` and ``source``. Both verification hooks above are
# attached: unique keys, and no ``to`` / ``path`` columns.
# NOTE(review): ``aot="search"`` presumably maps the rows to a TOML array of
# tables named ``search`` — confirm against typeddfs.
SearchConfigDf = (
    TypedDfs.typed("SearchConfigDf")
    .require("key", "source", dtype=str)
    .verify(_no_duplicate_keys)
    .verify(_no_illegal_cols)
    .add_read_kwargs("toml", aot="search")
    .add_write_kwargs("toml", aot="search")
    .secure()
    .build()
)
73
74
75
@dataclass(frozen=True, repr=True)
class MultiSearch:
    """
    Runs every search listed in a config table and concatenates the results.

    Each config row becomes a ``CmdRunner``. All runners are tested before any
    of them is run, and the per-search outputs are finally concatenated into
    ``final_path``.

    Raises:
        PathExistsError: On construction, if the final output is already
            complete and ``replace`` is false, or if it exists and ``proceed``
            is false.
    """

    # one row per search; each row must provide at least ``key`` and ``source``
    config: SearchConfigDf
    # input file handed to every individual search
    input_path: Path
    # directory receiving per-search outputs, per-search logs and the final file
    out_dir: Path
    # appended to the final file name (see ``final_path``)
    suffix: str
    # passed to each runner; also permits an already-complete final output
    replace: bool
    # passed to each runner; also permits an already-existing final output
    proceed: bool
    # only used to pick the suffix of the per-search log files
    log_path: Optional[Path]

    @property
    def final_path(self) -> Path:
        """Path of the concatenated results: ``search_<input file name><suffix>`` in ``out_dir``."""
        name = "search_" + self.input_path.name + self.suffix
        return self.out_dir / name

    @property
    def final_checksum_path(self) -> Path:
        """Hash file that ``Checksums`` associates with ``final_path``."""
        return Checksums.get_hash_file(self.final_path, algorithm=SETTINGS.search_checksum_alg)

    @property
    def is_complete(self) -> bool:
        """Whether the checksum file for the final output exists."""
        return self.final_checksum_path.exists()

    @property
    def doc_path(self) -> Path:
        """Path of the table describing the searches: ``final_path`` minus its last suffix, plus ``_doc.tsv``."""
        return Path(str(self.final_path.with_suffix("")) + "_doc.tsv")

    def __post_init__(self):
        if not self.replace and self.is_complete:
            raise PathExistsError(f"Path {self.final_path} is complete but --replace is not set")
        if not self.proceed and self.final_path.exists():
            raise PathExistsError(f"Path {self.final_path} exists but --proceed is not set")

    def run(self) -> None:
        """
        Build, test and run all searches, then write the concatenated results.

        Returns early with a warning if the config contains no searches.
        Any exception raised while testing a search is logged and re-raised.
        """
        # build up the list of Entry classes first, and run ``test`` on each one
        # that's to check that the parameters are correct before running anything
        commands = self._build_commands()
        if not commands:
            logger.warning("No searches — nothing to do")
            return
        # write a file describing all of the searches
        self.write_docs(commands)
        # build and test
        for cmd in commands:
            try:
                cmd.test(replace=self.replace, proceed=self.proceed)
            except Exception:
                logger.error(f"Bad search {cmd}")
                raise
        logger.notice("Searches look ok")
        # start!
        for cmd in commands:
            cmd.run(replace=self.replace, proceed=self.proceed)
        logger.notice("Done with all searches!")
        # write the final file
        hits = HitDf(pd.concat([HitDf.read_file(cmd.output_path) for cmd in commands]))
        hits.write_file(self.final_path, file_hash=True)
        logger.notice(f"Concatenated results to {self.final_path}")

    def _build_commands(self) -> Sequence[CmdRunner]:
        """Create one ``CmdRunner`` per config row; a later row with the same key replaces an earlier one."""
        commands = {}
        for i in range(len(self.config)):
            # drop missing cells so that they fall back to the command's defaults
            data = {
                k: v
                for k, v in self.config.iloc[i].to_dict().items()
                if v is not None and not pd.isna(v)
            }
            key = data["key"]
            with logger.contextualize(key=key):
                default_to = self.out_dir / (key + SETTINGS.table_suffix)
                # TODO: produces bad logging about being overwritten
                data["to"] = EntryUtils.adjust_filename(None, default=default_to, replace=True)
                data["log"] = self._get_log_path(key)
                cmd = CmdRunner.build(data, self.input_path)
                commands[cmd.key] = cmd
        # log about replacing
        # NOTE(review): this notice is emitted even when ``replace`` is false,
        # in which case ``CmdRunner.run`` skips finished searches — confirm intent.
        replacing = {k for k, v in commands.items() if v.was_run}
        if replacing:
            names = Utils.join_to_str(replacing, last="and")
            logger.notice(f"Overwriting results for {names}")
        return list(commands.values())

    def write_docs(self, commands: Sequence[CmdRunner]) -> None:
        """
        Write a table describing each search to ``doc_path``.

        Args:
            commands: The runners to describe; one output row per runner.
        """
        rows = []
        for cmd in commands:
            search_type = cmd.cmd.get_search_type()
            args = ", ".join([f"{k}={v}" for k, v in cmd.params.items()])
            ser = dict(
                key=cmd.key,
                search=search_type.search_name(),
                category=cmd.category,
                source=search_type.primary_data_source(),
                desc=cmd.cmd.describe(),
                args=args,
            )
            rows.append(pd.Series(ser))
        docs = SearchExplainDf(rows)
        docs.write_file(self.doc_path, mkdirs=True)

    def _get_log_path(self, key: str) -> Path:
        """Per-search log path in ``out_dir``: the key plus the default or guessed log suffix."""
        if self.log_path is None:
            suffix = SETTINGS.log_suffix
        else:
            suffix = LogSinkInfo.guess(self.log_path).suffix
        return self.out_dir / (key + suffix)
176
177
178
@dataclass(frozen=True, repr=True)
class CmdRunner:
    """A single search: an ``Entry`` class plus the keyword arguments to call it with."""

    # the Entry class whose ``test`` / ``run`` are invoked
    cmd: Type[Entry]
    # keyword arguments for the entry; includes at least ``key`` and ``to``
    params: MutableMapping[str, Union[int, str, float]]
    # input file passed as the first positional argument to the entry
    input_path: Path
    # optional ``category`` value taken from the config row
    category: Optional[str]

    @property
    def key(self) -> str:
        """The ``key`` parameter of this search."""
        return self.params["key"]

    @property
    def output_path(self) -> Path:
        """The ``to`` parameter of this search, as a ``Path``."""
        return Path(self.params["to"])

    @property
    def done_path(self) -> Path:
        """Directory-level hash file that ``Checksums`` associates with ``output_path``."""
        return Checksums.get_hash_dir(self.output_path, algorithm=SETTINGS.search_checksum_alg)

    @property
    def was_run(self) -> bool:
        """
        Whether ``output_path`` is listed in the hash file at ``done_path``.

        Raises:
            IllegalStateError: If the output is listed but the file is missing.
        """
        if not self.done_path.exists():
            return False
        sums = Checksums.parse_hash_file_resolved(self.done_path)
        done = self.output_path in sums
        if done and not self.output_path.exists():
            raise IllegalStateError(f"{self.output_path} marked complete but does not exist")
        return done

    def test(self, *, replace: bool, proceed: bool) -> None:
        """
        Check that the search may run, then call the entry's own ``test``.

        Args:
            replace: Permit an existing, unfinished output.
            proceed: Permit an existing, unfinished output.

        Raises:
            PathExistsError: If the output exists, is not marked finished, and
                neither ``replace`` nor ``proceed`` is set.
        """
        if self.output_path.exists() and not self.was_run and not proceed and not replace:
            raise PathExistsError(f"Path {self.output_path} exists but not finished")
        with logger.contextualize(key=self.key):
            self.cmd.test(self.input_path, **self.params)

    def run(self, *, replace: bool, proceed: bool) -> None:
        """
        Run the search unless it already finished and ``replace`` is false.

        Args:
            replace: Re-run even if the search is marked finished.
            proceed: Not consulted here; kept so that the signature matches ``test``.
        """
        # we already checked that we're allowed to proceed
        if replace or not self.was_run:
            with logger.contextualize(key=self.key):
                self.cmd.run(self.input_path, **self.params)

    @classmethod
    def build(cls, data: Mapping[str, Any], input_path: Path) -> CmdRunner:
        """
        Create a runner from one config row.

        Args:
            data: The row as a mapping; must contain ``key`` and ``source``,
                where ``source`` names a command in ``EntriesByCmd``.
            input_path: Input file to pass to the entry.

        Returns:
            A runner whose parameters are the entry's defaults overlaid with
            every item of ``data`` except ``source`` and ``category``.

        Raises:
            InjectionError: If ``source`` is not a known command.
        """
        key, source = data["key"], data["source"]
        try:
            entry = EntriesByCmd[source]
        except KeyError:
            raise InjectionError(f"Search command {source} (key {key}) does not exist") from None
        params = {}
        # we need to explicitly add the defaults from the OptionInfo instances
        params.update(entry.default_param_values().items())
        # do this after: the defaults had path, key, and to
        params["key"] = key
        # now add the params we got for this command's section
        params.update({k: v for k, v in data.items() if k not in ("source", "category")})
        return cls(entry, params, input_path, data.get("category"))
236
237
238
# Names exported by a star-import of this module.
__all__ = ["MultiSearch", "SearchExplainDf", "SearchConfigDf"]
239