Passed
Push — main ( 2b775d...83a9fb )
by Douglas
04:59 queued 02:43
created

mandos.entry.multi_searches   A

Complexity

Total Complexity 41

Size/Duplication

Total Lines 244
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 195
dl 0
loc 244
rs 9.1199
c 0
b 0
f 0
wmc 41

14 Methods

Rating   Name   Duplication   Size   Complexity  
A CmdRunner.key() 0 3 1
A MultiSearch.run() 0 21 4
A MultiSearch.final_path() 0 4 1
A MultiSearch.build() 0 21 1
A CmdRunner.done_path() 0 3 1
A CmdRunner.run() 0 2 1
B CmdRunner.build() 0 44 6
B MultiSearch.__post_init__() 0 8 7
D MultiSearch._build_commands() 0 25 12
A CmdRunner.output_path() 0 3 1
A CmdRunner.test() 0 2 1
A CmdRunner.was_run() 0 6 2
A MultiSearch.explain_path() 0 3 1
A MultiSearch.to_table() 0 11 2

How to fix   Complexity   

Complexity

Complex classes like mandos.entry.multi_searches often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
"""
2
Runner.
3
"""
4
5
from __future__ import annotations
6
7
from dataclasses import dataclass
8
from pathlib import Path
9
from typing import Sequence, Type, Union, Optional, MutableMapping
10
11
import pandas as pd
0 ignored issues
show
introduced by
Unable to import 'pandas'
Loading history...
12
import tomlkit
0 ignored issues
show
introduced by
Unable to import 'tomlkit'
Loading history...
13
import typer
0 ignored issues
show
introduced by
Unable to import 'typer'
Loading history...
14
from pocketutils.core.exceptions import (
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.exceptions'
Loading history...
15
    ReservedError,
16
    AlreadyUsedError,
17
    PathExistsError,
18
    XValueError,
19
)
20
from typeddfs import TypedDfs
0 ignored issues
show
introduced by
Unable to import 'typeddfs'
Loading history...
21
from tomlkit.api import Table, AoT
0 ignored issues
show
introduced by
Unable to import 'tomlkit.api'
Loading history...
22
from typeddfs.checksums import Checksums
0 ignored issues
show
introduced by
Imports from package typeddfs are not grouped
Loading history...
introduced by
Unable to import 'typeddfs.checksums'
Loading history...
23
from typeddfs.file_formats import CompressionFormat
0 ignored issues
show
Unused Code introduced by
Unused CompressionFormat imported from typeddfs.file_formats
Loading history...
introduced by
Unable to import 'typeddfs.file_formats'
Loading history...
24
25
from mandos.model.utils.setup import logger, MandosLogging
26
from mandos.entry.api_singletons import Apis
27
from mandos.entry.entry_commands import Entries
28
from mandos.entry.abstract_entries import Entry
29
from mandos.model.utils.reflection_utils import InjectionError
30
from mandos.model.hits import HitFrame
31
32
# Typer application object for this module
cli = typer.Typer()

# Install the default API singletons before taking references to them
Apis.set_default()
Chembl = Apis.Chembl
Pubchem = Apis.Pubchem

# Maps each entry's command name (``Entry.cmd()``) to the Entry class itself
EntriesByCmd: MutableMapping[str, Type[Entry]] = {entry.cmd(): entry for entry in Entries}

# these are only permitted in 'meta', not individual searches
meta_keys = {"log", "stderr"}
# reserved parameter names
forbidden_keys = {"to", "no_setup"}

# Typed data frame with one row per search; all six columns are required strings
SearchExplainDf = (
    TypedDfs.typed("SearchExplainDf")
    .require(
        "key",
        "search",
        "source",
        dtype=str,
    )
    .require(
        "category",
        "desc",
        "args",
        dtype=str,
    )
    .strict()
    .secure()
).build()
49
50
51
@dataclass(frozen=True, repr=True)
class MultiSearch:
    """
    A batch of searches read from one TOML file and applied to one input file.

    Every entry of ``searches`` is turned into a ``CmdRunner``. The outputs of the
    searches that are run are concatenated and written to ``final_path``, and a
    table describing them is written to ``explain_path``.
    """

    # 'meta' allows us to set defaults; only the keys in ``meta_keys`` are accepted
    meta: Table
    # the ``search`` entries of the TOML file, one per search to run
    searches: AoT
    # the TOML file the searches were read from
    toml_path: Path
    # the input file handed to every search
    input_path: Path
    # directory that receives the per-search and final output files
    out_dir: Path
    # suffix appended to each output filename
    suffix: str
    # if False, existing outputs are errors (final files) or skipped (finished searches)
    replace: bool
    # log path given on the command line, if any
    log_path: Optional[Path]

    @property
    def final_path(self) -> Path:
        """
        Path of the concatenated output file, inside ``out_dir``.
        """
        name = "search_" + self.input_path.name + "_" + self.toml_path.name + self.suffix
        return self.out_dir / name

    @property
    def explain_path(self) -> Path:
        """
        Path of the table that describes the searches.

        It is ``final_path`` with its last suffix replaced by ``_explain.tsv``.
        """
        return Path(str(self.final_path.with_suffix("")) + "_explain.tsv")

    def __post_init__(self):
        """
        Validate the output paths and the keys of ``meta``.

        Raises:
            PathExistsError: If ``final_path`` or ``explain_path`` exists and ``replace`` is False
            ReservedError: If ``meta`` contains a key that is not in ``meta_keys``
        """
        if not self.replace:
            for path in (self.final_path, self.explain_path):
                if path.exists():
                    raise PathExistsError(f"Path {path} exists but --replace is not set")
        for key in dict(self.meta):
            if key not in meta_keys:
                raise ReservedError(f"{key} in 'meta' not supported.")

    @classmethod
    def build(
        cls,
        input_path: Path,
        out_dir: Path,
        suffix: str,
        toml_path: Path,
        replace: bool,
        log_path: Optional[Path],
    ) -> MultiSearch:
        """
        Read ``toml_path`` (UTF-8) and create a ``MultiSearch`` from its contents.

        The ``meta`` and ``search`` entries of the TOML document are used; each
        falls back to an empty list if it is missing.

        Raises:
            PathExistsError: See ``__post_init__``
            ReservedError: See ``__post_init__``
        """
        toml = tomlkit.loads(Path(toml_path).read_text(encoding="utf8"))
        return cls(
            meta=toml.get("meta", []),
            searches=toml.get("search", []),
            toml_path=toml_path,
            input_path=input_path,
            out_dir=out_dir,
            suffix=suffix,
            replace=replace,
            log_path=log_path,
        )

    def to_table(self) -> SearchExplainDf:
        """
        Build a table with one row per search that would be run.

        Returns:
            A ``SearchExplainDf`` with the columns key, search, category, source, desc, and args
        """
        return self._explain(self._build_commands())

    @staticmethod
    def _explain(commands: Sequence[CmdRunner]) -> SearchExplainDf:
        """
        Describe already-built commands as a ``SearchExplainDf``, one row per command.
        """
        rows = []
        for cmd in commands:
            search_type = cmd.cmd.get_search_type()
            row = dict(
                key=cmd.key,
                search=search_type.search_name(),
                category=cmd.category,
                source=search_type.primary_data_source(),
                desc=cmd.cmd.describe(),
                args=", ".join(f"{k}={v}" for k, v in cmd.params.items()),
            )
            rows.append(pd.Series(row))
        return SearchExplainDf(rows)

    def run(self) -> None:
        """
        Test and run every search, then write the concatenated results to ``final_path``.

        Does nothing except log a warning if there are no searches to run.
        """
        # build up the list of Entry classes first, and run ``test`` on each one
        # that's to check that the parameters are correct before running anything
        commands = self._build_commands()
        if not commands:
            logger.warning("No searches -- nothing to do")
            return
        # write a metadata file describing all of the searches
        # reuse ``commands`` so the searches are not rebuilt (and re-logged) a second time
        explain = self._explain(commands)
        explain.write_file(self.explain_path, mkdirs=True)
        for cmd in commands:
            cmd.test()
            logger.info(f"Search {cmd.key} looks ok.")
        logger.notice("All searches look ok.")
        for cmd in commands:
            cmd.run()
        logger.notice("Done with all searches!")
        # write the final file
        frames = [HitFrame.read_file(cmd.output_path) for cmd in commands]
        hits = HitFrame(pd.concat(frames))
        hits.write_file(self.final_path)
        logger.notice(f"Concatenated file to {self.final_path}")

    def _build_commands(self) -> Sequence[CmdRunner]:
        """
        Build one ``CmdRunner`` per search and decide which of them need to run.

        A search that was already run is skipped unless ``replace`` is set, in which
        case it is kept and its results will be overwritten.

        Returns:
            The commands to run, in the order they appear in ``searches``

        Raises:
            AlreadyUsedError: If two searches share a key
        """
        commands = {}
        # track every key, including skipped searches, so duplicates are always caught
        seen_keys = set()
        skipping = []
        replacing = []
        for search in self.searches:
            cmd = CmdRunner.build(
                search, self.meta, self.input_path, self.out_dir, self.suffix, self.log_path
            )
            if cmd.key in seen_keys:
                raise AlreadyUsedError(f"Repeated search key '{cmd.key}'")
            seen_keys.add(cmd.key)
            skip = False
            if cmd.output_path.exists() and not cmd.done_path.exists():
                # the search is still run in this case; the error is informational
                logger.error(f"Path {cmd.output_path} exists but not marked as complete.")
            elif cmd.was_run:
                if self.replace:
                    replacing.append(cmd)
                else:
                    skipping.append(cmd)
                    skip = True
            if not skip:
                commands[cmd.key] = cmd
        if skipping:
            skipped_keys = ", ".join(c.key for c in skipping)
            logger.notice(f"Skipping searches {skipped_keys} (already run).")
        if replacing:
            replaced_keys = ", ".join(c.key for c in replacing)
            logger.notice(f"Overwriting results for searches {replaced_keys}.")
        return list(commands.values())
163
164
165
@dataclass(frozen=True, repr=True)
class CmdRunner:
    """
    One search command together with the parameters and input file it will be run with.
    """

    # the Entry class that implements the search
    cmd: Type[Entry]
    # keyword arguments passed to ``cmd.test`` and ``cmd.run``; includes 'key', 'to', and 'log'
    params: MutableMapping[str, Union[int, str, float]]
    # the input file passed to ``cmd.test`` and ``cmd.run``
    input_path: Path
    # the optional 'category' value of the search's TOML section
    category: Optional[str]

    @property
    def key(self) -> str:
        """
        The value of the 'key' parameter.
        """
        return self.params["key"]

    @property
    def output_path(self) -> Path:
        """
        The value of the 'to' parameter as a ``Path``.
        """
        return Path(self.params["to"])

    @property
    def done_path(self) -> Path:
        """
        The path ``Checksums.get_hash_dir`` returns for the directory containing ``output_path``.
        """
        return Checksums.get_hash_dir(self.output_path.parent)

    @property
    def was_run(self) -> bool:
        """
        Whether ``done_path`` exists and its parsed contents include ``output_path``.
        """
        if not self.done_path.exists():
            return False
        sums = Checksums.parse_hash_file_resolved(self.done_path)
        return self.output_path in sums

    def test(self) -> None:
        """
        Call ``cmd.test`` with the input path and the parameters.
        """
        self.cmd.test(self.input_path, **self.params)

    def run(self) -> None:
        """
        Call ``cmd.run`` with the input path and the parameters.
        """
        self.cmd.run(self.input_path, **self.params)

    @classmethod
    def build(
        cls,
        e: Table,
        meta: Table,
        input_path: Path,
        out_dir: Path,
        suffix: str,
        cli_log: Optional[Path],
    ) -> CmdRunner:
        """
        Create a runner from one search section of the TOML file.

        Args:
            e: The search section; 'source' names the command, 'key' defaults to the command name
            meta: The 'meta' section, providing defaults shared by all searches
            input_path: The input file for the search
            out_dir: Directory for the search's output (``key + suffix``) and its log file
            suffix: Suffix appended to the key to form the output filename
            cli_log: Log path given on the command line, if any

        Raises:
            XValueError: If the 'log' value in ``meta`` has length 1
            InjectionError: If 'source' does not name a known command
            ReservedError: If the section contains a key that may not be set per search
        """
        source = e["source"].value
        key = e.get("key", source)
        if "log" in meta:
            # NOTE(review): a length-1 value is rejected as empty -- presumably a bare "."; confirm
            if len(meta["log"].value) == 1:
                raise XValueError("'log' is empty")
            log_name = key + meta["log"].value
            MandosLogging.get_log_suffix(cli_log)  # just check
        elif cli_log is not None:
            log_name = key + MandosLogging.get_log_suffix(cli_log)
        else:
            log_name = key + ".log"
        log = out_dir / log_name
        try:
            entry = EntriesByCmd[source]
        except KeyError as err:
            raise InjectionError(
                f"Search command {source} (key {key}) does not exist"
            ) from err
        # they shouldn't pass any of these args
        # sorted so that the error message is deterministic
        bad = sorted(b for b in {*meta_keys, *forbidden_keys, "path"} if b in e)
        if bad:
            raise ReservedError(f"Forbidden keys in [[search]] ({source}): {','.join(bad)}")
        # stupidly, we need to explicitly add the defaults from the OptionInfo instances
        params = dict(entry.default_param_values().items())
        # update the defaults from 'meta' so that values set there take effect
        params.update(dict(meta))
        # do this after: the defaults had path, key, and to
        params["key"] = key
        params["to"] = out_dir / (key + suffix)
        params["log"] = log
        # now add the params we got for this command's section
        # skip the source -- it's the command name -- and the category, which is stored separately
        params.update({k: v for k, v in e.items() if k not in ("source", "category")})
        category = e.get("category")
        return cls(entry, params, input_path, category)
241
242
243
# Public API: only MultiSearch is exported by ``from ... import *``
__all__ = ["MultiSearch"]
244