Passed
Push — main ( cee75c...37036d )
by Douglas
02:08
created

mandos.entry.multi_searches   A

Complexity

Total Complexity 28

Size/Duplication

Total Lines 188
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 143
dl 0
loc 188
rs 10
c 0
b 0
f 0
wmc 28

14 Methods

Rating   Name   Duplication   Size   Complexity  
A CmdRunner.key() 0 3 1
A MultiSearch.run() 0 18 3
A MultiSearch.final_path() 0 7 2
A MultiSearch.build() 0 6 1
A CmdRunner.done_path() 0 3 1
A CmdRunner.run() 0 2 1
A CmdRunner.build() 0 29 3
A MultiSearch.__post_init__() 0 7 5
A MultiSearch._build_commands() 0 15 5
A CmdRunner.output_path() 0 3 1
A CmdRunner.test() 0 2 1
A CmdRunner.was_run() 0 3 1
A MultiSearch.explain_path() 0 3 1
A MultiSearch.to_table() 0 11 2
1
"""
2
Runner.
3
"""
4
5
from __future__ import annotations
6
7
from dataclasses import dataclass
8
from pathlib import Path
9
from typing import Dict, Sequence, Type, Union, Optional, MutableMapping
0 ignored issues
show
Unused Code introduced by
Unused Dict imported from typing
Loading history...
10
11
import pandas as pd
0 ignored issues
show
introduced by
Unable to import 'pandas'
Loading history...
12
import typer
0 ignored issues
show
introduced by
Unable to import 'typer'
Loading history...
13
from typeddfs import TypedDfs
0 ignored issues
show
introduced by
Unable to import 'typeddfs'
Loading history...
14
from pocketutils.core.dot_dict import NestedDotDict
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.dot_dict'
Loading history...
15
16
from mandos import logger
17
from mandos.entry.api_singletons import Apis
18
from mandos.entry.entry_commands import Entries
19
from mandos.entry.abstract_entries import Entry
20
from mandos.model.utils.reflection_utils import InjectionError
21
from mandos.model.hits import HitFrame
22
from mandos.model.settings import MANDOS_SETTINGS
23
24
# Typer application object for this module.
cli = typer.Typer()
# Apis.set_default() is called before the two API handles are read off Apis below.
Apis.set_default()
Chembl = Apis.Chembl
Pubchem = Apis.Pubchem

# Lookup from each entry's command name (``cmd()``) to the Entry class itself.
EntriesByCmd: MutableMapping[str, Type[Entry]] = dict((entry.cmd(), entry) for entry in Entries)

# these are only permitted in 'meta', not individual searches
meta_keys = {
    "verbose",
    "quiet",
    "check",
    "log",
    "to",
}
# NOTE(review): not referenced anywhere in the visible part of this module -- confirm it is still needed.
forbidden_keys = {
    "dir",
    "out-dir",
    "out_dir",
}

# Typed DataFrame class with six required string columns; one row describes one search.
SearchExplainDf = (
    TypedDfs.typed("SearchExplainDf")
    .require("key", "search", "source", dtype=str)
    .require("category", "desc", "args", dtype=str)
    .strict()
    .secure()
).build()
41
42
43
@dataclass(frozen=True, repr=True)
class MultiSearch:
    """
    A batch of searches read from a TOML file and run against one input file.

    ``meta`` holds defaults shared by all searches; ``searches`` holds one table per search.
    Results are written under ``out_dir``.
    """

    # 'meta' allows us to set defaults for things like --to
    meta: NestedDotDict
    searches: Sequence[NestedDotDict]
    toml_path: Path
    input_path: Path
    out_dir: Path

    @property
    def final_path(self) -> Path:
        """
        Path of the concatenated output file.

        The name is ``search_<input file name>_<toml file name><suffix>`` inside ``out_dir``,
        where the suffix is ``meta["to"]`` if given, else ``MANDOS_SETTINGS.default_table_suffix``.
        """
        if "to" in self.meta:
            # 'to' is validated in __post_init__ to be a bare suffix starting with '.'.
            # Path(".csv").suffix is "" (a leading-dot name has no suffix), so use it directly.
            fmt = str(self.meta["to"])
        else:
            fmt = MANDOS_SETTINGS.default_table_suffix
        return self.out_dir / ("search_" + self.input_path.name + "_" + self.toml_path.name + fmt)

    @property
    def explain_path(self) -> Path:
        """
        Path of the TSV file that describes the searches.

        Derived from ``final_path`` by dropping its last suffix and appending ``_explain.tsv``.
        """
        return Path(str(self.final_path.with_suffix("")) + "_explain.tsv")

    def __post_init__(self):
        """
        Validate ``meta``.

        Raises:
            ValueError: If ``to`` is set but does not start with '.',
                        or if ``meta`` contains a key that is not in ``meta_keys``.
        """
        to = self.meta.get_as("to", str)
        if to is not None and not to.startswith("."):
            raise ValueError(f"Argument 'to' ({to}) must start with '.'.")
        for key, _ in self.meta.items():
            if key not in meta_keys:
                raise ValueError(f"{key} in 'meta' not supported.")

    @classmethod
    def build(cls, input_path: Path, out_dir: Path, toml_path: Path) -> MultiSearch:
        """
        Create an instance from a TOML file.

        Args:
            input_path: The input file that every search is run on
            out_dir: Directory that output files are written under
            toml_path: TOML file; its ``search`` list (default empty) and ``meta`` table are read

        Returns:
            A new instance built from the parsed TOML data
        """
        toml = NestedDotDict.read_toml(toml_path)
        searches = toml.get_as("search", list, [])
        meta = toml.sub("meta")
        return cls(meta, searches, toml_path, input_path, out_dir)

    def to_table(self) -> SearchExplainDf:
        """
        Build a table with one row per search that will be run.

        Returns:
            A ``SearchExplainDf`` with columns key, search, category, source, desc, and args
        """
        rows = []
        for cmd in self._build_commands():
            search_type = cmd.cmd.get_search_type()
            # 'args' is a flat "name=value, name=value" rendering of the parameters
            args = ", ".join([f"{k}={v}" for k, v in cmd.params.items()])
            ser = dict(
                key=cmd.key,
                search=search_type.search_name,
                category=cmd.category,
                source=search_type(cmd.key).data_source,
                desc=cmd.cmd.describe(),
                args=args,
            )
            rows.append(pd.Series(ser))
        return SearchExplainDf(rows)

    def run(self) -> None:
        """
        Test every search, run every search, then concatenate the outputs into ``final_path``.

        The explanation table is written to ``explain_path`` before anything is tested or run.
        """
        # build up the list of Entry classes first, and run ``test`` on each one
        # that's to check that the parameters are correct before running anything
        commands = self._build_commands()
        # write a metadata file describing all of the searches
        # (to_table() builds the commands a second time)
        explain = self.to_table()
        explain.write_file(self.explain_path)
        for cmd in commands:
            cmd.test()
            logger.info(f"Search {cmd.key} looks ok.")
        logger.notice("All searches look ok.")
        for cmd in commands:
            cmd.run()
        logger.notice("Done with all searches!")
        # write the final file
        # NOTE(review): only searches run in this call are concatenated; searches skipped as
        # already run are left out, and pd.concat raises if the list is empty -- confirm intent.
        df = HitFrame(pd.concat([HitFrame.read_file(cmd.output_path) for cmd in commands]))
        df.write_file(self.final_path)
        logger.notice(f"Concatenated file to {self.final_path}")

    def _build_commands(self) -> Sequence[CmdRunner]:
        """
        Build a ``CmdRunner`` per search, leaving out those whose ``was_run`` is true.

        Returns:
            The runners that still need to run, in the order they appear in ``searches``

        Raises:
            ValueError: If two searches that still need to run share the same key
        """
        commands = {}
        skipping = []
        for search in self.searches:
            cmd = CmdRunner.build(search, self.meta, self.input_path, self.out_dir)
            if cmd.was_run:
                skipping.append(cmd)
                continue
            # check for the duplicate *before* inserting; checking afterward always matches
            if cmd.key in commands:
                raise ValueError(f"Repeated search key '{cmd.key}'")
            commands[cmd.key] = cmd
        if skipping:
            skipped_keys = ", ".join([c.key for c in skipping])
            logger.notice(f"Skipping searches {skipped_keys} (already run).")
        return list(commands.values())
125
126
127
@dataclass(frozen=True, repr=True)
class CmdRunner:
    """
    One search command (an ``Entry`` class) together with the parameters it is called with.
    """

    cmd: Type[Entry]
    params: MutableMapping[str, Union[int, str, float]]
    input_path: Path
    category: Optional[str]

    @property
    def key(self) -> str:
        """The ``key`` entry of ``params``."""
        return self.params["key"]

    @property
    def output_path(self) -> Path:
        """The ``to`` entry of ``params``, as a path."""
        return Path(self.params["to"])

    @property
    def was_run(self) -> bool:
        """Whether the file at ``done_path`` exists."""
        return self.done_path.exists()

    @property
    def done_path(self) -> Path:
        """``output_path`` with ``.done`` appended to its last suffix."""
        return self.output_path.with_suffix(self.output_path.suffix + ".done")

    def test(self) -> None:
        """Call the command's ``test`` with the stored parameters, forcing ``quiet=True``."""
        self.cmd.test(self.input_path, **{**self.params, **dict(quiet=True)})

    def run(self) -> None:
        """Call the command's ``run`` with the stored parameters, forcing ``no_setup=True`` and ``check=False``."""
        self.cmd.run(self.input_path, **{**self.params, **dict(no_setup=True, check=False)})

    @classmethod
    def build(
        cls, e: NestedDotDict, meta: NestedDotDict, input_path: Path, out_dir: Path
    ) -> CmdRunner:
        """
        Create a runner from one ``[[search]]`` section plus the shared ``meta`` defaults.

        Parameters are layered in this order (later wins): ``meta``, the command's own default
        parameter values, the fixed ``key``/``path``/``out_dir`` values, then the section's keys
        other than ``source`` and ``category``. ``check`` is removed at the end.

        Args:
            e: The ``[[search]]`` section; ``source`` names the command and is required
            meta: The shared defaults
            input_path: The input file passed to the command when it is tested or run
            out_dir: Stored as the ``out_dir`` parameter

        Raises:
            InjectionError: If ``source`` is not a known command name
            ValueError: If the section contains a key reserved for ``meta`` or set internally
        """
        source = e.req_as("source", str)
        key = e.get_as("key", str, source)
        to = meta.get_as("to", str, MANDOS_SETTINGS.default_table_suffix)
        try:
            entry = EntriesByCmd[source]
        except KeyError:
            # the KeyError itself adds nothing for the user, so suppress it explicitly
            raise InjectionError(f"Search command {source} (key {key}) does not exist") from None
        # use defaults
        params = dict(meta)
        # they shouldn't pass any of these args
        # sorted so that the error message is deterministic
        bad = sorted(b for b in {*meta_keys, "path", "no_setup", "out_dir"} if b in e)
        if bad:
            raise ValueError(f"Forbidden keys in [[search]] ({entry}): {','.join(bad)}")
        # update the defaults from 'meta' (e.g. 'verbose')
        # skip the source -- it's the command name
        # stupidly, we need to explicitly add the defaults from the OptionInfo instances
        params.update(entry.default_param_values().items())
        # do this after: the defaults had path, key, and to
        params["key"] = key
        # NOTE(review): 'path' is rejected above when present in the section, yet it is
        # requested from the section here -- confirm which of the two is intended.
        params["path"] = e.req_as("path", Path)
        params["out_dir"] = out_dir
        params.setdefault("to", to)
        # now add the params we got for this command's section
        params.update({k: v for k, v in e.items() if k not in ("source", "category")})
        # 'check' may be absent if neither 'meta' nor the defaults supplied it
        params.pop("check", None)
        category = e.get_as("category", str)
        return cls(entry, params, input_path, category)
185
186
187
# Only MultiSearch is exported by a star-import of this module.
__all__ = ["MultiSearch"]
188