Passed
Push — dependabot/pip/sphinx-copybutt... ( c72176...cfd31d )
by
unknown
07:42 queued 05:46
created

MultiSearch._build_commands()   A

Complexity

Conditions 5

Size

Total Lines 15
Code Lines 14

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 5
eloc 14
nop 1
dl 0
loc 15
rs 9.2333
c 0
b 0
f 0
1
"""
2
Runner.
3
"""
4
5
from __future__ import annotations
6
7
from dataclasses import dataclass
8
from pathlib import Path
9
from typing import Dict, Sequence, List, Type, Union, Optional
0 ignored issues
show
Unused Code introduced by
Unused List imported from typing
Loading history...
10
11
import pandas as pd
0 ignored issues
show
introduced by
Unable to import 'pandas'
Loading history...
12
import typer
0 ignored issues
show
introduced by
Unable to import 'typer'
Loading history...
13
from typeddfs import TypedDfs
0 ignored issues
show
introduced by
Unable to import 'typeddfs'
Loading history...
14
from pocketutils.core.dot_dict import NestedDotDict
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.dot_dict'
Loading history...
15
16
from mandos import logger
17
from mandos.entries.api_singletons import Apis
18
from mandos.entries.entries import Entries, Entry
19
from mandos.model import InjectionError
20
from mandos.model.hits import HitFrame
21
from mandos.model.settings import MANDOS_SETTINGS
22
23
cli = typer.Typer()
24
Apis.set_default()
25
Chembl, Pubchem = Apis.Chembl, Apis.Pubchem
26
27
EntriesByCmd: Dict[str, Type[Entry]] = {e.cmd(): e for e in Entries}
28
29
# these are only permitted in 'meta', not individual searches
30
meta_keys = {"verbose", "quiet", "check", "log", "to"}
31
forbidden_keys = {"dir", "out-dir", "out_dir"}
32
33
SearchExplainDf = (
34
    TypedDfs.typed("SearchExplainDf").require(
35
        "key", "search", "source", "category", "desc", "args", dtype=str
36
    )
37
).build()
38
39
40
@dataclass(frozen=True, repr=True)
class MultiSearch:
    """
    A batch of searches to run against a single input file.

    ``meta`` holds defaults shared by all searches (only the keys listed in
    ``meta_keys`` are accepted), and ``searches`` holds one table per search.
    Use :meth:`build` to read both from a TOML file.
    """

    # 'meta' allows us to set defaults for things like --to
    meta: NestedDotDict
    # one table per search; each is handed to ``CmdRunner.build``
    searches: Sequence[NestedDotDict]
    # path of the TOML file; its name becomes part of the final output filename
    toml_path: Path
    # input file passed to every search; its name is also part of the final filename
    input_path: Path
    # directory that the concatenated table is written into
    out_dir: Path

    @property
    def final_path(self) -> Path:
        """
        Path of the concatenated table that :meth:`run` writes.

        The filename is ``search_<input name>_<toml name><suffix>``, where the
        suffix is ``meta["to"]`` if given and the configured default otherwise.
        """
        if "to" in self.meta:
            # 'to' is validated in __post_init__ to start with '.', so it already *is*
            # the suffix. Path(".feather").suffix would be "" (a leading-dot name has
            # no suffix) and Path(".csv.gz").suffix would drop ".csv".
            fmt = str(self.meta["to"])
        else:
            fmt = MANDOS_SETTINGS.default_table_suffix
        return self.out_dir / ("search_" + self.input_path.name + "_" + self.toml_path.name + fmt)

    def __post_init__(self):
        """
        Validate ``meta``.

        Raises:
            ValueError: If ``to`` is set but does not start with ``.``,
                        or if ``meta`` contains a key that is not in ``meta_keys``
        """
        suffix = self.meta.get_as("to", str)
        if suffix is not None and not suffix.startswith("."):
            raise ValueError(f"Argument 'to' ({suffix}) must start with '.'.")
        for key, _ in self.meta.items():
            if key not in meta_keys:
                raise ValueError(f"{key} in 'meta' not supported.")

    @classmethod
    def build(cls, input_path: Path, out_dir: Path, toml_path: Path) -> MultiSearch:
        """
        Read a TOML file and construct an instance from it.

        The ``search`` list (empty if absent) becomes ``searches``
        and the ``meta`` table becomes ``meta``.
        """
        toml = NestedDotDict.read_toml(toml_path)
        searches = toml.get_as("search", list, [])
        meta = toml.sub("meta")
        return cls(meta, searches, toml_path, input_path, out_dir)

    def to_table(self) -> SearchExplainDf:
        """
        Describe the searches that would be run, one row per search.

        Columns are ``key``, ``search``, ``category``, ``source``, ``desc``, and ``args``.
        Searches that were already run are not included.
        """
        rows = []
        for cmd in self._build_commands():
            search_type = cmd.cmd.get_search_type()
            row = dict(
                key=cmd.key,
                search=search_type.search_name,
                category=cmd.category,
                source=search_type(cmd.key).data_source,
                desc=cmd.cmd.describe(),
                args=", ".join([f"{k}={v}" for k, v in cmd.params.items()]),
            )
            rows.append(pd.Series(row))
        return SearchExplainDf(rows)

    def run(self) -> None:
        """
        Test every pending search, run them all, and concatenate their outputs.

        The concatenated table is written to :attr:`final_path`.
        """
        # build up the list of Entry classes first, and run ``test`` on each one
        # that's to check that the parameters are correct before running anything
        commands = self._build_commands()
        for cmd in commands:
            cmd.test()
            logger.info(f"Search {cmd.key} looks ok.")
        logger.notice("All searches look ok.")
        for cmd in commands:
            cmd.run()
        logger.notice("Done with all searches!")
        # NOTE(review): searches skipped as already run are not in ``commands``, so their
        # existing outputs are left out of the concatenated table; and pd.concat is
        # expected to raise ValueError if ``commands`` is empty -- confirm both are intended
        df = HitFrame(pd.concat([HitFrame.read_file(cmd.output_path) for cmd in commands]))
        df.write_file(self.final_path)
        logger.notice(f"Concatenated file to {self.final_path}")

    def _build_commands(self) -> Sequence[CmdRunner]:
        """
        Build a runner for each search, leaving out those that were already run.

        Returns:
            The runners that still need to be run, in the order they were listed

        Raises:
            ValueError: If two pending searches share the same key
        """
        commands = {}
        skipping = []
        for search in self.searches:
            cmd = CmdRunner.build(search, self.meta, self.input_path, self.out_dir)
            if cmd.was_run:
                skipping.append(cmd)
                continue
            # check *before* inserting; checking afterward would always find the key
            if cmd.key in commands:
                raise ValueError(f"Repeated search key '{cmd.key}'")
            commands[cmd.key] = cmd
        if skipping:
            skipped = ", ".join(c.key for c in skipping)
            logger.notice(f"Skipping searches {skipped} (already run).")
        return list(commands.values())
114
115
116
@dataclass(frozen=True, repr=True)
class CmdRunner:
    """
    A single search command together with the parameters to call it with.

    Use :meth:`build` to construct one from a ``[[search]]`` table.
    """

    # the Entry class whose ``test`` and ``run`` are called
    cmd: Type[Entry]
    # keyword arguments passed to ``cmd.test`` and ``cmd.run``; includes 'key' and 'to'
    params: Dict[str, Union[int, str, float]]
    # passed as the first positional argument to ``cmd.test`` and ``cmd.run``
    input_path: Path
    # the optional 'category' value from the search table
    category: Optional[str]

    @property
    def key(self) -> str:
        """The ``key`` parameter of this search."""
        return self.params["key"]

    @property
    def output_path(self) -> Path:
        """The ``to`` parameter of this search, as a path."""
        return Path(self.params["to"])

    @property
    def was_run(self) -> bool:
        """Whether :attr:`done_path` exists."""
        return self.done_path.exists()

    @property
    def done_path(self) -> Path:
        """:attr:`output_path` with ``.done`` appended to its suffix."""
        return self.output_path.with_suffix(self.output_path.suffix + ".done")

    def test(self) -> None:
        """Call the command's ``test`` with the stored parameters, forcing ``quiet=True``."""
        self.cmd.test(self.input_path, **{**self.params, **dict(quiet=True)})

    def run(self) -> None:
        """Call the command's ``run`` with the stored parameters, forcing ``no_setup=True`` and ``check=False``."""
        self.cmd.run(self.input_path, **{**self.params, **dict(no_setup=True, check=False)})

    @classmethod
    def build(
        cls, e: NestedDotDict, meta: NestedDotDict, input_path: Path, out_dir: Path
    ) -> CmdRunner:
        """
        Build a runner from one ``[[search]]`` table.

        Parameters are layered in this order, later ones winning:
        the ``meta`` defaults, the command's own default parameter values,
        ``key``/``path``/``out_dir``, and finally the entries of ``e``
        (other than ``source`` and ``category``).

        Args:
            e: The search table; ``source`` names the command and ``key`` defaults to it
            meta: Defaults shared by all searches
            input_path: Stored on the runner and passed to the command
            out_dir: Passed to the command as ``out_dir``

        Raises:
            InjectionError: If ``source`` is not a known command
            ValueError: If ``e`` contains a key reserved for ``meta`` or set internally
        """
        cmd = e.req_as("source", str)
        key = e.get_as("key", str, cmd)
        to = meta.get_as("to", str, MANDOS_SETTINGS.default_table_suffix)
        try:
            cmd = EntriesByCmd[cmd]
        except KeyError:
            # the message already names the command, so the KeyError adds nothing
            raise InjectionError(f"Search command {cmd} (key {key}) does not exist") from None
        # use defaults
        params = dict(meta)
        # they shouldn't pass any of these args
        bad = {b for b in {*meta_keys, "path", "no_setup", "out_dir"} if b in e}
        if len(bad) > 0:
            # sorted so that the message is deterministic
            raise ValueError(f"Forbidden keys in [[search]] ({cmd}): {','.join(sorted(bad))}")
        # update the defaults from 'meta' (e.g. 'verbose')
        # skip the source -- it's the command name
        # stupidly, we need to explicitly add the defaults from the OptionInfo instances
        params.update(cmd.default_param_values().items())
        # do this after: the defaults had path, key, and to
        params["key"] = key
        # NOTE(review): 'path' is rejected above as a forbidden key, yet it is required
        # from the same table here -- presumably this should be ``input_path``; confirm
        params["path"] = e.req_as("path", Path)
        params["out_dir"] = out_dir
        params.setdefault("to", to)
        # now add the params we got for this command's section
        params.update({k: v for k, v in e.items() if k not in ("source", "category")})
        # 'check' is not necessarily present (it comes from 'meta' or the command defaults)
        params.pop("check", None)
        category = e.get_as("category", str)
        return cls(cmd, params, input_path, category)
174
175
176
__all__ = ["MultiSearch"]
177