Passed
Push — main ( d041b6...a80564 )
by Douglas
02:56 queued 01:15
created

MultiSearch.explain_path()   A

Complexity

Conditions 1

Size

Total Lines 3
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 3
nop 1
dl 0
loc 3
rs 10
c 0
b 0
f 0
1
"""
2
Runner.
3
"""
4
5
from __future__ import annotations
6
7
from dataclasses import dataclass
8
from pathlib import Path
9
from typing import Dict, Sequence, List, Type, Union, Optional
0 ignored issues
show
Unused Code introduced by
Unused List imported from typing
Loading history...
10
11
import pandas as pd
0 ignored issues
show
introduced by
Unable to import 'pandas'
Loading history...
12
import typer
0 ignored issues
show
introduced by
Unable to import 'typer'
Loading history...
13
from typeddfs import TypedDfs
0 ignored issues
show
introduced by
Unable to import 'typeddfs'
Loading history...
14
from pocketutils.core.dot_dict import NestedDotDict
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.dot_dict'
Loading history...
15
16
from mandos import logger
17
from mandos.entries.api_singletons import Apis
18
from mandos.entries.entries import Entries, Entry
19
from mandos.model import InjectionError
20
from mandos.model.hits import HitFrame
21
from mandos.model.settings import MANDOS_SETTINGS
22
23
# Typer application object for this module.
cli = typer.Typer()

# Install the default API singletons, then alias the two used here.
Apis.set_default()
Chembl = Apis.Chembl
Pubchem = Apis.Pubchem

# Maps each entry's command name (``Entry.cmd()``) to its Entry class.
EntriesByCmd: Dict[str, Type[Entry]] = {entry.cmd(): entry for entry in Entries}

# these are only permitted in 'meta', not individual searches
meta_keys = {"verbose", "quiet", "check", "log", "to"}
forbidden_keys = {"dir", "out-dir", "out_dir"}

# Typed data frame for the per-search "explain" table; every required column is a string.
SearchExplainDf = TypedDfs.typed("SearchExplainDf").require(
    "key", "search", "source", "category", "desc", "args", dtype=str
).build()
38
39
40
@dataclass(frozen=True, repr=True)
class MultiSearch:
    """
    A collection of searches, described by a TOML file, to run on one input file.

    The ``[meta]`` table of the TOML file supplies settings shared by all of the
    searches; each ``[[search]]`` table describes one search.
    The outputs of the individual searches are concatenated into ``final_path``,
    and a table describing the searches is written to ``explain_path``.
    """

    # 'meta' allows us to set defaults for things like --to
    meta: NestedDotDict
    searches: Sequence[NestedDotDict]
    toml_path: Path
    input_path: Path
    out_dir: Path

    @property
    def final_path(self) -> Path:
        """
        The path of the concatenated output file, inside ``out_dir``.

        The filename is built from the input filename, the TOML filename,
        and the suffix taken from ``meta["to"]`` (or the default table suffix).
        """
        if "to" in self.meta:
            to = str(self.meta["to"])
            # pathlib treats a name like ".csv" as a dotfile with *no* suffix,
            # so ``Path(".csv").suffix`` is "". Fall back to the raw value in that case.
            fmt = Path(to).suffix or to
        else:
            fmt = MANDOS_SETTINGS.default_table_suffix
        return self.out_dir / ("search_" + self.input_path.name + "_" + self.toml_path.name + fmt)

    @property
    def explain_path(self) -> Path:
        """
        The path of the "explain" table: ``final_path`` with its last suffix
        replaced by ``_explain.tsv``.
        """
        return Path(str(self.final_path.with_suffix("")) + "_explain.tsv")

    def __post_init__(self):
        """
        Validates ``meta``.

        Raises:
            ValueError: If ``to`` is set but does not start with ``.``,
                        or if ``meta`` contains a key that is not in ``meta_keys``
        """
        to = self.meta.get_as("to", str)
        if to is not None and not to.startswith("."):
            raise ValueError(f"Argument 'to' ({to}) must start with '.'.")
        for key, _ in self.meta.items():
            if key not in meta_keys:
                raise ValueError(f"{key} in 'meta' not supported.")

    @classmethod
    def build(cls, input_path: Path, out_dir: Path, toml_path: Path) -> MultiSearch:
        """
        Reads ``toml_path`` and creates an instance from its ``search`` list and ``meta`` table.

        Args:
            input_path: The input file that is passed to every search
            out_dir: The directory that outputs are written under
            toml_path: The TOML file describing the searches
        """
        toml = NestedDotDict.read_toml(toml_path)
        searches = toml.get_as("search", list, [])
        meta = toml.sub("meta")
        return cls(meta, searches, toml_path, input_path, out_dir)

    def to_table(self) -> SearchExplainDf:
        """
        Builds a table with one row per search that will be run.

        The columns are the search key, search name, category, data source,
        description, and the parameters formatted as ``name=value`` pairs.
        """
        rows = []
        for cmd in self._build_commands():
            search_type = cmd.cmd.get_search_type()
            name = search_type.search_name
            cat = cmd.category
            # the data source is read from an instance, so construct one with the key
            src = search_type(cmd.key).data_source
            desc = cmd.cmd.describe()
            args = ", ".join([f"{k}={v}" for k, v in cmd.params.items()])
            ser = dict(key=cmd.key, search=name, category=cat, source=src, desc=desc, args=args)
            rows.append(pd.Series(ser))
        return SearchExplainDf(rows)

    def run(self) -> None:
        """
        Tests every search, runs every search, and then writes the combined results.

        The output files of the searches are concatenated and written to ``final_path``,
        and the table from ``to_table`` is written to ``explain_path``.
        """
        # build up the list of Entry classes first, and run ``test`` on each one
        # that's to check that the parameters are correct before running anything
        commands = self._build_commands()
        for cmd in commands:
            cmd.test()
            logger.info(f"Search {cmd.key} looks ok.")
        logger.notice("All searches look ok.")
        for cmd in commands:
            cmd.run()
        logger.notice("Done with all searches!")
        # NOTE(review): searches skipped as already run are not in ``commands``,
        # so their existing output files are not part of the concatenation -- confirm intended
        df = HitFrame(pd.concat([HitFrame.read_file(cmd.output_path) for cmd in commands]))
        df.write_file(self.final_path)
        logger.notice(f"Concatenated file to {self.final_path}")
        explain = self.to_table()
        explain.write_file(self.explain_path)

    def _build_commands(self) -> Sequence[CmdRunner]:
        """
        Builds a ``CmdRunner`` for each search, leaving out those that were already run.

        Raises:
            ValueError: If two searches that still need to run share the same key
        """
        commands = {}
        skipping = []
        for search in self.searches:
            cmd = CmdRunner.build(search, self.meta, self.input_path, self.out_dir)
            if cmd.was_run:
                skipping.append(cmd)
                continue
            # check *before* inserting; otherwise every key looks like a repeat
            if cmd.key in commands:
                raise ValueError(f"Repeated search key '{cmd.key}'")
            commands[cmd.key] = cmd
        if skipping:
            skipped = ", ".join([c.key for c in skipping])
            logger.notice(f"Skipping searches {skipped} (already run).")
        return list(commands.values())
120
121
122
@dataclass(frozen=True, repr=True)
class CmdRunner:
    """
    A single search: an ``Entry`` class plus the keyword arguments to call it with.
    """

    cmd: Type[Entry]
    params: Dict[str, Union[int, str, float]]
    input_path: Path
    category: Optional[str]

    @property
    def key(self) -> str:
        """The value of the ``key`` parameter."""
        return self.params["key"]

    @property
    def output_path(self) -> Path:
        """The value of the ``to`` parameter, as a path."""
        return Path(self.params["to"])

    @property
    def was_run(self) -> bool:
        """Whether ``done_path`` exists."""
        return self.done_path.exists()

    @property
    def done_path(self) -> Path:
        """``output_path`` with ``.done`` appended after its existing suffix."""
        return self.output_path.with_suffix(self.output_path.suffix + ".done")

    def test(self) -> None:
        """Calls the entry's ``test`` with the parameters, forcing ``quiet=True``."""
        self.cmd.test(self.input_path, **{**self.params, **dict(quiet=True)})

    def run(self) -> None:
        """Calls the entry's ``run`` with the parameters, forcing ``no_setup=True`` and ``check=False``."""
        self.cmd.run(self.input_path, **{**self.params, **dict(no_setup=True, check=False)})

    @classmethod
    def build(
        cls, e: NestedDotDict, meta: NestedDotDict, input_path: Path, out_dir: Path
    ) -> CmdRunner:
        """
        Creates a runner from one ``[[search]]`` section and the shared ``meta`` section.

        Args:
            e: The search section; ``source`` names the command, and ``key`` defaults to it
            meta: The shared settings, used as the starting parameters
            input_path: The input file passed to the command
            out_dir: Stored as the ``out_dir`` parameter

        Raises:
            InjectionError: If ``source`` is not a known command name
            ValueError: If the search section contains a key that is only allowed in ``meta``,
                        or one of ``path``, ``no_setup``, or ``out_dir``
        """
        name = e.req_as("source", str)
        key = e.get_as("key", str, name)
        to = meta.get_as("to", str, MANDOS_SETTINGS.default_table_suffix)
        try:
            cmd = EntriesByCmd[name]
        except KeyError:
            # the KeyError adds nothing beyond the message, so drop it from the traceback
            raise InjectionError(f"Search command {name} (key {key}) does not exist") from None
        # start from the shared 'meta' settings (e.g. 'verbose')
        params = dict(meta)
        # they shouldn't pass any of these args
        bad = {b for b in {*meta_keys, "path", "no_setup", "out_dir"} if b in e}
        if bad:
            # sorted so that the message is deterministic
            raise ValueError(f"Forbidden keys in [[search]] ({cmd}): {','.join(sorted(bad))}")
        # stupidly, we need to explicitly add the defaults from the OptionInfo instances
        # NOTE(review): these overwrite any value with the same name from 'meta' -- confirm intended
        params.update(cmd.default_param_values().items())
        # do this after: the defaults had path, key, and to
        params["key"] = key
        # NOTE(review): "path" is rejected in the search section above but required here -- confirm
        params["path"] = e.req_as("path", Path)
        params["out_dir"] = out_dir
        params.setdefault("to", to)
        # now add the params we got for this command's section
        # skip the source -- it's the command name -- and the category, which is stored separately
        params.update({k: v for k, v in e.items() if k not in ("source", "category")})
        # 'check' is supplied by ``run``; it may be absent here, so don't fail if it is
        params.pop("check", None)
        category = e.get_as("category", str)
        return cls(cmd, params, input_path, category)
180
181
182
# Only MultiSearch is exported by ``from <module> import *``; CmdRunner is not listed.
__all__ = ["MultiSearch"]
183