mandos.entry.multi_searches.MultiSearch._build_commands() - Code Metrics - Inspection of "fix: various" - dmyersturnbull/mandos - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — main ( 2b775d...83a9fb )

by Douglas

created 2021-09-23 02:56 UTC

MultiSearch._build_commands() D

↳ Parent: mandos.entry.multi_searches

Complexity

Conditions

Size

Total Lines	25
Code Lines	24

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	12
eloc	24
nop	1
dl	0
loc	25
rs	4.8
c	0
b	0
f	0

How to fix Complexity

"""
Runner.
"""

from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path
from typing import Sequence, Type, Union, Optional, MutableMapping

import pandas as pd

import tomlkit

import typer

from pocketutils.core.exceptions import (

    ReservedError,
    AlreadyUsedError,
    PathExistsError,
    XValueError,
)
from typeddfs import TypedDfs

from tomlkit.api import Table, AoT

from typeddfs.checksums import Checksums

from typeddfs.file_formats import CompressionFormat


from mandos.model.utils.setup import logger, MandosLogging
from mandos.entry.api_singletons import Apis
from mandos.entry.entry_commands import Entries
from mandos.entry.abstract_entries import Entry
from mandos.model.utils.reflection_utils import InjectionError
from mandos.model.hits import HitFrame

# Typer application object; no commands are registered on it in the visible code.
cli = typer.Typer()
# Import-time side effect: configure the default API singletons before they are read below.
Apis.set_default()
# Module-level aliases for the two API singletons (not referenced in the visible code).
Chembl, Pubchem = Apis.Chembl, Apis.Pubchem

# Maps each entry's command name (``Entry.cmd()``) to its Entry class.
# ``CmdRunner.build`` uses this to resolve the ``source`` of a ``[[search]]`` table.
EntriesByCmd: MutableMapping[str, Type[Entry]] = {e.cmd(): e for e in Entries}

# these are only permitted in 'meta', not individual searches
meta_keys = {"log", "stderr"}
# NOTE(review): not referenced in the visible code; ``CmdRunner.build`` spells out
# its own set of rejected keys (which also includes "path") -- confirm whether
# this constant is still needed.
forbidden_keys = {"to", "no_setup"}

# Typed data frame describing the searches; all six columns are required and str-typed.
# ``MultiSearch.to_table`` produces one row per search with exactly these columns.
SearchExplainDf = (
    TypedDfs.typed("SearchExplainDf")
    .require("key", "search", "source", dtype=str)
    .require("category", "desc", "args", dtype=str)
    .strict()
    .secure()
).build()


@dataclass(frozen=True, repr=True)
class MultiSearch:
    """
    A batch of searches declared in a TOML file and run against one input file.

    The TOML document supplies an optional ``meta`` table and a ``search`` array
    of tables. Each ``search`` entry is turned into a :class:`CmdRunner`; the
    outputs of the searches that are run are concatenated into ``final_path``.
    """

    # shared settings; only keys listed in ``meta_keys`` are accepted (see __post_init__)
    meta: Table
    # one table per search to perform
    searches: AoT
    # the TOML file that ``meta`` and ``searches`` were read from
    toml_path: Path
    # the file passed to every search as its input
    input_path: Path
    # directory that receives the per-search outputs, logs, and the final files
    out_dir: Path
    # appended to each search key, and to the final filename, to form output names
    suffix: str
    # if true, existing outputs are overwritten instead of rejected or skipped
    replace: bool
    # log path given on the command line, if any; forwarded to CmdRunner.build
    log_path: Optional[Path]

    @property
    def final_path(self) -> Path:
        """
        Path of the concatenated results file inside ``out_dir``.
        """
        name = f"search_{self.input_path.name}_{self.toml_path.name}{self.suffix}"
        return self.out_dir / name

    @property
    def explain_path(self) -> Path:
        """
        Path of the table that describes the searches, written next to ``final_path``.
        """
        # with_suffix("") drops only the last extension of final_path
        return Path(str(self.final_path.with_suffix("")) + "_explain.tsv")

    def __post_init__(self):
        """
        Validate the configuration right after construction.

        Raises:
            PathExistsError: If ``final_path`` or ``explain_path`` already exists
                             and ``replace`` is false
            ReservedError: If ``meta`` contains a key that is not in ``meta_keys``
        """
        if not self.replace:
            for path in (self.final_path, self.explain_path):
                if path.exists():
                    raise PathExistsError(f"Path {path} exists but --replace is not set")
        for key in dict(self.meta):
            if key not in meta_keys:
                raise ReservedError(f"{key} in 'meta' not supported.")

    @classmethod
    def build(
        cls,
        input_path: Path,
        out_dir: Path,
        suffix: str,
        toml_path: Path,
        replace: bool,
        log_path: Optional[Path],
    ) -> MultiSearch:
        """
        Read ``toml_path`` (as UTF-8) and create an instance from its contents.

        Args:
            input_path: File passed to every search
            out_dir: Directory for all output files
            suffix: Filename suffix for the outputs
            toml_path: TOML file with an optional ``meta`` table and ``search`` array
            replace: Whether existing outputs may be overwritten
            log_path: Log path from the command line, if any

        Returns:
            A new instance; validation in ``__post_init__`` applies
        """
        toml = tomlkit.loads(Path(toml_path).read_text(encoding="utf8"))
        return cls(
            # a missing 'meta' behaves like an empty table
            meta=toml.get("meta", {}),
            searches=toml.get("search", []),
            toml_path=toml_path,
            input_path=input_path,
            out_dir=out_dir,
            suffix=suffix,
            replace=replace,
            log_path=log_path,
        )

    def to_table(self) -> SearchExplainDf:
        """
        Describe the searches that would be run, one row per search.
        """
        return self._explain(self._build_commands())

    @staticmethod
    def _explain(commands: Sequence[CmdRunner]) -> SearchExplainDf:
        """
        Build the description table for already-built commands.
        """
        rows = []
        for cmd in commands:
            search_type = cmd.cmd.get_search_type()
            rows.append(
                pd.Series(
                    dict(
                        key=cmd.key,
                        search=search_type.search_name(),
                        category=cmd.category,
                        source=search_type.primary_data_source(),
                        desc=cmd.cmd.describe(),
                        args=", ".join([f"{k}={v}" for k, v in cmd.params.items()]),
                    )
                )
            )
        return SearchExplainDf(rows)

    def run(self) -> None:
        """
        Test every search, run every search, then concatenate the outputs.

        The description table is written to ``explain_path`` first. All searches
        are tested before any is run so that bad parameters are caught up front.
        Returns early, with a warning, if there is nothing to run.
        """
        commands = self._build_commands()
        if not commands:
            logger.warning("No searches -- nothing to do")
            return
        # write a metadata file describing all of the searches
        # (reuse ``commands`` rather than building -- and logging -- everything twice)
        explain = self._explain(commands)
        explain.write_file(self.explain_path, mkdirs=True)
        for cmd in commands:
            cmd.test()
            logger.info(f"Search {cmd.key} looks ok.")
        logger.notice("All searches look ok.")
        for cmd in commands:
            cmd.run()
        logger.notice("Done with all searches!")
        # write the final file
        # NOTE(review): searches skipped as already run are not in ``commands``, so
        # their existing output is not part of the concatenation -- confirm intended.
        df = HitFrame(pd.concat([HitFrame.read_file(cmd.output_path) for cmd in commands]))
        df.write_file(self.final_path)
        logger.notice(f"Concatenated file to {self.final_path}")

    def _build_commands(self) -> Sequence[CmdRunner]:
        """
        Build a runner for every ``search`` entry and decide which ones to run.

        A search that was already run is kept (and reported as overwritten) when
        ``replace`` is set, and dropped (and reported as skipped) otherwise.
        An output file that exists without being marked as complete is logged as
        an error, and the search is still returned.

        Returns:
            The runners to execute, in the order they appear in the TOML file

        Raises:
            AlreadyUsedError: If two searches share a key
        """
        commands = {}
        seen_keys = set()
        skipping = []
        replacing = []
        for search in self.searches:
            cmd = CmdRunner.build(
                search, self.meta, self.input_path, self.out_dir, self.suffix, self.log_path
            )
            # check every key, including skipped searches, so that a repeated key
            # is reported regardless of whether its results already exist
            if cmd.key in seen_keys:
                raise AlreadyUsedError(f"Repeated search key '{cmd.key}'")
            seen_keys.add(cmd.key)
            if cmd.output_path.exists() and not cmd.done_path.exists():
                logger.error(f"Path {cmd.output_path} exists but not marked as complete.")
            elif cmd.was_run:
                if self.replace:
                    replacing.append(cmd)
                else:
                    skipping.append(cmd)
                    continue
            commands[cmd.key] = cmd
        if skipping:
            keys = ", ".join(c.key for c in skipping)
            logger.notice(f"Skipping searches {keys} (already run).")
        if replacing:
            keys = ", ".join(c.key for c in replacing)
            logger.notice(f"Overwriting results for searches {keys}.")
        return list(commands.values())


@dataclass(frozen=True, repr=True)
class CmdRunner:
    """
    One search command paired with the keyword arguments it is called with.
    """

    # the Entry class that implements the search
    cmd: Type[Entry]
    # keyword arguments for ``cmd.test`` / ``cmd.run``; ``build`` always sets "key", "to", "log"
    params: MutableMapping[str, Union[int, str, float]]
    # the file passed to the search as its input
    input_path: Path
    # optional 'category' taken from the search's TOML table
    category: Optional[str]

    @property
    def key(self) -> str:
        """
        The search's key, read from ``params["key"]``.
        """
        return self.params["key"]

    @property
    def output_path(self) -> Path:
        """
        Where the search writes its results, read from ``params["to"]``.
        """
        return Path(self.params["to"])

    @property
    def done_path(self) -> Path:
        """
        The path ``Checksums.get_hash_dir`` returns for the output's parent directory.
        """
        return Checksums.get_hash_dir(self.output_path.parent)

    @property
    def was_run(self) -> bool:
        """
        Whether ``done_path`` exists and its parsed contents include ``output_path``.
        """
        if not self.done_path.exists():
            return False
        sums = Checksums.parse_hash_file_resolved(self.done_path)
        return self.output_path in sums

    def test(self) -> None:
        """
        Call the entry's ``test`` with the input path and the stored parameters.
        """
        self.cmd.test(self.input_path, **self.params)

    def run(self) -> None:
        """
        Call the entry's ``run`` with the input path and the stored parameters.
        """
        self.cmd.run(self.input_path, **self.params)

    @classmethod
    def build(
        cls,
        e: Table,
        meta: Table,
        input_path: Path,
        out_dir: Path,
        suffix: str,
        cli_log: Optional[Path],
    ) -> CmdRunner:
        """
        Create a runner from one ``[[search]]`` table.

        Args:
            e: The search's table; ``source`` names the command, ``key`` (optional)
               defaults to the command name, ``category`` is optional, and every
               other item is passed to the command as a parameter
            meta: The shared ``meta`` table
            input_path: File passed to the search
            out_dir: Directory for the search's output and log files
            suffix: Appended to the key to form the output filename
            cli_log: Log path from the command line, if any

        Returns:
            A runner whose ``params`` include "key", "to", and "log"

        Raises:
            XValueError: If ``meta["log"]`` fails the length check below
            InjectionError: If ``source`` is not a known command
            ReservedError: If the table contains a key reserved for 'meta' or
                           managed by this class ("path", "no_setup", "to")
        """
        # keep the command *name* separate from the Entry class looked up below,
        # so that error messages show the name rather than a class repr
        cmd_name = e["source"].value
        key = e.get("key", cmd_name)
        if "log" in meta:
            # NOTE(review): the message says "empty" but the check is for length 1 -- confirm
            if len(meta["log"].value) == 1:
                raise XValueError("'log' is empty")
            log = key + meta["log"].value
            MandosLogging.get_log_suffix(cli_log)  # just check
        elif cli_log is not None:
            log = key + MandosLogging.get_log_suffix(cli_log)
        else:
            log = key + ".log"
        log = out_dir / log
        try:
            cmd = EntriesByCmd[cmd_name]
        except KeyError:
            # the KeyError adds nothing beyond what the message already says
            raise InjectionError(
                f"Search command {cmd_name} (key {key}) does not exist"
            ) from None
        # they shouldn't pass any of these args
        # (sorted so that the error message is deterministic)
        bad = sorted(b for b in {*meta_keys, "path", "no_setup", "to"} if b in e)
        if bad:
            raise ReservedError(f"Forbidden keys in [[search]] ({cmd_name}): {','.join(bad)}")
        # start from 'meta' ...
        params = dict(meta)
        # ... then, stupidly, we need to explicitly add the defaults from the OptionInfo instances
        # NOTE(review): these defaults overwrite any same-named value from 'meta' -- confirm
        # that this precedence is intended
        params.update(cmd.default_param_values().items())
        # do this after: the defaults had path, key, and to
        params["key"] = key
        params["to"] = out_dir / (key + suffix)
        params["log"] = log
        # now add the params we got for this command's section
        # skip the source (it's the command name) and the category (stored separately)
        params.update({k: v for k, v in e.items() if k not in ("source", "category")})
        category = e.get("category")
        return cls(cmd, params, input_path, category)


# Public API of this module: only MultiSearch is exported; CmdRunner is not listed.
__all__ = ["MultiSearch"]


1			"""
2			Runner.
3			"""
4
5			from __future__ import annotations
6
7			from dataclasses import dataclass
8			from pathlib import Path
9			from typing import Sequence, Type, Union, Optional, MutableMapping
10
11			import pandas as pd
			0 ignored issues – show introduced 2021-07-05 18:49 UTC by Report Bug Copy Issue Report Unable to import 'pandas' Loading history...
12			import tomlkit
			0 ignored issues – show introduced 2021-09-23 03:01 UTC by Report Bug Copy Issue Report Unable to import 'tomlkit' Loading history...
13			import typer
			0 ignored issues – show introduced 2021-03-21 02:08 UTC by Report Bug Copy Issue Report Unable to import 'typer' Loading history...
14			from pocketutils.core.exceptions import (
			0 ignored issues – show introduced 2021-09-23 03:01 UTC by Report Bug Copy Issue Report Unable to import 'pocketutils.core.exceptions' Loading history...
15			ReservedError,
16			AlreadyUsedError,
17			PathExistsError,
18			XValueError,
19			)
20			from typeddfs import TypedDfs
			0 ignored issues – show introduced 2021-07-05 18:49 UTC by Report Bug Copy Issue Report Unable to import 'typeddfs' Loading history...
21			from tomlkit.api import Table, AoT
			0 ignored issues – show introduced 2021-09-23 03:01 UTC by Report Bug Copy Issue Report Unable to import 'tomlkit.api' Loading history...
22			from typeddfs.checksums import Checksums
			0 ignored issues – show introduced 2021-09-23 03:01 UTC by Report Bug Copy Issue Report Imports from package typeddfs are not grouped Loading history... introduced 2021-09-23 03:01 UTC by Report Bug Copy Issue Report Unable to import 'typeddfs.checksums' Loading history...
23			from typeddfs.file_formats import CompressionFormat
			0 ignored issues – show Unused Code introduced 2021-09-23 03:01 UTC by Report Bug Copy Issue Report Unused CompressionFormat imported from typeddfs.file_formats Loading history... introduced 2021-09-23 03:01 UTC by Report Bug Copy Issue Report Unable to import 'typeddfs.file_formats' Loading history...
24
25			from mandos.model.utils.setup import logger, MandosLogging
26			from mandos.entry.api_singletons import Apis
27			from mandos.entry.entry_commands import Entries
28			from mandos.entry.abstract_entries import Entry
29			from mandos.model.utils.reflection_utils import InjectionError
30			from mandos.model.hits import HitFrame
31
32			cli = typer.Typer()
33			Apis.set_default()
34			Chembl, Pubchem = Apis.Chembl, Apis.Pubchem
35
36			EntriesByCmd: MutableMapping[str, Type[Entry]] = {e.cmd(): e for e in Entries}
37
38			# these are only permitted in 'meta', not individual searches
39			meta_keys = {"log", "stderr"}
40			forbidden_keys = {"to", "no_setup"}
41
42			SearchExplainDf = (
43			TypedDfs.typed("SearchExplainDf")
44			.require("key", "search", "source", dtype=str)
45			.require("category", "desc", "args", dtype=str)
46			.strict()
47			.secure()
48			).build()
49
50
51			@dataclass(frozen=True, repr=True)
			0 ignored issues – show introduced 2021-07-05 18:49 UTC by Report Bug Copy Issue Report Missing class docstring Loading history... best-practice introduced 2021-09-23 03:01 UTC by Report Bug Copy Issue Report Too many instance attributes (8/7) Loading history...
52			class MultiSearch:
53			# 'meta' allows us to set defaults for things like --to
54			meta: Table
55			searches: AoT
56			toml_path: Path
57			input_path: Path
58			out_dir: Path
59			suffix: str
60			replace: bool
61			log_path: Optional[Path]
62
63			@property
64			def final_path(self) -> Path:
			0 ignored issues – show introduced 2021-03-21 02:08 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
65			name = "search_" + self.input_path.name + "_" + self.toml_path.name + self.suffix
66			return self.out_dir / name
67
68			@property
69			def explain_path(self) -> Path:
			0 ignored issues – show introduced 2021-07-05 18:49 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
70			return Path(str(self.final_path.with_suffix("")) + "_explain.tsv")
71
72			def __post_init__(self):
73			if not self.replace and self.final_path.exists():
74			raise PathExistsError(f"Path {self.final_path} exists but --replace is not set")
75			if not self.replace and self.explain_path.exists():
76			raise PathExistsError(f"Path {self.explain_path} exists but --replace is not set")
77			for key, value in dict(self.meta).items():
			0 ignored issues – show Unused Code introduced 2021-07-05 18:49 UTC by Report Bug Copy Issue Report The variable `value` seems to be unused. Loading history...
78			if key not in meta_keys:
79			raise ReservedError(f"{key} in 'meta' not supported.")
80
81			@classmethod
82			def build(
			0 ignored issues – show introduced 2021-07-05 18:49 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history... best-practice introduced 2021-09-23 03:01 UTC by Report Bug Copy Issue Report Too many arguments (7/5) Loading history...
83			cls,
			0 ignored issues – show Coding Style introduced 2021-09-13 00:16 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
84			input_path: Path,
			0 ignored issues – show Coding Style introduced 2021-09-23 03:01 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
85			out_dir: Path,
			0 ignored issues – show Coding Style introduced 2021-09-23 03:01 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
86			suffix: str,
			0 ignored issues – show Coding Style introduced 2021-09-23 03:01 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
87			toml_path: Path,
			0 ignored issues – show Coding Style introduced 2021-09-23 03:01 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
88			replace: bool,
			0 ignored issues – show Coding Style introduced 2021-09-23 03:01 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
89			log_path: Optional[Path],
			0 ignored issues – show Coding Style introduced 2021-09-23 03:01 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
90			) -> MultiSearch:
91			toml = tomlkit.loads(Path(toml_path).read_text(encoding="utf8"))
92			searches = toml.get("search", [])
93			return MultiSearch(
94			toml.get("meta", []),
95			searches,
96			toml_path,
97			input_path,
98			out_dir,
99			suffix,
100			replace,
101			log_path,
102			)
103
104			def to_table(self) -> SearchExplainDf:
			0 ignored issues – show introduced 2021-07-05 18:49 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
105			rows = []
106			for cmd in self._build_commands():
107			name = cmd.cmd.get_search_type().search_name()
108			cat = cmd.category
109			src = cmd.cmd.get_search_type().primary_data_source()
110			desc = cmd.cmd.describe()
111			args = ", ".join([f"{k}={v}" for k, v in cmd.params.items()])
112			ser = dict(key=cmd.key, search=name, category=cat, source=src, desc=desc, args=args)
113			rows.append(pd.Series(ser))
114			return SearchExplainDf(rows)
115
116			def run(self) -> None:
			0 ignored issues – show introduced 2021-07-05 18:49 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
117			# build up the list of Entry classes first, and run ``test`` on each one
118			# that's to check that the parameters are correct before running anything
119			commands = self._build_commands()
120			if len(commands) == 0:
121			logger.warning(f"No searches -- nothing to do")
			0 ignored issues – show introduced 2021-09-23 03:01 UTC by Report Bug Copy Issue Report Using an f-string that does not have any interpolated variables Loading history...
122			return
123			# write a metadata file describing all of the searches
124			explain = self.to_table()
125			explain.write_file(self.explain_path, mkdirs=True)
126			for cmd in commands:
127			cmd.test()
128			logger.info(f"Search {cmd.key} looks ok.")
129			logger.notice("All searches look ok.")
130			for cmd in commands:
131			cmd.run()
132			logger.notice("Done with all searches!")
133			# write the final file
134			df = HitFrame(pd.concat([HitFrame.read_file(cmd.output_path) for cmd in commands]))
			0 ignored issues – show Coding Style Naming introduced 2021-07-05 18:49 UTC by Report Bug Copy Issue Report Variable name "df" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]*\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
135			df.write_file(self.final_path)
136			logger.notice(f"Concatenated file to {self.final_path}")
137
138			def _build_commands(self) -> Sequence[CmdRunner]:
139			commands = {}
140			skipping = []
141			replacing = []
142			for search in self.searches:
143			cmd = CmdRunner.build(
144			search, self.meta, self.input_path, self.out_dir, self.suffix, self.log_path
145			)
146			if cmd.output_path.exists() and not cmd.done_path.exists():
147			logger.error(f"Path {cmd.output_path} exists but not marked as complete.")
148			elif cmd.was_run and self.replace:
149			replacing += [cmd]
150			elif cmd.was_run and not self.replace:
151			skipping += [cmd]
152			if cmd.key in commands:
153			raise AlreadyUsedError(f"Repeated search key '{cmd.key}'")
154			if cmd not in skipping:
155			commands[cmd.key] = cmd
156			if len(skipping) > 0:
157			skipping = ", ".join([c.key for c in skipping])
158			logger.notice(f"Skipping searches {skipping} (already run).")
159			if len(replacing) > 0:
160			replacing = ", ".join([c.key for c in skipping])
161			logger.notice(f"Overwriting results for searches {replacing}.")
162			return list(commands.values())
163
164
165			@dataclass(frozen=True, repr=True)
			0 ignored issues – show introduced 2021-07-05 18:49 UTC by Report Bug Copy Issue Report Missing class docstring Loading history...
166			class CmdRunner:
167			cmd: Type[Entry]
168			params: MutableMapping[str, Union[int, str, float]]
169			input_path: Path
170			category: Optional[str]
171
172			@property
173			def key(self) -> str:
			0 ignored issues – show introduced 2021-07-05 18:49 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
174			return self.params["key"]
175
176			@property
177			def output_path(self) -> Path:
			0 ignored issues – show introduced 2021-07-05 18:49 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
178			return Path(self.params["to"])
179
180			@property
181			def done_path(self) -> Path:
			0 ignored issues – show introduced 2021-07-05 18:49 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
182			return Checksums.get_hash_dir(self.output_path.parent)
183
184			@property
185			def was_run(self) -> bool:
			0 ignored issues – show introduced 2021-07-05 18:49 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
186			if not self.done_path.exists():
187			return False
188			sums = Checksums.parse_hash_file_resolved(self.done_path)
189			return self.output_path in sums
190
191			def test(self) -> None:
			0 ignored issues – show introduced 2021-07-05 18:49 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
192			self.cmd.test(self.input_path, **self.params)
193
194			def run(self) -> None:
			0 ignored issues – show introduced 2021-07-05 18:49 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
195			self.cmd.run(self.input_path, **self.params)
196
197			@classmethod
198			def build(
			0 ignored issues – show introduced 2021-07-05 19:05 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history... Coding Style Naming introduced 2021-07-05 18:49 UTC by Report Bug Copy Issue Report Argument name "e" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]*\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history... best-practice introduced 2021-09-23 03:01 UTC by Report Bug Copy Issue Report Too many arguments (7/5) Loading history...
199			cls,
			0 ignored issues – show Coding Style introduced 2021-09-23 03:01 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
200			e: Table,
			0 ignored issues – show Coding Style introduced 2021-09-23 03:01 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
201			meta: Table,
			0 ignored issues – show Coding Style introduced 2021-09-23 03:01 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
202			input_path: Path,
			0 ignored issues – show Coding Style introduced 2021-09-23 03:01 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
203			out_dir: Path,
			0 ignored issues – show Coding Style introduced 2021-09-23 03:01 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
204			suffix: str,
			0 ignored issues – show Coding Style introduced 2021-09-23 03:01 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
205			cli_log: Optional[Path],
			0 ignored issues – show Coding Style introduced 2021-09-23 03:01 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
206			):
207			cmd = e["source"].value
208			key = e.get("key", cmd)
209			if "log" in meta:
210			if len(meta["log"].value) == 1:
211			raise XValueError("'log' is empty")
212			log = key + meta["log"].value
213			MandosLogging.get_log_suffix(cli_log) # just check
214			elif cli_log is not None:
215			log = key + MandosLogging.get_log_suffix(cli_log)
216			else:
217			log = key + ".log"
218			log = out_dir / log
219			try:
220			cmd = EntriesByCmd[cmd]
221			except KeyError:
222			raise InjectionError(f"Search command {cmd} (key {key}) does not exist")
223			# use defaults
224			params = dict(meta)
225			# they shouldn't pass any of these args
226			bad = {b for b in {*meta_keys, "path", "no_setup", "to"} if b in e}
227			if len(bad) > 0:
228			raise ReservedError(f"Forbidden keys in [[search]] ({cmd}): {','.join(bad)}")
229			# update the defaults from 'meta' (e.g. 'verbose')
230			# skip the source -- it's the command name
231			# stupidly, we need to explicitly add the defaults from the OptionInfo instances
232			params.update(cmd.default_param_values().items())
233			# do this after: the defaults had path, key, and to
234			params["key"] = key
235			params["to"] = out_dir / (key + suffix)
236			params["log"] = log
237			# now add the params we got for this command's section
238			params.update({k: v for k, v in e.items() if k != "source" and k != "category"})
			0 ignored issues – show Unused Code introduced 2021-07-05 18:49 UTC by Report Bug Copy Issue Report Consider merging these comparisons with "in" to "k not in ('source', 'category')" Loading history...
239			category = e.get("category")
240			return CmdRunner(cmd, params, input_path, category)
241
242
243			__all__ = ["MultiSearch"]
244

dmyersturnbull / mandos

Push — main ( 2b775d...83a9fb )

MultiSearch._build_commands() D

Complexity

Size

Duplication

Importance

How to fix Complexity

Complexity

Duplication Side-by-Side

Filter issues like