mandos.entry.searchers - Code Metrics - Inspection of "feat: nicer" - dmyersturnbull/mandos - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — main ( cee75c...37036d )

by Douglas

created 2021-09-06 00:52 UTC

mandos.entry.searchers A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	89
Duplicated Lines	0 %

Importance

Changes

Metric	Value
eloc	51
dl	0
loc	89
rs	10
c	0
b	0
f	0
wmc	7

3 Methods

Rating	Name	Size	Complexity
A	Searcher.__init__()	16	1
A	Searcher.search()	12	3
A	Searcher._search_one()	16	2

1 Function

Rating	Name	Duplication	Size	Complexity
A	_fix_cols()	0	2	1

"""
Run searches and write files.
"""

from __future__ import annotations

from pathlib import Path
from typing import Optional, Sequence

from pocketutils.core.dot_dict import NestedDotDict

from pocketutils.tools.common_tools import CommonTools

from typeddfs import TypedDfs


from mandos import logger
from mandos.entry.paths import EntryPaths
from mandos.model.searches import Search


def _fix_cols(df):

    return df.rename(columns={s: s.lower() for s in df.columns})


# DataFrame type used for the user-supplied input table of compounds.
# Built with the typeddfs builder; the notes below restate the arguments passed here.
# NOTE(review): the exact semantics of each builder call come from typeddfs -- confirm there.
InputFrame = (
    TypedDfs.typed("InputFrame")
    # "inchikey" is the only column declared as required
    .require("inchikey")
    # declared via reserve() with dtype=str -- presumably optional columns; verify
    .reserve("inchi", "smiles", "compound_id", dtype=str)
    # _fix_cols lower-cases all column labels
    .post(_fix_cols)
    # presumably permits columns beyond those declared above
    # (Searcher._search_one copies every non-"inchikey" input column) -- TODO confirm
    .strict(cols=False)
    .secure()
).build()


class Searcher:
    """
    Executes one or more searches and saves the results to CSV files.
    Create and use once.
    """

    def __init__(self, searches: Sequence[Search], to: Sequence[Path], input_path: Path):
        """
        Constructor.

        Args:
            searches: The searches to run; each one's ``key`` is used to look up its output path
            to: One path per search, paired with ``searches`` by ``CommonTools.zip_list``
                and passed to ``EntryPaths.output_path_of`` to get the output path
            input_path: Path to the input file of one of the formats:
                - .txt containing one InChI Key per line
                - .csv, .tsv, .tab, .csv.gz, .tsv.gz, .tab.gz, or .feather
                  containing a column called inchikey

        """
        self.what = searches
        self.input_path: Optional[Path] = input_path
        # stays None until search() is called; search() uses this to refuse a second run
        self.input_df: Optional[InputFrame] = None
        self.output_paths = {
            what.key: EntryPaths.output_path_of(what, input_path, path)
            for what, path in CommonTools.zip_list(searches, to)
        }

    def search(self) -> Searcher:
        """
        Performs the search, and writes data.

        Reads the input file, then runs every search on the unique ``inchikey`` values.

        Returns:
            This instance

        Raises:
            ValueError: If ``search`` was already called on this instance
        """
        if self.input_df is not None:
            raise ValueError("Already ran a search")

        self.input_df = InputFrame.read_file(self.input_path)
        logger.info(f"Read {len(self.input_df)} input compounds")
        inchikeys = self.input_df["inchikey"].unique()
        for what in self.what:
            self._search_one(what, inchikeys)
        return self

    def _search_one(self, what: Search, inchikeys: Sequence[str]) -> None:
        """
        Runs a single search, then writes its results and a JSON metadata file.

        Args:
            what: The search to run
            inchikeys: The InChI Keys to pass to ``what.find_to_df``
        """
        output_path = self.output_paths[what.key]
        metadata_path = output_path.with_suffix(".json.metadata")
        df = what.find_to_df(inchikeys)

        # keep all of the original extra columns from the input
        # e.g. if the user had 'inchi' or 'smiles' or 'pretty_name'
        extra_cols = [c for c in self.input_df.columns if c != "inchikey"]
        if extra_cols:
            # re-indexing does not depend on the column, so do it once rather than per column
            by_inchikey = self.input_df.set_index("inchikey")
            for extra_col in extra_cols:
                extra_mp = by_inchikey[extra_col].to_dict()
                # the "lookup" column of the results is matched against the input inchikey values
                df[extra_col] = df["lookup"].map(extra_mp.get)
        # write the (intermediate) file
        df.write_file(output_path)
        # write metadata
        params = {k: str(v) for k, v in what.get_params().items() if k not in {"key", "api"}}
        metadata = NestedDotDict(dict(key=what.key, search=what.search_class, params=params))
        metadata.write_json(metadata_path)
        logger.info(f"Wrote {what.key} to {output_path}")


# Names exported by ``from mandos.entry.searchers import *``
__all__ = ["Searcher", "InputFrame"]


1			"""
2			Run searches and write files.
3			"""
4
5			from __future__ import annotations
6
7			from pathlib import Path
8			from typing import Optional, Sequence
9
10			from pocketutils.core.dot_dict import NestedDotDict
			0 ignored issues – show introduced 2021-03-21 02:08 UTC by Report Bug Copy Issue Report Unable to import 'pocketutils.core.dot_dict' Loading history...
11			from pocketutils.tools.common_tools import CommonTools
			0 ignored issues – show introduced 2021-03-31 03:41 UTC by Report Bug Copy Issue Report Unable to import 'pocketutils.tools.common_tools' Loading history...
12			from typeddfs import TypedDfs
			0 ignored issues – show introduced 2021-03-21 02:08 UTC by Report Bug Copy Issue Report Unable to import 'typeddfs' Loading history...
13
14			from mandos import logger
15			from mandos.entry.paths import EntryPaths
16			from mandos.model.searches import Search
17
18
19			def _fix_cols(df):
			0 ignored issues – show Coding Style Naming introduced 2021-08-02 23:39 UTC by Report Bug Copy Issue Report Argument name "df" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]*\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
20			return df.rename(columns={s: s.lower() for s in df.columns})
21
22
23			InputFrame = (
24			TypedDfs.typed("InputFrame")
25			.require("inchikey")
26			.reserve("inchi", "smiles", "compound_id", dtype=str)
27			.post(_fix_cols)
28			.strict(cols=False)
29			.secure()
30			).build()
31
32
33			class Searcher:
34			"""
35			Executes one or more searches and saves the results to CSV files.
36			Create and use once.
37			"""
38
39			def __init__(self, searches: Sequence[Search], to: Sequence[Path], input_path: Path):
40			"""
41			Constructor.
42
43			Args:
44			searches:
45			input_path: Path to the input file of one of the formats:
46			- .txt containing one InChI Key per line
47			- .csv, .tsv, .tab, csv.gz, .tsv.gz, .tab.gz, or .feather containing a column called inchikey
			0 ignored issues – show Coding Style introduced 2021-04-01 01:11 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (109/100). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
48			"""
49			self.what = searches
50			self.input_path: Optional[Path] = input_path
51			self.input_df: InputFrame = None
52			self.output_paths = {
53			what.key: EntryPaths.output_path_of(what, input_path, path)
54			for what, path in CommonTools.zip_list(searches, to)
55			}
56
57			def search(self) -> Searcher:
58			"""
59			Performs the search, and writes data.
60			"""
61			if self.input_df is not None:
62			raise ValueError(f"Already ran a search")
			0 ignored issues – show introduced 2021-03-21 02:08 UTC by Report Bug Copy Issue Report Using an f-string that does not have any interpolated variables Loading history...
63			self.input_df = InputFrame.read_file(self.input_path)
64			logger.info(f"Read {len(self.input_df)} input compounds")
65			inchikeys = self.input_df["inchikey"].unique()
66			for what in self.what:
67			self._search_one(what, inchikeys)
68			return self
69
70			def _search_one(self, what: Search, inchikeys: Sequence[str]):
71			output_path = self.output_paths[what.key]
72			metadata_path = output_path.with_suffix(".json.metadata")
73			df = what.find_to_df(inchikeys)
			0 ignored issues – show Coding Style Naming introduced 2021-03-21 02:08 UTC by Report Bug Copy Issue Report Variable name "df" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]*\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
74			# keep all of the original extra columns from the input
75			# e.g. if the user had 'inchi' or 'smiles' or 'pretty_name'
76			for extra_col in [c for c in self.input_df.columns if c != "inchikey"]:
77			extra_mp = self.input_df.set_index("inchikey")[extra_col].to_dict()
78			df[extra_col] = df["lookup"].map(extra_mp.get)
79			# write the (intermediate) file
80			df.write_file(output_path)
81			# write metadata
82			params = {k: str(v) for k, v in what.get_params().items() if k not in {"key", "api"}}
83			metadata = NestedDotDict(dict(key=what.key, search=what.search_class, params=params))
84			metadata.write_json(metadata_path)
85			logger.info(f"Wrote {what.key} to {output_path}")
86
87
88			__all__ = ["Searcher", "InputFrame"]
89

dmyersturnbull / mandos

Push — main ( cee75c...37036d )

mandos.entry.searchers A

Complexity

Size/Duplication

Importance

3 Methods

1 Function

Duplication Side-by-Side

Filter issues like