mandos.cli - Code Metrics - dmyersturnbull/mandos - Measure and Improve Code Quality continuously with Scrutinizer

mandos.cli A
last analyzed 2021-01-25 23:07 UTC

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	164
Duplicated Lines	0 %

Importance

Changes

Metric	Value
eloc	95
dl	0
loc	164
rs	10
c	0
b	0
f	0
wmc	12

6 Methods

Rating	Name	Size	Complexity
A	Commands.search()	27	2
A	Commands.process_tax()	21	2
A	What.__new__()	4	1
A	What.clazz()	3	1
A	What.__init__()	2	1
B	Commands.search_for()	38	5

"""
Command-line interface for mandos.
"""

from __future__ import annotations

import enum
import logging
from pathlib import Path, PurePath
from typing import Any, Mapping, Optional, Sequence
from typing import Tuple as Tup
from typing import Type, Union

import pandas as pd

import typer

from chembl_webresource_client.new_client import new_client as Chembl

from pocketutils.core.dot_dict import NestedDotDict


from mandos.chembl_api import ChemblApi
from mandos.model import Search, Triple
from mandos.model.caches import TaxonomyFactories
from mandos.model.settings import DEFAULT_TAXONOMY_CACHE, Settings
from mandos.search.chembl.activity_search import ActivitySearch
from mandos.search.chembl.atc_search import AtcSearch
from mandos.search.chembl.go_search import GoSearchFactory, GoType
from mandos.search.chembl.indication_search import IndicationSearch
from mandos.search.chembl.mechanism_search import MechanismSearch

# Module-level logger, named after the package this module belongs to.
logger = logging.getLogger(__package__)
# Typer application on which the commands defined below are registered via ``@cli.command``.
cli = typer.Typer()


class What(enum.Enum):
    """
    List of search items.

    Each member is declared as a ``(value, search class)`` pair.
    The first element becomes the enum value, and the second is exposed through ``clazz``.

    The values are spelled out rather than produced by ``enum.auto()``.
    Before Python 3.11.1, ``auto()`` is only resolved when it is the sole thing on the
    assignment line, so inside a tuple the member value was the raw ``enum.auto``
    placeholder object rather than an integer.
    The explicit values match what ``auto()`` generates on interpreters that do resolve it.
    """

    activity = 1, ActivitySearch
    mechanism = 2, MechanismSearch
    atc = 3, AtcSearch
    trial = 4, IndicationSearch
    go_proc_moa = 5, GoSearchFactory.create(GoType.process, MechanismSearch)
    go_fn_moa = 6, GoSearchFactory.create(GoType.function, MechanismSearch)
    go_comp_moa = 7, GoSearchFactory.create(GoType.component, MechanismSearch)
    go_proc_act = 8, GoSearchFactory.create(GoType.process, ActivitySearch)
    go_fn_act = 9, GoSearchFactory.create(GoType.function, ActivitySearch)
    go_comp_act = 10, GoSearchFactory.create(GoType.component, ActivitySearch)

    def __new__(cls, *args, **kwargs):
        """
        Create the member, using only the first element of the declared tuple as its value.
        """
        obj = object.__new__(cls)
        obj._value_ = args[0]
        return obj

    # ignore the first param since it's already set by __new__
    def __init__(self, _: int, clazz: Type[Search]):
        self._clazz_ = clazz

    @property
    def clazz(self) -> Type[Search]:
        """
        The search class that was declared alongside this member's value.
        """
        return self._clazz_


class Commands:
    """
    Entry points for mandos.
    """

    @staticmethod
    @cli.command()
    def search(
        what: str,
        path: Path,
        config: Optional[Path] = None,
    ) -> None:
        """
        Run one or more searches and write the results alongside the input file.

        For each requested search ``<name>``, with ``<stem>`` being ``path`` without its suffix, writes:
            - ``<stem>-<name>.csv``: the table of hits
            - ``<stem>-<name>.triples.txt``: ``Triple.tab_header()`` followed by the ``tabs`` form of each triple
            - ``<stem>-<name>.statements.txt``: the ``statement`` form of each triple, one per line

        Args:
            what: Comma-separated list of search names, such as ``activity``, ``mechanism``, ``atc``,
                and ``trial``. Case and surrounding whitespace are ignored.
            path: Path to an input text file containing one key (InChI / CHEMBL ID) per line
            config: Path to a TOML config file

        Raises:
            KeyError: If a name in ``what`` is not a member of ``What``
        """
        stem = str(path.with_suffix(""))
        for name in what.split(","):
            # Strip so that lists written as "activity, atc" resolve too.
            selected = What[name.strip().lower()]
            df, triples = Commands.search_for(selected, path, config=config)
            df_out = Path(f"{stem}-{selected.name.lower()}.csv")
            df.to_csv(df_out)
            # Both outputs used to be written to the same ``.triples.txt`` path, so the
            # statements were overwritten right away by the table written after them.
            # The table keeps that file name (it is what the file ended up containing),
            # and the statements now get a file of their own.
            triples_out = df_out.with_suffix(".triples.txt")
            triples_out.write_text(
                "\n".join([Triple.tab_header(), *[t.tabs for t in triples]]), encoding="utf8"
            )
            statements_out = df_out.with_suffix(".statements.txt")
            statements_out.write_text("\n".join([t.statement for t in triples]), encoding="utf8")

    @staticmethod
    @cli.command(hidden=True)
    def process_tax(
        taxon: int,
        cache_path: Optional[Path] = None,
    ) -> None:
        """
        Preps a new taxonomy file for use in mandos.
        Just returns if a corresponding file already exists in the resources dir or mandos cache (``~/.mandos``).

        Otherwise, downloads a tab-separated file from UniProt.
        (To find manually, follow the ``All lower taxonomy nodes`` link and click ``Download``.)
        Then applies fixes and reduces the file size, creating a new file alongside.
        Puts both the raw data and fixed data in the cache under ``~/.mandos/taxonomy/``.

        Args:
            taxon: The **ID** of the UniProt taxon
            cache_path: Cache location handed to ``TaxonomyFactories.from_uniprot``;
                ``DEFAULT_TAXONOMY_CACHE`` is used when this is omitted
        """
        cache_dir = DEFAULT_TAXONOMY_CACHE if cache_path is None else cache_path
        factory = TaxonomyFactories.from_uniprot(cache_dir)
        factory.load(taxon)

    @staticmethod
    def search_for(
        what: What,
        compounds: Union[Sequence[str], PurePath],
        config: Union[None, Mapping[str, Any], Path],
    ) -> Tup[pd.DataFrame, Sequence[Triple]]:
        """
        Run a single search over a set of compounds.

        Args:
            what: The search to run; ``what.clazz`` is instantiated with the API, settings, and taxonomy
            compounds: Either the compound keys themselves, or a path (``PurePath`` or ``str``)
                to a UTF-8 text file with one key per line.
                Keys are stripped of surrounding whitespace, and empty ones are dropped.
            config: ``None`` for an empty configuration, a path to a TOML file,
                or a mapping (including a ``NestedDotDict``) of configuration values

        Returns:
            A tuple of (1) a DataFrame with one row per hit and one column per name in
            ``what.clazz.hit_fields()``, and (2) the sorted, de-duplicated triples of those hits
        """
        if isinstance(compounds, (PurePath, str)):
            compounds = Path(compounds).read_text(encoding="utf8").splitlines()
        compounds = [c.strip() for c in compounds if c.strip()]
        if config is None:
            data = NestedDotDict({})
        elif isinstance(config, PurePath):
            data = NestedDotDict.read_toml(config)
        elif isinstance(config, NestedDotDict):
            # Already in the form passed to Settings.load in the other branches.
            # It still has to be loaded: this branch previously used the raw dict itself
            # as ``settings``, which the calls and attribute reads below are not meant for.
            data = config
        else:
            data = NestedDotDict(config)
        settings = Settings.load(data)
        settings.set()
        api = ChemblApi.wrap(Chembl)
        taxonomy = TaxonomyFactories.from_uniprot(settings.taxonomy_cache_path).load(settings.taxon)
        # Materialized because the hits are iterated twice below (triples, then rows);
        # a one-shot iterable would otherwise leave the DataFrame empty.
        hits = list(what.clazz(api, settings, taxonomy).find_all(compounds))
        # collapse over and sort the triples
        triples = sorted({hit.to_triple() for hit in hits})
        # Evaluated once and stored as a list so that it can be reused for every hit.
        fields = list(what.clazz.hit_fields())
        df = pd.DataFrame([pd.Series({f: getattr(h, f) for f in fields}) for h in hits])
        return df, triples


# Run the Typer application when this module is executed as a script.
if __name__ == "__main__":
    cli()


# Public names of this module.
__all__ = ["Commands", "What"]


1			"""
2			Command-line interface for mandos.
3			"""
4
5			from __future__ import annotations
6
7			import enum
8			import logging
9			from pathlib import Path, PurePath
10			from typing import Any, Mapping, Optional, Sequence
11			from typing import Tuple as Tup
12			from typing import Type, Union
13
14			import pandas as pd
			0 ignored issues – show introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Unable to import 'pandas' Loading history...
15			import typer
			0 ignored issues – show introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Unable to import 'typer' Loading history...
16			from chembl_webresource_client.new_client import new_client as Chembl
			0 ignored issues – show introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Unable to import 'chembl_webresource_client.new_client' Loading history...
17			from pocketutils.core.dot_dict import NestedDotDict
			0 ignored issues – show introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Unable to import 'pocketutils.core.dot_dict' Loading history...
18
19			from mandos.chembl_api import ChemblApi
20			from mandos.model import Search, Triple
21			from mandos.model.caches import TaxonomyFactories
22			from mandos.model.settings import DEFAULT_TAXONOMY_CACHE, Settings
23			from mandos.search.chembl.activity_search import ActivitySearch
24			from mandos.search.chembl.atc_search import AtcSearch
25			from mandos.search.chembl.go_search import GoSearchFactory, GoType
26			from mandos.search.chembl.indication_search import IndicationSearch
27			from mandos.search.chembl.mechanism_search import MechanismSearch
28
29			logger = logging.getLogger(__package__)
30			cli = typer.Typer()
31
32
33			class What(enum.Enum):
34			"""
35			List of search items.
36			"""
37
38			activity = enum.auto(), ActivitySearch
39			mechanism = enum.auto(), MechanismSearch
40			atc = enum.auto(), AtcSearch
41			trial = enum.auto(), IndicationSearch
42			go_proc_moa = enum.auto(), GoSearchFactory.create(GoType.process, MechanismSearch)
43			go_fn_moa = enum.auto(), GoSearchFactory.create(GoType.function, MechanismSearch)
44			go_comp_moa = enum.auto(), GoSearchFactory.create(GoType.component, MechanismSearch)
45			go_proc_act = enum.auto(), GoSearchFactory.create(GoType.process, ActivitySearch)
46			go_fn_act = enum.auto(), GoSearchFactory.create(GoType.function, ActivitySearch)
47			go_comp_act = enum.auto(), GoSearchFactory.create(GoType.component, ActivitySearch)
48
49			def __new__(cls, *args, **kwargs):
			0 ignored issues – show Unused Code introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report The argument `kwargs` seems to be unused. Loading history...
50			obj = object.__new__(cls)
51			obj._value_ = args[0]
52			return obj
53
54			# ignore the first param since it's already set by __new__
55			def __init__(self, _: str, clazz: Type[Search]):
56			self._clazz_ = clazz
57
58			@property
59			def clazz(self) -> Type[Search]:
			0 ignored issues – show introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
60			return self._clazz_
61
62
63			class Commands:
64			"""
65			Entry points for mandos.
66			"""
67
68			@staticmethod
69			@cli.command()
70			def search(
71			what: str,
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
72			path: Path,
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
73			config: Optional[Path] = None,
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
74			) -> None:
75			"""
76			Process data.
77
78			Args:
79			what: Comma-separated list of ``activity``, ``mechanism``, ``atc``, and ``indication``.
80			path: Path to the input file of one of the formats:
81			- .txt containing one key (InChI / CHEMBL ID) per line
82			- .csv/.tsv/.tab containing one key per row
83			- .csv/.tsv/.tab of a symmetric affinity matrix, with a row header and column header with the keys
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (114/100). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
84			config: Path to a TOML config file
85			"""
86			for w in what.split(","):
			0 ignored issues – show Coding Style Naming introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Variable name "w" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]*\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
87			w = What[w.lower()]
			0 ignored issues – show Coding Style Naming introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Variable name "w" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]*\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
88			df, triples = Commands.search_for(w, path, config=config)
			0 ignored issues – show Coding Style Naming introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Variable name "df" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]*\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
89			df_out = Path(str(path.with_suffix("")) + "-" + w.name.lower() + ".csv")
90			df.to_csv(df_out)
91			triples_out = df_out.with_suffix(".triples.txt")
92			triples_out.write_text("\n".join([t.statement for t in triples]), encoding="utf8")
93			triples_out.write_text(
94			"\n".join([Triple.tab_header(), *[t.tabs for t in triples]]), encoding="utf8"
95			)
96
97			@staticmethod
98			@cli.command(hidden=True)
99			def process_tax(
100			taxon: int,
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
101			cache_path: Optional[Path] = None,
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
102			) -> None:
103			"""
104			Preps a new taxonomy file for use in mandos.
105			Just returns if a corresponding file already exists in the resources dir or mandos cache (``~/.mandos``).
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (113/100). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
106			Otherwise, downloads a tab-separated file from UniProt.
107			(To find manually, follow the ``All lower taxonomy nodes`` link and click ``Download``.)
108			Then applies fixes and reduces the file size, creating a new file alongside.
109			Puts both the raw data and fixed data in the cache under ``~/.mandos/taxonomy/``.
110
111			Args:
112			taxon: The **ID** of the UniProt taxon
113			cache_path:
114			"""
115			if cache_path is None:
116			cache_path = DEFAULT_TAXONOMY_CACHE
117			TaxonomyFactories.from_uniprot(cache_path).load(taxon)
118
119			@staticmethod
120			def search_for(
121			what: What,
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
122			compounds: Union[Sequence[str], PurePath],
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
123			config: Union[None, Mapping[str, Any], Path],
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
124			) -> Tup[pd.DataFrame, Sequence[Triple]]:
125			"""
126
127			Args:
128			what:
129			compounds:
130			config:
131
132			Returns:
133
134			"""
135			if isinstance(compounds, (PurePath, str)):
136			compounds = Path(compounds).read_text(encoding="utf8").splitlines()
137			compounds = [c.strip() for c in compounds if len(c.strip()) > 0]
138			if config is None:
139			settings = Settings.load(NestedDotDict({}))
140			elif isinstance(config, PurePath):
141			settings = Settings.load(NestedDotDict.read_toml(config))
142			elif isinstance(config, NestedDotDict):
143			settings = config
144			else:
145			settings = Settings.load(NestedDotDict(config))
146			settings.set()
147			compounds = list(compounds)
148			api = ChemblApi.wrap(Chembl)
149			taxonomy = TaxonomyFactories.from_uniprot(settings.taxonomy_cache_path).load(settings.taxon)
150			hits = what.clazz(api, settings, taxonomy).find_all(compounds)
151			# collapse over and sort the triples
152			triples = sorted(list({hit.to_triple() for hit in hits}))
153			df = pd.DataFrame(
			0 ignored issues – show Coding Style Naming introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Variable name "df" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]*\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
154			[pd.Series({f: getattr(h, f) for f in what.clazz.hit_fields()}) for h in hits]
155			)
156			return df, triples
157
158
159			if __name__ == "__main__":
160			cli()
161
162
163			__all__ = ["Commands", "What"]
164

dmyersturnbull / mandos

mandos.cli A last analyzed 2021-01-25 23:07 UTC

Complexity

Size/Duplication

Importance

6 Methods

Duplication Side-by-Side

Filter issues like

mandos.cli A
last analyzed 2021-01-25 23:07 UTC