mandos.cli.Commands.go_search() - Code Metrics - Inspection of "Bump flake8-bugbear from 20.11.1 to 21.3.2" - dmyersturnbull/mandos - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — dependabot/pip/flake8-bugbear-... ( 8d4b2b...22089b )

unknown

created 2021-03-15 02:31 UTC

mandos.cli.Commands.go_search() A

↳ Parent: mandos.cli

Complexity

Conditions

Size

Total Lines	29
Code Lines	24

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	1
eloc	24
nop	9
dl	0
loc	29
rs	9.304
c	0
b	0
f	0

How to fix Many Parameters

"""
Command-line interface for mandos.
"""

from __future__ import annotations

import inspect
import logging
from pathlib import Path, PurePath
from typing import Sequence, Set, Optional, Mapping, Any

from typing import Tuple as Tup
from typing import Type, Union


import pandas as pd

import typer

from chembl_webresource_client.new_client import new_client as Chembl

from pocketutils.core.dot_dict import NestedDotDict


from mandos.model.chembl_api import ChemblApi
from mandos.model.chembl_support.chembl_targets import TargetType
from mandos.model.hits import Triple
from mandos.model.pubchem_api import (
    CachingPubchemApi,
    QueryingPubchemApi,
    PubchemCompoundLookupError,
)
from mandos.model.searches import Search
from mandos.model.taxonomy import Taxonomy
from mandos.model.taxonomy_caches import TaxonomyFactories
from mandos.model.settings import MANDOS_SETTINGS
from mandos.search.chembl.binding_search import BindingSearch
from mandos.search.chembl.atc_search import AtcSearch
from mandos.search.chembl.go_search import GoType, GoSearch
from mandos.search.chembl.indication_search import IndicationSearch
from mandos.search.chembl.mechanism_search import MechanismSearch

# Logger named after the enclosing package (``__package__``) rather than this module.
logger = logging.getLogger(__package__)
# Typer application object; the ``@cli.command`` decorators in this file register commands on it.
cli = typer.Typer()


class Utils:
    """
    Stateless helpers used by the command-line entry points.
    """

    @staticmethod
    def split(st: str) -> Set[str]:
        """
        Splits a comma-separated string into a set of whitespace-stripped items.

        Args:
            st: Comma-separated values, such as ``"a, b,c"``

        Returns:
            The distinct stripped items. Note that an empty string yields ``{""}``.
        """
        return {s.strip() for s in st.split(",")}

    @staticmethod
    def get_taxon(taxon: int) -> Taxonomy:
        """
        Loads a taxonomy using the factory from ``TaxonomyFactories.from_uniprot``,
        configured with ``MANDOS_SETTINGS.taxonomy_cache_path``.

        Args:
            taxon: The taxon identifier passed to the factory's ``load``

        Returns:
            Whatever the factory's ``load`` returns for ``taxon``
        """
        return TaxonomyFactories.from_uniprot(MANDOS_SETTINGS.taxonomy_cache_path).load(taxon)

    @staticmethod
    def get_target_types(st: str) -> Set[str]:
        """
        Expands a target-type specification into a set of type names.

        Args:
            st: One of the keywords ``all``, ``known``, or ``protein``,
                or a comma-separated list of explicit type names.
                Leading and trailing whitespace is ignored.

        Returns:
            For ``all``, every ``TargetType.all_types()`` as a string;
            for ``known``, the same minus those with ``is_unknown`` set;
            for ``protein``, ``TargetType.protein_types()`` as strings;
            otherwise the result of :meth:`split` on ``st``.
        """
        st = st.strip()
        if st == "all":
            return {str(s) for s in TargetType.all_types()}
        if st == "known":
            return {str(s) for s in TargetType.all_types() if not s.is_unknown}
        if st == "protein":
            return {str(s) for s in TargetType.protein_types()}
        return Utils.split(st)

    @staticmethod
    def get_params() -> Mapping[str, Any]:
        """
        Captures the named arguments of the function that called this method.

        Returns:
            A dict mapping each argument name of the immediate caller to the value
            currently bound to it in the caller's local scope.

        Raises:
            RuntimeError: If the interpreter does not expose the calling frame
        """
        current = inspect.currentframe()
        # ``f_back`` is the caller's frame *object*. Index 3 of a ``FrameInfo`` is
        # only the function name (a str), which ``getargvalues`` cannot process.
        caller = None if current is None else current.f_back
        if caller is None:
            raise RuntimeError("Cannot inspect the calling frame on this interpreter")
        try:
            # assume there aren't any varargs or kwargs -- that would be strange in a CLI
            info = inspect.getargvalues(caller)
            return {name: info.locals[name] for name in info.args}
        finally:
            # Frame objects easily form reference cycles; release them promptly.
            del current, caller


class Searcher:
    """
    Runs one ``Search`` over a set of compounds and, optionally, writes the results to disk.
    """

    def __init__(self, search: Search):
        """
        Args:
            search: The search to execute; stored as ``self.what``
        """
        self.what = search
        # Recorded so that ``search`` can dump them to the ``.metadata.json`` file.
        self._params = Utils.get_params()

    def search(
        self,
        path: Path,
    ) -> Tup[pd.DataFrame, Sequence[Triple]]:
        """
        Performs the search, and writes data.

        Three files are written alongside ``path``, where ``<base>`` is ``path`` without
        its suffix plus ``-`` and the lowercased ``search_name`` of the search:

            - ``<base>.csv``: the hits data frame
            - ``<base>.triples.txt``: ``Triple.tab_header()`` followed by one ``tabs`` line per triple
            - ``<base>.metadata.json``: the recorded parameters under the key ``args``

        Args:
            path: Path to the input file. It is read as UTF-8 text with one key per line
                  (see :meth:`search_for`).
                  NOTE(review): the previous docstring also listed .csv/.tsv/.tab inputs
                  and affinity matrices; no such parsing is visible here -- confirm.

        Returns:
            The hits as a data frame, and the sorted distinct triples
        """
        df, triples = self.search_for(path)
        df_out = Path(str(path.with_suffix("")) + "-" + self.what.search_name.lower() + ".csv")
        df.to_csv(df_out)
        triples_out = df_out.with_suffix(".triples.txt")
        # A write of the ``statement`` lines to this same path used to precede this call.
        # It was overwritten immediately, so it was dropped as dead I/O.
        triples_out.write_text(
            "\n".join([Triple.tab_header(), *[t.tabs for t in triples]]), encoding="utf8"
        )
        NestedDotDict(dict(args=self._params)).write_json(df_out.with_suffix(".metadata.json"))
        return df, triples

    def search_for(
        self,
        compounds: Union[Sequence[str], PurePath],
    ) -> Tup[pd.DataFrame, Sequence[Triple]]:
        """
        Performs the search. Does not write any files.

        Args:
            compounds: Either the compound keys themselves, or a path (``PurePath`` or ``str``)
                       to a UTF-8 text file with one key per line.
                       Keys are stripped of whitespace and blank entries are dropped.

        Returns:
            A data frame with one row per hit and one column per entry of the search's
            ``hit_fields()``, and the sorted distinct triples derived from the hits
        """
        if isinstance(compounds, (PurePath, str)):
            compounds = Path(compounds).read_text(encoding="utf8").splitlines()
        stripped = (c.strip() for c in compounds)
        compounds = [c for c in stripped if c]
        cache = CachingPubchemApi(MANDOS_SETTINGS.pubchem_cache_path, QueryingPubchemApi())
        # TODO
        # Compounds that fail this lookup are only logged; they are still passed to the search.
        for compound in compounds:
            try:
                cache.fetch_data(compound)
            except PubchemCompoundLookupError:
                logger.error(f"Did not find compound {compound}")
                logger.debug(f"Did not find compound {compound}", exc_info=True)
        hits = self.what.find_all(compounds)
        # collapse over and sort the triples
        triples = sorted({hit.to_triple() for hit in hits})
        df = pd.DataFrame(
            [pd.Series({f: getattr(h, f) for f in self.what.hit_fields()}) for h in hits]
        )
        return df, triples


class Commands:
    """
    The mandos sub-commands, each registered on the module-level ``cli`` application.
    """

    # NOTE: Typer displays a command function's docstring as its help text,
    # so the command docstrings below are deliberately kept verbatim.

    @staticmethod
    def _binding_search(
        api,
        taxon: int,
        traversal_strategy: str,
        target_types: str,
        min_confidence: int,
        relations: str,
        min_pchembl: float,
        banned_flags: str,
    ) -> BindingSearch:
        # Builds the BindingSearch shared by ``binding`` and ``go_search`` from raw CLI values.
        return BindingSearch(
            chembl_api=api,
            tax=Utils.get_taxon(taxon),
            traversal_strategy=traversal_strategy,
            allowed_target_types=Utils.get_target_types(target_types),
            min_confidence_score=min_confidence,
            allowed_relations=Utils.split(relations),
            min_pchembl=min_pchembl,
            banned_flags=Utils.split(banned_flags),
        )

    @staticmethod
    @cli.command("chembl:binding")
    def binding(
        path: Path,
        taxon: int = 7742,
        traversal_strategy: str = "strategy0",
        target_types: str = "single_protein,protein_family,protein_complex,protein_complex_group,selectivity_group",
        min_confidence: int = 3,
        relations: str = "<,<=,=",
        min_pchembl: float = 6.0,
        banned_flags: str = "potential missing data,potential transcription error,outside typical range",
    ) -> Tup[pd.DataFrame, Sequence[Triple]]:
        """
        Process data.
        """
        # Comma-separated options are expanded into sets inside the helper.
        binding_search = Commands._binding_search(
            ChemblApi.wrap(Chembl),
            taxon,
            traversal_strategy,
            target_types,
            min_confidence,
            relations,
            min_pchembl,
            banned_flags,
        )
        return Searcher(binding_search).search(path)

    @staticmethod
    @cli.command("chembl:mechanism")
    def mechanism(
        path: Path,
        taxon: int = 7742,
        traversal_strategy: str = "strategy0",
        target_types: str = "single_protein,protein_family,protein_complex,protein_complex_group,selectivity_group",
        min_confidence: int = 3,
    ) -> Tup[pd.DataFrame, Sequence[Triple]]:
        """
        Process data.
        """
        searcher = Searcher(
            MechanismSearch(
                chembl_api=ChemblApi.wrap(Chembl),
                tax=Utils.get_taxon(taxon),
                traversal_strategy=traversal_strategy,
                allowed_target_types=Utils.get_target_types(target_types),
                min_confidence_score=min_confidence,
            )
        )
        return searcher.search(path)

    @staticmethod
    @cli.command("chembl:trials")
    def trials(
        path: Path,
        min_phase: int = 3,
    ) -> Tup[pd.DataFrame, Sequence[Triple]]:
        """
        Process data.
        """
        searcher = Searcher(
            IndicationSearch(
                chembl_api=ChemblApi.wrap(Chembl),
                min_phase=min_phase,
            )
        )
        return searcher.search(path)

    @staticmethod
    @cli.command("chembl:atc")
    def atc(
        path: Path,
    ) -> Tup[pd.DataFrame, Sequence[Triple]]:
        """
        Process data.
        """
        searcher = Searcher(AtcSearch(chembl_api=ChemblApi.wrap(Chembl)))
        return searcher.search(path)

    @staticmethod
    @cli.command("chembl:go")
    def go_search(
        path: Path,
        kind: GoType,
        taxon: int = 7742,
        traversal_strategy: str = "strategy0",
        target_types: str = "single_protein,protein_family,protein_complex,protein_complex_group,selectivity_group",
        min_confidence: int = 3,
        relations: str = "<,<=,=",
        min_pchembl: float = 6.0,
        banned_flags: str = "potential missing data,potential transcription error,outside typical range",
    ) -> Tup[pd.DataFrame, Sequence[Triple]]:
        """
        Process data.
        """
        wrapped = ChemblApi.wrap(Chembl)
        # The GO search is layered on a binding search configured like ``binding``.
        underlying = Commands._binding_search(
            wrapped,
            taxon,
            traversal_strategy,
            target_types,
            min_confidence,
            relations,
            min_pchembl,
            banned_flags,
        )
        return Searcher(GoSearch(wrapped, kind, underlying)).search(path)

    @staticmethod
    @cli.command(hidden=True)
    def process_tax(
        taxon: int,
    ) -> None:
        """
        Preps a new taxonomy file for use in mandos.
        Just returns if a corresponding file already exists in the resources dir or mandos cache (``~/.mandos``).

        Otherwise, downloads a tab-separated file from UniProt.
        (To find manually, follow the ``All lower taxonomy nodes`` link and click ``Download``.)
        Then applies fixes and reduces the file size, creating a new file alongside.
        Puts both the raw data and fixed data in the cache under ``~/.mandos/taxonomy/``.

        Args:
            taxon: The **ID** of the UniProt taxon
        """
        # The loaded taxonomy is discarded; the call is made only for its effects.
        factory = TaxonomyFactories.from_uniprot(MANDOS_SETTINGS.taxonomy_cache_path)
        factory.load(taxon)


# Run the Typer application when this file is executed as a script.
if __name__ == "__main__":
    cli()


# Names exported by a star-import of this module.
__all__ = ["Commands", "Searcher"]


1			"""
2			Command-line interface for mandos.
3			"""
4
5			from __future__ import annotations
6
7			import inspect
8			import logging
9			from pathlib import Path, PurePath
10			from typing import Sequence, Set, Optional, Mapping, Any
			0 ignored issues – show Unused Code introduced 2021-03-10 02:41 UTC by Report Bug Copy Issue Report Unused Optional imported from typing Loading history...
11			from typing import Tuple as Tup
12			from typing import Type, Union
			0 ignored issues – show Unused Code introduced 2021-03-10 02:41 UTC by Report Bug Copy Issue Report Unused Type imported from typing Loading history...
13
14			import pandas as pd
			0 ignored issues – show introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Unable to import 'pandas' Loading history...
15			import typer
			0 ignored issues – show introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Unable to import 'typer' Loading history...
16			from chembl_webresource_client.new_client import new_client as Chembl
			0 ignored issues – show introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Unable to import 'chembl_webresource_client.new_client' Loading history...
17			from pocketutils.core.dot_dict import NestedDotDict
			0 ignored issues – show introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Unable to import 'pocketutils.core.dot_dict' Loading history...
18
19			from mandos.model.chembl_api import ChemblApi
20			from mandos.model.chembl_support.chembl_targets import TargetType
21			from mandos.model.hits import Triple
22			from mandos.model.pubchem_api import (
23			CachingPubchemApi,
24			QueryingPubchemApi,
25			PubchemCompoundLookupError,
26			)
27			from mandos.model.searches import Search
28			from mandos.model.taxonomy import Taxonomy
29			from mandos.model.taxonomy_caches import TaxonomyFactories
30			from mandos.model.settings import MANDOS_SETTINGS
31			from mandos.search.chembl.binding_search import BindingSearch
32			from mandos.search.chembl.atc_search import AtcSearch
33			from mandos.search.chembl.go_search import GoType, GoSearch
34			from mandos.search.chembl.indication_search import IndicationSearch

dmyersturnbull / mandos

Push — dependabot/pip/flake8-bugbear-... ( 8d4b2b...22089b )

mandos.cli.Commands.go_search() A

Complexity

Size

Duplication

Importance

How to fix Many Parameters

Many Parameters

Duplication Side-by-Side

Filter issues like