Passed
Push — dependabot/pip/flake8-bugbear-... ( 8d4b2b...22089b )
by
unknown
03:19 queued 01:48
created

mandos.cli.Commands.go_search()   A

Complexity

Conditions 1

Size

Total Lines 29
Code Lines 24

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 24
nop 9
dl 0
loc 29
rs 9.304
c 0
b 0
f 0

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand; their parameter lists also tend to become inconsistent when you need more or different data.

There are several approaches to avoid long parameter lists:

1
"""
2
Command-line interface for mandos.
3
"""
4
5
from __future__ import annotations
6
7
import inspect
8
import logging
9
from pathlib import Path, PurePath
10
from typing import Sequence, Set, Optional, Mapping, Any
0 ignored issues
show
Unused Code introduced by
Unused Optional imported from typing
Loading history...
11
from typing import Tuple as Tup
12
from typing import Type, Union
0 ignored issues
show
Unused Code introduced by
Unused Type imported from typing
Loading history...
13
14
import pandas as pd
0 ignored issues
show
introduced by
Unable to import 'pandas'
Loading history...
15
import typer
0 ignored issues
show
introduced by
Unable to import 'typer'
Loading history...
16
from chembl_webresource_client.new_client import new_client as Chembl
0 ignored issues
show
introduced by
Unable to import 'chembl_webresource_client.new_client'
Loading history...
17
from pocketutils.core.dot_dict import NestedDotDict
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.dot_dict'
Loading history...
18
19
from mandos.model.chembl_api import ChemblApi
20
from mandos.model.chembl_support.chembl_targets import TargetType
21
from mandos.model.hits import Triple
22
from mandos.model.pubchem_api import (
23
    CachingPubchemApi,
24
    QueryingPubchemApi,
25
    PubchemCompoundLookupError,
26
)
27
from mandos.model.searches import Search
28
from mandos.model.taxonomy import Taxonomy
29
from mandos.model.taxonomy_caches import TaxonomyFactories
30
from mandos.model.settings import MANDOS_SETTINGS
31
from mandos.search.chembl.binding_search import BindingSearch
32
from mandos.search.chembl.atc_search import AtcSearch
33
from mandos.search.chembl.go_search import GoType, GoSearch
34
from mandos.search.chembl.indication_search import IndicationSearch
35
from mandos.search.chembl.mechanism_search import MechanismSearch
36
37
logger = logging.getLogger(__package__)
38
cli = typer.Typer()
39
40
41
class Utils:
    """
    Helpers for parsing CLI option strings and for capturing call arguments.
    """

    @staticmethod
    def split(st: str) -> Set[str]:
        """
        Splits a comma-separated string into a set of items.

        Args:
            st: Comma-separated values, e.g. ``"<,<=,="``

        Returns:
            The distinct items with surrounding whitespace stripped.
            Empty items (from ``""`` or ``"a,,b"``) are dropped.
        """
        stripped = (item.strip() for item in st.split(","))
        return {item for item in stripped if item}

    @staticmethod
    def get_taxon(taxon: int) -> Taxonomy:
        """
        Loads a taxonomy via ``TaxonomyFactories.from_uniprot``,
        using the cache path from ``MANDOS_SETTINGS``.

        Args:
            taxon: The taxon passed on to ``load``

        Returns:
            The loaded taxonomy
        """
        return TaxonomyFactories.from_uniprot(MANDOS_SETTINGS.taxonomy_cache_path).load(taxon)

    @staticmethod
    def get_target_types(st: str) -> Set[str]:
        """
        Expands a target-type option into a set of target type names.

        Args:
            st: One of the shortcuts ``"all"``, ``"known"`` (all types that are
                not flagged ``is_unknown``), or ``"protein"``;
                otherwise a comma-separated list that is split as-is

        Returns:
            The target type names as strings
        """
        st = st.strip()
        if st == "all":
            return {str(s) for s in TargetType.all_types()}
        if st == "known":
            return {str(s) for s in TargetType.all_types() if not s.is_unknown}
        if st == "protein":
            return {str(s) for s in TargetType.protein_types()}
        return Utils.split(st)

    @staticmethod
    def get_params(depth: int = 2) -> Mapping[str, Any]:
        """
        Captures the named arguments of a function further up the call stack.

        Args:
            depth: How many frames above this function to inspect.
                   1 is the direct caller; 2 (the default) is the caller's caller,
                   e.g. the function that constructed the object whose
                   ``__init__`` called this method.

        Returns:
            A mapping from argument name to the argument's current value in that frame

        Raises:
            ValueError: If the call stack is shallower than ``depth``
        """
        # Walk frame objects directly: the previous ``getouterframes(...)[1][3]`` picked the
        # function *name* (a str) out of the FrameInfo tuple, which getargvalues cannot inspect.
        frame = inspect.currentframe()
        for _ in range(depth):
            if frame is None:
                break
            frame = frame.f_back
        if frame is None:
            raise ValueError(f"No frame found {depth} level(s) above get_params")
        # assume there aren't any varargs or kwargs -- that would be strange in a CLI
        arg_info = inspect.getargvalues(frame)
        return {name: arg_info.locals[name] for name in arg_info.args}
67
68
69
class Searcher:
    """
    Runs one search over a list of compounds and, optionally, writes the results to disk.
    """

    def __init__(self, search: Search):
        """
        Args:
            search: The search to run
        """
        self.what = search
        # Argument snapshot obtained by frame inspection; written out as metadata by ``search``.
        self._params = Utils.get_params()

    def search(
        self,
        path: Path,
    ) -> Tup[pd.DataFrame, Sequence[Triple]]:
        """
        Performs the search, and writes data.

        For an input ``<stem>.<ext>`` and a search named ``<name>`` (lowercased), writes
        these files next to the input:
            - ``<stem>-<name>.csv``: the table of hits
            - ``<stem>-<name>.statements.txt``: one triple statement per line
            - ``<stem>-<name>.triples.txt``: tab-separated triples under a header line
            - ``<stem>-<name>.metadata.json``: the captured arguments

        Args:
            path: Path to the input file; it is read as UTF-8 text with one compound
                  key per line (blank lines are ignored)

        Returns:
            The hits as a DataFrame, and the sorted, de-duplicated triples
        """
        df, triples = self.search_for(path)
        df_out = Path(str(path.with_suffix("")) + "-" + self.what.search_name.lower() + ".csv")
        df.to_csv(df_out)
        # The statements used to be written to .triples.txt and were then overwritten by the
        # tab-separated table below; they now get their own file so neither output is lost.
        statements_out = df_out.with_suffix(".statements.txt")
        statements_out.write_text("\n".join(t.statement for t in triples), encoding="utf8")
        triples_out = df_out.with_suffix(".triples.txt")
        triples_out.write_text(
            "\n".join([Triple.tab_header(), *(t.tabs for t in triples)]), encoding="utf8"
        )
        NestedDotDict(dict(args=self._params)).write_json(df_out.with_suffix(".metadata.json"))
        return df, triples

    def search_for(
        self,
        compounds: Union[Sequence[str], PurePath],
    ) -> Tup[pd.DataFrame, Sequence[Triple]]:
        """
        Performs the search. Does not write any files.

        Args:
            compounds: Either the compound keys themselves, or a path (``PurePath`` or
                       ``str``) to a UTF-8 text file with one key per line.
                       Keys are stripped and blank entries are skipped.

        Returns:
            The hits as a DataFrame with one row per hit and one column per hit field,
            and the sorted, de-duplicated triples derived from the hits
        """
        if isinstance(compounds, (PurePath, str)):
            compounds = Path(compounds).read_text(encoding="utf8").splitlines()
        compounds = [c.strip() for c in compounds if c.strip()]
        cache = CachingPubchemApi(MANDOS_SETTINGS.pubchem_cache_path, QueryingPubchemApi())
        # TODO: compounds that fail this lookup are only logged; they are still searched below
        for compound in compounds:
            try:
                cache.fetch_data(compound)
            except PubchemCompoundLookupError:
                logger.error("Did not find compound %s", compound)
                # the traceback is only of interest when debugging
                logger.debug("Did not find compound %s", compound, exc_info=True)
        # materialize once: the hits are iterated twice (triples, then table rows)
        hits = list(self.what.find_all(compounds))
        # collapse over and sort the triples
        triples = sorted({hit.to_triple() for hit in hits})
        fields = list(self.what.hit_fields())
        df = pd.DataFrame([pd.Series({f: getattr(h, f) for f in fields}) for h in hits])
        return df, triples
133
134
135
class Commands:
    """
    Entry points for mandos.

    Each public method is registered as a command on the module-level Typer app ``cli``.
    Typer uses the method docstrings as the command help text.
    """

    # Option defaults shared by several commands, kept in one place so they cannot drift apart.
    _TARGET_TYPES = (
        "single_protein,protein_family,protein_complex,protein_complex_group,selectivity_group"
    )
    _RELATIONS = "<,<=,="
    _BANNED_FLAGS = (
        "potential missing data,potential transcription error,outside typical range"
    )

    @staticmethod
    def _binding_search(
        api: ChemblApi,
        taxon: int,
        traversal_strategy: str,
        target_types: str,
        min_confidence: int,
        relations: str,
        min_pchembl: float,
        banned_flags: str,
    ) -> BindingSearch:
        """
        Builds the ``BindingSearch`` used by both ``chembl:binding`` and ``chembl:go``,
        parsing the comma-separated CLI strings into sets.
        """
        return BindingSearch(
            chembl_api=api,
            tax=Utils.get_taxon(taxon),
            traversal_strategy=traversal_strategy,
            allowed_target_types=Utils.get_target_types(target_types),
            min_confidence_score=min_confidence,
            allowed_relations=Utils.split(relations),
            min_pchembl=min_pchembl,
            banned_flags=Utils.split(banned_flags),
        )

    @staticmethod
    @cli.command("chembl:binding")
    def binding(
        path: Path,
        taxon: int = 7742,
        traversal_strategy: str = "strategy0",
        target_types: str = _TARGET_TYPES,
        min_confidence: int = 3,
        relations: str = _RELATIONS,
        min_pchembl: float = 6.0,
        banned_flags: str = _BANNED_FLAGS,
    ) -> Tup[pd.DataFrame, Sequence[Triple]]:
        """
        Runs a ChEMBL binding search for the compounds listed in a file and writes the results.

        Args:
            path: Input file with one compound key per line
            taxon: ID of the taxon used to load the taxonomy
            traversal_strategy: Name of the target traversal strategy
            target_types: Comma-separated target types, or 'all', 'known', or 'protein'
            min_confidence: Minimum confidence score
            relations: Comma-separated allowed relations
            min_pchembl: Minimum pCHEMBL value
            banned_flags: Comma-separated flags to exclude
        """
        api = ChemblApi.wrap(Chembl)
        search = Commands._binding_search(
            api,
            taxon,
            traversal_strategy,
            target_types,
            min_confidence,
            relations,
            min_pchembl,
            banned_flags,
        )
        # Searcher.__init__ inspects the call stack for arguments, so build it here in the command
        return Searcher(search).search(path)

    @staticmethod
    @cli.command("chembl:mechanism")
    def mechanism(
        path: Path,
        taxon: int = 7742,
        traversal_strategy: str = "strategy0",
        target_types: str = _TARGET_TYPES,
        min_confidence: int = 3,
    ) -> Tup[pd.DataFrame, Sequence[Triple]]:
        """
        Runs a ChEMBL mechanism search for the compounds listed in a file and writes the results.

        Args:
            path: Input file with one compound key per line
            taxon: ID of the taxon used to load the taxonomy
            traversal_strategy: Name of the target traversal strategy
            target_types: Comma-separated target types, or 'all', 'known', or 'protein'
            min_confidence: Minimum confidence score
        """
        api = ChemblApi.wrap(Chembl)
        search = MechanismSearch(
            chembl_api=api,
            tax=Utils.get_taxon(taxon),
            traversal_strategy=traversal_strategy,
            allowed_target_types=Utils.get_target_types(target_types),
            min_confidence_score=min_confidence,
        )
        return Searcher(search).search(path)

    @staticmethod
    @cli.command("chembl:trials")
    def trials(
        path: Path,
        min_phase: int = 3,
    ) -> Tup[pd.DataFrame, Sequence[Triple]]:
        """
        Runs a ChEMBL indication search for the compounds listed in a file and writes the results.

        Args:
            path: Input file with one compound key per line
            min_phase: Minimum phase passed to the indication search
        """
        api = ChemblApi.wrap(Chembl)
        search = IndicationSearch(
            chembl_api=api,
            min_phase=min_phase,
        )
        return Searcher(search).search(path)

    @staticmethod
    @cli.command("chembl:atc")
    def atc(
        path: Path,
    ) -> Tup[pd.DataFrame, Sequence[Triple]]:
        """
        Runs a ChEMBL ATC search for the compounds listed in a file and writes the results.

        Args:
            path: Input file with one compound key per line
        """
        api = ChemblApi.wrap(Chembl)
        search = AtcSearch(
            chembl_api=api,
        )
        return Searcher(search).search(path)

    @staticmethod
    @cli.command("chembl:go")
    def go_search(
        path: Path,
        kind: GoType,
        taxon: int = 7742,
        traversal_strategy: str = "strategy0",
        target_types: str = _TARGET_TYPES,
        min_confidence: int = 3,
        relations: str = _RELATIONS,
        min_pchembl: float = 6.0,
        banned_flags: str = _BANNED_FLAGS,
    ) -> Tup[pd.DataFrame, Sequence[Triple]]:
        """
        Runs a GO term search, built on top of a ChEMBL binding search, and writes the results.

        Args:
            path: Input file with one compound key per line
            kind: The type of GO term to search for
            taxon: ID of the taxon used to load the taxonomy
            traversal_strategy: Name of the target traversal strategy
            target_types: Comma-separated target types, or 'all', 'known', or 'protein'
            min_confidence: Minimum confidence score
            relations: Comma-separated allowed relations
            min_pchembl: Minimum pCHEMBL value
            banned_flags: Comma-separated flags to exclude
        """
        api = ChemblApi.wrap(Chembl)
        binding_search = Commands._binding_search(
            api,
            taxon,
            traversal_strategy,
            target_types,
            min_confidence,
            relations,
            min_pchembl,
            banned_flags,
        )
        search = GoSearch(api, kind, binding_search)
        # Searcher.__init__ inspects the call stack for arguments, so build it here in the command
        return Searcher(search).search(path)

    @staticmethod
    @cli.command(hidden=True)
    def process_tax(
        taxon: int,
    ) -> None:
        """
        Preps a new taxonomy file for use in mandos.
        Just returns if a corresponding file already exists in the resources dir
        or mandos cache (``~/.mandos``).
        Otherwise, downloads a tab-separated file from UniProt.
        (To find manually, follow the ``All lower taxonomy nodes`` link and click ``Download``.)
        Then applies fixes and reduces the file size, creating a new file alongside.
        Puts both the raw data and fixed data in the cache under ``~/.mandos/taxonomy/``.

        Args:
            taxon: The **ID** of the UniProt taxon
        """
        TaxonomyFactories.from_uniprot(MANDOS_SETTINGS.taxonomy_cache_path).load(taxon)
267
268
269
if __name__ == "__main__":
270
    cli()
271
272
273
__all__ = ["Commands", "Searcher"]
274