Passed
Push — dependabot/pip/flake8-bugbear-... ( 8d4b2b...22089b )
by
unknown
03:19 queued 01:48
created

mandos.cli.Utils.get_params()   A

Complexity

Conditions 1

Size

Total Lines 6
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 5
nop 0
dl 0
loc 6
rs 10
c 0
b 0
f 0
1
"""
2
Command-line interface for mandos.
3
"""
4
5
from __future__ import annotations
6
7
import inspect
8
import logging
9
from pathlib import Path, PurePath
10
from typing import Sequence, Set, Optional, Mapping, Any
0 ignored issues
show
Unused Code introduced by
Unused Optional imported from typing
Loading history...
11
from typing import Tuple as Tup
12
from typing import Type, Union
0 ignored issues
show
Unused Code introduced by
Unused Type imported from typing
Loading history...
13
14
import pandas as pd
0 ignored issues
show
introduced by
Unable to import 'pandas'
Loading history...
15
import typer
0 ignored issues
show
introduced by
Unable to import 'typer'
Loading history...
16
from chembl_webresource_client.new_client import new_client as Chembl
0 ignored issues
show
introduced by
Unable to import 'chembl_webresource_client.new_client'
Loading history...
17
from pocketutils.core.dot_dict import NestedDotDict
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.dot_dict'
Loading history...
18
19
from mandos.model.chembl_api import ChemblApi
20
from mandos.model.chembl_support.chembl_targets import TargetType
21
from mandos.model.hits import Triple
22
from mandos.model.pubchem_api import (
23
    CachingPubchemApi,
24
    QueryingPubchemApi,
25
    PubchemCompoundLookupError,
26
)
27
from mandos.model.searches import Search
28
from mandos.model.taxonomy import Taxonomy
29
from mandos.model.taxonomy_caches import TaxonomyFactories
30
from mandos.model.settings import MANDOS_SETTINGS
31
from mandos.search.chembl.binding_search import BindingSearch
32
from mandos.search.chembl.atc_search import AtcSearch
33
from mandos.search.chembl.go_search import GoType, GoSearch
34
from mandos.search.chembl.indication_search import IndicationSearch
35
from mandos.search.chembl.mechanism_search import MechanismSearch
36
37
logger = logging.getLogger(__package__)
38
cli = typer.Typer()
39
40
41
class Utils:
    """
    Stateless helpers shared by the command-line entry points.
    """

    @staticmethod
    def split(st: str) -> Set[str]:
        """
        Splits a comma-separated string into a set of whitespace-stripped items.

        Empty items are kept as-is, so ``""`` yields ``{""}`` and ``"a,,b"`` yields
        ``{"a", "", "b"}``.

        Args:
            st: The comma-separated string

        Returns:
            The distinct, stripped items
        """
        return {s.strip() for s in st.split(",")}

    @staticmethod
    def get_taxon(taxon: int) -> Taxonomy:
        """
        Loads a taxonomy through ``TaxonomyFactories.from_uniprot``,
        using ``MANDOS_SETTINGS.taxonomy_cache_path`` as the cache location.

        Args:
            taxon: The taxon ID handed to the factory's ``load``

        Returns:
            Whatever the factory's ``load`` returns for ``taxon``
        """
        return TaxonomyFactories.from_uniprot(MANDOS_SETTINGS.taxonomy_cache_path).load(taxon)

    @staticmethod
    def get_target_types(st: str) -> Set[str]:
        """
        Resolves a target-type specification into a set of target-type names.

        Args:
            st: Either one of the shortcuts ``all``, ``known`` (every type that is not
                flagged ``is_unknown``) or ``protein``, or a comma-separated list of
                names, which is returned after splitting (names are not validated here)

        Returns:
            The target-type names as strings
        """
        st = st.strip()
        if st == "all":
            return {str(s) for s in TargetType.all_types()}
        if st == "known":
            return {str(s) for s in TargetType.all_types() if not s.is_unknown}
        if st == "protein":
            return {str(s) for s in TargetType.protein_types()}
        return Utils.split(st)

    @staticmethod
    def get_params() -> Mapping[str, Any]:
        """
        Returns the named parameters of the function that called this method.

        The values are read from the caller's locals at the time of the call, so a
        parameter that the caller has reassigned is reported with its current value.
        ``*args`` and ``**kwargs`` of the caller are not included.

        Returns:
            A mapping from parameter name to its current value in the calling frame

        Raises:
            RuntimeError: If the interpreter does not expose the calling frame
        """
        # A FrameInfo record is (frame, filename, lineno, function, ...): index 3 is the
        # function *name*, which ``getargvalues`` cannot use. Take the frame object itself.
        frame = inspect.currentframe()
        caller = None if frame is None else frame.f_back
        if caller is None:
            raise RuntimeError("Could not access the calling frame")
        try:
            # assume there aren't any varargs or kwargs -- that would be strange in a CLI
            info = inspect.getargvalues(caller)
            return {name: info.locals[name] for name in info.args}
        finally:
            # drop the frame references promptly to avoid reference cycles
            del frame, caller
67
68
69
class Searcher:
    """
    Runs one ``Search`` over a list of compounds and, optionally, writes the results to disk.
    """

    def __init__(self, search: Search):
        """
        Args:
            search: The search to run
        """
        self.what = search
        # Written to the ``.metadata.json`` file by ``search``.
        # NOTE(review): ``Utils.get_params`` inspects the *calling* frame, which here is
        # this constructor (``self`` and ``search``), not the CLI command -- confirm intent.
        self._params = Utils.get_params()

    def search(
        self,
        path: Path,
    ) -> Tup[pd.DataFrame, Sequence[Triple]]:
        """
        Performs the search, and writes data.

        Three files are written alongside the input, where ``<stem>`` is ``path`` without
        its suffix and ``<name>`` is the lowercased ``search_name`` of the search:

            - ``<stem>-<name>.csv``: the hits data frame
            - ``<stem>-<name>.triples.txt``: a tab-separated header plus one line per triple
            - ``<stem>-<name>.metadata.json``: the recorded parameters under the key ``args``

        Args:
            path: Path to a text file containing one compound key per line.
                  The file is read as UTF-8 plain text; blank lines are ignored.
                  No CSV or matrix parsing is performed.

        Returns:
            The hits data frame and the sorted, de-duplicated triples
        """
        df, triples = self.search_for(path)
        df_out = Path(str(path.with_suffix("")) + "-" + self.what.search_name.lower() + ".csv")
        df.to_csv(df_out)
        triples_out = df_out.with_suffix(".triples.txt")
        # Only the tabular form is written: an earlier write of ``t.statement`` lines to
        # this same file was always overwritten by this one, so it has been dropped.
        triples_out.write_text(
            "\n".join([Triple.tab_header(), *[t.tabs for t in triples]]), encoding="utf8"
        )
        NestedDotDict(dict(args=self._params)).write_json(df_out.with_suffix(".metadata.json"))
        return df, triples

    def search_for(
        self,
        compounds: Union[Sequence[str], PurePath],
    ) -> Tup[pd.DataFrame, Sequence[Triple]]:
        """
        Performs the search. Does not write any files.

        Args:
            compounds: Either the compound keys themselves, or a path (``PurePath`` or
                       ``str``) to a UTF-8 text file with one key per line.
                       Keys are stripped, and blank entries are dropped.

        Returns:
            A data frame with one row per hit and one column per entry of the search's
            ``hit_fields()``, and the sorted list of distinct triples built from the hits
        """
        if isinstance(compounds, (PurePath, str)):
            compounds = Path(compounds).read_text(encoding="utf8").splitlines()
        compounds = [key for key in (c.strip() for c in compounds) if key]
        cache = CachingPubchemApi(MANDOS_SETTINGS.pubchem_cache_path, QueryingPubchemApi())
        # TODO
        # Look up every compound through the caching PubChem API first.
        # Failed lookups are only logged; the compound is still passed to the search.
        for compound in compounds:
            try:
                cache.fetch_data(compound)
            except PubchemCompoundLookupError:
                logger.error("Did not find compound %s", compound)
                # same message again at debug level, this time with the traceback
                logger.debug("Did not find compound %s", compound, exc_info=True)
        hits = self.what.find_all(compounds)
        # collapse over and sort the triples
        triples = sorted({hit.to_triple() for hit in hits})
        fields = list(self.what.hit_fields())
        df = pd.DataFrame([pd.Series({f: getattr(h, f) for f in fields}) for h in hits])
        return df, triples
133
134
135
class Commands:
    """
    Entry points for mandos.

    Every search command builds its search object, wraps it in a ``Searcher``,
    and calls ``Searcher.search``, which writes the result files next to the input file.
    """

    # Shared defaults, kept in one place so the commands cannot drift apart.
    _DEFAULT_TARGET_TYPES = (
        "single_protein,protein_family,protein_complex,"
        "protein_complex_group,selectivity_group"
    )
    _DEFAULT_BANNED_FLAGS = (
        "potential missing data,potential transcription error,outside typical range"
    )

    @staticmethod
    def _binding_search(
        api,
        taxon: int,
        traversal_strategy: str,
        target_types: str,
        min_confidence: int,
        relations: str,
        min_pchembl: float,
        banned_flags: str,
    ) -> BindingSearch:
        """
        Builds the ``BindingSearch`` used by both ``chembl:binding`` and ``chembl:go``.
        """
        return BindingSearch(
            chembl_api=api,
            tax=Utils.get_taxon(taxon),
            traversal_strategy=traversal_strategy,
            allowed_target_types=Utils.get_target_types(target_types),
            min_confidence_score=min_confidence,
            allowed_relations=Utils.split(relations),
            min_pchembl=min_pchembl,
            banned_flags=Utils.split(banned_flags),
        )

    @staticmethod
    @cli.command("chembl:binding")
    def binding(
        path: Path,
        taxon: int = 7742,
        traversal_strategy: str = "strategy0",
        target_types: str = _DEFAULT_TARGET_TYPES,
        min_confidence: int = 3,
        relations: str = "<,<=,=",
        min_pchembl: float = 6.0,
        banned_flags: str = _DEFAULT_BANNED_FLAGS,
    ) -> Tup[pd.DataFrame, Sequence[Triple]]:
        """
        Runs a ChEMBL binding search on the compounds listed in a file.

        Args:
            path: Text file with one compound key per line
            taxon: The **ID** of the UniProt taxon to load the taxonomy for
            traversal_strategy: Name of the traversal strategy passed to the search
            target_types: Comma-separated target types, or ``all``, ``known``, or ``protein``
            min_confidence: Passed to the search as the minimum confidence score
            relations: Comma-separated list of allowed relations
            min_pchembl: Passed to the search as the minimum pCHEMBL value
            banned_flags: Comma-separated list of banned flags
        """
        api = ChemblApi.wrap(Chembl)
        search = Commands._binding_search(
            api,
            taxon,
            traversal_strategy,
            target_types,
            min_confidence,
            relations,
            min_pchembl,
            banned_flags,
        )
        return Searcher(search).search(path)

    @staticmethod
    @cli.command("chembl:mechanism")
    def mechanism(
        path: Path,
        taxon: int = 7742,
        traversal_strategy: str = "strategy0",
        target_types: str = _DEFAULT_TARGET_TYPES,
        min_confidence: int = 3,
    ) -> Tup[pd.DataFrame, Sequence[Triple]]:
        """
        Runs a ChEMBL mechanism search on the compounds listed in a file.

        Args:
            path: Text file with one compound key per line
            taxon: The **ID** of the UniProt taxon to load the taxonomy for
            traversal_strategy: Name of the traversal strategy passed to the search
            target_types: Comma-separated target types, or ``all``, ``known``, or ``protein``
            min_confidence: Passed to the search as the minimum confidence score
        """
        api = ChemblApi.wrap(Chembl)
        search = MechanismSearch(
            chembl_api=api,
            tax=Utils.get_taxon(taxon),
            traversal_strategy=traversal_strategy,
            allowed_target_types=Utils.get_target_types(target_types),
            min_confidence_score=min_confidence,
        )
        return Searcher(search).search(path)

    @staticmethod
    @cli.command("chembl:trials")
    def trials(
        path: Path,
        min_phase: int = 3,
    ) -> Tup[pd.DataFrame, Sequence[Triple]]:
        """
        Runs a ChEMBL indication search on the compounds listed in a file.

        Args:
            path: Text file with one compound key per line
            min_phase: Passed to the search as the minimum phase
        """
        api = ChemblApi.wrap(Chembl)
        search = IndicationSearch(
            chembl_api=api,
            min_phase=min_phase,
        )
        return Searcher(search).search(path)

    @staticmethod
    @cli.command("chembl:atc")
    def atc(
        path: Path,
    ) -> Tup[pd.DataFrame, Sequence[Triple]]:
        """
        Runs a ChEMBL ATC search on the compounds listed in a file.

        Args:
            path: Text file with one compound key per line
        """
        api = ChemblApi.wrap(Chembl)
        search = AtcSearch(
            chembl_api=api,
        )
        return Searcher(search).search(path)

    @staticmethod
    @cli.command("chembl:go")
    def go_search(
        path: Path,
        kind: GoType,
        taxon: int = 7742,
        traversal_strategy: str = "strategy0",
        target_types: str = _DEFAULT_TARGET_TYPES,
        min_confidence: int = 3,
        relations: str = "<,<=,=",
        min_pchembl: float = 6.0,
        banned_flags: str = _DEFAULT_BANNED_FLAGS,
    ) -> Tup[pd.DataFrame, Sequence[Triple]]:
        """
        Runs a ChEMBL GO search, built on top of a binding search, on the compounds in a file.

        Args:
            path: Text file with one compound key per line
            kind: The GO type passed to the GO search
            taxon: The **ID** of the UniProt taxon to load the taxonomy for
            traversal_strategy: Name of the traversal strategy passed to the binding search
            target_types: Comma-separated target types, or ``all``, ``known``, or ``protein``
            min_confidence: Passed to the binding search as the minimum confidence score
            relations: Comma-separated list of allowed relations
            min_pchembl: Passed to the binding search as the minimum pCHEMBL value
            banned_flags: Comma-separated list of banned flags
        """
        api = ChemblApi.wrap(Chembl)
        binding_search = Commands._binding_search(
            api,
            taxon,
            traversal_strategy,
            target_types,
            min_confidence,
            relations,
            min_pchembl,
            banned_flags,
        )
        search = GoSearch(api, kind, binding_search)
        return Searcher(search).search(path)

    @staticmethod
    @cli.command(hidden=True)
    def process_tax(
        taxon: int,
    ) -> None:
        """
        Preps a new taxonomy file for use in mandos.
        Just returns if a corresponding file already exists in the resources dir
        or mandos cache (``~/.mandos``).
        Otherwise, downloads a tab-separated file from UniProt.
        (To find manually, follow the ``All lower taxonomy nodes`` link and click ``Download``.)
        Then applies fixes and reduces the file size, creating a new file alongside.
        Puts both the raw data and fixed data in the cache under ``~/.mandos/taxonomy/``.

        Args:
            taxon: The **ID** of the UniProt taxon
        """
        # Same load as the search commands use; the loaded taxonomy itself is not needed here.
        Utils.get_taxon(taxon)
267
268
269
if __name__ == "__main__":
270
    cli()
271
272
273
__all__ = ["Commands", "Searcher"]
274