mandos.cli   A
last analyzed

Complexity

Total Complexity 12

Size/Duplication

Total Lines 164
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 95
dl 0
loc 164
rs 10
c 0
b 0
f 0
wmc 12

6 Methods

Rating   Name   Duplication   Size   Complexity  
A Commands.search() 0 27 2
A Commands.process_tax() 0 21 2
A What.__new__() 0 4 1
A What.clazz() 0 3 1
A What.__init__() 0 2 1
B Commands.search_for() 0 38 5
1
"""
2
Command-line interface for mandos.
3
"""
4
5
from __future__ import annotations
6
7
import enum
8
import logging
9
from pathlib import Path, PurePath
10
from typing import Any, Mapping, Optional, Sequence
11
from typing import Tuple as Tup
12
from typing import Type, Union
13
14
import pandas as pd
0 ignored issues
show
introduced by
Unable to import 'pandas'
Loading history...
15
import typer
0 ignored issues
show
introduced by
Unable to import 'typer'
Loading history...
16
from chembl_webresource_client.new_client import new_client as Chembl
0 ignored issues
show
introduced by
Unable to import 'chembl_webresource_client.new_client'
Loading history...
17
from pocketutils.core.dot_dict import NestedDotDict
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.dot_dict'
Loading history...
18
19
from mandos.chembl_api import ChemblApi
20
from mandos.model import Search, Triple
21
from mandos.model.caches import TaxonomyFactories
22
from mandos.model.settings import DEFAULT_TAXONOMY_CACHE, Settings
23
from mandos.search.chembl.activity_search import ActivitySearch
24
from mandos.search.chembl.atc_search import AtcSearch
25
from mandos.search.chembl.go_search import GoSearchFactory, GoType
26
from mandos.search.chembl.indication_search import IndicationSearch
27
from mandos.search.chembl.mechanism_search import MechanismSearch
28
29
# Logger named after the enclosing package (``__package__``) rather than this module.
logger = logging.getLogger(__package__)
30
# Typer application; commands are registered on it with ``@cli.command()``.
cli = typer.Typer()
31
32
33
class What(enum.Enum):
    """
    List of search items.

    Each member is declared as ``(enum.auto(), search_class)``.
    The member's value is a 1-based integer in declaration order,
    and the search class is available through :attr:`clazz`.
    """

    activity = enum.auto(), ActivitySearch
    mechanism = enum.auto(), MechanismSearch
    atc = enum.auto(), AtcSearch
    trial = enum.auto(), IndicationSearch
    go_proc_moa = enum.auto(), GoSearchFactory.create(GoType.process, MechanismSearch)
    go_fn_moa = enum.auto(), GoSearchFactory.create(GoType.function, MechanismSearch)
    go_comp_moa = enum.auto(), GoSearchFactory.create(GoType.component, MechanismSearch)
    go_proc_act = enum.auto(), GoSearchFactory.create(GoType.process, ActivitySearch)
    go_fn_act = enum.auto(), GoSearchFactory.create(GoType.function, ActivitySearch)
    go_comp_act = enum.auto(), GoSearchFactory.create(GoType.component, ActivitySearch)

    def __new__(cls, *args, **kwargs):
        """
        Create a member whose value is its 1-based position in the declaration.

        The positional arguments are the elements of the declared tuple; they are
        not used for the value. ``kwargs`` is accepted only to keep the signature
        unchanged.
        """
        obj = object.__new__(cls)
        # ``enum.auto()`` nested inside a tuple is only resolved by Python >= 3.11;
        # older interpreters pass the raw ``auto`` sentinel as ``args[0]``.
        # Counting the members created so far gives the same value on every version.
        obj._value_ = len(cls.__members__) + 1
        return obj

    # ignore the first param since the value is already set by __new__
    def __init__(self, _: Any, clazz: Type[Search]):
        """
        Store the search class declared as the second tuple element.

        Args:
            _: The first tuple element (the ``enum.auto()`` placeholder); unused
            clazz: The search class associated with this member
        """
        self._clazz_ = clazz

    @property
    def clazz(self) -> Type[Search]:
        """
        The search class associated with this member.
        """
        return self._clazz_
61
62
63
class Commands:
    """
    Entry points for mandos.
    """

    @staticmethod
    @cli.command()
    def search(
        what: str,
        path: Path,
        config: Optional[Path] = None,
    ) -> None:
        """
        Process data.

        For each requested search, writes two files alongside ``path``:
        ``<stem>-<search>.csv`` with the hits and ``<stem>-<search>.triples.txt``
        with the tab-delimited triples (preceded by a header line).

        Args:
            what: Comma-separated list of ``activity``, ``mechanism``, ``atc``, ``trial``,
                ``go_proc_moa``, ``go_fn_moa``, ``go_comp_moa``, ``go_proc_act``,
                ``go_fn_act``, and ``go_comp_act``.
                Case and surrounding whitespace are ignored.
            path: Path to the input file of one of the formats:
                - .txt containing one key (InChI / CHEMBL ID) per line
                - .csv/.tsv/.tab containing one key per row
                - .csv/.tsv/.tab of a symmetric affinity matrix,
                  with a row header and column header with the keys
            config: Path to a TOML config file

        Raises:
            KeyError: If a name in ``what`` is not a member of ``What``
        """
        for name in what.split(","):
            # strip so that "activity, atc" is accepted as well as "activity,atc"
            item = What[name.strip().lower()]
            df, triples = Commands.search_for(item, path, config=config)
            df_out = path.with_name(f"{path.stem}-{item.name.lower()}.csv")
            df.to_csv(df_out)
            triples_out = df_out.with_suffix(".triples.txt")
            # Only the tab-delimited form is written: writing the plain statements
            # to this same path first would just be overwritten by this call.
            triples_out.write_text(
                "\n".join([Triple.tab_header(), *[t.tabs for t in triples]]), encoding="utf8"
            )

    @staticmethod
    @cli.command(hidden=True)
    def process_tax(
        taxon: int,
        cache_path: Optional[Path] = None,
    ) -> None:
        """
        Preps a new taxonomy file for use in mandos.
        Just returns if a corresponding file already exists in the resources dir
        or mandos cache (``~/.mandos``).
        Otherwise, downloads a tab-separated file from UniProt.
        (To find manually, follow the ``All lower taxonomy nodes`` link and click ``Download``.)
        Then applies fixes and reduces the file size, creating a new file alongside.
        Puts both the raw data and fixed data in the cache under ``~/.mandos/taxonomy/``.

        Args:
            taxon: The **ID** of the UniProt taxon
            cache_path: Taxonomy cache location passed to ``TaxonomyFactories.from_uniprot``;
                ``DEFAULT_TAXONOMY_CACHE`` is used if omitted
        """
        if cache_path is None:
            cache_path = DEFAULT_TAXONOMY_CACHE
        TaxonomyFactories.from_uniprot(cache_path).load(taxon)

    @staticmethod
    def search_for(
        what: What,
        compounds: Union[Sequence[str], PurePath],
        config: Union[None, Mapping[str, Any], Path],
    ) -> Tup[pd.DataFrame, Sequence[Triple]]:
        """
        Runs a single search over a set of compounds.

        Args:
            what: The search to run; ``what.clazz`` is instantiated with the API wrapper,
                the settings, and the taxonomy
            compounds: Either the compound keys themselves, or a path (a ``PurePath`` or a
                ``str``) to a UTF-8 text file with one key per line.
                Keys are stripped and blank entries are dropped.
            config: ``None`` to load settings from an empty config, a path to a TOML file,
                or a mapping (including a ``NestedDotDict``) to load settings from.
                An existing ``Settings`` instance is used as is.

        Returns:
            A tuple of a data frame with one row per hit (columns from ``hit_fields()``)
            and the sorted list of distinct triples
        """
        if isinstance(compounds, (PurePath, str)):
            compounds = Path(compounds).read_text(encoding="utf8").splitlines()
        compounds = [key for key in (raw.strip() for raw in compounds) if key]
        if isinstance(config, Settings):
            settings = config
        elif config is None:
            settings = Settings.load(NestedDotDict({}))
        elif isinstance(config, PurePath):
            settings = Settings.load(NestedDotDict.read_toml(config))
        elif isinstance(config, NestedDotDict):
            # must go through Settings.load like the other branches:
            # the calls below need a Settings, not the raw config mapping
            settings = Settings.load(config)
        else:
            settings = Settings.load(NestedDotDict(config))
        settings.set()
        api = ChemblApi.wrap(Chembl)
        taxonomy = TaxonomyFactories.from_uniprot(settings.taxonomy_cache_path).load(settings.taxon)
        # materialize once: the hits are iterated twice below
        hits = list(what.clazz(api, settings, taxonomy).find_all(compounds))
        # collapse over and sort the triples
        triples = sorted({hit.to_triple() for hit in hits})
        fields = list(what.clazz.hit_fields())
        df = pd.DataFrame([pd.Series({f: getattr(h, f) for f in fields}) for h in hits])
        return df, triples
157
158
159
# Declared before the entry-point guard so it is always assigned:
# running the CLI normally ends by exiting the interpreter.
__all__ = ["Commands", "What"]


if __name__ == "__main__":
    # Run the typer application when this module is executed as a script.
    cli()
164