Passed
Push — main ( 4e4203...cdf0f7 )
by Douglas
01:39
created

mandos.cli.Commands.find()   A

Complexity

Conditions 2

Size

Total Lines 18
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 13
nop 4
dl 0
loc 18
rs 9.75
c 0
b 0
f 0
1
"""
2
Command-line interface for mandos.
3
"""
4
5
from __future__ import annotations
6
7
import logging
8
from pathlib import Path
9
10
import typer
0 ignored issues
show
introduced by
Unable to import 'typer'
Loading history...
11
12
from mandos import logger
13
from mandos.model.settings import MANDOS_SETTINGS
14
from mandos.model.taxonomy import TaxonomyDf
15
from mandos.model.taxonomy_caches import TaxonomyFactories
16
from mandos.entries.entries import Entries, _Typer
17
from mandos.entries.api_singletons import Apis
18
from mandos.entries.multi_searches import MultiSearch
19
from mandos.entries.searcher import SearcherUtils
20
21
# IMPORTANT!
# This call runs at import time and precedes creation of the typer app and
# every command registration further down in this module.
# NOTE(review): presumably installs the default API singletons from
# mandos.entries.api_singletons that the commands rely on -- confirm there.
22
Apis.set_default()
23
# The typer application that all commands in this module are registered on.
cli = typer.Typer()
24
# _old_wrap_text = copy(click.formatting.wrap_text)
25
# def _new_wrap_text(
26
#    text, width=100, initial_indent="", subsequent_indent="", preserve_paragraphs=False
27
# ):
28
#    return _old_wrap_text(text, 100, initial_indent, subsequent_indent, preserve_paragraphs)
29
# click.formatting.wrap_text = _new_wrap_text
30
31
32
class Commands:
    """
    Entry points for mandos.

    The static methods are typer command callbacks; their docstrings and
    ``help=`` strings are what the command line shows to the user.
    """

    @staticmethod
    def search(
        path: Path = _Typer.path,
        config: Path = typer.Argument(
            None,
            help=".toml config file. See docs.",
            exists=True,
            dir_okay=False,
            readable=True,
        ),
    ) -> None:
        """
        Run multiple searches.
        """
        # All of the work is delegated to MultiSearch; this is only the CLI shim.
        MultiSearch(path, config).search()

    @staticmethod
    def find(
        path: Path = _Typer.path,
        pubchem: bool = typer.Option(True, help="Download data from PubChem"),
        chembl: bool = typer.Option(True, help="Download data from ChEMBL"),
        hmdb: bool = typer.Option(True, help="Download data from HMDB"),
    ) -> None:
        """
        Fetches and caches compound data.
        Useful to check what you can see before running a search.
        """
        # The output sits next to the input, with the input's suffix replaced by ".ids.csv".
        out_path = path.with_suffix(".ids.csv")
        # Refuse to overwrite an earlier result; this is checked before any download starts.
        if out_path.exists():
            raise FileExistsError(out_path)
        inchikeys = SearcherUtils.read(path)
        found = SearcherUtils.dl(inchikeys, pubchem=pubchem, chembl=chembl, hmdb=hmdb)
        found.to_csv(out_path)
        typer.echo(f"Wrote to {out_path}")

    @staticmethod
    def build_taxonomy(
        taxa: str = typer.Argument(
            None,
            help="""
            UniProt taxon ID or scientific name, comma-separated.
            Scientific names are only permitted for subsets of vertebrata.
        """,
        ),
        to: Path = typer.Option(
            None,
            show_default=False,
            help="""
        Output file; can be CSV, TSV, feather, etc.
        If it starts with '.', uses the default path but changes the format and filename extension.

        [default: <taxon-id>,<taxon-id>,(...).tab.gz]
        """,
        ),
    ) -> None:
        """
        Writes a file of the descendants of the given taxa.
        """
        # The argument is declared with a default of None, so it can be omitted on the
        # command line; report that as a usage error instead of an AttributeError.
        if taxa is None:
            raise typer.BadParameter("At least one taxon ID or scientific name is required")
        # Strip *before* testing isdigit() so that padded IDs such as " 7742" become ints,
        # and drop empty entries left behind by stray or trailing commas.
        stripped = [piece.strip() for piece in taxa.split(",")]
        taxon_ids = [int(piece) if piece.isdigit() else piece for piece in stripped if piece]
        if not taxon_ids:
            raise typer.BadParameter(f"No taxa found in '{taxa}'")
        # By default the filename is just the comma-joined inputs.
        # Join the parsed IDs, not the characters of the raw ``taxa`` string.
        stem = ",".join(str(taxon) for taxon in taxon_ids)
        if to is None:
            to = Path(stem + ".tab.gz")
        elif str(to).startswith("."):
            # ``to`` is only an extension. Append it to the stem rather than calling
            # with_suffix() on the default path, which would swap only ".gz" and
            # leave names such as "7742.tab.csv".
            to = Path(stem + str(to))
        to.parent.mkdir(exist_ok=True, parents=True)
        # TODO: this is quite inefficient
        # we're potentially reading in the vertebrata file multiple times
        # we could instead read it in, then concatenate the matching subtrees
        # however, this is moderately efficient if you ask for, e.g., Mammalia and Plantae
        # then it'll download Plantae but just get Mammalia from the resource-file Vertebrata
        taxes = []
        for taxon in taxon_ids:
            tax = TaxonomyFactories.from_uniprot(MANDOS_SETTINGS.taxonomy_cache_path).load(taxon)
            taxes.append(tax.to_df())
        # Requested subtrees can overlap, so duplicate rows are dropped before writing.
        final_tax = TaxonomyDf.concat(taxes, ignore_index=True)
        final_tax = final_tax.drop_duplicates().sort_values("taxon")
        final_tax.write_file(to)

    @staticmethod
    def dl_tax(
        taxon: int,
    ) -> None:
        """
        Preps a new taxonomy file for use in mandos.
        Just returns if a corresponding file already exists in the resources dir
        or mandos cache (``~/.mandos``).
        Otherwise, downloads a tab-separated file from UniProt.
        (To find manually, follow the ``All lower taxonomy nodes`` link and click ``Download``.)
        Then applies fixes and reduces the file size, creating a new file alongside.
        Puts both the raw data and fixed data in the cache under ``~/.mandos/taxonomy/``.

        Args:
            taxon: The **ID** of the UniProt taxon
        """
        # Loading through the factory is what triggers the caching described above;
        # the loaded taxonomy itself is deliberately discarded.
        TaxonomyFactories.from_uniprot(MANDOS_SETTINGS.taxonomy_cache_path).load(taxon)
135
136
137
# Creating typer commands with dynamically configured parameters is hard:
# typer infers a command's parameters from the callback's signature, so each
# entry's ``run`` callable is wrapped in a CommandInfo and appended to the
# app's registry directly.

# Imported once, ahead of the loop. When the import lived inside the loop body,
# CommandInfo was undefined for the static registrations below whenever
# ``Entries`` yielded nothing.
from typer.models import CommandInfo

for entry in Entries:
    info = CommandInfo(entry.cmd(), callback=entry.run)
    cli.registered_commands.append(info)
    # Also expose the callback as an attribute of Commands, under the command's name.
    setattr(Commands, entry.cmd(), entry.run)

# Commands that are defined statically on Commands rather than generated from Entries.
# NOTE(review): Commands.find is not registered here -- confirm whether that is intentional.
cli.registered_commands.extend(
    [
        CommandInfo("@search", callback=Commands.search),
        CommandInfo("@tax-tree", callback=Commands.build_taxonomy),
        CommandInfo("@tax-dl", callback=Commands.dl_tax, hidden=True),
    ]
)
156
157
158
if __name__ == "__main__":
    import sys

    # One stdout handler that filters nothing itself (NOTSET == 0);
    # filtering is left to the logger levels assigned below.
    formatter = logging.Formatter("%(levelname)-7s %(asctime)s %(message)s", "%Y%m%d:%H:%M:%S")
    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(logging.NOTSET)
    handler.setFormatter(formatter)

    root = logging.getLogger()
    # The same handler is attached to the root logger and to the mandos logger.
    # Levels are a reasonable starting point and can be changed:
    # WARNING for everything, INFO for mandos itself.
    for target, level in ((root, logging.WARNING), (logger, logging.INFO)):
        target.addHandler(handler)
        target.setLevel(level)

    cli()
174
175
176
# Names exported by ``from <this module> import *``; only Commands is listed,
# so ``cli`` has to be imported by name.
__all__ = ["Commands"]
177