1
|
|
|
""" |
2
|
|
|
Command-line interface for mandos. |
3
|
|
|
""" |
4
|
|
|
|
5
|
|
|
from __future__ import annotations |
6
|
|
|
|
7
|
|
|
import logging |
|
|
|
|
8
|
|
|
import sys |
|
|
|
|
9
|
|
|
from pathlib import Path |
10
|
|
|
from typing import Type |
11
|
|
|
|
12
|
|
|
import pandas as pd |
|
|
|
|
13
|
|
|
import typer |
|
|
|
|
14
|
|
|
from typer.models import CommandInfo |
|
|
|
|
15
|
|
|
from typeddfs import TypedDfs |
|
|
|
|
16
|
|
|
|
17
|
|
|
from mandos import logger, MandosLogging |
18
|
|
|
from mandos.model.settings import MANDOS_SETTINGS |
19
|
|
|
from mandos.model.taxonomy import TaxonomyDf |
20
|
|
|
from mandos.model.taxonomy_caches import TaxonomyFactories |
21
|
|
|
from mandos.entries.entries import Entries |
22
|
|
|
from mandos.entries.args import EntryArgs |
23
|
|
|
from mandos.entries.api_singletons import Apis |
24
|
|
|
from mandos.entries.multi_searches import MultiSearch |
25
|
|
|
from mandos.entries.searcher import SearcherUtils |
26
|
|
|
|
27
|
|
|
# Root typer application. Commands are not declared with decorators; they are
# appended to ``cli.registered_commands`` by ``_init_commands()`` further down.
cli = typer.Typer()
28
|
|
|
|
29
|
|
|
|
30
|
|
|
class Commands:
    """
    Entry points for mandos.
    """

    # NOTE: these callbacks are registered with typer without an explicit ``help``,
    # and typer falls back to the callback's docstring for the ``--help`` text.
    # Docstrings below are therefore user-facing; maintainer notes live in ``#`` comments.

    @staticmethod
    def search(
        path: Path = EntryArgs.path,
        config: Path = typer.Argument(
            None,
            help=".toml config file. See docs.",
            exists=True,
            dir_okay=False,
            readable=True,
        ),
    ) -> None:
        """
        Run multiple searches.
        """
        # ``path`` used to be referenced here without being defined anywhere, so the
        # command always failed with a NameError. It is now declared the same way as
        # in ``find``; presumably it is the compound input file -- TODO confirm
        # against ``MultiSearch``.
        MultiSearch(path, config).search()

    @staticmethod
    def find(
        path: Path = EntryArgs.path,
        pubchem: bool = typer.Option(True, help="Download data from PubChem"),
        chembl: bool = typer.Option(True, help="Download data from ChEMBL"),
        hmdb: bool = typer.Option(True, help="Download data from HMDB"),
    ) -> None:
        """
        Fetches and caches compound data.
        Useful to check what you can see before running a search.
        """
        # The result goes next to the input, with the suffix swapped for ``.ids.csv``.
        # Refuse to overwrite an earlier result, and do so before downloading anything.
        out_path = path.with_suffix(".ids.csv")
        if out_path.exists():
            raise FileExistsError(out_path)
        inchikeys = SearcherUtils.read(path)
        df = SearcherUtils.dl(inchikeys, pubchem=pubchem, chembl=chembl, hmdb=hmdb)
        df.to_csv(out_path)
        typer.echo(f"Wrote to {out_path}")

    @staticmethod
    def build_taxonomy(
        taxa: str = EntryArgs.taxa,
        to: Path = typer.Option(
            None,
            show_default=False,
            help="""
            Output file; can be CSV, TSV, feather, etc.
            If it starts with '.', uses the default path but changes the format and filename extension.

            [default: <taxon-id>,<taxon-id>,<...>.tab.gz]
            """,
        ),
    ) -> None:
        """
        Writes a file of the descendants of the given taxa.
        """
        # Purely numeric entries become ints; anything else is passed on as a string
        # (presumably a taxon name -- TODO confirm what ``load`` accepts).
        # Blank entries, e.g. from a trailing comma, are dropped.
        taxon_ids = [
            int(name) if name.isdigit() else name
            for name in (piece.strip() for piece in taxa.split(","))
            if name
        ]
        if not taxon_ids:
            # pd.concat would fail on an empty list anyway; fail early and readably
            raise ValueError(f"No taxa were given in '{taxa}'")
        # get the filename
        # by default we'll just use the inputs
        stem = ",".join(str(t) for t in taxon_ids)
        if to is None:
            to = Path(stem + ".tab.gz")
        elif str(to).startswith("."):
            # only an extension was given: keep the default stem, swap the format
            to = Path(stem + str(to))
        to.parent.mkdir(exist_ok=True, parents=True)
        # TODO: this is quite inefficient
        # we're potentially reading in the vertebrata file multiple times
        # we could instead read it in, then concatenate the matching subtrees
        # however, this is moderately efficient if you ask for, e.g., Mammalia and Plantae
        # then it'll download Plantae but just get Mammalia from the resource-file Vertebrata
        # informational only (this was previously logged at error level)
        logger.info(f"Writing taxonomy to {to}")
        taxes = []
        for taxon in taxon_ids:
            tax = TaxonomyFactories.from_uniprot(MANDOS_SETTINGS.taxonomy_cache_path).load(taxon)
            taxes.append(tax.to_df())
        # merge the per-taxon tables; overlapping subtrees yield duplicate rows
        final_tax = TaxonomyDf(pd.concat(taxes, ignore_index=True))
        final_tax = final_tax.drop_duplicates().sort_values("taxon")
        # if it's text, just write one taxon ID per line
        is_text = to.name.endswith((".txt", ".txt.gz", ".txt.zip", ".txt.xz", ".txt.bz2"))
        if is_text:
            final_tax = TypedDfs.wrap(final_tax[["taxon"]])
        # write the file
        final_tax.write_file(to)

    @staticmethod
    def dl_tax(
        taxon: int = typer.Argument(None, help="The **ID** of the UniProt taxon"),
    ) -> None:
        """
        Preps a new taxonomy file for use in mandos.
        Just returns if a corresponding file already exists in the resources dir or mandos cache (``~/.mandos``).
        Otherwise, downloads a tab-separated file from UniProt.
        (To find manually, follow the ``All lower taxonomy nodes`` link and click ``Download``.)
        Then applies fixes and reduces the file size, creating a new file alongside.
        Puts both the raw data and fixed data in the cache under ``~/.mandos/taxonomy/``.
        """
        # The loaded taxonomy is discarded on purpose: the call is made only for
        # the download/caching described in the docstring.
        TaxonomyFactories.from_uniprot(MANDOS_SETTINGS.taxonomy_cache_path).load(taxon)
129
|
|
|
|
130
|
|
|
|
131
|
|
|
def _init_commands():
    """
    Registers every command on ``cli``: one per member of ``Entries``, then the built-in ``@`` commands.
    """
    # Typer infers a command's parameters from its callback's signature, which makes
    # dynamically configured params awkward. So each entry's ``run`` is registered as-is,
    # by building the CommandInfo by hand rather than through a decorator.
    for entry in Entries:
        command = CommandInfo(entry.cmd(), callback=entry.run)
        cli.registered_commands.append(command)
        # also expose the callback as an attribute of Commands, under the command name
        setattr(Commands, entry.cmd(), entry.run)

    # the hand-written commands defined on Commands; @tax-dl is hidden from the help listing
    builtin_commands = [
        CommandInfo("@search", callback=Commands.search),
        CommandInfo("@tax-tree", callback=Commands.build_taxonomy),
        CommandInfo("@tax-dl", callback=Commands.dl_tax, hidden=True),
    ]
    cli.registered_commands.extend(builtin_commands)
150
|
|
|
|
151
|
|
|
|
152
|
|
|
# Register all commands at import time, so ``cli`` is fully populated
# before ``MandosCli`` below exposes it as ``main``.
_init_commands()
153
|
|
|
|
154
|
|
|
|
155
|
|
|
class MandosCli:
    """
    Single import point for consumers: bundles the settings, logging, typer app, and commands.
    """

    # Module-level objects re-exported as class attributes.
    settings = MANDOS_SETTINGS
    logger = logger
    # Inside this class namespace, ``logging`` is MandosLogging, not the stdlib module.
    logging = MandosLogging
    # The typer application; calling ``MandosCli.main()`` runs the CLI.
    main = cli
    commands = Commands

    @classmethod
    def init(cls) -> Type[MandosCli]:
        """
        Initializes logging, then sets the default APIs.

        Returns:
            This class itself, so that calls can be chained (e.g. ``MandosCli.init().main()``)
        """
        MandosLogging.init()
        Apis.set_default()
        return cls
171
|
|
|
|
172
|
|
|
|
173
|
|
|
if __name__ == "__main__":
    # Run as a script: initialize logging and the default APIs, then invoke the typer app.
    app = MandosCli.init()
    app.main()


# Only the facade class is exported by ``from ... import *``.
__all__ = ["MandosCli"]
178
|
|
|
|