|
1
|
|
|
""" |
|
2
|
|
|
Command-line interface for mandos. |
|
3
|
|
|
""" |
|
4
|
|
|
|
|
5
|
|
|
from __future__ import annotations |
|
6
|
|
|
|
|
7
|
|
|
import logging |
|
|
|
|
|
|
8
|
|
|
import sys |
|
|
|
|
|
|
9
|
|
|
from pathlib import Path |
|
10
|
|
|
from typing import Type |
|
11
|
|
|
|
|
12
|
|
|
import pandas as pd |
|
|
|
|
|
|
13
|
|
|
import typer |
|
|
|
|
|
|
14
|
|
|
from typer.models import CommandInfo |
|
|
|
|
|
|
15
|
|
|
from typeddfs import TypedDfs |
|
|
|
|
|
|
16
|
|
|
|
|
17
|
|
|
from mandos import logger, MandosLogging |
|
18
|
|
|
from mandos.model.settings import MANDOS_SETTINGS |
|
19
|
|
|
from mandos.model.taxonomy import TaxonomyDf |
|
20
|
|
|
from mandos.model.taxonomy_caches import TaxonomyFactories |
|
21
|
|
|
from mandos.entries.entries import Entries |
|
22
|
|
|
from mandos.entries.args import EntryArgs |
|
23
|
|
|
from mandos.entries.api_singletons import Apis |
|
24
|
|
|
from mandos.entries.multi_searches import MultiSearch |
|
25
|
|
|
from mandos.entries.searcher import SearcherUtils |
|
26
|
|
|
|
|
27
|
|
|
# The typer application object; commands are appended to its
# ``registered_commands`` list by ``_init_commands`` further down.
cli = typer.Typer()
|
28
|
|
|
|
|
29
|
|
|
|
|
30
|
|
|
class Commands:
    """
    Entry points for mandos.
    """

    # NOTE: typer uses each callback's docstring as the command's help text,
    # so the method docstrings below are user-facing. Developer notes are kept
    # in ``#`` comments instead.

    @staticmethod
    def search(
        path: Path = EntryArgs.path,
        config: Path = typer.Argument(
            None,
            help=".toml config file. See docs.",
            exists=True,
            dir_okay=False,
            readable=True,
        ),
    ) -> None:
        """
        Run multiple searches.
        """
        # ``path`` must be a parameter: the body referenced it without it being
        # defined anywhere, so every invocation failed with a NameError.
        # It mirrors the ``path`` argument of ``find`` and comes first so that
        # the required argument precedes the optional ``config`` one.
        MultiSearch(path, config).search()

    @staticmethod
    def find(
        path: Path = EntryArgs.path,
        pubchem: bool = typer.Option(True, help="Download data from PubChem"),
        chembl: bool = typer.Option(True, help="Download data from ChEMBL"),
        hmdb: bool = typer.Option(True, help="Download data from HMDB"),
    ) -> None:
        """
        Fetches and caches compound data.
        Useful to check what you can see before running a search.
        """
        # Output goes alongside the input, with the suffix replaced by ``.ids.csv``.
        out_path = path.with_suffix(".ids.csv")
        # Refuse to overwrite an existing file before doing any other work.
        if out_path.exists():
            raise FileExistsError(out_path)
        inchikeys = SearcherUtils.read(path)
        df = SearcherUtils.dl(inchikeys, pubchem=pubchem, chembl=chembl, hmdb=hmdb)
        df.to_csv(out_path)
        typer.echo(f"Wrote to {out_path}")

    @staticmethod
    def build_taxonomy(
        taxa: str = EntryArgs.taxa,
        to: Path = typer.Option(
            None,
            show_default=False,
            help="""
            Output file; can be CSV, TSV, feather, etc.
            If it starts with '.', uses the default path but changes the format and filename extension.

            [default: <taxon-id>[,<taxon-id>...].tab.gz]
            """,
        ),
    ) -> None:
        """
        Writes a file of the descendants of given taxa.
        """
        # Split the comma-separated list; all-digit entries become integer IDs,
        # anything else is kept as a string. Blank entries (e.g. from a trailing
        # comma) are skipped rather than being passed on as "".
        stripped = (taxon.strip() for taxon in taxa.split(","))
        taxon_ids = [int(t) if t.isdigit() else t for t in stripped if t]
        if not taxon_ids:
            raise typer.BadParameter("No taxa were provided")
        # get the filename
        # by default we'll just use the inputs
        default_stem = ",".join(str(t) for t in taxon_ids)
        if to is None:
            to = Path(default_stem + ".tab.gz")
        elif str(to).startswith("."):
            # only an extension was given: keep the default stem, swap the suffix
            to = Path(default_stem + str(to))
        to.parent.mkdir(exist_ok=True, parents=True)
        # TODO: this is quite inefficient
        # we're potentially reading in the vertebrata file multiple times
        # we could instead read it in, then concatenate the matching subtrees
        # however, this is moderately efficient if you ask for, e.g., Mammalia and Plantae
        # then it'll download Plantae but just get Mammalia from the resource-file Vertebrata
        # Purely informational, so log at INFO rather than ERROR.
        logger.info(f"Writing to {to}")
        taxes = []
        for taxon in taxon_ids:
            tax = TaxonomyFactories.from_uniprot(MANDOS_SETTINGS.taxonomy_cache_path).load(taxon)
            taxes.append(tax.to_df())
        final_tax = TaxonomyDf(pd.concat(taxes, ignore_index=True))
        final_tax = final_tax.drop_duplicates().sort_values("taxon")
        # if it's text, just write one taxon ID per line
        text_suffixes = tuple(".txt" + c for c in ("", ".gz", ".zip", ".xz", ".bz2"))
        if to.name.endswith(text_suffixes):
            final_tax = TypedDfs.wrap(final_tax[["taxon"]])
        # write the file
        final_tax.write_file(to)

    @staticmethod
    def dl_tax(
        taxon: int = typer.Argument(None, help="The **ID** of the UniProt taxon"),
    ) -> None:
        """
        Preps a new taxonomy file for use in mandos.
        Just returns if a corresponding file already exists in the resources dir or mandos cache (``~/.mandos``).
        Otherwise, downloads a tab-separated file from UniProt.
        (To find manually, follow the ``All lower taxonomy nodes`` link and click ``Download``.)
        Then applies fixes and reduces the file size, creating a new file alongside.
        Puts both the raw data and fixed data in the cache under ``~/.mandos/taxonomy/``.
        """
        # The returned taxonomy is discarded; only the load itself is wanted here.
        TaxonomyFactories.from_uniprot(MANDOS_SETTINGS.taxonomy_cache_path).load(taxon)
|
129
|
|
|
|
|
130
|
|
|
|
|
131
|
|
|
def _init_commands():
    """
    Register every command on the module-level ``cli`` app.

    Each member of ``Entries`` is registered under its ``cmd()`` name with its
    ``run`` callable as the callback, and that callable is also attached to
    ``Commands`` under the same name. The fixed ``@``-prefixed commands are
    registered afterwards.
    """
    # Creating typer commands with dynamically configured params is painful:
    # typer needs to infer the params from the callback itself, so the
    # callbacks are wrapped in CommandInfo and appended directly.
    registry = cli.registered_commands
    for entry in Entries:
        name = entry.cmd()
        registry.append(CommandInfo(name, callback=entry.run))
        setattr(Commands, name, entry.run)

    registry.append(CommandInfo("@search", callback=Commands.search))
    registry.append(CommandInfo("@tax-tree", callback=Commands.build_taxonomy))
    registry.append(CommandInfo("@tax-dl", callback=Commands.dl_tax, hidden=True))
|
150
|
|
|
|
|
151
|
|
|
|
|
152
|
|
|
# Runs at import time, so ``cli`` already has its commands registered by the
# time anything below (or any importer) uses it.
_init_commands()
|
153
|
|
|
|
|
154
|
|
|
|
|
155
|
|
|
class MandosCli:
    """
    Global entry point for various stuff. For import by consumers.

    Bundles the module's main objects as class attributes so that importing
    this one class is enough to reach them.
    """

    # the global settings object
    settings = MANDOS_SETTINGS
    # the logger imported from the ``mandos`` package
    logger = logger
    # the logging setup class (inside this class body the name refers to
    # MandosLogging, not the stdlib ``logging`` module)
    logging = MandosLogging
    # the typer application; calling it runs the command-line interface
    main = cli
    # the class holding the command callbacks
    commands = Commands

    @classmethod
    def init(cls) -> Type[MandosCli]:
        """
        Initialize logging, then set the default APIs.

        Returns:
            This class, so calls can be chained.
        """
        for setup_step in (MandosLogging.init, Apis.set_default):
            setup_step()
        return cls
|
171
|
|
|
|
|
172
|
|
|
|
|
173
|
|
|
# Script entry point: only runs when this module is executed directly.
if __name__ == "__main__":
    # init() returns the class itself, so main (the typer app) is called in a chain
    MandosCli.init().main()


# MandosCli is the only name exported by a star-import of this module.
__all__ = ["MandosCli"]
|
178
|
|
|
|