|
1
|
|
|
""" |
|
2
|
|
|
Command-line interface for mandos. |
|
3
|
|
|
""" |
|
4
|
|
|
|
|
5
|
|
|
from __future__ import annotations |
|
6
|
|
|
|
|
7
|
|
|
import logging |
|
8
|
|
|
from pathlib import Path |
|
9
|
|
|
|
|
10
|
|
|
import typer |
|
|
|
|
|
|
11
|
|
|
|
|
12
|
|
|
from mandos import logger |
|
13
|
|
|
from mandos.model.settings import MANDOS_SETTINGS |
|
14
|
|
|
from mandos.model.taxonomy import TaxonomyDf |
|
15
|
|
|
from mandos.model.taxonomy_caches import TaxonomyFactories |
|
16
|
|
|
from mandos.entries.entries import Entries, _Typer |
|
17
|
|
|
from mandos.entries.api_singletons import Apis |
|
18
|
|
|
from mandos.entries.multi_searches import MultiSearch |
|
19
|
|
|
from mandos.entries.searcher import SearcherUtils |
|
20
|
|
|
|
|
21
|
|
|
# IMPORTANT! This call runs at import time, ahead of every command definition below.
# NOTE(review): presumably it installs the default API singletons that the entry
# points rely on -- confirm against ``mandos.entries.api_singletons``.
Apis.set_default()
# The typer application object; commands are attached to it further down in this
# module via ``cli.registered_commands``.
cli = typer.Typer()
|
24
|
|
|
# _old_wrap_text = copy(click.formatting.wrap_text) |
|
25
|
|
|
# def _new_wrap_text( |
|
26
|
|
|
# text, width=100, initial_indent="", subsequent_indent="", preserve_paragraphs=False |
|
27
|
|
|
# ): |
|
28
|
|
|
# return _old_wrap_text(text, 100, initial_indent, subsequent_indent, preserve_paragraphs) |
|
29
|
|
|
# click.formatting.wrap_text = _new_wrap_text |
|
30
|
|
|
|
|
31
|
|
|
|
|
32
|
|
|
class Commands:
    """
    Entry points for mandos.

    The public static methods are written as typer command callbacks. Typer shows a
    callback's docstring as the command's help text, so those docstrings are kept short
    and user-facing; maintainer notes live in ``#`` comments instead.
    """

    @staticmethod
    def search(
        path: Path = _Typer.path,
        config: Path = typer.Argument(
            None,
            help=".toml config file. See docs.",
            exists=True,
            dir_okay=False,
            readable=True,
        ),
    ) -> None:
        """
        Run multiple searches.
        """
        # Both values are handed to MultiSearch unchanged; ``config`` may be None.
        MultiSearch(path, config).search()

    @staticmethod
    def find(
        path: Path = _Typer.path,
        pubchem: bool = typer.Option(True, help="Download data from PubChem"),
        chembl: bool = typer.Option(True, help="Download data from ChEMBL"),
        hmdb: bool = typer.Option(True, help="Download data from HMDB"),
    ) -> None:
        """
        Fetches and caches compound data.
        Useful to check what you can see before running a search.
        """
        # The result goes next to the input, with the input's suffix swapped for
        # ``.ids.csv``. Refuse to overwrite; checked before anything is read or downloaded.
        out_path = path.with_suffix(".ids.csv")
        if out_path.exists():
            raise FileExistsError(out_path)
        inchikeys = SearcherUtils.read(path)
        df = SearcherUtils.dl(inchikeys, pubchem=pubchem, chembl=chembl, hmdb=hmdb)
        df.to_csv(out_path)
        typer.echo(f"Wrote to {out_path}")

    @staticmethod
    def _parse_taxa(taxa: str) -> list:
        """
        Splits a comma-separated string into taxon IDs (``int``) and names (``str``).

        Args:
            taxa: Raw command-line value; may be None when the argument was omitted

        Returns:
            The non-empty entries in input order; all-digit entries are converted to ``int``

        Raises:
            typer.BadParameter: If no usable entry remains
        """
        pieces = [] if taxa is None else [piece.strip() for piece in taxa.split(",")]
        # Whitespace is stripped *before* the digit test so "7742, 9606" yields two ints;
        # empty pieces (e.g. from a trailing comma) are dropped.
        taxon_ids = [int(piece) if piece.isdigit() else piece for piece in pieces if piece]
        if not taxon_ids:
            raise typer.BadParameter("Provide at least one taxon ID or scientific name")
        return taxon_ids

    @staticmethod
    def _resolve_taxonomy_path(taxon_ids: list, to: Path) -> Path:
        """
        Works out where ``build_taxonomy`` should write.

        Args:
            taxon_ids: Parsed taxa; joined with commas to form the default filename stem
            to: None, a bare extension such as ``.csv``, or an explicit output path

        Returns:
            ``<ids>.tab.gz`` if ``to`` is None, ``<ids><to>`` if ``to`` is a bare extension,
            otherwise ``to`` itself
        """
        stem = ",".join(str(taxon) for taxon in taxon_ids)
        if to is None:
            return Path(stem + ".tab.gz")
        target = Path(to)
        text = str(target)
        # Only a single path component that starts with '.' counts as an extension, so
        # values like "../out.csv" or ".cache/out.csv" are treated as real paths.
        is_bare_extension = (
            text.startswith(".") and target.name == text and text.strip(".") != ""
        )
        if is_bare_extension:
            # Appending to the stem (rather than ``with_suffix`` on the default path)
            # replaces the whole ".tab.gz", not just its trailing ".gz".
            return Path(stem + text)
        return target

    @staticmethod
    def build_taxonomy(
        taxa: str = typer.Argument(
            None,
            help="""
            UniProt taxon ID or scientific name, comma-separated.
            Scientific names are only permitted for subsets of vertebrata.
            """,
        ),
        # NOTE(review): the help below advertises a ``.feather`` default, but the code
        # falls back to ``.tab.gz``; confirm which one is intended and align them.
        to: Path = typer.Option(
            None,
            show_default=False,
            help="""
            Output file; can be CSV, TSV, feather, etc.
            If it starts with '.', uses the default path but changes the format and filename extension.

            [default: <taxon-id,<taxon-id>,...>.feather]
            """,
        ),
    ) -> None:
        """
        Writes a file of the descendants of given taxa.
        """
        taxon_ids = Commands._parse_taxa(taxa)
        # by default the filename is just the (cleaned) inputs joined by commas
        to = Commands._resolve_taxonomy_path(taxon_ids, to)
        to.parent.mkdir(exist_ok=True, parents=True)
        # TODO: this is quite inefficient
        # we're potentially reading in the vertebrata file multiple times
        # we could instead read it in, then concatenate the matching subtrees
        # however, this is moderately efficient if you ask for, e.g., Mammalia and Plantae
        # then it'll download Plantae but just get Mammalia from the resource-file Vertebrata
        taxes = [
            TaxonomyFactories.from_uniprot(MANDOS_SETTINGS.taxonomy_cache_path).load(taxon).to_df()
            for taxon in taxon_ids
        ]
        final_tax = TaxonomyDf.concat(taxes, ignore_index=True)
        # overlapping subtrees contribute the same rows more than once
        final_tax = final_tax.drop_duplicates().sort_values("taxon")
        final_tax.write_file(to)

    @staticmethod
    def dl_tax(
        taxon: int,
    ) -> None:
        """
        Preps a new taxonomy file for use in mandos.
        Just returns if a corresponding file already exists in the resources dir or mandos cache (``~/.mandos``).
        Otherwise, downloads a tab-separated file from UniProt.
        (To find manually, follow the ``All lower taxonomy nodes`` link and click ``Download``.)
        Then applies fixes and reduces the file size, creating a new file alongside.
        Puts both the raw data and fixed data in the cache under ``~/.mandos/taxonomy/``.

        Args:
            taxon: The **ID** of the UniProt taxon
        """
        # The loaded taxonomy is discarded; the call is made for whatever ``load`` does
        # to the cache.
        TaxonomyFactories.from_uniprot(MANDOS_SETTINGS.taxonomy_cache_path).load(taxon)
|
135
|
|
|
|
|
136
|
|
|
|
|
137
|
|
|
# Oh dear this is a nightmare
# it's really hard to create typer commands with dynamically configured params --
# we really need to rely on its inferring of params
# that makes this really hard to do well
# So each entry is registered by hand: a ``CommandInfo`` wrapping ``entry.run`` is
# appended to the app, and the same callable is attached to ``Commands``.

# Imported once, ahead of the loop: the name is also needed by the ``extend`` call
# below, which would raise NameError if the import lived in the loop body and
# ``Entries`` happened to be empty.
from typer.models import CommandInfo

for entry in Entries:
    cmd_name = entry.cmd()
    info = CommandInfo(cmd_name, callback=entry.run)
    cli.registered_commands.append(info)
    # print(f"Registered {entry.cmd()} to {entry}")
    setattr(Commands, cmd_name, entry.run)

# The hand-written commands; ``@tax-dl`` is registered but hidden from the help listing.
# NOTE(review): ``Commands.find`` is not registered in this list -- confirm that is intended.
cli.registered_commands.extend(
    [
        CommandInfo("@search", callback=Commands.search),
        CommandInfo("@tax-tree", callback=Commands.build_taxonomy),
        CommandInfo("@tax-dl", callback=Commands.dl_tax, hidden=True),
    ]
)
|
156
|
|
|
|
|
157
|
|
|
|
|
158
|
|
|
if __name__ == "__main__":
    # Script entry point: send log records to stdout, then hand control to typer.
    # logging.basicConfig(level=0)
    import sys

    root = logging.getLogger()
    # The handler itself filters nothing (level 0); the logger levels set below decide
    # what gets through.
    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(logging.NOTSET)
    handler.setFormatter(
        logging.Formatter("%(levelname)-7s %(asctime)s %(message)s", "%Y%m%d:%H:%M:%S")
    )
    # NOTE(review): the same handler is attached to both loggers; if ``logger``
    # propagates to the root logger, its records may be printed twice -- confirm.
    for target in (root, logger):
        target.addHandler(handler)
    # log_factory = PrettyRecordFactory(10, 12, 5, width=100, symbols=True).modifying(logger)
    # good start; can be changed
    root.setLevel(logging.WARNING)
    logger.setLevel(logging.INFO)
    cli()
|
174
|
|
|
|
|
175
|
|
|
|
|
176
|
|
|
__all__ = ["Commands"] |
|
177
|
|
|
|