|
1
|
|
|
""" |
|
2
|
|
|
Command-line interface for mandos. |
|
3
|
|
|
""" |
|
4
|
|
|
|
|
5
|
|
|
from __future__ import annotations |
|
6
|
|
|
|
|
7
|
|
|
from pathlib import Path |
|
8
|
|
|
from typing import Optional |
|
9
|
|
|
|
|
10
|
|
|
import typer |
|
|
|
|
|
|
11
|
|
|
from mandos.model.taxonomy import TaxonomyDf |
|
12
|
|
|
|
|
13
|
|
|
from mandos.entry._entry_utils import EntryUtils |
|
14
|
|
|
from mandos.entry.docs import Documenter |
|
15
|
|
|
|
|
16
|
|
|
from mandos import logger |
|
17
|
|
|
from mandos.entry.searchers import InputFrame |
|
18
|
|
|
from mandos.model.utils.setup import MANDOS_SETUP |
|
19
|
|
|
from typeddfs.utils import Utils as TypedDfsUtils |
|
|
|
|
|
|
20
|
|
|
from mandos.analysis.filtration import Filtration |
|
|
|
|
|
|
21
|
|
|
from mandos.analysis.reification import Reifier |
|
22
|
|
|
from mandos.entry._common_args import CommonArgs |
|
23
|
|
|
from mandos.entry._arg_utils import Arg, Opt, ArgUtils |
|
24
|
|
|
from mandos.entry._common_args import CommonArgs as Ca |
|
|
|
|
|
|
25
|
|
|
from mandos.entry.multi_searches import MultiSearch, SearchExplainDf |
|
26
|
|
|
from mandos.entry.fillers import CompoundIdFiller, IdMatchFrame |
|
27
|
|
|
from mandos.model.utils.resources import MandosResources |
|
28
|
|
|
from mandos.model.apis.g2p_api import CachingG2pApi |
|
29
|
|
|
from mandos.model.hits import HitFrame |
|
30
|
|
|
from mandos.model.settings import MANDOS_SETTINGS |
|
31
|
|
|
from mandos.model.taxonomy_caches import TaxonomyFactories |
|
32
|
|
|
|
|
33
|
|
|
# Table-file suffix appended to the default output filenames built by the commands below.
DEF_SUFFIX = MANDOS_SETTINGS.default_table_suffix
|
34
|
|
|
|
|
35
|
|
|
|
|
36
|
|
|
class _InsertedCommandListSingleton: |
|
37
|
|
|
commands = None |
|
38
|
|
|
|
|
39
|
|
|
|
|
40
|
|
|
class MiscCommands: |
|
|
|
|
|
|
41
|
|
|
@staticmethod
def list_default_settings(
    log: Optional[Path] = CommonArgs.log,
    stderr: str = CommonArgs.stderr,
):
    """
    Write the default settings to stdout.
    """
    # NOTE(review): the docstring is kept verbatim because it presumably doubles
    # as the CLI help text for this command (typer convention) -- confirm.
    MANDOS_SETUP(log, stderr)
    # Emit one "key = value" line per default setting.
    settings = MANDOS_SETTINGS.defaults()
    for name, value in settings.items():
        typer.echo(f"{name} = {value}")
|
52
|
|
|
|
|
53
|
|
|
@staticmethod
def document(
    to: Path = Opt.out_file(
        r"""
        The path to write command documentation to.

        The filename can end with .txt with optional compression (e.g. .txt.gz)
        for formatted text output alongside --style.

        Can also use any table format: .feather; .snappy/.parquet;
        or .csv, .tsv, .tab, .json, .flexwf (with optional .gz/.bz2/.zip/.xz).

        [default: "commands-level<level>.txt"]
        """
    ),
    style: str = Opt.val(
        rf"""
        The format for formatted text output.

        This is ignored if --to is not a .txt file (or .txt.gz, etc.).
        The choices are: "table", "document", {", ".join(TypedDfsUtils.table_formats())}.

        "table" is a special style that saves in any machine-readable table format, such
        as Feather or Parquet (determined by --to).

        "document" is a special style that emits non-table-like flat text.
        """,
        "--style",
        default="table",
    ),
    width: Optional[int] = Opt.val(
        r"""
        Max number of characters for a cell.

        After that, the text is wrapped.
        Only applies when writing formatted text (.txt, etc.).

        [default: 40 if level > 1; 100 otherwise]
        """,
        default=None,
        show_default=False,
    ),
    level: int = Opt.val(
        r"""
        The amount of detail to output.

        - 1 : show a 1-line description

        - 2 : Show a 1-line description, plus parameter names

        - 3 : Show the full description, plus parameter names, types, and 1-line descriptions

        - 4 : Show the full description, plus parameter names, types, and full descriptions

        - 5 : Same as 4, but enable --hidden and --common
        """,
        default=4,
        min=1,
        max=5,
    ),
    main_only: bool = Opt.flag(r"Only include main commands."),
    search_only: bool = Opt.flag(r"Only include search commands."),
    hidden: bool = Opt.flag(r"Show hidden commands."),
    common: bool = Opt.flag(
        r"""
        Show common arguments and options.

        Normally --log, --quiet, and --verbose are excluded,
        along with path, --key, --to, --as-of for searches,
        and the hidden flags for searches --check and --no-setup.
        """
    ),
    replace: bool = Ca.replace,
    log: Optional[Path] = Ca.log,
    stderr: str = CommonArgs.stderr,
):
    r"""
    Write documentation on commands to a file.
    """
    # Changes from the previous revision (help text / annotations only):
    #   * the --to help referred to a nonexistent "--format" option; it is "--style"
    #   * "Can can" and a missing comma in the level-4 description were fixed
    #   * ``width`` defaults to None, so it is annotated Optional[int]
    MANDOS_SETUP(log, stderr)
    if level == 5:
        # Level 5 is level 4 with --hidden and --common forced on.
        hidden = True
        common = True
    if width is None:
        # Documented default: 100 characters at level 1, otherwise 40.
        width = 100 if level == 1 else 40
    if width == 0:
        # 0 is replaced by 2**63 - 1, which effectively disables wrapping.
        width = 9223372036854775807
    default = f"commands-level{level}.txt"
    to = EntryUtils.adjust_filename(to, default, replace)
    documenter = Documenter(
        level=level,
        main=main_only,
        search=search_only,
        hidden=hidden,
        common=common,
        width=width,
    )
    documenter.document(_InsertedCommandListSingleton.commands, to, style)
|
152
|
|
|
|
|
153
|
|
|
@staticmethod
def search(
    path: Path = Ca.in_compound_table,
    config: Path = Arg.in_file(
        r"""
        TOML config file. See docs.
        """
    ),
    out_dir: Path = Ca.out_misc_dir,
    log: Optional[Path] = Ca.log,
    stderr: str = CommonArgs.stderr,
) -> None:
    r"""
    Run multiple searches.
    """
    MANDOS_SETUP(log, stderr)
    # Build the multi-search from the compound table, output directory and
    # TOML config, then execute it.
    multi_search = MultiSearch.build(path, out_dir, config)
    multi_search.run()
|
170
|
|
|
|
|
171
|
|
|
@staticmethod
def detail_search(
    config: Path = Arg.in_file(
        r"""
        TOML config file. See docs.
        """
    ),
    to: Path = Opt.out_path(
        rf"""
        Write the table here.

        {Ca.output_formats}

        {ArgUtils.df_description(SearchExplainDf)}

        [default: <config>-details{DEF_SUFFIX}]
        """
    ),
    replace: bool = Ca.replace,
    log: Optional[Path] = Ca.log,
    stderr: str = CommonArgs.stderr,
) -> None:
    r"""
    Write details about a search (:search).
    """
    MANDOS_SETUP(log, stderr)
    # Default output sits next to the config file: "<config name>-details<suffix>".
    fallback = config.parent / (config.name + f"-details{DEF_SUFFIX}")
    to = EntryUtils.adjust_filename(to, fallback, replace)
    # Input/output directories are placeholders (".") here; only the table
    # describing the search is produced, the search itself is not run.
    table = MultiSearch.build(Path("."), Path("."), config).to_table()
    table.write_file(to)
    logger.notice(f"Wrote search details to {to}")
|
204
|
|
|
|
|
205
|
|
|
@staticmethod
def serve(
    port: int = Opt.val(r"Port to serve on", default=1540),
    db: str = Opt.val("Name of the MySQL database", default="mandos"),
    log: Optional[Path] = Ca.log,
    stderr: str = CommonArgs.stderr,
) -> None:
    r"""
    Start a REST server.

    The connection information is stored in your global settings file.
    """
    # NOTE(review): stub -- only logging is configured; ``port`` and ``db``
    # are accepted but not used anywhere in this body.
    MANDOS_SETUP(log, stderr)
|
218
|
|
|
|
|
219
|
|
|
@staticmethod
def export_db(
    path: Path = Ca.in_annotations_file,
    db: str = Opt.val(r"Name of the MySQL database", default="mandos"),
    host: str = Opt.val(
        r"Database hostname (ignored if ``--socket`` is passed", default="127.0.0.1"
    ),
    socket: Optional[str] = Opt.val("Path to a Unix socket (if set, ``--host`` is ignored)"),
    user: Optional[str] = Opt.val("Database username (empty if not set)"),
    password: Optional[str] = Opt.val("Database password (empty if not set)"),
    as_of: Optional[str] = CommonArgs.as_of,
    log: Optional[Path] = CommonArgs.log,
    stderr: str = CommonArgs.stderr,
) -> None:
    r"""
    Export to a relational database.

    Saves data from Mandos search commands to a database for serving via REST.

    See also: ``:serve``.
    """
    # NOTE(review): stub -- only logging is configured; none of the database
    # parameters are used in this body, so nothing is exported yet.
    # NOTE(review): the ``--host`` help text is missing its closing parenthesis.
    MANDOS_SETUP(log, stderr)
|
241
|
|
|
|
|
242
|
|
|
@staticmethod
def init_db(
    db: str = Opt.val(r"Name of the MySQL database", default="mandos"),
    host: str = Opt.val(
        r"Database hostname (ignored if ``--socket`` is passed", default="127.0.0.1"
    ),
    socket: Optional[str] = Opt.val("Path to a Unix socket (if set, ``--host`` is ignored)"),
    user: Optional[str] = Opt.val("Database username (empty if not set)"),
    password: Optional[str] = Opt.val("Database password (empty if not set)"),
    overwrite: bool = Opt.flag(r"Delete the database if it exists"),
    yes: bool = Ca.yes,
    log: Optional[Path] = CommonArgs.log,
    stderr: str = CommonArgs.stderr,
) -> None:
    r"""
    Initialize an empty database.
    """
    # NOTE(review): stub -- only logging is configured; ``overwrite``, ``yes``
    # and the connection parameters are not used in this body.
    # NOTE(review): the ``--host`` help text is missing its closing parenthesis.
    MANDOS_SETUP(log, stderr)
|
260
|
|
|
|
|
261
|
|
|
@staticmethod
def fill(
    path: Path = Arg.in_file(
        rf"""
        The path to the file listing compounds by various IDs.

        {Ca.input_formats}

        Can use columns called 'inchikey', 'chembl_id', and 'pubchem_id'.
        Other columns are permitted but will not be used.

        {ArgUtils.df_description(InputFrame)}
        """,
    ),
    to: Path = Opt.out_path(
        rf"""
        A table of compounds and their database IDs will be written here.

        {Ca.output_formats}

        {ArgUtils.df_description(IdMatchFrame)}

        [default: <path without suffixes>-filled<suffixes of path>]
        """
    ),
    no_pubchem: bool = Opt.flag("Do not use PubChem.", "--no-pubchem"),
    no_chembl: bool = Opt.flag("Do not use ChEMBL.", "--no-chembl"),
    replace: bool = Ca.replace,
    log: Optional[Path] = Ca.log,
    stderr: str = CommonArgs.stderr,
) -> None:
    r"""
    Fill in missing IDs from existing compound data.

    The idea is to find a ChEMBL ID, a PubChem ID, and parent-compound InChI/InChI Key.
    Useful to check compound/ID associations before running a search.

    To be filled, each row must have a non-null value for
    "inchikey", "chembl_id", and/or "pubchem_id".
    "inchi" will be used but not to match to PubChem and ChEMBL.

    No existing columns will be dropped or modified.
    Any conflicting column will be renamed to 'origin_<column>'.
    E.g. 'inchikey' will be renamed to 'origin_inchikey'.
    (Do not include a column beginning with 'origin_').

    Final columns (assuming neither --no-chembl nor --no-pubchem is passed) will include:
    inchikey, inchi, pubchem_id, chembl_id, pubchem_inchi, chembl_inchi,
    pubchem_inchikey, and chembl_inchikey.
    The "inchi" and "inchikey" columns will be the "best" available:
    chembl (preferred), then pubchem, then your source inchikey column.
    In cases where PubChem and ChEMBL differ, an error will be logged.
    You can always check the columns "origin_inchikey" (yours),
    chembl_inchikey, and pubchem_inchikey.

    The steps are:

    - If "chembl_id" or "pubchem_id" is non-null, uses that to find an InChI Key (for each).

    - Otherwise, if only "inchikey" is non-null, uses it to find ChEMBL and PubChem records.

    - Log an error if the inchikeys or inchis differ between PubChem and ChEMBL.

    - Set the final "inchi" and "inchikey" to the best choice,
      falling back to the input inchi and inchikey if they are missing.
    """
    MANDOS_SETUP(log, stderr)
    # Build "<stem>-filled<suffixes>". The previous code used with_suffix(""),
    # which removes only the LAST suffix while re-appending ALL of them, so
    # "x.csv.gz" became "x.csv-filled.csv.gz". Strip every suffix instead.
    suffixes = "".join(path.suffixes)
    stem = path.name[: len(path.name) - len(suffixes)]
    default = str(path.parent / (stem + "-filled" + suffixes))
    to = EntryUtils.adjust_filename(to, default, replace)
    df = IdMatchFrame.read_file(path)
    # Each --no-* flag disables the corresponding data source.
    df = CompoundIdFiller(chembl=not no_chembl, pubchem=not no_pubchem).fill(df)
    df.write_file(to)
|
333
|
|
|
|
|
334
|
|
|
@staticmethod
def cache_data(
    path: Path = Ca.in_compound_table,
    no_pubchem: bool = Opt.flag(r"Do not download data from PubChem", "--no-pubchem"),
    # NOTE(review): spelled "--no_chembl" (underscore) here but "--no-chembl"
    # in ``fill``; left unchanged to avoid altering the CLI -- confirm intent.
    no_chembl: bool = Opt.flag(r"Do not fetch IDs from ChEMBL", "--no_chembl"),
    log: Optional[Path] = CommonArgs.log,
    stderr: str = CommonArgs.stderr,
) -> None:
    r"""
    Fetch and cache compound data.

    Useful to freeze data before running a search.
    """
    MANDOS_SETUP(log, stderr)
    logger.error("Not implemented fully yet.")
    # Run the ID filler over the input table; its result is discarded here,
    # so presumably only its caching side effects matter (TODO confirm).
    compounds = IdMatchFrame.read_file(path)
    filler = CompoundIdFiller(chembl=not no_chembl, pubchem=not no_pubchem)
    filler.fill(compounds)
    logger.notice("Done caching.")
|
|
|
|
|
|
352
|
|
|
|
|
353
|
|
|
@staticmethod
def export_taxa(
    taxa: str = Ca.taxa,
    forbid: str = Opt.val(
        r"""Exclude descendents of these taxa IDs or names (comma-separated).""", default=""
    ),
    to: Optional[Path] = typer.Option(
        None,
        help=rf"""
        Where to export.

        {Ca.output_formats}

        {ArgUtils.df_description(TaxonomyDf)}

        [default: ./<taxa>-<datetime>{DEF_SUFFIX}]
        """,
    ),
    replace: bool = Ca.replace,
    in_cache: bool = CommonArgs.in_cache,
    log: Optional[Path] = CommonArgs.log,
    stderr: str = CommonArgs.stderr,
):
    """
    Export a taxonomic tree to a table.

    Writes a taxonomy of given taxa and their descendants to a table.
    """
    # NOTE(review): ``in_cache`` is accepted but not used in this body.
    MANDOS_SETUP(log, stderr)
    # Label for the default filename, built from the raw option strings.
    # Only append the forbidden taxa when some were given; previously an empty
    # --forbid produced "<taxa>--<datetime>", contradicting the documented
    # default "<taxa>-<datetime>".
    label = taxa if forbid == "" else taxa + "-" + forbid
    taxa = ArgUtils.parse_taxa(taxa)
    forbid = ArgUtils.parse_taxa(forbid)
    default = label + "-" + MandosResources.start_timestamp_filesys + DEF_SUFFIX
    to = EntryUtils.adjust_filename(to, default, replace)
    taxonomy = TaxonomyFactories.get_smart_taxonomy(taxa, forbid)
    # mkdirs=True is passed through to write_file for the output location.
    taxonomy.to_df().write_file(to, mkdirs=True)
|
390
|
|
|
|
|
391
|
|
|
@staticmethod
def cache_taxa(
    taxa: str = Opt.val(
        r"""
        Either "vertebrata", "all", or a comma-separated list of UniProt taxon IDs.

        "all" is only valid when --replace is passed;
        this will regenerate all taxonomy files that are found in the cache.
        """,
        default="",
    ),
    replace: bool = Ca.replace,
    log: Optional[Path] = CommonArgs.log,
    stderr: str = CommonArgs.stderr,
) -> None:
    """
    Prep a new taxonomy file for use in mandos.

    With --replace set, will delete any existing file.
    This can be useful to make sure your cached taxonomy is up-to-date before running.

    Downloads and converts a tab-separated file from UniProt.
    (To find manually, follow the ``All lower taxonomy nodes`` link and click ``Download``.)
    Then applies fixes and reduces the file size, creating a new file alongside.
    Puts both the raw data and fixed data in the cache under ``~/.mandos/taxonomy/``.
    """
    # Configure logging first, as every other command does, so the early
    # "no taxa" message below honors --log/--stderr.
    MANDOS_SETUP(log, stderr)
    if taxa == "":
        logger.info("No taxa were specified. No data downloaded.")
        return
    # Validate token by token, so inputs such as "1 2" or "1,,2" get the
    # friendly message instead of a raw int() failure further down.
    tokens = [t.strip() for t in taxa.split(",")]
    if taxa not in ("all", "vertebrata") and not all(t.isdigit() for t in tokens):
        raise ValueError("Use either 'all', 'vertebrata', or a UniProt taxon ID")
    if taxa == "all" and not replace:
        raise ValueError("Use --replace with taxon 'all'")
    factory = TaxonomyFactories.from_uniprot()
    if taxa == "all":
        # --replace is guaranteed here: drop every cached file, then rebuild.
        listed = TaxonomyFactories.list_cached_files()
        for cached_path in listed.values():
            cached_path.unlink()
        factory.rebuild_vertebrata()
        for cached_taxon in listed:
            factory.load_dl(cached_taxon)
    elif taxa == "vertebrata":
        # 7742 is the taxon ID this code uses for vertebrata.
        if replace or not factory.resolve_path(7742).exists():
            factory.rebuild_vertebrata()
        else:
            factory.load_vertebrate(7742)  # should usually do nothing
    else:
        for taxon in (int(t) for t in tokens):
            # Previously this branch ONLY deleted the cached entry (even
            # without --replace) and never fetched anything, contradicting the
            # help text above. Delete only on --replace, then load.
            # NOTE(review): assumes load_dl accepts an int taxon ID and
            # downloads when the file is missing, as its use in the "all"
            # branch suggests -- confirm against TaxonomyFactories.
            if replace:
                factory.delete_exact(taxon)
            factory.load_dl(taxon)
|
443
|
|
|
|
|
444
|
|
|
@staticmethod
def cache_g2p(
    replace: bool = Ca.replace,
    log: Optional[Path] = CommonArgs.log,
    stderr: str = CommonArgs.stderr,
) -> None:
    """
    Caches GuideToPharmacology data.

    With --replace set, will overwrite existing cached data.
    Data will generally be stored under``~/.mandos/g2p/``.
    """
    MANDOS_SETUP(log, stderr)
    # The cache location comes from the global settings; --replace is
    # forwarded as ``force`` to the download call.
    g2p = CachingG2pApi(MANDOS_SETTINGS.g2p_cache_path)
    g2p.download(force=replace)
|
459
|
|
|
|
|
460
|
|
|
@staticmethod
def cache_clear(
    log: Optional[Path] = CommonArgs.log,
    stderr: str = CommonArgs.stderr,
    yes: bool = CommonArgs.yes,
) -> None:
    """
    Deletes all cached data.
    """
    # Local import keeps this block self-contained.
    import shutil

    MANDOS_SETUP(log, stderr)
    # Snapshot once so the list shown to the user is exactly what gets deleted.
    targets = list(MANDOS_SETTINGS.all_cache_paths)
    typer.echo("Will recursively delete all of these paths:")
    for target in targets:
        typer.echo(f" {target}")
    if not yes:
        # abort=True makes typer abort the command when the user declines.
        typer.confirm("Delete?", abort=True)
    for target in targets:
        # Path.unlink() cannot remove directories, so the promised recursive
        # delete needs rmtree. Symlinks are unlinked, never followed.
        if target.is_dir() and not target.is_symlink():
            shutil.rmtree(target)
        else:
            target.unlink(missing_ok=True)
    logger.notice("Deleted all cached data")
|
478
|
|
|
|
|
479
|
|
|
@staticmethod
def concat(
    path: Path = Arg.in_dir(
        rf"""
        Directory containing results from a mandos search.

        {Ca.input_formats}
        """
    ),
    to: Optional[Path] = Ca.out_annotations_file,
    replace: bool = Ca.replace,
    log: Optional[Path] = CommonArgs.log,
    stderr: str = CommonArgs.stderr,
) -> None:
    r"""
    Concatenate Mandos annotation files into one.

    Note that ``:search`` automatically performs this;
    this is needed only if you want to combine results from multiple independent searches.
    """
    MANDOS_SETUP(log, stderr)
    default = path / ("concat" + DEF_SUFFIX)
    to = EntryUtils.adjust_filename(to, default, replace)
    # TODO: the concatenation itself is not implemented; the directory is
    # walked but nothing is read or written.
    for _ in path.iterdir():
        pass
    # Say so explicitly instead of exiting silently as if the command had
    # succeeded (``cache_data`` flags its unfinished state the same way).
    logger.error(f"Not implemented yet. Nothing was written to {to}")
|
504
|
|
|
|
|
505
|
|
|
@staticmethod
def filter(
    # Input annotations. This previously reused ``Ca.out_annotations_file``
    # (the same descriptor as ``to`` below); every other command that reads an
    # annotations file uses ``Ca.in_annotations_file``.
    path: Path = Ca.in_annotations_file,
    by: Optional[Path] = Arg.in_file(
        r"""
        Path to a file containing filters.

        See the docs for more info.
        """
    ),
    to: Optional[Path] = Ca.out_annotations_file,
    replace: bool = Ca.replace,
    log: Optional[Path] = CommonArgs.log,
    stderr: str = CommonArgs.stderr,
) -> None:
    """
    Filters by simple expressions.
    """
    MANDOS_SETUP(log, stderr)
    # Default output name: "<path>-filter-<filter file stem><suffix>".
    # NOTE(review): ``by`` is annotated Optional but dereferenced here
    # unconditionally; presumably Arg.in_file makes it required -- confirm.
    default = str(path) + "-filter-" + by.stem + DEF_SUFFIX
    to = EntryUtils.adjust_filename(to, default, replace)
    hits = HitFrame.read_file(path)
    filtered = Filtration.from_file(by).apply(hits)
    filtered.write_file(to)
|
528
|
|
|
|
|
529
|
|
|
@staticmethod
def export_state(
    path: Path = Ca.in_annotations_file,
    to: Optional[Path] = Opt.out_path(
        """
        Path to the output file.

        Valid formats and filename suffixes are .nt and .txt with an optional .gz, .zip, or .xz.
        If only a filename suffix is provided, will use that suffix with the default directory.
        If no suffix is provided, will interpret the path as a directory and use the default filename.
        Will fail if the file exists and ``--replace`` is not set.

        [default: <path>-statements.nt]
        """
    ),
    replace: bool = Ca.replace,
    log: Optional[Path] = CommonArgs.log,
    stderr: str = CommonArgs.stderr,
) -> None:
    """
    Output simple N-triples statements.

    Each statement is of this form, where the InChI Key refers to the input data:

    `"InChI Key" "predicate" "object" .`
    """
    MANDOS_SETUP(log, stderr)
    default = f"{path}-statements.nt"
    to = EntryUtils.adjust_filename(to, default, replace)
    hits = HitFrame.read_file(path).to_hits()
    # Open for WRITING. The previous ``to.open()`` used the default mode "r",
    # so it failed on a missing file and every write raised. N-Triples
    # documents are UTF-8, hence the explicit encoding.
    # NOTE(review): the help text advertises .gz/.zip/.xz, but a plain text
    # file is written here; compression is not handled in this block.
    with to.open("w", encoding="utf-8") as f:
        for hit in hits:
            f.write(hit.to_triple.n_triples)
|
562
|
|
|
|
|
563
|
|
|
@staticmethod
def export_reify(
    path: Path = Ca.in_annotations_file,
    to: Optional[Path] = Opt.out_path(
        r"""
        Path to the output file.

        The filename suffix should be either .nt (N-triples) or .ttl (Turtle),
        with an optional .gz, .zip, or .xz.
        If only a filename suffix is provided, will use that suffix with the default directory.
        If no suffix is provided, will interpret the path as a directory but use the default filename.
        Will fail if the file exists and ``--replace`` is not set.

        [default: <path>-reified.nt]
        """
    ),
    replace: bool = Ca.replace,
    log: Optional[Path] = CommonArgs.log,
    stderr: str = CommonArgs.stderr,
) -> None:
    """
    Outputs reified semantic triples.
    """
    MANDOS_SETUP(log, stderr)
    default = f"{path}-reified.nt"
    to = EntryUtils.adjust_filename(to, default, replace)
    hits = HitFrame.read_file(path).to_hits()
    # Open for WRITING. The previous ``to.open()`` used the default mode "r",
    # so it failed on a missing file and every write raised.
    # NOTE(review): only N-Triples text is emitted regardless of suffix;
    # .ttl and compressed outputs named in the help are not handled here.
    with to.open("w", encoding="utf-8") as f:
        for triple in Reifier().reify(hits):
            f.write(triple.n_triples)
|
593
|
|
|
|
|
594
|
|
|
@staticmethod
def export_copy(
    path: Path = Ca.in_annotations_file,
    to: Optional[Path] = Opt.out_path(
        rf"""
        Path to the output file.

        {Ca.output_formats}

        [default: <path.parent>/export{DEF_SUFFIX}]
        """
    ),
    replace: bool = Ca.replace,
    log: Optional[Path] = CommonArgs.log,
    stderr: str = CommonArgs.stderr,
) -> None:
    """
    Copies and/or converts annotation files.

    Example: ``:export:copy --to .snappy`` to highly compress a data set.
    """
    MANDOS_SETUP(log, stderr)
    # Documented default is "<path.parent>/export<suffix>". The previous code
    # joined the bare suffix, yielding a file named just the suffix.
    default = path.parent / ("export" + DEF_SUFFIX)
    to = EntryUtils.adjust_filename(to, default, replace)
    # Read as a HitFrame and write back out; the output format is presumably
    # chosen from the suffix of ``to`` (per the help text) -- TODO confirm.
    hits = HitFrame.read_file(path)
    hits.write_file(to)
|
620
|
|
|
|
|
621
|
|
|
|
|
622
|
|
|
# Public API of this module: only the command container class is exported.
__all__ = ["MiscCommands"]
|
623
|
|
|
|