1
|
|
|
""" |
2
|
|
|
Command-line interface for mandos. |
3
|
|
|
""" |
4
|
|
|
|
5
|
|
|
from __future__ import annotations |
6
|
|
|
|
7
|
|
|
from pathlib import Path |
8
|
|
|
from typing import Optional, List |
9
|
|
|
|
10
|
|
|
import typer |
|
|
|
|
11
|
|
|
from typeddfs import FileFormat |
|
|
|
|
12
|
|
|
|
13
|
|
|
from mandos.entries.docs import Documenter |
14
|
|
|
from mandos.entries.searcher import InputFrame |
15
|
|
|
|
16
|
|
|
from mandos import logger, MANDOS_SETUP |
17
|
|
|
from mandos.analysis.io_defns import SimilarityDfLongForm |
18
|
|
|
from mandos.analysis.concordance import ConcordanceCalculation |
19
|
|
|
from mandos.analysis.distances import MatrixCalculation |
20
|
|
|
from mandos.analysis.filtration import Filtration |
21
|
|
|
from mandos.analysis.enrichment import EnrichmentCalculation, RealAlg, BoolAlg |
22
|
|
|
from mandos.analysis.io_defns import ScoreDf |
23
|
|
|
from mandos.analysis.prepping import MatrixPrep |
24
|
|
|
from mandos.analysis.reification import Reifier |
25
|
|
|
from mandos.entries.common_args import Arg, CommonArgs |
26
|
|
|
from mandos.entries.common_args import CommonArgs as Ca |
|
|
|
|
27
|
|
|
from mandos.entries.common_args import Opt |
28
|
|
|
from mandos.entries.multi_searches import MultiSearch |
29
|
|
|
from mandos.entries.filler import CompoundIdFiller, IdMatchFrame |
30
|
|
|
from mandos.model import MandosResources |
31
|
|
|
from mandos.model.utils import MiscUtils |
32
|
|
|
from mandos.model.apis.g2p_api import CachingG2pApi |
33
|
|
|
from mandos.model.hits import HitFrame |
34
|
|
|
from mandos.model.settings import MANDOS_SETTINGS |
35
|
|
|
from mandos.model.taxonomy_caches import TaxonomyFactories |
36
|
|
|
from mandos.analysis.projection import UMAP |
37
|
|
|
|
38
|
|
|
set_up = MANDOS_SETUP |
39
|
|
|
DEF_SUFFIX = MANDOS_SETTINGS.default_table_suffix |
40
|
|
|
|
41
|
|
|
# Default UMAP hyperparameters, minus the two this module controls itself.
# Stays empty when the optional UMAP import resolved to None.
_umap_params = {}
if UMAP is not None:
    _umap_params = {
        name: value
        for name, value in UMAP().get_params(deep=False).items()
        if name not in {"random_state", "metric"}
    }
49
|
|
|
|
50
|
|
|
|
51
|
|
|
class _InsertedCommandListSingleton: |
52
|
|
|
commands = None |
53
|
|
|
|
54
|
|
|
|
55
|
|
|
class MiscCommands: |
|
|
|
|
56
|
|
|
@staticmethod
def describe(
    to: Path = Ca.doc_output,
    style: str = Ca.doc_style,
    width: int = Opt.val(
        r"""
        Max line width for a single cell.

        After that, the text is wrapped.
        Only applies when writing formatted text (.txt, etc.).
        """,
        default=100,
    ),
    flatten: bool = Opt.flag(
        r"""
        Flatten all columns into a single one.

        Only affects pretty-printing.
        """
    ),
    level: int = Opt.val(
        r"""
        The amount of detail to output.

        - 1 : show a 1-line description

        - 2 : Show a 1-line description, plus parameter names

        - 3 : Show the full description, plus parameter names, types, and 1-line descriptions

        - 4 : Show the full description, plus parameter names types, and full descriptions

        - 5 : Same as 4, but enable --hidden and --common
        """,
        default=4,
    ),
    main_only: bool = Opt.flag(r"Only include main commands."),
    search_only: bool = Opt.flag(r"Only include search commands."),
    hidden: bool = Opt.flag(r"Show hidden commands."),
    common: bool = Opt.flag(
        r"""
        Show common arguments and options.

        Normally --log, --quiet, and --verbose are excluded,
        along with path, --key, --to, --as-of for searches,
        and the hidden flags for searches --check and --no-setup.
        """
    ),
    replace: bool = CommonArgs.replace,
    log: Optional[Path] = CommonArgs.log_path,
    quiet: bool = CommonArgs.quiet,
    verbose: bool = CommonArgs.verbose,
):
    r"""
    Write documentation on commands to a file.
    """
    set_up(log, quiet, verbose)
    # Level 5 force-enables both the hidden and the common listings.
    if level == 5:
        hidden, common = True, True
    # The fallback filename records the requested detail level.
    fallback_name = f"commands-level{level}.txt"
    to = MiscUtils.adjust_filename(to, fallback_name, replace)
    documenter = Documenter(
        level=level,
        main=main_only,
        search=search_only,
        hidden=hidden,
        common=common,
        width=width,
        flatten=flatten,
    )
    documenter.document(_InsertedCommandListSingleton.commands, to, style)
127
|
|
|
|
128
|
|
|
@staticmethod
def search(
    path: Path = Ca.compounds,
    config: Path = Arg.in_file(
        r"""
        TOML config file. See docs.
        """
    ),
    log: Optional[Path] = CommonArgs.log_path,
    quiet: bool = CommonArgs.quiet,
    verbose: bool = CommonArgs.verbose,
    out_dir: Path = Ca.out_dir,
) -> None:
    r"""
    Run multiple searches.
    """
    set_up(log, quiet, verbose)
    # Note the argument order expected by MultiSearch.build: input, output dir, config.
    multi_search = MultiSearch.build(path, out_dir, config)
    multi_search.run()
146
|
|
|
|
147
|
|
|
@staticmethod
def serve(
    port: int = Opt.val(r"Port to serve on", default=1540),
    db: str = Opt.val("Name of the MySQL database", default="mandos"),
    log: Optional[Path] = CommonArgs.log_path,
    quiet: bool = CommonArgs.quiet,
    verbose: bool = CommonArgs.verbose,
) -> None:
    r"""
    Start a REST server.

    The connection information is stored in your global settings file.
    """
    # Only logging is configured here; ``port`` and ``db`` are accepted but not
    # used by this body, and no server is started.
    set_up(log, quiet, verbose)
161
|
|
|
|
162
|
|
|
@staticmethod
def export_db(
    path: Path = Ca.file_input,
    db: str = Opt.val(r"Name of the MySQL database", default="mandos"),
    host: str = Opt.val(
        r"Database hostname (ignored if ``--socket`` is passed", default="127.0.0.1"
    ),
    socket: Optional[str] = Opt.val("Path to a Unix socket (if set, ``--host`` is ignored)"),
    user: Optional[str] = Opt.val("Database username (empty if not set)"),
    password: Optional[str] = Opt.val("Database password (empty if not set)"),
    as_of: Optional[str] = CommonArgs.as_of,
    log: Optional[Path] = CommonArgs.log_path,
    quiet: bool = CommonArgs.quiet,
    verbose: bool = CommonArgs.verbose,
) -> None:
    r"""
    Export to a relational database.

    Saves data from Mandos search commands to a database for serving via REST.

    See also: ``:serve``.
    """
    # Only logging is configured here; the connection options are accepted but
    # not used by this body, and nothing is exported.
    set_up(log, quiet, verbose)
185
|
|
|
|
186
|
|
|
@staticmethod
def fill(
    path: Path = Ca.compounds_to_fill,
    to: Path = Ca.id_table_to,
    no_pubchem: bool = Opt.flag("Do not use PubChem.", "--no-pubchem"),
    no_chembl: bool = Opt.flag("Do not use ChEMBL.", "--no-chembl"),
    replace: bool = Ca.replace,
    log: Optional[Path] = Ca.log_path,
    quiet: bool = Ca.quiet,
    verbose: bool = Ca.verbose,
) -> None:
    r"""
    Fill in missing IDs from existing compound data.

    The idea is to find a ChEMBL ID, a PubChem ID, and parent-compound InChI/InChI Key.
    Useful to check compound/ID associations before running a search.

    To be filled, each row should have a non-null value for
    "inchikey", "chembl_id", and/or "pubchem_id".
    "inchi" will be used but not to match to PubChem and ChEMBL.

    No existing columns will be dropped or modified.
    Any conflicting column will be renamed to 'origin_<column>'.
    E.g. 'inchikey' will be renamed to 'origin_inchikey'.
    (Do not include a column beginning with 'origin_').

    Final columns (assuming --no-chembl and --no-pubchem) will include:
    inchikey, inchi, pubchem_id, chembl_id, pubchem_inchi, chembl_inchi,
    pubchem_inchikey, and chembl_inchikey.
    The "inchi" and "inchikey" columns will be the "best" available:
    chembl (preferred), then pubchem, then your source inchikey column.
    In cases where PubChem and ChEMBL differ, an error will be logged.
    You can always check the columns "origin_inchikey" (yours),
    chembl_inchikey, and pubchem_inchikey.

    The steps are:

    - If "chembl_id" or "pubchem_id" is non-null, uses that to find an InChI Key (for each).

    - Otherwise, if only "inchikey" is non-null, uses it to find ChEMBL and PubChem records.

    - Log an error if the inchikeys or inchis differ between PubChem and ChEMBL.

    - Set the final "inchi" and "inchikey" to the best choice,
      falling back to the input inchi and inchikey if they are missing.
    """
    set_up(log, quiet, verbose)
    path = Path(path)
    # Insert "-filled" before *all* suffixes so that ``a.csv.gz`` becomes
    # ``a-filled.csv.gz``. Stripping only the last suffix (as ``with_suffix("")``
    # does) would yield ``a.csv-filled.csv.gz``.
    all_suffixes = "".join(path.suffixes)
    base_name = path.name[: len(path.name) - len(all_suffixes)] if all_suffixes else path.name
    default = str(path.with_name(base_name + "-filled" + all_suffixes))
    to = MiscUtils.adjust_filename(to, default, replace)
    df = IdMatchFrame.read_file(path)
    # Each --no-* flag disables the corresponding data source.
    df = CompoundIdFiller(chembl=not no_chembl, pubchem=not no_pubchem).fill(df)
    df.write_file(to)
238
|
|
|
|
239
|
|
|
@staticmethod
def cache_data(
    path: Path = Ca.compounds,
    no_pubchem: bool = Opt.flag(r"Do not download data from PubChem", "--no-pubchem"),
    # NOTE(review): "--no_chembl" uses an underscore, unlike "--no-pubchem" above
    # and "--no-chembl" in ``fill`` -- confirm whether this spelling is intended.
    no_chembl: bool = Opt.flag(r"Do not fetch IDs from ChEMBL", "--no_chembl"),
    log: Optional[Path] = CommonArgs.log_path,
    quiet: bool = CommonArgs.quiet,
    verbose: bool = CommonArgs.verbose,
) -> None:
    r"""
    Fetch and cache compound data.

    Useful to freeze data before running a search.
    """
    set_up(log, quiet, verbose)
    logger.error("Not implemented fully yet.")
    compounds = IdMatchFrame.read_file(path)
    filler = CompoundIdFiller(chembl=not no_chembl, pubchem=not no_pubchem)
    # The filled table is discarded; nothing is written by this command.
    filler.fill(compounds)
    logger.notice("Done caching.")
|
|
|
|
258
|
|
|
|
259
|
|
|
@staticmethod
def export_taxa(
    taxa: str = Ca.taxa,
    forbid: str = Opt.val(
        r"""Exclude descendents of these taxa IDs or names (comma-separated).""", default=""
    ),
    to: Path = typer.Option(
        None,
        help=rf"""
        Where to export.

        {Ca.output_formats}

        [default: ./<taxa>-<datetime>.{DEF_SUFFIX}]
        """,
    ),
    replace: bool = Ca.replace,
    in_cache: bool = CommonArgs.in_cache,
    log: Optional[Path] = CommonArgs.log_path,
    quiet: bool = CommonArgs.quiet,
    verbose: bool = CommonArgs.verbose,
):
    """
    Export a taxonomic tree to a table.

    Writes a taxonomy of given taxa and their descendants to a table.
    """
    set_up(log, quiet, verbose)
    # Capture the raw option strings for the default filename before parsing them.
    raw_label = "-".join((taxa, forbid))
    allowed = Ca.parse_taxa(taxa)
    forbidden = Ca.parse_taxa(forbid)
    default = raw_label + "-" + MandosResources.start_timestamp_filesys + DEF_SUFFIX
    to = MiscUtils.adjust_filename(to, default, replace)
    taxonomy_df = TaxonomyFactories.get_smart_taxonomy(allowed, forbidden).to_df()
    # Create missing parent directories before writing.
    to.parent.mkdir(exist_ok=True, parents=True)
    taxonomy_df.write_file(to)
296
|
|
|
|
297
|
|
|
@staticmethod
def cache_taxa(
    taxa: str = Opt.val(
        r"""
        Either "vertebrata", "all", or a comma-separated list of UniProt taxon IDs.

        "all" is only valid when --replace is passed;
        this will regenerate all taxonomy files that are found in the cache.
        """,
        default="",
    ),
    replace: bool = Ca.replace,
    log: Optional[Path] = CommonArgs.log_path,
    quiet: bool = CommonArgs.quiet,
    verbose: bool = CommonArgs.verbose,
) -> None:
    """
    Prep a new taxonomy file for use in mandos.

    With --replace set, will delete any existing file.
    This can be useful to make sure your cached taxonomy is up-to-date before running.

    Downloads and converts a tab-separated file from UniProt.
    (To find manually, follow the ``All lower taxonomy nodes`` link and click ``Download``.)
    Then applies fixes and reduces the file size, creating a new file alongside.
    Puts both the raw data and fixed data in the cache under ``~/.mandos/taxonomy/``.

    Raises:
        ValueError: If ``taxa`` is not "all", "vertebrata", or a comma-separated
            list of digit-only IDs, or if "all" is given without ``--replace``.
    """
    # Configure logging first (as the sibling commands do) so the messages below
    # honor --log / --quiet / --verbose.
    set_up(log, quiet, verbose)
    if taxa == "":
        logger.info("No taxa were specified. No data downloaded.")
        return
    taxon_ids = []
    if taxa not in ("all", "vertebrata"):
        # Validate every comma-separated entry on its own: a whole-string digit
        # check lets inputs such as "1,,2" or "1 2" through, which then fail in
        # int() with an unhelpful message.
        tokens = [token.strip() for token in taxa.split(",")]
        if not all(token.isdigit() for token in tokens):
            raise ValueError("Use either 'all', 'vertebrata', or a UniProt taxon ID")
        taxon_ids = [int(token) for token in tokens]
    if taxa == "all" and not replace:
        raise ValueError("Use --replace with taxon 'all'")
    factory = TaxonomyFactories.from_uniprot()
    if taxa == "all":
        # ``replace`` is guaranteed here: remove every cached file, then regenerate.
        listed = TaxonomyFactories.list_cached_files()
        for cached_file in listed.values():
            cached_file.unlink()
        factory.rebuild_vertebrata()
        for taxon in listed:
            factory.load_dl(taxon)
    elif taxa == "vertebrata" and (replace or not factory.resolve_path(7742).exists()):
        factory.rebuild_vertebrata()
    elif taxa == "vertebrata":
        factory.load_vertebrate(7742)  # should usually do nothing
    else:
        # NOTE(review): explicit IDs are only passed to ``delete_exact``; nothing is
        # re-downloaded here -- confirm that this is the intended behavior.
        for taxon in taxon_ids:
            factory.delete_exact(taxon)
350
|
|
|
|
351
|
|
|
@staticmethod
def cache_g2p(
    replace: bool = Ca.replace,
    log: Optional[Path] = CommonArgs.log_path,
    quiet: bool = CommonArgs.quiet,
    verbose: bool = CommonArgs.verbose,
) -> None:
    """
    Caches GuideToPharmacology data.

    With --replace set, will overwrite existing cached data.
    Data will generally be stored under ``~/.mandos/g2p/``.
    """
    set_up(log, quiet, verbose)
    # --replace is forwarded as ``force`` to the download call.
    CachingG2pApi(MANDOS_SETTINGS.g2p_cache_path).download(force=replace)
367
|
|
|
|
368
|
|
|
@staticmethod
def cache_clear(
    log: Optional[Path] = CommonArgs.log_path,
    quiet: bool = CommonArgs.quiet,
    verbose: bool = CommonArgs.verbose,
    yes: bool = CommonArgs.yes,
) -> None:
    """
    Deletes all cached data.

    Lists every cache path, asks for confirmation unless ``--yes`` was passed,
    and then removes each path (directories recursively).
    """
    # Local import keeps this block self-contained; only this command needs shutil.
    import shutil

    set_up(log, quiet, verbose)
    # Materialize once so the listing and the deletion cover the same paths.
    cache_paths = list(MANDOS_SETTINGS.all_cache_paths)
    typer.echo("Will recursively delete all of these paths:")
    for cache_path in cache_paths:
        typer.echo(f" {cache_path}")
    if not yes:
        # abort=True makes typer stop the command if the user declines.
        typer.confirm("Delete?", abort=True)
    for cache_path in cache_paths:
        if cache_path.is_dir() and not cache_path.is_symlink():
            # Path.unlink() raises on directories, so the promised recursive
            # delete needs rmtree.
            shutil.rmtree(cache_path)
        else:
            cache_path.unlink(missing_ok=True)
    logger.notice("Deleted all cached data")
387
|
|
|
|
388
|
|
|
@staticmethod
def concat(
    path: Path = Ca.input_dir,
    to: Optional[Path] = Ca.to_single,
    replace: bool = Ca.replace,
    log: Optional[Path] = CommonArgs.log_path,
    quiet: bool = CommonArgs.quiet,
    verbose: bool = CommonArgs.verbose,
) -> None:
    r"""
    Concatenate Mandos annotation files into one.

    Note that ``:search`` automatically performs this;
    this is needed only if you want to combine results from multiple independent searches.
    """
    set_up(log, quiet, verbose)
    default_name = "concat" + DEF_SUFFIX
    to = MiscUtils.adjust_filename(to, path / default_name, replace)
    # Placeholder: the directory entries are enumerated, but nothing is read,
    # combined, or written yet.
    for _entry in path.iterdir():
        pass
408
|
|
|
|
409
|
|
|
@staticmethod
def filter(
    # NOTE(review): ``path`` uses ``Ca.to_single``, the same definition as ``to``
    # below, whereas sibling commands use ``Ca.file_input`` -- confirm.
    path: Path = Ca.to_single,
    by: Optional[Path] = Arg.in_file(
        r"""
        Path to a TOML (.toml) file containing filters.

        The file contains a list of ``mandos.filter`` keys,
        each containing an expression on a single column.
        This is only meant for simple, quick-and-dirty filtration.

        See the docs for more info.
        """
    ),
    to: Optional[Path] = Ca.to_single,
    replace: bool = Ca.replace,
    log: Optional[Path] = CommonArgs.log_path,
    quiet: bool = CommonArgs.quiet,
    verbose: bool = CommonArgs.verbose,
) -> None:
    """
    Filters by simple expressions.
    """
    set_up(log, quiet, verbose)
    # Default output name: the input path, then "-filter-", the filter file's stem,
    # and the default table suffix.
    default = str(path) + "-filter-" + by.stem + DEF_SUFFIX
    to = MiscUtils.adjust_filename(to, default, replace)
    hits_df = HitFrame.read_file(path)
    filtration = Filtration.from_file(by)
    filtration.apply(hits_df).write_file(to)
437
|
|
|
|
438
|
|
|
@staticmethod
def export_state(
    path: Path = Ca.file_input,
    to: Optional[Path] = Opt.out_path(
        """
        Path to the output file.

        Valid formats and filename suffixes are .nt and .txt with an optional .gz, .zip, or .xz.
        If only a filename suffix is provided, will use that suffix with the default directory.
        If no suffix is provided, will interpret the path as a directory and use the default filename.
        Will fail if the file exists and ``--replace`` is not set.

        [default: <path>-statements.nt]
        """
    ),
    replace: bool = Ca.replace,
    log: Optional[Path] = CommonArgs.log_path,
    quiet: bool = CommonArgs.quiet,
    verbose: bool = CommonArgs.verbose,
) -> None:
    """
    Output simple N-triples statements.

    Each statement is of this form, where the InChI Key refers to the input data:

    `"InChI Key" "predicate" "object" .`
    """
    set_up(log, quiet, verbose)
    default = f"{path}-statements.nt"
    to = MiscUtils.adjust_filename(to, default, replace)
    hits = HitFrame.read_file(path).to_hits()
    # Open for writing: the default mode of Path.open() is read-only, so every
    # write would fail. N-Triples is defined as UTF-8.
    # NOTE(review): a plain text handle is used, so the compressed suffixes
    # mentioned in the help text are not applied here -- confirm.
    with to.open("w", encoding="utf-8") as f:
        for hit in hits:
            f.write(hit.to_triple.n_triples)
472
|
|
|
|
473
|
|
|
@staticmethod
def export_reify(
    path: Path = Ca.file_input,
    to: Optional[Path] = Opt.out_path(
        r"""
        Path to the output file.

        The filename suffix should be either .nt (N-triples) or .ttl (Turtle),
        with an optional .gz, .zip, or .xz.
        If only a filename suffix is provided, will use that suffix with the default directory.
        If no suffix is provided, will interpret the path as a directory but use the default filename.
        Will fail if the file exists and ``--replace`` is not set.

        [default: <path>-reified.nt]
        """
    ),
    replace: bool = Ca.replace,
    log: Optional[Path] = CommonArgs.log_path,
    quiet: bool = CommonArgs.quiet,
    verbose: bool = CommonArgs.verbose,
) -> None:
    """
    Outputs reified semantic triples.
    """
    set_up(log, quiet, verbose)
    default = f"{path}-reified.nt"
    to = MiscUtils.adjust_filename(to, default, replace)
    hits = HitFrame.read_file(path).to_hits()
    # Open for writing: the default mode of Path.open() is read-only, so every
    # write would fail. N-Triples is defined as UTF-8.
    # NOTE(review): output is always N-triples text through a plain handle; the
    # .ttl and compressed variants in the help text are not handled here -- confirm.
    with to.open("w", encoding="utf-8") as f:
        for triple in Reifier().reify(hits):
            f.write(triple.n_triples)
504
|
|
|
|
505
|
|
|
@staticmethod
def export_copy(
    path: Path = Ca.file_input,
    to: Optional[Path] = Opt.out_path(
        rf"""
        Path to the output file.

        {Ca.output_formats}

        [default: <path.parent>/export{DEF_SUFFIX}]
        """
    ),
    replace: bool = Ca.replace,
    log: Optional[Path] = CommonArgs.log_path,
    quiet: bool = CommonArgs.quiet,
    verbose: bool = CommonArgs.verbose,
) -> None:
    """
    Copies and/or converts annotation files.

    Example: ``:export:copy --to .snappy`` to highly compress a data set.
    """
    set_up(log, quiet, verbose)
    # Match the documented default ``<path.parent>/export<suffix>``; using the bare
    # suffix as the filename would produce a file named only by its extension.
    default = path.parent / ("export" + DEF_SUFFIX)
    to = MiscUtils.adjust_filename(to, default, replace)
    df = HitFrame.read_file(path)
    df.write_file(to)
532
|
|
|
|
533
|
|
|
@staticmethod
def calc_analysis(
    path: Path = Ca.file_input,
    phi: Path = Ca.input_matrix,
    scores: Path = Ca.alpha_input,
    seed: int = Ca.seed,
    samples: int = Ca.boot,
    to: Optional[Path] = Ca.misc_out_dir,
    replace: bool = Ca.replace,
    log: Optional[Path] = CommonArgs.log_path,
    quiet: bool = CommonArgs.quiet,
    verbose: bool = CommonArgs.verbose,
) -> None:
    """
    Shorthand for multiple calculations and plots.

    Generates n-triple statements and reified n-triples.
    Calculates correlation and enrichment using ``scores``,
    psi matrices (one per variable), and concordance between psi and tau matrices (tau).
    Plots UMAP of psi variables, enrichment bar plots, correlation violin plots,
    phi-vs-psi scatter and line plots, and phi-vs-psi (tau) violin plots.
    """
    # The body consists of the docstring only: no logging setup and no work is done.
555
|
|
|
|
556
|
|
|
@staticmethod
def calc_score(
    path: Path = Ca.file_input,
    scores: Path = Ca.alpha_input,
    bool_alg: Optional[str] = Opt.val(
        rf"""
        Algorithm to use for scores starting with 'is_'.

        Allowed values: {Ca.list(BoolAlg)}
        """,
        default="alpha",
    ),
    real_alg: Optional[str] = Opt.val(
        rf"""
        Algorithm to use for scores starting with 'score_'.

        Allowed values: {Ca.list(RealAlg)}
        """,
        default="weighted",
    ),
    on: bool = Ca.on,
    boot: int = Ca.boot,
    seed: int = Ca.seed,
    to: Optional[Path] = Ca.alpha_to,
    replace: bool = Ca.replace,
    log: Optional[Path] = CommonArgs.log_path,
    quiet: bool = CommonArgs.quiet,
    verbose: bool = CommonArgs.verbose,
) -> None:
    """
    Compare annotations to user-supplied values.

    Calculates correlation between provided scores and object/predicate pairs.
    For booleans, compares annotations for hits and non-hits.
    See the docs for more info.
    """
    set_up(log, quiet, verbose)
    # Default output name combines the hits path with the score file's name.
    default = f"{path}-{scores.name}{DEF_SUFFIX}"
    to = MiscUtils.adjust_filename(to, default, replace)
    hit_df = HitFrame.read_file(path)
    score_df = ScoreDf.read_file(scores)
    # ``on`` is accepted but not used by this body.
    enrichment = EnrichmentCalculation(bool_alg, real_alg, boot, seed)
    enrichment.calculate(hit_df, score_df).write_file(to)
600
|
|
|
|
601
|
|
|
@staticmethod
def calc_psi(
    path: Path = Ca.file_input,
    algorithm: str = Opt.val(
        r"""
        The algorithm for calculating similarity between annotation sets.

        Currently, only "j" (J') is supported. Refer to the docs for the equation.
        """,
        default="j",
    ),
    to: Path = Ca.output_matrix,
    replace: bool = Ca.replace,
    log: Optional[Path] = CommonArgs.log_path,
    quiet: bool = CommonArgs.quiet,
    verbose: bool = CommonArgs.verbose,
) -> None:
    r"""
    Calculate a similarity matrix from annotations.

    The data are output as a dataframe (CSV by default), where rows and columns correspond
    to compounds, and the cell i,j is the overlap J' in annotations between compounds i and j.
    """
    set_up(log, quiet, verbose)
    # Default output sits next to the input and is named after the algorithm.
    default_name = algorithm + DEF_SUFFIX
    to = MiscUtils.adjust_filename(to, path.parent / default_name, replace)
    hits = HitFrame.read_file(path).to_hits()
    similarity = MatrixCalculation.create(algorithm).calc_all(hits)
    similarity.write_file(to)
631
|
|
|
|
632
|
|
|
@staticmethod
def calc_ecfp(
    path: Path = CommonArgs.compounds,
    radius: int = Opt.val(r"""Radius of the ECFP fingerprint.""", default=4),
    n_bits: int = Opt.val(r"""Number of bits.""", default=2048),
    psi: bool = Opt.flag(
        r"""Use "psi" as the type in the resulting matrix instead of "phi"."""
    ),
    to: Path = Ca.output_matrix,
    replace: bool = Ca.replace,
    log: Optional[Path] = CommonArgs.log_path,
    quiet: bool = CommonArgs.quiet,
    verbose: bool = CommonArgs.verbose,
) -> None:
    r"""
    Compute a similarity matrix from ECFP fingerprints.

    Requires rdkit to be installed.

    This is a bit faster than computing using a search and then calculating with ``:calc:psi``.
    Values range from 0 (no overlap) to 1 (identical).
    The type will be "phi" -- in contrast to using :calc:phi.
    See ``:calc:phi`` for more info.
    This is most useful for comparing a phenotypic phi against pure structural similarity.
    """
    set_up(log, quiet, verbose)
    # The same label serves as the default filename stem and as the matrix key.
    label = f"ecfp{radius}-n{n_bits}"
    to = MiscUtils.adjust_filename(to, path.parent / (label + DEF_SUFFIX), replace)
    compounds = InputFrame.read_file(path)
    if psi:
        kind = "psi"
    else:
        kind = "phi"
    short_form = MatrixPrep.ecfp_matrix(compounds, radius, n_bits)
    # All three positional flags of MatrixPrep are switched off here.
    prep = MatrixPrep(kind, False, False, False)
    prep.create({label: short_form}).write_file(to)
666
|
|
|
|
667
|
|
|
@staticmethod
def calc_tau(
    phi: Path = Ca.input_matrix,
    psi: Path = Ca.input_matrix,
    algorithm: str = Opt.val(
        r"""
        The algorithm for calculating concordance.

        Currently, only "tau" is supported.
        This calculation is a modified Kendall’s τ-a, where disconcordant ignores ties.
        See the docs for more info.
        """,
        default="tau",
    ),
    seed: int = Ca.seed,
    samples: int = Ca.boot,
    to: Optional[Path] = Opt.out_file(
        rf"""
        The path to a table for output.

        {Ca.output_formats}

        [default: <input-path.parent>/<algorithm>-concordance.{DEF_SUFFIX}]
        """,
    ),
    replace: bool = Ca.replace,
    log: Optional[Path] = CommonArgs.log_path,
    quiet: bool = CommonArgs.quiet,
    verbose: bool = CommonArgs.verbose,
) -> None:
    r"""
    Calculate correlation between matrices.

    Values are calculated over bootstrap, outputting a table.

    Phi is typically a phenotypic matrix, and psi a matrix from Mandos.
    This command is designed to calculate the similarity between compound annotations
    (from Mandos) and some user-input compound–compound similarity matrix.
    (For example, vectors from a high-content cell screen.)
    See ``:calc:correlation`` or ``:calc:enrichment`` if you have a single variable,
    such as a hit or lead-like score.
    """
    set_up(log, quiet, verbose)
    # Default output: next to the phi file, named from the psi file's stem and the algorithm.
    default = phi.parent / f"{psi.stem}-{algorithm}{DEF_SUFFIX}"
    to = MiscUtils.adjust_filename(to, default, replace)
    phi_df = SimilarityDfLongForm.read_file(phi)
    psi_df = SimilarityDfLongForm.read_file(psi)
    # The loaded tables go both to the factory and to calc_all.
    concordance_calc = ConcordanceCalculation.create(algorithm, phi_df, psi_df, samples, seed)
    concordance_calc.calc_all(phi_df, psi_df).write_file(to)
717
|
|
|
|
718
|
|
|
@staticmethod
def calc_project(
    psi_matrix: Path = Ca.input_matrix,
    algorithm: str = Opt.val(
        r"""
        Projection algorithm.

        Currently only "umap" is supported.
        """,
        default="umap",
    ),
    seed: str = Opt.val(
        r"""
        Random seed (integer or 'none').

        Setting to 'none' may increase performance.
        """,
        default=0,
    ),
    params: str = Opt.val(
        rf"""
        Parameters fed to the algorithm.

        This is a comma-separated list of key=value pairs.
        For example: ``n_neighbors=4,n_components=12,min_dist=0.8``
        Supports all UMAP parameters except random_state and metric:

        {Ca.definition_list(_umap_params) if UMAP else "<list is unavailable>"}
        """,
        default="",
    ),
    to: Optional[Path] = Ca.project_to,
    replace: bool = Ca.replace,
    log: Optional[Path] = CommonArgs.log_path,
    quiet: bool = CommonArgs.quiet,
    verbose: bool = CommonArgs.verbose,
) -> None:
    r"""
    Calculate compound UMAP from psi matrices.

    The input should probably be calculated from ``:calc:matrix``.
    Saves a table of the UMAP coordinates.
    """
    # Only the availability check exists so far: no logging setup, no projection,
    # and no output is written by this body.
    umap_missing = UMAP is None
    if umap_missing and algorithm == "umap":
        raise ImportError("UMAP is not available")
|
|
|
|
763
|
|
|
|
764
|
|
|
@staticmethod
def format_phi(
    matrices: List[Path] = Ca.input_matrix_short_form,
    kind: str = Ca.var_type,
    to: Optional[Path] = Ca.output_matrix,
    replace: bool = Ca.replace,
    normalize: bool = Opt.flag(
        r"""Rescale values to between 0 and 1 by (v-min) / (max-min). (Performed after negation.)"""
    ),
    # NOTE(review): these two booleans use Opt.val while ``normalize`` uses
    # Opt.flag -- confirm whether that difference is intentional.
    log10: bool = Opt.val(r"""Rescales values by log10. (Performed after normalization.)"""),
    invert: bool = Opt.val(r"""Multiplies the values by -1. (Performed first.)"""),
    log: Optional[Path] = CommonArgs.log_path,
    quiet: bool = CommonArgs.quiet,
    verbose: bool = CommonArgs.verbose,
) -> None:
    r"""
    Convert phi matrices to one long-form matrix.

    The keys will be derived from the filenames.
    """
    set_up(log, quiet, verbose)
    # Fallback location used when no matrix path is available to derive one.
    default = "."
    if to is None:
        # Collect the distinct parent directories in first-seen order.
        # dict.fromkeys preserves insertion order, whereas picking an element
        # out of a set is arbitrary and can differ between interpreter runs.
        parents = list(dict.fromkeys(mx.parent for mx in matrices))
        if parents:
            default = parents[0]
            if len(parents) > 1:
                logger.warning(
                    f"Input matrices are in {len(parents)} directories; outputting to {default}"
                )
        else:
            logger.warning(f"Outputting to {default}")
    # Resolve the final output path from the (possibly missing) ``to``,
    # the chosen default, and the ``replace`` option.
    to = MiscUtils.adjust_filename(to, default, replace)
    long_form = MatrixPrep(kind, normalize, log10, invert).from_files(matrices)
    long_form.write_file(to)
794
|
|
|
|
795
|
|
|
@staticmethod
def plot_project(
    umap_df: Path = Ca.project_input,
    style: Optional[Path] = Ca.style_for_compounds,
    color_col: Optional[str] = Ca.color_col,
    marker_col: Optional[str] = Ca.marker_col,
    to: Optional[Path] = Ca.plot_to,
) -> None:
    r"""
    Plot a projection (UMAP, etc.) of compounds from psi matrices.

    One column is drawn per variable (psi).
    """
    # Only the signature and help text are defined in this block; there is
    # no body, so calling the function returns None without doing any work.
808
|
|
|
|
809
|
|
|
@staticmethod
def plot_score(
    path: Path = Ca.input_correlation,
    kind: str = Ca.plot_kind,
    style: Optional[Path] = Ca.style_for_pairs,
    color_col: Optional[str] = Ca.color_col,
    marker_col: Optional[str] = Ca.marker_col,
    ci: float = Ca.ci,
    to: Optional[Path] = Ca.plot_to,
) -> None:
    r"""
    Plot correlation to scores.

    Shows how predicate/object pairs correlate with user-supplied scores.
    One figure (file) is output per scoring function.
    The (psi, score-fn) pairs are laid out over a grid,
    with one row per scoring function and one column per psi.
    """
    # Only the signature and help text are defined in this block; there is
    # no body, so calling the function returns None without doing any work.
827
|
|
|
|
828
|
|
|
@staticmethod
def plot_phi_psi(
    path: Path = Ca.input_matrix,
    join: Optional[bool] = Opt.flag(
        r"""
        Pool all psi variables into a single column with multiple plots.
        """
    ),
    # NOTE(review): the first help line names three kinds but the list below
    # also describes 'ci' -- confirm which set is actually accepted.
    kind: str = Opt.val(
        r"""
        Either 'points', 'lines', or 'points+lines'.

        - points: Scatter plots of (phi, psi) values.

        - lines: Plot a linear interpolation.

        - ci: Plot a linear interpolation with a confidence band.

        - points+lines: Both 'points' and 'lines'.
        """,
        "--type",
    ),
    ci: float = Ca.ci,
    sort_by: str = Opt.val(
        r"""
        Which axis to sort by: 'phi'/'x' or 'psi'/'y'.

        Sorting by psi values (y-axis) makes it easier to compare psi variables,
        while sorting by phi values (x-axis) makes it easier to compare phi variables.
        """,
        default="psi",
    ),
    style: Optional[Path] = Ca.style_for_psi,
    color_col: Optional[str] = Ca.color_col,
    marker_col: Optional[str] = Ca.marker_col,
    to: Optional[Path] = Ca.plot_to,
) -> None:
    r"""
    Plot line plots of phi against psi.

    Plots scatter plots of (phi, psi) values, sorted by the axis chosen
    with the sort-by option (psi by default).
    All plots are log/log (all similarity values should be scaled from 0 to 1).

    For each unique phi matrix and psi matrix, flattens the matrices and plots
    the flattened (n choose 2 - n) pairs of each jointly, phi mapped to the y-axis
    and psi mapped to the x-axis.

    With ``--join``:

    Will show values for all psi variables together.
    If ``--color`` is not set, will choose a palette.
    Works best with ``--type lines``.

    Without ``--join``:

    Will plot each (phi, psi) pair over a grid, one plot per cell:
    One row per phi and one column per psi.
    """
    # NOTE(review): the docstring maps phi to the y-axis and psi to the
    # x-axis, while the ``sort_by`` help pairs phi with 'x' and psi with 'y'.
    # One of the two is wrong -- confirm against the plotting code.
    # Only the signature and help text are defined in this block; there is
    # no body, so calling the function returns None without doing any work.
886
|
|
|
|
887
|
|
|
@staticmethod
def plot_tau(
    path: Path = Ca.input_matrix,
    split: bool = Opt.flag(
        r"""
        Split each violin into phi #1 on the left and phi #2 on the right.

        Useful to compare two phi variables. Requires exactly 2.
        """
    ),
    style: Optional[Path] = Ca.style_for_psi,
    color_col: Optional[str] = Ca.color_col,
    marker_col: Optional[str] = Ca.marker_col,
    to: Optional[Path] = Ca.plot_to,
) -> None:
    r"""
    Plot violin plots from tau values.

    The input data should be generated by ``:calc:phi-vs-psi.tau``.

    Unless ``--split`` is set, each (phi, psi) pair is plotted over a grid
    with one row per phi and one column per psi.
    """
    # Only the signature and help text are defined in this block; there is
    # no body, so calling the function returns None without doing any work.
910
|
|
|
|
911
|
|
|
|
912
|
|
|
# Names exported by a star-import of this module.
__all__ = ["MiscCommands"]
913
|
|
|
|