Passed
Push — main ( 7b3fbc...cf9f8c )
by Douglas
01:44
created

mandos.commands.MiscCommands.deposit()   A

Complexity

Conditions 1

Size

Total Lines 12
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 9
nop 6
dl 0
loc 12
rs 9.95
c 0
b 0
f 0
1
"""
2
Command-line interface for mandos.
3
"""
4
5
from __future__ import annotations
6
7
import re
8
from pathlib import Path
9
from typing import Optional
10
11
import typer
0 ignored issues
show
introduced by
Unable to import 'typer'
Loading history...
12
13
from mandos.analysis import SimilarityDf
14
from mandos.analysis.concordance import TauConcordanceCalculator
15
from mandos.analysis.distances import JPrimeMatrixCalculator
16
from mandos.analysis.filtration import Filtration
17
from mandos.analysis.reification import ReifiedExporter
18
from mandos.model import MiscUtils, START_NTP_TIMESTAMP
19
from mandos.model.hits import HitFrame
20
from mandos.model.settings import MANDOS_SETTINGS
21
from mandos.model.taxonomy_caches import TaxonomyFactories
22
from mandos.entries.multi_searches import MultiSearch
23
from mandos.entries.searcher import SearcherUtils
24
from mandos.entries.common_args import Arg, Opt, CommonArgs
25
26
27
class MiscCommands:
0 ignored issues
show
introduced by
Missing class docstring
Loading history...
28
    @staticmethod
29
    def search(
        path: Path = CommonArgs.compounds,
        config: Path = Arg.in_file(
            r"""
            A TOML config file. See docs.
            """
        ),
    ) -> None:
        """
        Run multiple searches.

        Builds a ``MultiSearch`` from the compounds file and the TOML config, then runs it.
        """
        multi_search = MultiSearch(path, config)
        multi_search.search()
41
42
    @staticmethod
43
    def serve(
        port: int = Opt.val("A port to serve on", default=1540),
        db: str = Opt.val("The name of the MySQL database", default="mandos"),
    ) -> None:
        """
        Start the REST server.

        The connection information is stored in your global settings file.
        """
        # NOTE(review): there is no implementation here yet; ``port`` and ``db``
        # are accepted but never used.
        return None
52
53
    @staticmethod
54
    def deposit(
        path: Path = CommonArgs.file_input,
        db: str = Opt.val("The name of the MySQL database", default="mandos"),
        host: str = Opt.val(
            "Database hostname (ignored if ``--socket`` is passed)", default="127.0.0.1"
        ),
        socket: Optional[str] = Opt.val("Path to a Unix socket (if set, ``--host`` is ignored)"),
        user: Optional[str] = Opt.val("Database username (empty if not set)"),
        password: Optional[str] = Opt.val("Database password (empty if not set)"),
    ) -> None:
        r"""
        Export to a relational database.

        Saves data from Mandos search commands to a database for serving via REST.

        See also: ``:serve``.
        """
        # NOTE(review): there is no implementation here yet; every argument is
        # accepted but unused.
71
72
    @staticmethod
73
    def find(
        path: Path = CommonArgs.compounds,
        to: Path = Opt.out_path(
            rf"""
            A table of compounds and their matching database IDs will be written here.

            {CommonArgs.output_formats}

            [default: <path>-ids-<start-time>.{MANDOS_SETTINGS.default_table_suffix}]
            """
        ),
        replace: bool = CommonArgs.replace,
        pubchem: bool = typer.Option(True, help="Download data from PubChem"),
        chembl: bool = typer.Option(True, help="Download data from ChEMBL"),
        hmdb: bool = typer.Option(True, help="Download data from HMDB"),
        complain: bool = Opt.flag("Log each time a compound is not found"),
    ) -> None:
        r"""
        Fetches and caches compound data.

        Useful to check what you can see before running a search.

        Reads the compounds from ``path``, downloads their data from the selected
        sources, writes the resulting table to ``to``, and echoes the output path.
        """
        # The help text documents the default as ``<path>-ids-<start-time>``; the
        # hyphen before the timestamp was missing (``build_taxonomy`` includes it).
        default = (
            str(path) + "-ids-" + START_NTP_TIMESTAMP + MANDOS_SETTINGS.default_table_suffix
        )
        to = MiscUtils.adjust_filename(to, default, replace)
        inchikeys = SearcherUtils.read(path)
        df = SearcherUtils.dl(
            inchikeys, pubchem=pubchem, chembl=chembl, hmdb=hmdb, complain=complain
        )
        df.write_file(to)
        typer.echo(f"Wrote to {to}")
103
104
    @staticmethod
105
    def build_taxonomy(
        taxa: str = CommonArgs.taxa,
        forbid: str = Opt.val(
            r"""Exclude descendents of these taxa IDs or names (comma-separated).""", default=""
        ),
        to: Path = typer.Option(
            None,
            help=rf"""
            Where to export a table of the taxonomy.

            {CommonArgs.output_formats}

            [default: ./<taxa>-<datetime>.{MANDOS_SETTINGS.default_table_suffix}]
            """,
        ),
        replace: bool = CommonArgs.replace,
    ):
        """
        Exports a taxonomic tree to a table.

        Writes a taxonomy of given taxa and their descendants to a table.
        The parent directory of the output file is created if needed.
        """
        # The default filename is built from the raw (unparsed) option strings.
        default = (
            "-".join((taxa, forbid, START_NTP_TIMESTAMP)) + MANDOS_SETTINGS.default_table_suffix
        )
        allowed = CommonArgs.parse_taxa(taxa)
        forbidden = CommonArgs.parse_taxa(forbid)
        to = MiscUtils.adjust_filename(to, default, replace)
        table = TaxonomyFactories.get_smart_taxonomy(allowed, forbidden).to_df()
        to.parent.mkdir(parents=True, exist_ok=True)
        table.write_file(to)
136
137
    @staticmethod
138
    def dl_tax(
        taxon: int = Arg.x("The **ID** of the UniProt taxon"),
    ) -> None:
        """
        Preps a new taxonomy file for use in mandos.

        Just returns if a corresponding file already exists in the resources dir
        or mandos cache (``~/.mandos``).
        Otherwise, downloads a tab-separated file from UniProt.
        (To find manually, follow the ``All lower taxonomy nodes`` link and click ``Download``.)
        Then applies fixes and reduces the file size, creating a new file alongside.
        Puts both the raw data and fixed data in the cache under ``~/.mandos/taxonomy/``.
        """
        cache_dir = MANDOS_SETTINGS.taxonomy_cache_path
        factory = TaxonomyFactories.from_uniprot(cache_dir)
        factory.load(taxon)
150
151
    @staticmethod
152
    def concat(
        path: Path = CommonArgs.dir_input,
        exclude: Optional[str] = CommonArgs.exclude,
        to: Optional[Path] = CommonArgs.to_single,
        replace: bool = CommonArgs.replace,
    ) -> None:
        r"""
        Concatenates Mandos annotation files into one.

        Note that ``:search`` automatically performs this;
        this is needed only if you want to combine results from multiple independent searches.

        Files directly under ``path`` whose names fully match the ``exclude`` regex are skipped.
        """
        default = path / ("concat" + MANDOS_SETTINGS.default_table_suffix)
        to = MiscUtils.adjust_filename(to, default, replace)
        # ``exclude`` is optional; ``re.compile(None)`` would raise a TypeError,
        # so no pattern means nothing is excluded.
        pattern = None if exclude is None else re.compile(exclude)
        for found in path.iterdir():
            if pattern is None or pattern.fullmatch(found.name) is None:
                # NOTE(review): the actual concatenation is not implemented yet;
                # matching files are visited but nothing is read or written.
                pass
170
171
    @staticmethod
172
    def filter_taxa(
        path: Path = CommonArgs.file_input,
        to: Path = Opt.out_path(
            f"""
            An output path (file or directory).

            {CommonArgs.output_formats}

            [default: <path>/<filters>.feather]
            """
        ),
        allow: str = CommonArgs.taxa,
        forbid: str = CommonArgs.taxa,
        replace: bool = CommonArgs.replace,
    ) -> None:
        """
        Filter by taxa.

        You can include any number of taxa to allow and any number to forbid.
        All descendents of the specified taxa are used.
        Taxa will be excluded if they fall under both.

        Note that the <path> argument need not come from Mandos.
        All that is required is a column called ``taxon``, ``taxon_id``, or ``taxon_name``.

        See also: :filter, which is more general.
        """
        # Keep the raw option strings for the filename before parsing them.
        concat = allow + "-" + forbid
        allow = CommonArgs.parse_taxa(allow)
        forbid = CommonArgs.parse_taxa(forbid)
        if to is None:
            to = path.parent / (concat + MANDOS_SETTINGS.default_table_suffix)
        default = str(path) + "-filter-taxa-" + concat + MANDOS_SETTINGS.default_table_suffix
        to = MiscUtils.adjust_filename(to, default, replace)
        df = HitFrame.read_file(path)
        my_tax = TaxonomyFactories.get_smart_taxonomy(allow, forbid)
        # Only consult the taxon columns that are actually present in the table.
        cols = [c for c in ["taxon", "taxon_id", "taxon_name"] if c in df.columns]

        def permit(row) -> bool:
            """Return True if any taxon column of the row resolves in the taxonomy."""
            return any(my_tax.get_by_id_or_name(row[c]) is not None for c in cols)

        # axis=1 makes ``apply`` hand over rows; the default (axis=0) passes whole
        # columns, which have no ``taxon*`` entries to look up.
        # NOTE(review): assumes HitFrame behaves like a pandas DataFrame -- confirm.
        df = df[df.apply(permit, axis=1)]
        df.write_file(to)
215
216
    @staticmethod
217
    def filter(
        path: Path = CommonArgs.to_single,
        by: Optional[Path] = Arg.in_file(
            """
            The path to a TOML (.toml) file containing filters.

            The file contains a list of ``mandos.filter`` keys,
            each containing an expression on a single column.
            This is only meant for simple, quick-and-dirty filtration.

            See the docs for more info.
            """
        ),
        to: Optional[Path] = CommonArgs.to_single,
        replace: bool = CommonArgs.replace,
    ) -> None:
        """
        Filters by simple expressions.

        Reads the table at ``path``, applies the filters defined in the ``by`` file,
        and writes the result to ``to``.
        """
        suffix = MANDOS_SETTINGS.default_table_suffix
        filter_name = by.stem
        if to is None:
            to = path.parent / (filter_name + suffix)
        default = "".join((str(path), "-filter-", filter_name, suffix))
        to = MiscUtils.adjust_filename(to, default, replace)
        hits = HitFrame.read_file(path)
        filtration = Filtration.from_file(by)
        filtered = filtration.apply(hits)
        filtered.write_file(to)
242
243
    @staticmethod
244
    def state(
        path: Path = CommonArgs.file_input,
        to: Optional[Path] = Opt.out_path(
            """
            The path to the output file.

            Valid formats and filename suffixes are .nt and .txt with an optional .gz, .zip, or .xz.
            If only a filename suffix is provided, will use that suffix with the default directory.
            If no suffix is provided, will interpret the path as a directory but use the default filename.
            Will fail if the file exists and ``--replace`` is not set.

            [default: <path>-statements.nt]
        """
        ),
        replace: bool = CommonArgs.replace,
    ) -> None:
        """
        Outputs simple N-triples statements.

        Each statement is of this form, where the InChI Key refers to the input data:

        `"InChI Key" "predicate" "object" .`
        """
        default = str(path) + "-statements.nt"
        to = MiscUtils.adjust_filename(to, default, replace)
        hits = HitFrame.read_file(path).to_hits()
        # Path.open() defaults to read mode, so the writes below would fail;
        # open explicitly for writing.
        # NOTE(review): no line separator is added here -- presumably
        # ``n_triples`` already ends each statement with a newline; confirm.
        with to.open("w", encoding="utf-8") as f:
            for hit in hits:
                f.write(hit.to_triple().n_triples)
273
274
    @staticmethod
275
    def reify(
        path: Path = CommonArgs.file_input,
        to: Optional[Path] = Opt.out_path(
            r"""
            The path to the output file.

            The filename suffix should be either .nt (N-triples) or .ttl (Turtle),
            with an optional .gz, .zip, or .xz.
            If only a filename suffix is provided, will use that suffix with the default directory.
            If no suffix is provided, will interpret the path as a directory but use the default filename.
            Will fail if the file exists and ``--replace`` is not set.

            [default: <path>-reified.nt]
        """
        ),
        replace: bool = CommonArgs.replace,
    ) -> None:
        """
        Outputs reified semantic triples.

        Reads the hits at ``path``, reifies them with ``ReifiedExporter``,
        and writes each resulting triple's ``n_triples`` text to ``to``.
        """
        default = str(path) + "-reified.nt"
        to = MiscUtils.adjust_filename(to, default, replace)
        hits = HitFrame.read_file(path).to_hits()
        # Path.open() defaults to read mode, so the writes below would fail;
        # open explicitly for writing.
        # NOTE(review): no line separator is added here -- presumably
        # ``n_triples`` already ends each statement with a newline; confirm.
        with to.open("w", encoding="utf-8") as f:
            for triple in ReifiedExporter().reify(hits):
                f.write(triple.n_triples)
301
302
    @staticmethod
303
    def copy(
        path: Path = CommonArgs.file_input,
        to: Optional[Path] = Opt.out_path(
            rf"""
            The path to the output file.

            {CommonArgs.output_formats}

            [default: <path.parent>/export{MANDOS_SETTINGS.default_table_suffix}]
        """
        ),
        replace: bool = CommonArgs.replace,
    ) -> None:
        """
        Copies and/or converts annotation files.

        Example: ``:export:copy --to .snappy`` to highly compress a data set.
        """
        # The documented default is ``<path.parent>/export<suffix>``; the "export"
        # stem was missing, leaving a name made of the bare suffix.
        default = str(path.parent / ("export" + MANDOS_SETTINGS.default_table_suffix))
        to = MiscUtils.adjust_filename(to, default, replace)
        # NOTE(review): only the output path is resolved; nothing is read from
        # ``path`` or written to ``to`` yet.
324
325
    @staticmethod
326
    def score(
        path: Path = CommonArgs.file_input,
        scores: Path = Arg.in_file(
            rf"""
            Path to a table containing scores.

            Must contain a column called ``inchikey`` or ``compound_id``
            matching the InChI Keys or compound IDs you provided for the search.

            Any number of scores may be included via columns.
            Each column must match the pattern ``^(?:score)|(?:score[-_ +:].*)$``.
            These values must be floating-point.

            For enrichment, you may also include columns signifying "hit vs. not".
            These columns must match the pattern ``^is[_- +:]$``.
            Values must be boolean (true/false, t/f, yes/no, y/n, 1/0).

            Example columns:

                inchikey    compound_id    is_hit    score_alpha

            {CommonArgs.input_formats}
            """
        ),
        to: Optional[Path] = Opt.out_file(
            rf"""
            Path to write regression info to.

            {CommonArgs.output_formats}

            Columns will correspond to the columns you provided.
            For example, ``r_score_alpha`` for the regression coefficient
            of the score ``alpha``, and ``fold_is_hit`` for the fraction (hits / non-hits) for ``is_hit``.

            [default: <path>-<scores.filename>{MANDOS_SETTINGS.default_table_suffix}]
            """
        ),
        counts: bool = Opt.flag(
            r"""Use only the *number* of object/predicate pairs, rather than their weights.""",
        ),
        replace: bool = CommonArgs.replace,
    ) -> None:
        """
        Compares annotations to user-supplied values.

        Calculates correlation between provided scores and object/predicate pairs,
        and/or enrichment of pairs for boolean scores.

        The values used are *weighted object/predicate pairs*,
        unless ``--counts`` is passed.
        See the docs for more info.
        """
        # NOTE(review): only the output path is resolved here; ``counts`` is
        # unused and no scoring is performed yet.
        default = "".join(
            (str(path), "-", scores.name, MANDOS_SETTINGS.default_table_suffix)
        )
        to = MiscUtils.adjust_filename(to, default, replace)
380
381
    @staticmethod
    def matrix(
        path: Path = CommonArgs.file_input,
        algorithm: str = Opt.val(
            r"""
            The algorithm for calculating similarity between annotation sets.

            Currently, only "j" (J') is supported. Refer to the docs for the equation.
            """
        ),
        to: Optional[Path] = Opt.out_file(
            rf"""
            The path to a similarity matrix file.

            {CommonArgs.output_formats}
            .txt is assumed to be whitespace-delimited.

            [default: <input-path.parent>/<algorithm>{MANDOS_SETTINGS.default_table_suffix}]
            """
        ),
        replace: bool = CommonArgs.replace,
    ) -> None:
        r"""
        Calculates a similarity matrix from annotations.

        The data are output as a dataframe (CSV by default), where rows and columns correspond
        to compounds, and the cell i,j is the overlap J' in annotations between compounds i and j.
        """
        # The default output sits next to the input file and is named after the algorithm,
        # with the configured table suffix appended directly (no extra separator).
        # NOTE(review): ``algorithm`` is only used to name the default file; it is not
        # validated here and J' is always the calculator used -- confirm this is intended.
        default = path.parent / (algorithm + MANDOS_SETTINGS.default_table_suffix)
        # Resolves the final output path from ``to``, ``default``, and ``replace``;
        # presumably also rejects an existing file unless ``replace`` is set -- see
        # ``MiscUtils.adjust_filename``.
        to = MiscUtils.adjust_filename(to, default, replace)
        # Load the annotation table and convert it to hit objects.
        hits = HitFrame.read_file(path).to_hits()
        # Compute the J' similarity matrix and write it out.
        similarity = JPrimeMatrixCalculator().calc(hits)
        similarity.write_file(to)
414
415
    @staticmethod
    def concordance(
        phi: Path = CommonArgs.input_matrix,
        psi: Path = CommonArgs.input_matrix,
        algorithm: str = Opt.val(
            r"""
            The algorithm for calculating concordance.

            Currently, only "tau" is supported.
            This calculation is a modified Kendall’s  τ-a, where discordant ignores ties.
            See the docs for more info.
            """
        ),
        seed: int = CommonArgs.seed,
        samples: int = CommonArgs.n_samples,
        to: Optional[Path] = Opt.out_file(
            rf"""
            The path to a dataframe file for output.

            {CommonArgs.output_formats}

            [default: <phi.parent>/<psi.stem>-<algorithm>{MANDOS_SETTINGS.default_table_suffix}]
            """
        ),
        replace: bool = CommonArgs.replace,
    ) -> None:
        r"""
        Calculate correlation between matrices.

        Values are calculated over bootstrap, outputting a dataframe (CSV by default).

        Phi is typically a phenotypic matrix, and psi a matrix from Mandos.
        Alternatively, these might be two matrices from Mandos.

        This command is designed to calculate the similarity between compound annotations
        (from Mandos) and some user-input compound–compound similarity matrix.
        (For example, vectors from a high-content cell screen.)
        See ``:calc:score`` if you have a single variable,
        such as a hit or lead-like score.
        """
        # Default output: next to phi, named "<psi stem>-<algorithm>" plus the configured
        # table suffix (appended directly, with no extra separator).
        # NOTE(review): ``algorithm`` is only used to name the default file; it is not
        # validated here and the tau calculator is always used -- confirm this is intended.
        if to is None:
            to = phi.parent / (psi.stem + "-" + algorithm + MANDOS_SETTINGS.default_table_suffix)
        # Refuse to clobber an existing file unless the caller asked for replacement.
        if to.exists() and not replace:
            raise FileExistsError(f"File {to} already exists")
        # Use separate names for the loaded matrices so the ``Path`` parameters are not
        # rebound to objects of a different type.
        phi_matrix = SimilarityDf.read_file(phi)
        psi_matrix = SimilarityDf.read_file(psi)
        # Compute the concordance between the two matrices and write the result.
        concordance = TauConcordanceCalculator(samples, seed).calc(phi_matrix, psi_matrix)
        concordance.write_file(to)
463
464
465
# Names exported by ``from <this module> import *``: only the command container class.
__all__ = ["MiscCommands"]
466