Passed
Push — main ( ec3fe3...82dd22 )
by Douglas
02:00
created

mandos.commands.MiscCommands.prep_phi()   A

Complexity

Conditions 3

Size

Total Lines 30
Code Lines 23

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 23
nop 10
dl 0
loc 30
rs 9.328
c 0
b 0
f 0

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand, but their parameters also often become inconsistent when you need more or different data.

There are several approaches to avoid long parameter lists:

1
"""
2
Command-line interface for mandos.
3
"""
4
5
from __future__ import annotations
6
7
from pathlib import Path
8
from typing import Optional, List
9
10
import typer
0 ignored issues
show
introduced by
Unable to import 'typer'
Loading history...
11
12
from mandos import logger, MANDOS_SETUP
13
from mandos.analysis.io_defns import SimilarityDfLongForm, SimilarityDfShortForm
0 ignored issues
show
Unused Code introduced by
Unused SimilarityDfShortForm imported from mandos.analysis.io_defns
Loading history...
14
from mandos.analysis.concordance import ConcordanceCalculation
15
from mandos.analysis.distances import MatrixCalculation
16
from mandos.analysis.filtration import Filtration
17
from mandos.analysis.enrichment import EnrichmentCalculation, RealAlg, BoolAlg
18
from mandos.analysis.io_defns import ScoreDf
19
from mandos.analysis.prepping import MatrixPrep
20
from mandos.analysis.projection import UmapCalc
0 ignored issues
show
Unused Code introduced by
Unused UmapCalc imported from mandos.analysis.projection
Loading history...
21
from mandos.analysis.reification import Reifier
22
from mandos.entries.common_args import Arg, CommonArgs
23
from mandos.entries.common_args import CommonArgs as Ca
0 ignored issues
show
Unused Code introduced by
The import CommonArgs was already done on line 22. You should be able to
remove this line.
Loading history...
24
from mandos.entries.common_args import Opt
25
from mandos.entries.multi_searches import MultiSearch
26
from mandos.entries.searcher import SearcherUtils, InputFrame, CompoundIdFiller, IdMatchFrame
27
from mandos.model import START_TIMESTAMP, MiscUtils
28
from mandos.model.hits import HitFrame
29
from mandos.model.settings import MANDOS_SETTINGS
30
from mandos.model.taxonomy_caches import TaxonomyFactories
31
from mandos.analysis.projection import UMAP
32
from mandos.model.rdkit_utils import RdkitUtils, Fingerprint
0 ignored issues
show
Unused Code introduced by
Unused RdkitUtils imported from mandos.model.rdkit_utils
Loading history...
Unused Code introduced by
Unused Fingerprint imported from mandos.model.rdkit_utils
Loading history...
33
34
set_up = MANDOS_SETUP
35
DEF_SUFFIX = MANDOS_SETTINGS.default_table_suffix
36
37
if UMAP is None:
38
    _umap_params = {}
39
else:
40
    _umap_params = {
41
        k: v
42
        for k, v in UMAP().get_params(deep=False).items()
43
        if k not in {"random_state", "metric"}
44
    }
45
46
47
class MiscCommands:
0 ignored issues
show
introduced by
Missing class docstring
Loading history...
best-practice introduced by
Too many public methods (23/20)
Loading history...
48
    @staticmethod
49
    def search(
0 ignored issues
show
best-practice introduced by
Too many arguments (6/5)
Loading history...
50
        path: Path = Ca.compounds,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
51
        config: Path = Arg.in_file(
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
52
            r"""
53
            TOML config file. See docs.
54
            """
55
        ),
56
        log: Optional[Path] = CommonArgs.log_path,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
57
        quiet: bool = CommonArgs.quiet,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
58
        verbose: bool = CommonArgs.verbose,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
59
        out_dir: Path = Ca.out_dir,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
60
    ) -> None:
61
        """
62
        Run multiple searches.
63
        """
64
        set_up(log, quiet, verbose)
65
        MultiSearch.build(path, out_dir, config).run()
66
67
    @staticmethod
68
    def serve(
0 ignored issues
show
Coding Style Naming introduced by
Argument name "db" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]{2,}|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
69
        port: int = Opt.val(r"Port to serve on", default=1540),
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
Unused Code introduced by
The argument port seems to be unused.
Loading history...
70
        db: str = Opt.val("Name of the MySQL database", default="mandos"),
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
Unused Code introduced by
The argument db seems to be unused.
Loading history...
71
        log: Optional[Path] = CommonArgs.log_path,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
72
        quiet: bool = CommonArgs.quiet,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
73
        verbose: bool = CommonArgs.verbose,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
74
    ) -> None:
75
        r"""
76
        Start a REST server.
77
78
        The connection information is stored in your global settings file.
79
        """
80
        set_up(log, quiet, verbose)
81
82
    @staticmethod
83
    def deposit(
0 ignored issues
show
Coding Style Naming introduced by
Argument name "db" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]{2,}|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
best-practice introduced by
Too many arguments (10/5)
Loading history...
84
        path: Path = Ca.file_input,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
Unused Code introduced by
The argument path seems to be unused.
Loading history...
85
        db: str = Opt.val(r"Name of the MySQL database", default="mandos"),
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
Unused Code introduced by
The argument db seems to be unused.
Loading history...
86
        host: str = Opt.val(
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
Unused Code introduced by
The argument host seems to be unused.
Loading history...
87
            r"Database hostname (ignored if ``--socket`` is passed", default="127.0.0.1"
88
        ),
89
        socket: Optional[str] = Opt.val("Path to a Unix socket (if set, ``--host`` is ignored)"),
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
Unused Code introduced by
The argument socket seems to be unused.
Loading history...
90
        user: Optional[str] = Opt.val("Database username (empty if not set)"),
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
Unused Code introduced by
The argument user seems to be unused.
Loading history...
91
        password: Optional[str] = Opt.val("Database password (empty if not set)"),
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
Unused Code introduced by
The argument password seems to be unused.
Loading history...
92
        as_of: Optional[str] = CommonArgs.as_of,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
Unused Code introduced by
The argument as_of seems to be unused.
Loading history...
93
        log: Optional[Path] = CommonArgs.log_path,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
94
        quiet: bool = CommonArgs.quiet,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
95
        verbose: bool = CommonArgs.verbose,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
96
    ) -> None:
97
        r"""
98
        Export to a relational database.
99
100
        Saves data from Mandos search commands to a database for serving via REST.
101
102
        See also: ``:serve``.
103
        """
104
        set_up(log, quiet, verbose)
105
106
    @staticmethod
107
    def fill(
0 ignored issues
show
Coding Style Naming introduced by
Argument name "to" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]{2,}|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
best-practice introduced by
Too many arguments (6/5)
Loading history...
108
        path: Path = Ca.compounds_to_fill,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
109
        to: Path = Ca.id_table_to,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
110
        replace: bool = Ca.replace,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
111
        log: Optional[Path] = CommonArgs.log_path,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
112
        quiet: bool = CommonArgs.quiet,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
113
        verbose: bool = CommonArgs.verbose,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
114
    ) -> None:
115
        r"""
116
        Match IDs; fetch and cache compound data.
117
118
        Useful to check what you can see before running a search.
119
        """
120
        set_up(log, quiet, verbose)
121
        default = str(path) + "-ids" + START_TIMESTAMP + DEF_SUFFIX
122
        to = MiscUtils.adjust_filename(to, default, replace)
123
        df = IdMatchFrame.read_file(path)
0 ignored issues
show
Coding Style Naming introduced by
Variable name "df" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]{2,}|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
124
        df = CompoundIdFiller.fill(df)
0 ignored issues
show
Coding Style Naming introduced by
Variable name "df" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]{2,}|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
125
        df.write_file(to)
126
        typer.echo(f"Wrote to {to}")
127
128
    @staticmethod
129
    def cache(
0 ignored issues
show
best-practice introduced by
Too many arguments (7/5)
Loading history...
130
        path: Path = Ca.compounds,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
131
        no_pubchem: bool = Opt.flag(r"Do not download data from PubChem"),
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
132
        no_chembl: bool = Opt.flag(r"Do not fetch IDs from ChEMBL"),
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
133
        no_hmdb: bool = Opt.flag(r"Do not download data from HMDB"),
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
134
        log: Optional[Path] = CommonArgs.log_path,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
135
        quiet: bool = CommonArgs.quiet,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
136
        verbose: bool = CommonArgs.verbose,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
137
    ) -> None:
138
        r"""
139
        Fetch and cache compound data.
140
141
        Useful to freeze data before running a search.
142
        """
143
        set_up(log, quiet, verbose)
144
        inchikeys = SearcherUtils.read(path)
145
        SearcherUtils.dl(inchikeys, pubchem=not no_pubchem, chembl=not no_chembl, hmdb=not no_hmdb)
146
        typer.echo(f"Done caching.")
0 ignored issues
show
introduced by
Using an f-string that does not have any interpolated variables
Loading history...
147
148
    @staticmethod
149
    def build_taxonomy(
0 ignored issues
show
Coding Style Naming introduced by
Argument name "to" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]{2,}|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
best-practice introduced by
Too many arguments (8/5)
Loading history...
150
        taxa: str = Ca.taxa,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
151
        forbid: str = Opt.val(
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
152
            r"""Exclude descendents of these taxa IDs or names (comma-separated).""", default=""
153
        ),
154
        to: Path = typer.Option(
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
155
            None,
156
            help=rf"""
157
            Where to export.
158
159
            {Ca.output_formats}
160
161
            [default: ./<taxa>-<datetime>.{DEF_SUFFIX}]
162
            """,
163
        ),
164
        replace: bool = Ca.replace,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
165
        in_cache: bool = CommonArgs.in_cache,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
Unused Code introduced by
The argument in_cache seems to be unused.
Loading history...
166
        log: Optional[Path] = CommonArgs.log_path,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
167
        quiet: bool = CommonArgs.quiet,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
168
        verbose: bool = CommonArgs.verbose,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
169
    ):
170
        """
171
        Export a taxonomic tree to a table.
172
173
        Writes a taxonomy of given taxa and their descendants to a table.
174
        """
175
        set_up(log, quiet, verbose)
176
        concat = taxa + "-" + forbid
177
        taxa = Ca.parse_taxa(taxa)
178
        forbid = Ca.parse_taxa(forbid)
179
        default = concat + "-" + START_TIMESTAMP + DEF_SUFFIX
180
        to = MiscUtils.adjust_filename(to, default, replace)
181
        my_tax = TaxonomyFactories.get_smart_taxonomy(taxa, forbid)
182
        my_tax = my_tax.to_df()
183
        to.parent.mkdir(exist_ok=True, parents=True)
184
        my_tax.write_file(to)
185
186
    @staticmethod
187
    def dl_tax(
188
        taxa: str = Opt.val(
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
189
            r"""
190
            Either "vertebrata", "all", or a comma-separated list of UniProt taxon IDs.
191
192
            "all" is only valid when --replace is passed;
193
            this will regenerate all taxonomy files that are found in the cache.
194
            """,
195
            default="",
196
        ),
197
        replace: bool = Ca.replace,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
198
        log: Optional[Path] = CommonArgs.log_path,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
199
        quiet: bool = CommonArgs.quiet,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
200
        verbose: bool = CommonArgs.verbose,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
201
    ) -> None:
202
        """
203
        Prep a new taxonomy file for use in mandos.
204
205
        With --replace set, will delete any existing file.
206
        This can be useful to make sure your cached taxonomy is up-to-date before running.
207
208
        Downloads and converts a tab-separated file from UniProt.
209
        (To find manually, follow the ``All lower taxonomy nodes`` link and click ``Download``.)
210
        Then applies fixes and reduces the file size, creating a new file alongside.
211
        Puts both the raw data and fixed data in the cache under ``~/.mandos/taxonomy/``.
212
        """
213
        if taxa == "":
214
            logger.info("No taxa were specified. No data downloaded.")
215
            return
216
        if (
217
            taxa not in ["all", "vertebrata"]
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
218
            and not taxa.replace(",", "").replace(" ", "").isdigit()
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
219
        ):
220
            raise ValueError(f"Use either 'all', 'vertebrata', or a UniProt taxon ID")
0 ignored issues
show
introduced by
Using an f-string that does not have any interpolated variables
Loading history...
221
        if taxa == "all" and not replace:
222
            raise ValueError(f"Use --replace with taxon 'all'")
0 ignored issues
show
introduced by
Using an f-string that does not have any interpolated variables
Loading history...
223
        set_up(log, quiet, verbose)
224
        factory = TaxonomyFactories.from_uniprot()
225
        if taxa == "all" and replace:
226
            listed = TaxonomyFactories.list_cached_files()
227
            for p in listed.values():
0 ignored issues
show
Coding Style Naming introduced by
Variable name "p" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]{2,}|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
228
                p.unlink()
229
            factory.rebuild_vertebrata()
230
            for t in listed.keys():
0 ignored issues
show
Coding Style Naming introduced by
Variable name "t" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]{2,}|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
231
                factory.load_dl(t)
232
        elif taxa == "vertebrata" and (replace or not factory.resolve_path(7742).exists()):
233
            factory.rebuild_vertebrata()
234
        elif taxa == "vertebrata":
235
            factory.load_vertebrate(7742)  # should usually do nothing
236
        else:
237
            for taxon in [int(t.strip()) for t in taxa.split(",")]:
238
                factory.delete_exact(taxon)
239
240
    @staticmethod
241
    def concat(
0 ignored issues
show
Coding Style Naming introduced by
Argument name "to" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]{2,}|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
best-practice introduced by
Too many arguments (6/5)
Loading history...
242
        path: Path = Ca.input_dir,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
243
        to: Optional[Path] = Ca.to_single,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
244
        replace: bool = Ca.replace,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
245
        log: Optional[Path] = CommonArgs.log_path,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
246
        quiet: bool = CommonArgs.quiet,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
247
        verbose: bool = CommonArgs.verbose,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
248
    ) -> None:
249
        r"""
250
        Concatenate Mandos annotation files into one.
251
252
        Note that ``:search`` automatically performs this;
253
        this is needed only if you want to combine results from multiple independent searches.
254
        """
255
        set_up(log, quiet, verbose)
256
        default = path / ("concat" + DEF_SUFFIX)
257
        to = MiscUtils.adjust_filename(to, default, replace)
258
        for found in path.iterdir():
0 ignored issues
show
Unused Code introduced by
The variable found seems to be unused.
Loading history...
259
            pass
260
261
    @staticmethod
262
    def filter(
0 ignored issues
show
Coding Style Naming introduced by
Argument name "to" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]{2,}|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
Coding Style Naming introduced by
Argument name "by" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]{2,}|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
best-practice introduced by
Too many arguments (7/5)
Loading history...
263
        path: Path = Ca.to_single,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
264
        by: Optional[Path] = Arg.in_file(
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
265
            r"""
266
            Path to a TOML (.toml) file containing filters.
267
268
            The file contains a list of ``mandos.filter`` keys,
269
            each containing an expression on a single column.
270
            This is only meant for simple, quick-and-dirty filtration.
271
272
            See the docs for more info.
273
            """
274
        ),
275
        to: Optional[Path] = Ca.to_single,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
276
        replace: bool = Ca.replace,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
277
        log: Optional[Path] = CommonArgs.log_path,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
278
        quiet: bool = CommonArgs.quiet,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
279
        verbose: bool = CommonArgs.verbose,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
280
    ) -> None:
281
        """
282
        Filters by simple expressions.
283
        """
284
        set_up(log, quiet, verbose)
285
        default = str(path) + "-filter-" + by.stem + DEF_SUFFIX
286
        to = MiscUtils.adjust_filename(to, default, replace)
287
        df = HitFrame.read_file(path)
0 ignored issues
show
Coding Style Naming introduced by
Variable name "df" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]{2,}|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
288
        Filtration.from_file(by).apply(df).write_file(to)
289
290
    @staticmethod
291
    def state(
0 ignored issues
show
Coding Style Naming introduced by
Argument name "to" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]{2,}|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
best-practice introduced by
Too many arguments (6/5)
Loading history...
292
        path: Path = Ca.file_input,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
293
        to: Optional[Path] = Opt.out_path(
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
294
            """
295
            Path to the output file.
296
297
            Valid formats and filename suffixes are .nt and .txt with an optional .gz, .zip, or .xz.
298
            If only a filename suffix is provided, will use that suffix with the default directory.
299
            If no suffix is provided, will interpret the path as a directory and use the default filename.
0 ignored issues
show
Coding Style introduced by
This line is too long as per the coding-style (106/100).

This check looks for lines that are too long. You can specify the maximum line length.

Loading history...
300
            Will fail if the file exists and ``--replace`` is not set.
301
302
            [default: <path>-statements.nt]
303
        """
304
        ),
305
        replace: bool = Ca.replace,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
306
        log: Optional[Path] = CommonArgs.log_path,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
307
        quiet: bool = CommonArgs.quiet,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
308
        verbose: bool = CommonArgs.verbose,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
309
    ) -> None:
310
        """
311
        Output simple N-triples statements.
312
313
        Each statement is of this form, where the InChI Key refers to the input data:
314
315
        `"InChI Key" "predicate" "object" .`
316
        """
317
        set_up(log, quiet, verbose)
318
        default = f"{path}-statements.nt"
319
        to = MiscUtils.adjust_filename(to, default, replace)
320
        hits = HitFrame.read_file(path).to_hits()
321
        with to.open() as f:
0 ignored issues
show
Coding Style Naming introduced by
Variable name "f" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
322
            for hit in hits:
323
                f.write(hit.to_triple.n_triples)
324
325
    @staticmethod
    def reify(
        path: Path = Ca.file_input,
        to: Optional[Path] = Opt.out_path(
            r"""
            Path to the output file.

            The filename suffix should be either .nt (N-triples) or .ttl (Turtle),
            with an optional .gz, .zip, or .xz.
            If only a filename suffix is provided, will use that suffix with the default directory.
            If no suffix is provided, will interpret the path as a directory but use the default filename.
            Will fail if the file exists and ``--replace`` is not set.

            [default: <path>-reified.nt]
            """
        ),
        replace: bool = Ca.replace,
        log: Optional[Path] = CommonArgs.log_path,
        quiet: bool = CommonArgs.quiet,
        verbose: bool = CommonArgs.verbose,
    ) -> None:
        """
        Outputs reified semantic triples.

        Reads the annotation hits from ``path``, reifies them, and writes the
        N-triples form of every resulting triple to the output file.
        """
        set_up(log, quiet, verbose)
        default = f"{path}-reified.nt"
        to = MiscUtils.adjust_filename(to, default, replace)
        hits = HitFrame.read_file(path).to_hits()
        # Path.open() defaults to read-only mode "r", in which write() raises
        # io.UnsupportedOperation, so the file must be opened for writing.
        # N-Triples documents are UTF-8, hence the explicit encoding.
        # NOTE(review): the help text advertises .ttl and compressed suffixes, but
        # this always writes uncompressed ``n_triples`` text -- confirm whether
        # that is handled elsewhere.
        with to.open("w", encoding="utf-8") as f:
            for triple in Reifier().reify(hits):
                f.write(triple.n_triples)
356
357
    @staticmethod
    def copy(
        path: Path = Ca.file_input,
        to: Optional[Path] = Opt.out_path(
            rf"""
            Path to the output file.

            {Ca.output_formats}

            [default: <path.parent>/export{DEF_SUFFIX}]
        """
        ),
        replace: bool = Ca.replace,
        log: Optional[Path] = CommonArgs.log_path,
        quiet: bool = CommonArgs.quiet,
        verbose: bool = CommonArgs.verbose,
    ) -> None:
        """
        Copies and/or converts annotation files.

        Example: ``:export:copy --to .snappy`` to highly compress a data set.
        """
        set_up(log, quiet, verbose)
        # The documented default is "<path.parent>/export<DEF_SUFFIX>"; the previous
        # code used the bare suffix as the filename, which contradicted the help text.
        default = path.parent / ("export" + DEF_SUFFIX)
        to = MiscUtils.adjust_filename(to, default, replace)
        # Reading and re-writing converts between formats; the output format is
        # presumably chosen from the suffix of ``to`` -- confirm in write_file.
        df = HitFrame.read_file(path)
        df.write_file(to)
384
385
    @staticmethod
    def analyze(
        path: Path = Ca.file_input,
        phi: Path = Ca.input_matrix,
        scores: Path = Ca.alpha_input,
        seed: int = Ca.seed,
        samples: int = Ca.boot,
        to: Optional[Path] = Ca.misc_out_dir,
        replace: bool = Ca.replace,
        log: Optional[Path] = CommonArgs.log_path,
        quiet: bool = CommonArgs.quiet,
        verbose: bool = CommonArgs.verbose,
    ) -> None:
        """
        Shorthand for multiple calculations and plots.

        Generates n-triple statements and reified n-triples.
        Calculates correlation and enrichment using ``scores``,
        psi matrices (one per variable), and concordance between phi and psi matrices (tau).
        Plots UMAP of psi variables, enrichment bar plots, correlation violin plots,
        phi-vs-psi scatter and line plots, and phi-vs-psi (tau) violin plots.
        """
        # NOTE: not implemented yet. The body consists solely of the docstring, so
        # calling this command currently does nothing and none of the arguments
        # are read (logging is not even set up).
407
408
    @staticmethod
    def alpha(
        path: Path = Ca.file_input,
        scores: Path = Ca.alpha_input,
        bool_alg: Optional[str] = Opt.val(
            rf"""
            Algorithm to use for scores starting with 'is_'.

            Allowed values: {Ca.list(BoolAlg)}
            """,
            default="alpha",
        ),
        real_alg: Optional[str] = Opt.val(
            rf"""
            Algorithm to use for scores starting with 'score_'.

            Allowed values: {Ca.list(RealAlg)}
            """,
            default="weighted",
        ),
        on: bool = Ca.on,
        boot: int = Ca.boot,
        seed: int = Ca.seed,
        to: Optional[Path] = Ca.alpha_to,
        replace: bool = Ca.replace,
        log: Optional[Path] = CommonArgs.log_path,
        quiet: bool = CommonArgs.quiet,
        verbose: bool = CommonArgs.verbose,
    ) -> None:
        """
        Compare annotations to user-supplied values.

        Correlates the supplied scores with object/predicate pairs.
        Boolean scores are handled by comparing annotations of hits against non-hits.
        Refer to the docs for details.
        """
        # NOTE: ``on`` is accepted for the CLI but is not read anywhere below.
        set_up(log, quiet, verbose)
        # The fallback name must be built while ``scores`` is still the input path.
        fallback = f"{path}-{scores.name}{DEF_SUFFIX}"
        to = MiscUtils.adjust_filename(to, fallback, replace)
        hit_frame = HitFrame.read_file(path)
        score_frame = ScoreDf.read_file(scores)
        enrichment = EnrichmentCalculation(bool_alg, real_alg, boot, seed).calculate(
            hit_frame, score_frame
        )
        enrichment.write_file(to)
452
453
    @staticmethod
    def psi(
        path: Path = Ca.file_input,
        algorithm: str = Opt.val(
            r"""
            The algorithm for calculating similarity between annotation sets.

            Currently, only "j" (J') is supported. Refer to the docs for the equation.
            """,
            default="j",
        ),
        to: Path = Ca.output_matrix,
        replace: bool = Ca.replace,
        log: Optional[Path] = CommonArgs.log_path,
        quiet: bool = CommonArgs.quiet,
        verbose: bool = CommonArgs.verbose,
    ) -> None:
        r"""
        Calculate a similarity matrix from annotations.

        The result is written as a dataframe (CSV by default) whose rows and columns
        are compounds; cell i,j holds the annotation overlap J' between compounds i and j.
        """
        set_up(log, quiet, verbose)
        # Default output sits next to the input and is named after the algorithm.
        fallback = path.parent / (algorithm + DEF_SUFFIX)
        to = MiscUtils.adjust_filename(to, fallback, replace)
        hits = HitFrame.read_file(path).to_hits()
        MatrixCalculation.create(algorithm).calc_all(hits).write_file(to)
483
484
    @staticmethod
    def calc_ecfp_psi(
        path: Path = CommonArgs.compounds,
        radius: int = Opt.val(r"""Radius of the ECFP fingerprint.""", default=4),
        n_bits: int = Opt.val(r"""Number of bits.""", default=2048),
        psi: bool = Opt.flag(
            r"""Use "psi" as the type in the resulting matrix instead of "phi"."""
        ),
        to: Path = Ca.output_matrix,
        replace: bool = Ca.replace,
        log: Optional[Path] = CommonArgs.log_path,
        quiet: bool = CommonArgs.quiet,
        verbose: bool = CommonArgs.verbose,
    ) -> None:
        r"""
        Compute a similarity matrix from ECFP fingerprints.

        Requires rdkit to be installed.

        This is a bit faster than running a search and then calculating with ``:calc:psi``.
        Values range from 0 (no overlap) to 1 (identical).
        The matrix type is "phi" unless the ``psi`` flag is set, in which case it is "psi".
        See ``:calc:phi`` for more info.
        This is most useful for comparing a phenotypic phi against pure structural similarity.
        """
        set_up(log, quiet, verbose)
        # The same name serves as the default filename stem and as the matrix key.
        name = f"ecfp{radius}-n{n_bits}"
        to = MiscUtils.adjust_filename(to, path.parent / (name + DEF_SUFFIX), replace)
        compounds = InputFrame.read_file(path)
        if psi:
            kind = "psi"
        else:
            kind = "phi"
        short_form = MatrixPrep.ecfp_matrix(compounds, radius, n_bits)
        # The three False arguments match the original call; presumably they
        # disable normalize/log10/invert as in prep_phi -- confirm against MatrixPrep.
        prep = MatrixPrep(kind, False, False, False)
        prep.create({name: short_form}).write_file(to)
518
519
    @staticmethod
    def tau(
        phi: Path = Ca.input_matrix,
        psi: Path = Ca.input_matrix,
        algorithm: str = Opt.val(
            r"""
            The algorithm for calculating concordance.

            Currently, only "tau" is supported.
            This calculation is a modified Kendall’s  τ-a, where discordant ignores ties.
            See the docs for more info.
            """,
            default="tau",
        ),
        seed: int = Ca.seed,
        samples: int = Ca.boot,
        to: Optional[Path] = Opt.out_file(
            rf"""
            The path to a table for output.

            {Ca.output_formats}

            [default: <phi.parent>/<psi.stem>-<algorithm>{DEF_SUFFIX}]
            """,
        ),
        replace: bool = Ca.replace,
        log: Optional[Path] = CommonArgs.log_path,
        quiet: bool = CommonArgs.quiet,
        verbose: bool = CommonArgs.verbose,
    ) -> None:
        r"""
        Calculate correlation between matrices.

        Values are calculated over bootstrap, outputting a table.

        Phi is typically a phenotypic matrix, and psi a matrix from Mandos.
        This command is designed to calculate the similarity between compound annotations
        (from Mandos) and some user-input compound–compound similarity matrix.
        (For example, vectors from a high-content cell screen.)
        See ``:calc:correlation`` or ``:calc:enrichment`` if you have a single variable,
        such as a hit or lead-like score.
        """
        set_up(log, quiet, verbose)
        # Default: next to the phi file, named after the psi file and the algorithm.
        # The help text for ``to`` documents exactly this pattern.
        default = phi.parent / f"{psi.stem}-{algorithm}{DEF_SUFFIX}"
        to = MiscUtils.adjust_filename(to, default, replace)
        # Separate names keep the input paths distinct from the loaded frames.
        phi_frame = SimilarityDfLongForm.read_file(phi)
        psi_frame = SimilarityDfLongForm.read_file(psi)
        calculator = ConcordanceCalculation.create(
            algorithm, phi_frame, psi_frame, samples, seed
        )
        concordance = calculator.calc_all(phi_frame, psi_frame)
        concordance.write_file(to)
569
570
    @staticmethod
    def calc_umap(
        psi_matrix: Path = Ca.input_matrix,
        algorithm: str = Opt.val(
            r"""
            Projection algorithm.

            Currently only "umap" is supported.
            """,
            default="umap",
        ),
        seed: str = Opt.val(
            r"""
            Random seed (integer or 'none').

            Setting to 'none' may increase performance.
            """,
            default=0,
        ),
        params: str = Opt.val(
            rf"""
            Parameters fed to the algorithm.

            This is a comma-separated list of key=value pairs.
            For example: ``n_neighbors=4,n_components=12,min_dist=0.8``
            Supports all UMAP parameters except random_state and metric:

            {Ca.definition_list(_umap_params) if UMAP else "<list is unavailable>"}
            """,
            default="",
        ),
        to: Optional[Path] = Ca.project_to,
        replace: bool = Ca.replace,
        log: Optional[Path] = CommonArgs.log_path,
        quiet: bool = CommonArgs.quiet,
        verbose: bool = CommonArgs.verbose,
    ) -> None:
        r"""
        Calculate compound UMAP from psi matrices.

        The input should probably come from ``:calc:matrix``.
        Saves a table of the UMAP coordinates.
        """
        # NOTE: only the availability check exists so far. No projection is computed,
        # nothing is written, and every argument except ``algorithm`` is unused.
        umap_missing = UMAP is None
        if algorithm == "umap" and umap_missing:
            raise ImportError("UMAP is not available")
0 ignored issues
show
introduced by
Using an f-string that does not have any interpolated variables
Loading history...
615
616
    @staticmethod
    def prep_phi(
        matrices: List[Path] = Ca.input_matrix_short_form,
        kind: str = Ca.var_type,
        to: Path = Ca.output_matrix,
        replace: bool = Ca.replace,
        normalize: bool = Opt.flag(
            r"""Rescale values to between 0 and 1 by (v-min) / (max-min). (Performed after negation.)"""
        ),
        log10: bool = Opt.val(r"""Rescales values by log10. (Performed after normalization.)"""),
        invert: bool = Opt.val(r"""Multiplies the values by -1. (Performed first.)"""),
        log: Optional[Path] = CommonArgs.log_path,
        quiet: bool = CommonArgs.quiet,
        verbose: bool = CommonArgs.verbose,
    ) -> None:
        r"""
        Convert phi matrices to one long-form matrix.

        The keys will be derived from the filenames.
        If no output path is given, the directory of the first input matrix is used
        as the default location.
        """
        # NOTE(review): ``log10`` and ``invert`` are booleans declared with Opt.val,
        # whereas ``normalize`` uses Opt.flag -- confirm this is intended.
        set_up(log, quiet, verbose)
        default = "."
        if to is None:
            if matrices:
                # Take the first listed matrix's directory. Picking an element from a
                # set of parents (as before) depends on hash ordering, so the result
                # could differ between runs when inputs span several directories.
                default = matrices[0].parent
            else:
                logger.warning(f"Outputting to {default}")
        to = MiscUtils.adjust_filename(to, default, replace)
        long_form = MatrixPrep(kind, normalize, log10, invert).from_files(matrices)
        long_form.write_file(to)
646
647
    @staticmethod
648
    def plot_umap(
        umap_df: Path = Ca.project_input,
        style: Optional[Path] = Ca.style_for_compounds,
        color_col: Optional[str] = Ca.color_col,
        marker_col: Optional[str] = Ca.marker_col,
        to: Optional[Path] = Ca.plot_to,
    ) -> None:
        r"""
        Plot a projection (UMAP, etc.) of compounds from psi matrices.

        One variable (psi) is plotted per column.
        """
        # Declaration only: the body consists of the docstring, so calling
        # this command currently does nothing and returns None.
660
661
    @staticmethod
662
    def plot_score(
        path: Path = Ca.input_correlation,
        kind: str = Ca.plot_kind,
        style: Optional[Path] = Ca.style_for_pairs,
        color_col: Optional[str] = Ca.color_col,
        marker_col: Optional[str] = Ca.marker_col,
        ci: float = Ca.ci,
        to: Optional[Path] = Ca.plot_to,
    ) -> None:
        r"""
        Plot correlation to scores.

        Shows how predicate/object pairs correlate with user-supplied scores.
        One figure (file) is written per scoring function.
        The (psi, score-fn) pairs are laid out over a grid,
        with one row per scoring function and one column per psi.
        """
        # Declaration only: the body consists of the docstring, so calling
        # this command currently does nothing and returns None.
        # NOTE(review): "one figure per scoring function" and "one row per
        # scoring function" read as conflicting layouts -- confirm which one
        # is intended once this is implemented.
679
680
    @staticmethod
681
    def plot_pairing(
        path: Path = Ca.input_matrix,
        join: Optional[bool] = Opt.flag(
            r"""
            Pool all psi variables into a single column with multiple plots.
            """
        ),
        # NOTE(review): the help below announces three kinds but also lists
        # 'ci' -- confirm whether 'ci' is an accepted value.
        kind: str = Opt.val(
            r"""
            Either 'points', 'lines', or 'points+lines'.

            - points: Scatter plots of (phi, psi) values.

            - lines: Plot a linear interpolation.

            - ci: Plot a linear interpolation with a confidence band.

            - points+lines: Both 'points' and 'lines'.
            """,
            "--type",
        ),
        ci: float = Ca.ci,
        # NOTE(review): this help maps phi to x and psi to y, whereas the
        # command docstring maps phi to the y-axis and psi to the x-axis --
        # confirm which mapping is intended.
        sort_by: str = Opt.val(
            r"""
            Which axis to sort by: 'phi'/'x' or 'psi'/'y'.

            Sorting by psi values (y-axis) makes it easier to compare psi variables,
            while sorting by phi values (x-axis) makes it easier to compare phi variables.
            """,
            default="psi",
        ),
        style: Optional[Path] = Ca.style_for_psi,
        color_col: Optional[str] = Ca.color_col,
        marker_col: Optional[str] = Ca.marker_col,
        to: Optional[Path] = Ca.plot_to,
    ) -> None:
        r"""
        Plot line plots of phi against psi.

        Plots (phi, psi) values, sorted by the axis chosen with ``--sort-by``
        (psi by default).
        All plots are log/log (all similarity values should be scaled from 0 to 1).

        For each unique phi matrix and psi matrix, flattens the matrices and plots
        the flattened (n choose 2 - n) pairs of each jointly, phi mapped to the y-axis
        and psi mapped to the x-axis.

        With --join:

        Will show values for all psi variables together.
        If ``--color`` is not set, will choose a palette.
        Works best with ``--type lines``.

        Without --join:

        Will plot each (phi, psi) pair over a grid, one plot per cell:
        One row per phi and one column per psi.
        """
        # Declaration only: the body consists of the docstring, so calling
        # this command currently does nothing and returns None.
738
739
    @staticmethod
740
    def plot_pairing_violin(
        path: Path = Ca.input_matrix,
        split: bool = Opt.flag(
            r"""
            Split each violin into phi #1 on the left and phi #2 on the right.

            Useful to compare two phi variables. Requires exactly 2.
            """
        ),
        style: Optional[Path] = Ca.style_for_psi,
        color_col: Optional[str] = Ca.color_col,
        marker_col: Optional[str] = Ca.marker_col,
        to: Optional[Path] = Ca.plot_to,
    ) -> None:
        r"""
        Plot violin plots from tau values.

        The input data is expected to come from ``:calc:phi-vs-psi.tau``.

        Unless ``--split`` is set, each (phi, psi) pair is drawn over a grid
        with one row per phi and one column per psi.
        """
        # Declaration only: the body consists of the docstring, so calling
        # this command currently does nothing and returns None.
762
763
764
# Public API of this module: a star-import exposes only MiscCommands.
__all__ = ["MiscCommands"]
765