Passed
Push — dependabot/pip/pandas-1.3.0 ( cc3196 )
by
unknown
21:01 queued 18:59
created

mandos.entries.common_args.CommonArgs.parse_taxa()   A

Complexity

Conditions 1

Size

Total Lines 4
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 4
nop 1
dl 0
loc 4
rs 10
c 0
b 0
f 0
1
"""
2
Common argument processing and arguments for Typer.
3
"""
4
import enum
5
import os
6
from inspect import cleandoc
7
from pathlib import Path
8
from typing import (Any, Callable, Iterable, Mapping, Optional, Sequence,
9
                    TypeVar, Union)
10
11
import typer
0 ignored issues
show
introduced by
Unable to import 'typer'
Loading history...
12
13
from mandos.model import CleverEnum
0 ignored issues
show
Unused Code introduced by
Unused CleverEnum imported from mandos.model
Loading history...
14
from mandos.model.settings import MANDOS_SETTINGS
15
16
# Type variable used to annotate the ``default`` parameter of the factory helpers below.
T = TypeVar("T", covariant=True)
0 ignored issues
show
Coding Style Naming introduced by
Class name "T" doesn't conform to PascalCase naming style ('[^\\W\\da-z][^\\W_]+$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
17
18
19
class _Args:
    """
    Shared factory helpers that build ``typer.Argument`` / ``typer.Option`` objects.
    """

    @staticmethod
    def _arg(doc: str, *names, default: Optional[T] = None, req: bool = False, **kwargs):
        """
        Build a Typer parameter definition.

        Args:
            doc: Help text; passed through ``inspect.cleandoc`` before use.
            names: Option names (e.g. ``"--to"``). Only used when ``req`` is false;
                   they are not forwarded to ``typer.Argument``.
            default: Default value handed to Typer as the first positional argument.
            req: If true, return a ``typer.Argument``; otherwise a ``typer.Option``.
            kwargs: Extra keyword arguments forwarded to Typer. Must not contain
                    ``help`` or ``allow_dash`` (both are set here).

        Returns:
            Whatever ``typer.Argument`` / ``typer.Option`` returns.
        """
        kwargs = dict(
            help=cleandoc(doc),
            **kwargs,
            allow_dash=True,
        )
        if req:
            return typer.Argument(default, **kwargs)
        return typer.Option(default, *names, **kwargs)

    @staticmethod
    def _path(
        doc: str, *names, default: Optional[str], f: bool, d: bool, out: bool, req: bool, **kwargs
    ):
        """
        Build a Typer parameter definition for a filesystem path.

        Args:
            doc: Help text (see ``_arg``).
            names: Option names (see ``_arg``).
            default: Default path, or None.
            f: Value for Typer's ``file_okay``.
            d: Value for Typer's ``dir_okay``.
            out: Whether the path is treated as an output; drives the
                 ``exists`` / ``readable`` / ``writable`` defaults below.
            req: Passed to ``_arg`` (argument vs. option).
            kwargs: Extra keyword arguments; these override the path defaults set here.

        Returns:
            The result of ``_arg``.
        """
        # If the default is None, a special default is set afterward and explained in the doc.
        if out and default is None:
            # setdefault instead of dict(show_default=True, **kwargs): a caller that passes
            # its own ``show_default`` used to trigger a duplicate-keyword TypeError.
            kwargs.setdefault("show_default", True)
        path_defaults = dict(
            exists=not out,
            dir_okay=d,
            file_okay=f,
            # NOTE(review): readable=out / writable=not out look inverted for input vs. output
            # paths. Left unchanged because the ``out_path`` helpers pass ``out=False`` and
            # appear to rely on the current values -- confirm before changing.
            readable=out,
            writable=not out,
        )
        # Caller-supplied kwargs take precedence over the defaults above.
        merged = {**path_defaults, **kwargs}
        return _Args._arg(doc, *names, default=default, req=req, **merged)
47
48
49
class Arg(_Args):
    """
    Factories for ``typer.Argument`` definitions (every method passes ``req=True``).

    All methods take the help text ``doc``, optional ``names``, a ``default``,
    and extra keyword arguments that are forwarded to ``_Args._path`` / ``_Args._arg``.
    """

    @staticmethod
    def out_file(doc: str, *names, default: Optional[str] = None, **kwargs):
        """Output path that may be a file but not a directory."""
        return _Args._path(
            doc, *names, default=default, f=True, d=False, out=True, req=True, **kwargs
        )

    @staticmethod
    def out_dir(doc: str, *names, default: Optional[str] = None, **kwargs):
        """Output path that may be a directory but not a file."""
        # f=False mirrors ``in_dir``; previously files were accepted for a directory parameter.
        return _Args._path(
            doc, *names, default=default, f=False, d=True, out=True, req=True, **kwargs
        )

    @staticmethod
    def out_path(doc: str, *names, default: Optional[str] = None, **kwargs):
        """Output path that may be a file or a directory and need not exist yet."""
        # exists=False matches ``Opt.out_path``; with out=False, ``_path`` would otherwise
        # default to exists=True and require the output to be present already.
        return _Args._path(
            doc,
            *names,
            default=default,
            f=True,
            d=True,
            out=False,
            req=True,
            exists=False,
            **kwargs,
        )

    @staticmethod
    def in_file(doc: str, *names, default: Optional[str] = None, **kwargs):
        """Input path that may be a file but not a directory."""
        return _Args._path(
            doc, *names, default=default, f=True, d=False, out=False, req=True, **kwargs
        )

    @staticmethod
    def in_dir(doc: str, *names, default: Optional[str] = None, **kwargs):
        """Input path that may be a directory but not a file."""
        return _Args._path(
            doc, *names, default=default, f=False, d=True, out=False, req=True, **kwargs
        )

    @staticmethod
    def in_path(doc: str, *names, default: Optional[str] = None, **kwargs):
        """Input path that may be a file or a directory."""
        return _Args._path(
            doc, *names, default=default, f=True, d=True, out=False, req=True, **kwargs
        )

    @staticmethod
    def x(doc: str, *names, default: Optional[T] = None, **kwargs):
        """Non-path argument; delegates directly to ``_Args._arg``."""
        return _Args._arg(doc, *names, default=default, req=True, **kwargs)
89
90
91
class Opt(_Args):
    """
    Factories for ``typer.Option`` definitions (every method passes ``req=False``).

    All methods take the help text ``doc``, the option ``names`` (e.g. ``"--to"``),
    and extra keyword arguments that are forwarded to ``_Args._path`` / ``_Args._arg``.
    """

    @staticmethod
    def out_file(doc: str, *names, default: Optional[str] = None, **kwargs):
        """Output path that may be a file but not a directory."""
        return _Args._path(
            doc, *names, default=default, f=True, d=False, out=True, req=False, **kwargs
        )

    @staticmethod
    def out_dir(doc: str, *names, default: Optional[str] = None, **kwargs):
        """Output path that may be a directory but not a file."""
        # f=False mirrors ``in_dir``; previously files were accepted for a directory option.
        return _Args._path(
            doc, *names, default=default, f=False, d=True, out=True, req=False, **kwargs
        )

    @staticmethod
    def out_path(doc: str, *names, default: Optional[str] = None, **kwargs):
        """Output path that may be a file or a directory and need not exist yet."""
        return _Args._path(
            doc,
            *names,
            default=default,
            f=True,
            d=True,
            out=False,
            req=False,
            exists=False,
            **kwargs,
        )

    @staticmethod
    def in_file(doc: str, *names, default: Optional[str] = None, **kwargs):
        """Input path that may be a file but not a directory."""
        return _Args._path(
            doc, *names, default=default, f=True, d=False, out=False, req=False, **kwargs
        )

    @staticmethod
    def in_dir(doc: str, *names, default: Optional[str] = None, **kwargs):
        """Input path that may be a directory but not a file."""
        return _Args._path(
            doc, *names, default=default, f=False, d=True, out=False, req=False, **kwargs
        )

    @staticmethod
    def in_path(doc: str, *names, default: Optional[str] = None, **kwargs):
        """Input path that may be a file or a directory; ``exists=False`` is passed."""
        return _Args._path(
            doc,
            *names,
            default=default,
            f=True,
            d=True,
            out=False,
            req=False,
            exists=False,
            **kwargs,
        )

    @staticmethod
    def val(doc: str, *names, default: Optional[T] = None, **kwargs):
        """Non-path option with an arbitrary default."""
        return _Args._arg(doc, *names, default=default, req=False, **kwargs)

    @staticmethod
    def flag(doc: str, *names, **kwargs):
        """Boolean option whose default is ``False``."""
        return _Args._arg(doc, *names, default=False, req=False, **kwargs)
151
152
153
def _strip(s: str) -> str:
0 ignored issues
show
Coding Style Naming introduced by
Argument name "s" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
154
    return s.strip().strip("'").strip('"').strip()
155
156
157
class CommonArgs:
    """
    Text-formatting helpers, taxon parsers, and reusable Typer parameter definitions.
    """

    @staticmethod
    def definition_bullets(dct: Mapping[Any, Any], colon: str = ": ", indent: int = 12) -> str:
        """
        Format a mapping as `` - key: value`` bullets.

        Bullets are joined by two OS line separators followed by ``indent`` spaces.
        """
        joiner = os.linesep * 2 + " " * indent
        return joiner.join(f" - {key}{colon}{value}" for key, value in dct.items())

    @staticmethod
    def definition_list(dct: Mapping[Any, Any], colon: str = ": ", sep: str = "; ") -> str:
        """
        Format a mapping as ``key: value`` items joined by ``sep``.
        """
        return sep.join(f"{key}{colon}{value}" for key, value in dct.items())

    @staticmethod
    def list(
        lst: Iterable[Any], attr: Union[None, str, Callable[[Any], Any]] = None, sep: str = "; "
    ) -> str:
        """
        Join the items of ``lst`` into a single string.

        Args:
            lst: The items to render.
            attr: How to render each item: None uses ``item.name`` for ``enum.Enum``
                  members and ``str(item)`` otherwise; a string names an attribute to
                  read; a callable is applied to the item.
            sep: The separator placed between rendered items.
        """

        def render(item: Any) -> str:
            if attr is None:
                return item.name if isinstance(item, enum.Enum) else str(item)
            if isinstance(attr, str):
                return str(getattr(item, attr))
            return str(attr(item))

        return sep.join(render(item) for item in lst)

    @staticmethod
    def parse_taxon_id_or_name(taxon: Union[int, str]) -> Union[int, str]:
        """
        Normalize a taxon given as an ID or a name.

        Integers are returned unchanged, strings consisting only of digits are
        converted to ``int``, and any other string is returned as-is (a name).

        Raises:
            ValueError: If ``taxon`` is neither an ``int`` nor a ``str``.
        """
        if isinstance(taxon, int):
            return taxon
        if isinstance(taxon, str):
            # The digit check used to sit in an unreachable ``elif`` after an unconditional
            # ``return taxon``, so numeric IDs were never converted.
            return int(taxon) if taxon.isdigit() else taxon
        raise ValueError(f"Taxon {taxon} must be an ID or name")

    @staticmethod
    def parse_taxon_id(taxon: Union[int, str]) -> int:
        """
        Convert a taxon to an integer ID.

        Raises:
            ValueError: If ``int(taxon)`` fails with a ``ValueError``.
        """
        try:
            return int(taxon)
        except ValueError:
            raise ValueError(f"Taxon {taxon} must be an exact ID") from None

    @staticmethod
    def parse_taxa(taxa: str) -> Sequence[Union[int, str]]:
        """
        Split a comma-separated string into taxa.

        Each item is cleaned with ``_strip`` and then passed through
        ``parse_taxon_id_or_name``.
        """
        return [CommonArgs.parse_taxon_id_or_name(_strip(item)) for item in taxa.split(",")]

    # Help-text fragment describing accepted output formats; interpolated into ``to_single``.
    output_formats = r"""
        The filename extension must be one of: .feather; .snappy/.parquet;
        .csv, .tsv, .tab, .json (with optional .gz/.bz2/.zip/.xz);
        Feather (.feather) and Parquet (.snappy) are recommended.
        If only a filename suffix is provided, only sets the format and filename suffix.
        If no extension is provided, interprets that path as a directory and uses the default format (Feather).

        Will fail if the file exists, unless `--replace` is passed.
    """

    # Help-text fragment describing accepted input formats; interpolated into several docs below.
    input_formats = r"""
        The filename extension must be one of: .feather; .snappy/.parquet;
        .csv, .tsv, .tab (with optional .gz/.bz2/.zip/.xz);
        Feather (.feather) and Parquet (.snappy) are recommended formats.
        (Some other formats, such as .json or .h5, may be permitted but are discouraged.)
    """

    file_input = Arg.in_file("The path to a file output by `:concat` or `:search`.", "input")

    compounds = Arg.in_file(
        """
        The path to the input file.
        One of:

          (A) *.txt, *.lines, or *.list (optionally with .gz/.zip/.xz/.bz2)), with one InChI Key per line;

          (B) A *.csv, *.tsv, *.tab file (or .gz/.zip/.xz/.bz2 variant) with a column called 'inchikey'; OR

          (C) An Arrow *.feather file or Parquet *.snappy file with a column called 'inchikey'
        """
    )

    dir_input = Arg.in_dir(
        rf"""
        The path to a directory containing files output from mandos search.

        {input_formats}
        Note that *all* matching files will be included.
        Provide ``--exclude`` if needed.
        """
    )

    # ``{{...}}`` is an escaped literal "{...}"; a bare ``{...}`` in an f-string evaluates
    # the Ellipsis object and rendered as "Ellipsis" in the help text.
    to_single = Opt.out_file(
        rf"""
        The path to the output file.

        {output_formats}

        [default: <input-path>/{{...}}.{MANDOS_SETTINGS.default_table_suffix}.gz]
        """,
        "--to",
    )

    input_matrix: Path = Arg.in_file(
        rf"""
        The path to a similarity matrix file to write to.

        {input_formats}
        .txt/.txt.gz/etc. is assumed to be whitespace-delimited.
        Compounds can be referenced by InChI Key or compound ID (matching what you provided for the search).
        The set of compounds here must exactly match the set of compounds in the input files.
        For Excel and text formats, the first row and the first column (header and index) indicate the compounds.

        Values must be floating-point.
        """
    )
    replace: bool = typer.Option(
        False, help="Replace output file(s) if they exist. See also: --skip"
    )

    # "7742" must be passed as ``default``; given positionally it lands in ``*names`` and is
    # treated as a second option name, leaving the actual default as None.
    taxa = Opt.val(
        r"""
        The IDs or names of UniProt taxa, comma-separated.
        Taxon names and common names can be used for vertebrate species (where available).

        This can have a significant effect on searches. See the docs for more info.

        [default: 7742] (Euteleostomi)
        """,
        "--taxa",
        default="7742",
        show_default=False,
    )

    seed = Opt.val(r"A random seed (integer).", "--seed", default=0)

    n_samples = Opt.val(
        "Number of bootstrap samples (positive integer).",
        "--samples",
        min=1,
        default=2000,
    )

    exclude = Opt.val("A glob pattern matching input filenames to ignore.")

    verbose: bool = Opt.flag(
        r"Configure logger to output INFO (use ``--quiet`` for less info)",
        "-v",
        "--verbose",
    )

    quiet: bool = Opt.flag(
        r"Configure logger to output only ERROR (use ``--verbose`` for more info)",
        "-q",
        "--quiet",
    )

    in_cache: bool = Opt.flag(
        r"Do not download any data and fail if needed data is not cached.",
        hidden=True,
    )

    # Plain string: the text has no interpolated fields, so the former ``f`` prefix was a no-op.
    as_of: Optional[str] = Opt.val(
        """
        Restrict to data that was cached as of some date and time.
        This option can be useful for reproducibility.

        Note that this should imply that underlying data sources (such as of deposition or publication)
        are restricted by this datetime, but that is not checked.

        Examples:

            - --as-of 2021-10-11T14:12:13Z
            - --as-of 2021-10-11T14:12:13+14:00
            - --as-of 2021-10-11T14:12:13.496Z
            - --as-of "2021-10-11 14:12:13,496,915+14:00"
            - --as-of "2021-10-11 14:12:13-8:00 [America/Los_Angeles]"

        This is a subset of ISO 8601, represented as ``YYYY-mm-dd('T'|' '):hh:MM:ss(i)Z``.
        Precision must be nanosecond or less, and ``,`` and ``.`` are equivalent as a thousands separator.
        You can provide an IANA zone name in square brackets for context, but the offset is still required.
        """
    )

    log_path = Opt.out_path(
        r"""
        Also log to a file.
        The suffix can be .log, .log.gz, .log.zip, or .json, .json.gz, or .json.gz.
        You can prefix the path with :LEVEL: to control the level. For example, ``:INFO:out.log``
        """,
        "--log",
        show_default=True,
    )

    no_setup: bool = Opt.flag(
        r"Skip setup, such as configuring logging.",
        "--no-setup",
        hidden=True,
    )
354
355
356
cli = typer.Typer()


# Minimal command that only declares two of the shared parameters; its body does nothing.
# Comments are used instead of a docstring because Typer shows a command's docstring as
# its help text, which would change the CLI output.
#
# ``x`` is annotated as ``Optional[Path]``: without an annotation Typer appears to treat the
# parameter as a plain string, so the path settings configured on ``CommonArgs.log_path``
# would not be applied.
@cli.command()
def run(
    path: Path = CommonArgs.dir_input,
    x: Optional[Path] = CommonArgs.log_path,
):
    pass


if __name__ == "__main__":
    typer.run(run)
369
370
371
# Names exported by ``from <module> import *``.
__all__ = ["CommonArgs", "Arg", "Opt"]
372