Passed
Push — dependabot/pip/pyarrow-4.0.1 ( ca09ce...b2836e )
by
unknown
02:18 queued 20s
created

CommonArgs.parse_taxon_id_or_name()   A

Complexity

Conditions 4

Size

Total Lines 7
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 4
eloc 7
nop 1
dl 0
loc 7
rs 10
c 0
b 0
f 0
1
"""
2
Common argument processing and arguments for Typer.
3
"""
4
import enum
5
import os
6
from inspect import cleandoc
7
from pathlib import Path
8
from typing import Any, Callable, Iterable, Mapping, Optional, Sequence, TypeVar, Union
9
10
import typer
0 ignored issues
show
introduced by
Unable to import 'typer'
Loading history...
11
12
from mandos.model import CleverEnum
0 ignored issues
show
Unused Code introduced by
Unused CleverEnum imported from mandos.model
Loading history...
13
from mandos.model.settings import MANDOS_SETTINGS
14
15
# Generic type used to annotate the ``default`` value of the argument/option helpers in this module.
# NOTE(review): the TypeVar is declared covariant but appears in parameter annotations
# (``default: Optional[T]``), which static type checkers normally reject -- confirm covariance is needed.
T = TypeVar("T", covariant=True)
0 ignored issues
show
Coding Style Naming introduced by
Class name "T" doesn't conform to PascalCase naming style ('[^\\W\\da-z][^\\W_]+$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
16
17
18
class _Args:
    """Low-level factories that build ``typer.Argument`` / ``typer.Option`` declarations."""

    @staticmethod
    def _arg(doc: str, *names, default: Optional[T] = None, req: bool = False, **kwargs):
        """
        Build a Typer parameter declaration.

        Args:
            doc: Help text; it is normalized with ``inspect.cleandoc``.
            names: Option names such as ``"--to"``; only used when ``req`` is false.
            default: Default value handed to Typer.
            req: If true, build a ``typer.Argument``; otherwise build a ``typer.Option``.
            kwargs: Extra keyword arguments forwarded to Typer.
                Passing ``help`` or ``allow_dash`` raises ``TypeError`` (they are set here).

        Returns:
            The object returned by ``typer.Argument`` or ``typer.Option``.
        """
        kwargs = dict(help=cleandoc(doc), **kwargs, allow_dash=True)
        if req:
            # positional arguments have no option names, so ``names`` is ignored here
            return typer.Argument(default, **kwargs)
        return typer.Option(default, *names, **kwargs)

    @staticmethod
    def _path(
        doc: str, *names, default: Optional[str], f: bool, d: bool, out: bool, req: bool, **kwargs
    ):
        """
        Build a Typer declaration for a filesystem path.

        Args:
            doc: Help text.
            names: Option names; only used when ``req`` is false.
            default: Default path, or None.
            f: Passed to Typer as ``file_okay``.
            d: Passed to Typer as ``dir_okay``.
            out: Whether the path is an output; controls ``exists``, ``readable``, and ``writable``.
            req: If true, build an Argument; otherwise an Option.
            kwargs: Extra keyword arguments; these override the path settings computed here.

        Returns:
            The object returned by ``_Args._arg``.
        """
        # if it's None, we're going to have a special default set afterward,
        # so we'll explain it in the doc
        if out and default is None:
            # Merge instead of ``dict(show_default=True, **kwargs)`` so that a caller who
            # passes ``show_default`` explicitly overrides it rather than causing a
            # "multiple values for keyword argument" TypeError.
            kwargs = {"show_default": True, **kwargs}
        # NOTE(review): ``readable`` is requested for outputs and ``writable`` for inputs, which
        # looks inverted. Left unchanged because the ``out_path`` helpers pass ``out=False`` and
        # currently get ``writable=True`` through this mapping -- confirm before flipping.
        path_settings = dict(
            exists=not out,
            dir_okay=d,
            file_okay=f,
            readable=out,
            writable=not out,
        )
        merged = {**path_settings, **kwargs}
        return _Args._arg(doc, *names, default=default, req=req, **merged)
46
47
48
class Arg(_Args):
    """Factories for required (positional) Typer arguments."""

    @staticmethod
    def out_file(doc: str, *names, default: Optional[str] = None, **kwargs):
        """Declare a required argument for an output file (directories are rejected)."""
        return _Args._path(
            doc, *names, default=default, f=True, d=False, out=True, req=True, **kwargs
        )

    @staticmethod
    def out_dir(doc: str, *names, default: Optional[str] = None, **kwargs):
        """Declare a required argument for an output directory (regular files are rejected)."""
        # f=False mirrors ``in_dir``: an existing regular file is not a valid directory
        return _Args._path(
            doc, *names, default=default, f=False, d=True, out=True, req=True, **kwargs
        )

    @staticmethod
    def out_path(doc: str, *names, default: Optional[str] = None, **kwargs):
        """Declare a required argument for an output path that may be a file or a directory."""
        # An output path does not have to exist yet; same setting as ``Opt.out_path``.
        kwargs.setdefault("exists", False)
        return _Args._path(
            doc, *names, default=default, f=True, d=True, out=False, req=True, **kwargs
        )

    @staticmethod
    def in_file(doc: str, *names, default: Optional[str] = None, **kwargs):
        """Declare a required argument for an existing input file."""
        return _Args._path(
            doc, *names, default=default, f=True, d=False, out=False, req=True, **kwargs
        )

    @staticmethod
    def in_dir(doc: str, *names, default: Optional[str] = None, **kwargs):
        """Declare a required argument for an existing input directory."""
        return _Args._path(
            doc, *names, default=default, f=False, d=True, out=False, req=True, **kwargs
        )

    @staticmethod
    def in_path(doc: str, *names, default: Optional[str] = None, **kwargs):
        """Declare a required argument for an existing input file or directory."""
        return _Args._path(
            doc, *names, default=default, f=True, d=True, out=False, req=True, **kwargs
        )

    @staticmethod
    def val(doc: str, *names, default: Optional[T] = None, **kwargs):
        """Declare a required argument holding a plain (non-path) value."""
        return _Args._arg(doc, *names, default=default, req=True, **kwargs)

    @staticmethod
    def x(doc: str, *names, default: Optional[T] = None, **kwargs):
        """Declare a required argument holding a plain value; same as ``Arg.val``."""
        return _Args._arg(doc, *names, default=default, req=True, **kwargs)
88
89
90
class Opt(_Args):
    """Factories for optional Typer options."""

    @staticmethod
    def out_file(doc: str, *names, default: Optional[str] = None, **kwargs):
        """Declare an option for an output file (directories are rejected)."""
        return _Args._path(
            doc, *names, default=default, f=True, d=False, out=True, req=False, **kwargs
        )

    @staticmethod
    def out_dir(doc: str, *names, default: Optional[str] = None, **kwargs):
        """Declare an option for an output directory (regular files are rejected)."""
        # f=False mirrors ``in_dir``: an existing regular file is not a valid directory
        return _Args._path(
            doc, *names, default=default, f=False, d=True, out=True, req=False, **kwargs
        )

    @staticmethod
    def out_path(doc: str, *names, default: Optional[str] = None, **kwargs):
        """Declare an option for an output path that may be a file or a directory."""
        # setdefault (rather than a literal keyword) lets callers override ``exists``
        # without triggering a duplicate-keyword TypeError
        kwargs.setdefault("exists", False)
        return _Args._path(
            doc, *names, default=default, f=True, d=True, out=False, req=False, **kwargs
        )

    @staticmethod
    def in_file(doc: str, *names, default: Optional[str] = None, **kwargs):
        """Declare an option for an existing input file."""
        return _Args._path(
            doc, *names, default=default, f=True, d=False, out=False, req=False, **kwargs
        )

    @staticmethod
    def in_dir(doc: str, *names, default: Optional[str] = None, **kwargs):
        """Declare an option for an existing input directory."""
        return _Args._path(
            doc, *names, default=default, f=False, d=True, out=False, req=False, **kwargs
        )

    @staticmethod
    def in_path(doc: str, *names, default: Optional[str] = None, **kwargs):
        """Declare an option for an input file or directory that is not required to exist."""
        kwargs.setdefault("exists", False)
        return _Args._path(
            doc, *names, default=default, f=True, d=True, out=False, req=False, **kwargs
        )

    @staticmethod
    def val(doc: str, *names, default: Optional[T] = None, **kwargs):
        """Declare an option holding a plain (non-path) value."""
        return _Args._arg(doc, *names, default=default, req=False, **kwargs)

    @staticmethod
    def flag(doc: str, *names, **kwargs):
        """Declare a boolean option whose default is False."""
        return _Args._arg(doc, *names, default=False, req=False, **kwargs)
150
151
152
def _strip(s: str) -> str:
    """Trim whitespace, then single quotes, then double quotes, then whitespace again."""
    trimmed = s.strip()
    for quote in ("'", '"'):
        trimmed = trimmed.strip(quote)
    return trimmed.strip()
154
155
156
class CommonArgs:
    """Formatting and parsing helpers plus Typer parameter declarations shared by commands."""

    @staticmethod
    def definition_bullets(dct: Mapping[Any, Any], colon: str = ": ", indent: int = 12) -> str:
        """
        Format a mapping as bullet items of the form `` - key: value``.

        Items are separated by two ``os.linesep`` followed by ``indent`` spaces.
        """
        joiner = os.linesep * 2 + " " * indent
        return joiner.join(f" - {k}{colon}{v}" for k, v in dct.items())

    @staticmethod
    def definition_list(dct: Mapping[Any, Any], colon: str = ": ", sep: str = "; ") -> str:
        """Format a mapping as ``key: value`` items joined by ``sep``."""
        return sep.join(f"{k}{colon}{v}" for k, v in dct.items())

    @staticmethod
    def list(
        lst: Iterable[Any], attr: Union[None, str, Callable[[Any], Any]] = None, sep: str = "; "
    ) -> str:
        """
        Join the items of ``lst`` into one string.

        Args:
            lst: The items to render.
            attr: How to render an item. If None, enum members use ``.name`` and everything else
                uses ``str``; if a string, that attribute is read from each item;
                otherwise it is called with each item.
            sep: The separator placed between rendered items.
        """

        def render(item: Any) -> str:
            if attr is None:
                return item.name if isinstance(item, enum.Enum) else str(item)
            if isinstance(attr, str):
                return str(getattr(item, attr))
            return str(attr(item))

        return sep.join(render(item) for item in lst)

    @staticmethod
    def parse_taxon_id_or_name(taxon: Union[int, str]) -> Union[int, str]:
        """
        Normalize a taxon given as either an ID or a name.

        Args:
            taxon: An integer ID, a string of decimal digits, or a name.

        Returns:
            An int for integer input and for all-digit strings; otherwise the string unchanged.

        Raises:
            ValueError: If ``taxon`` is neither an int nor a str.
        """
        if isinstance(taxon, int):
            return taxon
        if isinstance(taxon, str):
            # The digit check must come before the plain-string return; otherwise numeric IDs
            # given as text are never converted. ``isdecimal`` only accepts what ``int`` parses.
            return int(taxon) if taxon.isdecimal() else taxon
        raise ValueError(f"Taxon {taxon} must be an ID or name")

    @staticmethod
    def parse_taxon_id(taxon: Union[int, str]) -> int:
        """
        Convert ``taxon`` to an integer ID.

        Raises:
            ValueError: If ``int(taxon)`` fails with a ValueError.
        """
        try:
            return int(taxon)
        except ValueError:
            raise ValueError(f"Taxon {taxon} must be an exact ID") from None

    @staticmethod
    def parse_taxa(taxa: str) -> Sequence[Union[int, str]]:
        """Split a comma-separated string and parse each stripped item as a taxon ID or name."""
        return [CommonArgs.parse_taxon_id_or_name(_strip(item)) for item in taxa.split(",")]

    # Help-text fragment describing supported output formats; interpolated into docs below.
    output_formats = r"""
        The filename extension must be one of: .feather; .snappy/.parquet;
        .csv, .tsv, .tab, .json (with optional .gz/.bz2/.zip/.xz);
        Feather (.feather) and Parquet (.snappy) are recommended.
        If only a filename suffix is provided, only sets the format and filename suffix.
        If no extension is provided, interprets that path as a directory and uses the default format (Feather).

        Will fail if the file exists, unless `--replace` is passed.
    """

    # Help-text fragment describing supported input formats; interpolated into docs below.
    input_formats = r"""
        The filename extension must be one of: .feather; .snappy/.parquet;
        .csv, .tsv, .tab (with optional .gz/.bz2/.zip/.xz);
        Feather (.feather) and Parquet (.snappy) are recommended formats.
        (Some other formats, such as .json or .h5, may be permitted but are discouraged.)
    """

    file_input = Arg.in_file("The path to a file output by `:concat` or `:search`.", "input")

    compounds = Arg.in_file(
        """
        The path to the input file.
        One of:

          (A) *.txt, *.lines, or *.list (optionally with .gz/.zip/.xz/.bz2), with one InChI Key per line;

          (B) A *.csv, *.tsv, *.tab file (or .gz/.zip/.xz/.bz2 variant) with a column called 'inchikey'; OR

          (C) An Arrow *.feather file or Parquet *.snappy file with a column called 'inchikey'
        """
    )

    input_dir = Arg.in_dir(
        rf"""
        The path to a directory containing files output from mandos search.

        {input_formats}
        Note that *all* matching files will be included.
        Provide ``--exclude`` if needed.
        """
    )

    # ``{{...}}`` keeps a literal "{...}" placeholder in the help text; a bare ``{...}`` inside
    # an f-string would format the Ellipsis object and print "Ellipsis".
    to_single = Opt.out_file(
        rf"""
        The path to the output file.

        {output_formats}

        [default: <input-path>/{{...}}.{MANDOS_SETTINGS.default_table_suffix}]
        """,
        "--to",
    )

    out_dir = Opt.val(
        rf"""
        Choose the output directory.

        If ``--to`` is set to a relative path, this value is prepended to ``--to``.

        Examples:

        - ``--dir output --to abc.snappy`` yields ``output/abc.snappy`` (to sets filename+format)
        - ``--dir output --to .snappy`` yields ``output/<key>.snappy`` (to sets format)
        - ``--dir output --to my_dir`` yields ``output/my_dir`` (to sets dir)
        - ``--dir output --to /my/absolute/path`` will error
        - ``--dir output --to /my/absolute/path/abc.snappy`` will error

        {output_formats}

        [default: inferred from --to]
        """,
    )

    input_matrix: Path = Arg.in_file(
        rf"""
        The path to a similarity matrix file to read.

        {input_formats}
        .txt/.txt.gz/etc. is assumed to be whitespace-delimited.
        Compounds can be referenced by InChI Key or compound ID (matching what you provided for the search).
        The set of compounds here must exactly match the set of compounds in the input files.
        For Excel and text formats, the first row and the first column (header and index) indicate the compounds.

        Values must be floating-point.
        """
    )
    replace: bool = typer.Option(
        False, help="Replace output file(s) if they exist. See also: --skip"
    )

    # The default must be passed by keyword: a second positional string would be treated
    # as another option name, leaving the real default as None.
    taxa = Opt.val(
        r"""
        The IDs or names of UniProt taxa, comma-separated.
        Taxon names and common names can be used for vertebrate species (where available).

        This can have a significant effect on searches. See the docs for more info.

        [default: 7742] (Euteleostomi)
        """,
        "--taxa",
        default="7742",
        show_default=False,
    )

    seed = Opt.val(r"A random seed (integer).", "--seed", default=0)

    n_samples = Opt.val(
        "Number of bootstrap samples (positive integer).",
        "--samples",
        min=1,
        default=2000,
    )

    exclude = Opt.val("A glob pattern matching input filenames to ignore.")

    verbose: bool = Opt.flag(
        r"Configure logger to output INFO (use ``--quiet`` for less info)",
        "-v",
        "--verbose",
    )

    quiet: bool = Opt.flag(
        r"Configure logger to output only ERROR (use ``--verbose`` for more info)",
        "-q",
        "--quiet",
    )

    in_cache: bool = Opt.flag(
        r"Do not download any data and fail if needed data is not cached.",
        hidden=True,
    )

    as_of: Optional[str] = Opt.val(
        """
        Restrict to data that was cached as of some date and time.
        This option can be useful for reproducibility.

        Note that this should imply that underlying data sources (such as of deposition or publication)
        are restricted by this datetime, but that is not checked.

        Examples:

            - --as-of 2021-10-11T14:12:13Z
            - --as-of 2021-10-11T14:12:13+14:00
            - --as-of 2021-10-11T14:12:13.496Z
            - --as-of "2021-10-11 14:12:13,496,915+14:00"
            - --as-of "2021-10-11 14:12:13-8:00 [America/Los_Angeles]"

        This is a subset of ISO 8601, represented as ``YYYY-mm-dd('T'|' '):hh:MM:ss(i)Z``.
        Precision must be nanosecond or less, and ``,`` and ``.`` are equivalent as a thousands separator.
        You can provide an IANA zone name in square brackets for context, but the offset is still required.
        """
    )

    log_path = Opt.out_path(
        r"""
        Also log to a file.
        The suffix can be .log, .log.gz, .log.zip, .json, or .json.gz.
        You can prefix the path with :LEVEL: to control the level. For example, ``:INFO:out.log``
        """,
        "--log",
        show_default=True,
    )

    no_setup: bool = Opt.flag(
        r"Skip setup, such as configuring logging.",
        "--no-setup",
        hidden=True,
    )
373
374
375
# Typer application object; ``run`` is registered on it as a command.
cli = typer.Typer()


@cli.command()
def run(
    path: Path = CommonArgs.input_dir,
    x=CommonArgs.log_path,
):
    # Intentionally a no-op: the parameters exist only so Typer builds the command-line
    # interface from the shared declarations. (A comment is used instead of a docstring
    # because Typer would show a docstring as the command's help text.)
    return None


if __name__ == "__main__":
    # Run ``run`` directly as a single-command application.
    typer.run(run)


# Public names exported by ``from <module> import *``.
__all__ = ["CommonArgs", "Arg", "Opt"]
391