Passed
Push — main ( cdf0f7...3de8e8 )
by Douglas
01:40
created

mandos.entries.args.EntryArgs.key()   A

Complexity

Conditions 1

Size

Total Lines 7
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 7
nop 1
dl 0
loc 7
rs 10
c 0
b 0
f 0
1
from inspect import cleandoc as doc
0 ignored issues
show
introduced by
Missing module docstring
Loading history...
2
from pathlib import Path
3
from typing import Mapping, Optional
4
5
import typer
0 ignored issues
show
introduced by
Unable to import 'typer'
Loading history...
6
7
from mandos.model.chembl_support import DataValidityComment
8
from mandos.model.chembl_support.chembl_targets import TargetType, ConfidenceLevel
9
from mandos.search.chembl.target_traversal import TargetTraversalStrategies
10
11
12
def _stringify(keys: Mapping[str, str]):
13
    return ", ".join((k if v is None else f"{k} ({v.lower()})" for k, v in keys.items()))
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable k does not seem to be defined.
Loading history...
Comprehensibility Best Practice introduced by
The variable v does not seem to be defined.
Loading history...
14
15
16
class EntryArgs:
    """
    Shared ``typer`` argument and option definitions.

    Every attribute is a ``typer.Argument(...)`` / ``typer.Option(...)`` value (or a
    factory for one, see :meth:`key`), grouped here so that help texts and defaults
    are declared once. Presumably these are used as parameter defaults of the
    command functions defined elsewhere -- verify against the callers.
    """

    # --- input / output ---

    path = typer.Argument(
        ...,
        exists=True,
        dir_okay=False,
        readable=True,
        help=doc(
            """
            The path to the input file.
            One of:

              (A) *.txt, *.lines, or *.list (optionally with .gz/.zip/.xz/.bz2), with one InChI Key per line;

              (B) A *.csv, *.tsv, *.tab file (or .gz/.zip/.xz/.bz2 variant) with a column called 'inchikey'; OR

              (C) An Arrow *.feather file or Parquet *.snappy file with a column called 'inchikey'
        """
        ),
    )

    to = typer.Option(
        None,
        show_default=False,
        help=doc(
            """
            The path to the output file.
            If not set, chooses <input-path>-<search>.csv.gz
            The filename extension should be one of: .csv, .tsv, .tab, .json (with optional .gz/.bz2/.zip/.xz);
            .feather; .snappy (or .parquet); or .h5.
            Feather (.feather), Parquet (.snappy), and tab-delimited (.tsv.gz) are recommended.
            If H5, will add a new dataset named <key> to any existing archive.
            Will fail if the file exists unless the `--replace` flag is set.

            If only the filename extension is provided (e.g. --to '.feather'), will only change the output format
            (and filename extension).
        """
        ),
    )

    replace: bool = typer.Option(
        False, help="Replace the output file if it exists. See also: --skip"
    )

    skip: bool = typer.Option(
        False, help="Skip any search if the output file exists (only warns). See also: --replace"
    )

    in_cache: bool = typer.Option(
        False,
        help="Do not download any data. Fails if the needed data is not cached.",
        hidden=True,
    )

    # --- logging / setup ---

    verbose: bool = typer.Option(
        False,
        "--verbose",
        help="Configure logger to output INFO (use ``--quiet`` for less info)",
    )

    quiet: bool = typer.Option(
        False,
        "--quiet",
        help="Configure logger to output only ERROR (use ``--verbose`` for more info)",
    )

    log_path: Optional[Path] = typer.Option(
        None,
        "--log",
        help="""
            Also log to a file.
            The suffix can be .log, .log.gz, .log.zip, .json, or .json.gz.
            You can prefix the path with :LEVEL: to control the level. For example, :INFO:out.log
        """,
    )

    no_setup: bool = typer.Option(
        False,
        "--no-setup",
        hidden=True,
        help="Skip setup, such as configuring logging.",
    )

    @staticmethod
    def key(name: str) -> typer.Option:
        """
        Build the option for a search's unique key.

        Args:
            name: Passed as the first positional argument of ``typer.Option``,
                  i.e. the option's default value.

        Returns:
            The value returned by ``typer.Option(...)``.
        """
        # NOTE(review): ``min``/``max`` are numeric-range settings in typer; confirm they
        # have the intended effect for a text key. The help text says "<60-character"
        # while ``max`` is 120 -- confirm which limit is intended.
        return typer.Option(
            name,
            min=1,
            max=120,
            help="""
            A free-text unique key for the search.
            Should be a short, <60-character name that describes the search and any parameters.
            The output file will be named according to a 'sanitized' variant of this value.
            """,
        )

    test = typer.Option(
        False,
        "--check",
        help="Do not run searches; just check that the parameters are ok.",
    )

    # --- search parameters ---

    taxa = typer.Option(
        "7742",
        show_default=False,
        help=doc(
            """
        The IDs or names of UniProt taxa, comma-separated.
        Taxon names and common names can be used for vertebrate species (where available).

        This can have a significant effect on searches. See the docs for more info.

        [default: 7742] (Euteleostomi)
        """
        ),
    )

    # NOTE(review): the default is a comma-separated string, but ``min``/``max`` are
    # numeric-range settings -- confirm they are applied as intended.
    atc_level = typer.Option(
        "1,2,3,4", min=1, max=4, help="""List of ATC levels, comma-separated."""
    )

    min_cooccurrence_score = typer.Option(
        0.0,
        help="Minimum enrichment score, inclusive. See docs for more info.",
        min=0.0,
    )

    min_cooccurring_articles = typer.Option(
        0,
        help="Minimum number of articles for both the compound and object, inclusive.",
        min=0,
    )

    name_must_match = typer.Option(
        False,
        help=doc(
            """
        Require that the name of the compound(s) exactly matches the compound name on PubChem (case-insensitive)
        """
        ),
    )

    acute_effect_level = typer.Option(
        2,
        min=1,
        max=2,
        help="""
      The level in the ChemIDPlus hierarchy of effect names.
      Level 1: e.g. 'behavioral'
      Level 2: 'behavioral: excitement'
      """,
    )

    # The list of standard strategies is interpolated into the help text at import time.
    traversal_strategy = typer.Option(
        "@null",
        "--traversal",
        show_default=False,
        help=doc(
            """
        Target traversal strategy name, file, or class.
        Dictates the way the network of ChEMBL targets is traversed (from the annotated target as a source).
        Specifies the network links that are followed and which targets are 'accepted' for final annotations.
        This option has a dramatic effect on the search. See the docs for more info.

        Can be one of:
        (A) A standard strategy name, starting with @;
        (B) The path to a ``*.strat`` file; OR
        (C) The fully qualified name of a ``TargetTraversal``

        The standard traversal strategies are: {}

        [default: @null] (No traversal; targets as-is)
        """.format(
                "; ".join(TargetTraversalStrategies.standard_strategies())
            )
        ),
    )

    target_types = typer.Option(
        "@molecular",
        "--targets",
        show_default=False,
        help=doc(
            """
        The accepted target types, comma-separated.

        NOTE: This affects only the types that are accepted after traversal,
        and the types must be included in the traversal.
        This means that this must be AT LEAST as restrictive as the traversal strategy.

        The ChEMBL-defined types are:

          {}

        These special names are also accepted:

          - {}

        [default: @molecular]
        """.format(
                "; ".join([s.name for s in TargetType.all_types()]),
                "\n\n          - ".join(
                    [f"{k} ({v})" for k, v in TargetType.special_type_names().items()]
                ),
            )
        ),
    )

    min_confidence = typer.Option(
        3,
        "--confidence",
        min=0,
        max=9,
        show_default=False,
        help=doc(
            """
        Minimum target confidence score, inclusive.
        This is useful to modify in only some cases. More important options are min_pchembl and taxa.

        Values are: {}

        [default: 3]
        """.format(
                "; ".join([f"{s.value} ({s.name})" for s in ConfidenceLevel])
            )
        ),
    )

    relations = typer.Option(
        "<,<=,=",
        "--relations",
        show_default=False,
        help=doc(
            """
        Assay activity relations allowed, comma-separated.
        If post-processing yourself, consider including all.
        Values are: <, <=, =, >, >=, ~.
        [default: <,<=,=]
        """
        ),
    )

    min_pchembl = typer.Option(
        6.0,
        "--pchembl",
        min=0.0,
        show_default=False,
        help=doc(
            """
        Minimum pCHEMBL value, inclusive.
        If post-processing yourself, consider setting to 0.0.
        [default: 6.0]
        """
        ),
    )

    banned_flags = typer.Option(
        "@negative",
        show_default=False,
        help=doc(
            """
        Exclude activity annotations with data validity flags, comma-separated.
        It is rare to need to change this.

        Values are: {}.

        Special sets are:

          - @all (all flags are banned)

          - @negative ({})

          - @positive ({})

        [default: @negative]
        """.format(
                "; ".join([s.name for s in DataValidityComment]),
                ", ".join([s.name for s in DataValidityComment.negative_comments()]),
                ", ".join([s.name for s in DataValidityComment.positive_comments()]),
            ),
        ),
    )

    binding_search_name = typer.Option(
        None,
        help="""
        The fully qualified name of a class inheriting ``BindingSearch``.
        If specified, all parameters above are passed to its constructor.
        """,
    )

    chembl_trial = typer.Option(
        0,
        "--phase",
        show_default=False,
        help=doc(
            """
        Minimum phase of a clinical trial, inclusive.
        Values are: 0, 1, 2, 3.
        [default: 0]
        """
        ),
        min=0,
        max=3,
    )

    # --- computed-property keys ---

    # Each table maps a short key to a display name, or to None when no display name
    # is given. Both are rendered into the help text of ``pubchem_computed_keys``.
    KNOWN_USEFUL_KEYS: Mapping[str, Optional[str]] = {
        "weight": "Molecular Weight",
        "xlogp3": None,
        "hydrogen-bond-donors": "Hydrogen Bond Donor Count",
        "hydrogen-bond-acceptors": "Hydrogen Bond Acceptor Count",
        "rotatable-bonds": "Rotatable Bond Count",
        "exact-mass": None,
        "monoisotopic-mass": None,
        "tpsa": "Topological Polar Surface Area",
        "heavy-atoms": "Heavy Atom Count",
        "charge": "Formal Charge",
        "complexity": None,
    }
    KNOWN_USELESS_KEYS: Mapping[str, Optional[str]] = {
        "components": "Covalently-Bonded Unit Count",
        "isotope-atoms": "Isotope Atom Count",
        "defined-atom-stereocenter-count": None,
        "undefined-atom-stereocenter-count": None,
        "defined-bond-stereocenter-count": None,
        "undefined-bond-stereocenter-count": None,
        "compound-is-canonicalized": None,
    }

    # NOTE(review): the default lists 'heavy-atom-count' while KNOWN_USEFUL_KEYS uses
    # 'heavy-atoms' -- confirm both resolve to the same property.
    pubchem_computed_keys = typer.Option(
        "weight,xlogp3,tpsa,complexity,exact-mass,heavy-atom-count,charge",
        help="""
            The keys of the computed properties, comma-separated.
            Key names are case-insensitive and ignore punctuation like underscores and hyphens.

            Known keys are: {}

            Known, less-useful (metadata-like) keys are: {}
        """.format(
            _stringify(KNOWN_USEFUL_KEYS), _stringify(KNOWN_USELESS_KEYS)
        ),
    )
356
357
358
# Names exported by ``from <this module> import *``.
__all__ = ["EntryArgs"]
359