from inspect import cleandoc
from typing import Mapping, Optional

import typer

from mandos.entry.utils._arg_utils import ArgUtils, Opt
from mandos.model.apis.chembl_support.chembl_targets import ConfidenceLevel, TargetType
from mandos.model.apis.chembl_support.target_traversal import TargetTraversalStrategies


def _stringify(keys: Mapping[str, str]): |
12
|
|
|
return ", ".join((k if v is None else f"{k} ({v.lower()})" for k, v in keys.items())) |
|
|
|
|
13
|
|
|
|
14
|
|
|
|
15
|
|
|
class EntryArgs:
    """
    Typer option and flag definitions, grouped by data source.

    Each class attribute holds the object returned by ``typer.Option``, ``Opt.flag``,
    or ``Opt.val``.
    NOTE(review): presumably these are used as parameter defaults in CLI command
    signatures; confirm against the callers.
    """

    @staticmethod
    def key(name: str) -> typer.Option:
        """
        Build the option for the unique key of a search.

        Args:
            name: Passed to ``typer.Option`` as its first positional argument,
                i.e. the default value of the option.

        Returns:
            The value returned by ``typer.Option``.
        """
        # NOTE(review): ``min``/``max`` are numeric-range constraints in Typer; it is unclear
        # whether they have any effect on a string-valued key. Also, the help text says
        # "<60-character" while ``max`` is 120. Confirm the intent.
        return typer.Option(
            name,
            min=1,
            max=120,
            help=cleandoc(
                r"""
                A unique key to designate the search.

                A <60-character name that describes the search and parameters.
                Intermediate output filenames will use this value.
                """
            ),
        )

    check = Opt.flag(
        r"Do not run searches; just check everything.",
        hidden=True,
    )

    ###############################################################################################
    #                                       CHEMBL                                                #
    ###############################################################################################

    # show_default is off because the help text spells out the default itself.
    traversal = typer.Option(
        "@null",
        "--traversal",
        show_default=False,
        help=cleandoc(
            rf"""
            Target traversal strategy name, file, or class.
            This is an experimental option. See the docs.

            Can be one of:
            (A) A standard strategy name, starting with @;
            (B) The path to a *.strat file; OR
            (C) The fully qualified name of a TargetTraversal

            Standard strategies:
            {ArgUtils.list(TargetTraversalStrategies.standard_strategies(), sep="; ")}

            [default: @null] (leave targets as-is)
            """
        ),
    )

    target_types = typer.Option(
        "@molecular",
        "--targets",
        help=cleandoc(
            f"""
            The accepted target types, comma-separated.

            NOTE: This affects only the types that are accepted after traversal,
            and the types must be included in the traversal.
            This means that this must be AT LEAST as restrictive as the traversal strategy.

            The ChEMBL-defined types are:
            {ArgUtils.list(TargetType)}

            These special names are also accepted:

            {ArgUtils.definition_bullets(TargetType.special_type_names())}
            """
        ),
    )

    # show_default is off because the help text spells out the default itself.
    min_confidence = typer.Option(
        3,
        "--confidence",
        min=0,
        max=9,
        show_default=False,
        help=cleandoc(
            rf"""
            Minimum target confidence score, inclusive.

            This is useful to modify in only some cases.
            More important options are min_pchembl and taxa.

            Values are: {ArgUtils.list(ConfidenceLevel)}

            [default: 3] ("Target assigned is molecular non-protein target")
            """
        ),
    )

    min_pchembl = typer.Option(
        0.0,
        "--pchembl",
        min=0.0,
        help=cleandoc(
            """
            Minimum pCHEMBL value, inclusive.

            Set to 0 if "cutoff" is set.
            """
        ),
    )

    # show_default is off because the help text spells out the default itself.
    binds_cutoff = typer.Option(
        7.0,
        "--binding",
        min=0.0,
        show_default=False,
        help=cleandoc(
            """
            Cutoff of pCHEMBL at which "binds" is declared.

            Applies only if the relation is >, >=, =, or ~.

            [default: 7.0 (100 nanomolar)]
            """
        ),
    )

    # NOTE(review): the help text says the value must be 70, 80, or 90, but only the
    # lower bound (min=70) is enforced here. Confirm whether it is validated elsewhere.
    min_threshold = typer.Option(
        70,
        "--min-threshold",
        min=70,
        help=cleandoc(
            """
            Minimum pCHEMBL threshold used to limit the true examples when training the QSAR model.

            Must be either 70, 80, or 90.
            An "active" or "inactive" prediction is required for this threshold or higher.
            """
        ),
    )

    binding_search_name = typer.Option(
        None,
        help=cleandoc(
            r"""
            The fully qualified name of a class inheriting ``BindingSearch``.

            If specified, all parameters above are passed to its constructor.
            """
        ),
    )

    chembl_trial = typer.Option(
        0,
        "--phase",
        help=cleandoc(
            r"""
            Minimum clinical trial phase, inclusive.

            Values are: 0, 1, 2, 3.
            """
        ),
        min=0,
        max=3,
    )

    atc_level = typer.Option("1,2,3,4", help="""List of ATC levels, comma-separated.""")

    ###############################################################################################
    #                                       PUBCHEM                                               #
    ###############################################################################################

    min_cooccurrence_score = typer.Option(
        0.0,
        help=r"Minimum enrichment score, inclusive. See the docs.",
        min=0.0,
    )

    min_cooccurring_articles = typer.Option(
        0,
        help=r"Minimum number of articles for both the compound and object, inclusive.",
        min=0,
    )

    match_name = Opt.flag(
        r"""
        Require that the name of the compound(s) exactly matches those on PubChem (case-insensitive).
        """
    )

    banned_sources = Opt.val(
        r"""
        Comma-separated list of sources to exclude.
        """
    )

    min_nanomolar = Opt.val(
        r"""
        Minimum tissue concentration in nanomolar required to include.
        """,
        default=1,
    )

    acute_effect_level = typer.Option(
        2,
        min=1,
        max=2,
        help=cleandoc(
            r"""
            The level in the ChemIDPlus hierarchy of effect names.
            (E.g. 'behavioral' for level 1 and 'behavioral: excitement' for level 2.)
            """
        ),
    )

    # Short key -> longer description shown in parentheses in the help text.
    # A value of None means the key is listed without a description.
    KNOWN_USEFUL_KEYS: Mapping[str, Optional[str]] = {
        "weight": "Molecular Weight",
        "xlogp3": None,
        "hydrogen-bond-donors": "Hydrogen Bond Donor Count",
        "hydrogen-bond-acceptors": "Hydrogen Bond Acceptor Count",
        "rotatable-bonds": "Rotatable Bond Count",
        "exact-mass": None,
        "monoisotopic-mass": None,
        "tpsa": "Topological Polar Surface Area",
        "heavy-atoms": "Heavy Atom Count",
        "charge": "Formal Charge",
        "complexity": None,
    }

    # Same layout as KNOWN_USEFUL_KEYS; listed under "Less-useful keys" in the help text.
    KNOWN_USELESS_KEYS: Mapping[str, Optional[str]] = {
        "components": "Covalently-Bonded Unit Count",
        "isotope-atoms": "Isotope Atom Count",
        "defined-atom-stereocenter-count": None,
        "undefined-atom-stereocenter-count": None,
        "defined-bond-stereocenter-count": None,
        "undefined-bond-stereocenter-count": None,
        "compound-is-canonicalized": None,
    }

    # NOTE(review): the default uses "heavy-atom-count" whereas KNOWN_USEFUL_KEYS lists
    # "heavy-atoms"; presumably both are resolved by the key matching. Confirm.
    pubchem_computed_keys = typer.Option(
        "weight,xlogp3,tpsa,complexity,exact-mass,heavy-atom-count,charge",
        help=cleandoc(
            rf"""
            The keys of the computed properties, comma-separated.

            Keys are case-insensitive and mainly ignore punctuation.

            Main keys: {_stringify(KNOWN_USEFUL_KEYS)}

            Less-useful keys: {_stringify(KNOWN_USELESS_KEYS)}
            """
        ),
    )

    ###############################################################################################
    #                                         G2P                                                 #
    ###############################################################################################

    ###############################################################################################
    #                                        HMDB                                                 #
    ###############################################################################################

    ###############################################################################################
    #                                        META                                                 #
    ###############################################################################################


# Only the option container is exported; the _stringify helper is not.
__all__ = ["EntryArgs"]