1
|
|
|
""" |
2
|
|
|
PubChem querying API. |
3
|
|
|
""" |
4
|
|
|
from __future__ import annotations |
5
|
|
|
|
6
|
|
|
import io |
7
|
|
|
import time |
8
|
|
|
from datetime import datetime, timezone |
9
|
|
|
from typing import Any, List, Mapping, NamedTuple, Optional, Sequence, Union |
10
|
|
|
from urllib.error import HTTPError |
11
|
|
|
|
12
|
|
|
import orjson |
|
|
|
|
13
|
|
|
import pandas as pd |
|
|
|
|
14
|
|
|
import regex |
|
|
|
|
15
|
|
|
from pocketutils.core.dot_dict import NestedDotDict |
|
|
|
|
16
|
|
|
from pocketutils.core.exceptions import ( |
|
|
|
|
17
|
|
|
DataIntegrityError, |
18
|
|
|
DownloadError, |
19
|
|
|
LookupFailedError, |
20
|
|
|
) |
21
|
|
|
from pocketutils.core.query_utils import QueryExecutor, QueryMixin |
|
|
|
|
22
|
|
|
|
23
|
|
|
from mandos.model.apis.pubchem_api import PubchemApi, PubchemCompoundLookupError |
24
|
|
|
from mandos.model.apis.pubchem_support.pubchem_data import PubchemData |
25
|
|
|
from mandos.model.settings import QUERY_EXECUTORS, SETTINGS |
26
|
|
|
from mandos.model.utils.setup import logger |
27
|
|
|
|
28
|
|
|
# Matches the Open Graph URL <meta> tag of a PubChem compound HTML page and
# captures the numeric compound ID (CID) at the end of the URL as group 1.
_html_cid_pattern = regex.compile(
    r'<meta property="og:url" content="https://pubchem\.ncbi\.nlm\.nih\.gov/compound/(\d+)">',
    flags=regex.V1,
)
32
|
|
|
|
33
|
|
|
|
34
|
|
|
class _CidInchikey(NamedTuple): |
35
|
|
|
cid: int |
36
|
|
|
inchikey: str |
37
|
|
|
|
38
|
|
|
|
39
|
|
|
class QueryingPubchemApi(PubchemApi, QueryMixin):
    """
    PubChem API implementation that downloads compound data over HTTP.

    A full record for one compound is assembled from several PubChem endpoints
    (PUG REST, PUG View, the SDQ agent, the classification browser, and the
    link DB) and wrapped in a :class:`PubchemData`.
    """

    # Base URLs of the PubChem endpoints queried by this class.
    _pug = "https://pubchem.ncbi.nlm.nih.gov/rest/pug"
    _pug_view = "https://pubchem.ncbi.nlm.nih.gov/rest/pug_view"
    # NOTE: the attribute is spelled "_sdg" although the endpoint is "sdq";
    # the name is kept as-is because code outside this block may refer to it.
    _sdg = "https://pubchem.ncbi.nlm.nih.gov/sdq/sdqagent.cgi"
    _classifications = "https://pubchem.ncbi.nlm.nih.gov/classification/cgi/classifications.fcgi"
    _link_db = "https://pubchem.ncbi.nlm.nih.gov/link_db/link_db_server.cgi"

    def __init__(
        self,
        chem_data: bool = True,
        extra_tables: bool = False,
        classifiers: bool = False,
        extra_classifiers: bool = False,
        executor: QueryExecutor = QUERY_EXECUTORS.pubchem,
    ):
        """
        Args:
            chem_data: Download the structure record (see ``_fetch_structure_data``).
            extra_tables: Also download the additional external tables
                (see ``_tables_to_use``).
            classifiers: Download classification hierarchies
                (see ``_hierarchies_to_use``).
            extra_classifiers: Also download the additional hierarchies;
                only has an effect when ``classifiers`` is true.
            executor: The query executor stored for this instance and exposed
                through the ``executor`` property.
        """
        self._use_chem_data = chem_data
        self._use_extra_tables = extra_tables
        self._use_classifiers = classifiers
        self._use_extra_classifiers = extra_classifiers
        self._executor = executor

    @property
    def executor(self) -> QueryExecutor:
        """
        Returns the executor passed to ``__init__``.

        NOTE(review): this previously raised ``NotImplementedError`` even though
        ``__init__`` stores an executor that nothing else read. Presumably
        ``QueryMixin._query`` obtains its executor through this property --
        confirm against ``pocketutils.core.query_utils``.
        """
        return self._executor

    def find_inchikey(self, cid: int) -> str:
        """
        Looks up the InChIKey for a CID via ``fetch_properties``.

        Raises:
            PubchemCompoundLookupError: If the property download fails with an HTTP error
            KeyError: If the downloaded properties contain no ``"InChIKey"`` entry
        """
        # return self.fetch_data(cid).names_and_identifiers.inchikey
        return self.fetch_properties(cid)["InChIKey"]

    def find_id(self, inchikey: str) -> Optional[int]:
        """
        Finds the CID for an InChIKey, or returns ``None`` if the lookup fails
        with a ``PubchemCompoundLookupError``.
        """
        # we have to scrape to get the parent anyway,
        # so just download it
        # TODO: there's a faster way
        try:
            return self.fetch_data(inchikey).cid
        except PubchemCompoundLookupError:
            logger.opt(exception=True).debug(f"Could not find pubchem ID for {inchikey}")
            return None

    @staticmethod
    def _typed_value(value: Any) -> Any:
        """
        Returns the first of ``ival``, ``fval``, ``sval`` present in a property value,
        or ``None`` if the value carries none of them.
        """
        wrapped = NestedDotDict(value)
        for type_key in ("ival", "fval", "sval"):
            if type_key in wrapped:
                return wrapped[type_key]
        return None

    def fetch_properties(self, cid: int) -> Mapping[str, Any]:
        """
        Downloads the PUG compound record for ``cid`` and flattens its ``props``
        list into a mapping from ``urn.label`` to the property's value.

        Properties without a label or without a value are dropped.
        If several properties share a label, the last one wins.

        Raises:
            PubchemCompoundLookupError: If the download fails with an HTTP error
        """
        url = f"{self._pug}/compound/cid/{cid}/JSON"
        try:
            matches: NestedDotDict = self._query_json(url)
        except HTTPError as e:
            raise PubchemCompoundLookupError(f"Failed finding pubchem compound {cid}") from e
        raw_props = matches["PC_Compounds"][0]["props"]
        by_label = {NestedDotDict(p).get("urn.label"): p.get("value") for p in raw_props}
        props = {
            label: self._typed_value(value)
            for label, value in by_label.items()
            if label is not None and value is not None
        }
        logger.debug(f"DLed properties for {cid}")
        return props

    def fetch_data(self, inchikey: Union[str, int]) -> PubchemData:
        """
        Downloads the full data for a compound, resolved to its parent compound.

        Args:
            inchikey: Either an InChIKey (``str``) or a CID (``int``)

        Raises:
            PubchemCompoundLookupError: If the compound (or its parent) cannot be downloaded
        """
        # Dear God this is terrible
        # Here are the steps:
        # 1. Download HTML for the InChI key and scrape the CID
        # 2. Download the "display" JSON data from the CID
        # 3. Look for a Parent-type related compound. If it exists, download its display data
        # 4. Download the structural data and append it
        # 5. Download the external table CSVs and append them
        # 6. Download the link sets and append them
        # 7. Download the classifiers (hierarchies) and append them
        # 8. Attach metadata about how we found this.
        # 9. Return the stupid, stupid result as a massive JSON struct.
        logger.info(f"Downloading PubChem data for {inchikey}")
        if isinstance(inchikey, int):
            cid = inchikey
            # note: this might not be the parent
            # that's ok -- we're about to fix that
            inchikey = self.find_inchikey(cid)
            logger.debug(f"Matched CID {cid} to {inchikey}")
        else:
            cid = self._scrape_cid(inchikey)
            logger.debug(f"Matched inchikey {inchikey} to CID {cid} (scraped)")
        # records every (cid, inchikey) fetched along the way; see _get_linked_records
        stack: List[_CidInchikey] = []
        data = self._fetch_data(cid, inchikey, stack)
        logger.debug(f"DLed raw data for {cid}/{inchikey}")
        data = self._get_parent(cid, inchikey, data, stack)
        logger.debug(f"DLed PubChem compound {cid}")
        return data

    def _scrape_cid(self, inchikey: str) -> int:
        """
        Finds the CID for an InChIKey by downloading the compound's HTML page
        and extracting the ID from its ``og:url`` meta tag.

        The download is attempted up to ``SETTINGS.pubchem_n_tries`` times;
        only ``ConnectionAbortedError`` triggers another attempt.

        Raises:
            PubchemCompoundLookupError: If the page request fails with an HTTP error
            DownloadError: If no attempt produced a page
            DataIntegrityError: If the page does not contain the expected meta tag
        """
        # This is awful
        # Every attempt to get the actual, correct, unique CID corresponding to the inchikey
        # failed with every proper PubChem API
        # We can't use <pug_view>/data/compound/<inchikey> -- we can only use a CID there
        # I found it with a PUG API
        # https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/CID/GJSURZIOUXUGAL-UHFFFAOYSA-N/record/JSON
        # But that returns multiple results!!
        # There's no apparent way to find out which one is real
        # I tried then querying each found CID, getting the display data, and looking at their parents
        # Unfortunately, we end up with multiple contradictory parents
        # Plus, that's insanely slow -- we have to get the full JSON data for each parent
        # Even worse -- the PubChem API docs LIE!!
        # Using ?cids_type=parent DOES NOT GIVE THE PARENT compound
        # Ex: https://pubchem.ncbi.nlm.nih.gov/compound/656832
        # This is cocaine HCl, which has cocaine (446220) as a parent
        # https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/656832/JSON
        # gives 656832 back again
        # same thing when querying by inchikey
        # Ultimately, I found that I can get HTML containing the CID from an inchikey
        # From there, we'll just have to download its "display" data and get the parent, then download that data
        url = f"https://pubchem.ncbi.nlm.nih.gov/compound/{inchikey}"
        html = None
        for _ in range(SETTINGS.pubchem_n_tries):
            try:
                html = self._query(url)
            except ConnectionAbortedError:
                logger.opt(exception=True).warning(
                    f"Connection aborted for {inchikey} [url: {url}]"
                )
                continue
            except HTTPError as e:
                raise PubchemCompoundLookupError(
                    f"Failed finding pubchem compound (HTML) from {inchikey} [url: {url}]"
                ) from e
            # success: stop here rather than re-downloading on every remaining try
            break
        if html is None:
            # reachable when every attempt was aborted or the configured try count is < 1
            raise DownloadError(
                f"Failed downloading HTML for {inchikey} [url: {url}]"
                f" after {SETTINGS.pubchem_n_tries} tries"
            )
        match = _html_cid_pattern.search(html)
        if match is None:
            raise DataIntegrityError(
                f"Something is wrong with the HTML from {url}; og:url not found"
            )
        return int(match.group(1))

    def _get_parent(
        self, cid: int, inchikey: str, data: PubchemData, stack: List[_CidInchikey]
    ) -> PubchemData:
        """
        Returns ``data`` itself if it has no parent compound (``data.parent_or_none``
        is ``None``); otherwise downloads and returns the parent's data.

        The parent's record is fetched with the child's ``inchikey`` as its lookup key.

        Raises:
            PubchemCompoundLookupError: If the parent cannot be downloaded
        """
        # guard with is not None: we're not caching, so don't do it twice
        parent = data.parent_or_none
        if parent is None:
            logger.info(f"{cid}/{inchikey} is its own parent")
            return data
        logger.info(f"{cid}/{inchikey} has parent {parent}")
        try:
            return self._fetch_data(parent, inchikey, stack)
        except HTTPError as e:
            raise PubchemCompoundLookupError(
                "Failed finding pubchem parent compound (JSON)"
                f" for cid {parent}, child cid {cid}, inchikey {inchikey}"
            ) from e

    def _fetch_data(self, cid: int, inchikey: str, stack: List[_CidInchikey]) -> PubchemData:
        """
        Downloads all sections for ``cid``, attaches fetch metadata under ``"meta"``,
        strips ``"DisplayControls"`` keys, and appends ``(cid, inchikey)`` to ``stack``.

        Raises:
            PubchemCompoundLookupError: If any section download fails with an HTTP error
        """
        when_started = datetime.now(timezone.utc).astimezone()
        t0 = time.monotonic()
        try:
            data = self._fetch_core_data(cid, stack)
        except HTTPError as e:
            raise PubchemCompoundLookupError(
                f"Failed finding pubchem compound (JSON) from cid {cid}, inchikey {inchikey}"
            ) from e
        t1 = time.monotonic()
        when_finished = datetime.now(timezone.utc).astimezone()
        logger.trace(f"Downloaded {cid} in {t1-t0} s")
        data["meta"] = self._get_metadata(inchikey, when_started, when_finished, t0, t1)
        # NOTE(review): the stripping only descends into plain dict/list values --
        # confirm whether the NestedDotDict sections are meant to be covered too
        self._strip_by_key_in_place(data, "DisplayControls")
        # appended only after the download, so _get_linked_records saw the earlier entries only
        stack.append(_CidInchikey(cid, inchikey))
        logger.trace(f"Stack: {stack}")
        return PubchemData(NestedDotDict(data))

    def _fetch_core_data(self, cid: int, stack: List[_CidInchikey]) -> dict:
        """
        Downloads every section of the record for ``cid`` and returns them in one dict.

        The sections are downloaded in the order they are listed here.
        """
        return dict(
            record=self._fetch_display_data(cid),
            linked_records=self._get_linked_records(cid, stack),
            structure=self._fetch_structure_data(cid),
            external_tables=self._fetch_external_tables(cid),
            link_sets=self._fetch_external_linksets(cid),
            classifications=self._fetch_hierarchies(cid),
            properties=NestedDotDict(self.fetch_properties(cid)),
        )

    def _get_metadata(
        self, inchikey: str, started: datetime, finished: datetime, t0: float, t1: float
    ) -> Mapping[str, str]:
        """
        Builds the ``"meta"`` section: ISO-formatted start/finish timestamps,
        the lookup key, and the elapsed seconds (``t1 - t0``) as a string.
        """
        return dict(
            timestamp_fetch_started=started.isoformat(),
            timestamp_fetch_finished=finished.isoformat(),
            from_lookup=inchikey,
            fetch_secs_taken=str(t1 - t0),
        )

    def _get_linked_records(self, cid: int, stack: List[_CidInchikey]) -> NestedDotDict:
        """
        Downloads the CIDs returned by the ``same_parent_stereo`` query for ``cid``
        and combines them with the CIDs and InChIKeys already on ``stack``.
        """
        url = f"{self._pug}/compound/cid/{cid}/cids/JSON?cids_type=same_parent_stereo"
        data = self._query_json(url).sub("IdentifierList")
        linked_cids = data.get("CID", [])
        logger.debug(f"DLed {len(linked_cids)} linked records for {cid}")
        results = {
            "CID": [*linked_cids, *[entry.cid for entry in stack]],
            "inchikey": [entry.inchikey for entry in stack],
        }
        logger.debug(f"Linked records are: {results}")
        return NestedDotDict(results)

    def _fetch_display_data(self, cid: int) -> Optional[NestedDotDict]:
        """
        Downloads the PUG View "display" JSON for ``cid`` and returns its ``Record`` section.
        """
        url = f"{self._pug_view}/data/compound/{cid}/JSON/?response_type=display"
        data = self._query_json(url)["Record"]
        logger.debug(f"DLed display data for {cid}")
        return data

    def _fetch_structure_data(self, cid: int) -> NestedDotDict:
        """
        Downloads the first ``PC_Compounds`` entry for ``cid`` with its ``props`` removed.

        Returns an empty ``NestedDotDict`` without querying if chem data is disabled.
        """
        if not self._use_chem_data:
            return NestedDotDict({})
        url = f"{self._pug}/compound/cid/{cid}/JSON"
        data = self._query_json(url)["PC_Compounds"][0]
        del data["props"]  # redundant with props section in record
        logger.debug(f"DLed structure for {cid}")
        return data

    def _fetch_external_tables(self, cid: int) -> Mapping[str, Sequence[dict]]:
        """
        Downloads every external table in ``_tables_to_use``.

        The result is keyed by the collection name (the *values* of ``_tables_to_use``).
        """
        collections = list(self._tables_to_use.values())
        tables = {
            collection: self._fetch_external_table(cid, collection)
            for collection in collections
        }
        logger.debug(f"DLed {len(collections)} external tables for {cid}")
        return tables

    def _fetch_external_linksets(self, cid: int) -> Mapping[str, NestedDotDict]:
        """
        Downloads every link set in ``_linksets_to_use``.

        The result is keyed by the link type (the *values* of ``_linksets_to_use``).
        """
        link_types = list(self._linksets_to_use.values())
        linksets = {
            link_type: self._fetch_external_linkset(cid, link_type)
            for link_type in link_types
        }
        logger.debug(f"DLed {len(link_types)} external linksets for {cid}")
        return linksets

    def _fetch_hierarchies(self, cid: int) -> NestedDotDict:
        """
        Downloads every hierarchy in ``_hierarchies_to_use``, keyed by hierarchy name.

        Hierarchies that fail with an HTTP or lookup error are logged and skipped.
        """
        build_up = {}
        for hname, hid in self._hierarchies_to_use.items():
            try:
                build_up[hname] = self._fetch_hierarchy(cid, hname, hid)
            except (HTTPError, LookupError) as e:  # LookupError includes KeyError
                logger.debug(f"No data for classifier {hid}, compound {cid}: {e}")
        # These list all of the child nodes for each node
        # Some of them are > 1000 items -- they're HUGE
        # We don't expect to need to navigate to children
        self._strip_by_key_in_place(build_up, "ChildID")
        logger.debug(f"DLed {len(build_up)} hierarchies for {cid}")
        return NestedDotDict(build_up)

    def _fetch_external_table(self, cid: int, table: str) -> Sequence[dict]:
        """
        Downloads one external table as CSV and returns its rows as a list of dicts.
        """
        url = self._external_table_url(cid, table)
        data = self._query(url)
        df: pd.DataFrame = pd.read_csv(io.StringIO(data)).reset_index()
        logger.debug(f"DLed table {table} with {len(df)} rows for {cid}")
        return df.to_dict(orient="records")

    def _fetch_external_linkset(self, cid: int, table: str) -> NestedDotDict:
        """
        Downloads all links of type ``table`` for ``cid`` from the link DB as parsed JSON.
        """
        url = f"{self._link_db}?format=JSON&type={table}&operation=GetAllLinks&id_1={cid}"
        data = self._query(url)
        logger.debug(f"DLed linkset {table} rows for {cid}")
        return NestedDotDict(orjson.loads(data))

    def _fetch_hierarchy(self, cid: int, hname: str, hid: int) -> Sequence[dict]:
        """
        Downloads the ``Hierarchies`` list of classifier ``hid`` for ``cid``.

        Raises:
            LookupFailedError: If the list is empty or missing
            KeyError: If the response has no ``Hierarchies`` key
        """
        url = (
            f"{self._classifications}?format=json&hid={hid}"
            f"&search_uid_type=cid&search_uid={cid}&search_type=list&response_type=display"
        )
        # underneath Hierarchies is a list of Hierarchy
        data: Sequence[dict] = orjson.loads(self._query(url))["Hierarchies"]
        if not data:
            raise LookupFailedError(f"Failed getting hierarchy {hid}")
        logger.debug(f"Found data for classifier {hid}, compound {cid}")
        logger.debug(f"DLed hierarchy {hname} ({hid}) for {cid}")
        return data

    @property
    def _tables_to_use(self) -> Mapping[str, str]:
        """
        Maps a descriptive key to the collection name of each external table to download.
        """
        dct = {
            "drug:clinicaltrials.gov:clinical_trials": "clinicaltrials",
            "pharm:pubchem:reactions": "pathwayreaction",
            "uses:cpdat:uses": "cpdat",
            "tox:chemidplus:acute_effects": "chemidplus",
            "dis:ctd:associated_disorders_and_diseases": "ctd_chemical_disease",
            "lit:pubchem:depositor_provided_pubmed_citations": "pubmed",
            "bio:dgidb:drug_gene_interactions": "dgidb",
            "bio:ctd:chemical_gene_interactions": "ctdchemicalgene",
            "bio:drugbank:drugbank_interactions": "drugbank",
            "bio:drugbank:drug_drug_interactions": "drugbankddi",
            "bio:pubchem:bioassay_results": "bioactivity",
        }
        if self._use_extra_tables:
            dct.update(
                {
                    "patent:depositor_provided_patent_identifiers": "patent",
                    "bio:rcsb_pdb:protein_bound_3d_structures": "pdb",
                    "related:pubchem:related_compounds_with_annotation": "compound",
                }
            )
        return dct

    @property
    def _linksets_to_use(self) -> Mapping[str, str]:
        """
        Maps a descriptive key to the link type of each link set to download.
        """
        return {
            "lit:pubchem:chemical_cooccurrences_in_literature": "ChemicalNeighbor",
            "lit:pubchem:gene_cooccurrences_in_literature": "ChemicalGeneSymbolNeighbor",
            "lit:pubchem:disease_cooccurrences_in_literature": "ChemicalDiseaseNeighbor",
        }

    @property
    def _hierarchies_to_use(self) -> Mapping[str, int]:
        """
        Maps hierarchy names to the ``hid`` values used in classification queries.

        Empty when classifiers are disabled.
        """
        if not self._use_classifiers:
            return {}
        dct = {
            "MeSH Tree": 1,
            "ChEBI Ontology": 2,
            "WHO ATC Classification System": 79,
            "Guide to PHARMACOLOGY Target Classification": 92,
            "ChEMBL Target Tree": 87,
        }
        if self._use_extra_classifiers:
            dct.update(
                {
                    "KEGG: Phytochemical Compounds": 5,
                    "KEGG: Drug": 14,
                    "KEGG: USP": 15,
                    "KEGG: Major components of natural products": 69,
                    "KEGG: Target-based Classification of Drugs": 22,
                    "KEGG: OTC drugs": 25,
                    "KEGG: Drug Classes": 96,
                    "CAMEO Chemicals": 86,
                    "EPA CPDat Classification": 99,
                    "FDA Pharm Classes": 78,
                    "ChemIDplus": 84,
                }
            )
        return dct

    def _external_table_url(self, cid: int, collection: str) -> str:
        """
        Builds the SDQ URL that downloads ``collection`` for ``cid`` as CSV.
        """
        # The query is written with spaces where the JSON needs double quotes;
        # the final replace() turns every space in the URL into %22 (an encoded quote).
        return (
            self._sdg
            + "?infmt=json"
            + "&outfmt=csv"
            + "&query={ download : * , collection : "
            + collection
            + " , where :{ ands :[{ cid : "
            + str(cid)
            + " }]}}"
        ).replace(" ", "%22")

    def _query_json(self, url: str) -> NestedDotDict:
        """
        Queries ``url`` and parses the response as JSON.

        Raises:
            DownloadError: If the parsed response contains a top-level ``"Fault"`` key
        """
        data = NestedDotDict(orjson.loads(self._query(url)))
        if "Fault" in data:
            raise DownloadError(
                f"PubChem query failed ({data.get('Code')}) on {url}: {data.get('Message')}"
            )
        logger.trace(
            f"Fetched JSON has {data.n_bytes_total()} bytes and {data.n_elements_total()} elements"
        )
        return data

    def _strip_by_key_in_place(self, data: Union[dict, list], bad_key: str) -> None:
        """
        Recursively deletes every entry whose key equals ``bad_key``, modifying ``data``.

        Only ``dict`` and ``list`` instances are descended into; anything else is left alone.
        """
        if isinstance(data, list):
            for x in data:
                self._strip_by_key_in_place(x, bad_key)
        elif isinstance(data, dict):
            # iterate over a snapshot because entries are deleted during the loop
            for k, v in list(data.items()):
                if k == bad_key:
                    del data[k]
                elif isinstance(v, (list, dict)):
                    self._strip_by_key_in_place(v, bad_key)
405
|
|
|
|
406
|
|
|
|
407
|
|
|
# Public API of this module.
__all__ = ["QueryingPubchemApi"]
408
|
|
|
|