|
1
|
|
|
from __future__ import annotations |
|
|
|
|
|
|
2
|
|
|
|
|
3
|
|
|
import abc |
|
4
|
|
|
import enum |
|
|
|
|
|
|
5
|
|
|
import logging |
|
|
|
|
|
|
6
|
|
|
import re |
|
7
|
|
|
from dataclasses import dataclass |
|
8
|
|
|
from typing import Sequence, Type, Union, FrozenSet, Optional, TypeVar, Any |
|
|
|
|
|
|
9
|
|
|
|
|
10
|
|
|
from pocketutils.core.dot_dict import NestedDotDict |
|
|
|
|
|
|
11
|
|
|
|
|
12
|
|
|
from mandos.model import AbstractHit, Search |
|
13
|
|
|
from mandos.chembl_api import ChemblApi |
|
14
|
|
|
from mandos.model.settings import Settings |
|
15
|
|
|
from mandos.model.taxonomy import Taxonomy |
|
16
|
|
|
from mandos.pubchem_api import ( |
|
|
|
|
|
|
17
|
|
|
CachingPubchemApi, |
|
18
|
|
|
QueryingPubchemApi, |
|
19
|
|
|
PubchemData, |
|
20
|
|
|
TitleAndSummary, |
|
21
|
|
|
RelatedRecords, |
|
22
|
|
|
ChemicalAndPhysicalProperties, |
|
23
|
|
|
DrugAndMedicationInformation, |
|
24
|
|
|
PharmacologyAndBiochemistry, |
|
25
|
|
|
SafetyAndHazards, |
|
26
|
|
|
Toxicity, |
|
27
|
|
|
AssociatedDisordersAndDiseases, |
|
28
|
|
|
Literature, |
|
29
|
|
|
BiomolecularInteractionsAndPathways, |
|
30
|
|
|
Classification, |
|
31
|
|
|
) |
|
32
|
|
|
|
|
33
|
|
|
|
|
34
|
|
|
@dataclass(repr=True, order=True, frozen=True)
class PubchemClassHit(AbstractHit):
    """
    A hit whose predicate is always the fixed text ``is in``.

    All fields are inherited from ``AbstractHit``; this subclass only
    supplies the predicate.
    """

    @property
    def predicate(self) -> str:
        """Return the predicate text used for this kind of hit."""
        label = "is in"
        return label
|
39
|
|
|
|
|
40
|
|
|
|
|
41
|
|
|
# Covariant type variable for the hit type a search yields; bounded by AbstractHit.
H = TypeVar(
    "H",
    bound=AbstractHit,
    covariant=True,
)
|
|
|
|
|
|
42
|
|
|
|
|
43
|
|
|
|
|
44
|
|
|
class PubchemSearch(Search[H], metaclass=abc.ABCMeta):
    """
    Base class for searches that read their data through a PubChem API object.

    ``find`` fetches the data for a lookup string and hands it to ``process``,
    which subclasses are expected to override.
    """

    def __init__(self, chembl_api: ChemblApi, config: Settings, tax: Taxonomy):
        """
        Initialize the parent search and create the PubChem API wrapper.

        Args:
            chembl_api: Passed through to the parent ``Search`` constructor.
            config: Passed through to the parent; its ``cache_path`` is also
                given to the caching PubChem API.
            tax: Passed through to the parent ``Search`` constructor.
        """
        super().__init__(chembl_api, config, tax)
        cache_location = config.cache_path
        backend = QueryingPubchemApi()
        self.pubchem_api = CachingPubchemApi(cache_location, backend, compress=True)

    def find(self, lookup: str) -> Sequence[H]:
        """
        Fetch the PubChem data for ``lookup`` and convert it into hits.

        Args:
            lookup: The compound lookup string given to ``fetch_data``.

        Returns:
            Whatever ``process`` returns for the fetched data.
        """
        return self.process(lookup, self.pubchem_api.fetch_data(lookup))

    def process(self, lookup: str, data: PubchemData) -> Sequence[H]:
        """
        Convert fetched data into hits; this base version always raises.

        Args:
            lookup: The lookup string that was used to fetch ``data``.
            data: The data returned by the PubChem API for ``lookup``.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError()
|
55
|
|
|
|
|
56
|
|
|
|
|
57
|
|
|
class PubchemSearchFactory:
    """
    Generates ``PubchemSearch`` subclasses from a reference to a data field.

    A field is given as a class attribute such as ``SomeSection.some_field``.
    The generated search reads ``data.<some_section>.<some_field>`` and emits
    one ``PubchemClassHit`` per value found there.
    """

    # Zero-width match in front of every capital letter except one at the very
    # start of the string; substituting "_" there and lowercasing converts a
    # CamelCase class name into snake_case.
    pattern = re.compile(r"(?<!^)(?=[A-Z])")

    @classmethod
    def _field_path(cls, full_field) -> tuple[str, str]:
        """
        Resolve a class attribute into ``(snake_case_class_name, field_name)``.

        The names come from the ``__qualname__`` of the underlying function
        rather than from parsing ``repr`` text: a property's repr carries no
        name at all, and a function's repr is not a stable format.

        Args:
            full_field: A plain function, or a wrapper that exposes its
                function as ``fget`` (``property``), ``func``
                (``functools.cached_property``) or ``__func__``.

        Returns:
            The owning class name converted to snake_case, and the field name.

        Raises:
            ValueError: If no ``Class.field`` qualified name can be found.
        """
        target = full_field
        for wrapper_attr in ("fget", "func", "__func__"):
            wrapped = getattr(target, wrapper_attr, None)
            if wrapped is not None:
                target = wrapped
                break
        qualname = getattr(target, "__qualname__", None)
        # Keep only the last two components so that classes defined inside
        # functions ("outer.<locals>.Cls.field") resolve as well.
        parts = qualname.rsplit(".", 2)[-2:] if isinstance(qualname, str) else []
        if len(parts) != 2:
            raise ValueError(f"Cannot resolve a Class.field path from {full_field!r}")
        owner_name, field_name = parts
        return cls.pattern.sub("_", owner_name).lower(), field_name

    @classmethod
    def cat(
        cls, full_field, name: Optional[str] = None, object_field: Optional[str] = None
    ) -> Type[PubchemSearch]:
        """
        Build a search class that reports the values of one data field.

        Args:
            full_field: Class attribute identifying the field, e.g.
                ``SomeSection.some_field``. The section is looked up on the
                data object under the snake_case form of the class name.
            name: ``__name__`` to give the generated class. Defaults to the
                field name with underscores replaced by spaces.
            object_field: If given, each element of the field's value is
                replaced by this attribute of the element before hits are made.

        Returns:
            A new ``PubchemSearch`` subclass whose ``process`` yields one
            ``PubchemClassHit`` per distinct value.

        Raises:
            ValueError: If ``full_field`` cannot be resolved to a class and
                field name.
        """
        section_attr, field_name = cls._field_path(full_field)
        if name is None:
            name = field_name.replace("_", " ")

        class MyClassSearch(PubchemSearch[PubchemClassHit]):
            def process(self, lookup: str, data: PubchemData) -> Sequence[PubchemClassHit]:
                values = getattr(getattr(data, section_attr), field_name)
                if object_field is not None:
                    values = frozenset([getattr(x, object_field) for x in values])
                if not isinstance(values, frozenset):
                    # A single (non-frozenset) value is treated as a one-element set.
                    values = frozenset({values})
                # Fields passed to the hit:
                #   record_id: Optional[str]
                #   compound_id: str
                #   inchikey: str
                #   compound_lookup: str
                #   compound_name: str
                #   object_id: str
                #   object_name: str
                return [
                    PubchemClassHit(
                        record_id=None,
                        compound_id=str(data.cid),
                        inchikey=data.chemical_and_physical_properties.inchikey,
                        compound_lookup=lookup,
                        compound_name=data.name,
                        object_id=str(value),
                        object_name=str(value),
                    )
                    for value in values
                ]

        MyClassSearch.__name__ = name
        return MyClassSearch
|
101
|
|
|
|
|
102
|
|
|
|
|
103
|
|
|
# Short module-level aliases for the factory and the data class.
F = PubchemSearchFactory
P = PubchemData

# Search classes generated from fields of DrugAndMedicationInformation.
DeaClassSearch = F.cat(full_field=DrugAndMedicationInformation.dea_class)
DeaScheduleSearch = F.cat(full_field=DrugAndMedicationInformation.dea_schedule)
HsdbUsesSearch = F.cat(full_field=DrugAndMedicationInformation.hsdb_uses)
ClinicalTrialsSearch = F.cat(full_field=DrugAndMedicationInformation.clinical_trials)
|
110
|
|
|
|