|
1
|
|
|
import re |
|
|
|
|
|
|
2
|
|
|
from dataclasses import dataclass |
|
3
|
|
|
from typing import Sequence, Optional, Set |
|
4
|
|
|
|
|
5
|
|
|
from mandos.model.pubchem_api import PubchemApi |
|
6
|
|
|
from mandos.model.pubchem_support.pubchem_data import PubchemData |
|
7
|
|
|
from mandos.model.pubchem_support.pubchem_models import Activity, AssayType, Bioactivity |
|
|
|
|
|
|
8
|
|
|
from mandos.search.pubchem import PubchemHit, PubchemSearch |
|
9
|
|
|
|
|
10
|
|
|
|
|
11
|
|
|
@dataclass(frozen=True, order=True, repr=True)
class BioactivityHit(PubchemHit):
    """
    One PubChem bioactivity annotation linking a compound to an assay target.

    Adds assay-specific fields on top of the ones inherited from ``PubchemHit``.
    Instances are immutable and orderable (``frozen=True, order=True``).
    """

    # Lower-cased name of the activity outcome reported for the assay.
    activity: str
    # True when the assay type is ``AssayType.confirmatory``.
    confirmatory: bool
    # Reported activity value; the field name suggests micromolar units
    # -- NOTE(review): confirm the unit against the upstream data model.
    micromolar: float
    # Name of the activity measurement taken from the bioactivity record.
    relation: str
    # Species parsed from the target name, or None when it is not available.
    species: Optional[str]
    # Compound name exactly as it appears in the assay record.
    compound_name_in_assay: str
    # Reference identifying the assay that the record came from.
    referrer: str
|
22
|
|
|
|
|
23
|
|
|
|
|
24
|
|
|
class BioactivitySearch(PubchemSearch[BioactivityHit]):
    """
    Search that turns PubChem bioactivity records for a compound into hits.

    Records are kept only when their assay type is one of ``assay_types`` and,
    if ``compound_name_must_match`` is set, when the compound name used in the
    assay equals the compound's own name (case-insensitively).
    """

    # Splits a target name such as "Some protein (Homo sapiens)" into the bare
    # name (group 1) and an optional species (group 2). The species group is
    # optional so that names without a parenthesised suffix still match, and
    # the closing parenthesis stays optional as in the original pattern.
    _TARGET_PATTERN = re.compile(r"^(.+?)(?:\(([^)]+)\)?)?$")

    def __init__(
        self,
        key: str,
        api: PubchemApi,
        assay_types: Set[AssayType],
        compound_name_must_match: bool,
    ):
        """
        Args:
            key: Search key, forwarded to the parent constructor.
            api: PubChem API object, forwarded to the parent constructor.
            assay_types: Assay types whose records should be kept.
            compound_name_must_match: If True, keep a record only when the
                compound name in the assay matches the compound's name.
        """
        super().__init__(key, api)
        self.assay_types = assay_types
        self.compound_name_must_match = compound_name_must_match

    @property
    def data_source(self) -> str:
        """Return the name of the data source, ``"PubChem"``."""
        return "PubChem"

    def find(self, inchikey: str) -> Sequence[BioactivityHit]:
        """
        Fetch the compound's data and convert each accepted record to a hit.

        Args:
            inchikey: InChIKey of the compound to look up.

        Returns:
            A list with one ``BioactivityHit`` per accepted bioactivity record,
            in the order the records appear in the fetched data.
        """
        data = self.api.fetch_data(inchikey)
        return [
            self.process(inchikey, data, dd)
            for dd in data.biological_test_results.bioactivity
            if (
                not self.compound_name_must_match
                or dd.compound_name.lower() == data.name.lower()
            )
            and dd.assay_type in self.assay_types
        ]

    def process(self, inchikey: str, data: PubchemData, dd: Bioactivity) -> BioactivityHit:
        """
        Build a ``BioactivityHit`` from a single bioactivity record.

        Args:
            inchikey: InChIKey that was originally searched for.
            data: Compound data returned by the API for that InChIKey.
            dd: The bioactivity record to convert.

        Returns:
            The hit, with the species suffix stripped from the target name and
            stored separately in ``species`` (None when there is no suffix).
        """
        # Strip off the species name. The previous pattern had only one capture
        # group, so reading group(2) always raised IndexError, and names with
        # no parenthesised suffix produced no match at all.
        match = self._TARGET_PATTERN.fullmatch(dd.target_name)
        if match is None:
            # Nothing parseable (e.g. an empty name): keep the name as given.
            target, species = dd.target_name.strip(), None
        else:
            target = match.group(1).strip()
            # group(2) is None when the name carries no species suffix.
            species = (match.group(2) or "").strip() or None
        return BioactivityHit(
            record_id=None,
            origin_inchikey=inchikey,
            matched_inchikey=data.names_and_identifiers.inchikey,
            compound_id=str(data.cid),
            compound_name=data.name,
            predicate=dd.activity.name.lower(),
            object_id=dd.gene_id,
            object_name=target,
            search_key=self.key,
            search_class=self.search_class,
            data_source=self.data_source + ":" + dd.assay_ref,
            activity=dd.activity.name.lower(),
            confirmatory=dd.assay_type is AssayType.confirmatory,
            micromolar=dd.activity_value,
            relation=dd.activity_name,
            species=species,
            compound_name_in_assay=dd.compound_name,
            referrer=dd.assay_ref,
        )
|
77
|
|
|
|
|
78
|
|
|
|
|
79
|
|
|
# Public names exported by this module.
__all__ = [
    "BioactivityHit",
    "BioactivitySearch",
]
|
80
|
|
|
|