1
|
|
|
import abc |
|
|
|
|
2
|
|
|
from datetime import datetime |
3
|
|
|
from pathlib import Path |
4
|
|
|
from typing import Optional, Type |
5
|
|
|
|
6
|
|
|
import decorateme |
|
|
|
|
7
|
|
|
import numpy as np |
|
|
|
|
8
|
|
|
import orjson |
|
|
|
|
9
|
|
|
import pandas as pd |
|
|
|
|
10
|
|
|
from pocketutils.core.enums import TrueFalseUnknown |
|
|
|
|
11
|
|
|
from pocketutils.core.exceptions import UnsupportedOpError |
|
|
|
|
12
|
|
|
from pocketutils.tools.common_tools import CommonTools |
|
|
|
|
13
|
|
|
from typeddfs import TypedDf, TypedDfs |
|
|
|
|
14
|
|
|
|
15
|
|
|
from mandos.model import Api, CompoundNotFoundError |
16
|
|
|
from mandos.model.apis.g2p_support.g2p_data import G2pData, G2pInteraction |
17
|
|
|
from mandos.model.settings import SETTINGS |
18
|
|
|
from mandos.model.utils.setup import logger |
19
|
|
|
|
20
|
|
|
# Tab-separated G2P (guidetopharmacology.org) tables that CachingG2pApi.download() fetches.
LIGANDS_URL = "https://www.guidetopharmacology.org/DATA/ligand_id_mapping.tsv"
INTERACTIONS_URL = "https://www.guidetopharmacology.org/DATA/interactions.tsv"
# Filename suffix applied to the cached copy of each table
# (see CachingG2pApi.ligands_path and CachingG2pApi.interactions_path).
_DEF_SUFFIX = SETTINGS.archive_filename_suffix
23
|
|
|
|
24
|
|
|
|
25
|
|
|
def _oint(x: str) -> Optional[int]: |
|
|
|
|
26
|
|
|
if x is None or isinstance(x, str) and x.strip() == "": |
27
|
|
|
return None |
28
|
|
|
return int(x) |
29
|
|
|
|
30
|
|
|
|
31
|
|
|
class G2pCompoundLookupError(CompoundNotFoundError):
    """Raised when no ligand in the G2P ligand table matches the requested InChIKey."""
33
|
|
|
|
34
|
|
|
|
35
|
|
|
# Typed schema for the G2P ligand table (read from LIGANDS_URL and cached at
# CachingG2pApi.ligands_path).
LigandDf = (
    TypedDfs.typed("LigandDf")
    # Integer G2P ligand ID; interaction rows are matched to it via their "ligand_id" column.
    .require("Ligand id", dtype=int)
    # Descriptive columns, all declared as strings.
    .require("Name", "Type", "Approved", "PubChem CID", "InChIKey", dtype=str)
    # NOTE(review): strict() / secure() / hash(file=True) are typeddfs builder options;
    # presumably they reject undeclared columns, restrict I/O to safe formats, and
    # keep a hash file next to the written data -- confirm against the typeddfs docs.
    .strict()
    .secure()
    .hash(file=True)
).build()
43
|
|
|
|
44
|
|
|
|
45
|
|
|
# Typed schema for the G2P interaction table (read from INTERACTIONS_URL and cached at
# CachingG2pApi.interactions_path). Each row becomes the keyword arguments of one
# G2pInteraction in CachingG2pApi._convert_interaction.
InteractionDf = (
    TypedDfs.typed("InteractionDf")
    # Target description.
    .require("target", "target_id", dtype=str)
    .require("target_gene_symbol", "target_uniprot", dtype=str)
    .require("target_species", dtype=str)
    # Ligand description; "ligand_id" is compared against LigandDf's "Ligand id".
    .require("ligand", dtype=str)
    .require("ligand_id", dtype=int)
    .require("type", "action", dtype=str)
    # Stored as strings; converted to TrueFalseUnknown in _convert_interaction.
    .require("selectivity", "endogenous", "primary_target", dtype=str)
    .require("affinity_units", dtype=str)
    .require("affinity_median", dtype=np.float64)
    # NOTE(review): strict() / secure() / hash(file=True) are typeddfs builder options;
    # presumably they reject undeclared columns, restrict I/O to safe formats, and
    # keep a hash file next to the written data -- confirm against the typeddfs docs.
    .strict()
    .secure()
    .hash(file=True)
).build()
60
|
|
|
|
61
|
|
|
|
62
|
|
|
class G2pApi(Api, metaclass=abc.ABCMeta):
    """
    Base interface for fetching Guide to Pharmacology (G2P) data for a compound.

    Instances refuse equality comparison, and both ``repr`` and ``str`` give
    the class name followed by ``()``.
    """

    def fetch(self, inchikey: str) -> G2pData:
        """
        Look up the G2P record for a compound.

        Args:
            inchikey: InChIKey of the compound to look up.

        Returns:
            The G2P data for the compound.

        Raises:
            NotImplementedError: Always, in this base class; subclasses override it.
        """
        raise NotImplementedError()

    def __eq__(self, other):
        # Comparing API objects is deliberately unsupported.
        cls_name = self.__class__.__name__
        raise UnsupportedOpError(f"Cannot compare {cls_name}")

    def __repr__(self):
        return self.__class__.__name__ + "()"

    def __str__(self):
        return repr(self)
74
|
|
|
|
75
|
|
|
|
76
|
|
|
class CachingG2pApi(G2pApi, metaclass=abc.ABCMeta):
    """
    G2P API that serves lookups from two tables (ligands and interactions)
    kept in memory and cached as files under ``cache_path``.

    The tables are loaded by :meth:`download`, which :meth:`fetch` calls
    on first use if they are not in memory yet.
    """

    def __init__(self, cache_path: Path = SETTINGS.g2p_cache_path):
        """
        Args:
            cache_path: Directory that holds the cached tables and ``info.json``.
        """
        self.cache_path = Path(cache_path)
        # Both tables stay None until download() has loaded them.
        self.ligands: Optional[LigandDf] = None
        self.interactions: Optional[InteractionDf] = None

    def fetch(self, inchikey: str) -> G2pData:
        """
        Look up a ligand by InChIKey and collect its interactions.

        Args:
            inchikey: InChIKey to match against the ligand table's ``InChIKey`` column.

        Returns:
            The ligand's basic fields plus one ``G2pInteraction`` per
            interaction row whose ``ligand_id`` equals the ligand's ID.

        Raises:
            G2pCompoundLookupError: If no ligand row has this InChIKey.
            Exception: Whatever ``CommonTools.only`` raises if the matching rows
                do not share a single ligand ID.
        """
        if self.ligands is None or self.interactions is None:
            # Load from the cache (or the web) instead of failing on None.
            self.download()
        matches = self.ligands[self.ligands["InChIKey"] == inchikey]
        if len(matches) == 0:
            raise G2pCompoundLookupError(f"G2P ligand {inchikey} not found")
        # Iterating a DataFrame yields column labels, so apply `only` to the
        # ID column's values and read the row itself positionally.
        g2pid = int(CommonTools.only(matches["Ligand id"]))
        basic = matches.iloc[0].to_dict()
        hits = self.interactions[self.interactions["ligand_id"] == g2pid]
        interactions = [self._convert_interaction(row) for _, row in hits.iterrows()]
        return G2pData(
            inchikey=basic["InChIKey"],
            g2pid=g2pid,
            name=basic["Name"],
            type=basic["Type"],
            approved=TrueFalseUnknown.of(basic["Approved"]),
            pubchem_id=_oint(basic["PubChem CID"]),
            interactions=interactions,
        )

    def download(self, force: bool = False) -> None:
        """
        Load both tables into memory, downloading and caching them if needed.

        Does nothing if both tables are already in memory and ``force`` is false.
        Otherwise reads both cache files when both exist (and ``force`` is false),
        or else downloads both tables, writes them to the cache, and records the
        download time in ``info.json`` under ``cache_path``.

        Args:
            force: Re-download and overwrite the cache even if data is available.
        """
        if self.ligands is not None and self.interactions is not None and not force:
            return
        # always handle both together -- we don't want them non-synced
        exists = self.ligands_path.exists() and self.interactions_path.exists()
        if exists and not force:
            self.ligands = LigandDf.read_file(self.ligands_path)
            self.interactions = InteractionDf.read_file(self.interactions_path)
            return
        logger.info("Downloading G2P data...")
        # Fetch both tables before touching the cache so that a failed second
        # download cannot leave a new ligands file beside stale interactions.
        ligands = LigandDf.read_file(LIGANDS_URL, sep="\t")
        interactions = InteractionDf.read_file(INTERACTIONS_URL, sep="\t")
        self.cache_path.mkdir(parents=True, exist_ok=True)
        ligands.write_file(self.ligands_path)
        interactions.write_file(self.interactions_path)
        self.ligands = ligands
        self.interactions = interactions
        info = dict(dt_downloaded=datetime.now().isoformat())
        info_json = orjson.dumps(info).decode(encoding="utf8")
        (self.cache_path / "info.json").write_text(info_json, encoding="utf8")
        if exists:
            logger.notice(f"Overwrote existing cached G2P data in {self.cache_path}")
        else:
            logger.notice(f"Cached missing G2P data to {self.cache_path}")

    @property
    def ligands_path(self) -> Path:
        """Path of the cached ligand table inside ``cache_path``."""
        return (self.cache_path / "ligands").with_suffix(_DEF_SUFFIX)

    @property
    def interactions_path(self) -> Path:
        """Path of the cached interaction table inside ``cache_path``."""
        return (self.cache_path / "interactions").with_suffix(_DEF_SUFFIX)

    def _load_file(self, clazz: Type[TypedDf], path: Path, url: str) -> pd.DataFrame:
        """
        Read one table from ``path`` if it exists; otherwise read it from
        ``url`` and write it to ``path``.

        Args:
            clazz: Typed DataFrame class used to read the table.
            path: Cache file for this table.
            url: Remote location used when ``path`` does not exist.

        Returns:
            The loaded table.
        """
        if path.exists():
            return clazz.read_file(path)
        df = clazz.read_file(url)
        path.parent.mkdir(parents=True, exist_ok=True)
        df.write_file(path)
        return df

    def _convert_interaction(self, series: pd.Series) -> G2pInteraction:
        """
        Build a ``G2pInteraction`` from one row of the interaction table.

        The row's fields are passed through as keyword arguments, except that
        ``selectivity``, ``primary_target``, and ``endogenous`` are converted
        to ``TrueFalseUnknown`` first.
        """
        d = series.to_dict()
        sel_map = {
            "Selective": TrueFalseUnknown.true,
            "Non-selective": TrueFalseUnknown.false,
            "Not Determined": TrueFalseUnknown.unknown,
        }
        # Any label outside the map is treated as unknown.
        d["selectivity"] = sel_map.get(d["selectivity"], TrueFalseUnknown.unknown)
        d["primary_target"] = TrueFalseUnknown.of(d["primary_target"])
        d["endogenous"] = TrueFalseUnknown.of(d["endogenous"])
        return G2pInteraction(**d)

    def __repr__(self):
        loaded = "not loaded" if self.ligands is None else f"n={len(self.ligands)}"
        return f"{self.__class__.__name__}({self.cache_path} : {loaded})"

    def __str__(self):
        return repr(self)
158
|
|
|
|
159
|
|
|
|
160
|
|
|
# Public API of this module. The original spelled this `_all__` (so it had no
# effect) and listed a class name, "CachedG2pApi", that does not exist here.
__all__ = ["G2pApi", "CachingG2pApi"]
161
|
|
|
|