1
|
|
|
import abc |
|
|
|
|
2
|
|
|
import time |
|
|
|
|
3
|
|
|
from datetime import datetime |
4
|
|
|
from pathlib import Path |
5
|
|
|
from typing import Optional, Type |
6
|
|
|
|
7
|
|
|
import numpy as np |
|
|
|
|
8
|
|
|
import orjson |
|
|
|
|
9
|
|
|
import pandas as pd |
|
|
|
|
10
|
|
|
from pocketutils.tools.common_tools import CommonTools |
|
|
|
|
11
|
|
|
from typeddfs import TypedDf, TypedDfs |
|
|
|
|
12
|
|
|
|
13
|
|
|
from mandos.model import Api, CompoundNotFoundError |
14
|
|
|
from mandos.model.apis.g2p_data import G2pData, G2pInteraction, TrueFalseUnknown |
15
|
|
|
|
16
|
|
|
# Tab-separated ligand ID mapping table published by Guide to Pharmacology (G2P)
LIGANDS_URL = "https://www.guidetopharmacology.org/DATA/ligand_id_mapping.tsv"
# Tab-separated ligand-target interaction table published on the same site
INTERACTIONS_URL = "https://www.guidetopharmacology.org/DATA/interactions.tsv"
18
|
|
|
|
19
|
|
|
|
20
|
|
|
def _oint(x: str) -> Optional[int]: |
|
|
|
|
21
|
|
|
if x is None or isinstance(x, str) and x.strip() == "": |
22
|
|
|
return None |
23
|
|
|
return int(x) |
24
|
|
|
|
25
|
|
|
|
26
|
|
|
# Typed DataFrame class for the ligand table (read from LIGANDS_URL below):
# an integer ligand ID plus string-typed descriptive columns.
LigandDf = (
    TypedDfs.typed("LigandDf")
    .require("Ligand id", dtype=int)
    .require("Name", "Type", "Approved", "PubChem CID", "InChIKey", dtype=str)
).build()
31
|
|
|
|
32
|
|
|
|
33
|
|
|
# Typed DataFrame class for the interaction table (read from INTERACTIONS_URL below).
# Every column is a string except the integer ``ligand_id`` and the float ``affinity_median``.
InteractionDf = (
    TypedDfs.typed("InteractionDf")
    .require(
        "target", "target_id", "target_gene_symbol", "target_uniprot", "target_species", dtype=str
    )
    .require("ligand", dtype=str)
    .require("ligand_id", dtype=int)
    .require("type", "action", dtype=str)
    .require("selectivity", "endogenous", "primary_target", dtype=str)
    .require("affinity_units", dtype=str)
    .require("affinity_median", dtype=np.float64)
).build()
45
|
|
|
|
46
|
|
|
|
47
|
|
|
class G2pApi(Api, metaclass=abc.ABCMeta):
    """Interface for looking up Guide to Pharmacology (G2P) data for a compound."""

    def fetch(self, inchikey: str) -> G2pData:
        """
        Fetches the G2P record for one compound.

        Args:
            inchikey: InChIKey of the compound to look up

        Raises:
            NotImplementedError: Always; subclasses supply the actual lookup
        """
        raise NotImplementedError
50
|
|
|
|
51
|
|
|
|
52
|
|
|
class CachedG2pApi(G2pApi, metaclass=abc.ABCMeta):
    """
    G2P API backed by two tables that are cached on disk.

    The ligand and interaction tables are read from ``LIGANDS_URL`` and
    ``INTERACTIONS_URL``, stored under ``cache_path``, and kept in memory
    after they are first loaded.
    """

    def __init__(self, cache_path: Path):
        """
        Args:
            cache_path: Directory holding the cached tables and ``info.json``
        """
        self.cache_path = Path(cache_path)
        # both tables stay None until download() has run
        self.ligands: Optional[LigandDf] = None
        self.interactions: Optional[InteractionDf] = None

    def fetch(self, inchikey: str) -> G2pData:
        """
        Looks up a ligand by InChIKey and collects its interactions.

        Loads the tables first if they are not yet in memory.
        If several ligand rows share the InChIKey, the first one is used.

        Args:
            inchikey: InChIKey, compared exactly against the ``InChIKey`` column

        Returns:
            The ligand's basic fields plus one ``G2pInteraction`` per matching
            row of the interaction table

        Raises:
            CompoundNotFoundError: If no ligand row has this InChIKey
        """
        # a no-op once both tables are in memory
        self.download()
        matches = self.ligands[self.ligands["InChIKey"] == inchikey]
        if len(matches) == 0:
            raise CompoundNotFoundError(f"G2P ligand {inchikey} not found")
        # iterating a DataFrame yields column labels, so pick the row by position
        basic = matches.iloc[0].to_dict()
        g2pid = int(basic["Ligand id"])
        hits = self.interactions[self.interactions["ligand_id"] == g2pid]
        interactions = [self._convert_interaction(row) for _, row in hits.iterrows()]
        return G2pData(
            inchikey=basic["InChIKey"],
            g2pid=g2pid,
            name=basic["Name"],
            type=basic["Type"],
            approved=TrueFalseUnknown.parse(basic["Approved"]),
            pubchem_id=_oint(basic["PubChem CID"]),
            interactions=interactions,
        )

    def download(self, force: bool = False) -> None:
        """
        Loads both tables into memory, downloading them when necessary.

        Does nothing if both tables are already in memory and ``force`` is false.
        Otherwise the cached files are read if both exist and ``force`` is false;
        failing that, both tables are downloaded, the cache files are rewritten,
        and the download time is recorded in ``info.json``.

        Args:
            force: Re-download even if the tables are in memory or cached on disk
        """
        if self.ligands is not None and self.interactions is not None and not force:
            return
        # always download both together -- we don't want them non-synced
        if self.ligands_path.exists() and self.interactions_path.exists() and not force:
            self.ligands = LigandDf.read_file(self.ligands_path)
            self.interactions = InteractionDf.read_file(self.interactions_path)
            return
        # the cache directory may not exist yet on the first download
        self.cache_path.mkdir(parents=True, exist_ok=True)
        self.ligands = LigandDf.read_file(LIGANDS_URL, sep="\t")
        self.ligands.write_file(self.ligands_path)
        self.interactions = InteractionDf.read_file(INTERACTIONS_URL, sep="\t")
        self.interactions.write_file(self.interactions_path)
        info = dict(dt_downloaded=datetime.now().isoformat())
        info = orjson.dumps(info).decode(encoding="utf8")
        (self.cache_path / "info.json").write_text(info, encoding="utf8")

    @property
    def ligands_path(self) -> Path:
        """Path of the cached ligand table."""
        return self.cache_path / "ligands.feather"

    @property
    def interactions_path(self) -> Path:
        """Path of the cached interaction table."""
        return self.cache_path / "interactions.feather"

    def _load_file(self, clazz: Type[TypedDf], path: Path, url: str) -> pd.DataFrame:
        """
        Reads a tab-separated table from ``path``, or from ``url`` if ``path`` is missing.

        A table fetched from ``url`` is written to ``path`` before it is returned.

        Args:
            clazz: Typed DataFrame class used to read the table
            path: Local file to read from, and to write to after a download
            url: Source to read from when ``path`` does not exist
        """
        if path.exists():
            return clazz.read_file(path, sep="\t")
        df = clazz.read_file(url, sep="\t")
        df.to_csv(path, sep="\t")
        return df

    def _convert_interaction(self, series: pd.Series) -> G2pInteraction:
        """
        Builds a ``G2pInteraction`` from one row of the interaction table.

        The ``selectivity``, ``primary_target``, and ``endogenous`` values are
        replaced with ``TrueFalseUnknown`` members; all other fields are
        passed through unchanged as keyword arguments.
        """
        d = series.to_dict()
        sel_map = {
            "Selective": TrueFalseUnknown.true,
            "Non-selective": TrueFalseUnknown.false,
            "Not Determined": TrueFalseUnknown.unknown,
        }
        # any label that is not in the map counts as unknown
        d["selectivity"] = sel_map.get(d["selectivity"], TrueFalseUnknown.unknown)
        d["primary_target"] = TrueFalseUnknown.parse(d["primary_target"])
        d["endogenous"] = TrueFalseUnknown.parse(d["endogenous"])
        return G2pInteraction(**d)

    def __repr__(self):
        loaded = "not loaded" if self.ligands is None else f"n={len(self.ligands)}"
        return f"{self.__class__.__name__}({self.cache_path} : {loaded})"

    def __str__(self):
        return repr(self)

    def __eq__(self, other):
        # equality is not supported; any comparison raises
        raise NotImplementedError(f"Cannot compare {self.__class__.__name__}")
131
|
|
|
|
132
|
|
|
|
133
|
|
|
# Public API of this module; the name must be ``__all__`` for ``import *`` to honor it
__all__ = ["G2pApi", "CachedG2pApi"]
134
|
|
|
|