|
1
|
|
|
import abc |
|
|
|
|
|
|
2
|
|
|
import time |
|
|
|
|
|
|
3
|
|
|
from datetime import datetime |
|
4
|
|
|
from pathlib import Path |
|
5
|
|
|
from typing import Optional, Type |
|
6
|
|
|
|
|
7
|
|
|
import numpy as np |
|
|
|
|
|
|
8
|
|
|
import orjson |
|
|
|
|
|
|
9
|
|
|
import pandas as pd |
|
|
|
|
|
|
10
|
|
|
from typeddfs import TypedDfs, TypedDf |
|
|
|
|
|
|
11
|
|
|
from pocketutils.tools.common_tools import CommonTools |
|
|
|
|
|
|
12
|
|
|
|
|
13
|
|
|
from mandos.model import Api, CompoundNotFoundError |
|
14
|
|
|
from mandos.model.apis.g2p_data import G2pData, G2pInteraction, TrueFalseUnknown |
|
15
|
|
|
|
|
16
|
|
|
# Remote tab-separated tables published by the Guide to Pharmacology (G2P).
# Table mapping G2P ligand ids to names and external identifiers.
LIGANDS_URL: str = "https://www.guidetopharmacology.org/DATA/ligand_id_mapping.tsv"
# Table of ligand-target interactions.
INTERACTIONS_URL: str = "https://www.guidetopharmacology.org/DATA/interactions.tsv"
|
18
|
|
|
|
|
19
|
|
|
|
|
20
|
|
|
def _oint(x: Optional[str]) -> Optional[int]:
    """
    Convert a possibly-missing value to an ``int``.

    Args:
        x: The value to convert; usually a string, but anything ``int`` accepts works

    Returns:
        ``None`` if ``x`` is ``None``, a float NaN, or a blank string;
        otherwise ``int(x)``

    Raises:
        ValueError: If ``x`` is a non-blank string that ``int`` cannot parse
    """
    import math  # local import keeps this helper self-contained

    if x is None:
        return None
    # int(nan) raises ValueError, so a NaN cell is treated as missing
    if isinstance(x, float) and math.isnan(x):
        return None
    if isinstance(x, str) and x.strip() == "":
        return None
    return int(x)
|
24
|
|
|
|
|
25
|
|
|
|
|
26
|
|
|
# Typed DataFrame class for the ligand table: an integer "Ligand id" column
# plus string columns for the name, type, approval flag, and identifiers.
_ligand_schema = TypedDfs.typed("LigandDf")
_ligand_schema = _ligand_schema.require("Ligand id", dtype=int)
_ligand_schema = _ligand_schema.require(
    "Name", "Type", "Approved", "PubChem CID", "InChIKey", dtype=str
)
LigandDf = _ligand_schema.build()
del _ligand_schema  # builder is only needed while constructing the class
|
31
|
|
|
|
|
32
|
|
|
|
|
33
|
|
|
# Typed DataFrame class for the interaction table: string columns describing
# the target, ligand, and interaction, an integer "ligand_id", and a float
# "affinity_median".
_interaction_schema = TypedDfs.typed("InteractionDf")
_interaction_schema = _interaction_schema.require(
    "target", "target_id", "target_gene_symbol", "target_uniprot", "target_species", dtype=str
)
_interaction_schema = _interaction_schema.require("ligand", dtype=str)
_interaction_schema = _interaction_schema.require("ligand_id", dtype=int)
_interaction_schema = _interaction_schema.require("type", "action", dtype=str)
_interaction_schema = _interaction_schema.require(
    "selectivity", "endogenous", "primary_target", dtype=str
)
_interaction_schema = _interaction_schema.require("affinity_units", dtype=str)
_interaction_schema = _interaction_schema.require("affinity_median", dtype=np.float64)
InteractionDf = _interaction_schema.build()
del _interaction_schema  # builder is only needed while constructing the class
|
45
|
|
|
|
|
46
|
|
|
|
|
47
|
|
|
class G2pApi(Api, metaclass=abc.ABCMeta):
    """
    Base interface for looking up Guide to Pharmacology (G2P) data by compound.
    """

    def fetch(self, inchikey: str) -> G2pData:
        """
        Look up the G2P record for a compound.

        Args:
            inchikey: The InChIKey identifying the compound

        Returns:
            The G2P data for the compound

        Raises:
            NotImplementedError: Always, in this base class
        """
        raise NotImplementedError
|
50
|
|
|
|
|
51
|
|
|
|
|
52
|
|
|
class CachedG2pApi(G2pApi, metaclass=abc.ABCMeta):
    """
    G2P API backed by locally cached copies of the ligand and interaction tables.

    The tables are read from ``cache_path`` when both cached files exist, and are
    otherwise downloaded from ``LIGANDS_URL`` and ``INTERACTIONS_URL`` and then
    written to the cache.
    """

    def __init__(self, cache_path: Path):
        """
        Args:
            cache_path: Directory in which the cached tables and ``info.json`` are kept
        """
        self.cache_path = Path(cache_path)
        # Both tables stay None until download() has loaded them.
        self.ligands: Optional[LigandDf] = None
        self.interactions: Optional[InteractionDf] = None

    def fetch(self, inchikey: str) -> G2pData:
        """
        Look up the G2P record for a compound, loading the tables first if needed.

        Args:
            inchikey: The InChIKey identifying the compound

        Returns:
            The ligand's data together with its converted interactions

        Raises:
            CompoundNotFoundError: If no ligand row has this InChIKey
        """
        # No-op once both tables are loaded; avoids indexing None on first use.
        self.download()
        matches = self.ligands[self.ligands["InChIKey"] == inchikey]
        if len(matches) == 0:
            raise CompoundNotFoundError(f"G2P ligand {inchikey} not found")
        # Iterating a DataFrame yields column labels, so rows are extracted explicitly.
        ligand_rows = [row for _, row in matches.iterrows()]
        # NOTE(review): CommonTools.only presumably raises if more than one row matches.
        basic = dict(CommonTools.only(ligand_rows).to_dict())
        g2pid = int(basic["Ligand id"])
        hits = self.interactions[self.interactions["ligand_id"] == g2pid]
        interactions = [self._convert_interaction(row) for _, row in hits.iterrows()]
        return G2pData(
            inchikey=basic["InChIKey"],
            g2pid=g2pid,
            name=basic["Name"],
            type=basic["Type"],
            approved=TrueFalseUnknown.parse(basic["Approved"]),
            pubchem_id=_oint(basic["PubChem CID"]),
            interactions=interactions,
        )

    def download(self, force: bool = False) -> None:
        """
        Load both tables into memory, from the cache if possible, else from the web.

        Does nothing if both tables are already loaded and ``force`` is false.

        Args:
            force: If true, re-download both tables even if they are loaded or cached
        """
        if self.ligands is not None and self.interactions is not None and not force:
            return
        # always handle both together -- we don't want them non-synced
        if self.ligands_path.exists() and self.interactions_path.exists() and not force:
            self.ligands = LigandDf.read_file(self.ligands_path)
            self.interactions = InteractionDf.read_file(self.interactions_path)
            return
        # Fetch both tables before touching the cache or instance state, so a
        # failed download cannot leave one table updated and the other stale.
        ligands = LigandDf.read_file(LIGANDS_URL, sep="\t")
        interactions = InteractionDf.read_file(INTERACTIONS_URL, sep="\t")
        self.cache_path.mkdir(parents=True, exist_ok=True)
        ligands.write_file(self.ligands_path)
        interactions.write_file(self.interactions_path)
        self.ligands = ligands
        self.interactions = interactions
        # Record when the tables were downloaded.
        info = dict(dt_downloaded=datetime.now().isoformat())
        info = orjson.dumps(info).decode(encoding="utf8")
        (self.cache_path / "info.json").write_text(info, encoding="utf8")

    @property
    def ligands_path(self) -> Path:
        """Path of the cached ligand table."""
        return self.cache_path / "ligands.feather"

    @property
    def interactions_path(self) -> Path:
        """Path of the cached interaction table."""
        return self.cache_path / "interactions.feather"

    def _load_file(self, clazz: Type[TypedDf], path: Path, url: str) -> pd.DataFrame:
        """
        Read a tab-separated table from ``path`` if it exists, else from ``url``.

        A table read from ``url`` is also written to ``path`` as tab-separated text.

        Args:
            clazz: The typed DataFrame class used to read the table
            path: Local file to read from, or to write the downloaded table to
            url: Remote location used when ``path`` does not exist

        Returns:
            The loaded table
        """
        if path.exists():
            return clazz.read_file(path, sep="\t")
        df = clazz.read_file(url, sep="\t")
        df.to_csv(path, sep="\t")
        return df

    def _convert_interaction(self, series: pd.Series) -> G2pInteraction:
        """
        Build a ``G2pInteraction`` from one row of the interaction table.

        Args:
            series: A single row of the interaction table

        Returns:
            The interaction, with ``selectivity``, ``primary_target``, and
            ``endogenous`` converted to ``TrueFalseUnknown`` values
        """
        d = dict(series.to_dict())
        sel_map = {
            "Selective": TrueFalseUnknown.true,
            "Non-selective": TrueFalseUnknown.false,
            "Not Determined": TrueFalseUnknown.unknown,
        }
        # Any label not in the map is treated as unknown.
        d["selectivity"] = sel_map.get(d["selectivity"], TrueFalseUnknown.unknown)
        d["primary_target"] = TrueFalseUnknown.parse(d["primary_target"])
        d["endogenous"] = TrueFalseUnknown.parse(d["endogenous"])
        # NOTE(review): assumes the row's keys match G2pInteraction's fields -- confirm.
        return G2pInteraction(**d)

    def __repr__(self):
        loaded = "not loaded" if self.ligands is None else f"n={len(self.ligands)}"
        return f"{self.__class__.__name__}({self.cache_path} : {loaded})"

    def __str__(self):
        return repr(self)

    def __eq__(self, other):
        # Comparison is deliberately unsupported.
        raise NotImplementedError(f"Cannot compare {self.__class__.__name__}")
|
131
|
|
|
|
|
132
|
|
|
|
|
133
|
|
|
# Public API of this module (was misspelled ``_all__``, which Python ignores).
__all__ = ["G2pApi", "CachedG2pApi"]
|
134
|
|
|
|