1
|
|
|
import abc |
|
|
|
|
2
|
|
|
from datetime import datetime |
3
|
|
|
from pathlib import Path |
4
|
|
|
from typing import Optional, Type |
5
|
|
|
|
6
|
|
|
import numpy as np |
|
|
|
|
7
|
|
|
import orjson |
|
|
|
|
8
|
|
|
import pandas as pd |
|
|
|
|
9
|
|
|
from pocketutils.tools.common_tools import CommonTools |
|
|
|
|
10
|
|
|
from typeddfs import TypedDf, TypedDfs |
|
|
|
|
11
|
|
|
|
12
|
|
|
from mandos import logger |
13
|
|
|
from mandos.model import Api, CompoundNotFoundError |
14
|
|
|
from mandos.model.settings import MANDOS_SETTINGS |
15
|
|
|
from mandos.model.apis.g2p_support.g2p_data import G2pData, G2pInteraction |
16
|
|
|
from mandos.model.utils import TrueFalseUnknown |
17
|
|
|
|
18
|
|
|
# Remote tab-separated tables read by CachingG2pApi.download():
# the ligand ID mapping table and the ligand-target interaction table.
LIGANDS_URL = "https://www.guidetopharmacology.org/DATA/ligand_id_mapping.tsv"
INTERACTIONS_URL = "https://www.guidetopharmacology.org/DATA/interactions.tsv"
# Filename suffix (from the project settings) applied to the cached
# "ligands" and "interactions" files under the cache directory.
_DEF_SUFFIX = MANDOS_SETTINGS.archive_filename_suffix
21
|
|
|
|
22
|
|
|
|
23
|
|
|
def _oint(x: str) -> Optional[int]: |
|
|
|
|
24
|
|
|
if x is None or isinstance(x, str) and x.strip() == "": |
25
|
|
|
return None |
26
|
|
|
return int(x) |
27
|
|
|
|
28
|
|
|
|
29
|
|
|
# Typed DataFrame class for the ligand table (see LIGANDS_URL).
# Declares the columns that must be present and the dtype each is read as.
LigandDf = (
    TypedDfs.typed("LigandDf")
    # Integer ligand identifier; compared against "ligand_id" in InteractionDf.
    .require("Ligand id", dtype=int)
    # Descriptive and cross-reference columns, all kept as strings.
    .require("Name", "Type", "Approved", "PubChem CID", "InChIKey", dtype=str)
).build()
34
|
|
|
|
35
|
|
|
|
36
|
|
|
# Typed DataFrame class for the ligand-target interaction table
# (see INTERACTIONS_URL). Each row is unpacked as keyword arguments to
# G2pInteraction by CachingG2pApi._convert_interaction.
InteractionDf = (
    TypedDfs.typed("InteractionDf")
    # Target description columns.
    .require("target", "target_id", dtype=str)
    .require("target_gene_symbol", "target_uniprot", dtype=str)
    .require("target_species", dtype=str)
    # Ligand name and its integer ID (matched against LigandDf "Ligand id").
    .require("ligand", dtype=str)
    .require("ligand_id", dtype=int)
    # Interaction annotations.
    .require("type", "action", dtype=str)
    # Read as strings here; converted to TrueFalseUnknown in _convert_interaction.
    .require("selectivity", "endogenous", "primary_target", dtype=str)
    # Affinity measurement: units as text, median value as float.
    .require("affinity_units", dtype=str)
    .require("affinity_median", dtype=np.float64)
).build()
48
|
|
|
|
49
|
|
|
|
50
|
|
|
class G2pApi(Api, metaclass=abc.ABCMeta):
    """
    Base interface for looking up G2P data for a compound.

    Subclasses are expected to override :meth:`fetch`.
    """

    def fetch(self, inchikey: str) -> G2pData:
        """
        Look up the G2P record for a compound.

        Args:
            inchikey: InChIKey of the compound to look up.

        Returns:
            The G2P data for the compound.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()
53
|
|
|
|
54
|
|
|
|
55
|
|
|
class CachingG2pApi(G2pApi, metaclass=abc.ABCMeta):
    """
    G2P API that works from locally cached copies of the ligand and
    interaction tables, downloading them when they are not cached.

    The two tables are always loaded (and downloaded) together so that they
    stay in sync with each other.
    """

    def __init__(self, cache_path: Path = MANDOS_SETTINGS.g2p_cache_path):
        """
        Args:
            cache_path: Directory that holds the cached tables and ``info.json``.
        """
        self.cache_path = Path(cache_path)
        # Both tables stay ``None`` until ``download()`` has loaded them.
        self.ligands: Optional[LigandDf] = None
        self.interactions: Optional[InteractionDf] = None

    def fetch(self, inchikey: str) -> G2pData:
        """
        Look up a ligand by InChIKey and collect its interactions.

        Loads the tables first if they have not been loaded yet.

        Args:
            inchikey: InChIKey of the ligand to look up.

        Returns:
            The ligand's G2P record, including all of its interactions.

        Raises:
            CompoundNotFoundError: If no ligand has this InChIKey.
            ValueError: If more than one ligand row has this InChIKey.
        """
        # No-op when both tables are already loaded; otherwise the subscripting
        # below would fail on ``None``.
        self.download()
        # Column names must match the LigandDf schema ("InChIKey", "Name", "PubChem CID").
        hits = self.ligands[self.ligands["InChIKey"] == inchikey]
        if len(hits) == 0:
            raise CompoundNotFoundError(f"G2P ligand {inchikey} not found")
        if len(hits) > 1:
            raise ValueError(f"Multiple G2P ligands ({len(hits)}) found for {inchikey}")
        basic = hits.iloc[0].to_dict()
        g2pid = int(basic["Ligand id"])
        matching = self.interactions[self.interactions["ligand_id"] == g2pid]
        # Iterating a DataFrame directly yields column labels; iterrows() yields rows.
        interactions = [self._convert_interaction(row) for _, row in matching.iterrows()]
        return G2pData(
            inchikey=basic["InChIKey"],
            g2pid=g2pid,
            name=basic["Name"],
            type=basic["Type"],
            approved=TrueFalseUnknown.parse(basic["Approved"]),
            pubchem_id=_oint(basic["PubChem CID"]),
            interactions=interactions,
        )

    def download(self, force: bool = False) -> None:
        """
        Load both tables, from the cache if possible and otherwise from the web.

        Does nothing if both tables are already loaded and ``force`` is false.
        After a download, both tables and an ``info.json`` with the download
        timestamp are written under ``cache_path``.

        Args:
            force: Re-download and overwrite the cache even if data is
                   already loaded or cached.
        """
        already_loaded = self.ligands is not None and self.interactions is not None
        if already_loaded and not force:
            return
        # always download both together -- we don't want them non-synced
        exists = self.ligands_path.exists() and self.interactions_path.exists()
        if exists and not force:
            self.ligands = LigandDf.read_file(self.ligands_path)
            self.interactions = InteractionDf.read_file(self.interactions_path)
            return
        logger.info("Downloading G2P data...")
        # Fetch both tables before writing either, so a failed download
        # cannot leave a half-updated cache on disk.
        ligands = LigandDf.read_file(LIGANDS_URL, sep="\t")
        interactions = InteractionDf.read_file(INTERACTIONS_URL, sep="\t")
        self.cache_path.mkdir(parents=True, exist_ok=True)
        ligands.write_file(self.ligands_path)
        interactions.write_file(self.interactions_path)
        self.ligands = ligands
        self.interactions = interactions
        info = dict(dt_downloaded=datetime.now().isoformat())
        info = orjson.dumps(info).decode(encoding="utf8")
        (self.cache_path / "info.json").write_text(info, encoding="utf8")
        if exists:
            # Only reachable with force=True: the cached files were replaced.
            logger.notice(f"Overwrote existing cached G2P data in {self.cache_path}")
        else:
            logger.notice(f"Cached missing G2P data to {self.cache_path}")

    @property
    def ligands_path(self) -> Path:
        """Path of the cached ligand table."""
        return (self.cache_path / "ligands").with_suffix(_DEF_SUFFIX)

    @property
    def interactions_path(self) -> Path:
        """Path of the cached interaction table."""
        return (self.cache_path / "interactions").with_suffix(_DEF_SUFFIX)

    def _load_file(self, clazz: Type[TypedDf], path: Path, url: str) -> pd.DataFrame:
        """
        Read one table from ``path`` if it exists, else from ``url`` and cache it.

        Args:
            clazz: Typed DataFrame class used to read the table.
            path: Local cache path for this table.
            url: Remote location to read from when ``path`` does not exist.

        Returns:
            The loaded table.
        """
        if path.exists():
            return clazz.read_file(path)
        df = clazz.read_file(url)
        df.write_file(path)
        return df

    def _convert_interaction(self, series: pd.Series) -> G2pInteraction:
        """
        Build a ``G2pInteraction`` from one row of the interaction table.

        The ``selectivity``, ``primary_target`` and ``endogenous`` values are
        converted to ``TrueFalseUnknown``; every other column is passed through
        unchanged as a keyword argument.
        """
        d = series.to_dict()
        sel_map = {
            "Selective": TrueFalseUnknown.true,
            "Non-selective": TrueFalseUnknown.false,
            "Not Determined": TrueFalseUnknown.unknown,
        }
        # Unrecognized selectivity labels are treated as unknown.
        d["selectivity"] = sel_map.get(d["selectivity"], TrueFalseUnknown.unknown)
        d["primary_target"] = TrueFalseUnknown.parse(d["primary_target"])
        d["endogenous"] = TrueFalseUnknown.parse(d["endogenous"])
        return G2pInteraction(**d)

    def __repr__(self):
        loaded = "not loaded" if self.ligands is None else f"n={len(self.ligands)}"
        return f"{self.__class__.__name__}({self.cache_path} : {loaded})"

    def __str__(self):
        return repr(self)

    def __eq__(self, other):
        # Comparison is deliberately unsupported.
        raise NotImplementedError(f"Cannot compare {self.__class__.__name__}")
140
|
|
|
|
141
|
|
|
|
142
|
|
|
# Public API of this module. The name must be the dunder ``__all__`` to take
# effect, and every entry must name something defined in this module.
__all__ = ["G2pApi", "CachingG2pApi"]
143
|
|
|
|