Passed
Push — main ( 9813db...5006f2 )
by Douglas
01:43
created

mandos.model.apis.g2p_api.CachingG2pApi.download()   B

Complexity

Conditions 7

Size

Total Lines 20
Code Lines 17

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 7
eloc 17
nop 2
dl 0
loc 20
rs 8
c 0
b 0
f 0
1
import abc
0 ignored issues
show
introduced by
Missing module docstring
Loading history...
2
from datetime import datetime
3
from pathlib import Path
4
from typing import Optional, Type
5
6
import numpy as np
0 ignored issues
show
introduced by
Unable to import 'numpy'
Loading history...
7
import orjson
0 ignored issues
show
introduced by
Unable to import 'orjson'
Loading history...
8
import pandas as pd
0 ignored issues
show
introduced by
Unable to import 'pandas'
Loading history...
9
from pocketutils.tools.common_tools import CommonTools
0 ignored issues
show
introduced by
Unable to import 'pocketutils.tools.common_tools'
Loading history...
10
from typeddfs import TypedDf, TypedDfs
0 ignored issues
show
introduced by
Unable to import 'typeddfs'
Loading history...
11
12
from mandos import logger
13
from mandos.model import Api, CompoundNotFoundError
14
from mandos.model.settings import MANDOS_SETTINGS
15
from mandos.model.apis.g2p_support.g2p_data import G2pData, G2pInteraction
16
from mandos.model.utils import TrueFalseUnknown
17
18
# Remote TSV tables on guidetopharmacology.org that CachingG2pApi.download() reads.
LIGANDS_URL = "https://www.guidetopharmacology.org/DATA/ligand_id_mapping.tsv"
INTERACTIONS_URL = "https://www.guidetopharmacology.org/DATA/interactions.tsv"

# File suffix given to the locally cached copies of the two tables.
_DEF_SUFFIX = MANDOS_SETTINGS.archive_filename_suffix
21
22
23
def _oint(x: Optional[str]) -> Optional[int]:
    """
    Parse an optional integer.

    Args:
        x: The raw value. Usually a string, but ``None``, plain numbers,
           and float NaN are tolerated.

    Returns:
        ``None`` if ``x`` is ``None``, a float NaN, or an empty / whitespace-only
        string; otherwise ``int(x)``.

    Raises:
        ValueError: If ``x`` is a non-blank value that ``int`` cannot parse.
    """
    import math  # function-scope import keeps this helper self-contained

    if x is None:
        return None
    if isinstance(x, str) and x.strip() == "":
        return None
    # NOTE(review): presumably a missing table cell can arrive as float NaN;
    # int(nan) raises ValueError, so treat it as "no value" -- confirm against the loaded data.
    if isinstance(x, float) and math.isnan(x):
        return None
    return int(x)
27
28
29
# Typed table for the ligand ID mapping: one integer ID column plus string columns.
LigandDf = (
    TypedDfs.typed("LigandDf")
    .require("Ligand id", dtype=int)
    .require("Name", "Type", "Approved", "PubChem CID", "InChIKey", dtype=str)
    .build()
)
34
35
36
# Typed table for ligand-target interactions. Column order and dtypes are
# the same as before; adjacent columns sharing a dtype are declared together.
InteractionDf = (
    TypedDfs.typed("InteractionDf")
    .require(
        "target",
        "target_id",
        "target_gene_symbol",
        "target_uniprot",
        "target_species",
        "ligand",
        dtype=str,
    )
    .require("ligand_id", dtype=int)
    .require(
        "type",
        "action",
        "selectivity",
        "endogenous",
        "primary_target",
        "affinity_units",
        dtype=str,
    )
    .require("affinity_median", dtype=np.float64)
    .build()
)
48
49
50
class G2pApi(Api, metaclass=abc.ABCMeta):
    """
    Base interface for looking up Guide to Pharmacology (G2P) data by InChIKey.

    Subclasses are expected to override :meth:`fetch`.
    """

    def fetch(self, inchikey: str) -> G2pData:
        """
        Return the G2P record for a compound.

        Args:
            inchikey: InChIKey of the compound to look up.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()
53
54
55
class CachingG2pApi(G2pApi, metaclass=abc.ABCMeta):
    """
    G2P API that serves lookups from two tables kept on disk under ``cache_path``.

    The ligand and interaction tables are loaded (or downloaded and cached)
    by :meth:`download`; :meth:`fetch` then queries the in-memory tables.
    """

    def __init__(self, cache_path: Path = MANDOS_SETTINGS.g2p_cache_path):
        """
        Args:
            cache_path: Directory holding the cached tables and ``info.json``.
        """
        self.cache_path = Path(cache_path)
        # Both tables stay None until download() has populated them.
        self.ligands: Optional[LigandDf] = None
        self.interactions: Optional[InteractionDf] = None

    def fetch(self, inchikey: str) -> G2pData:
        """
        Look up a ligand by InChIKey and collect its interactions.

        The tables must already be loaded via :meth:`download`.

        Args:
            inchikey: InChIKey to match against the ``InChIKey`` column.

        Returns:
            The ligand's basic fields plus one converted entry per interaction row.

        Raises:
            CompoundNotFoundError: If no ligand row has this InChIKey.
        """
        # Column keys must match the LigandDf schema ("InChIKey", "Name", "PubChem CID").
        matches = self.ligands[self.ligands["InChIKey"] == inchikey]
        if len(matches) == 0:
            raise CompoundNotFoundError(f"G2P ligand {inchikey} not found")
        # NOTE(review): this assumes CommonTools.only() returns the single matching
        # row as a Series when given a DataFrame -- confirm against pocketutils.
        basic = dict(CommonTools.only(matches).to_dict())
        g2pid = int(basic["Ligand id"])
        hits = self.interactions[self.interactions["ligand_id"] == g2pid]
        # Iterating a DataFrame directly yields column labels; iterrows() yields the rows.
        interactions = [self._convert_interaction(row) for _, row in hits.iterrows()]
        return G2pData(
            inchikey=basic["InChIKey"],
            g2pid=g2pid,
            name=basic["Name"],
            type=basic["Type"],
            approved=TrueFalseUnknown.parse(basic["Approved"]),
            pubchem_id=_oint(basic["PubChem CID"]),
            interactions=interactions,
        )

    def download(self, force: bool = False) -> None:
        """
        Load both tables, downloading and caching them when needed.

        Does nothing if both tables are already in memory and ``force`` is false.
        Otherwise reads the cached files if both exist (and ``force`` is false),
        or else downloads both tables, writes them to the cache, and records the
        download time in ``info.json``.

        Args:
            force: If true, re-download even if the data are loaded or cached.
        """
        if not force and self.ligands is not None and self.interactions is not None:
            return
        # always handle both tables together -- we don't want them non-synced
        exists = self.ligands_path.exists() and self.interactions_path.exists()
        if exists and not force:
            self.ligands = LigandDf.read_file(self.ligands_path)
            self.interactions = InteractionDf.read_file(self.interactions_path)
            return
        logger.info("Downloading G2P data...")
        # Path.write_text fails if the directory is missing, so create it up front.
        self.cache_path.mkdir(parents=True, exist_ok=True)
        self.ligands = LigandDf.read_file(LIGANDS_URL, sep="\t")
        self.ligands.write_file(self.ligands_path)
        self.interactions = InteractionDf.read_file(INTERACTIONS_URL, sep="\t")
        self.interactions.write_file(self.interactions_path)
        info = dict(dt_downloaded=datetime.now().isoformat())
        info = orjson.dumps(info).decode(encoding="utf8")
        (self.cache_path / "info.json").write_text(info, encoding="utf8")
        if exists:
            # Reached only with force=True: a complete cache was replaced.
            logger.notice(f"Overwrote existing cached G2P data in {self.cache_path}")
        else:
            logger.notice(f"Cached missing G2P data to {self.cache_path}")

    @property
    def ligands_path(self) -> Path:
        """Path of the cached ligand table inside ``cache_path``."""
        return (self.cache_path / "ligands").with_suffix(_DEF_SUFFIX)

    @property
    def interactions_path(self) -> Path:
        """Path of the cached interaction table inside ``cache_path``."""
        return (self.cache_path / "interactions").with_suffix(_DEF_SUFFIX)

    def _load_file(self, clazz: Type[TypedDf], path: Path, url: str) -> pd.DataFrame:
        """
        Read one table from ``path`` if it exists; otherwise fetch it from ``url`` and cache it.

        Args:
            clazz: Typed DataFrame class used to read the table.
            path: Location of the cached copy.
            url: Remote tab-separated source used when ``path`` is absent.

        Returns:
            The loaded table.
        """
        if path.exists():
            return clazz.read_file(path)
        # The remote tables are tab-separated, as in download().
        df = clazz.read_file(url, sep="\t")
        df.write_file(path)
        return df

    def _convert_interaction(self, series: pd.Series) -> G2pInteraction:
        """
        Build a :class:`G2pInteraction` from one row of the interaction table.

        The ``selectivity``, ``primary_target``, and ``endogenous`` fields are
        replaced by ``TrueFalseUnknown`` values; all other fields pass through unchanged.
        """
        fields = dict(series.to_dict())
        sel_map = {
            "Selective": TrueFalseUnknown.true,
            "Non-selective": TrueFalseUnknown.false,
            "Not Determined": TrueFalseUnknown.unknown,
        }
        # Any selectivity label not in the map is treated as unknown.
        fields["selectivity"] = sel_map.get(fields["selectivity"], TrueFalseUnknown.unknown)
        fields["primary_target"] = TrueFalseUnknown.parse(fields["primary_target"])
        fields["endogenous"] = TrueFalseUnknown.parse(fields["endogenous"])
        return G2pInteraction(**fields)

    def __repr__(self):
        loaded = "not loaded" if self.ligands is None else f"n={len(self.ligands)}"
        return f"{self.__class__.__name__}({self.cache_path} : {loaded})"

    def __str__(self):
        return repr(self)

    def __eq__(self, other):
        # Comparison is deliberately unsupported.
        raise NotImplementedError(f"Cannot compare {self.__class__.__name__}")
140
141
142
# Public API of this module; the names must match the classes defined above.
__all__ = ["G2pApi", "CachingG2pApi"]
143