Passed
Push — main ( 2e1b6b...3a0c28 )
by Douglas
02:06
created

mandos.model.apis.g2p_api.G2pApi.__str__()   A

Complexity

Conditions 1

Size

Total Lines 2
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 2
nop 1
dl 0
loc 2
rs 10
c 0
b 0
f 0
1
import abc
0 ignored issues
show
introduced by
Missing module docstring
Loading history...
2
from datetime import datetime
3
from pathlib import Path
4
from typing import Optional, Type
5
6
import decorateme
0 ignored issues
show
introduced by
Unable to import 'decorateme'
Loading history...
Unused Code introduced by
The import decorateme seems to be unused.
Loading history...
7
import numpy as np
0 ignored issues
show
introduced by
Unable to import 'numpy'
Loading history...
8
import orjson
0 ignored issues
show
introduced by
Unable to import 'orjson'
Loading history...
9
import pandas as pd
0 ignored issues
show
introduced by
Unable to import 'pandas'
Loading history...
10
from pocketutils.core.enums import TrueFalseUnknown
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.enums'
Loading history...
11
from pocketutils.core.exceptions import UnsupportedOpError
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.exceptions'
Loading history...
12
from pocketutils.tools.common_tools import CommonTools
0 ignored issues
show
introduced by
Unable to import 'pocketutils.tools.common_tools'
Loading history...
13
from typeddfs import TypedDf, TypedDfs
0 ignored issues
show
introduced by
Unable to import 'typeddfs'
Loading history...
14
15
from mandos.model import Api, CompoundNotFoundError
16
from mandos.model.apis.g2p_support.g2p_data import G2pData, G2pInteraction
17
from mandos.model.settings import SETTINGS
18
from mandos.model.utils.setup import logger
19
20
# Remote TSV exports from the Guide to Pharmacology (G2P) website.
LIGANDS_URL = "https://www.guidetopharmacology.org/DATA/ligand_id_mapping.tsv"
INTERACTIONS_URL = "https://www.guidetopharmacology.org/DATA/interactions.tsv"
# Filename suffix applied to the locally cached copies of the tables above.
_DEF_SUFFIX = SETTINGS.archive_filename_suffix
23
24
25
def _oint(x: str) -> Optional[int]:
0 ignored issues
show
Coding Style Naming introduced by
Argument name "x" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
26
    if x is None or isinstance(x, str) and x.strip() == "":
27
        return None
28
    return int(x)
29
30
31
class G2pCompoundLookupError(CompoundNotFoundError):
    """Raised when a compound cannot be resolved in the G2P ligand table."""
33
34
35
# Typed DataFrame schema for the G2P ligand ID mapping table.
# The column names here are the ones code must use to index a ``LigandDf``
# (note the capitalization: "InChIKey", "PubChem CID", "Name").
LigandDf = (
    TypedDfs.typed("LigandDf")
    .require("Ligand id", dtype=int)
    .require("Name", "Type", "Approved", "PubChem CID", "InChIKey", dtype=str)
    .strict()
    .secure()
    .hash(file=True)
).build()
43
44
45
# Typed DataFrame schema for the G2P interactions table.
# Rows are joined to ``LigandDf`` through the integer ``ligand_id`` column.
InteractionDf = (
    TypedDfs.typed("InteractionDf")
    .require("target", "target_id", dtype=str)
    .require("target_gene_symbol", "target_uniprot", dtype=str)
    .require("target_species", dtype=str)
    .require("ligand", dtype=str)
    .require("ligand_id", dtype=int)
    .require("type", "action", dtype=str)
    .require("selectivity", "endogenous", "primary_target", dtype=str)
    .require("affinity_units", dtype=str)
    .require("affinity_median", dtype=np.float64)
    .strict()
    .secure()
    .hash(file=True)
).build()
60
61
62
class G2pApi(Api, metaclass=abc.ABCMeta):
    """
    Base interface for looking up Guide to Pharmacology (G2P) data by compound.

    Subclasses must override :meth:`fetch`.
    """

    def fetch(self, inchikey: str) -> G2pData:
        """
        Looks up the G2P data for a compound.

        Args:
            inchikey: The InChIKey of the compound to look up.

        Returns:
            The G2P data for the compound.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()

    def __eq__(self, other):
        """
        Refuses equality comparison.

        Raises:
            UnsupportedOpError: Always.
        """
        # NOTE: defining ``__eq__`` without ``__hash__`` also makes instances unhashable.
        raise UnsupportedOpError(f"Cannot compare {self.__class__.__name__}")

    def __repr__(self):
        return f"{self.__class__.__name__}()"

    def __str__(self):
        return repr(self)
74
75
76
class CachingG2pApi(G2pApi, metaclass=abc.ABCMeta):
    """
    A :class:`G2pApi` backed by local copies of the G2P ligand and interaction tables.

    The tables are read from ``cache_path`` when both cached files exist;
    otherwise they are downloaded from ``LIGANDS_URL`` and ``INTERACTIONS_URL``
    and written to ``cache_path``. See :meth:`download`.
    """

    def __init__(self, cache_path: Path = SETTINGS.g2p_cache_path):
        """
        Args:
            cache_path: Directory holding the cached tables and ``info.json``.
        """
        self.cache_path = Path(cache_path)
        # Both tables stay ``None`` until ``download()`` has loaded them.
        self.ligands: Optional[LigandDf] = None
        self.interactions: Optional[InteractionDf] = None

    def fetch(self, inchikey: str) -> G2pData:
        """
        Looks up a ligand by InChIKey and collects its interactions.

        Loads the tables first (via :meth:`download`) if they are not in memory.

        Args:
            inchikey: The InChIKey to match against the ``InChIKey`` column.

        Returns:
            A ``G2pData`` built from the matching ligand row and every
            interaction row whose ``ligand_id`` equals the ligand's ID.

        Raises:
            G2pCompoundLookupError: If no ligand, or more than one ligand,
                                    has the given InChIKey.
        """
        # No-op when both tables are already loaded; otherwise avoids
        # indexing into ``None``.
        self.download()
        # Column names must match the ``LigandDf`` schema exactly.
        matches = self.ligands[self.ligands["InChIKey"] == inchikey]
        if len(matches) == 0:
            raise G2pCompoundLookupError(f"G2P ligand {inchikey} not found")
        if len(matches) > 1:
            raise G2pCompoundLookupError(
                f"G2P ligand {inchikey} is ambiguous ({len(matches)} matches)"
            )
        basic = matches.iloc[0].to_dict()
        g2pid = int(basic["Ligand id"])
        hits = self.interactions[self.interactions["ligand_id"] == g2pid]
        # Iterating a DataFrame directly yields column labels, not rows,
        # so iterate with ``iterrows()`` to get one Series per interaction.
        interactions = [self._convert_interaction(row) for _, row in hits.iterrows()]
        return G2pData(
            inchikey=basic["InChIKey"],
            g2pid=g2pid,
            name=basic["Name"],
            type=basic["Type"],
            approved=TrueFalseUnknown.of(basic["Approved"]),
            pubchem_id=_oint(basic["PubChem CID"]),
            interactions=interactions,
        )

    def download(self, force: bool = False) -> None:
        """
        Ensures that both tables are loaded into memory.

        Does nothing if both are already loaded and ``force`` is false.
        Otherwise reads both from the cache if both cached files exist
        (and ``force`` is false), or else downloads both, writes them to the
        cache, and records the download time in ``info.json``.

        Args:
            force: If true, re-download and overwrite any cached files.
        """
        already_loaded = self.ligands is not None and self.interactions is not None
        if already_loaded and not force:
            return
        # always download both together -- we don't want them non-synced
        exists = self.ligands_path.exists() and self.interactions_path.exists()
        if exists and not force:
            self.ligands = LigandDf.read_file(self.ligands_path)
            self.interactions = InteractionDf.read_file(self.interactions_path)
            return
        logger.info("Downloading G2P data...")
        # The cache directory may not exist on a first run.
        self.cache_path.mkdir(parents=True, exist_ok=True)
        self.ligands = LigandDf.read_file(LIGANDS_URL, sep="\t")
        self.ligands.write_file(self.ligands_path)
        self.interactions = InteractionDf.read_file(INTERACTIONS_URL, sep="\t")
        self.interactions.write_file(self.interactions_path)
        metadata = dict(dt_downloaded=datetime.now().isoformat())
        encoded = orjson.dumps(metadata).decode(encoding="utf8")
        (self.cache_path / "info.json").write_text(encoded, encoding="utf8")
        if exists:
            logger.notice(f"Overwrote existing cached G2P data in {self.cache_path}")
        else:
            logger.notice(f"Cached missing G2P data to {self.cache_path}")

    @property
    def ligands_path(self) -> Path:
        """Path of the cached ligand table under ``cache_path``."""
        return (self.cache_path / "ligands").with_suffix(_DEF_SUFFIX)

    @property
    def interactions_path(self) -> Path:
        """Path of the cached interaction table under ``cache_path``."""
        return (self.cache_path / "interactions").with_suffix(_DEF_SUFFIX)

    def _load_file(self, clazz: Type[TypedDf], path: Path, url: str) -> pd.DataFrame:
        """
        Reads a table from ``path`` if it exists; otherwise reads it from ``url``
        and writes it to ``path``.

        Args:
            clazz: The typed DataFrame class used to read the table.
            path: Local cache file for this table.
            url: Remote location to read from when ``path`` does not exist.

        Returns:
            The loaded table.
        """
        if path.exists():
            return clazz.read_file(path)
        table = clazz.read_file(url)
        table.write_file(path)
        return table

    def _convert_interaction(self, series: pd.Series) -> G2pInteraction:
        """
        Builds a ``G2pInteraction`` from one row of the interactions table.

        The ``selectivity``, ``primary_target``, and ``endogenous`` values are
        replaced with ``TrueFalseUnknown`` members; every other column is passed
        through unchanged as a keyword argument.
        """
        fields = dict(series.to_dict())
        sel_map = {
            "Selective": TrueFalseUnknown.true,
            "Non-selective": TrueFalseUnknown.false,
            "Not Determined": TrueFalseUnknown.unknown,
        }
        # Unrecognized selectivity labels fall back to "unknown".
        fields["selectivity"] = sel_map.get(fields["selectivity"], TrueFalseUnknown.unknown)
        fields["primary_target"] = TrueFalseUnknown.of(fields["primary_target"])
        fields["endogenous"] = TrueFalseUnknown.of(fields["endogenous"])
        return G2pInteraction(**fields)

    def __repr__(self):
        loaded = "not loaded" if self.ligands is None else f"n={len(self.ligands)}"
        return f"{self.__class__.__name__}({self.cache_path} : {loaded})"

    def __str__(self):
        return repr(self)
158
159
160
# Public API of this module. The previous spelling (``_all__``) had no effect
# on star-imports, and "CachedG2pApi" did not name any class defined here.
__all__ = ["G2pApi", "CachingG2pApi"]
161