Passed
Push — main ( ddff4b...7b3fbc )
by Douglas
04:33
created

mandos.model.apis.g2p_api   A

Complexity

Total Complexity 24

Size/Duplication

Total Lines 134
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 102
dl 0
loc 134
rs 10
c 0
b 0
f 0
wmc 24

1 Function

Rating   Name   Duplication   Size   Complexity  
A _oint() 0 4 4

11 Methods

Rating   Name   Duplication   Size   Complexity  
A CachedG2pApi._load_file() 0 7 2
A G2pApi.fetch() 0 2 1
A CachedG2pApi._convert_interaction() 0 11 1
A CachedG2pApi.__init__() 0 4 1
A CachedG2pApi.__str__() 0 2 1
A CachedG2pApi.__repr__() 0 3 2
A CachedG2pApi.interactions_path() 0 3 1
A CachedG2pApi.ligands_path() 0 3 1
A CachedG2pApi.__eq__() 0 2 1
A CachedG2pApi.fetch() 0 19 2
B CachedG2pApi.download() 0 14 7
1
import abc
0 ignored issues
show
introduced by
Missing module docstring
Loading history...
2
import time
0 ignored issues
show
Unused Code introduced by
The import time seems to be unused.
Loading history...
3
from datetime import datetime
4
from pathlib import Path
5
from typing import Optional, Type
6
7
import numpy as np
0 ignored issues
show
introduced by
Unable to import 'numpy'
Loading history...
8
import orjson
0 ignored issues
show
introduced by
Unable to import 'orjson'
Loading history...
9
import pandas as pd
0 ignored issues
show
introduced by
Unable to import 'pandas'
Loading history...
10
from typeddfs import TypedDfs, TypedDf
0 ignored issues
show
introduced by
Unable to import 'typeddfs'
Loading history...
11
from pocketutils.tools.common_tools import CommonTools
0 ignored issues
show
introduced by
Unable to import 'pocketutils.tools.common_tools'
Loading history...
12
13
from mandos.model import Api, CompoundNotFoundError
14
from mandos.model.apis.g2p_data import G2pData, G2pInteraction, TrueFalseUnknown
15
16
# Both tables are published side by side in the Guide to Pharmacology DATA directory.
_G2P_DATA_URL = "https://www.guidetopharmacology.org/DATA"
# Tab-separated ligand ID mapping table.
LIGANDS_URL = f"{_G2P_DATA_URL}/ligand_id_mapping.tsv"
# Tab-separated ligand-target interactions table.
INTERACTIONS_URL = f"{_G2P_DATA_URL}/interactions.tsv"
18
19
20
def _oint(x: Optional[str]) -> Optional[int]:
    """
    Convert a possibly-missing value to an ``int``.

    Args:
        x: The raw value: typically a string, but ``None``, a float NaN,
           or an already-numeric value are accepted too.

    Returns:
        ``None`` if ``x`` is ``None``, a float NaN, or a blank/whitespace-only
        string; otherwise ``int(x)``.

    Raises:
        ValueError: If ``x`` is present but cannot be parsed by ``int``.
    """
    if x is None:
        return None
    # NaN is the only float that is not equal to itself; pandas uses it for missing cells,
    # and ``int(nan)`` would raise instead of signalling "no value".
    if isinstance(x, float) and x != x:
        return None
    if isinstance(x, str) and x.strip() == "":
        return None
    return int(x)
24
25
26
# Typed schema for the ligand table (the table read from LIGANDS_URL).
# Column names are matched exactly, including case and spaces.
_LIGAND_TEXT_COLUMNS = ("Name", "Type", "Approved", "PubChem CID", "InChIKey")
LigandDf = (
    TypedDfs.typed("LigandDf")
    .require("Ligand id", dtype=int)
    .require(*_LIGAND_TEXT_COLUMNS, dtype=str)
    .build()
)


# Typed schema for the interactions table (the table read from INTERACTIONS_URL).
# The ``require`` calls are kept in their original order and grouping.
_INTERACTION_TARGET_COLUMNS = (
    "target",
    "target_id",
    "target_gene_symbol",
    "target_uniprot",
    "target_species",
)
_INTERACTION_FLAG_COLUMNS = ("selectivity", "endogenous", "primary_target")
InteractionDf = (
    TypedDfs.typed("InteractionDf")
    .require(*_INTERACTION_TARGET_COLUMNS, dtype=str)
    .require("ligand", dtype=str)
    .require("ligand_id", dtype=int)
    .require("type", "action", dtype=str)
    .require(*_INTERACTION_FLAG_COLUMNS, dtype=str)
    .require("affinity_units", dtype=str)
    .require("affinity_median", dtype=np.float64)
    .build()
)
45
46
47
class G2pApi(Api, metaclass=abc.ABCMeta):
    """
    Base interface for looking up Guide to Pharmacology (G2P) data.

    Subclasses are expected to override :meth:`fetch`.
    """

    def fetch(self, inchikey: str) -> G2pData:
        """
        Look up the G2P record for a compound.

        Args:
            inchikey: InChIKey of the compound to look up.

        Returns:
            The G2P data for the compound.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError
50
51
52
class CachedG2pApi(G2pApi, metaclass=abc.ABCMeta):
    """
    G2P API backed by local copies of the G2P ligand and interaction tables.

    The two tables are downloaded from ``LIGANDS_URL`` and ``INTERACTIONS_URL``,
    written under ``cache_path``, and kept in memory in ``self.ligands`` and
    ``self.interactions`` once loaded.
    """

    def __init__(self, cache_path: Path):
        """
        Args:
            cache_path: Directory in which the cached tables and ``info.json`` live.
        """
        self.cache_path = Path(cache_path)
        # Both stay ``None`` until ``download`` has loaded them.
        self.ligands: Optional[LigandDf] = None
        self.interactions: Optional[InteractionDf] = None

    def fetch(self, inchikey: str) -> G2pData:
        """
        Look up a ligand by InChIKey and collect its interactions.

        The tables are loaded first (from the cache, or downloaded) if they are
        not already in memory.

        Args:
            inchikey: InChIKey to match against the ``InChIKey`` ligand column.

        Returns:
            The ligand's basic fields plus all of its interactions.

        Raises:
            CompoundNotFoundError: If no ligand has this InChIKey.
            ValueError: If more than one ligand has this InChIKey.
        """
        # No-op when both tables are already loaded.
        self.download()
        # Column names must match the LigandDf schema exactly ("InChIKey", not "inchikey").
        matches = self.ligands[self.ligands["InChIKey"] == inchikey]
        if len(matches) == 0:
            raise CompoundNotFoundError(f"G2P ligand {inchikey} not found")
        if len(matches) > 1:
            raise ValueError(f"Multiple G2P ligands ({len(matches)}) match {inchikey}")
        basic = matches.iloc[0].to_dict()
        g2pid = int(basic["Ligand id"])
        hits = self.interactions[self.interactions["ligand_id"] == g2pid]
        # Iterating a DataFrame directly yields column labels; iterrows() yields the rows.
        interactions = [self._convert_interaction(row) for _, row in hits.iterrows()]
        return G2pData(
            inchikey=basic["InChIKey"],
            g2pid=g2pid,
            name=basic["Name"],
            type=basic["Type"],
            approved=TrueFalseUnknown.parse(basic["Approved"]),
            pubchem_id=_oint(basic["PubChem CID"]),
            interactions=interactions,
        )

    def download(self, force: bool = False) -> None:
        """
        Load both tables, from the cache if present, else from the G2P website.

        Args:
            force: If true, re-download even if the tables are already loaded
                   or cached on disk.
        """
        already_loaded = self.ligands is not None and self.interactions is not None
        if already_loaded and not force:
            return
        # Always handle both tables together -- we don't want them non-synced.
        cached = self.ligands_path.exists() and self.interactions_path.exists()
        if cached and not force:
            self.ligands = LigandDf.read_file(self.ligands_path)
            self.interactions = InteractionDf.read_file(self.interactions_path)
            return
        # Fetch both before writing or assigning anything, so that a failed second
        # download cannot leave one fresh table next to one stale or missing table.
        ligands = LigandDf.read_file(LIGANDS_URL, sep="\t")
        interactions = InteractionDf.read_file(INTERACTIONS_URL, sep="\t")
        self.cache_path.mkdir(parents=True, exist_ok=True)
        ligands.write_file(self.ligands_path)
        interactions.write_file(self.interactions_path)
        self.ligands = ligands
        self.interactions = interactions
        info = dict(dt_downloaded=datetime.now().isoformat())
        info = orjson.dumps(info).decode(encoding="utf8")
        (self.cache_path / "info.json").write_text(info, encoding="utf8")

    @property
    def ligands_path(self) -> Path:
        """Path of the cached ligand table."""
        return self.cache_path / "ligands.feather"

    @property
    def interactions_path(self) -> Path:
        """Path of the cached interactions table."""
        return self.cache_path / "interactions.feather"

    def _load_file(self, clazz: Type[TypedDf], path: Path, url: str) -> pd.DataFrame:
        """
        Read a tab-separated table from ``path`` if it exists, else from ``url``.

        When the table is read from ``url``, a tab-separated copy is written to ``path``.

        Args:
            clazz: The typed DataFrame class used to read the table.
            path: Local file to read from, or to write the fetched copy to.
            url: Remote location to read from when ``path`` does not exist.

        Returns:
            The loaded table.
        """
        if path.exists():
            return clazz.read_file(path, sep="\t")
        df = clazz.read_file(url, sep="\t")
        df.to_csv(path, sep="\t")
        return df

    def _convert_interaction(self, series: pd.Series) -> G2pInteraction:
        """
        Build a ``G2pInteraction`` from one row of the interactions table.

        The ``selectivity``, ``primary_target`` and ``endogenous`` fields are
        converted to ``TrueFalseUnknown``; every other field is passed through as-is.

        Args:
            series: A single row of the interactions table.

        Returns:
            The interaction, constructed from the row's fields as keyword arguments.
        """
        fields = dict(series.to_dict())
        sel_map = {
            "Selective": TrueFalseUnknown.true,
            "Non-selective": TrueFalseUnknown.false,
            "Not Determined": TrueFalseUnknown.unknown,
        }
        # Any label not listed above is treated as unknown.
        fields["selectivity"] = sel_map.get(fields["selectivity"], TrueFalseUnknown.unknown)
        fields["primary_target"] = TrueFalseUnknown.parse(fields["primary_target"])
        fields["endogenous"] = TrueFalseUnknown.parse(fields["endogenous"])
        return G2pInteraction(**fields)

    def __repr__(self):
        loaded = "not loaded" if self.ligands is None else f"n={len(self.ligands)}"
        return f"{self.__class__.__name__}({self.cache_path} : {loaded})"

    def __str__(self):
        return repr(self)

    def __eq__(self, other):
        # Comparison is deliberately unsupported.
        # NOTE: defining __eq__ without __hash__ also makes instances unhashable.
        raise NotImplementedError(f"Cannot compare {self.__class__.__name__}")
131
132
133
# Public API of this module; the name must be exactly ``__all__`` for ``import *`` to honor it.
__all__ = ["G2pApi", "CachedG2pApi"]
134