Passed
Push — main ( 3a0c28...4b9dc0 )
by Douglas
01:51
created

HmdbData.cas()   A

Complexity

Conditions 1

Size

Total Lines 3
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 3
nop 1
dl 0
loc 3
rs 10
c 0
b 0
f 0
1
import math
0 ignored issues
show
introduced by
Missing module docstring
Loading history...
2
from dataclasses import dataclass
3
from datetime import datetime
4
from functools import cached_property
0 ignored issues
show
Bug introduced by
The name cached_property does not seem to exist in module functools.
Loading history...
5
from typing import Mapping, NamedTuple, Optional, Sequence
6
7
import regex
0 ignored issues
show
introduced by
Unable to import 'regex'
Loading history...
8
from pocketutils.core.chars import Chars
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.chars'
Loading history...
9
from pocketutils.core.dot_dict import NestedDotDict
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.dot_dict'
Loading history...
10
from pocketutils.core.enums import FlagEnum
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.enums'
Loading history...
11
from pocketutils.tools.common_tools import CommonTools
0 ignored issues
show
introduced by
Unable to import 'pocketutils.tools.common_tools'
Loading history...
12
13
from mandos.model.apis.hmdb_support.properties import PREDICTED_PROPERTIES, RULES, _Prop
14
from mandos.model.utils.setup import logger
15
16
# Scale factors from each molar-concentration prefix to micromolar (µM == 1).
# NOTE(review): not referenced anywhere else in the visible part of this module.
_prefixes = dict(M=1e6, mM=1e3, µM=1, uM=1, nM=1e-3, pM=1e-6, fM=1e-9)
# "mean (lower - upper)", e.g. "4.5 (2.0-7.0)"; groups are (mean, lower, upper).
# The first capture group was previously left unclosed, which made the pattern
# fail to compile with unbalanced parentheses.
_p1 = regex.compile(r"^([0-9.]+) +\(([0-9.]+) *\- *([0-9.]+)\)$", flags=regex.V1)
# "mean +/- deviation", e.g. "4.5 +/- 1.2"; groups are (mean, deviation).
_p2 = regex.compile(r"^([0-9.]+) +\+\/\- +([0-9.]+)$", flags=regex.V1)
19
20
21
class ConcentrationBound(NamedTuple):
    """A concentration given as a central value plus lower and upper bounds."""

    mean: float
    lower: float
    upper: float

    @property
    def std(self) -> float:
        """Return half the width of the ``lower``..``upper`` interval."""
        return self.upper / 2 - self.lower / 2

    @property
    def is_symmetric(self) -> bool:
        """Return whether ``mean`` is (per ``math.isclose``) equally far from both bounds."""
        return math.isclose(self.upper - self.mean, self.mean - self.lower)
33
34
35
@dataclass(frozen=True, repr=True, order=True)
class HmdbProperty:
    """An immutable, orderable property record: its kind, its source, and its raw value."""

    kind: str
    source: str
    value: str
40
41
42
@dataclass(frozen=True, repr=True, order=True)
class HmdbDisease:
    """An immutable, orderable disease record: name, OMIM ID, and number of references."""

    name: str
    omim_id: str
    n_refs: int
47
48
49
class PersonAge(FlagEnum):
    """Age groups of study subjects; members are combined with ``|`` when several apply."""

    unknown = ()
    adults = ()
    children = ()
53
54
55
class PersonSex(FlagEnum):
    """Sexes of study subjects; members are combined with ``|`` when several apply."""

    unknown = ()
    male = ()
    female = ()
59
60
61
@dataclass(frozen=True, repr=True, order=True)
class HmdbConcentration:
    """
    A concentration reported for a specimen and a group of subjects.

    Exactly one of ``micromolar`` and ``mg_per_kg`` must be provided; the other
    must be ``None``.

    Raises:
        AssertionError: If both or neither of ``micromolar`` and ``mg_per_kg`` are set.
    """

    specimen: str
    ages: PersonAge
    sexes: PersonSex
    condition: Optional[str]
    micromolar: Optional[ConcentrationBound]
    mg_per_kg: Optional[ConcentrationBound]

    def __post_init__(self):
        # Each comparison is a bool; their sum counts how many units are missing.
        n_missing = (self.mg_per_kg is None) + (self.micromolar is None)
        if n_missing != 1:
            raise AssertionError(
                f"Provided both micromolar ({self.micromolar})"
                + f" and mg/kg ({self.mg_per_kg}), or neither"
            )

    @cached_property
    def format_value(self) -> str:
        """Return the mean followed by a narrow no-break space and the unit."""
        # Previously interpolated the whole bound tuple (its NamedTuple repr).
        return f"{self._value.mean}{Chars.narrownbsp}{self._unit}"

    @cached_property
    def format_value_pm(self) -> str:
        """Return the value as ``mean±std``, then a narrow no-break space and the unit."""
        bound, unit, space = self._value, self._unit, Chars.narrownbsp
        return f"{bound.mean}{Chars.plusminus}{bound.std}{space}{unit}"

    @cached_property
    def format_value_range(self) -> str:
        """Return the value as ``mean (lower–upper)`` followed by the unit."""
        bound, unit, space = self._value, self._unit, Chars.narrownbsp
        return f"{bound.mean}{space}({bound.lower}{Chars.en}{bound.upper}){space}{unit}"

    @property
    def _value(self) -> ConcentrationBound:
        """Return whichever of ``mg_per_kg`` / ``micromolar`` is set."""
        if self.mg_per_kg is not None:
            return self.mg_per_kg
        return self.micromolar

    @property
    def _unit(self) -> str:
        """Return the unit label matching ``_value``."""
        # No leading space: the formatters already insert Chars.narrownbsp
        # between the number and the unit.
        if self.mg_per_kg is not None:
            return "mg/kg"
        return "µmol/L"
102
103
104
class HmdbData:
    """
    Typed accessors over the nested data of a single HMDB metabolite record.

    Every property reads from the ``NestedDotDict`` passed to the constructor,
    using dotted keys under ``metabolite``.
    """

    def __init__(self, data: NestedDotDict):
        self._data = data

    @property
    def cid(self) -> str:
        """Return the required ``metabolite.accession`` value."""
        return self._data.req_as("metabolite.accession", str)

    @property
    def inchi(self) -> str:
        """Return the required ``metabolite.inchi`` value."""
        return self._data.req_as("metabolite.inchi", str)

    @property
    def inchikey(self) -> str:
        """Return the required ``metabolite.inchikey`` value."""
        return self._data.req_as("metabolite.inchikey", str)

    @property
    def smiles(self) -> str:
        """Return the required ``metabolite.smiles`` value."""
        return self._data.req_as("metabolite.smiles", str)

    @property
    def cas(self) -> str:
        """Return the required ``metabolite.cas_registry_number`` value."""
        return self._data.req_as("metabolite.cas_registry_number", str)

    @property
    def drugbank_id(self) -> Optional[str]:
        """Return the optional ``metabolite.drugbank_id`` value."""
        # Previously read "metabolite.inchikey", i.e. returned the InChIKey.
        return self._data.get_as("metabolite.drugbank_id", str)

    @property
    def pubchem_id(self) -> Optional[str]:
        """Return the optional ``metabolite.pubchem_compound_id`` value."""
        return self._data.get_as("metabolite.pubchem_compound_id", str)

    @property
    def create_date(self) -> datetime:
        """Return ``metabolite.creation_date`` parsed with ``datetime.fromisoformat``."""
        # NOTE(review): assumes the stored string is ISO 8601 -- confirm against the data.
        return datetime.fromisoformat(self._data.req_as("metabolite.creation_date", str))

    @property
    def mod_date(self) -> datetime:
        """Return ``metabolite.update_date`` parsed with ``datetime.fromisoformat``."""
        # NOTE(review): assumes the stored string is ISO 8601 -- confirm against the data.
        return datetime.fromisoformat(self._data.req_as("metabolite.update_date", str))

    @cached_property
    def predicted_properties(self) -> Sequence[HmdbProperty]:
        """Return predicted properties whose (kind, source) is in ``PREDICTED_PROPERTIES``."""
        data = self._data.get("metabolite.predicted_properties", [])
        return [
            HmdbProperty(kind=x["kind"], source=x["source"], value=x["value"])
            for x in data
            if _Prop(x["kind"], x["source"]) in PREDICTED_PROPERTIES
        ]

    @cached_property
    def rules(self) -> Mapping[str, bool]:
        """Map each rule kind to its boolean value, for entries whose (kind, source) is in ``RULES``."""
        data = self._data.get("metabolite.predicted_properties", [])
        # NOTE(review): membership is tested with a plain tuple here, whereas
        # predicted_properties wraps the pair in _Prop -- confirm RULES' element type.
        return {
            r["kind"]: CommonTools.parse_bool_flex(r["value"])
            for r in data
            if (r["kind"], r["source"]) in RULES
        }

    @cached_property
    def diseases(self) -> Sequence[HmdbDisease]:
        """Return one ``HmdbDisease`` per entry of ``metabolite.diseases``."""
        data = self._data.get_list_as("metabolite.diseases", NestedDotDict)
        return [HmdbDisease(d["name"], d["omim_id"], len(d.get("references", []))) for d in data]

    @cached_property
    def specimens(self) -> Sequence[str]:
        """Return ``metabolite.biological_properties.biospecimen_locations``."""
        return self._data.get_list_as("metabolite.biological_properties.biospecimen_locations", str)

    @cached_property
    def tissue_locations(self) -> Sequence[str]:
        """Return ``metabolite.biological_properties.tissue_locations``."""
        return self._data.get_list_as("metabolite.biological_properties.tissue_locations", str)

    @cached_property
    def normal_concentrations(self) -> Sequence[HmdbConcentration]:
        """Return the parseable entries of ``metabolite.normal_concentrations``."""
        return self._concentrations("metabolite.normal_concentrations")

    @cached_property
    def abnormal_concentrations(self) -> Sequence[HmdbConcentration]:
        """Return the parseable entries of ``metabolite.abnormal_concentrations``."""
        # Previously returned the raw, unparsed *normal* concentrations list.
        return self._concentrations("metabolite.abnormal_concentrations")

    def _concentrations(self, key: str) -> Sequence[HmdbConcentration]:
        """Parse the list stored under ``key``, dropping entries ``_new_conc`` rejects."""
        entries = self._data.get_list_as(key, NestedDotDict, [])
        parsed = (self._new_conc(entry) for entry in entries)
        return [conc for conc in parsed if conc is not None]

    def _new_conc(self, x: NestedDotDict) -> Optional[HmdbConcentration]:
        """
        Build an ``HmdbConcentration`` from one raw concentration entry.

        Returns:
            The concentration, or ``None`` if the value is empty, the units are
            neither ``uM`` nor ``mg/kg``, or the value cannot be parsed.
        """
        specimen = x["biospecimen"]
        # both can be "Not Specified"
        ages = {
            "Adult": PersonAge.adults,
            "Children": PersonAge.children,
            "Both": PersonAge.adults | PersonAge.children,
        }.get(x.get_as("subject_age", str, "").split(" ")[0], PersonAge.unknown)
        sexes = {
            "Male": PersonSex.male,
            "Female": PersonSex.female,
            "Both": PersonSex.female | PersonSex.male,
        }.get(x.get_as("subject_sex", str, ""), PersonSex.unknown)
        condition = (
            None
            if x.get("subject_condition") == "Normal"
            else x.get_as("patient_information", str, "")
        )
        value, units = x.get_as("concentration_value", str), x.get_as("concentration_units", str)
        if not value:
            logger.trace(f"Discarding {x} with empty value")
            return None
        if units not in ["uM", "mg/kg"]:
            logger.trace(f"Discarding {x} with units '{units}'")
            return None
        bound = self._parse_conc(value)
        if bound is None:
            logger.warning(f"Could not parse concentration {value} (units: {units})")
            logger.trace(f"Full data: {x}")
            return None
        return HmdbConcentration(
            specimen=specimen,
            ages=ages,
            sexes=sexes,
            condition=condition,
            micromolar=bound if units == "uM" else None,
            mg_per_kg=bound if units == "mg/kg" else None,
        )

    def _parse_conc(self, value: str) -> Optional[ConcentrationBound]:
        """
        Parse ``"mean (lower-upper)"`` or ``"mean +/- deviation"`` into a bound.

        Returns:
            The parsed bound with float fields, or ``None`` if ``value`` matches
            neither pattern or contains a malformed number.
        """
        range_match = _p1.fullmatch(value)
        pm_match = _p2.fullmatch(value) if range_match is None else None
        matched = range_match if range_match is not None else pm_match
        if matched is None:
            return None
        try:
            # Regex groups are strings; they must be converted before arithmetic.
            numbers = [float(group) for group in matched.groups()]
        except ValueError:
            # The patterns admit strings such as "1.2.3" that float() rejects.
            return None
        if range_match is not None:
            return ConcentrationBound(*numbers)
        mean, deviation = numbers
        return ConcentrationBound(mean, mean - deviation, mean + deviation)
237
238
239
# Public names exported by this module.
__all__ = [
    "HmdbProperty",
    "ConcentrationBound",
    "HmdbData",
    "PersonSex",
    "PersonAge",
    "HmdbConcentration",
    "HmdbDisease",
]
248