Passed
Push — main ( 9ff912...d08a4e )
by Douglas
03:54
created

mandos.model.pubchem_support.pubchem_models   A

Complexity

Total Complexity 33

Size/Duplication

Total Lines 434
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 312
dl 0
loc 434
rs 9.76
c 0
b 0
f 0
wmc 33

19 Methods

Rating   Name   Duplication   Size   Complexity  
A Code.of() 0 9 3
A ComputedProperty.req_is() 0 4 2
A Code.type_name() 0 3 1
A ComputedProperty.as_str() 0 3 1
A Codes.value() 0 3 1
A CoOccurrenceType.id_name() 0 9 4
A CoOccurrenceType.x_name() 0 9 4
A ClinicalTrialsGovUtils.phase_map() 0 10 1
A AtcCode.parts() 0 5 1
A ClinicalTrial.mapped_phase() 0 3 1
A AtcCode.level() 0 3 1
A ClinicalTrialsGovUtils.known_statuses() 0 3 1
A GhsCode.find() 0 11 1
A ClinicalTrialsGovUtils.status_map() 0 16 1
A ClinicalTrialsGovUtils.known_phases() 0 3 1
A ClinicalTrial.mapped_status() 0 3 1
A ClinicalTrialsGovUtils.resolve_statuses() 0 14 5
A CoOccurrence.strip_pubs() 0 10 1
A GhsCode.level() 0 3 1
1
from __future__ import annotations
0 ignored issues
show
introduced by
Missing module docstring
Loading history...
2
import enum
3
import re
4
from dataclasses import dataclass
5
from datetime import date
6
from typing import Union, Optional, FrozenSet, Sequence, Mapping, Set
7
8
from pocketutils.core.dot_dict import NestedDotDict
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.dot_dict'
Loading history...
9
10
from mandos.model import MandosResources, CleverEnum
0 ignored issues
show
Unused Code introduced by
Unused CleverEnum imported from mandos.model
Loading history...
11
from mandos.model.pubchem_support._nav_fns import Mapx
12
13
# Hazard records indexed by their "code" entry.
# Built once, when this module is imported, from the "signals" list of the
# ``hazards.toml`` file that ``MandosResources.path`` resolves.
hazards = {
    d["code"]: d for d in NestedDotDict.read_toml(MandosResources.path("hazards.toml"))["signals"]
}
16
17
18
@dataclass(frozen=True, repr=True, eq=True, order=True)
class ComputedProperty:
    """
    An immutable key/value property with an optional unit and a reference string.
    """

    key: str
    value: Union[int, str, float, bool]
    unit: Optional[str]
    ref: str

    def req_is(self, type_) -> Union[int, str, float, bool]:
        """
        Returns ``value`` after checking that it is an instance of ``type_``.

        Args:
            type_: A type (or tuple of types) accepted by ``isinstance``

        Returns:
            ``value``, unchanged

        Raises:
            TypeError: If ``value`` is not an instance of ``type_``
        """
        if not isinstance(self.value, type_):
            raise TypeError(f"{self.key}->{self.value} has {type(self.value)}, not {type_}")
        return self.value

    @property
    def as_str(self) -> str:
        """
        Returns the value followed by a space and the unit,
        or just the value if ``unit`` is ``None``.
        """
        if self.unit is None:
            # ``unit`` is Optional; do not render a literal "None" after the value
            return str(self.value)
        return f"{self.value} {self.unit}"
33
34
35
class Code(str):
    """
    A string subclass used as the base type for typed identifiers.
    """

    @property
    def type_name(self) -> str:
        """
        Returns the lowercased name of this instance's class.
        """
        return self.__class__.__name__.lower()

    @classmethod
    def of(cls, value: Union[str, int, float]):
        """
        Builds an instance of ``cls`` from a string, int, or float.

        A float with an integral value (e.g. ``2352.0``) is written without the
        fractional part (``"2352"``). Any other float (non-integral, NaN, or infinite)
        is written with ``str``. Surrounding whitespace is stripped.

        Args:
            value: The raw identifier

        Returns:
            A new instance of ``cls``
        """
        # Only drop the fractional part when nothing is lost.
        # ``int()`` would silently truncate 1.5 to 1 and raises ValueError on NaN.
        if isinstance(value, float) and value.is_integer():
            value = int(value)
        return cls(str(value).strip())
49
50
51
class Codes:
    """
    These turn out to be extremely useful for documenting return types.
    For example, ``DrugbankInteraction`` might have a ``gene`` field,
    which can be described as a ``GenecardSymbol`` if known.
    """

    class EcNumber(Code):
        """
        e.g. 'EC:4.6.1.1'
        """

    class GeneId(Code):
        """
        GeneCard, UniProt gene name, etc.
        e.g. 'slc1a2'
        """

    class ClinicaltrialId(Code):
        """
        From clinicaltrials.gov
        """

    class GenericDiseaseCode(Code):
        """
        From clinicaltrials.gov; pure int
        """

    class GenecardSymbol(GeneId):
        """
        A ``GeneId`` subtype; presumably a GeneCards gene symbol (inferred from the name).
        """

    class UniprotId(GeneId):
        """
        A ``GeneId`` subtype; presumably a UniProt identifier (inferred from the name).
        """

    class PubchemCompoundId(Code):
        """
        e.g. 2352
        """

        @property
        def value(self) -> int:
            """
            Returns this code parsed with ``int``.
            """
            return int(self)

    class AtcCode(Code):
        """
        Presumably an ATC classification code (inferred from the name).
        """

    class PubmedId(Code):
        """
        Presumably a PubMed ID (inferred from the name).
        """

    class Doi(Code):
        """
        Presumably a DOI (inferred from the name).
        """

    class MeshCode(Code):
        """
        Presumably a MeSH code (inferred from the name).
        """

    class PdbId(Code):
        """
        Presumably a PDB entry ID (inferred from the name).
        """

    class MeshHeading(Code):
        """
        Presumably a MeSH heading (inferred from the name).
        """

    class MeshSubheading(Code):
        """
        Presumably a MeSH subheading (inferred from the name).
        """

    class DrugbankCompoundId(Code):
        """
        Presumably a DrugBank compound ID (inferred from the name).
        """

    class DeaSchedule(Code):
        """
        Presumably a DEA schedule written as a Roman numeral (see ``value``).
        """

        @property
        def value(self) -> int:
            """
            Returns the result of applying ``Mapx.roman_to_arabic(1, 5)`` to this code.
            Presumably converts a Roman numeral to an int from 1 to 5; confirm against ``Mapx``.
            """
            return Mapx.roman_to_arabic(1, 5)(self)

    class GhsCode(Code):
        """
        Presumably a GHS hazard code (inferred from the name).
        """
127
128
129
class CoOccurrenceType(enum.Enum):
    """
    The kind of entity a co-occurrence refers to: chemical, gene, or disease.
    """

    chemical = enum.auto()
    gene = enum.auto()
    disease = enum.auto()

    @property
    def x_name(self) -> str:
        """
        Returns the neighbor-type label for this kind, e.g. ``ChemicalNeighbor``.

        Raises:
            AssertionError: If this member has no label
        """
        label = {
            CoOccurrenceType.chemical: "ChemicalNeighbor",
            CoOccurrenceType.gene: "ChemicalGeneSymbolNeighbor",
            CoOccurrenceType.disease: "ChemicalDiseaseNeighbor",
        }.get(self)
        if label is None:
            # Only reachable if a member is added without extending the table
            raise AssertionError(f"{self} not found!!")
        return label

    @property
    def id_name(self) -> str:
        """
        Returns the identifier label for this kind, e.g. ``CID``.

        Raises:
            AssertionError: If this member has no label
        """
        label = {
            CoOccurrenceType.chemical: "CID",
            CoOccurrenceType.gene: "GeneSymbol",
            CoOccurrenceType.disease: "MeSH",
        }.get(self)
        if label is None:
            # Only reachable if a member is added without extending the table
            raise AssertionError(f"{self} not found!!")
        return label
153
154
155
class ClinicalTrialsGovUtils:
    """
    Lookup tables and helpers for clinical trial phase and status strings.
    """

    @classmethod
    def phase_map(cls) -> Mapping[str, float]:
        """
        Returns a mapping from phase label to a numeric phase.
        """
        # NOTE(review): "Early Phase 1" maps to 1.5, above "Phase 1" -- confirm this is intended
        return {
            "Phase 4": 4,
            "Phase 3": 3,
            "Phase 2": 2,
            "Phase 1": 1,
            "Early Phase 1": 1.5,
            "Phase 2/Phase 3": 2.5,
            "N/A": 0,
        }

    @classmethod
    def known_phases(cls) -> Set[float]:
        """
        Returns the set of numeric phases that appear as values of ``phase_map``.
        """
        return set(cls.phase_map().values())

    @classmethod
    def resolve_statuses(cls, st: str) -> Set[str]:
        """
        Expands a comma-separated list of status names into a set of simplified statuses.

        Each item is lowercased and stripped before matching.
        The special item ``@all`` selects every status in ``known_statuses``.

        Args:
            st: Comma-separated items, each either ``@all`` or a member of ``known_statuses``

        Returns:
            The union of the statuses selected by the items

        Raises:
            ValueError: If an item is neither ``@all`` nor a known status
        """
        # Build the set once instead of on every loop iteration
        known = cls.known_statuses()
        found = set()
        for item in st.lower().split(","):
            item = item.strip()
            if item == "@all":
                found.update(known)
            elif item in known:
                found.add(item)
            else:
                raise ValueError(
                    f"Unknown status '{item}'; expected '@all' or one of {sorted(known)}"
                )
        return found

    @classmethod
    def known_statuses(cls) -> Set[str]:
        """
        Returns the set of simplified statuses that appear as values of ``status_map``.
        """
        return set(cls.status_map().values())

    @classmethod
    def status_map(cls) -> Mapping[str, str]:
        """
        Returns a mapping from status label to one of the simplified statuses
        ``unknown``, ``completed``, ``stopped``, or ``ongoing``.
        """
        return {
            "Unknown status": "unknown",
            "Completed": "completed",
            "Terminated": "stopped",
            "Suspended": "stopped",
            "Withdrawn": "stopped",
            "Not yet recruiting": "ongoing",
            "Recruiting": "ongoing",
            "Enrolling by invitation": "ongoing",
            "Active, not recruiting": "ongoing",
            "Available": "completed",
            "No longer available": "completed",
            "Temporarily not available": "completed",
            "Approved for marketing": "completed",
        }
208
209
210
@dataclass(frozen=True, repr=True, eq=True)
class ClinicalTrial:
    """
    Immutable record of one clinical trial.
    ``phase`` and ``status`` hold the raw labels that ``mapped_phase`` and
    ``mapped_status`` look up in the ``ClinicalTrialsGovUtils`` tables.
    """

    ctid: Codes.ClinicaltrialId
    title: str
    conditions: FrozenSet[str]
    # NOTE(review): annotated as ClinicaltrialId rather than a disease code type -- confirm
    disease_ids: FrozenSet[Codes.ClinicaltrialId]
    phase: str
    status: str
    interventions: FrozenSet[str]
    cids: FrozenSet[Codes.PubchemCompoundId]
    source: str

    @property
    def mapped_phase(self) -> float:
        """
        Returns the number that ``ClinicalTrialsGovUtils.phase_map`` assigns to ``phase``,
        or 0 if ``phase`` is not a key of that map.
        """
        return ClinicalTrialsGovUtils.phase_map().get(self.phase, 0)

    @property
    def mapped_status(self) -> str:
        """
        Returns the status that ``ClinicalTrialsGovUtils.status_map`` assigns to ``status``,
        or ``"unknown"`` if ``status`` is not a key of that map.
        """
        return ClinicalTrialsGovUtils.status_map().get(self.status, "unknown")
229
230
231
@dataclass(frozen=True, repr=True, eq=True)
class GhsCode:
    """
    Immutable record of one hazard code and the descriptive fields stored with it.
    """

    code: Codes.GhsCode
    statement: str
    clazz: str
    categories: FrozenSet[str]
    signal_word: str
    type: str

    @classmethod
    def find(cls, code: str) -> GhsCode:
        """
        Builds a ``GhsCode`` from the entry for ``code`` in the module-level ``hazards`` table.

        Args:
            code: A key of ``hazards``

        Raises:
            KeyError: If ``code`` is not a key of ``hazards``
        """
        h = hazards[code]
        # The "category" entry is passed through as-is; it is not converted
        # to the annotated FrozenSet[str].
        cats = h["category"]  # TODO
        return GhsCode(
            code=Codes.GhsCode(code),
            statement=h["statement"],
            clazz=h["class"],
            categories=cats,
            signal_word=h["signal_word"],
            type=h["type"],
        )

    @property
    def level(self) -> int:
        """
        Returns the second character of ``code`` parsed as an int.
        """
        return int(self.code[1])
256
257
258
@dataclass(frozen=True, repr=True, eq=True)
class AssociatedDisorder:
    """
    Immutable record of a disorder association: a disease ID and name,
    an evidence type, and a reference count.
    """

    # NOTE(review): the meaning of ``gid`` is not evident from this module -- confirm
    gid: str
    disease_id: Codes.MeshCode
    disease_name: str
    evidence_type: str
    n_refs: int
265
266
267
@dataclass(frozen=True, repr=True, eq=True)
class AtcCode:
    """
    Immutable ATC code with its name.

    ``code`` is split into up to five levels: a letter, two digits, a letter,
    a letter, and two digits (e.g. ``A10BA02`` -> ``A``, ``10``, ``B``, ``A``, ``02``).
    """

    # Not annotated, so the dataclass machinery does not treat it as a field.
    # Level 5 is two digits; the earlier pattern expected a letter there and
    # therefore rejected every full 7-character code.
    _PATTERN = re.compile(r"([A-Z])([0-9]{2})?([A-Z])?([A-Z])?([0-9]{2})?")

    code: str
    name: str

    @property
    def level(self) -> int:
        """
        Returns the number of levels present in ``code`` (the length of ``parts``).

        Raises:
            ValueError: If ``code`` is not a well-formed ATC code
        """
        return len(self.parts)

    @property
    def parts(self) -> Sequence[str]:
        """
        Returns the levels present in ``code``, in order.

        Raises:
            ValueError: If ``code`` is not a well-formed ATC code
        """
        match = self._PATTERN.fullmatch(self.code)
        if match is None:
            # Previously this fell through to ``None.groups()`` and raised AttributeError
            raise ValueError(f"'{self.code}' is not a valid ATC code")
        return [g for g in match.groups() if g is not None]
281
282
283
@dataclass(frozen=True, repr=True, eq=True)
class DrugbankInteraction:
    """
    Immutable record of an interaction with a target: the gene symbol, the action,
    the target's name and function descriptions, and supporting PubMed IDs and DOIs.
    """

    gene_symbol: Codes.GeneId
    action: str
    target_name: str
    general_function: Sequence[str]
    specific_function: str
    pmids: FrozenSet[Codes.PubmedId]
    dois: FrozenSet[Codes.Doi]
292
293
294
@dataclass(frozen=True, repr=True, eq=True)
class DrugbankDdi:
    """
    Immutable record naming a drug by DrugBank ID, PubChem compound ID, and DrugBank name,
    with a free-text description.
    Presumably one drug-drug interaction entry (inferred from the class name).
    """

    drug_drugbank_id: Codes.DrugbankCompoundId
    drug_pubchem_id: Codes.PubchemCompoundId
    drug_drugbank_name: str
    description: str
300
301
302
class AssayType(enum.Enum):
    """
    The kind of assay: confirmatory or literature.
    """

    confirmatory = enum.auto()
    literature = enum.auto()
305
306
307
class Activity(enum.Enum):
    """
    An activity outcome: active, inactive, inconclusive, or unspecified.
    """

    active = enum.auto()
    inactive = enum.auto()
    inconclusive = enum.auto()
    unspecified = enum.auto()
312
313
314
@dataclass(frozen=True, repr=True, eq=True)
class Bioactivity:
    """
    Immutable record of one bioactivity result: the assay it came from,
    the optional target (gene, taxon, name) and publication, and the measured activity.
    """

    assay_id: int
    assay_type: AssayType
    assay_ref: str
    assay_name: str
    assay_made_date: date
    gene_id: Optional[Codes.GeneId]
    tax_id: Optional[int]
    pmid: Optional[Codes.PubmedId]
    activity: Optional[Activity]
    activity_name: Optional[str]
    activity_value: float
    target_name: Optional[str]
    compound_name: str
329
330
331
@dataclass(frozen=True, repr=True, eq=True)
class PdbEntry:
    """
    Immutable record of one PDB entry: its ID, title, experimental method and resolution,
    plus the ligand names, compound IDs, UniProt IDs, PubMed IDs, and DOIs listed with it.
    """

    pdbid: Codes.PdbId
    title: str
    exp_method: str
    resolution: float
    lig_names: FrozenSet[str]
    cids: FrozenSet[Codes.PubchemCompoundId]
    uniprot_ids: FrozenSet[Codes.UniprotId]
    pmids: FrozenSet[Codes.PubmedId]
    dois: FrozenSet[Codes.Doi]
342
343
344
@dataclass(frozen=True, repr=True, eq=True)
class PubmedEntry:
    """
    Immutable record of one PubMed article: its ID, type, title, abstract, journal,
    and publication date, plus the MeSH terms and compound IDs listed with it.
    """

    pmid: Codes.PubmedId
    article_type: str
    # NOTE(review): the meaning of ``pmidsrcs`` is not evident from this module -- confirm
    pmidsrcs: FrozenSet[str]
    mesh_headings: FrozenSet[Codes.MeshHeading]
    mesh_subheadings: FrozenSet[Codes.MeshSubheading]
    mesh_codes: FrozenSet[Codes.MeshCode]
    cids: FrozenSet[Codes.PubchemCompoundId]
    article_title: str
    article_abstract: str
    journal_name: str
    pub_date: date
357
358
359
@dataclass(frozen=True, repr=True, eq=True)
class Publication:
    """
    Immutable record of one publication: its PubMed ID, date, title, journal,
    whether it is a review, and a relevance score.
    """

    pmid: Codes.PubmedId
    pub_date: date
    is_review: bool
    title: str
    journal: str
    relevance_score: int
367
368
369
@dataclass(frozen=True, repr=True, eq=True)
class CoOccurrence:
    """
    Immutable record of one co-occurring neighbor: its ID, name, and kind,
    the article counts and score, and the publications listed with it.
    """

    neighbor_id: str
    neighbor_name: str
    kind: CoOccurrenceType
    # https://pubchemdocs.ncbi.nlm.nih.gov/knowledge-panels
    article_count: int
    query_article_count: int
    neighbor_article_count: int
    score: int
    publications: FrozenSet[Publication]

    def strip_pubs(self) -> CoOccurrence:
        """
        Returns a copy of this record with ``publications`` replaced by an empty frozenset.
        All other fields are carried over unchanged; this instance is not modified.
        """
        # Local import: this module's top level only imports ``dataclass``
        from dataclasses import replace

        # Copy by field name so that adding or reordering fields cannot misassign values
        return replace(self, publications=frozenset())
392
393
394
@dataclass(frozen=True, repr=True, eq=True)
class DrugGeneInteraction:
    """
    Immutable record of interactions with a gene as reported by one source,
    with supporting PubMed IDs and DOIs.
    The gene name and gene claim ID may each be ``None``.
    """

    gene_name: Optional[str]
    gene_claim_id: Optional[str]
    source: str
    interactions: FrozenSet[str]
    pmids: FrozenSet[Codes.PubmedId]
    dois: FrozenSet[Codes.Doi]
404
405
406
@dataclass(frozen=True, repr=True, eq=True)
class CompoundGeneInteraction:
    """
    Immutable record of interactions with a gene, with supporting PubMed IDs.
    The gene name and taxon name may each be ``None``.
    """

    gene_name: Optional[Codes.GeneId]
    interactions: FrozenSet[str]
    tax_name: Optional[str]
    pmids: FrozenSet[Codes.PubmedId]
412
413
414
# Names exported by ``from ... import *``.
# ``PdbEntry`` is defined in this module and was missing from the list.
__all__ = [
    "ClinicalTrial",
    "AssociatedDisorder",
    "AtcCode",
    "AssayType",
    "DrugbankInteraction",
    "DrugbankDdi",
    "Bioactivity",
    "Activity",
    "DrugGeneInteraction",
    "CompoundGeneInteraction",
    "GhsCode",
    "PdbEntry",
    "PubmedEntry",
    "Code",
    "Codes",
    "CoOccurrenceType",
    "CoOccurrence",
    "Publication",
    "ComputedProperty",
    "ClinicalTrialsGovUtils",
]
435