Passed
Push — dependabot/pip/flake8-bugbear-... ( 93dece...8d4b2b )
by
unknown
01:27
created

TargetFactory.__init__()   A

Complexity

Conditions 1

Size

Total Lines 2
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 2
nop 2
dl 0
loc 2
rs 10
c 0
b 0
f 0
1
"""
2
Model of ChEMBL targets and a hierarchy between them as a directed acyclic graph (DAG).
3
"""
4
from __future__ import annotations
5
6
import enum
7
import logging
8
from dataclasses import dataclass
9
from typing import Optional, Set
10
11
from urllib3.util.retry import MaxRetryError
0 ignored issues
show
introduced by
Unable to import 'urllib3.util.retry'
Loading history...
12
from pocketutils.core.dot_dict import NestedDotDict
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.dot_dict'
Loading history...
13
14
from mandos.model.chembl_api import ChemblApi
15
16
# Module-level logger. It is named after the enclosing package (``__package__``)
# rather than this module, so every module that does the same shares one logger.
logger = logging.getLogger(__package__)
17
18
19
class TargetNotFoundError(ValueError):
    """
    Raised when a ChEMBL target could not be found.
    """
21
22
23
class TargetType(enum.Enum):
    """
    Enum corresponding to the ChEMBL API field ``target.target_type``.

    Member names are the lowercase, underscore-separated form that :meth:`of` looks up.
    ``unknown`` is also what :meth:`of` returns for any string it does not recognize.
    """

    single_protein = enum.auto()
    protein_family = enum.auto()
    protein_complex = enum.auto()
    protein_complex_group = enum.auto()
    selectivity_group = enum.auto()
    protein_protein_interaction = enum.auto()
    nucleic_acid = enum.auto()
    chimeric_protein = enum.auto()
    protein_nucleic_acid_complex = enum.auto()
    metal = enum.auto()
    small_molecule = enum.auto()
    subcellular = enum.auto()
    unknown = enum.auto()

    @classmethod
    def of(cls, s: str) -> TargetType:
        """
        Looks up the member for a target-type string.

        Spaces and hyphens are replaced with underscores and the result is lowercased
        before the lookup, so ``"PROTEIN-PROTEIN INTERACTION"`` resolves to
        ``protein_protein_interaction``.

        Args:
            s: The target type string

        Returns:
            The member with the normalized name, or ``TargetType.unknown``
            (after logging an error) if there is no such member
        """
        key = s.replace(" ", "_").replace("-", "_").lower()
        try:
            return cls[key]
        except KeyError:
            # %-style arguments let the logging module skip the formatting
            # when the record would be discarded; the emitted text is unchanged.
            logger.error("Target type %s not found. Using TargetType.unknown.", key)
            return cls.unknown

    @classmethod
    def protein_types(cls) -> Set[TargetType]:
        """
        Returns the target types that are expressly proteins.
        Specifically, single proteins, protein complexes, protein complex groups,
        and protein families.
        This does **not** include protein-protein interactions, chimeric proteins,
        protein-nucleic acid complexes, or selectivity groups.
        """
        return {s for s in cls if s.is_protein}

    @classmethod
    def all_types(cls) -> Set[TargetType]:
        """
        Returns every target type, including ``unknown``.
        Provided for symmetry with :meth:`protein_types`.
        """
        return set(cls)

    @property
    def is_traversable(self) -> bool:
        """
        Whether this type is one that can have relationships defined on it.
        Note that this may not match ChEMBL's own definition --
        there may be types (e.g. protein_protein_interaction) that have relationships.
        Those rare types are not included here.
        """
        return self in {
            TargetType.single_protein,
            TargetType.protein_family,
            TargetType.protein_complex,
            TargetType.protein_complex_group,
            TargetType.selectivity_group,
        }

    @property
    def is_protein(self) -> bool:
        """
        Whether this type is a "protein".
        Specifically, single proteins, protein complexes, protein complex groups,
        and protein families.
        This does **not** include protein-protein interactions, chimeric proteins,
        protein-nucleic acid complexes, or selectivity groups.
        """
        return self in {
            TargetType.single_protein,
            TargetType.protein_family,
            TargetType.protein_complex,
            TargetType.protein_complex_group,
        }

    @property
    def is_unknown(self) -> bool:
        """
        Returns whether this is the "unknown" type.
        In principle, this could have a more involved meaning.
        """
        return self is TargetType.unknown
103
104
105
# ``repr`` is generated by default, so it is not spelled out here.
@dataclass(order=True, frozen=True)
class ChemblTarget:
    """
    An immutable record of one target from ChEMBL's ``target`` table.

    Instances compare and order field by field, in declaration order.

    Attributes:
        chembl: The CHEMBL ID, starting with 'CHEMBL'
        name: The preferred name, or ``None``
              (``TargetFactory.find`` reads it from ``pref_name``)
        type: From the ``target_type`` ChEMBL field
    """

    chembl: str
    name: Optional[str]
    type: TargetType
119
120
121
class TargetFactory:
    """
    Factory for ``ChemblTarget`` that injects a ``ChemblApi``.
    """

    def __init__(self, api: ChemblApi):
        """
        Args:
            api: The ChEMBL API that ``find`` queries
        """
        self.api = api

    def find(self, chembl: str) -> ChemblTarget:
        """
        Looks up a single target by its ChEMBL ID.

        Args:
            chembl: Passed as ``target_chembl_id`` to the API's target filter

        Returns:
            A ``ChemblTarget`` built from the ``target_chembl_id``, ``pref_name``,
            and ``target_type`` fields of the one matching record

        Raises:
            TargetNotFoundError: If the query fails with a ``MaxRetryError``,
                                 or if it matches no target
            AssertionError: If the query matches more than one target
        """
        try:
            targets = self.api.target.filter(target_chembl_id=chembl)
        except MaxRetryError as err:
            raise TargetNotFoundError(f"Failed to find target {chembl}") from err
        n_found = len(targets)
        if n_found == 0:
            raise TargetNotFoundError(f"Found {n_found} targets for {chembl}")
        if n_found != 1:
            # Raised explicitly instead of via ``assert`` so the check survives ``python -O``;
            # the exception type and message for this case are unchanged.
            raise AssertionError(f"Found {n_found} targets for {chembl}")
        target = NestedDotDict(targets[0])
        return ChemblTarget(
            chembl=target["target_chembl_id"],
            name=target.get("pref_name"),
            type=TargetType.of(target["target_type"]),
        )
150
151
152
# Public names exported by ``from <this module> import *``.
__all__ = ["TargetType", "TargetFactory", "TargetNotFoundError", "ChemblTarget"]
158