mandos.model.targets   A
last analyzed

Complexity

Total Complexity 26

Size/Duplication

Total Lines 284
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 166
dl 0
loc 284
rs 10
c 0
b 0
f 0
wmc 26

14 Methods

Rating   Name   Duplication   Size   Complexity  
A TargetType.of() 0 8 2
A DagTargetLinkType.cross() 0 13 4
A TargetType.protein_types() 0 3 1
A TargetFactory.find() 0 20 1
A TargetType.is_strange() 0 12 1
A TargetType.all_types() 0 3 1
A Target.find() 0 20 2
A TargetType.is_unknown() 0 3 1
A Target.links() 0 21 3
A Target.traverse() 0 16 1
A TargetType.is_protein() 0 7 1
A Target.api() 0 8 1
A TargetRelationshipType.of() 0 3 1
B Target._traverse() 0 29 6
1
"""
2
Model of ChEMBL targets and a hierarchy between them as a directed acyclic graph (DAG).
3
"""
4
from __future__ import annotations
5
6
import abc
7
import enum
8
import logging
9
from dataclasses import dataclass
10
from typing import Optional, Sequence, Set
11
from typing import Tuple as Tup
12
13
from urllib3.util.retry import MaxRetryError
0 ignored issues
show
introduced by
Unable to import 'urllib3.util.retry'
Loading history...
14
from pocketutils.core.dot_dict import NestedDotDict
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.dot_dict'
Loading history...
15
16
from mandos.chembl_api import ChemblApi
17
18
logger = logging.getLogger(__package__)
19
20
21
class TargetNotFoundError(ValueError):
    """
    Raised by ``Target.find`` when the requested target could not be retrieved from ChEMBL.
    """
23
24
25
class TargetType(enum.Enum):
    """
    Enum corresponding to the ChEMBL API field ``target.target_type``.

    Member names are the lowercased field values with spaces and hyphens replaced by
    underscores; use ``of`` to convert a raw string.
    """

    single_protein = enum.auto()
    protein_family = enum.auto()
    protein_complex = enum.auto()
    protein_complex_group = enum.auto()
    selectivity_group = enum.auto()
    protein_protein_interaction = enum.auto()
    nucleic_acid = enum.auto()
    chimeric_protein = enum.auto()
    protein_nucleic_acid_complex = enum.auto()
    metal = enum.auto()
    small_molecule = enum.auto()
    subcellular = enum.auto()
    unknown = enum.auto()

    @classmethod
    def of(cls, s: str) -> TargetType:
        """
        Converts a target type string into a member of this enum.

        The string is normalized first: spaces and hyphens become underscores and the
        result is lowercased, so ``"Single Protein"`` maps to ``single_protein``.

        Args:
            s: The target type string

        Returns:
            The matching member; if there is none, an error is logged and
            ``TargetType.unknown`` is returned
        """
        key = s.replace(" ", "_").replace("-", "_").lower()
        try:
            return cls[key]
        except KeyError:
            # pass the key as a logging argument so formatting is deferred to the logger
            logger.error("Target type %s not found. Using TargetType.unknown.", key)
            return cls.unknown

    @classmethod
    def protein_types(cls) -> Set[TargetType]:
        """
        Returns:
            The set of members for which ``is_protein`` is true
        """
        return {member for member in cls if member.is_protein}

    @classmethod
    def all_types(cls) -> Set[TargetType]:
        """
        Returns:
            The set of every member; provided for symmetry with ``protein_types``
        """
        return set(cls)

    @property
    def is_protein(self) -> bool:
        """
        Whether this is a single protein, protein family, protein complex, or protein complex group.
        """
        return self in {
            TargetType.single_protein,
            TargetType.protein_family,
            TargetType.protein_complex,
            TargetType.protein_complex_group,
        }

    @property
    def is_unknown(self) -> bool:
        """
        Whether this is ``TargetType.unknown``.
        """
        return self == TargetType.unknown

    @property
    def is_strange(self) -> bool:
        """
        Whether this is one of the types not covered by ``is_protein``, including ``unknown``.
        """
        return self in {
            TargetType.selectivity_group,
            TargetType.protein_protein_interaction,
            TargetType.nucleic_acid,
            TargetType.chimeric_protein,
            TargetType.metal,
            TargetType.small_molecule,
            TargetType.subcellular,
            TargetType.protein_nucleic_acid_complex,
            TargetType.unknown,
        }
87
88
89
class TargetRelationshipType(enum.Enum):
    """
    Enum of relationship types between two targets.

    Member names are the lowercased relationship strings with spaces and hyphens replaced by
    underscores; use ``of`` to convert a raw string.
    """

    subset_of = enum.auto()
    superset_of = enum.auto()
    overlaps_with = enum.auto()
    equivalent_to = enum.auto()

    @classmethod
    def of(cls, s: str) -> TargetRelationshipType:
        """
        Converts a relationship string into a member of this enum.

        The string is normalized first: spaces and hyphens become underscores and the
        result is lowercased, so ``"SUBSET OF"`` maps to ``subset_of``.

        Args:
            s: The relationship string

        Returns:
            The matching member

        Raises:
            KeyError: If the normalized string is not the name of a member
        """
        key = s.replace(" ", "_").replace("-", "_").lower()
        return cls[key]
98
99
100
@dataclass(frozen=True, order=True, repr=True)
class DagTargetLinkType:
    """
    The kind of a link between two targets: source type, relationship, and destination type.

    Attributes:
        source_type: The type of the target the link starts from
        rel_type: The relationship going from the source to the destination
        dest_type: The type of the target the link leads to
    """

    source_type: TargetType
    rel_type: TargetRelationshipType
    dest_type: TargetType

    @classmethod
    def cross(
        cls,
        source_types: Set[TargetType],
        rel_types: Set[TargetRelationshipType],
        dest_types: Set[TargetType],
    ) -> Set[DagTargetLinkType]:
        """
        Builds every combination of the given source types, relationships, and destination types.

        Args:
            source_types: The permitted source target types
            rel_types: The permitted relationship types
            dest_types: The permitted destination target types

        Returns:
            One ``DagTargetLinkType`` per (source, relationship, destination) triple;
            empty if any of the three sets is empty
        """
        return {
            DagTargetLinkType(source, rel, dest)
            for source in source_types
            for rel in rel_types
            for dest in dest_types
        }
119
120
121
@dataclass(frozen=True, order=True, repr=True)
class DagTarget:
    """
    A target as encountered while traversing the target DAG (see ``Target.traverse``).

    Attributes:
        depth: The number of links followed from the starting target, which has depth 0
        is_end: Whether no permitted links were found leading out of this target;
                ``None`` only on placeholder nodes created before that is known
        target: The target itself
        link_type: The type of the link that was followed to reach this target;
                   ``None`` for the starting target
    """

    depth: int
    is_end: Optional[bool]
    target: Target
    link_type: Optional[DagTargetLinkType]
127
128
129
@dataclass(frozen=True, order=True, repr=True)
class Target(metaclass=abc.ABCMeta):
    """
    A target from ChEMBL, from the ``target`` table.
    ChEMBL targets form a DAG via the ``target_relation`` table using links of type
    "SUPERSET OF" and "SUBSET OF".
    (There are additional link types, such as "OVERLAPS WITH"; which link types are followed
    is decided by the caller of ``traverse``.)
    For some receptors the DAG happens to be a tree. This is not true in general.
    See the GABAA receptor, for example.
    To fetch a target, use the ``find`` factory method.

    Attributes:
        chembl: The CHEMBL ID, starting with 'CHEMBL'
        name: The preferred name (``pref_name``), if any
        type: From the ``target_type`` ChEMBL field
    """

    chembl: str
    name: Optional[str]
    type: TargetType

    @classmethod
    def api(cls) -> ChemblApi:
        """
        Gets the API used to query ChEMBL. Subclasses must override this.

        Returns:
            The ``ChemblApi`` to use for all queries made by this class

        Raises:
            NotImplementedError: Always, in this base class
        """
        raise NotImplementedError()

    @classmethod
    def find(cls, chembl: str) -> Target:
        """
        Fetches a single target by its ChEMBL ID using ``api()``.

        Args:
            chembl: The ChEMBL ID of the target

        Returns:
            A new instance of this class for the target

        Raises:
            TargetNotFoundError: If the query fails with a ``MaxRetryError``
                                 or returns no targets
        """
        try:
            targets = cls.api().target.filter(target_chembl_id=chembl)
        except MaxRetryError as err:
            # chain the cause so the underlying connection failure stays in the traceback
            raise TargetNotFoundError(f"Failed to find target {chembl}") from err
        # an explicit raise: an assert would be stripped under ``python -O``
        if len(targets) == 0:
            raise TargetNotFoundError(f"Found {len(targets)} targets for {chembl}")
        # internal sanity check: a single ID should never match more than one target
        assert len(targets) == 1, f"Found {len(targets)} targets for {chembl}"
        target = NestedDotDict(targets[0])
        return cls(
            chembl=target["target_chembl_id"],
            name=target.get("pref_name"),
            type=TargetType.of(target["target_type"]),
        )

    def links(
        self, rel_types: Set[TargetRelationshipType]
    ) -> Sequence[Tup[Target, TargetRelationshipType]]:
        """
        Gets adjacent targets in the DAG.

        Args:
            rel_types: Only relations of these types are returned

        Returns:
            (target, relationship) pairs, sorted by the linked target's ChEMBL ID
            and then by the relationship name
        """
        relations = self.__class__.api().target_relation.filter(target_chembl_id=self.chembl)
        links = []
        # "subset" means "up" (it's reversed from what's on the website)
        for relation in relations:
            linked_id = relation["related_target_chembl_id"]
            rel_type = TargetRelationshipType.of(relation["relationship"])
            if rel_type in rel_types:
                linked_target = self.find(linked_id)
                links.append((linked_target, rel_type))
        # sort with an explicit key: comparing the tuples directly falls through to comparing
        # enum members (a TypeError) whenever the same target is linked by two relationships
        return sorted(links, key=lambda link: (link[0].chembl, link[1].name))

    def traverse(self, permitting: Set[DagTargetLinkType]) -> Set[DagTarget]:
        """
        Traverses the DAG from this node, following only links whose type is in the given set.

        Args:
            permitting: The link types (source type, relationship, destination type)
                        that we're allowed to follow

        Returns:
            This target and every target reached, each wrapped in a ``DagTarget``.
            ``depth`` is 0 for this target and grows by 1 per link followed along the path
            on which the target was first reached.
        """
        results = set()
        # purposely use the invalid value None for is_end; _traverse fills in the real value
        self._traverse(DagTarget(0, None, self, None), permitting, results)
        assert not any((x.is_end is None for x in results))
        return results

    @classmethod
    def _traverse(
        cls, source: DagTarget, permitting: Set[DagTargetLinkType], results: Set[DagTarget]
    ) -> None:
        """
        Recursive helper for ``traverse``: records ``source`` and then descends into its links.

        Args:
            source: The node to visit; its ``is_end`` is ignored and recomputed
            permitting: The link types we're allowed to follow
            results: The nodes visited so far; modified in place
        """
        # all good if we've already traversed this
        if any(source.target.chembl == seen.target.chembl for seen in results):
            return
        # find all links from ChEMBL, then filter to only the valid links
        # do not traverse yet -- we just want to find these links
        link_candidates = source.target.links({q.rel_type for q in permitting})
        links = []
        for link, rel_type in link_candidates:
            link_type = DagTargetLinkType(source.target.type, rel_type, link.type)
            if link_type in permitting:
                # purposely use the invalid value None for is_end
                links.append(DagTarget(source.depth + 1, None, link, link_type))
        # now, we'll add our own node before descending into any of its links
        # we know whether we're at an "end" node by whether we found any links
        # note that this is an invariant of the node (and permitted link types):
        # it doesn't depend on traversal order
        is_at_end = len(links) == 0
        results.add(DagTarget(source.depth, is_at_end, source.target, source.link_type))
        # alright! now traverse on the links
        # cycles (superset --- subset --- superset, or overlaps with --- overlaps with) are
        # stopped by the already-traversed check at the top of each call; no membership test
        # is made here because these pending nodes have is_end=None and so can never equal
        # a node stored in results
        for link in links:
            cls._traverse(link, permitting, results)
247
248
249
class TargetFactory:
    """
    Factory for ``Target`` that injects a ``ChemblApi``.
    """

    @classmethod
    def find(cls, chembl: str, api: ChemblApi) -> Target:
        """
        Fetches a target through a one-off ``Target`` subclass bound to the given API.

        Args:
            chembl: The ChEMBL ID of the target to fetch
            api: The API that the new subclass returns from its ``api()`` classmethod

        Returns:
            A ``Target`` instance from a newly created subclass of that class
        """

        @dataclass(frozen=True, order=True, repr=True)
        class _Target(Target):
            @classmethod
            def api(cls) -> ChemblApi:
                # ``api`` here is the enclosing function's argument, captured by closure
                return api

        _Target.__name__ = f"Target:{chembl}"
        return _Target.find(chembl)
274
275
276
# names exported by ``from <this module> import *``
__all__ = [
    "TargetType",
    "TargetRelationshipType",
    "Target",
    "DagTarget",
    "TargetFactory",
    "DagTargetLinkType",
    "TargetNotFoundError",
]
285