Passed
Push — dependabot/pip/flake8-bugbear-... ( 93dece...8d4b2b )
by
unknown
01:27
created

ChemblTargetGraph.factory()   A

Complexity

Conditions 1

Size

Total Lines 8
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 3
nop 1
dl 0
loc 8
rs 10
c 0
b 0
f 0
1
from __future__ import annotations
0 ignored issues
show
introduced by
Missing module docstring
Loading history...
2
import abc
3
import enum
4
import re
5
from dataclasses import dataclass
6
from typing import Optional, Set, Sequence, Tuple as Tup, Type
7
8
from mandos.model.chembl_api import ChemblApi
9
from mandos.model.chembl_support.chembl_targets import ChemblTarget, TargetType, TargetFactory
10
11
12
@dataclass(frozen=True, order=True, repr=True)
class TargetNode:
    """
    A single stop in a traversal of the target graph.

    Besides the target itself, a node records how the traversal arrived at it.

    Attributes:
        depth: How many links were followed from the root to get here; the root has depth 0
        is_end: Whether no (previously unvisited) edge could be followed from this node
        target: The target at this position
        link_reqs: The edge requirements that were matched by the link leading here
        origin: The node we came from (the parent of this node)
    """

    depth: int
    is_end: bool
    target: ChemblTarget
    link_reqs: Optional[TargetEdgeReqs]
    origin: Optional[TargetNode]

    @property
    def is_start(self) -> bool:
        """
        Tells whether this node is where the traversal began.

        Returns:
            True if and only if ``depth`` is 0
        """
        at_root = self.depth == 0
        return at_root
34
35
36
class AbstractTargetEdgeReqs(metaclass=abc.ABCMeta):
    """
    Base type for requirements placed on a (source, rel, dest) triple.
    Such requirements decide which edges of the graph may be followed.
    """

    def matches(self, src: TargetNode, rel_type: TargetRelType, dest: TargetNode) -> bool:
        """
        Decides whether the edge ``src --rel_type--> dest`` satisfies these requirements.
        Subclasses must override this; the base implementation only raises.

        Args:
            src: The node the edge starts from
            rel_type: The relationship type of the edge
            dest: The node the edge leads to

        Returns:
            True if the edge is permitted

        Raises:
            NotImplementedError: Always, in this base class
        """
        raise NotImplementedError()
49
50
51
@dataclass(frozen=True, order=True, repr=True)
class TargetEdgeReqs(AbstractTargetEdgeReqs):
    """
    A set of requirements for a (source, rel, dest) triple.
    This determines the edges we're allowed to follow in the graph.

    Attributes:
        src_type: Required type of the source target
        src_pattern: If not None, a regex that must fully match the source target's name
        rel_type: Required relationship type of the edge
        dest_type: Required type of the destination target
        dest_pattern: If not None, a regex that must fully match the destination target's name
    """

    src_type: TargetType
    src_pattern: Optional[re.Pattern]
    rel_type: TargetRelType
    dest_type: TargetType
    dest_pattern: Optional[re.Pattern]

    @classmethod
    def cross(
        cls,
        source_types: Set[TargetType],
        rel_types: Set[TargetRelType],
        dest_types: Set[TargetType],
    ) -> Set[TargetEdgeReqs]:
        """
        Returns a "cross-product" over the three types.
        Note that none will contain text patterns.

        Args:
            source_types: Permitted types for the source target
            rel_types: Permitted relationship types
            dest_types: Permitted types for the destination target

        Returns:
            One requirement per (source, rel, dest) combination, with both patterns set to None
        """
        return {
            cls(
                src_type=source,
                src_pattern=None,
                rel_type=rel,
                dest_type=dest,
                dest_pattern=None,
            )
            for source in source_types
            for rel in rel_types
            for dest in dest_types
        }

    def matches(
        self,
        src: TargetNode,
        rel_type: TargetRelType,
        dest: TargetNode,
    ) -> bool:
        """
        Determines whether a (source, rel, dest) triple matches this set of requirements.

        Args:
            src: The node the edge starts from; its target's type and name are checked
            rel_type: The relationship type of the edge
            dest: The node the edge leads to; its target's type and name are checked

        Returns:
            True if the types, the relationship, and any name patterns all match.
            Always a real ``bool``, never the ``None`` that ``fullmatch`` yields on a miss.
        """
        src_target = src.target
        dest_target = dest.target
        # cheap equality checks first; the regex checks only run if these pass
        types_ok = (
            self.src_type == src_target.type
            and self.rel_type == rel_type
            and self.dest_type == dest_target.type
        )
        if not types_ok:
            return False
        return self._name_ok(self.src_pattern, src_target.name) and self._name_ok(
            self.dest_pattern, dest_target.name
        )

    @staticmethod
    def _name_ok(pattern: Optional[re.Pattern], name: Optional[str]) -> bool:
        # No pattern means no constraint; a pattern can never match a missing name.
        if pattern is None:
            return True
        return name is not None and pattern.fullmatch(name) is not None
129
130
131
class TargetRelType(enum.Enum):
    """
    The kind of relationship that links one target to another.

    Types:

        - subset_of, superset_of, overlaps_with, and equivalent_to are actual types in ChEMBL.
        - any_link means any of the ChEMBL-defined types
        - self_link is an implicit link from any target to itself
    """

    subset_of = enum.auto()
    superset_of = enum.auto()
    overlaps_with = enum.auto()
    equivalent_to = enum.auto()
    any_link = enum.auto()
    self_link = enum.auto()

    @classmethod
    def of(cls, s: str) -> TargetRelType:
        """
        Looks up a member from a textual label such as ``"SUBSET OF"``.
        Spaces and hyphens are treated as underscores, and case is ignored.

        Args:
            s: The relationship label

        Returns:
            The member whose name equals the normalized label

        Raises:
            KeyError: If no member has that name
        """
        separators_to_underscores = str.maketrans(" -", "__")
        member_name = s.translate(separators_to_underscores).lower()
        return cls[member_name]
152
153
154
class ChemblTargetGraph(metaclass=abc.ABCMeta):
    # noinspection PyUnresolvedReferences
    """
    A target from ChEMBL (the ``target`` table), positioned in the target-relation graph.

    ChEMBL targets form a DAG via the ``target_relation`` table using links of type
    "SUPERSET OF" and "SUBSET OF". Additional link types ("OVERLAPS WITH", for ex) exist;
    which types are followed is decided by the ``rel_types`` / ``permitting`` arguments.
    For some receptors the DAG happens to be a tree. This is not true in general.
    See the GABAA receptor, for example.

    Concrete subclasses must provide ``api()`` and ``factory()``; instances are created
    with ``at_node`` or ``at_target``.
    """

    def __init__(self, node: TargetNode):
        self.node = node

    @classmethod
    def at_node(cls, target: TargetNode) -> ChemblTargetGraph:
        """
        Creates a graph view positioned at an existing traversal node.

        Args:
            target: The node to wrap

        Returns:
            A new instance of this class whose ``node`` is ``target``
        """
        return cls(target)

    @classmethod
    def at_target(cls, target: ChemblTarget) -> ChemblTargetGraph:
        """
        Creates a graph view positioned at a bare target, treating it as a root.

        Args:
            target: The target to wrap

        Returns:
            A new instance whose node has depth 0, no link requirements, and no origin.
            Its ``is_end`` is None because nothing has been traversed yet.
        """
        # lie and fill in None -- we don't know because we haven't traversed
        # noinspection PyTypeChecker
        return cls(TargetNode(0, None, target, None, None))

    @classmethod
    def api(cls) -> ChemblApi:
        """
        Provides the ChEMBL API used to query target relations. Must be overridden.

        Returns:
            The API object

        Raises:
            NotImplementedError: In this base class
        """
        raise NotImplementedError()

    @classmethod
    def factory(cls) -> TargetFactory:
        """
        Provides the factory used to look up linked targets. Must be overridden.

        Returns:
            The target factory

        Raises:
            NotImplementedError: In this base class
        """
        raise NotImplementedError()

    @property
    def target(self) -> ChemblTarget:
        """The target stored in this graph's node."""
        return self.node.target

    @property
    def chembl(self) -> str:
        """The ``chembl`` attribute of the target."""
        return self.target.chembl

    @property
    def name(self) -> Optional[str]:
        """The ``name`` attribute of the target."""
        return self.target.name

    @property
    def type(self) -> TargetType:
        """The ``type`` attribute of the target."""
        return self.target.type

    def links(
        self, rel_types: Set[TargetRelType]
    ) -> Sequence[Tup[ChemblTargetGraph, TargetRelType]]:
        """
        Gets adjacent targets in the graph.

        Args:
            rel_types: Relationship types (e.g. "superset of") to include
                       If ``TargetRelType.any_link`` is included, every relation is kept
                       If ``TargetRelType.self_link`` is included, will add a single self-link

        Returns:
            (linked graph, relationship type) pairs, sorted by the linked target's ``chembl``
            value and then by the declaration order of the relationship type
        """
        api = self.api()
        target_factory = self.factory()
        follow_any = TargetRelType.any_link in rel_types
        relations = api.target_relation.filter(target_chembl_id=self.target.chembl)
        found = []
        # "subset" means "up" (it's reversed from what's on the website)
        for relation in relations:
            rel_type = TargetRelType.of(relation["relationship"])
            if follow_any or rel_type in rel_types:
                linked_id = relation["related_target_chembl_id"]
                # NOTE(review): assumes TargetFactory.find takes a ChEMBL ID -- confirm
                linked_target = self.at_target(target_factory.find(linked_id))
                found.append((linked_target, rel_type))
        # we need to add self-links separately
        if TargetRelType.self_link in rel_types:
            found.append((self.at_target(self.target), TargetRelType.self_link))
        # neither this class nor the enum defines an ordering here, so sort on explicit keys
        return sorted(found, key=lambda pair: (pair[0].chembl, pair[1].value))

    def traverse(self, permitting: Set[TargetEdgeReqs]) -> Set[TargetNode]:
        """
        Traverses the graph from this target, following only edges that match ``permitting``.

        Args:
            permitting: The edge requirements; an edge is followed if any of them matches it

        Returns:
            The visited nodes, including the starting one (depth 0).
            Each node carries its depth, whether it is an end node, the requirements
            of the link that led to it, and its parent node.

        Raises:
            AssertionError: If any resulting node still has an undetermined ``is_end``
        """
        results = set()
        # is_end is only known once a node's links have been examined, so start with None
        # noinspection PyTypeChecker
        self._traverse(TargetNode(0, None, self.target, None, None), permitting, results)
        if any(node.is_end is None for node in results):
            raise AssertionError("Traversal produced a node with an undetermined is_end")
        return results

    @classmethod
    def _traverse(
        cls, source: TargetNode, permitting: Set[TargetEdgeReqs], results: Set[TargetNode]
    ) -> None:
        """
        Recursive worker for ``traverse``; adds ``source`` and its descendants to ``results``.

        Args:
            source: The node to expand; its ``is_end`` is not yet known
            permitting: The edge requirements that decide which links are followed
            results: The set of finished nodes, modified in place
        """
        # all good if we've already traversed this
        if any(seen.target.chembl == source.target.chembl for seen in results):
            return
        # find all links from ChEMBL, then filter to only the valid links
        # do not traverse yet -- we just want to find these links
        link_candidates = cls.at_node(source).links({q.rel_type for q in permitting})
        links = []
        for linked_target, rel_type in link_candidates:
            # try out all of the link types that could match
            # record ALL of the ones that matched, even for duplicate targets
            # that's because the caller might care about the edge type that matched,
            # not just the dest target
            # The caller might also care about the src target
            for permitted in permitting:
                if permitted.matches(src=source, rel_type=rel_type, dest=linked_target.node):
                    link_type = TargetEdgeReqs(
                        src_type=source.target.type,
                        src_pattern=permitted.src_pattern,
                        rel_type=rel_type,
                        dest_type=linked_target.type,
                        dest_pattern=permitted.dest_pattern,
                    )
                    # is_end of the child is unknown until the child itself is expanded
                    # noinspection PyTypeChecker
                    linked = TargetNode(
                        source.depth + 1, None, linked_target.target, link_type, source
                    )
                    links.append(linked)
        # now, we'll add our own node
        # we know whether we're at an "end" node by whether we found any links
        # note that this is an invariant of the node (and permitted link types):
        # it doesn't depend on traversal order
        is_at_end = not links
        # this is BASICALLY the same as ``results.add(source)``:
        # the only difference is we NOW know whether we're at the end
        # source.origin is the parent node OF source
        results.add(
            TargetNode(source.depth, is_at_end, source.target, source.link_reqs, source.origin)
        )
        # alright! now traverse on the links
        for link in links:
            # don't traverse self-links
            # revisits (superset --- subset --- superset, or repeated "overlaps with")
            # are stopped by the already-traversed check at the top of this method
            if link not in results and link.link_reqs.rel_type is not TargetRelType.self_link:
                cls._traverse(link, permitting, results)
320
321
322
class ChemblTargetGraphFactory:
    """
    Builds ``ChemblTargetGraph`` instances of one fixed graph type.
    """

    def __init__(self, graph_type: Type[ChemblTargetGraph]):
        self.graph_type = graph_type

    @classmethod
    def create(cls, api: ChemblApi, target_factory: TargetFactory) -> ChemblTargetGraphFactory:
        """
        Makes a factory whose graph type is bound to the given API and target factory.

        Args:
            api: The object that the graph type's ``api()`` will return
            target_factory: The object that the graph type's ``factory()`` will return

        Returns:
            A factory wrapping a newly defined ``ChemblTargetGraph`` subclass
        """

        class CreatedChemblTargetGraph(ChemblTargetGraph):
            """Graph type whose ``api`` and ``factory`` are the arguments of ``create``."""

            @classmethod
            def factory(cls) -> TargetFactory:
                return target_factory

            @classmethod
            def api(cls) -> ChemblApi:
                return api

        return ChemblTargetGraphFactory(CreatedChemblTargetGraph)

    def at_node(self, target: TargetNode) -> ChemblTargetGraph:
        """
        Delegates to ``at_node`` of the wrapped graph type.

        Args:
            target: The node to position the graph at

        Returns:
            Whatever the graph type's ``at_node`` returns
        """
        graph = self.graph_type.at_node(target)
        return graph

    def at_target(self, target: ChemblTarget) -> ChemblTargetGraph:
        """
        Delegates to ``at_target`` of the wrapped graph type.

        Args:
            target: The target to position the graph at

        Returns:
            Whatever the graph type's ``at_target`` returns
        """
        graph = self.graph_type.at_target(target)
        return graph
346
347
348
# Names exported by ``from <this module> import *``.
__all__ = [
    "TargetNode",
    "TargetRelType",
    "TargetEdgeReqs",
    "ChemblTargetGraph",
    "ChemblTargetGraphFactory",
]
355