Passed
Push — dependabot/pip/pyarrow-4.0.1 ( ca09ce...b2836e )
by
unknown
02:18 queued 20s
created

ChemblTargetGraph._traverse()   B

Complexity

Conditions 8

Size

Total Lines 61
Code Lines 28

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 8
eloc 28
nop 4
dl 0
loc 61
rs 7.3333
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

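Commonly applied refactorings include Extract Method: moving a cohesive (often commented) part of the body into its own well-named method.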

1
from __future__ import annotations
0 ignored issues
show
introduced by
Missing module docstring
Loading history...
2
3
import abc
4
import enum
5
import re
6
from dataclasses import dataclass
7
from functools import total_ordering
8
from typing import Optional, Sequence, Set
9
from typing import Tuple as Tup
10
from typing import Type
11
12
from mandos.model.apis.chembl_api import ChemblApi
13
from mandos.model.apis.chembl_support.chembl_targets import ChemblTarget, TargetFactory, TargetType
14
15
16
@dataclass(frozen=True, order=True, repr=True)
class TargetNode:
    """
    A target with information about how we reached it from a traversal.

    Attributes:
        depth: The number of steps taken to get here, with 0 for the root
        is_end: Whether the traversal found no permitted edge leading out of this node;
                ``None`` for a node that has not been traversed yet (not known at that point)
        target: Our target
        link_reqs: The set of requirements for the link that we matched to get here
        origin: The parent of our target node
    """

    depth: int
    # Optional because nodes are created with ``None`` before they have been traversed
    is_end: Optional[bool]
    target: ChemblTarget
    link_reqs: Optional[TargetEdgeReqs]
    origin: Optional[TargetNode]

    @property
    def is_start(self) -> bool:
        """
        Returns whether this node is the root of its traversal, i.e. ``depth == 0``.
        """
        return self.depth == 0
38
39
40
class AbstractTargetEdgeReqs(metaclass=abc.ABCMeta):
    """
    A set of requirements for a (source, rel, dest) triple.
    This determines the edges we're allowed to follow in the graph.
    """

    @abc.abstractmethod
    def matches(
        self,
        src: TargetNode,
        rel_type: TargetRelType,
        dest: TargetNode,
    ) -> bool:
        """
        Determines whether a (source, rel, dest) triple matches this set of requirements.

        Args:
            src: The node the edge starts from
            rel_type: The type of the relationship along the edge
            dest: The node the edge leads to

        Returns:
            Whether the edge satisfies the requirements

        Raises:
            NotImplementedError: Always, in this base implementation; subclasses must override
        """
        raise NotImplementedError()
53
54
55
@dataclass(frozen=True, order=True, repr=True)
class TargetEdgeReqs(AbstractTargetEdgeReqs):
    """
    A set of requirements for a (source, rel, dest) triple.
    This determines the edges we're allowed to follow in the graph.

    Attributes:
        src_type: The type the source target must have
        src_pattern: If not None, a regex that must fully match the source target's name
        rel_type: The relationship type the edge must have
        dest_type: The type the destination target must have
        dest_pattern: If not None, a regex that must fully match the destination target's name
    """

    src_type: TargetType
    src_pattern: Optional[re.Pattern]
    rel_type: TargetRelType
    dest_type: TargetType
    dest_pattern: Optional[re.Pattern]

    @classmethod
    def cross(
        cls,
        source_types: Set[TargetType],
        rel_types: Set[TargetRelType],
        dest_types: Set[TargetType],
    ) -> Set[TargetEdgeReqs]:
        """
        Returns a "cross-product" over the three types.
        Note that none will contain text patterns.

        Args:
            source_types: The permitted types of the source target
            rel_types: The permitted relationship types
            dest_types: The permitted types of the destination target

        Returns:
            One ``TargetEdgeReqs`` per (source type, rel type, dest type) combination,
            each with ``src_pattern`` and ``dest_pattern`` set to None
        """
        return {
            TargetEdgeReqs(
                src_type=source,
                src_pattern=None,
                rel_type=rel,
                dest_type=dest,
                dest_pattern=None,
            )
            for source in source_types
            for rel in rel_types
            for dest in dest_types
        }

    def matches(
        self,
        src: TargetNode,
        rel_type: TargetRelType,
        dest: TargetNode,
    ) -> bool:
        """
        Determines whether a (source, rel, dest) triple matches this set of requirements.

        Args:
            src: The node the edge starts from; its target's ``type`` and ``name`` are checked
            rel_type: The type of the relationship along the edge
            dest: The node the edge leads to; its target's ``type`` and ``name`` are checked

        Returns:
            True only if all three types are equal to the required ones and each
            non-None pattern fully matches the corresponding target name.
            A target whose name is None never matches a non-None pattern.
        """
        srcx = src.target
        destx = dest.target
        # plain equality checks first; the regexes only run if these pass
        types_match = (
            self.src_type == srcx.type
            and self.rel_type == rel_type
            and self.dest_type == destx.type
        )
        if not types_match:
            return False
        return self._name_matches(self.src_pattern, srcx.name) and self._name_matches(
            self.dest_pattern, destx.name
        )

    @staticmethod
    def _name_matches(pattern: Optional[re.Pattern], name: Optional[str]) -> bool:
        # Always returns a real bool (never a match object or None)
        if pattern is None:
            return True
        return name is not None and pattern.fullmatch(name) is not None
133
134
135
class TargetRelType(enum.Enum):
    """
    A relationship between two targets.

    Types:

        - subset_of, superset_of, overlaps_with, and equivalent_to are actual types in ChEMBL.
        - any_link means any of the ChEMBL-defined types
        - self_link is an implicit link from any target to itself
    """

    subset_of = enum.auto()
    superset_of = enum.auto()
    overlaps_with = enum.auto()
    equivalent_to = enum.auto()
    any_link = enum.auto()
    self_link = enum.auto()

    @classmethod
    def of(cls, s: str) -> TargetRelType:
        """
        Looks up a relationship type by name.

        The lookup ignores case and surrounding whitespace, and treats spaces and hyphens
        as underscores, so ``"SUBSET OF"`` and ``"subset-of"`` both give ``subset_of``.

        Args:
            s: The name of the relationship

        Returns:
            The matching member

        Raises:
            KeyError: If no member has the normalized name
        """
        return cls[s.strip().replace(" ", "_").replace("-", "_").lower()]
156
157
158
@total_ordering
class ChemblTargetGraph(metaclass=abc.ABCMeta):
    # noinspection PyUnresolvedReferences
    """
    A position (a ``TargetNode``) in the graph of ChEMBL targets.

    ChEMBL targets form a DAG via the ``target_relation`` table using links of type
    "SUPERSET OF" and "SUBSET OF".
    Other link types (e.g. "OVERLAPS WITH") also exist; which ones are followed is decided by
    the relationship types and requirements passed to ``links`` and ``traverse``.
    For some receptors the DAG happens to be a tree. This is not true in general.
    See the GABAA receptor, for example.

    To get a graph positioned at a target, use ``at_target`` or ``at_node``.
    Concrete subclasses must provide ``api`` and ``factory``.
    """

    def __init__(self, node: TargetNode):
        """
        Args:
            node: The node this graph is positioned at

        Raises:
            TypeError: If ``node`` is not a ``TargetNode``
        """
        if not isinstance(node, TargetNode):
            raise TypeError(f"Bad type {type(node)} for {node}")
        self.node = node

    def __repr__(self):
        return f"{self.__class__.__name__}({self.node})"

    def __str__(self):
        return f"{self.__class__.__name__}({self.node})"

    def __hash__(self):
        return hash(self.node)

    def __eq__(self, target):
        # NotImplemented lets Python fall back to its default handling for foreign types,
        # so ``graph == None`` is False rather than an error
        if not isinstance(target, ChemblTargetGraph):
            return NotImplemented
        return self.node == target.node

    def __lt__(self, target):
        # for foreign types Python itself raises TypeError once both sides decline
        if not isinstance(target, ChemblTargetGraph):
            return NotImplemented
        return self.node < target.node

    @classmethod
    def at_node(cls, target: TargetNode) -> ChemblTargetGraph:
        """
        Creates a graph positioned at an existing node.

        Args:
            target: The node, which is used as-is

        Returns:
            A new instance of this class wrapping ``target``

        Raises:
            TypeError: If ``target`` is not a ``TargetNode``
        """
        if not isinstance(target, TargetNode):
            raise TypeError(f"Bad type {type(target)} for {target}")
        return cls(target)

    @classmethod
    def at_target(cls, target: ChemblTarget) -> ChemblTargetGraph:
        """
        Creates a graph positioned at a target, treating it as a root (depth 0).

        Args:
            target: The target to start from

        Returns:
            A new instance of this class whose node has depth 0 and no origin or link

        Raises:
            TypeError: If ``target`` is not a ``ChemblTarget``
        """
        if not isinstance(target, ChemblTarget):
            raise TypeError(f"Bad type {type(target)} for {target}")
        # lie and fill in None for is_end -- we don't know because we haven't traversed
        # noinspection PyTypeChecker
        return cls(TargetNode(0, None, target, None, None))

    @classmethod
    def api(cls) -> ChemblApi:
        """
        Returns the API used to query target relations.

        Raises:
            NotImplementedError: Always, in this base implementation; subclasses override it
        """
        raise NotImplementedError()

    @classmethod
    def factory(cls) -> TargetFactory:
        """
        Returns the factory used to look targets up by their ChEMBL IDs.

        Raises:
            NotImplementedError: Always, in this base implementation; subclasses override it
        """
        raise NotImplementedError()

    @property
    def target(self) -> ChemblTarget:
        """
        The target of the node this graph is positioned at.
        """
        return self.node.target

    @property
    def chembl(self) -> str:
        """
        The ``chembl`` attribute (ChEMBL ID) of the target.
        """
        return self.target.chembl

    @property
    def name(self) -> Optional[str]:
        """
        The ``name`` attribute of the target, which may be None.
        """
        return self.target.name

    @property
    def type(self) -> TargetType:
        """
        The ``type`` attribute of the target.
        """
        return self.target.type

    def links(
        self, rel_types: Set[TargetRelType]
    ) -> Sequence[Tup[ChemblTargetGraph, TargetRelType]]:
        """
        Gets adjacent targets in the graph.

        Args:
            rel_types: Relationship types (e.g. "superset of") to include
                       If ``TargetRelType.any_link`` is included, every relationship is kept
                       If ``TargetRelType.self_link`` is included, will add a single self-link

        Returns:
            (linked graph, relationship type) pairs, sorted by graph and then by
            the declaration order of the relationship type
        """
        api = self.__class__.api()
        relations = api.target_relation.filter(target_chembl_id=self.target.chembl)
        follow_any = TargetRelType.any_link in rel_types
        links = []
        # "subset" means "up" (it's reversed from what's on the website)
        for relation in relations:
            linked_id = relation["related_target_chembl_id"]
            rel_type = TargetRelType.of(relation["relationship"])
            if rel_type in rel_types or follow_any:
                linked_target = self.__class__.at_target(self.factory().find(linked_id))
                links.append((linked_target, rel_type))
        # we need to add self-links separately
        if TargetRelType.self_link in rel_types:
            links.append(
                (self.at_target(self.factory().find(self.target.chembl)), TargetRelType.self_link)
            )
        # enum members are not orderable, so break ties between equal graphs on the enum value;
        # sorting the raw tuples fails when one target is linked by two relationship types
        return sorted(links, key=lambda link: (link[0], link[1].value))

    def traverse(self, permitting: Set[TargetEdgeReqs]) -> Set[TargetNode]:
        """
        Traverses the graph from this node, following only edges that match a requirement.

        Args:
            permitting: The requirements; an edge is followed if it matches at least one

        Returns:
            The nodes that were reached, including the one for this graph at depth 0.
            Each target (by ChEMBL ID) appears once, with the depth, link requirements,
            and origin of the path by which it was first reached.

        Raises:
            AssertionError: If a returned node would still have ``is_end`` set to None
        """
        results: Set[TargetNode] = set()
        # purposely use the invalid value None for is_end
        # NOTE(review): ``self`` (a graph, not a ChemblTarget) is stored as the node's target;
        # it works because this class also exposes chembl/name/type -- confirm this is intended
        # noinspection PyTypeChecker
        self._traverse(TargetNode(0, None, self, None, None), permitting, results)
        if any(x.is_end is None for x in results):
            raise AssertionError()
        return results

    @classmethod
    def _traverse(
        cls, source: TargetNode, permitting: Set[TargetEdgeReqs], results: Set[TargetNode]
    ) -> None:
        # recursive method called from traverse
        # adds ``source`` to ``results`` (with is_end filled in),
        # then recurses into each permitted link from it

        # all good if we've already traversed this target
        # this guard is what stops cycles such as superset --- subset --- superset ---
        # or just --- overlaps with --- overlaps with ---
        chembl_id = source.target.chembl
        if any(seen.target.chembl == chembl_id for seen in results):
            return
        # do not traverse yet -- we just want to find the links
        links = cls._permitted_links(source, permitting)
        # we know whether we're at an "end" node by whether we found any links
        # note that this is an invariant of the node (and permitted link types):
        # it doesn't depend on traversal order
        # this is BASICALLY the same as ``results.add(source)``:
        # the only difference is we NOW know whether we're at the end
        # (we had no idea before checking all of its children)
        # source.origin is the parent node OF source;
        # it's None *iff* this is the root (``self`` in ``traverse``)
        results.add(
            TargetNode(source.depth, not links, source.target, source.link_reqs, source.origin)
        )
        # alright! now traverse on the links
        for link in links:
            # obviously don't traverse self-links
            # (``link`` still has is_end=None here, so the membership test is a formality;
            # already-visited targets are rejected at the top of the recursive call)
            if link not in results and link.link_reqs.rel_type is not TargetRelType.self_link:
                cls._traverse(link, permitting, results)
        # we've added: ``source``, and then each of its children (with recursion)
        # we're done now

    @classmethod
    def _permitted_links(
        cls, source: TargetNode, permitting: Set[TargetEdgeReqs]
    ) -> Sequence[TargetNode]:
        # finds the links out of ``source`` and keeps those that match a requirement;
        # returns one not-yet-traversed child node per (link, matching requirement) pair
        link_candidates = cls.at_node(source).links({q.rel_type for q in permitting})
        links = []
        for linked_target, rel_type in link_candidates:
            # try out all of the link types that could match
            # record ALL of the ones that matched, even for duplicate targets
            # that's because the caller might care about the edge type that matched,
            # not just the dest target
            # The caller might also care about the src target
            for permitted in permitting:
                if permitted.matches(src=source, rel_type=rel_type, dest=linked_target.node):
                    link_type = TargetEdgeReqs(
                        src_type=source.target.type,
                        src_pattern=permitted.src_pattern,
                        rel_type=rel_type,
                        dest_type=linked_target.type,
                        dest_pattern=permitted.dest_pattern,
                    )
                    # purposely use the invalid value None for is_end
                    # noinspection PyTypeChecker
                    links.append(
                        TargetNode(source.depth + 1, None, linked_target, link_type, source)
                    )
        return links
352
353
354
class ChemblTargetGraphFactory:
    """
    Creates ``ChemblTargetGraph`` instances of one concrete graph type.
    """

    def __init__(self, graph_type: Type[ChemblTargetGraph]):
        """
        Args:
            graph_type: The graph class whose ``at_node`` and ``at_target`` are delegated to
        """
        self.graph_type = graph_type

    @classmethod
    def create(cls, api: ChemblApi, target_factory: TargetFactory) -> ChemblTargetGraphFactory:
        """
        Builds a factory for a new graph type that is bound to an API and a target factory.

        Args:
            api: Returned by the new graph type's ``api()``
            target_factory: Returned by the new graph type's ``factory()``

        Returns:
            A factory for the newly defined ``ChemblTargetGraph`` subclass
        """

        class CreatedChemblTargetGraph(ChemblTargetGraph):
            """
            A graph type whose ``api`` and ``factory`` return the objects given to ``create``.
            """

            @classmethod
            def api(cls) -> ChemblApi:
                return api

            @classmethod
            def factory(cls) -> TargetFactory:
                return target_factory

        return ChemblTargetGraphFactory(CreatedChemblTargetGraph)

    def at_node(self, target: TargetNode) -> ChemblTargetGraph:
        """
        Delegates to ``graph_type.at_node``.

        Args:
            target: The node to position the graph at

        Returns:
            Whatever ``graph_type.at_node(target)`` returns
        """
        return self.graph_type.at_node(target)

    def at_target(self, target: ChemblTarget) -> ChemblTargetGraph:
        """
        Delegates to ``graph_type.at_target``.

        Args:
            target: The target to position the graph at

        Returns:
            Whatever ``graph_type.at_target(target)`` returns
        """
        return self.graph_type.at_target(target)
378
379
380
# The public names of this module
__all__ = [
    "TargetNode",
    "TargetRelType",
    "TargetEdgeReqs",
    "ChemblTargetGraph",
    "ChemblTargetGraphFactory",
]
387