Passed
Push — main ( 9ff912...d08a4e )
by Douglas
03:54
created

ProteinSearch._set_to_regex()   A

Complexity

Conditions 1

Size

Total Lines 2
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 2
nop 2
dl 0
loc 2
rs 10
c 0
b 0
f 0
1
import abc
0 ignored issues
show
introduced by
Missing module docstring
Loading history...
2
import logging
3
import re
4
from dataclasses import dataclass
5
from typing import Sequence, TypeVar, Set, Union
6
7
from pocketutils.core.dot_dict import NestedDotDict
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.dot_dict'
Loading history...
8
9
from mandos.model.chembl_api import ChemblApi
10
from mandos.model.chembl_support import ChemblCompound
11
from mandos.model.chembl_support.chembl_targets import TargetFactory
12
from mandos.model.chembl_support.chembl_target_graphs import (
13
    ChemblTargetGraph,
14
    ChemblTargetGraphFactory,
15
)
16
from mandos.model.chembl_support.chembl_utils import ChemblUtils
17
from mandos.model.taxonomy import Taxonomy
18
from mandos.search.chembl import ChemblHit, ChemblSearch
19
from mandos.search.chembl.target_traversal import TargetTraversalStrategies, TargetTraversalStrategy
0 ignored issues
show
Unused Code introduced by
Unused TargetTraversalStrategy imported from mandos.search.chembl.target_traversal
Loading history...
20
21
# Shared logger, looked up under the fixed name "mandos" rather than this module's __name__.
logger = logging.getLogger("mandos")
22
23
24
@dataclass(repr=True, order=True, frozen=True)
class ProteinHit(ChemblHit, metaclass=abc.ABCMeta):
    """
    A hit that links a compound to a protein target.

    Instances are immutable and orderable (``frozen=True``, ``order=True``).
    """

    # NOTE(review): presumably the ChEMBL ID of the target exactly as annotated,
    # as opposed to a target reached through traversal -- confirm against subclasses.
    exact_target_id: str
31
32
33
# Type variable for the hit type produced by a search: bounded by ProteinHit and covariant.
H = TypeVar("H", bound=ProteinHit, covariant=True)
0 ignored issues
show
Coding Style Naming introduced by
Class name "H" doesn't conform to PascalCase naming style ('[^\\W\\da-z][^\\W_]+$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
34
35
36
class ProteinSearch(ChemblSearch[H], metaclass=abc.ABCMeta):
    """
    Abstract search whose hits are tied to ChEMBL targets.

    Subclasses provide ``query``, ``should_include``, and ``to_hit``.
    ``find`` and ``process`` drive them: every record returned by ``query`` is resolved
    to a target graph, filtered with ``should_include``, traversed with the configured
    traversal strategy, and converted into hits with ``to_hit``.
    """

    def __init__(
        self,
        key: str,
        api: ChemblApi,
        taxa: Sequence[Taxonomy],
        traversal_strategy: str,
        allowed_target_types: Set[str],
    ):
        """
        Args:
            key: Forwarded to the parent constructor.
            api: Forwarded to the parent constructor.
            taxa: Taxonomies consulted by ``is_in_taxa``.
            traversal_strategy: Name of the strategy, resolved with
                ``TargetTraversalStrategies.by_name``.
            allowed_target_types: Stored on the instance; this base class does not
                read it itself.
        """
        super().__init__(key, api)
        self.taxa = taxa
        # Uses self.api, which is assumed to be set by the parent constructor.
        self.traversal_strategy = TargetTraversalStrategies.by_name(traversal_strategy, self.api)
        self.allowed_target_types = allowed_target_types

    def is_in_taxa(self, species: Union[int, str]) -> bool:
        """
        Returns true if the ChEMBL species is contained in any of our taxonomies.
        """
        return any(taxon.contains(species) for taxon in self.taxa)

    def _set_to_regex(self, values) -> str:
        """
        Builds a regex pattern that matches any one of ``values`` literally.

        Each value is escaped with ``re.escape`` and wrapped in a non-capturing group;
        the alternatives are joined with ``|`` inside a single capturing group.
        Empty ``values`` yields ``"()"``.

        Args:
            values: An iterable of strings.

        Returns:
            The pattern as a string (not compiled).
        """
        alternatives = "|".join(f"(?:{re.escape(v)})" for v in values)
        return "(" + alternatives + ")"

    def query(self, parent_form: ChemblCompound) -> Sequence[NestedDotDict]:
        """
        Fetches the raw records for a compound. Must be overridden.

        Args:
            parent_form: The compound, as returned by ``ChemblUtils.get_compound`` in ``find``.

        Returns:
            The records; ``find`` passes each one to ``process``.
        """
        raise NotImplementedError()

    def should_include(
        self, lookup: str, compound: ChemblCompound, data: NestedDotDict, target: ChemblTargetGraph
    ) -> bool:
        """
        Filter based on the returned (activity/mechanism) data. Must be overridden.
        IGNORE filters about the target itself, including whether it's a valid target.
        Return True in these cases (better yet, don't check).

        Args:
            lookup: The string originally passed to ``find``.
            compound: The compound that ``lookup`` resolved to.
            data: A single record from ``query``.
            target: The target graph node for the record's ``target_chembl_id``.

        Returns:
            True to keep the record; False to drop it (``process`` then returns no hits).
        """
        raise NotImplementedError()

    def to_hit(
        self,
        lookup: str,
        compound: ChemblCompound,
        data: NestedDotDict,
        best_target: ChemblTargetGraph,
    ) -> Sequence[H]:
        """
        Turns the final data into ``H``. Must be overridden.

        Gets the desired data as a NestedDotDict from the data from a single element
        returned by ``api_endpoint.filter``.
        This MUST MATCH the constructor, EXCEPT for object_id and object_name,
        which come from traversal and should be added by ``ProteinSearch.to_hit``
        (parameter ``best_target``).

        An implementation may split the data into multiple hits
        and/or add additional attributes that might come from ``best_target``.

        Args:
            lookup: The string originally passed to ``find``.
            compound: The compound that ``lookup`` resolved to.
            data: A single record from ``query``.
            best_target: One target yielded by the traversal strategy.

        Returns:
            A sequence of hits.
        """
        raise NotImplementedError()

    def find(self, lookup: str) -> Sequence[H]:
        """
        Resolves ``lookup`` to a compound, queries it, and processes every record.

        Args:
            lookup: Identifier passed to ``ChemblUtils.get_compound``.

        Returns:
            All hits from all records, in query order.
        """
        form = ChemblUtils(self.api).get_compound(lookup)
        hits = []
        for record in self.query(form):
            hits.extend(self.process(lookup, form, NestedDotDict(record)))
        return hits

    def process(self, lookup: str, compound: ChemblCompound, data: NestedDotDict) -> Sequence[H]:
        """
        Converts one queried record into hits.

        Args:
            lookup: The string originally passed to ``find``.
            compound: The compound that ``lookup`` resolved to.
            data: A single record from ``query``.

        Returns:
            The hits for this record. Empty if the record has no ``target_chembl_id``
            or if ``should_include`` rejects it.
        """
        if data.get("target_chembl_id") is None:
            # Lazy %-style arguments: ``data`` is only formatted if DEBUG is enabled.
            logger.debug(
                "target_chembl_id missing from mechanism '%s' for compound %s", data, lookup
            )
            return []
        chembl_id = data["target_chembl_id"]
        factory = TargetFactory(self.api)
        target_obj = factory.find(chembl_id)
        graph_factory = ChemblTargetGraphFactory.create(self.api, factory)
        graph = graph_factory.at_target(target_obj)
        if not self.should_include(lookup, compound, data, graph):
            return []
        # traverse() will return the source target if it's a non-traversable type (like DNA)
        # and the subclass decided whether to filter those
        # so don't worry about that here
        hits = []
        for ancestor in self.traversal_strategy(graph):
            hits.extend(self.to_hit(lookup, compound, data, ancestor))
        return hits
159
160
161
# Names exported by ``from <this module> import *``.
__all__ = ["ProteinHit", "ProteinSearch"]
162