Passed
Push — dependabot/pip/flake8-bugbear-... ( 22089b...82a4d5 )
by
unknown
01:29
created

ProteinSearch.is_in_taxa()   A

Complexity

Conditions 1

Size

Total Lines 5
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 2
nop 2
dl 0
loc 5
rs 10
c 0
b 0
f 0
1
import abc
0 ignored issues
show
introduced by
Missing module docstring
Loading history...
2
import logging
3
from dataclasses import dataclass
4
from typing import Sequence, TypeVar, Mapping, Any
5
6
from pocketutils.core.dot_dict import NestedDotDict
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.dot_dict'
Loading history...
7
8
from mandos.model.chembl_api import ChemblApi
9
from mandos.model.chembl_support import ChemblCompound
10
from mandos.model.chembl_support.chembl_targets import TargetFactory
11
from mandos.model.chembl_support.chembl_target_graphs import (
12
    ChemblTargetGraph,
13
    ChemblTargetGraphFactory,
14
)
15
from mandos.model.chembl_support.chembl_utils import ChemblUtils
16
from mandos.model.taxonomy import Taxonomy
17
from mandos.search.chembl import ChemblHit, ChemblSearch
18
from mandos.search.chembl.target_traversal import TargetTraversalStrategies, TargetTraversalStrategy
19
20
# Logger obtained under the fixed name "mandos" rather than this module's ``__name__``.
logger = logging.getLogger("mandos")
21
22
23
@dataclass(repr=True, order=True, frozen=True)
class ProteinHit(ChemblHit, metaclass=abc.ABCMeta):
    """
    A hit that records a protein target for a compound.

    Abstract (``abc.ABCMeta``) and declares no fields of its own; everything is
    inherited from ``ChemblHit``. The dataclass is frozen and generates ordering
    and ``repr`` methods.
    """
28
29
30
# Type variable for the kind of hit a search produces: bounded by ``ProteinHit`` and
# declared covariant. It parameterizes ``ChemblSearch`` and the ``Sequence[H]`` return types.
H = TypeVar("H", bound=ProteinHit, covariant=True)
0 ignored issues
show
Coding Style Naming introduced by
Class name "H" doesn't conform to PascalCase naming style ('[^\\W\\da-z][^\\W_]+$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
31
32
33
class ProteinSearch(ChemblSearch[H], metaclass=abc.ABCMeta):
    """
    Abstract search that links compounds to protein targets.

    Subclasses provide the raw records (``query``) and the record-level filter
    (``should_include``). This class resolves the target of each record, applies the
    configured traversal strategy to the target graph, and converts every target
    returned by the traversal into hits (``to_hit``).
    """

    def __init__(self, chembl_api: ChemblApi, taxa: Sequence[Taxonomy], traversal_strategy: str):
        """
        Args:
            chembl_api: API handle; forwarded to the parent constructor.
            taxa: Taxonomies consulted by ``is_in_taxa``.
            traversal_strategy: Strategy name, resolved with
                ``TargetTraversalStrategies.by_name``.
        """
        super().__init__(chembl_api)
        self.taxa = taxa
        # ``self.api`` is not assigned in this class; presumably the parent constructor sets it.
        self._traversal_strategy = TargetTraversalStrategies.by_name(traversal_strategy, self.api)

    def get_params(self) -> Mapping[str, Any]:
        """
        Returns the instance attributes that describe this search.

        Returns:
            Every entry of ``vars(self)`` whose name does not start with an underscore
            and is not ``"path"``.
        """
        # TODO not robust
        return {
            key: value
            for key, value in vars(self).items()
            if not key.startswith("_") and key != "path"
        }

    def get_params_str(self) -> str:
        """
        Formats ``get_params()`` as a comma-separated list of ``name=value`` pairs.
        """
        # Iterate over items(): iterating the mapping itself yields only the keys,
        # and unpacking a key string into (name, value) raises ValueError.
        return ", ".join(f"{key}={value!s}" for key, value in self.get_params().items())

    def is_in_taxa(self, species: str) -> bool:
        """
        Returns true if the ChEMBL species is contained in any of our taxonomies.
        """
        return any(taxon.contains(species) for taxon in self.taxa)

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}({self.get_params_str()})"

    def __str__(self) -> str:
        return f"{self.__class__.__name__}({self.get_params_str()})"

    def find_all(self, compounds: Sequence[str]) -> Sequence[H]:
        """
        Logs the traversal strategy in use, then delegates to the parent ``find_all``.

        Args:
            compounds: Passed through unchanged to the parent implementation.

        Returns:
            Whatever the parent ``find_all`` returns.
        """
        # Lazy %-style arguments: the message is only rendered if INFO is enabled.
        logger.info(
            "Using traversal strategy %s for %s",
            self.traversal_strategy.__class__.__name__,
            self.search_name,
        )
        return super().find_all(compounds)

    def query(self, parent_form: ChemblCompound) -> Sequence[NestedDotDict]:
        """
        Fetches the raw records for a compound. Must be overridden.

        ``find`` wraps each returned element in a ``NestedDotDict`` and hands it to ``process``.

        Args:
            parent_form: The compound that ``find`` resolved from its lookup string.

        Returns:
            The records to process.
        """
        raise NotImplementedError()

    @property
    def traversal_strategy(self) -> TargetTraversalStrategy:
        """
        The strategy object resolved in the constructor from the strategy name.
        """
        return self._traversal_strategy

    def should_include(
        self, lookup: str, compound: ChemblCompound, data: NestedDotDict, target: ChemblTargetGraph
    ) -> bool:
        """
        Filter based on the returned (activity/mechanism) data.
        IGNORE filters about the target itself, including whether it's a valid target.
        Return True in these cases (better yet, don't check).

        Args:
            lookup: The identifier that was passed to ``find``.
            compound: The compound resolved from ``lookup``.
            data: A single record returned by ``query``.
            target: The target graph positioned at the record's target.

        Returns:
            True to keep the record; False to drop it (``process`` then returns no hits).
        """
        raise NotImplementedError()

    def to_hit(
        self,
        lookup: str,
        compound: ChemblCompound,
        data: NestedDotDict,
        best_target: ChemblTargetGraph,
    ) -> Sequence[H]:
        """
        Turns the final data into ``H``.

        The entries of ``data`` are passed as keyword arguments to the hit class, so they
        MUST MATCH its constructor, EXCEPT for ``object_id`` and ``object_name``,
        which come from traversal and are filled in here from ``best_target``.

        Note that this has a default behavior but could be overridden to split into multiple hits
        and/or to add additional attributes that might come from ``best_target``.

        Args:
            lookup: The identifier that was passed to ``find`` (unused by this default).
            compound: The compound resolved from ``lookup`` (unused by this default).
            data: Keyword arguments for the hit constructor.
            best_target: A target returned by the traversal strategy.

        Returns:
            A sequence of hits.
        """
        # ``get_h`` is not defined in this class; presumably inherited and returns the hit class.
        hit_type = self.get_h()
        return [hit_type(**data, object_id=best_target.chembl, object_name=best_target.name)]

    def find(self, lookup: str) -> Sequence[H]:
        """
        Finds all hits for a single compound.

        Args:
            lookup: Identifier handed to ``ChemblUtils.get_compound``.

        Returns:
            The concatenated hits of every record returned by ``query``.
        """
        form = ChemblUtils(self.api).get_compound(lookup)
        hits = []
        for result in self.query(form):
            hits.extend(self.process(lookup, form, NestedDotDict(result)))
        return hits

    def process(self, lookup: str, compound: ChemblCompound, data: NestedDotDict) -> Sequence[H]:
        """
        Converts one record into hits.

        Args:
            lookup: The identifier that was passed to ``find``.
            compound: The compound resolved from ``lookup``.
            data: A single record returned by ``query``.

        Returns:
            An empty list if the record has no ``target_chembl_id`` or is rejected by
            ``should_include``; otherwise the hits from ``to_hit`` for every target
            returned by the traversal strategy.
        """
        if data.get("target_chembl_id") is None:
            logger.debug(
                "target_chembl_id missing from mechanism '%s' for compound %s", data, lookup
            )
            return []
        chembl_id = data["target_chembl_id"]
        factory = TargetFactory(self.api)
        target_obj = factory.find(chembl_id)
        graph_factory = ChemblTargetGraphFactory.create(self.api, factory)
        graph = graph_factory.at_target(target_obj)
        if not self.should_include(lookup, compound, data, graph):
            return []
        # traverse() will return the source target if it's a non-traversable type (like DNA)
        # and the subclass decided whether to filter those
        # so don't worry about that here
        # NOTE(review): the strategy object is invoked directly; presumably it defines __call__.
        ancestors = self.traversal_strategy(graph)
        hits = []
        for ancestor in ancestors:
            hits.extend(self.to_hit(lookup, compound, data, ancestor))
        return hits
173
174
175
# Names exported by a star-import of this module.
__all__ = ["ProteinHit", "ProteinSearch"]
176