Passed
Push — dependabot/pip/flake8-bugbear-... ( 8d4b2b...22089b )
by
unknown
03:19 queued 01:48
created

ProteinSearch.__repr__()   A

Complexity

Conditions 1

Size

Total Lines 2
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 2
nop 1
dl 0
loc 2
rs 10
c 0
b 0
f 0
1
import abc
0 ignored issues
show
introduced by
Missing module docstring
Loading history...
2
import logging
3
from dataclasses import dataclass
4
from typing import Sequence, TypeVar, Mapping, Any
5
6
from pocketutils.core.dot_dict import NestedDotDict
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.dot_dict'
Loading history...
7
8
from mandos.model.chembl_api import ChemblApi
9
from mandos.model.chembl_support import ChemblCompound
10
from mandos.model.chembl_support.chembl_targets import TargetFactory
11
from mandos.model.chembl_support.chembl_target_graphs import (
12
    ChemblTargetGraph,
13
    ChemblTargetGraphFactory,
14
)
15
from mandos.model.chembl_support.chembl_utils import ChemblUtils
16
from mandos.model.taxonomy import Taxonomy
17
from mandos.search.chembl import ChemblHit, ChemblSearch
18
from mandos.search.chembl.target_traversal import TargetTraversalStrategies, TargetTraversalStrategy
19
20
# Shared logger; uses the fixed name "mandos" rather than this module's ``__name__``.
logger = logging.getLogger("mandos")
21
22
23
@dataclass(frozen=True, order=True, repr=True)
class ProteinHit(ChemblHit, metaclass=abc.ABCMeta):
    """
    A hit that links a compound to a protein target.

    Declares no fields of its own; it only inherits from ``ChemblHit``.
    """
28
29
30
# Type variable for the hit type a search produces; bounded by ProteinHit and covariant.
H = TypeVar("H", bound=ProteinHit, covariant=True)
0 ignored issues
show
Coding Style Naming introduced by
Class name "H" doesn't conform to PascalCase naming style ('[^\\W\\da-z][^\\W_]+$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
31
32
33
class ProteinSearch(ChemblSearch[H], metaclass=abc.ABCMeta):
34
    """
35
    Abstract search.
36
    """
37
38
    def __init__(self, chembl_api: ChemblApi, taxonomy: Taxonomy, traversal_strategy: str):
        """
        Sets up the search.

        Args:
            chembl_api: Forwarded to the parent initializer
            taxonomy: Stored as the public attribute ``taxonomy``
            traversal_strategy: Name of a strategy, resolved with
                                ``TargetTraversalStrategies.by_name``
        """
        super().__init__(chembl_api)
        self.taxonomy = taxonomy
        # by_name needs self.api, so this has to come after the parent initializer
        # (presumably that is where ``api`` gets set -- confirm against ChemblSearch)
        strategy = TargetTraversalStrategies.by_name(traversal_strategy, self.api)
        self._traversal_strategy = strategy
42
43
    def get_params(self) -> Mapping[str, Any]:
        """
        Collects the instance attributes that describe this search.

        Returns:
            The entries of ``vars(self)``, in their existing order, excluding any whose
            name starts with an underscore and excluding the one named ``path``
        """
        # TODO not robust
        params = {}
        for name, val in vars(self).items():
            if name.startswith("_") or name == "path":
                continue
            params[name] = val
        return params
50
51
    def get_params_str(self) -> str:
        """
        Formats the parameters from ``get_params`` as a single string.

        Returns:
            ``key=value`` pairs joined by ``", "``, with each value converted by ``str``;
            an empty string if there are no parameters
        """
        # Iterate over .items(): iterating the mapping itself yields only the keys,
        # and unpacking a key string into (key, value) fails for almost every name.
        return ", ".join(f"{key}={value!s}" for key, value in self.get_params().items())
53
54
    def __repr__(self) -> str:
55
        return self.__class__.__name__ + "(" + self.get_params_str() + ")"
56
57
    def __str__(self) -> str:
58
        return self.__class__.__name__ + "(" + self.get_params_str() + ")"
59
60
    def find_all(self, compounds: Sequence[str]) -> Sequence[H]:
        """
        Logs which traversal strategy is in use, then defers to the parent ``find_all``.

        Args:
            compounds: Passed unchanged to the parent implementation

        Returns:
            Whatever the parent ``find_all`` returns
        """
        # Lazy %-style arguments: the message is only formatted if INFO is enabled.
        logger.info(
            "Using traversal strategy %s for %s",
            self.traversal_strategy.__class__.__name__,
            self.search_name,
        )
        return super().find_all(compounds)
65
66
    def query(self, parent_form: ChemblCompound) -> Sequence[NestedDotDict]:
        """
        Fetches the raw records for a compound.

        This base implementation always raises; ``find`` calls it, so it has to be overridden.

        Args:
            parent_form: The compound to query for

        Returns:
            The records for the compound

        Raises:
            NotImplementedError: Always, in this base implementation
        """
        raise NotImplementedError()
68
69
    @property
    def traversal_strategy(self) -> TargetTraversalStrategy:
        """
        The strategy object that the constructor resolved from the strategy name.
        """
        return self._traversal_strategy
72
73
    def should_include(
        self, lookup: str, compound: ChemblCompound, data: NestedDotDict, target: ChemblTargetGraph
    ) -> bool:
        """
        Decides whether a record should be kept, judging by the returned
        (activity/mechanism) data.

        Implementations should NOT filter on the target itself, including on whether
        it is a valid target: return True in those cases (better yet, don't check).
        This base implementation always raises.

        Args:
            lookup: The compound identifier the search was started with
            compound: The compound resolved from ``lookup``
            data: The record being considered
            target: The target graph built for the record's target

        Returns:
            True to keep the record, False to drop it

        Raises:
            NotImplementedError: Always, in this base implementation
        """
        raise NotImplementedError()
91
92
    def to_hit(
        self,
        lookup: str,
        compound: ChemblCompound,
        data: NestedDotDict,
        best_target: ChemblTargetGraph,
    ) -> Sequence[H]:
        """
        Turns one record plus one traversed target into hits of type ``H``.

        The default builds exactly one hit: the class from ``self.get_h()`` is called with
        the entries of ``data`` as keyword arguments, plus ``object_id`` and ``object_name``
        taken from ``best_target``. The keys of ``data`` therefore MUST MATCH the hit's
        constructor, EXCEPT for ``object_id`` and ``object_name``.

        Subclasses may override this to split a record into several hits and/or to add
        attributes derived from ``best_target``.

        Args:
            lookup: Not used by the default implementation
            compound: Not used by the default implementation
            data: Keyword arguments for the hit constructor
            best_target: Supplies ``object_id`` (its ``chembl``) and ``object_name`` (its ``name``)

        Returns:
            A sequence of hits; a single-element list in the default implementation
        """
        hit_type = self.get_h()
        hit = hit_type(**data, object_id=best_target.chembl, object_name=best_target.name)
        return [hit]
120
121 View Code Duplication
    def find(self, lookup: str) -> Sequence[H]:
        """
        Finds all hits for a single compound.

        Resolves ``lookup`` with ``ChemblUtils.get_compound``, runs ``query`` on the result,
        and passes every returned record (wrapped in a ``NestedDotDict``) to ``process``.

        Args:
            lookup: The compound identifier to search for

        Returns:
            The hits from ``process`` for every record, concatenated in record order
        """
        compound = ChemblUtils(self.api).get_compound(lookup)
        return [
            hit
            for record in self.query(compound)
            for hit in self.process(lookup, compound, NestedDotDict(record))
        ]
137
138
    def process(self, lookup: str, compound: ChemblCompound, data: NestedDotDict) -> Sequence[H]:
        """
        Converts one query record into hits by resolving and traversing its target.

        Args:
            lookup: The compound identifier the search was started with; used in the log
                    message and passed on to ``should_include`` and ``to_hit``
            compound: The compound resolved from ``lookup``
            data: A single record; its ``target_chembl_id`` names the target

        Returns:
            The hits from ``to_hit`` for every target the traversal strategy returns.
            Empty if ``data`` has no ``target_chembl_id`` or ``should_include`` returns False.
        """
        if data.get("target_chembl_id") is None:
            # Lazy %-style arguments: the record is only rendered if DEBUG is enabled.
            logger.debug(
                "target_chembl_id missing from mechanism '%s' for compound %s", data, lookup
            )
            return []
        chembl_id = data["target_chembl_id"]
        factory = TargetFactory(self.api)
        target_obj = factory.find(chembl_id)
        graph_factory = ChemblTargetGraphFactory.create(self.api, factory)
        graph = graph_factory.at_target(target_obj)
        if not self.should_include(lookup, compound, data, graph):
            return []
        # traverse() will return the source target if it's a non-traversable type (like DNA)
        # and the subclass decided whether to filter those
        # so don't worry about that here
        # NOTE(review): this calls the strategy object directly, so it relies on
        # TargetTraversalStrategy being callable -- confirm against its definition.
        ancestors = self.traversal_strategy(graph)
        return [
            hit
            for ancestor in ancestors
            for hit in self.to_hit(lookup, compound, data, ancestor)
        ]
167
168
169
# Names exported by a star-import of this module.
__all__ = ["ProteinHit", "ProteinSearch"]
170