Passed
Push — main ( bfa577...eb6882 )
by Douglas
04:37
created

ProteinSearch.to_hit()   A

Complexity

Conditions 1

Size

Total Lines 24
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 4
nop 5
dl 0
loc 24
rs 10
c 0
b 0
f 0
1
import abc
0 ignored issues
show
introduced by
Missing module docstring
Loading history...
2
import logging
3
from dataclasses import dataclass
4
from typing import Sequence, TypeVar
5
6
from pocketutils.core.dot_dict import NestedDotDict
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.dot_dict'
Loading history...
7
8
from mandos.model.chembl_api import ChemblApi
9
from mandos.model.chembl_support import ChemblCompound
10
from mandos.model.chembl_support.chembl_targets import ChemblTarget, TargetFactory
11
from mandos.model.chembl_support.chembl_utils import ChemblUtils
12
from mandos.model.taxonomy import Taxonomy
13
from mandos.search.chembl import ChemblHit, ChemblSearch
14
from mandos.search.chembl.target_traversal import (
15
    TargetTraversalStrategies,
16
    TargetTraversalStrategy,
17
)
18
19
# Module-level logger, obtained under the fixed name "mandos" rather than ``__name__``.
logger = logging.getLogger("mandos")
20
21
22
@dataclass(repr=True, order=True, frozen=True)
class ProteinHit(ChemblHit, metaclass=abc.ABCMeta):
    """
    A hit that ties a compound to a protein target.

    This is a frozen, orderable dataclass that declares no fields of its own;
    it exists as the common abstract base for the hit types of protein searches.
    """
27
28
29
# Type variable for the hit class a search produces: bound to ProteinHit and declared covariant.
H = TypeVar("H", bound=ProteinHit, covariant=True)
0 ignored issues
show
Coding Style Naming introduced by
Class name "H" doesn't conform to PascalCase naming style ('[^\\W\\da-z][^\\W_]+$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
30
31
32
class ProteinSearch(ChemblSearch[H], metaclass=abc.ABCMeta):
    """
    Abstract search whose hits are protein targets.

    Subclasses supply ``query`` (fetch the raw records for a compound) and
    ``should_include`` (filter those records). ``find`` and ``process`` then look up
    the target of each record, run the traversal strategy on it, and turn every
    target the strategy returns into hits via ``to_hit``.
    """

    def __init__(self, chembl_api: ChemblApi, taxonomy: Taxonomy, traversal_strategy: str):
        """
        Args:
            chembl_api: Forwarded to the parent constructor.
            taxonomy: Stored as ``self.taxonomy``.
            traversal_strategy: Name of the strategy, resolved with
                ``TargetTraversalStrategies.by_name``.
        """
        super().__init__(chembl_api)
        self.taxonomy = taxonomy
        self._traversal_strategy = TargetTraversalStrategies.by_name(traversal_strategy, self.api)

    def find_all(self, compounds: Sequence[str]) -> Sequence[H]:
        """
        Logs which traversal strategy is in use, then delegates to the parent ``find_all``.

        Args:
            compounds: Passed through unchanged to the parent implementation.

        Returns:
            Whatever the parent ``find_all`` returns.
        """
        # Lazy %-style arguments: the message is only formatted if the record is emitted.
        logger.info(
            "Using traversal strategy %s for %s",
            self.traversal_strategy.__class__.__name__,
            self.search_name,
        )
        return super().find_all(compounds)

    def query(self, parent_form: ChemblCompound) -> Sequence[NestedDotDict]:
        """
        Fetches the raw records for a compound. Must be overridden by subclasses.

        Args:
            parent_form: The compound that ``find`` obtained from
                ``ChemblUtils.get_compound``.

        Returns:
            The records; ``find`` wraps each one in a ``NestedDotDict`` and hands it
            to ``process``.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()

    @property
    def traversal_strategy(self) -> TargetTraversalStrategy:
        """
        The traversal strategy that was resolved by name in the constructor.
        """
        return self._traversal_strategy

    def should_include(
        self, lookup: str, compound: ChemblCompound, data: NestedDotDict, target: ChemblTarget
    ) -> bool:
        """
        Filter based on the returned (activity/mechanism) data.
        IGNORE filters about the target itself, including whether it's a valid target.
        Return True in these cases (better yet, don't check).

        Args:
            lookup: The lookup string that was passed to ``find``.
            compound: The compound resolved from ``lookup``.
            data: A single record returned by ``query``.
            target: The target found for the record's ``target_chembl_id``.

        Returns:
            True to keep the record; False to make ``process`` drop it.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()

    def to_hit(
        self, lookup: str, compound: ChemblCompound, data: NestedDotDict, best_target: ChemblTarget
    ) -> Sequence[H]:
        """
        Turns one record plus one traversed target into ``H`` instances.

        The items of ``data`` are passed as keyword arguments to the class returned by
        ``self.get_h()``, so they MUST MATCH that constructor, EXCEPT for ``object_id``
        and ``object_name``, which are added here from ``best_target``.

        Note that this has a default behavior but could be overridden to split into
        multiple hits and/or to add additional attributes that might come from
        ``best_target``.

        Args:
            lookup: Unused by this default implementation; available to overrides.
            compound: Unused by this default implementation; available to overrides.
            data: The record whose items become constructor keyword arguments.
            best_target: Supplies ``object_id`` (its ``chembl``) and ``object_name``
                (its ``name``).

        Returns:
            A sequence of hits; this default implementation returns exactly one.
        """
        hit_type = self.get_h()
        return [hit_type(**data, object_id=best_target.chembl, object_name=best_target.name)]

    def find(self, lookup: str) -> Sequence[H]:
        """
        Finds all hits for a single compound.

        Args:
            lookup: Passed to ``ChemblUtils.get_compound`` to resolve the compound.

        Returns:
            The hits from every record returned by ``query``, concatenated in order.
        """
        form = ChemblUtils(self.api).get_compound(lookup)
        hits = []
        for result in self.query(form):
            hits.extend(self.process(lookup, form, NestedDotDict(result)))
        return hits

    def process(self, lookup: str, compound: ChemblCompound, data: NestedDotDict) -> Sequence[H]:
        """
        Converts a single record into zero or more hits.

        Args:
            lookup: The lookup string; used here only in the debug message.
            compound: The compound resolved from ``lookup``.
            data: A single record returned by ``query``.

        Returns:
            An empty list if the record has no ``target_chembl_id`` or is rejected by
            ``should_include``; otherwise the hits from ``to_hit`` for every target
            the traversal strategy returns.
        """
        if data.get("target_chembl_id") is None:
            logger.debug(
                "target_chembl_id missing from mechanism '%s' for compound %s", data, lookup
            )
            return []
        chembl_id = data["target_chembl_id"]
        target_obj = TargetFactory.find(chembl_id, self.api)
        if not self.should_include(lookup, compound, data, target_obj):
            return []
        # traverse() will return the source target if it's a non-traversable type (like DNA)
        # and the subclass decided whether to filter those
        # so don't worry about that here
        # NOTE(review): the strategy object is invoked directly, so TargetTraversalStrategy
        # is assumed to define ``__call__``; a linter reported it as not callable -- confirm.
        ancestors = self.traversal_strategy(target_obj)
        hits = []
        for ancestor in ancestors:
            hits.extend(self.to_hit(lookup, compound, data, ancestor))
        return hits
142
143
144
# Names exported when this module is star-imported.
__all__ = ["ProteinHit", "ProteinSearch"]
145