Passed
Push — dependabot/pip/flake8-bugbear-... ( 93dece...8d4b2b )
by
unknown
01:27
created

_traversal_strategy()   A

Complexity

Conditions 1

Size

Total Lines 3
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 3
nop 1
dl 0
loc 3
rs 10
c 0
b 0
f 0
1
import logging
0 ignored issues
show
introduced by
Missing module docstring
Loading history...
2
from dataclasses import dataclass
3
from typing import Sequence, Set, Optional
4
import re
5
6
from pocketutils.core.dot_dict import NestedDotDict
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.dot_dict'
Loading history...
7
8
from mandos.model.chembl_api import ChemblApi
9
from mandos.model.chembl_support import ChemblCompound
10
from mandos.model.chembl_support.chembl_target_graphs import ChemblTargetGraph
11
from mandos.model.defaults import Defaults
0 ignored issues
show
Unused Code introduced by
Unused Defaults imported from mandos.model.defaults
Loading history...
introduced by
Unable to import 'mandos.model.defaults'
Loading history...
Bug introduced by
The name defaults does not seem to exist in module mandos.model.
Loading history...
12
from mandos.model.taxonomy import Taxonomy
13
from mandos.search.chembl._protein_search import ProteinHit, ProteinSearch
14
from mandos.search.chembl.target_traversal import (
0 ignored issues
show
Unused Code introduced by
Unused TargetTraversalStrategies imported from mandos.search.chembl.target_traversal
Loading history...
Unused Code introduced by
Unused TargetTraversalStrategy imported from mandos.search.chembl.target_traversal
Loading history...
15
    TargetTraversalStrategy,
16
    TargetTraversalStrategies,
17
)
18
19
# Module-level logger; all messages emitted by this module go to the "mandos" logger.
logger = logging.getLogger("mandos")
20
21
22
@dataclass(frozen=True, order=True, repr=True)
class BindingHit(ProteinHit):
    """
    An "activity" hit for a compound.

    The fields declared here are filled in by ``BindingSearch._extract`` from a
    single activity record; the remaining fields are inherited from ``ProteinHit``.
    """

    # ``id`` of the taxonomy entry for the record's ``target_tax_id``;
    # None when the search was run without a taxonomy.
    taxon_id: Optional[int]
    # ``name`` of that same taxonomy entry; None when there is no taxonomy.
    taxon_name: Optional[str]
    # The record's ``pchembl_value``, converted to float.
    pchembl: float
    # The record's ``standard_type``.
    std_type: str
    # The record's ``src_id``.
    src_id: str
    # The record's ``target_chembl_id`` (the target the activity was recorded against;
    # ``object_id`` is instead taken from the target passed to ``BindingSearch.to_hit``).
    exact_target_id: str

    # NOTE(review): with ``order=True``, comparing two hits that reach ``taxon_id`` and
    # have None on one side only would raise TypeError -- confirm whether hits with and
    # without a taxonomy are ever sorted together.

    @property
    def predicate(self) -> str:
        """Return the fixed predicate label for this kind of hit: ``"activity"``."""
        return "activity"
38
39
40
class BindingSearch(ProteinSearch[BindingHit]):
    """
    Search for ``activity`` of type "B".

    Activity records are fetched per compound by :meth:`query`, filtered by
    :meth:`should_include`, and converted into :class:`BindingHit` objects by
    :meth:`to_hit`.
    """

    def __init__(
        self,
        chembl_api: ChemblApi,
        tax: Taxonomy,
        traversal_strategy: str,
        allowed_target_types: Set[str],
        min_confidence_score: Optional[int],
        allowed_relations: Set[str],
        min_pchembl: float,
        banned_flags: Set[str],
    ):
        """
        Store the filtering criteria; the first three arguments go to the base class.

        Args:
            chembl_api: Passed to ``ProteinSearch.__init__``.
            tax: Passed to ``ProteinSearch.__init__``.
            traversal_strategy: Passed to ``ProteinSearch.__init__``.
            allowed_target_types: Target type names to keep (compared case-insensitively).
            min_confidence_score: Minimum assay ``confidence_score``; None disables the check.
            allowed_relations: Accepted values of ``standard_relation`` (exact match).
            min_pchembl: Minimum accepted ``pchembl_value``.
            banned_flags: ``data_validity_comment`` values that exclude a record
                (compared case-insensitively).
        """
        super().__init__(chembl_api, tax, traversal_strategy)
        self.allowed_target_types = allowed_target_types
        self.min_confidence_score = min_confidence_score
        self.allowed_relations = allowed_relations
        self.min_pchembl = min_pchembl
        self.banned_flags = banned_flags

    def query(self, parent_form: ChemblCompound) -> Sequence[NestedDotDict]:
        """
        Fetch the candidate activity records for a compound.

        Args:
            parent_form: Compound whose ``chid`` is used as ``parent_molecule_chembl_id``.

        Returns:
            A list of the records returned by ``self.api.activity.filter``.
        """

        def set_to_regex(values) -> str:
            # Alternation of the regex-escaped values, e.g. "((?:=)|(?:<))".
            return "(" + "|".join(f"(?:{re.escape(v)})" for v in values) + ")"

        filters = dict(
            parent_molecule_chembl_id=parent_form.chid,
            assay_type="B",
            # The pattern is not anchored; `should_include` re-checks the relation exactly.
            standard_relation__iregex=set_to_regex(self.allowed_relations),
            pchembl_value__isnull=False,
            # Only require an organism when there is a taxonomy to match it against.
            target_organism__isnull=None if self.taxonomy is None else False,
        )
        # I'd rather not figure out how the API interprets None, so remove them
        filters = {k: v for k, v in filters.items() if v is not None}
        return list(self.api.activity.filter(**filters))

    def should_include(
        self, lookup: str, compound: ChemblCompound, data: NestedDotDict, target: ChemblTargetGraph
    ) -> bool:
        """
        Decide whether an activity record should become a hit.

        The checks run in a fixed order and stop at the first failure.

        Args:
            lookup: The string the compound was looked up by; used only in log messages.
            compound: Unused here; kept for the shared method signature.
            data: The activity record.
            target: The target for the record; its type must be in ``allowed_target_types``.

        Returns:
            True if the record passes every filter, False otherwise.
        """
        banned = {s.lower() for s in self.banned_flags}
        if data.get_as("data_validity_comment", lambda s: s.lower()) in banned:
            return False
        if data.req_as("standard_relation", str) not in self.allowed_relations:
            return False
        if data.req_as("assay_type", str) != "B":
            return False
        if self.taxonomy is not None and data.get_as("target_tax_id", int) not in self.taxonomy:
            return False
        if data.get("pchembl_value") is None:
            return False
        if data.req_as("pchembl_value", float) < self.min_pchembl:
            return False
        if data.get("data_validity_comment") is not None:
            # The flag is present but not banned, so the record is kept; just make it visible.
            logger.warning(
                "Activity annotation for %s has flag '%s' (ok)",
                lookup,
                data.get("data_validity_comment"),
            )
        # The `target_organism` doesn't always match the `assay_organism`
        # Ex: see assay CHEMBL823141 / document CHEMBL1135642 for homo sapiens in xenopus laevis
        # However, it's often something like yeast expressing a human / mouse / etc receptor
        # So there's no need to filter by it

        # Some of these are non-protein types
        # And if it's unknown, we don't know what to do with it
        if target.type.name.lower() not in {s.lower() for s in self.allowed_target_types}:
            logger.warning("Excluding %s with type %s", target, target.type)
            return False
        if self.min_confidence_score is None:
            return True
        # Only look the assay up when its confidence score is actually needed;
        # this avoids an API call per record when the check is disabled or the
        # record has already been rejected above.
        assay = self.api.assay.get(data.req_as("assay_chembl_id", str))
        confidence_score = assay.get("confidence_score")
        if confidence_score is None or confidence_score < self.min_confidence_score:
            return False
        return True

    def to_hit(
        self, lookup: str, compound: ChemblCompound, data: NestedDotDict, target: ChemblTargetGraph
    ) -> Sequence[BindingHit]:
        """
        Convert one activity record into a single-element list of :class:`BindingHit`.

        Args:
            lookup: The string the compound was looked up by.
            compound: The compound the record belongs to.
            data: The activity record.
            target: Supplies ``object_id`` (``target.chembl``) and ``object_name``
                (``target.name``).

        Returns:
            A list containing exactly one hit.
        """
        # these must match the constructor of the Hit,
        # EXCEPT for object_id and object_name, which come from traversal
        fields = self._extract(lookup, compound, data)
        return [BindingHit(**fields, object_id=target.chembl, object_name=target.name)]

    def _extract(self, lookup: str, compound: ChemblCompound, data: NestedDotDict) -> NestedDotDict:
        """
        Collect the hit fields that come from the compound and the activity record.

        When a taxonomy is configured, the record's ``target_tax_id`` is resolved with
        ``self.taxonomy.req`` and a warning is logged if the record's ``target_organism``
        differs from the resolved name. Without a taxonomy, both taxon fields are None.

        Returns:
            A ``NestedDotDict`` whose keys are constructor arguments of :class:`BindingHit`
            (everything except ``object_id`` and ``object_name``).
        """
        # we know these exist from the query
        if self.taxonomy is None:
            tax = None
        else:
            organism = data.req_as("target_organism", str)
            tax_id = data.req_as("target_tax_id", int)
            tax = self.taxonomy.req(tax_id)
            if organism != tax.name:
                logger.warning("Target organism %s is not %s", organism, tax.name)
        return NestedDotDict(
            dict(
                record_id=data.req_as("activity_id", str),
                compound_id=compound.chid,
                inchikey=compound.inchikey,
                compound_name=compound.name,
                compound_lookup=lookup,
                taxon_id=None if tax is None else tax.id,
                taxon_name=None if tax is None else tax.name,
                pchembl=data.req_as("pchembl_value", float),
                std_type=data.req_as("standard_type", str),
                src_id=data.req_as("src_id", str),
                exact_target_id=data.req_as("target_chembl_id", str),
            )
        )
148
149
150
# Public names of this module (what a star-import of it exposes).
__all__ = ["BindingHit", "BindingSearch"]
151