Passed
Push — main ( 9ff912...d08a4e )
by Douglas
03:54
created

_ActivitySearch.should_include()   F

Complexity

Conditions 16

Size

Total Lines 35
Code Lines 25

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 16
eloc 25
nop 5
dl 0
loc 35
rs 2.4
c 0
b 0
f 0

How to fix   Complexity   

Complexity

Complex code units like mandos.search.chembl._activity_search._ActivitySearch.should_include() often do a lot of different things. To break such a unit down, we need to identify a cohesive component within it. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
import enum
0 ignored issues
show
introduced by
Missing module docstring
Loading history...
2
import logging
3
import abc
4
from dataclasses import dataclass
5
from typing import Sequence, Set, Optional
6
7
from pocketutils.core.dot_dict import NestedDotDict
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.dot_dict'
Loading history...
8
9
from mandos.model.chembl_api import ChemblApi
10
from mandos.model.chembl_support import ChemblCompound
11
from mandos.model.chembl_support.chembl_target_graphs import ChemblTargetGraph
12
from mandos.model.taxonomy import Taxonomy
13
from mandos.search.chembl._protein_search import ProteinHit, ProteinSearch, H
14
15
# Module-level logger; obtained by the fixed name "mandos" rather than ``__name__``.
logger = logging.getLogger("mandos")
16
17
18
class AssayType(enum.Enum):
    """
    Category of assay that an activity record can belong to.

    Each member maps to a one-letter code through :attr:`character`.
    """

    binding = enum.auto()
    functional = enum.auto()
    adme = enum.auto()
    physicochemical = enum.auto()

    @property
    def character(self) -> str:
        """
        Return the single uppercase letter that stands for this assay type.

        Returns:
            ``"B"``, ``"F"``, ``"A"``, or ``"P"``
            (presumably the ChEMBL ``assay_type`` code -- confirm against the API).
        """
        # The mapping is spelled out explicitly rather than derived from the member
        # name, so adding a member without a code fails loudly with a KeyError.
        return {
            AssayType.binding: "B",
            AssayType.functional: "F",
            AssayType.adme: "A",
            AssayType.physicochemical: "P",
        }[self]
32
33
34
@dataclass(frozen=True, order=True, repr=True)
class _ActivityHit(ProteinHit):
    """
    A protein hit derived from an activity record.

    Extends ``ProteinHit`` with the taxon of the target and the source of the record.
    Instances are immutable and orderable.
    """

    # Taxon ID of the target organism
    taxon_id: int
    # Name of the target organism's taxon
    taxon_name: str
    # Value of the record's ``src_id`` field
    # (presumably the ChEMBL data-source ID -- confirm against the API)
    src_id: str
39
40
41
class _ActivitySearch(ProteinSearch[H], metaclass=abc.ABCMeta):
    """
    Search for ``activity``.

    Base class for searches over activity records. It builds the API query
    (:meth:`query`), decides record-by-record whether to keep a result
    (:meth:`should_include`), and flattens kept records (:meth:`_extract`).
    Subclasses must override :meth:`allowed_assay_types`.
    """

    def __init__(
        self,
        key: str,
        api: ChemblApi,
        taxa: Sequence[Taxonomy],
        traversal_strategy: str,
        allowed_target_types: Set[str],
        min_confidence_score: Optional[int],
        allowed_relations: Set[str],
        min_pchembl: Optional[float],
        banned_flags: Set[str],
    ):
        """
        Store the activity-specific filters and delegate the rest to ``ProteinSearch``.

        Args:
            key: Passed through to ``ProteinSearch``
            api: Passed through to ``ProteinSearch``
            taxa: Passed through to ``ProteinSearch``; if empty, no taxon filtering is done
            traversal_strategy: Passed through to ``ProteinSearch``
            allowed_target_types: Passed through to ``ProteinSearch``;
                compared case-insensitively against the target type name
            min_confidence_score: Minimum assay ``confidence_score``; ``None`` disables the check
            allowed_relations: Permitted values of ``standard_relation``
            min_pchembl: Minimum ``pchembl_value``; ``None`` disables the check
            banned_flags: Values of ``data_validity_comment`` (case-insensitive)
                that exclude a record
        """
        super().__init__(key, api, taxa, traversal_strategy, allowed_target_types)
        self.min_confidence_score = min_confidence_score
        self.allowed_relations = allowed_relations
        self.min_pchembl = min_pchembl
        self.banned_flags = banned_flags

    @classmethod
    def allowed_assay_types(cls) -> Set[str]:
        """
        Return the ``assay_type`` values that this search accepts.

        Raises:
            NotImplementedError: Always; subclasses must override this method
        """
        raise NotImplementedError()

    def query(self, parent_form: ChemblCompound) -> Sequence[NestedDotDict]:
        """
        Fetch the activity records for a compound.

        Args:
            parent_form: The compound whose ``chid`` is used as the parent molecule ID

        Returns:
            The records returned by ``self.api.activity.filter``, as a list
        """
        filters = dict(
            parent_molecule_chembl_id=parent_form.chid,
            assay_type__iregex=self._set_to_regex(self.allowed_assay_types()),
            standard_relation__iregex=self._set_to_regex(self.allowed_relations),
            pchembl_value__isnull=None if self.min_pchembl is None else False,
            target_organism__isnull=None if len(self.taxa) == 0 else False,
        )
        # I'd rather not figure out how the API interprets None, so remove them
        filters = {k: v for k, v in filters.items() if v is not None}
        return list(self.api.activity.filter(**filters))

    def should_include(
        self,
        lookup: str,
        compound: ChemblCompound,
        data: NestedDotDict,
        target: ChemblTargetGraph,
    ) -> bool:
        """
        Decide whether an activity record should be kept.

        Args:
            lookup: The identifier that was searched for; only used in log messages
            compound: Unused here; kept so the signature stays compatible with callers
            data: The activity record
            target: The target that the record was matched to

        Returns:
            ``True`` if the record passes every filter, ``False`` otherwise
        """
        if not self._passes_record_filters(data):
            return False
        flag = data.get("data_validity_comment")
        if flag is not None:
            # The flag is not banned (that was checked above), so only report it
            logger.warning("Activity annotation for %s has flag '%s' (ok)", lookup, flag)
        # Some of these are non-protein types
        # And if it's unknown, we don't know what to do with it
        allowed_types = {s.lower() for s in self.allowed_target_types}
        if target.type.name.lower() not in allowed_types:
            logger.warning("Excluding %s with type %s", target, target.type)
            return False
        # Checked last because it may need an extra API request
        return self._passes_confidence_filter(data)

    def _passes_record_filters(self, data: NestedDotDict) -> bool:
        """Check the filters that only need the activity record itself."""
        banned = {s.lower() for s in self.banned_flags}
        if data.get_as("data_validity_comment", lambda s: s.lower()) in banned:
            return False
        if data.req_as("standard_relation", str) not in self.allowed_relations:
            return False
        if data.req_as("assay_type", str) not in self.allowed_assay_types():
            return False
        if len(self.taxa) > 0 and not self.is_in_taxa(data.get_as("target_tax_id", int)):
            return False
        if self.min_pchembl is not None:
            # A record without a pchembl value cannot satisfy a minimum
            if data.get("pchembl_value") is None:
                return False
            if data.req_as("pchembl_value", float) < self.min_pchembl:
                return False
        return True

    def _passes_confidence_filter(self, data: NestedDotDict) -> bool:
        """Check the assay confidence score, fetching the assay only if a minimum is set."""
        if self.min_confidence_score is None:
            return True
        # The `target_organism` doesn't always match the `assay_organism`
        # Ex: see assay CHEMBL823141 / document CHEMBL1135642 for homo sapiens in xenopus laevis
        # However, it's often something like yeast expressing a human / mouse / etc receptor
        # So there's no need to filter by it
        assay = self.api.assay.get(data.req_as("assay_chembl_id", str))
        confidence_score = assay.get("confidence_score")
        if confidence_score is None or confidence_score < self.min_confidence_score:
            return False
        return True

    def _extract(self, lookup: str, compound: ChemblCompound, data: NestedDotDict) -> NestedDotDict:
        """
        Flatten an accepted activity record into the fields used to build a hit.

        Args:
            lookup: The identifier that was searched for; stored as ``origin_inchikey``
            compound: The matched compound; supplies the InChIKey, ID, and name
            data: The activity record

        Returns:
            A ``NestedDotDict`` of the extracted fields
        """
        # we know these exist from the query
        # NOTE(review): the query only requires a non-null target_organism when taxa
        # is non-empty, and a non-null pchembl_value when min_pchembl is set -- confirm
        # that ``req_as`` is safe for these fields in the unfiltered cases.
        organism = data.req_as("target_organism", str)
        tax_id = data.req_as("target_tax_id", int)
        if len(self.taxa) == 0:
            tax_name = organism
        else:
            taxes = {tax.req(tax_id) for tax in self.taxa if tax.contains(tax_id)}
            # Arbitrary choice if several taxonomies contain the taxon; warn below
            tax = next(iter(taxes))
            if len(taxes) > 1:
                logger.warning("Multiple matches for taxon %s: %s; using %s", tax_id, taxes, tax)
            if organism != tax.name:
                logger.warning("Target organism %s is not %s", organism, tax.name)
            tax_id = tax.id
            tax_name = tax.name
        return NestedDotDict(
            dict(
                record_id=data.req_as("activity_id", str),
                origin_inchikey=lookup,
                matched_inchikey=compound.inchikey,
                compound_id=compound.chid,
                compound_name=compound.name,
                taxon_id=tax_id,
                taxon_name=tax_name,
                pchembl=data.req_as("pchembl_value", float),
                std_type=data.req_as("standard_type", str),
                src_id=data.req_as("src_id", str),
                exact_target_id=data.req_as("target_chembl_id", str),
                tissue=data.get_as("tissue", str),
                cell_type=data.get_as("cell_type", str),
                # get_as, not get: with get, ``str`` would be the *default value*
                subcellular_region=data.get_as("subcellular_region", str),
            )
        )

    # NOTE(review): these annotations declare no values and sit on the search class;
    # they look like hit fields (see the keys built in ``_extract``) -- confirm placement.
    assay_type: AssayType
    tissue: Optional[str]
    cell_type: Optional[str]
    subcellular_region: Optional[str]
155
156
157
# Names exported by ``from <module> import *``; the underscore-prefixed classes are listed on purpose.
__all__ = ["_ActivitySearch", "AssayType", "_ActivityHit"]
158