Passed
Push — dependabot/pip/flake8-bugbear-... ( 82a4d5...16d864 )
by
unknown
02:18
created

_ActivitySearch.should_include()   F

Complexity

Conditions 16

Size

Total Lines 35
Code Lines 25

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 16
eloc 25
nop 5
dl 0
loc 35
rs 2.4
c 0
b 0
f 0

How to fix   Complexity   

Complexity

Complex code units like mandos.search.chembl._activity_search._ActivitySearch.should_include() often do a lot of different things. To break such a unit down, we need to identify a cohesive component within it. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
import logging
0 ignored issues
show
introduced by
Missing module docstring
Loading history...
2
import abc
3
from dataclasses import dataclass
4
from typing import Sequence, Set, Optional
5
6
from pocketutils.core.dot_dict import NestedDotDict
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.dot_dict'
Loading history...
7
8
from mandos.model.chembl_api import ChemblApi
9
from mandos.model.chembl_support import ChemblCompound, AssayType
10
from mandos.model.chembl_support.chembl_target_graphs import ChemblTargetGraph
11
from mandos.model.taxonomy import Taxonomy
12
from mandos.search.chembl._protein_search import ProteinHit, ProteinSearch, H
13
14
# Module-level logger; it uses the fixed name "mandos" rather than ``__name__``.
logger = logging.getLogger("mandos")
15
16
17
@dataclass(frozen=True, order=True, repr=True)
class _ActivityHit(ProteinHit):
    """
    A ``ProteinHit`` extended with taxon and source fields.

    Frozen and orderable; ordering and ``repr`` are generated by ``dataclass``.
    """

    # Taxon of the hit; presumably filled from ``taxon_id`` / ``taxon_name``
    # as built by ``_ActivitySearch._extract`` -- TODO confirm against callers.
    taxon_id: int
    taxon_name: str
    # Presumably the ``src_id`` of the underlying activity record -- TODO confirm.
    src_id: str
22
23
24
class _ActivitySearch(ProteinSearch[H], metaclass=abc.ABCMeta):
25
    """
26
    Search for ``activity``.
27
    """
28
29
    def __init__(
        self,
        key: str,
        api: ChemblApi,
        taxa: Sequence[Taxonomy],
        traversal_strategy: str,
        allowed_target_types: Set[str],
        min_confidence_score: Optional[int],
        allowed_relations: Set[str],
        min_pchembl: Optional[float],
        banned_flags: Set[str],
    ):
        """
        Store the activity-specific filters and delegate the rest to the parent class.

        Args:
            key: Forwarded unchanged to the parent constructor.
            api: Forwarded unchanged to the parent constructor.
            taxa: Forwarded unchanged to the parent constructor; an empty sequence
                disables the taxon checks in ``query`` and ``should_include``.
            traversal_strategy: Forwarded unchanged to the parent constructor.
            allowed_target_types: Forwarded unchanged to the parent constructor;
                ``should_include`` compares it case-insensitively to the target type name.
            min_confidence_score: Minimum assay ``confidence_score``; ``None`` disables the check.
            allowed_relations: Accepted ``standard_relation`` values.
            min_pchembl: Minimum ``pchembl_value``; ``None`` disables the check.
            banned_flags: ``data_validity_comment`` values (compared case-insensitively)
                that cause a record to be rejected.
        """
        super().__init__(key, api, taxa, traversal_strategy, allowed_target_types)
        self.min_confidence_score = min_confidence_score
        self.allowed_relations = allowed_relations
        self.min_pchembl = min_pchembl
        self.banned_flags = banned_flags
46
47
    @classmethod
    def allowed_assay_types(cls) -> Set[str]:
        """
        Return the ``assay_type`` values this search accepts.

        This base implementation always raises; subclasses are expected to override it.

        Raises:
            NotImplementedError: Always, unless overridden.
        """
        raise NotImplementedError()
50
51
    def query(self, parent_form: ChemblCompound) -> Sequence[NestedDotDict]:
        """
        Fetch the activity records for a compound from ``self.api.activity``.

        Records are always filtered by the compound's ChEMBL ID, the allowed assay
        types, and the allowed standard relations. Two further filters are added
        only when relevant: a non-null ``pchembl_value`` when ``min_pchembl`` is set,
        and a non-null ``target_organism`` when any taxa were given.

        Args:
            parent_form: The compound whose ``chid`` is queried.

        Returns:
            The matching records, materialized as a list.
        """
        filters = {
            "parent_molecule_chembl_id": parent_form.chid,
            "assay_type__iregex": self._set_to_regex(self.allowed_assay_types()),
            "standard_relation__iregex": self._set_to_regex(self.allowed_relations),
        }
        # Optional filters are added only when needed, so the API never receives None.
        if self.min_pchembl is not None:
            filters["pchembl_value__isnull"] = False
        if len(self.taxa) != 0:
            filters["target_organism__isnull"] = False
        return list(self.api.activity.filter(**filters))
63
64
    def should_include(
        self, lookup: str, compound: ChemblCompound, data: NestedDotDict, target: ChemblTargetGraph
    ) -> bool:
        """
        Decide whether one activity record should be kept.

        The record is rejected when any of the following holds (checked in this order):

        - its ``data_validity_comment`` matches a banned flag, case-insensitively
        - its ``standard_relation`` is not in ``allowed_relations``
        - its ``assay_type`` is not in ``allowed_assay_types()``
        - taxa were given and ``target_tax_id`` is in none of them
        - ``min_pchembl`` is set and ``pchembl_value`` is missing or below it
        - the target's type name is not in ``allowed_target_types``, case-insensitively
        - ``min_confidence_score`` is set and the assay's ``confidence_score``
          is missing or below it

        Args:
            lookup: The string the compound was looked up by; used only in log messages.
            compound: Not used by this implementation; kept so the signature is unchanged.
            data: The activity record.
            target: The target the record refers to.

        Returns:
            ``True`` if the record passes every filter, ``False`` otherwise.
        """
        # Guard clauses, in the same order the original ``or`` chain short-circuited.
        validity_flag = data.get_as("data_validity_comment", lambda s: s.lower())
        if validity_flag in {s.lower() for s in self.banned_flags}:
            return False
        if data.req_as("standard_relation", str) not in self.allowed_relations:
            return False
        if data.req_as("assay_type", str) not in self.allowed_assay_types():
            return False
        if len(self.taxa) > 0 and not self.is_in_taxa(data.get_as("target_tax_id", int)):
            return False
        if self.min_pchembl is not None:
            if data.get("pchembl_value") is None:
                return False
            if data.req_as("pchembl_value", float) < self.min_pchembl:
                return False
        # A validity comment that is present but not banned is only reported.
        raw_flag = data.get("data_validity_comment")
        if raw_flag is not None:
            logger.warning(f"Activity annotation for {lookup} has flag '{raw_flag}' (ok)")
        # The `target_organism` doesn't always match the `assay_organism`
        # Ex: see assay CHEMBL823141 / document CHEMBL1135642 for homo sapiens in xenopus laevis
        # However, it's often something like yeast expressing a human / mouse / etc receptor
        # So there's no need to filter by it
        assay = self.api.assay.get(data.req_as("assay_chembl_id", str))
        # Some of these are non-protein types
        # And if it's unknown, we don't know what to do with it
        if target.type.name.lower() not in {s.lower() for s in self.allowed_target_types}:
            logger.warning(f"Excluding {target} with type {target.type}")
            return False
        if self.min_confidence_score is not None:
            confidence_score = assay.get("confidence_score")
            if confidence_score is None or confidence_score < self.min_confidence_score:
                return False
        return True
99
100
    def _extract(
        self, lookup: str, compound: ChemblCompound, data: NestedDotDict
    ) -> NestedDotDict:
        """
        Build the flat record for one activity entry.

        With no taxa configured, the taxon ID and name come straight from the
        record's ``target_tax_id`` and ``target_organism``. Otherwise they come from
        the first configured taxonomy entry that contains ``target_tax_id``; a warning
        is logged if several match or if the name differs from ``target_organism``.

        Args:
            lookup: The string the compound was looked up by; stored as ``origin_inchikey``.
            compound: The matched compound; supplies ``inchikey``, ``chid`` and ``name``.
            data: The activity record.

        Returns:
            A ``NestedDotDict`` with the compound, taxon, activity and assay-context fields.
        """
        # NOTE(review): ``query`` only requires a non-null ``target_organism`` when taxa
        # are given, so with no taxa these ``req_as`` calls may raise -- TODO confirm.
        organism = data.req_as("target_organism", str)
        tax_id = data.req_as("target_tax_id", int)
        if len(self.taxa) == 0:
            tax_name = organism
        else:
            taxes = {taxonomy.req(tax_id) for taxonomy in self.taxa if taxonomy.contains(tax_id)}
            # NOTE(review): raises StopIteration if no taxonomy contains ``tax_id``;
            # presumably ``should_include`` has already filtered those out -- TODO confirm.
            tax = next(iter(taxes))
            if len(taxes) > 1:
                logger.warning(f"Multiple matches for taxon {tax_id}: {taxes}; using {tax}")
            if organism != tax.name:
                logger.warning(f"Target organism {organism} is not {tax.name}")
            tax_id = tax.id
            tax_name = tax.name
        return NestedDotDict(
            {
                "record_id": data.req_as("activity_id", str),
                "origin_inchikey": lookup,
                "matched_inchikey": compound.inchikey,
                "compound_id": compound.chid,
                "compound_name": compound.name,
                "taxon_id": tax_id,
                "taxon_name": tax_name,
                # NOTE(review): ``pchembl_value`` is only guaranteed by the filters
                # when ``min_pchembl`` is set -- TODO confirm.
                "pchembl": data.req_as("pchembl_value", float),
                "std_type": data.req_as("standard_type", str),
                "src_id": data.req_as("src_id", str),
                "exact_target_id": data.req_as("target_chembl_id", str),
                "tissue": data.get_as("tissue", str),
                "cell_type": data.get_as("cell_type", str),
                # ``get_as`` (not ``get``): with ``get`` the second argument is the
                # default, so a missing key would yield the ``str`` class itself.
                "subcellular_region": data.get_as("subcellular_region", str),
            }
        )
133
134
    # NOTE(review): bare annotations (no values) in the body of ``_ActivitySearch``.
    # They look like fields intended for ``_ActivityHit`` -- TODO confirm and move if so.
    assay_type: AssayType
    tissue: Optional[str]
    cell_type: Optional[str]
    subcellular_region: Optional[str]
138
139
140
# Names exported by a star-import of this module; both are underscore-prefixed.
__all__ = ["_ActivitySearch", "_ActivityHit"]
141