Passed
Push — dependabot/pip/flake8-bugbear-... ( 82a4d5...16d864 )
by
unknown
02:18
created

_ActivitySearch.__init__()   A

Complexity

Conditions 1

Size

Total Lines 17
Code Lines 16

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 16
nop 10
dl 0
loc 17
rs 9.6
c 0
b 0
f 0

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand; their parameters also often become inconsistent when you need more or different data.

There are several approaches to avoid long parameter lists:

1
import logging
0 ignored issues
show
introduced by
Missing module docstring
Loading history...
2
import abc
3
from dataclasses import dataclass
4
from typing import Sequence, Set, Optional
5
6
from pocketutils.core.dot_dict import NestedDotDict
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.dot_dict'
Loading history...
7
8
from mandos.model.chembl_api import ChemblApi
9
from mandos.model.chembl_support import ChemblCompound, AssayType
10
from mandos.model.chembl_support.chembl_target_graphs import ChemblTargetGraph
11
from mandos.model.taxonomy import Taxonomy
12
from mandos.search.chembl._protein_search import ProteinHit, ProteinSearch, H
13
14
# Module-level logger; it uses the fixed name "mandos" rather than this module's ``__name__``.
logger = logging.getLogger("mandos")
15
16
17
@dataclass(order=True, frozen=True)
class _ActivityHit(ProteinHit):
    """
    Immutable, orderable hit record for an activity search.

    Adds the three fields below to whatever ``ProteinHit`` already declares.
    """

    # Identifier of the taxon the hit is attributed to.
    taxon_id: int
    # Name of that taxon.
    taxon_name: str
    # Value of the ``src_id`` field of the underlying activity record.
    src_id: str
22
23
24
class _ActivitySearch(ProteinSearch[H], metaclass=abc.ABCMeta):
    """
    Search for ``activity``.

    Base class for searches over activity records returned by ``api.activity``.
    Subclasses state which assay types they accept by overriding
    :meth:`allowed_assay_types`.
    """

    def __init__(
        self,
        key: str,
        api: ChemblApi,
        taxa: Sequence[Taxonomy],
        traversal_strategy: str,
        allowed_target_types: Set[str],
        min_confidence_score: Optional[int],
        allowed_relations: Set[str],
        min_pchembl: Optional[float],
        banned_flags: Set[str],
    ):
        """
        Store the activity-specific filters on top of the base search settings.

        The long parameter list is kept as-is so existing callers and subclasses
        keep working.

        Args:
            key: Forwarded unchanged to ``ProteinSearch.__init__``.
            api: Forwarded unchanged to ``ProteinSearch.__init__``.
            taxa: Forwarded unchanged to ``ProteinSearch.__init__``.
            traversal_strategy: Forwarded unchanged to ``ProteinSearch.__init__``.
            allowed_target_types: Forwarded unchanged to ``ProteinSearch.__init__``.
            min_confidence_score: Lowest accepted assay ``confidence_score``;
                ``None`` disables the check.
            allowed_relations: Accepted values of ``standard_relation``.
            min_pchembl: Lowest accepted ``pchembl_value``; ``None`` disables the check.
            banned_flags: ``data_validity_comment`` values that exclude a record
                (compared case-insensitively).
        """
        super().__init__(key, api, taxa, traversal_strategy, allowed_target_types)
        self.min_confidence_score = min_confidence_score
        self.allowed_relations = allowed_relations
        self.min_pchembl = min_pchembl
        self.banned_flags = banned_flags

    @classmethod
    def allowed_assay_types(cls) -> Set[str]:
        """
        Return the ``assay_type`` values this search accepts.

        Subclasses must override this; the base implementation always raises.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()

    def query(self, parent_form: ChemblCompound) -> Sequence[NestedDotDict]:
        """
        Fetch the activity records for one compound.

        The request is restricted to the compound's ``chid``, the allowed assay
        types and the allowed relations. Records without a ``pchembl_value`` are
        excluded only when ``min_pchembl`` is set, and records without a
        ``target_organism`` only when at least one taxonomy is configured.

        Args:
            parent_form: Compound whose ``chid`` is used as the parent molecule ID.

        Returns:
            The matching records, materialized as a list.
        """
        filters = dict(
            parent_molecule_chembl_id=parent_form.chid,
            assay_type__iregex=self._set_to_regex(self.allowed_assay_types()),
            standard_relation__iregex=self._set_to_regex(self.allowed_relations),
            pchembl_value__isnull=None if self.min_pchembl is None else False,
            target_organism__isnull=None if len(self.taxa) == 0 else False,
        )
        # I'd rather not figure out how the API interprets None, so remove them
        filters = {k: v for k, v in filters.items() if v is not None}
        return list(self.api.activity.filter(**filters))

    def should_include(
        self, lookup: str, compound: ChemblCompound, data: NestedDotDict, target: ChemblTargetGraph
    ) -> bool:
        """
        Decide whether one activity record should be kept.

        A record is rejected, in this order, when:

        - its ``data_validity_comment`` is one of ``banned_flags`` (case-insensitive);
        - its ``standard_relation`` is not in ``allowed_relations``;
        - its ``assay_type`` is not in :meth:`allowed_assay_types`;
        - taxa are configured and ``target_tax_id`` fails ``is_in_taxa``;
        - ``min_pchembl`` is set and ``pchembl_value`` is missing or below it;
        - the target's type name is not in ``allowed_target_types`` (case-insensitive);
        - ``min_confidence_score`` is set and the assay's ``confidence_score``
          is missing or below it.

        Args:
            lookup: Identifier of the queried compound; only used in log messages.
            compound: Not used by this implementation; kept for the shared signature.
            data: The activity record to test.
            target: Target the record was matched to.

        Returns:
            ``True`` if the record passes every filter, otherwise ``False``.
        """
        banned = {s.lower() for s in self.banned_flags}
        if data.get_as("data_validity_comment", lambda s: s.lower()) in banned:
            return False
        if data.req_as("standard_relation", str) not in self.allowed_relations:
            return False
        if data.req_as("assay_type", str) not in self.allowed_assay_types():
            return False
        if len(self.taxa) > 0 and not self.is_in_taxa(data.get_as("target_tax_id", int)):
            return False
        if self.min_pchembl is not None:
            if data.get("pchembl_value") is None:
                return False
            if data.req_as("pchembl_value", float) < self.min_pchembl:
                return False
        flag = data.get("data_validity_comment")
        if flag is not None:
            # A flag that is present but not banned is tolerated; just report it.
            logger.warning("Activity annotation for %s has flag '%s' (ok)", lookup, flag)
        # The `target_organism` doesn't always match the `assay_organism`
        # Ex: see assay CHEMBL823141 / document CHEMBL1135642 for homo sapiens in xenopus laevis
        # However, it's often something like yeast expressing a human / mouse / etc receptor
        # So there's no need to filter by it
        # Some of these are non-protein types
        # And if it's unknown, we don't know what to do with it
        if target.type.name.lower() not in {s.lower() for s in self.allowed_target_types}:
            logger.warning("Excluding %s with type %s", target, target.type)
            return False
        if self.min_confidence_score is not None:
            # Look the assay up only when its score is actually needed.
            assay = self.api.assay.get(data.req_as("assay_chembl_id", str))
            confidence_score = assay.get("confidence_score")
            if confidence_score is None or confidence_score < self.min_confidence_score:
                return False
        return True

    def _extract(self, lookup: str, compound: ChemblCompound, data: NestedDotDict) -> NestedDotDict:
        """
        Build the flat output record for one accepted activity.

        With no taxa configured, the record's own ``target_tax_id`` and
        ``target_organism`` are reported. Otherwise the taxon is resolved through
        the configured taxonomies, and its ID and name are reported instead.

        Args:
            lookup: Identifier of the queried compound; stored as ``origin_inchikey``.
            compound: Matched compound; supplies ``inchikey``, ``chid`` and ``name``.
            data: The activity record to convert.

        Returns:
            A ``NestedDotDict`` holding the record, compound, taxon and assay fields.
        """
        # we know these exist from the query
        organism = data.req_as("target_organism", str)
        tax_id = data.req_as("target_tax_id", int)
        if len(self.taxa) == 0:
            tax_name = organism
        else:
            taxes = {taxonomy.req(tax_id) for taxonomy in self.taxa if taxonomy.contains(tax_id)}
            # NOTE(review): raises StopIteration if no taxonomy contains tax_id;
            # presumably should_include has already rejected such records -- confirm.
            taxon = next(iter(taxes))
            if len(taxes) > 1:
                logger.warning(
                    "Multiple matches for taxon %s: %s; using %s", tax_id, taxes, taxon
                )
            if organism != taxon.name:
                logger.warning("Target organism %s is not %s", organism, taxon.name)
            tax_id = taxon.id
            tax_name = taxon.name
        return NestedDotDict(
            dict(
                record_id=data.req_as("activity_id", str),
                origin_inchikey=lookup,
                matched_inchikey=compound.inchikey,
                compound_id=compound.chid,
                compound_name=compound.name,
                taxon_id=tax_id,
                taxon_name=tax_name,
                pchembl=data.req_as("pchembl_value", float),
                std_type=data.req_as("standard_type", str),
                src_id=data.req_as("src_id", str),
                exact_target_id=data.req_as("target_chembl_id", str),
                tissue=data.get_as("tissue", str),
                cell_type=data.get_as("cell_type", str),
                # get_as, like its neighbours: get() would treat `str` as the default value
                subcellular_region=data.get_as("subcellular_region", str),
            )
        )

    # NOTE(review): bare annotations with no values. Several names match keys that
    # _extract emits, so these look like hit-record fields -- confirm they belong here.
    assay_type: AssayType
    tissue: Optional[str]
    cell_type: Optional[str]
    subcellular_region: Optional[str]
138
139
140
# Explicit export list: both names start with an underscore, so a star-import
# would skip them if they were not listed here.
__all__ = ["_ActivitySearch", "_ActivityHit"]
141