Passed
Push — main ( 9ff912...d08a4e )
by Douglas
03:54
created

_ActivitySearch.__init__()   A

Complexity

Conditions 1

Size

Total Lines 17
Code Lines 16

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 16
nop 10
dl 0
loc 17
rs 9.6
c 0
b 0
f 0

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand; their parameter lists also tend to become inconsistent when you need more or different data.

There are several approaches to avoid long parameter lists:

1
import enum
0 ignored issues
show
introduced by
Missing module docstring
Loading history...
2
import logging
3
import abc
4
from dataclasses import dataclass
5
from typing import Sequence, Set, Optional
6
7
from pocketutils.core.dot_dict import NestedDotDict
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.dot_dict'
Loading history...
8
9
from mandos.model.chembl_api import ChemblApi
10
from mandos.model.chembl_support import ChemblCompound
11
from mandos.model.chembl_support.chembl_target_graphs import ChemblTargetGraph
12
from mandos.model.taxonomy import Taxonomy
13
from mandos.search.chembl._protein_search import ProteinHit, ProteinSearch, H
14
15
# Module-wide logger, registered under the fixed name "mandos" (not ``__name__``).
logger = logging.getLogger("mandos")
16
17
18
class AssayType(enum.Enum):
    """
    Enumeration of assay types, each of which has a one-letter code.
    """

    binding = enum.auto()
    functional = enum.auto()
    adme = enum.auto()
    physicochemical = enum.auto()

    @property
    def character(self) -> str:
        """
        Return the one-letter code of this assay type.

        Returns:
            ``"B"`` for binding, ``"F"`` for functional, ``"A"`` for adme,
            and ``"P"`` for physicochemical.
        """
        # Keyed by member name so the table can be built from plain keywords.
        codes = dict(binding="B", functional="F", adme="A", physicochemical="P")
        return codes[self.name]
32
33
34
@dataclass(frozen=True, order=True)
class _ActivityHit(ProteinHit):
    """
    A ``ProteinHit`` extended with the taxon and the source of the activity record.
    """

    # Identifier of the taxon attached to the hit.
    taxon_id: int
    # Name of that taxon.
    taxon_name: str
    # Source identifier of the record.
    src_id: str
39
40
41
class _ActivitySearch(ProteinSearch[H], metaclass=abc.ABCMeta):
    """
    Search for ``activity``.

    Queries the API's ``activity`` endpoint for a compound, filters the returned records
    (see ``should_include``), and flattens each kept record (see ``_extract``).
    Subclasses choose the assay types by overriding ``allowed_assay_types``.
    """

    def __init__(
        self,
        key: str,
        api: ChemblApi,
        taxa: Sequence[Taxonomy],
        traversal_strategy: str,
        allowed_target_types: Set[str],
        min_confidence_score: Optional[int],
        allowed_relations: Set[str],
        min_pchembl: Optional[float],
        banned_flags: Set[str],
    ):
        """
        Store the filter settings and forward the shared ones to ``ProteinSearch``.

        NOTE: the parameter list is long, but it is part of the interface that callers
        and subclasses rely on, so it is kept as-is.

        Args:
            key: Forwarded to ``ProteinSearch.__init__``
            api: Forwarded to ``ProteinSearch.__init__``
            taxa: Forwarded to ``ProteinSearch.__init__``; when non-empty, activities are
                  restricted to targets whose taxon is in one of these taxonomies
            traversal_strategy: Forwarded to ``ProteinSearch.__init__``
            allowed_target_types: Forwarded to ``ProteinSearch.__init__``; target type names
                                  to keep (compared case-insensitively)
            min_confidence_score: If not None, drop activities whose assay has no
                                  ``confidence_score`` or one below this value
            allowed_relations: Values of ``standard_relation`` to keep
            min_pchembl: If not None, drop activities that have no ``pchembl_value``
                         or one below this value
            banned_flags: Values of ``data_validity_comment`` (compared case-insensitively)
                          that cause an activity to be dropped
        """
        super().__init__(key, api, taxa, traversal_strategy, allowed_target_types)
        self.min_confidence_score = min_confidence_score
        self.allowed_relations = allowed_relations
        self.min_pchembl = min_pchembl
        self.banned_flags = banned_flags

    @classmethod
    def allowed_assay_types(cls) -> Set[str]:
        """
        Return the ``assay_type`` values that this search accepts.

        Raises:
            NotImplementedError: Always, in this base class; subclasses must override
        """
        raise NotImplementedError()

    def query(self, parent_form: ChemblCompound) -> Sequence[NestedDotDict]:
        """
        Fetch the activity records for a compound from the API.

        Args:
            parent_form: The compound; its ``chid`` is matched against
                         ``parent_molecule_chembl_id``

        Returns:
            A list of everything returned by ``self.api.activity.filter``
        """
        filters = dict(
            parent_molecule_chembl_id=parent_form.chid,
            assay_type__iregex=self._set_to_regex(self.allowed_assay_types()),
            standard_relation__iregex=self._set_to_regex(self.allowed_relations),
            pchembl_value__isnull=None if self.min_pchembl is None else False,
            target_organism__isnull=None if len(self.taxa) == 0 else False,
        )
        # I'd rather not figure out how the API interprets None, so remove them
        filters = {k: v for k, v in filters.items() if v is not None}
        return list(self.api.activity.filter(**filters))

    def should_include(
        self, lookup: str, compound: ChemblCompound, data: NestedDotDict, target: ChemblTargetGraph
    ) -> bool:
        """
        Decide whether an activity record passes all of the configured filters.

        The checks run in a fixed order and stop at the first failure, so the remote
        assay lookup is only made for records that passed everything else,
        and only when ``min_confidence_score`` is set.

        Args:
            lookup: What the user searched for; only used in log messages
            compound: Unused here; kept for the shared method signature
            data: The activity record
            target: The target of the activity

        Returns:
            True if the record should be kept
        """
        # --- checks on the activity record itself ---
        banned = {s.lower() for s in self.banned_flags}
        if data.get_as("data_validity_comment", lambda s: s.lower()) in banned:
            return False
        if data.req_as("standard_relation", str) not in self.allowed_relations:
            return False
        if data.req_as("assay_type", str) not in self.allowed_assay_types():
            return False
        if len(self.taxa) > 0 and not self.is_in_taxa(data.get_as("target_tax_id", int)):
            return False
        if self.min_pchembl is not None:
            if data.get("pchembl_value") is None:
                return False
            if data.req_as("pchembl_value", float) < self.min_pchembl:
                return False
        # A flag that is present but not banned is tolerated; just make it visible
        flag = data.get("data_validity_comment")
        if flag is not None:
            logger.warning("Activity annotation for %s has flag '%s' (ok)", lookup, flag)
        # --- check on the target ---
        # Some of these are non-protein types
        # And if it's unknown, we don't know what to do with it
        if target.type.name.lower() not in {s.lower() for s in self.allowed_target_types}:
            logger.warning("Excluding %s with type %s", target, target.type)
            return False
        # --- check on the assay ---
        # The `target_organism` doesn't always match the `assay_organism`
        # Ex: see assay CHEMBL823141 / document CHEMBL1135642 for homo sapiens in xenopus laevis
        # However, it's often something like yeast expressing a human / mouse / etc receptor
        # So there's no need to filter by it
        if self.min_confidence_score is not None:
            # Fetched last and only on demand because it is a separate API lookup
            assay = self.api.assay.get(data.req_as("assay_chembl_id", str))
            confidence_score = assay.get("confidence_score")
            if confidence_score is None or confidence_score < self.min_confidence_score:
                return False
        return True

    def _extract(self, lookup: str, compound: ChemblCompound, data: NestedDotDict) -> NestedDotDict:
        """
        Flatten an activity record into the fields needed to build a hit.

        Args:
            lookup: What the user searched for; stored as ``origin_inchikey``
            compound: The matched compound; supplies ``inchikey``, ``chid``, and ``name``
            data: The activity record

        Returns:
            A ``NestedDotDict`` with the record, compound, taxon, and target fields

        Raises:
            ValueError: If ``self.taxa`` is non-empty but none of its taxonomies
                        contains the record's ``target_tax_id``
        """
        # NOTE(review): ``query`` only asks the API for a non-null target organism when
        # ``self.taxa`` is non-empty, and only for a non-null pchembl when ``min_pchembl`` is
        # set -- confirm that ``req_as`` cannot fail on these keys otherwise.
        organism = data.req_as("target_organism", str)
        tax_id = data.req_as("target_tax_id", int)
        if len(self.taxa) == 0:
            # No taxonomy to resolve against: report the organism exactly as recorded
            tax_name = organism
        else:
            # Kept in ``self.taxa`` order so that the choice among several matches is stable
            matches = [
                taxonomy.req(tax_id) for taxonomy in self.taxa if taxonomy.contains(tax_id)
            ]
            if not matches:
                raise ValueError(
                    f"Taxon {tax_id} ({organism}) is not in any of the configured taxonomies"
                )
            taxon = matches[0]
            distinct = set(matches)
            if len(distinct) > 1:
                logger.warning(
                    "Multiple matches for taxon %s: %s; using %s", tax_id, distinct, taxon
                )
            if organism != taxon.name:
                logger.warning("Target organism %s is not %s", organism, taxon.name)
            tax_id = taxon.id
            tax_name = taxon.name
        return NestedDotDict(
            dict(
                record_id=data.req_as("activity_id", str),
                origin_inchikey=lookup,
                matched_inchikey=compound.inchikey,
                compound_id=compound.chid,
                compound_name=compound.name,
                taxon_id=tax_id,
                taxon_name=tax_name,
                pchembl=data.req_as("pchembl_value", float),
                std_type=data.req_as("standard_type", str),
                src_id=data.req_as("src_id", str),
                exact_target_id=data.req_as("target_chembl_id", str),
                tissue=data.get_as("tissue", str),
                cell_type=data.get_as("cell_type", str),
                # ``get_as`` (not ``get``): the second argument of ``get`` is a default value,
                # so passing ``str`` there would return the ``str`` type for a missing key
                subcellular_region=data.get_as("subcellular_region", str),
            )
        )

    # NOTE(review): these bare annotations assign no values and nothing in this class reads
    # them; their names match keys built in ``_extract``, so they may have been meant as
    # fields of the hit dataclass -- confirm before relying on them.
    assay_type: AssayType
    tissue: Optional[str]
    cell_type: Optional[str]
    subcellular_region: Optional[str]
155
156
157
# Names exported by a star-import; listed explicitly because two of them
# start with an underscore and would otherwise be skipped.
__all__ = ["_ActivitySearch", "AssayType", "_ActivityHit"]
158