mandos.search.chembl._activity_search._ActivitySearch.should_include() - Code Metrics - Inspection of "Bump flake8-bugbear from 20.11.1 to 21.3.2" - dmyersturnbull/mandos - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — dependabot/pip/flake8-bugbear-... ( 82a4d5...16d864 )

unknown

created 2021-03-24 04:54 UTC

_ActivitySearch.should_include() F

↳ Parent: mandos.search.chembl._activity_search

Complexity

Conditions

Size

Total Lines	35
Code Lines	25

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	16
eloc	25
nop	5
dl	0
loc	35
rs	2.4
c	0
b	0
f	0

How to fix Complexity

import logging

import abc
from dataclasses import dataclass
from typing import Sequence, Set, Optional

from pocketutils.core.dot_dict import NestedDotDict


from mandos.model.chembl_api import ChemblApi
from mandos.model.chembl_support import ChemblCompound, AssayType
from mandos.model.chembl_support.chembl_target_graphs import ChemblTargetGraph
from mandos.model.taxonomy import Taxonomy
from mandos.search.chembl._protein_search import ProteinHit, ProteinSearch, H

# Module-level logger, registered under the fixed name "mandos" (not ``__name__``).
logger = logging.getLogger("mandos")


@dataclass(frozen=True, order=True, repr=True)
class _ActivityHit(ProteinHit):
    """
    An immutable, orderable hit record extending ``ProteinHit``.

    Adds the three fields declared below to whatever ``ProteinHit`` declares.
    """

    # Taxon identifier; presumably the ``taxon_id`` produced by
    # ``_ActivitySearch._extract`` -- TODO confirm against the hit construction code.
    taxon_id: int
    # Taxon name; presumably the ``taxon_name`` produced by ``_ActivitySearch._extract``.
    taxon_name: str
    # Source identifier; presumably the ``src_id`` field of the activity record.
    src_id: str


class _ActivitySearch(ProteinSearch[H], metaclass=abc.ABCMeta):
    """
    Search for ``activity``.

    Base class for searches over activity records. Subclasses supply the
    permitted assay types through :meth:`allowed_assay_types`; this class
    builds the API query, filters the returned records, and extracts the
    fields of interest from each accepted record.
    """

    def __init__(
        self,
        key: str,
        api: ChemblApi,
        taxa: Sequence[Taxonomy],
        traversal_strategy: str,
        allowed_target_types: Set[str],
        min_confidence_score: Optional[int],
        allowed_relations: Set[str],
        min_pchembl: Optional[float],
        banned_flags: Set[str],
    ):
        """
        Store the filter settings; the first five arguments go to the parent class.

        Args:
            key: Passed through to ``ProteinSearch``.
            api: Passed through to ``ProteinSearch``; used here for the
                ``activity`` and ``assay`` endpoints.
            taxa: Taxonomies a record's target taxon must fall in; an empty
                sequence disables taxon filtering.
            traversal_strategy: Passed through to ``ProteinSearch``.
            allowed_target_types: Target type names to accept (compared case-insensitively).
            min_confidence_score: Minimum assay confidence score, or None to skip the check.
            allowed_relations: Accepted values of ``standard_relation``.
            min_pchembl: Minimum ``pchembl_value``, or None to skip the check.
            banned_flags: ``data_validity_comment`` values that exclude a record
                (compared case-insensitively).
        """
        super().__init__(key, api, taxa, traversal_strategy, allowed_target_types)
        self.min_confidence_score = min_confidence_score
        self.allowed_relations = allowed_relations
        self.min_pchembl = min_pchembl
        self.banned_flags = banned_flags

    @classmethod
    def allowed_assay_types(cls) -> Set[str]:
        """
        Return the ``assay_type`` values this search accepts.

        Raises:
            NotImplementedError: Always, in this base class; subclasses must override.
        """
        raise NotImplementedError()

    def query(self, parent_form: ChemblCompound) -> Sequence[NestedDotDict]:
        """
        Fetch the activity records for ``parent_form`` from the API.

        The assay-type and relation filters are always applied. The
        not-null filters on ``pchembl_value`` and ``target_organism`` are only
        added when ``min_pchembl`` is set or ``taxa`` is non-empty, respectively.

        Args:
            parent_form: The compound whose ``chid`` is queried.

        Returns:
            The matching records, materialized as a list.
        """
        filters = dict(
            parent_molecule_chembl_id=parent_form.chid,
            assay_type__iregex=self._set_to_regex(self.allowed_assay_types()),
            standard_relation__iregex=self._set_to_regex(self.allowed_relations),
            pchembl_value__isnull=None if self.min_pchembl is None else False,
            target_organism__isnull=None if len(self.taxa) == 0 else False,
        )
        # I'd rather not figure out how the API interprets None, so remove them
        filters = {k: v for k, v in filters.items() if v is not None}
        return list(self.api.activity.filter(**filters))

    def should_include(
        self, lookup: str, compound: ChemblCompound, data: NestedDotDict, target: ChemblTargetGraph
    ) -> bool:
        """
        Decide whether an activity record should be kept.

        Args:
            lookup: The original query string; only used in log messages.
            compound: Unused here; kept for interface compatibility.
            data: The activity record.
            target: The target associated with the record.

        Returns:
            True if the record passes every filter; False otherwise.
        """
        if not self._passes_activity_filters(data):
            return False
        validity_comment = data.get("data_validity_comment")
        if validity_comment is not None:
            # The flag is present but not banned, so keep the record and just note it
            logger.warning(
                f"Activity annotation for {lookup} has flag '{validity_comment}' (ok)"
            )
        # Some of these are non-protein types
        # And if it's unknown, we don't know what to do with it
        # This check is local, so do it before the assay lookup below
        if target.type.name.lower() not in {s.lower() for s in self.allowed_target_types}:
            logger.warning(f"Excluding {target} with type {target.type}")
            return False
        # The `target_organism` doesn't always match the `assay_organism`
        # Ex: see assay CHEMBL823141 / document CHEMBL1135642 for homo sapiens in xenopus laevis
        # However, it's often something like yeast expressing a human / mouse / etc receptor
        # So there's no need to filter by it
        return self._passes_confidence(data)

    def _passes_activity_filters(self, data: NestedDotDict) -> bool:
        """Check the filters that need only the activity record itself."""
        # The checks run in a fixed order so that the first failing one decides,
        # and ``req_as`` is only reached for records that passed the earlier checks
        flag = data.get_as("data_validity_comment", lambda s: s.lower())
        if flag in {s.lower() for s in self.banned_flags}:
            return False
        if data.req_as("standard_relation", str) not in self.allowed_relations:
            return False
        if data.req_as("assay_type", str) not in self.allowed_assay_types():
            return False
        if len(self.taxa) > 0 and not self.is_in_taxa(data.get_as("target_tax_id", int)):
            return False
        if self.min_pchembl is not None:
            if data.get("pchembl_value") is None:
                return False
            if data.req_as("pchembl_value", float) < self.min_pchembl:
                return False
        return True

    def _passes_confidence(self, data: NestedDotDict) -> bool:
        """Check the assay's confidence score against ``min_confidence_score``."""
        if self.min_confidence_score is None:
            # Nothing to compare against, so skip the assay lookup entirely
            return True
        assay = self.api.assay.get(data.req_as("assay_chembl_id", str))
        confidence_score = assay.get("confidence_score")
        # A missing score is treated as failing the minimum
        return confidence_score is not None and confidence_score >= self.min_confidence_score

    def _extract(self, lookup: str, compound: ChemblCompound, data: NestedDotDict) -> NestedDotDict:
        """
        Pull the fields of interest out of an accepted activity record.

        Args:
            lookup: The original query string; stored as ``origin_inchikey``.
            compound: The matched compound; supplies the InChIKey, ID, and name.
            data: The activity record.

        Returns:
            A ``NestedDotDict`` holding the extracted fields.

        Raises:
            LookupError: If ``taxa`` is non-empty but none of them contains the
                record's ``target_tax_id``.
        """
        # NOTE(review): the original comment said "we know these exist from the query",
        # but ``query`` only requires a non-null ``target_organism`` when ``taxa`` is
        # non-empty -- confirm that these cannot be missing otherwise.
        organism = data.req_as("target_organism", str)
        tax_id = data.req_as("target_tax_id", int)
        if len(self.taxa) == 0:
            # No taxonomy to resolve against, so use the record's own values
            tax_name = organism
        else:
            matches = {
                taxonomy.req(tax_id) for taxonomy in self.taxa if taxonomy.contains(tax_id)
            }
            taxon = next(iter(matches), None)
            if taxon is None:
                # Fail with a descriptive error rather than a bare StopIteration
                raise LookupError(f"Taxon {tax_id} ({organism}) is not in any allowed taxonomy")
            if len(matches) > 1:
                logger.warning(f"Multiple matches for taxon {tax_id}: {matches}; using {taxon}")
            if organism != taxon.name:
                logger.warning(f"Target organism {organism} is not {taxon.name}")
            tax_id = taxon.id
            tax_name = taxon.name
        return NestedDotDict(
            dict(
                record_id=data.req_as("activity_id", str),
                origin_inchikey=lookup,
                matched_inchikey=compound.inchikey,
                compound_id=compound.chid,
                compound_name=compound.name,
                taxon_id=tax_id,
                taxon_name=tax_name,
                pchembl=data.req_as("pchembl_value", float),
                std_type=data.req_as("standard_type", str),
                src_id=data.req_as("src_id", str),
                exact_target_id=data.req_as("target_chembl_id", str),
                tissue=data.get_as("tissue", str),
                cell_type=data.get_as("cell_type", str),
                # ``get_as`` converts like the lines above; ``get(key, str)`` would
                # have returned the ``str`` type itself for a missing key
                subcellular_region=data.get_as("subcellular_region", str),
            )
        )

    # NOTE(review): bare annotations with no values, so they create no attributes.
    # They look like fields meant for a hit dataclass -- confirm where they belong.
    assay_type: AssayType
    tissue: Optional[str]
    cell_type: Optional[str]
    subcellular_region: Optional[str]


# Both names start with an underscore, so a wildcard import would skip them
# unless they are listed here explicitly.
__all__ = ["_ActivitySearch", "_ActivityHit"]


1			import logging
			0 ignored issues – show introduced 2021-03-21 02:08 UTC by Report Bug Copy Issue Report Missing module docstring Loading history...
2			import abc
3			from dataclasses import dataclass
4			from typing import Sequence, Set, Optional
5
6			from pocketutils.core.dot_dict import NestedDotDict
			0 ignored issues – show introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Unable to import 'pocketutils.core.dot_dict' Loading history...
7
8			from mandos.model.chembl_api import ChemblApi
9			from mandos.model.chembl_support import ChemblCompound, AssayType
10			from mandos.model.chembl_support.chembl_target_graphs import ChemblTargetGraph
11			from mandos.model.taxonomy import Taxonomy
12			from mandos.search.chembl._protein_search import ProteinHit, ProteinSearch, H
13
14			logger = logging.getLogger("mandos")
15
16
17			@dataclass(frozen=True, order=True, repr=True)
18			class _ActivityHit(ProteinHit):
19			taxon_id: int
20			taxon_name: str
21			src_id: str
22
23
24			class _ActivitySearch(ProteinSearch[H], metaclass=abc.ABCMeta):
25			"""
26			Search for ``activity``.
27			"""
28
29			def __init__(
			0 ignored issues – show best-practice introduced 2021-03-21 02:08 UTC by Report Bug Copy Issue Report Too many arguments (10/5) Loading history...
30			self,
			0 ignored issues – show Coding Style introduced 2021-03-21 02:08 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
31			key: str,
			0 ignored issues – show Coding Style introduced 2021-03-21 02:08 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
32			api: ChemblApi,
			0 ignored issues – show Coding Style introduced 2021-03-21 02:08 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
33			taxa: Sequence[Taxonomy],
			0 ignored issues – show Coding Style introduced 2021-03-21 02:08 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
34			traversal_strategy: str,
			0 ignored issues – show Coding Style introduced 2021-03-21 02:08 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
35			allowed_target_types: Set[str],
			0 ignored issues – show Coding Style introduced 2021-03-21 02:08 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
36			min_confidence_score: Optional[int],
			0 ignored issues – show Coding Style introduced 2021-03-10 02:41 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
37			allowed_relations: Set[str],
			0 ignored issues – show Coding Style introduced 2021-03-10 02:41 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
38			min_pchembl: Optional[float],
			0 ignored issues – show Coding Style introduced 2021-03-10 02:41 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
39			banned_flags: Set[str],
			0 ignored issues – show Coding Style introduced 2021-03-10 02:41 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
40			):
41			super().__init__(key, api, taxa, traversal_strategy, allowed_target_types)
42			self.min_confidence_score = min_confidence_score
43			self.allowed_relations = allowed_relations
44			self.min_pchembl = min_pchembl
45			self.banned_flags = banned_flags
46
47			@classmethod
48			def allowed_assay_types(cls) -> Set[str]:
			0 ignored issues – show introduced 2021-03-21 02:08 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
49			raise NotImplementedError()
50
51			def query(self, parent_form: ChemblCompound) -> Sequence[NestedDotDict]:
			0 ignored issues – show introduced 2021-03-21 02:08 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
52
53			filters = dict(
54			parent_molecule_chembl_id=parent_form.chid,
55			assay_type__iregex=self._set_to_regex(self.allowed_assay_types()),
56			standard_relation__iregex=self._set_to_regex(self.allowed_relations),
57			pchembl_value__isnull=None if self.min_pchembl is None else False,
58			target_organism__isnull=None if len(self.taxa) == 0 else False,
59			)
60			# I'd rather not figure out how the API interprets None, so remove them
61			filters = {k: v for k, v in filters.items() if v is not None}
62			return list(self.api.activity.filter(**filters))
63
64			def should_include(
			0 ignored issues – show introduced 2021-03-10 02:41 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
65			self, lookup: str, compound: ChemblCompound, data: NestedDotDict, target: ChemblTargetGraph
			0 ignored issues – show Unused Code introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report The argument `compound` seems to be unused. Loading history... Coding Style introduced 2021-03-10 02:41 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
66			) -> bool:
67			if (
68			(
			0 ignored issues – show best-practice introduced 2021-03-21 02:08 UTC by Report Bug Copy Issue Report Too many boolean expressions in if statement (9/5) Loading history... Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
69			data.get_as("data_validity_comment", lambda s: s.lower())
70			in {s.lower() for s in self.banned_flags}
71			)
72			or (data.req_as("standard_relation", str) not in self.allowed_relations)
			0 ignored issues – show Coding Style introduced 2021-03-10 02:41 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
73			or (data.req_as("assay_type", str) not in self.allowed_assay_types())
			0 ignored issues – show Coding Style introduced 2021-03-21 02:08 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
74			or (len(self.taxa) > 0 and not self.is_in_taxa(data.get_as("target_tax_id", int)))
			0 ignored issues – show Coding Style introduced 2021-03-21 02:08 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
75			or (self.min_pchembl is not None and data.get("pchembl_value") is None)
			0 ignored issues – show Coding Style introduced 2021-03-21 02:08 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
76			or self.min_pchembl is not None
			0 ignored issues – show Coding Style introduced 2021-03-21 02:08 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
77			and data.req_as("pchembl_value", float) < self.min_pchembl
			0 ignored issues – show Coding Style introduced 2021-03-10 02:41 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
78			):
79			return False
80			if data.get("data_validity_comment") is not None:
81			logger.warning(
			0 ignored issues – show introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Use lazy % formatting in logging functions Loading history...
82			f"Activity annotation for {lookup} has flag '{data.get('data_validity_comment')} (ok)"
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (102/100). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
83			)
84			# The `target_organism` doesn't always match the `assay_organism`
85			# Ex: see assay CHEMBL823141 / document CHEMBL1135642 for homo sapiens in xenopus laevis
86			# However, it's often something like yeast expressing a human / mouse / etc receptor
87			# So there's no need to filter by it
88			assay = self.api.assay.get(data.req_as("assay_chembl_id", str))
89			if target.type.name.lower() not in {s.lower() for s in self.allowed_target_types}:
90			logger.warning(f"Excluding {target} with type {target.type}")
			0 ignored issues – show introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Use lazy % formatting in logging functions Loading history...
91			return False
92			confidence_score = assay.get("confidence_score")
93			if self.min_confidence_score is not None:
94			if confidence_score is None or confidence_score < self.min_confidence_score:
95			return False
96			# Some of these are non-protein types
97			# And if it's unknown, we don't know what to do with it
98			return True
99
100			def _extract(self, lookup: str, compound: ChemblCompound, data: NestedDotDict) -> NestedDotDict:
101			# we know these exist from the query
102			organism = data.req_as("target_organism", str)
103			tax_id = data.req_as("target_tax_id", int)
104			if len(self.taxa) == 0:
105			tax_id, tax_name = tax_id, organism
			0 ignored issues – show introduced 2021-03-21 02:08 UTC by Report Bug Copy Issue Report Assigning the same variable 'tax_id' to itself Loading history...
106			else:
107			taxes = {tax.req(tax_id) for tax in self.taxa if tax.contains(tax_id)}
108			tax = next(iter(taxes))
109			if len(taxes) > 1:
110			logger.warning(f"Multiple matches for taxon {tax_id}: {taxes}; using {tax}")
			0 ignored issues – show introduced 2021-03-21 02:08 UTC by Report Bug Copy Issue Report Use lazy % formatting in logging functions Loading history...
111			if organism != tax.name:
112			logger.warning(f"Target organism {organism} is not {tax.name}")
			0 ignored issues – show introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Use lazy % formatting in logging functions Loading history...
113			tax_id = tax.id
114			tax_name = tax.name
115			return NestedDotDict(
116			dict(
117			record_id=data.req_as("activity_id", str),
118			origin_inchikey=lookup,
119			matched_inchikey=compound.inchikey,
120			compound_id=compound.chid,
121			compound_name=compound.name,
122			taxon_id=tax_id,
123			taxon_name=tax_name,
124			pchembl=data.req_as("pchembl_value", float),
125			std_type=data.req_as("standard_type", str),
126			src_id=data.req_as("src_id", str),
127			exact_target_id=data.req_as("target_chembl_id", str),
128			tissue=data.get_as("tissue", str),
129			cell_type=data.get_as("cell_type", str),
130			subcellular_region=data.get("subcellular_region", str),
131			)
132			)
133
134			assay_type: AssayType
135			tissue: Optional[str]
136			cell_type: Optional[str]
137			subcellular_region: Optional[str]
138
139
140			__all__ = ["_ActivitySearch", "_ActivityHit"]
141

dmyersturnbull / mandos

Push — dependabot/pip/flake8-bugbear-... ( 82a4d5...16d864 )

_ActivitySearch.should_include() F

Complexity

Size

Duplication

Importance

How to fix Complexity

Complexity

Duplication Side-by-Side

Filter issues like