mandos.search.chembl.activity_search.ActivitySearch._extract() - Code Metrics - dmyersturnbull/mandos - Measure and Improve Code Quality continuously with Scrutinizer

ActivitySearch._extract() A
last analyzed 2021-01-25 23:07 UTC

↳ Parent: mandos.search.chembl.activity_search

Complexity

Conditions

Size

Total Lines	20
Code Lines	19

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	2
eloc	19
nop	4
dl	0
loc	20
rs	9.45
c	0
b	0
f	0

import logging

from dataclasses import dataclass
from typing import Sequence
import re

from pocketutils.core.dot_dict import NestedDotDict


from mandos.model import ChemblCompound
from mandos.model.targets import Target
from mandos.search.chembl.protein_search import ProteinHit, ProteinSearch
from mandos.search.chembl.target_traversal_strategy import (
    TargetTraversalStrategy,
    TargetTraversalStrategies,
)

# Module-level logger; it uses the fixed name "mandos" rather than ``__name__``.
logger = logging.getLogger("mandos")


@dataclass(frozen=True, order=True, repr=True)
class ActivityHit(ProteinHit):
    """
    An "activity" hit for a compound.

    Adds the fields that ``ActivitySearch._extract`` reads from an activity
    record on top of those inherited from ``ProteinHit``. Instances are frozen
    and orderable (see the decorator arguments).
    """

    # ``id`` of the taxonomy entry looked up from the record's ``target_tax_id``
    taxon_id: int
    # ``name`` of that same taxonomy entry
    taxon_name: str
    # The record's ``pchembl_value``
    pchembl: float
    # The record's ``standard_type``
    std_type: str
    # The record's ``src_id``
    src_id: str
    # The record's ``target_chembl_id``; ``object_id`` is filled separately from the traversed target
    exact_target_id: str

    @property
    def predicate(self) -> str:
        """Return the fixed predicate label for this hit type, ``"activity"``."""
        return "activity"


class ActivitySearch(ProteinSearch[ActivityHit]):
    """
    Search for ``activity``.

    Fetches activity records for a compound through ``self.api.activity``,
    filters them against ``self.config`` (and ``self.tax`` for organisms),
    and converts the surviving records into :class:`ActivityHit` instances.
    """

    @property
    def default_traversal_strategy(self) -> TargetTraversalStrategy:
        """Return the traversal strategy used by default (``strategy0``)."""
        return TargetTraversalStrategies.strategy0(self.api)

    def query(self, parent_form: ChemblCompound) -> Sequence[NestedDotDict]:
        """
        Fetch the activity records for a compound.

        Args:
            parent_form: The compound whose ``chid`` is used as the
                ``parent_molecule_chembl_id`` filter.

        Returns:
            A list of the records returned by ``self.api.activity.filter``.
        """

        def set_to_regex(values) -> str:
            # Build one alternation group; each value is escaped so it is matched literally
            return "(" + "|".join([f"(?:{re.escape(v)})" for v in values]) + ")"

        filters = dict(
            parent_molecule_chembl_id=parent_form.chid,
            assay_type__iregex=set_to_regex(self.config.allowed_assay_types),
            standard_relation__iregex=set_to_regex(self.config.allowed_relations),
            pchembl_value__isnull=False if self.config.require_pchembl else None,
            target_organism__isnull=False if self.config.require_taxon else None,
        )
        # I'd rather not figure out how the API interprets None, so remove them
        filters = {k: v for k, v in filters.items() if v is not None}
        return list(self.api.activity.filter(**filters))

    def should_include(
        self, lookup: str, compound: ChemblCompound, data: NestedDotDict, target: Target
    ) -> bool:
        """
        Decide whether an activity record should become a hit.

        A record is rejected when its validity flag is banned, its relation or
        assay type is not allowed, it fails the (optional) taxon, pchembl or
        confidence-score requirements, or its target type is not allowed.

        Args:
            lookup: The string the compound was looked up by; only used in log messages.
            compound: The compound the record belongs to. Not used by this implementation.
            data: The activity record.
            target: The target the record was resolved to.

        Returns:
            ``True`` if the record passes every enabled filter, else ``False``.
        """
        config = self.config
        validity_flag = data.get_as("data_validity_comment", lambda s: s.lower())
        if validity_flag in {s.lower() for s in config.banned_flags}:
            return False
        if data.req_as("standard_relation", str) not in config.allowed_relations:
            return False
        if data.req_as("assay_type", str) not in config.allowed_assay_types:
            return False
        # The optional requirements are checked flag-first, so a disabled requirement
        # never parses (or fails on) a field that it does not care about.
        # NOTE(review): ``_extract`` still reads these fields unconditionally -- confirm
        # whether records lacking them are meant to be supported when the flags are off.
        if config.require_taxon and (
            data.get("target_tax_id") is None
            or data.get_as("target_tax_id", int) not in self.tax
        ):
            return False
        if config.require_pchembl and (
            data.get("pchembl_value") is None
            or data.req_as("pchembl_value", float) < config.min_pchembl
        ):
            return False
        raw_flag = data.get("data_validity_comment")
        if raw_flag is not None:
            # The flag is present but not banned, so the record is kept
            logger.warning(f"Activity annotation for {lookup} has flag '{raw_flag}' (ok)")
        # The `target_organism` doesn't always match the `assay_organism`
        # Ex: see assay CHEMBL823141 / document CHEMBL1135642 for homo sapiens in xenopus laevis
        # However, it's often something like yeast expressing a human / mouse / etc receptor
        # So there's no need to filter by it
        if target.type.name.lower() not in {s.lower() for s in config.allowed_target_types}:
            logger.warning(f"Excluding {target} with type {target.type}")
            return False
        if not config.require_confidence_score:
            return True
        # The assay is only needed for its confidence score, so it is fetched only
        # once every cheaper check has passed and the score is actually required.
        assay = self.api.assay.get(data.req_as("assay_chembl_id", str))
        confidence_score = assay.get("confidence_score")
        if confidence_score is None or confidence_score < config.min_confidence_score:
            return False
        # Even if we supposedly allow the target type,
        # it doesn't make sense for some confidence scores
        # Some of these are non-protein types
        # And if it's unknown, we don't know what to do with it
        if target.type.is_unknown or (
            target.type.is_strange and config.min_confidence_score > 3
        ):
            logger.warning(f"Excluding {target} with type {target.type}")
            return False
        return True

    def to_hit(
        self, lookup: str, compound: ChemblCompound, data: NestedDotDict, target: Target
    ) -> Sequence[ActivityHit]:
        """
        Convert an accepted activity record into hits.

        Args:
            lookup: The string the compound was looked up by.
            compound: The compound the record belongs to.
            data: The activity record.
            target: The target whose ``chembl`` and ``name`` become the hit's object.

        Returns:
            A single-element list containing the :class:`ActivityHit`.
        """
        # these must match the constructor of the Hit,
        # EXCEPT for object_id and object_name, which come from traversal
        fields = self._extract(lookup, compound, data)
        return [ActivityHit(**fields, object_id=target.chembl, object_name=target.name)]

    def _extract(self, lookup: str, compound: ChemblCompound, data: NestedDotDict) -> NestedDotDict:
        """
        Collect the hit fields that come from the record and the compound.

        Logs a warning when the record's ``target_organism`` differs from the
        name of the taxon found for its ``target_tax_id``.

        Args:
            lookup: The string the compound was looked up by.
            compound: The compound supplying ``chid``, ``inchikey`` and ``name``.
            data: The activity record.

        Returns:
            The keyword arguments for :class:`ActivityHit`, minus
            ``object_id`` and ``object_name``.
        """
        # we know these exist from the query
        # NOTE(review): the query only filters on them when ``require_taxon`` /
        # ``require_pchembl`` are set -- confirm that holds for every caller
        organism = data.req_as("target_organism", str)
        tax_id = data.req_as("target_tax_id", int)
        tax = self.tax.req(tax_id)
        if organism != tax.name:
            logger.warning(f"Target organism {organism} is not {tax.name}")
        return NestedDotDict(
            dict(
                record_id=data.req_as("activity_id", str),
                compound_id=compound.chid,
                inchikey=compound.inchikey,
                compound_name=compound.name,
                compound_lookup=lookup,
                taxon_id=tax.id,
                taxon_name=tax.name,
                pchembl=data.req_as("pchembl_value", float),
                std_type=data.req_as("standard_type", str),
                src_id=data.req_as("src_id", str),
                exact_target_id=data.req_as("target_chembl_id", str),
            )
        )


# Names exported by ``from ... import *``: the hit type and its search class.
__all__ = ["ActivityHit", "ActivitySearch"]


1			import logging
			0 ignored issues – show introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Missing module docstring Loading history...
2			from dataclasses import dataclass
3			from typing import Sequence
4			import re
5
6			from pocketutils.core.dot_dict import NestedDotDict
			0 ignored issues – show introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Unable to import 'pocketutils.core.dot_dict' Loading history...
7
8			from mandos.model import ChemblCompound
9			from mandos.model.targets import Target
10			from mandos.search.chembl.protein_search import ProteinHit, ProteinSearch
11			from mandos.search.chembl.target_traversal_strategy import (
12			TargetTraversalStrategy,
13			TargetTraversalStrategies,
14			)
15
16			logger = logging.getLogger("mandos")
17
18
19			@dataclass(frozen=True, order=True, repr=True)
20			class ActivityHit(ProteinHit):
21			"""
22			An "activity" hit for a compound.
23			"""
24
25			taxon_id: int
26			taxon_name: str
27			pchembl: float
28			std_type: str
29			src_id: str
30			exact_target_id: str
31
32			@property
33			def predicate(self) -> str:
34			return "activity"
35
36
37			class ActivitySearch(ProteinSearch[ActivityHit]):
38			"""
39			Search for ``activity``.
40			"""
41
42			@property
43			def default_traversal_strategy(self) -> TargetTraversalStrategy:
			0 ignored issues – show introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
44			return TargetTraversalStrategies.strategy0(self.api)
45
46			def query(self, parent_form: ChemblCompound) -> Sequence[NestedDotDict]:
			0 ignored issues – show introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
47			def set_to_regex(values) -> str:
48			return "(" + "|".join([f"(?:{re.escape(v)})" for v in values]) + ")"
49
50			filters = dict(
51			parent_molecule_chembl_id=parent_form.chid,
52			assay_type__iregex=set_to_regex(self.config.allowed_assay_types),
53			standard_relation__iregex=set_to_regex(self.config.allowed_relations),
54			pchembl_value__isnull=False if self.config.require_pchembl else None,
55			target_organism__isnull=False if self.config.require_taxon else None,
56			)
57			# I'd rather not figure out how the API interprets None, so remove them
58			filters = {k: v for k, v in filters.items() if v is not None}
59			return list(self.api.activity.filter(**filters))
60
61			def should_include(
			0 ignored issues – show introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
62			self, lookup: str, compound: ChemblCompound, data: NestedDotDict, target: Target
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history... Unused Code introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report The argument `compound` seems to be unused. Loading history...
63			) -> bool:
64			if (
65			data.get_as("data_validity_comment", lambda s: s.lower())
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history... best-practice introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Too many boolean expressions in if statement (11/5) Loading history...
66			in {s.lower() for s in self.config.banned_flags}
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
67			or data.req_as("standard_relation", str) not in self.config.allowed_relations
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
68			or data.req_as("assay_type", str) not in self.config.allowed_assay_types
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
69			or data.get("target_tax_id") is None
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
70			and self.config.require_taxon
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
71			or data.get_as("target_tax_id", int) not in self.tax
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
72			and self.config.require_taxon
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
73			or data.get("pchembl_value") is None
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
74			and self.config.require_pchembl
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
75			or data.req_as("pchembl_value", float) < self.config.min_pchembl
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
76			and self.config.require_pchembl
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
77			):
78			return False
79			if data.get("data_validity_comment") is not None:
80			logger.warning(
			0 ignored issues – show introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Use lazy % formatting in logging functions Loading history...
81			f"Activity annotation for {lookup} has flag '{data.get('data_validity_comment')} (ok)"
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (102/100). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
82			)
83			# The `target_organism` doesn't always match the `assay_organism`
84			# Ex: see assay CHEMBL823141 / document CHEMBL1135642 for homo sapiens in xenopus laevis
85			# However, it's often something like yeast expressing a human / mouse / etc receptor
86			# So there's no need to filter by it
87			assay = self.api.assay.get(data.req_as("assay_chembl_id", str))
88			confidence_score = assay.get("confidence_score")
89			if target.type.name.lower() not in {s.lower() for s in self.config.allowed_target_types}:
90			logger.warning(f"Excluding {target} with type {target.type}")
			0 ignored issues – show introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Use lazy % formatting in logging functions Loading history...
91			return False
92			if self.config.require_confidence_score:
93			if confidence_score is None or confidence_score < self.config.min_confidence_score:
94			return False
95			# Even if we supposedly allow the target type, it doesn't make sense for some confidence scores
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (107/100). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
96			# Some of these are non-protein types]
97			# And if it's unknown, we don't know what to do with it
98			if (
99			target.type.is_unknown
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
100			or target.type.is_strange
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
101			and self.config.min_confidence_score > 3
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
102			):
103			logger.warning(f"Excluding {target} with type {target.type}")
			0 ignored issues – show introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Use lazy % formatting in logging functions Loading history...
104			return False
105			return True
106
107			def to_hit(
			0 ignored issues – show introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
108			self, lookup: str, compound: ChemblCompound, data: NestedDotDict, target: Target
			0 ignored issues – show Coding Style introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
109			) -> Sequence[ActivityHit]:
110			# these must match the constructor of the Hit,
111			# EXCEPT for object_id and object_name, which come from traversal
112			x = self._extract(lookup, compound, data)
			0 ignored issues – show Coding Style Naming introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Variable name "x" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
113			return [ActivityHit(**x, object_id=target.chembl, object_name=target.name)]
114
115			def _extract(self, lookup: str, compound: ChemblCompound, data: NestedDotDict) -> NestedDotDict:
116			# we know these exist from the query
117			organism = data.req_as("target_organism", str)
118			tax_id = data.req_as("target_tax_id", int)
119			tax = self.tax.req(tax_id)
120			if organism != tax.name:
121			logger.warning(f"Target organism {organism} is not {tax.name}")
			0 ignored issues – show introduced 2021-01-25 23:06 UTC by Report Bug Copy Issue Report Use lazy % formatting in logging functions Loading history...
122			return NestedDotDict(
123			dict(
124			record_id=data.req_as("activity_id", str),
125			compound_id=compound.chid,
126			inchikey=compound.inchikey,
127			compound_name=compound.name,
128			compound_lookup=lookup,
129			taxon_id=tax.id,
130			taxon_name=tax.name,
131			pchembl=data.req_as("pchembl_value", float),
132			std_type=data.req_as("standard_type", str),
133			src_id=data.req_as("src_id", str),
134			exact_target_id=data.req_as("target_chembl_id", str),
135			)
136			)
137
138
139			__all__ = ["ActivityHit", "ActivitySearch"]
140

dmyersturnbull / mandos

ActivitySearch._extract() A last analyzed 2021-01-25 23:07 UTC

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like

ActivitySearch._extract() A
last analyzed 2021-01-25 23:07 UTC