mandos.search.chembl.target_predictions - Code Metrics - Inspection of "feat: add prediction search; improve taxa" - dmyersturnbull/mandos - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — main ( 9813db...5006f2 )

by Douglas

created 2021-08-07 00:13 UTC

mandos.search.chembl.target_predictions A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	149
Duplicated Lines	0 %

Importance

Changes

Metric	Value
eloc	125
dl	0
loc	149
rs	10
c	0
b	0
f	0
wmc	21

6 Methods

Rating	Name	Size	Complexity
B	TargetPredictionSearch.process()	44	5
B	TargetPredictionSearch.__init__()	29	5
A	TargetPredictionSearch.find()	8	2
A	TargetPredictionSearch.data_source()	3	1
A	TargetPredictionSearch.page()	3	1
B	TargetPredictionSearch._get_taxon()	17	7

from __future__ import annotations


from typing import Sequence, Optional, Tuple, Set

import numpy as np


from mandos.model.apis.chembl_support.chembl_target_graphs import (
    ChemblTargetGraphFactory,
    ChemblTargetGraph,
)
from mandos.model.apis.chembl_support.chembl_targets import TargetFactory
from pocketutils.tools.string_tools import StringTools


from mandos import logger

from mandos.model.taxonomy import Taxonomy, Taxon
from typeddfs import TypedDf


from mandos.model.apis.chembl_api import ChemblApi

from mandos.model.apis.chembl_scrape_api import (
    ChemblScrapePage,
    ChemblScrapeApi,
    ChemblTargetPredictionTable,
)
from mandos.model.apis.chembl_support import ChemblCompound
from mandos.model.apis.chembl_support.chembl_utils import ChemblUtils
from mandos.search.chembl import ChemblScrapeSearch
from mandos.model.concrete_hits import ChemblTargetPredictionHit
from mandos.search.chembl.target_traversal import TargetTraversalStrategies

# Short module-level aliases for the target-predictions scrape page and its
# table type. NOTE(review): neither alias is referenced by the class defined
# below in this module; confirm whether they are used elsewhere before removing.
P = ChemblScrapePage.target_predictions
T = ChemblTargetPredictionTable


class TargetPredictionSearch(ChemblScrapeSearch[ChemblTargetPredictionHit]):
    """
    Search over scraped ChEMBL target predictions.

    For a compound, fetches its prediction table via the scrape API and emits
    one ``ChemblTargetPredictionHit`` per (traversed target, confidence level)
    pair, after filtering rows by organism and activity threshold.
    """

    @classmethod
    def page(cls) -> ChemblScrapePage:
        """Return the scrape page this search reads (``target_predictions``)."""
        return ChemblScrapePage.target_predictions

    def __init__(
        self,
        key: str,
        api: ChemblApi,
        scrape: ChemblScrapeApi,
        taxa: Sequence[Taxonomy],
        traversal: str,
        target_types: Set[str],
        required_level: int = 70,
        min_threshold: float = 1.0,
        binding_score: float = 1.0,
        nonbinding_score: float = 1.0,
    ):
        """
        Create the search.

        Args:
            key: Passed through to the parent constructor.
            api: ChEMBL API; passed to the parent and used to resolve the traversal strategy.
            scrape: ChEMBL scrape API; passed through to the parent constructor.
            taxa: Taxonomies used to filter rows by organism; empty means "allow all".
            traversal: Name of a target traversal strategy
                       (resolved with ``TargetTraversalStrategies.by_name``).
            target_types: Stored on the instance.
            required_level: Must be 70, 80, or 90.
            min_threshold: Rows whose activity threshold is below this are skipped; must be > 0.
            binding_score: Must be > 0.
            nonbinding_score: Must be > 0.

        Raises:
            ValueError: If ``required_level`` is not 70/80/90 or any of the
                        numeric parameters is not positive.
        """
        super().__init__(key, api, scrape)
        self.taxa = taxa
        self.traversal_strategy = TargetTraversalStrategies.by_name(traversal, self.api)
        # NOTE(review): target_types, required_level, binding_score, and nonbinding_score
        # are validated/stored here but are not read by any method visible in this class.
        # Confirm whether filtering on them is still to be implemented.
        self.target_types = target_types
        if required_level not in [70, 80, 90]:
            raise ValueError(f"required_level must be 70, 80, or 90, not {required_level}")
        if min_threshold <= 0:
            raise ValueError(f"min_threshold must be positive, not {min_threshold}")
        if binding_score <= 0:
            raise ValueError(f"binding_score must be positive, not {binding_score}")
        if nonbinding_score <= 0:
            raise ValueError(f"nonbinding_score must be positive, not {nonbinding_score}")
        self.required_level = required_level
        self.min_threshold = min_threshold
        self.binding_score = binding_score
        self.nonbinding_score = nonbinding_score

    @property
    def data_source(self) -> str:
        """Label recorded as the data source on every hit from this search."""
        return "ChEMBL :: target predictions"

    def find(self, lookup: str) -> Sequence[ChemblTargetPredictionHit]:
        """
        Find all target-prediction hits for one compound.

        Args:
            lookup: Compound identifier, resolved through ``ChemblUtils``.

        Returns:
            The concatenated hits from every row of the compound's prediction table.
        """
        utils = ChemblUtils(self.api)
        compound = utils.compound_dot_dict_to_obj(utils.get_compound_dot_dict(lookup))
        table: TypedDf = self.scrape.fetch_predictions(compound.chid)
        hits = []
        for row in table.itertuples():
            hits.extend(self.process(lookup, compound, row))
        return hits

    def process(
        self, lookup: str, compound: ChemblCompound, row
    ) -> Sequence[ChemblTargetPredictionHit]:
        """
        Convert one prediction-table row into hits.

        Args:
            lookup: The original lookup string, recorded as the hit's origin.
            compound: The resolved compound.
            row: A row tuple from the prediction table. This method reads
                 ``target_organism``, ``activity_threshold``, ``target_chembl_id``,
                 ``target_pref_name``, and ``confidence_70`` / ``_80`` / ``_90``
                 (each of which must expose ``yes_no_mixed`` and ``score``).

        Returns:
            An empty list if the organism is excluded or the activity threshold is
            below ``min_threshold``; otherwise one hit per traversed target per
            confidence level (70, 80, 90).
        """
        tax_id, tax_name = self._get_taxon(row.target_organism)
        # (None, None) means the organism matched none of the configured taxa.
        # (None, name) means no taxa were configured, which allows everything.
        if tax_id is None and tax_name is None:
            return []
        thresh = row.activity_threshold
        if thresh < self.min_threshold:
            return []
        factory = TargetFactory(self.api)
        target_obj = factory.find(row.target_chembl_id)
        graph_factory = ChemblTargetGraphFactory.create(self.api, factory)
        graph = graph_factory.at_target(target_obj)
        ancestors: Sequence[ChemblTargetGraph] = self.traversal_strategy(graph)
        # Loop-invariant pieces of the weight, computed once per row.
        sqrt_thresh = np.sqrt(thresh)
        sqrt_min_thresh = np.sqrt(self.min_threshold)
        levels = (
            (70, row.confidence_70),
            (80, row.confidence_80),
            (90, row.confidence_90),
        )
        hits = []
        for ancestor in ancestors:
            for conf_t, conf_v in levels:
                # conf_t / (100 - conf_t) is 7/3, 4, and 9 for levels 70, 80, and 90;
                # dividing by 4 therefore gives the 80 level a factor of exactly 1.
                weight = (
                    sqrt_thresh
                    * abs(conf_t / (100 - conf_t) * conf_v.score)
                    / 4
                    / sqrt_min_thresh
                )
                hits.append(
                    self._create_hit(
                        c_origin=lookup,
                        c_matched=compound.inchikey,
                        c_id=compound.chid,
                        c_name=compound.name,
                        predicate=f"binding:{conf_v.yes_no_mixed}",
                        object_id=ancestor.chembl,
                        object_name=ancestor.name,
                        data_source=self.data_source,
                        exact_target_id=row.target_chembl_id,
                        exact_target_name=row.target_pref_name,
                        weight=weight,
                        prediction=conf_v,
                        confidence_set=conf_t,
                        threshold=thresh,
                    )
                )
        return hits

    def _get_taxon(self, organism: str) -> Tuple[Optional[int], Optional[str]]:
        """
        Match an organism name against the configured taxonomies.

        Args:
            organism: Organism string taken from a prediction row.

        Returns:
            ``(None, organism)`` if no taxa are configured (everything is allowed);
            ``(None, None)`` if taxa are configured but none contains the organism;
            otherwise ``(id of the first match, organism)``.
        """
        if not self.taxa:  # allow all
            return None, organism
        # Accumulate matches across all taxonomies, dropping duplicates while
        # keeping encounter order. (The previous code did ``{} += ...``, which
        # raises TypeError because dict does not support in-place addition.)
        matches = []
        for tax in self.taxa:
            for match in tax.get_by_id_or_name(organism):
                if match not in matches:
                    matches.append(match)
        if not matches:
            logger.debug(f"Taxon {organism} not in set. Excluding.")
            return None, None
        best: Taxon = matches[0]
        if best.scientific_name != organism and best.mnemonic != organism:
            logger.warning(f"Organism {organism} matched to {best.scientific_name} by common name")
        if len(matches) > 1:
            logger.warning(
                f"Multiple matches for taxon {organism}: {matches}; using {best.scientific_name}"
            )
        return best.id, organism


# Public API of this module: only the search class is exported.
__all__ = ["TargetPredictionSearch"]


1			from __future__ import annotations
			0 ignored issues – show introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Missing module docstring Loading history...
2
3			from typing import Sequence, Optional, Tuple, Set
4
5			import numpy as np
			0 ignored issues – show introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Unable to import 'numpy' Loading history...
6
7			from mandos.model.apis.chembl_support.chembl_target_graphs import (
8			ChemblTargetGraphFactory,
9			ChemblTargetGraph,
10			)
11			from mandos.model.apis.chembl_support.chembl_targets import TargetFactory
12			from pocketutils.tools.string_tools import StringTools
			0 ignored issues – show introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Unable to import 'pocketutils.tools.string_tools' Loading history... Unused Code introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Unused StringTools imported from pocketutils.tools.string_tools Loading history...
13
14			from mandos import logger
			0 ignored issues – show introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Imports from package mandos are not grouped Loading history...
15			from mandos.model.taxonomy import Taxonomy, Taxon
16			from typeddfs import TypedDf
			0 ignored issues – show introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Unable to import 'typeddfs' Loading history...
17
18			from mandos.model.apis.chembl_api import ChemblApi
			0 ignored issues – show introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Imports from package mandos are not grouped Loading history...
19			from mandos.model.apis.chembl_scrape_api import (
20			ChemblScrapePage,
21			ChemblScrapeApi,
22			ChemblTargetPredictionTable,
23			)
24			from mandos.model.apis.chembl_support import ChemblCompound
25			from mandos.model.apis.chembl_support.chembl_utils import ChemblUtils
26			from mandos.search.chembl import ChemblScrapeSearch
27			from mandos.model.concrete_hits import ChemblTargetPredictionHit
28			from mandos.search.chembl.target_traversal import TargetTraversalStrategies
29
30			P = ChemblScrapePage.target_predictions
31			T = ChemblTargetPredictionTable
32
33
34			class TargetPredictionSearch(ChemblScrapeSearch[ChemblTargetPredictionHit]):
			0 ignored issues – show Documentation introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Empty class docstring Loading history...
35			""" """
36
37			@classmethod
38			def page(cls) -> ChemblScrapePage:
			0 ignored issues – show introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
39			return ChemblScrapePage.target_predictions
40
41			def __init__(
			0 ignored issues – show best-practice introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Too many arguments (11/5) Loading history...
42			self,
			0 ignored issues – show Coding Style introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
43			key: str,
			0 ignored issues – show Coding Style introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
44			api: ChemblApi,
			0 ignored issues – show Coding Style introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
45			scrape: ChemblScrapeApi,
			0 ignored issues – show Coding Style introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
46			taxa: Sequence[Taxonomy],
			0 ignored issues – show Coding Style introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
47			traversal: str,
			0 ignored issues – show Coding Style introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
48			target_types: Set[str],
			0 ignored issues – show Coding Style introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
49			required_level: int = 70,
			0 ignored issues – show Coding Style introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
50			min_threshold: float = 1.0,
			0 ignored issues – show Coding Style introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
51			binding_score: float = 1.0,
			0 ignored issues – show Coding Style introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
52			nonbinding_score: float = 1.0,
			0 ignored issues – show Coding Style introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
53			):
54			super().__init__(key, api, scrape)
55			self.taxa = taxa
56			self.traversal_strategy = TargetTraversalStrategies.by_name(traversal, self.api)
57			self.target_types = target_types
58			if required_level not in [70, 80, 90]:
59			raise ValueError(f"required_level must be 70, 80, or 90, not {required_level}")
60			if min_threshold <= 0:
61			raise ValueError(f"min_threshold must be positive, not {min_threshold}")
62			if binding_score <= 0:
63			raise ValueError(f"binding_score must be positive, not {binding_score}")
64			if nonbinding_score <= 0:
65			raise ValueError(f"nonbinding_score must be positive, not {nonbinding_score}")
66			self.required_level = required_level
67			self.min_threshold = min_threshold
68			self.binding_score = binding_score
69			self.nonbinding_score = nonbinding_score
70
71			@property
72			def data_source(self) -> str:
			0 ignored issues – show introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
73			return "ChEMBL :: target predictions"
74
75			def find(self, lookup: str) -> Sequence[ChemblTargetPredictionHit]:
			0 ignored issues – show introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
76			ch = ChemblUtils(self.api).get_compound_dot_dict(lookup)
			0 ignored issues – show Coding Style Naming introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Variable name "ch" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]*\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
77			compound = ChemblUtils(self.api).compound_dot_dict_to_obj(ch)
78			table: TypedDf = self.scrape.fetch_predictions(compound.chid)
79			hits = []
80			for row in table.itertuples():
81			hits.extend(self.process(lookup, compound, row))
82			return hits
83
84			def process(
			0 ignored issues – show introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history... Comprehensibility introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report This function exceeds the maximum number of variables (19/15). Loading history...
85			self, lookup: str, compound: ChemblCompound, row
			0 ignored issues – show Coding Style introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
86			) -> Sequence[ChemblTargetPredictionHit]:
87			tax_id, tax_name = self._get_taxon(row.target_organism)
88			if tax_id is tax_name is None:
89			return []
90			thresh = row.activity_threshold
91			if row.activity_threshold < self.min_threshold:
92			return []
93			factory = TargetFactory(self.api)
94			target_obj = factory.find(row.target_chembl_id)
95			graph_factory = ChemblTargetGraphFactory.create(self.api, factory)
96			graph = graph_factory.at_target(target_obj)
97			ancestors: Sequence[ChemblTargetGraph] = self.traversal_strategy(graph)
98			lst = []
99			for ancestor in ancestors:
100			for conf_t, conf_v in zip(
101			[70, 80, 90], [row.confidence_70, row.confidence_80, row.confidence_90]
			0 ignored issues – show Coding Style introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
102			):
103			predicate = f"binding:{conf_v.yes_no_mixed}"
104			weight = (
105			np.sqrt(thresh)
106			* abs(conf_t / (100 - conf_t) * conf_v.score)
107			/ 4
108			/ np.sqrt(self.min_threshold)
109			)
110			hit = self._create_hit(
111			c_origin=lookup,
112			c_matched=compound.inchikey,
113			c_id=compound.chid,
114			c_name=compound.name,
115			predicate=predicate,
116			object_id=ancestor.chembl,
117			object_name=ancestor.name,
118			data_source=self.data_source,
119			exact_target_id=row.target_chembl_id,
120			exact_target_name=row.target_pref_name,
121			weight=weight,
122			prediction=conf_v,
123			confidence_set=conf_t,
124			threshold=thresh,
125			)
126			lst.append(hit)
127			return lst
128
129			def _get_taxon(self, organism: str) -> Tuple[Optional[int], Optional[str]]:
130			if len(self.taxa) == 0: # allow all
131			return None, organism
132			matches = {}
133			for tax in self.taxa:
134			matches += tax.get_by_id_or_name(organism)
135			if len(matches) == 0:
136			logger.debug(f"Taxon {organism} not in set. Excluding.")
137			return None, None
138			best: Taxon = next(iter(matches))
139			if best.scientific_name != organism and best.mnemonic != organism:
			0 ignored issues – show Unused Code introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Consider merging these comparisons with "in" to 'organism not in (best.scientific_name, best.mnemonic)' Loading history...
140			logger.warning(f"Organism {organism} matched to {best.scientific_name} by common name")
141			if len(matches) > 1:
142			logger.warning(
143			f"Multiple matches for taxon {organism}: {matches}; using {best.scientific_name}"
144			)
145			return best.id, organism
146
147
148			__all__ = ["TargetPredictionSearch"]
149

dmyersturnbull / mandos

Push — main ( 9813db...5006f2 )

mandos.search.chembl.target_predictions A

Complexity

Size/Duplication

Importance

6 Methods

Duplication Side-by-Side

Filter issues like