Passed
Push — main ( 9813db...5006f2 )
by Douglas
01:43
created

TargetPredictionSearch.process()   B

Complexity

Conditions 5

Size

Total Lines 44
Code Lines 40

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 5
eloc 40
nop 4
dl 0
loc 44
rs 8.4533
c 0
b 0
f 0
1
from __future__ import annotations
0 ignored issues
show
introduced by
Missing module docstring
Loading history...
2
3
from typing import Sequence, Optional, Tuple, Set
4
5
import numpy as np
0 ignored issues
show
introduced by
Unable to import 'numpy'
Loading history...
6
7
from mandos.model.apis.chembl_support.chembl_target_graphs import (
8
    ChemblTargetGraphFactory,
9
    ChemblTargetGraph,
10
)
11
from mandos.model.apis.chembl_support.chembl_targets import TargetFactory
12
from pocketutils.tools.string_tools import StringTools
0 ignored issues
show
introduced by
Unable to import 'pocketutils.tools.string_tools'
Loading history...
Unused Code introduced by
Unused StringTools imported from pocketutils.tools.string_tools
Loading history...
13
14
from mandos import logger
0 ignored issues
show
introduced by
Imports from package mandos are not grouped
Loading history...
15
from mandos.model.taxonomy import Taxonomy, Taxon
16
from typeddfs import TypedDf
0 ignored issues
show
introduced by
Unable to import 'typeddfs'
Loading history...
17
18
from mandos.model.apis.chembl_api import ChemblApi
0 ignored issues
show
introduced by
Imports from package mandos are not grouped
Loading history...
19
from mandos.model.apis.chembl_scrape_api import (
20
    ChemblScrapePage,
21
    ChemblScrapeApi,
22
    ChemblTargetPredictionTable,
23
)
24
from mandos.model.apis.chembl_support import ChemblCompound
25
from mandos.model.apis.chembl_support.chembl_utils import ChemblUtils
26
from mandos.search.chembl import ChemblScrapeSearch
27
from mandos.model.concrete_hits import ChemblTargetPredictionHit
28
from mandos.search.chembl.target_traversal import TargetTraversalStrategies
29
30
# Short module-level alias for the target-predictions scrape page
# (the same member that ``TargetPredictionSearch.page()`` returns).
P = ChemblScrapePage.target_predictions
31
# Short module-level alias for the prediction-table type imported from
# ``chembl_scrape_api``.
T = ChemblTargetPredictionTable
32
33
34
class TargetPredictionSearch(ChemblScrapeSearch[ChemblTargetPredictionHit]):
    """
    Search that converts scraped ChEMBL target-prediction rows into hits.

    For a compound, the prediction table is fetched through the scrape API.
    Every row that passes the taxon filter and the activity-threshold filter
    yields one hit per target returned by the traversal strategy and per
    confidence level (70, 80, and 90).

    NOTE(review): ``target_types``, ``required_level``, ``binding_score`` and
    ``nonbinding_score`` are validated and stored by ``__init__`` but are not
    read by any method of this class; confirm whether they are consumed
    elsewhere or are still to be wired in.
    """

    @classmethod
    def page(cls) -> ChemblScrapePage:
        """Return the scrape page this search reads (``target_predictions``)."""
        return ChemblScrapePage.target_predictions

    def __init__(
        self,
        key: str,
        api: ChemblApi,
        scrape: ChemblScrapeApi,
        taxa: Sequence[Taxonomy],
        traversal: str,
        target_types: Set[str],
        required_level: int = 70,
        min_threshold: float = 1.0,
        binding_score: float = 1.0,
        nonbinding_score: float = 1.0,
    ):
        """
        Set up the search.

        Args:
            key: Passed through to the parent constructor.
            api: ChEMBL API; passed to the parent constructor and used to
                resolve the traversal strategy.
            scrape: ChEMBL scrape API; passed to the parent constructor.
            taxa: Taxonomies used to filter rows by target organism.
                An empty sequence disables the filter (all organisms pass).
            traversal: Name of a strategy known to
                ``TargetTraversalStrategies.by_name``.
            target_types: Stored as-is on the instance.
            required_level: Must be 70, 80, or 90.
            min_threshold: Rows whose ``activity_threshold`` is below this
                value are skipped; must be positive.
            binding_score: Must be positive.
            nonbinding_score: Must be positive.

        Raises:
            ValueError: If ``required_level`` is not 70, 80, or 90, or if any
                of ``min_threshold``, ``binding_score``, ``nonbinding_score``
                is not strictly positive.
        """
        super().__init__(key, api, scrape)
        self.taxa = taxa
        self.traversal_strategy = TargetTraversalStrategies.by_name(traversal, self.api)
        self.target_types = target_types
        if required_level not in (70, 80, 90):
            raise ValueError(f"required_level must be 70, 80, or 90, not {required_level}")
        if min_threshold <= 0:
            raise ValueError(f"min_threshold must be positive, not {min_threshold}")
        if binding_score <= 0:
            raise ValueError(f"binding_score must be positive, not {binding_score}")
        if nonbinding_score <= 0:
            raise ValueError(f"nonbinding_score must be positive, not {nonbinding_score}")
        self.required_level = required_level
        self.min_threshold = min_threshold
        self.binding_score = binding_score
        self.nonbinding_score = nonbinding_score

    @property
    def data_source(self) -> str:
        """Return the fixed data-source label attached to every hit."""
        return "ChEMBL :: target predictions"

    def find(self, lookup: str) -> Sequence[ChemblTargetPredictionHit]:
        """
        Find all target-prediction hits for one compound.

        Args:
            lookup: Compound identifier handed to
                ``ChemblUtils.get_compound_dot_dict``.

        Returns:
            The concatenated hits produced by ``process`` for every row of the
            compound's scraped prediction table.
        """
        utils = ChemblUtils(self.api)
        compound = utils.compound_dot_dict_to_obj(utils.get_compound_dot_dict(lookup))
        table: TypedDf = self.scrape.fetch_predictions(compound.chid)
        hits = []
        for row in table.itertuples():
            hits.extend(self.process(lookup, compound, row))
        return hits

    def process(
        self, lookup: str, compound: ChemblCompound, row
    ) -> Sequence[ChemblTargetPredictionHit]:
        """
        Convert one prediction-table row into hits.

        Args:
            lookup: The original compound identifier (stored as ``c_origin``).
            compound: The resolved compound.
            row: One row from ``table.itertuples()``. The attributes read are
                ``target_organism``, ``activity_threshold``,
                ``target_chembl_id``, ``target_pref_name`` and
                ``confidence_70`` / ``confidence_80`` / ``confidence_90``
                (each read for ``yes_no_mixed`` and ``score``).

        Returns:
            An empty list if the organism is excluded by the taxon filter or
            the row's activity threshold is below ``self.min_threshold``;
            otherwise one hit per traversed target and confidence level.
        """
        tax_id, tax_name = self._get_taxon(row.target_organism)
        # ``_get_taxon`` returns (None, None) only when the organism is
        # excluded; (None, organism) means "no taxon filter configured".
        if tax_id is None and tax_name is None:
            return []
        thresh = row.activity_threshold
        if thresh < self.min_threshold:
            return []
        factory = TargetFactory(self.api)
        target_obj = factory.find(row.target_chembl_id)
        graph_factory = ChemblTargetGraphFactory.create(self.api, factory)
        graph = graph_factory.at_target(target_obj)
        ancestors: Sequence[ChemblTargetGraph] = self.traversal_strategy(graph)
        # Loop-invariant pieces, computed once per row.
        confidences = (
            (70, row.confidence_70),
            (80, row.confidence_80),
            (90, row.confidence_90),
        )
        sqrt_thresh = np.sqrt(thresh)
        sqrt_min_thresh = np.sqrt(self.min_threshold)
        hits = []
        for ancestor in ancestors:
            for conf_t, conf_v in confidences:
                predicate = f"binding:{conf_v.yes_no_mixed}"
                # Operand order is kept as in the original formula so the
                # floating-point result is unchanged.
                weight = (
                    sqrt_thresh
                    * abs(conf_t / (100 - conf_t) * conf_v.score)
                    / 4
                    / sqrt_min_thresh
                )
                hit = self._create_hit(
                    c_origin=lookup,
                    c_matched=compound.inchikey,
                    c_id=compound.chid,
                    c_name=compound.name,
                    predicate=predicate,
                    object_id=ancestor.chembl,
                    object_name=ancestor.name,
                    data_source=self.data_source,
                    exact_target_id=row.target_chembl_id,
                    exact_target_name=row.target_pref_name,
                    weight=weight,
                    prediction=conf_v,
                    confidence_set=conf_t,
                    threshold=thresh,
                )
                hits.append(hit)
        return hits

    def _get_taxon(self, organism: str) -> Tuple[Optional[int], Optional[str]]:
        """
        Resolve an organism name against the configured taxonomies.

        Args:
            organism: Organism name taken from a prediction row.

        Returns:
            ``(None, organism)`` if no taxonomies are configured (allow all);
            ``(None, None)`` if the organism matches none of them;
            otherwise ``(taxon_id, organism)`` for the first match found.
        """
        if not self.taxa:  # allow all
            return None, organism
        # The original started from ``{}`` (a dict) and used ``+=``, which
        # raises TypeError. Collect into a list instead, skipping duplicates
        # so the same taxon found in several taxonomies is not reported as
        # "multiple matches".
        # Assumes get_by_id_or_name returns an iterable of Taxon -- TODO confirm.
        matches = []
        for tax in self.taxa:
            for match in tax.get_by_id_or_name(organism):
                if match not in matches:
                    matches.append(match)
        if not matches:
            logger.debug(f"Taxon {organism} not in set. Excluding.")
            return None, None
        best: Taxon = matches[0]
        if organism not in (best.scientific_name, best.mnemonic):
            logger.warning(f"Organism {organism} matched to {best.scientific_name} by common name")
        if len(matches) > 1:
            logger.warning(
                f"Multiple matches for taxon {organism}: {matches}; using {best.scientific_name}"
            )
        return best.id, organism
146
147
148
# Names exported by ``from <this module> import *``; only the search class.
__all__ = ["TargetPredictionSearch"]
149