Passed
Push — main ( 5006f2...cee75c )
by Douglas
04:00
created

TargetPredictionSearch.__init__()   B

Complexity

Conditions 5

Size

Total Lines 29
Code Lines 28

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 5
eloc 28
nop 11
dl 0
loc 29
rs 8.7413
c 0
b 0
f 0

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand; their parameter lists also tend to become inconsistent as soon as you need more or different data.

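There are several approaches to avoid long parameter lists, for example: replace a parameter with a method call, preserve the whole object instead of passing its individual fields, or introduce a parameter object that groups related values.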

1
from __future__ import annotations
0 ignored issues
show
introduced by
Missing module docstring
Loading history...
2
3
from typing import Sequence, Optional, Tuple, Set
4
5
import numpy as np
0 ignored issues
show
introduced by
Unable to import 'numpy'
Loading history...
6
7
from mandos.model.apis.chembl_support.chembl_target_graphs import (
8
    ChemblTargetGraphFactory,
9
    ChemblTargetGraph,
10
)
11
from mandos.model.apis.chembl_support.chembl_targets import TargetFactory
12
from pocketutils.tools.string_tools import StringTools
0 ignored issues
show
Unused Code introduced by
Unused StringTools imported from pocketutils.tools.string_tools
Loading history...
introduced by
Unable to import 'pocketutils.tools.string_tools'
Loading history...
13
14
from mandos import logger
0 ignored issues
show
introduced by
Imports from package mandos are not grouped
Loading history...
15
from mandos.model.taxonomy import Taxonomy, Taxon
16
from typeddfs import TypedDf
0 ignored issues
show
introduced by
Unable to import 'typeddfs'
Loading history...
17
18
from mandos.model.apis.chembl_api import ChemblApi
0 ignored issues
show
introduced by
Imports from package mandos are not grouped
Loading history...
19
from mandos.model.apis.chembl_scrape_api import (
20
    ChemblScrapePage,
21
    ChemblScrapeApi,
22
    ChemblTargetPredictionTable,
23
)
24
from mandos.model.apis.chembl_support import ChemblCompound
25
from mandos.model.apis.chembl_support.chembl_utils import ChemblUtils
26
from mandos.search.chembl import ChemblScrapeSearch
27
from mandos.model.concrete_hits import ChemblTargetPredictionHit
28
from mandos.search.chembl.target_traversal import TargetTraversalStrategies
29
30
# Short module-level alias for the ``target_predictions`` attribute of ChemblScrapePage.
# NOTE(review): not referenced by the code visible in this module -- confirm it is still needed.
P = ChemblScrapePage.target_predictions
31
# Short module-level alias for ChemblTargetPredictionTable.
# NOTE(review): not referenced by the code visible in this module -- confirm it is still needed.
T = ChemblTargetPredictionTable
32
33
34
class TargetPredictionSearch(ChemblScrapeSearch[ChemblTargetPredictionHit]):
    """
    Search over the ChEMBL target-predictions page for a compound.

    For every prediction row of the looked-up compound, the predicted target is
    expanded with the configured traversal strategy, and one hit is emitted per
    resulting target and per confidence level (70, 80 and 90).
    """

    # Confidence levels for which a prediction row carries a ``confidence_<level>`` field.
    _CONFIDENCE_LEVELS = (70, 80, 90)

    @classmethod
    def page(cls) -> ChemblScrapePage:
        """Return the scrape page this search reads (``target_predictions``)."""
        return ChemblScrapePage.target_predictions

    def __init__(
        self,
        key: str,
        api: ChemblApi,
        scrape: ChemblScrapeApi,
        taxa: Sequence[Taxonomy],
        traversal: str,
        target_types: Set[str],
        required_level: int = 70,
        min_threshold: float = 1.0,
        binding_score: float = 1.0,
        nonbinding_score: float = 1.0,
    ):
        """
        Configure the search and validate its numeric settings.

        Args:
            key: Forwarded to the parent constructor.
            api: Forwarded to the parent constructor (presumably stored as ``self.api``,
                which this class reads afterwards -- confirm in the parent class).
            scrape: Forwarded to the parent constructor (presumably stored as ``self.scrape``).
            taxa: Taxonomies that a row's target organism must match.
                An empty sequence accepts every organism.
            traversal: Name of the traversal strategy, resolved through
                ``TargetTraversalStrategies.by_name``.
            target_types: Stored as ``self.target_types``.
            required_level: Must be 70, 80, or 90.
            min_threshold: Must be positive. Rows whose ``activity_threshold`` is lower
                are skipped, and hit weights are normalized by its square root.
            binding_score: Must be positive.
            nonbinding_score: Must be positive.

        Raises:
            ValueError: If ``required_level`` is not 70, 80, or 90, or if any of
                ``min_threshold``, ``binding_score``, ``nonbinding_score`` is not positive.

        NOTE(review): ``target_types``, ``required_level``, ``binding_score`` and
        ``nonbinding_score`` are stored but not read by any method visible in this
        class; ``process`` emits hits for all three confidence levels. Confirm whether
        filtering/scoring by these settings is expected to happen elsewhere.
        """
        super().__init__(key, api, scrape)
        self.taxa = taxa
        self.traversal_strategy = TargetTraversalStrategies.by_name(traversal, self.api)
        self.target_types = target_types
        if required_level not in self._CONFIDENCE_LEVELS:
            raise ValueError(f"required_level must be 70, 80, or 90, not {required_level}")
        # Same check and message shape for each of the three strictly-positive settings.
        for name, value in (
            ("min_threshold", min_threshold),
            ("binding_score", binding_score),
            ("nonbinding_score", nonbinding_score),
        ):
            if value <= 0:
                raise ValueError(f"{name} must be positive, not {value}")
        self.required_level = required_level
        self.min_threshold = min_threshold
        self.binding_score = binding_score
        self.nonbinding_score = nonbinding_score

    @property
    def data_source(self) -> str:
        """Return the data-source label attached to every hit of this search."""
        return "ChEMBL :: target predictions"

    def find(self, lookup: str) -> Sequence[ChemblTargetPredictionHit]:
        """
        Fetch the prediction table for one compound and convert its rows to hits.

        Args:
            lookup: Compound identifier as given by the caller; it is resolved through
                ``ChemblUtils`` and also recorded on each hit as its origin.

        Returns:
            All hits produced by ``process`` for the rows of the table, in row order.
        """
        utils = ChemblUtils(self.api)
        compound_dict = utils.get_compound_dot_dict(lookup)
        compound = utils.compound_dot_dict_to_obj(compound_dict)
        table: TypedDf = self.scrape.fetch_predictions(compound.chid)
        hits = []
        for row in table.itertuples():
            hits.extend(self.process(lookup, compound, row))
        return hits

    def process(
        self, lookup: str, compound: ChemblCompound, row
    ) -> Sequence[ChemblTargetPredictionHit]:
        """
        Convert one prediction row into zero or more hits.

        Args:
            lookup: The original compound identifier supplied to ``find``.
            compound: The resolved compound the row belongs to.
            row: One row of the prediction table (as yielded by ``itertuples``); the
                fields read here are ``target_organism``, ``activity_threshold``,
                ``target_chembl_id``, ``target_pref_name`` and ``confidence_70/80/90``.

        Returns:
            An empty list if the row's organism is excluded by ``self.taxa`` or its
            activity threshold is below ``self.min_threshold``; otherwise one hit per
            traversed target and per confidence level.
        """
        tax_id, tax_name = self._get_taxon(row.target_organism)
        # (None, None) means "organism not accepted"; (None, name) means "no taxon filter".
        if tax_id is None and tax_name is None:
            return []
        thresh = row.activity_threshold
        if thresh < self.min_threshold:
            return []
        factory = TargetFactory(self.api)
        target_obj = factory.find(row.target_chembl_id)
        graph = ChemblTargetGraphFactory.create(self.api, factory).at_target(target_obj)
        ancestors: Sequence[ChemblTargetGraph] = self.traversal_strategy(graph)
        # The per-level predictions depend only on the row, so read them once.
        confidences = [
            (level, getattr(row, f"confidence_{level}")) for level in self._CONFIDENCE_LEVELS
        ]
        hits = []
        for ancestor in ancestors:
            for conf_t, conf_v in confidences:
                hits.append(
                    self._create_hit(
                        c_origin=lookup,
                        c_matched=compound.inchikey,
                        c_id=compound.chid,
                        c_name=compound.name,
                        predicate=f"binding:{conf_v.yes_no_mixed}",
                        object_id=ancestor.chembl,
                        object_name=ancestor.name,
                        data_source=self.data_source,
                        exact_target_id=row.target_chembl_id,
                        exact_target_name=row.target_pref_name,
                        weight=self._weight(thresh, conf_t, conf_v.score),
                        prediction=conf_v,
                        confidence_set=conf_t,
                        threshold=thresh,
                    )
                )
        return hits

    def _weight(self, thresh, conf_t: int, score):
        """
        Compute the weight of a hit.

        The result is ``sqrt(thresh) * |conf_t / (100 - conf_t) * score| / 4``,
        divided by ``sqrt(self.min_threshold)``.
        """
        odds = conf_t / (100 - conf_t)
        return np.sqrt(thresh) * abs(odds * score) / 4 / np.sqrt(self.min_threshold)

    def _get_taxon(self, organism: str) -> Tuple[Optional[int], Optional[str]]:
        """
        Match an organism name against the configured taxonomies.

        Args:
            organism: Organism name taken from a prediction row.

        Returns:
            ``(None, organism)`` when no taxonomies are configured (everything is
            allowed), ``(None, None)`` when the organism matches none of them, and
            ``(taxon id, organism)`` for the first match otherwise.
        """
        if len(self.taxa) == 0:  # allow all
            return None, organism
        # Collect matches in first-seen order without duplicates. A list is required
        # here: the previous ``{}`` accumulator raised TypeError on ``+=``.
        matches = []
        for tax in self.taxa:
            for found in tax.get_by_id_or_name(organism):
                if found not in matches:
                    matches.append(found)
        if not matches:
            logger.debug(f"Taxon {organism} not in set. Excluding.")
            return None, None
        best: Taxon = matches[0]
        if organism not in (best.scientific_name, best.mnemonic):
            logger.warning(f"Organism {organism} matched to {best.scientific_name} by common name")
        if len(matches) > 1:
            logger.warning(
                f"Multiple matches for taxon {organism}: {matches}; using {best.scientific_name}"
            )
        return best.id, organism
146
147
148
# Names exported by ``from <this module> import *``: only the search class.
__all__ = ["TargetPredictionSearch"]
149