Passed
Push — dependabot/pip/sphinx-copybutt... ( c72176...cfd31d )
by
unknown
07:42 queued 05:46
created

EnrichmentCalculator._col_name()   A

Complexity

Conditions 1

Size

Total Lines 2
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 2
nop 2
dl 0
loc 2
rs 10
c 0
b 0
f 0
1
"""
2
Scoring (regression and enrichment) calculations.
3
"""
4
import abc
5
import enum
6
import math
7
from typing import Generic, Mapping, Sequence, Type, TypeVar, Union
8
9
import pandas as pd
0 ignored issues
show
introduced by
Unable to import 'pandas'
Loading history...
10
from typeddfs import TypedDfs
0 ignored issues
show
introduced by
Unable to import 'typeddfs'
Loading history...
11
12
from mandos.analysis import AnalysisUtils as Au
13
from mandos.model import CleverEnum
14
from mandos.model.hits import AbstractHit, HitFrame, HitUtils, Pair
15
16
# Type variable for a single score value; bounded to int, float, or bool.
S = TypeVar("S", bound=Union[int, float, bool])
0 ignored issues
show
Coding Style Naming introduced by
Class name "S" doesn't conform to PascalCase naming style ('[^\\W\\da-z][^\\W_]+$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
17
18
19
# Typed DataFrame for enrichment results. It requires string "predicate" and
# "object" columns and an integer "samples" column.
EnrichmentDf = (
    TypedDfs.typed("EnrichmentDf")
    .require("predicate", "object", dtype=str)
    .require("samples", dtype=int)
).build()
# extra cols are, e.g., alpha(score_1), alpha(is_hit)
25
26
27
# Typed DataFrame of scores, indexed by a required string "inchikey" column.
# "score" and the boolean "is_hit" are reserved column names
# (NOTE(review): presumably reserved means optional -- confirm in typeddfs docs).
ScoreDf = (
    TypedDfs.typed("ScoreDf")
    .require("inchikey", dtype=str, index=True)
    .reserve("score")
    .reserve("is_hit", dtype=bool)
).build()
# extra cols are, e.g., score_1, score_hello, is_lethal, is_good
34
35
36
def _score_cols(self: ScoreDf) -> Sequence[str]:
    """
    Lists the names of the columns that hold scores.

    A column qualifies if it is named exactly ``score`` or if its name
    begins with ``score_`` or ``is_``. Column order is preserved.
    """
    score_prefixes = ("score_", "is_")
    names = []
    for name in self.columns:
        if name == "score" or name.startswith(score_prefixes):
            names.append(name)
    return names
42
43
44
def _all_scores(self: ScoreDf) -> Mapping[str, Mapping[str, S]]:
    """
    Maps each score column name to that column's values as a dict.

    Returns:
        ``{column: self[column].to_dict()}`` for every column returned by
        ``self.score_cols()``.
        NOTE(review): the inner keys are presumably the inchikey index values -- confirm.
    """
    # score_cols is attached to ScoreDf as a plain method, not a property, so it
    # must be called; iterating the bound method itself raises TypeError.
    return {col: self[col].to_dict() for col in self.score_cols()}
49
50
51
ScoreDf.score_cols = _score_cols
52
ScoreDf.all_scores = _all_scores
53
54
55
class EnrichmentCalculator(Generic[S], metaclass=abc.ABCMeta):
    """
    Base class for algorithms that compute one value per pair from hits and scores.

    Subclasses implement :meth:`for_pair`; the base implementation raises
    ``NotImplementedError``.
    """

    def __init__(self, alg_name: str):
        """
        Args:
            alg_name: Label used to name result columns, as ``alg_name(column)``
        """
        self.alg_name = alg_name

    def calc_many(self, data: HitFrame, score_df: ScoreDf) -> EnrichmentDf:
        """
        Runs the calculation once per score column and tabulates the results.

        Args:
            data: Hits, converted to hit objects with ``HitUtils.df_to_hits``
            score_df: Scores; one calculation is run per entry of ``score_df.all_scores()``

        Returns:
            An ``EnrichmentDf`` with one row per pair, holding the predicate, the object,
            the number of hits for the pair, and one ``alg_name(column)`` value per
            score column
        """
        hits = HitUtils.df_to_hits(data)
        # NOTE(review): hits are grouped by "pair" here but by "to_pair" in calc();
        # the keys of both groupings must be identical for _results_row to find
        # its values -- confirm which attribute AbstractHit actually exposes.
        samples_per_pair = {k: len(v) for k, v in Au.hit_multidict(hits, "pair").items()}
        # all_scores() returns a mapping; .items() is required to unpack
        # (column, scores). Iterating the mapping directly yields only the keys.
        results = {
            col: self.calc(hits, scores) for col, scores in score_df.all_scores().items()
        }
        return EnrichmentDf(
            [
                pd.Series(self._results_row(pair, n_samples, results))
                for pair, n_samples in samples_per_pair.items()
            ]
        )

    def _results_row(
        self, pair: Pair, n_samples: int, results
    ) -> Mapping[str, Union[str, int, float]]:
        """
        Builds the row for one pair.

        Args:
            pair: The pair; its ``pred`` and ``obj`` fill the first two columns
            n_samples: Number of hits for the pair
            results: Maps score column name to a mapping from pair to calculated value

        Returns:
            The fixed columns followed by one ``alg_name(column)`` entry per score column
        """
        return {
            **dict(
                predicate=pair.pred,
                object=pair.obj,
                samples=n_samples,
            ),
            **{self._col_name(col): vs[pair] for col, vs in results.items()},
        }

    def _col_name(self, col: str) -> str:
        """Returns the result column name for score column ``col``, e.g. ``alpha(score_1)``."""
        return self.alg_name + "(" + col + ")"

    def calc(self, hits: Sequence[AbstractHit], scores: Mapping[str, S]) -> Mapping[Pair, float]:
        """
        Calculates a value for every pair found among ``hits``.

        Args:
            hits: All hits; grouped by their ``to_pair`` attribute
            scores: Scores looked up by the subclasses via ``hit.origin_inchikey``

        Returns:
            A mapping from each pair to the value of ``for_pair`` on that pair's hits
        """
        pair_to_hits = Au.hit_multidict(hits, "to_pair")
        # Each pair is scored from its own hits only. Passing the full hit list
        # would give every pair the same value.
        return {
            pair: self.for_pair(the_hits, scores) for pair, the_hits in pair_to_hits.items()
        }

    def for_pair(self, hits: Sequence[AbstractHit], scores: Mapping[str, S]) -> float:
        """
        Calculates the value for a single pair; must be overridden.

        Args:
            hits: The hits belonging to one pair
            scores: Scores for the compounds

        Raises:
            NotImplementedError: Always, in this base class
        """
        raise NotImplementedError()
97
98
99
class AlphaCalculator(EnrichmentCalculator[S]):
    """
    Averages, over data sources, the per-source mean of weighted score terms.
    """

    def for_pair(self, hits: Sequence[AbstractHit], scores: Mapping[str, S]) -> float:
        """
        Groups ``hits`` by ``data_source`` and returns the mean of the per-source terms.

        Raises:
            ZeroDivisionError: If ``hits`` yields no groups
        """
        by_source = Au.hit_multidict(hits, "data_source")
        per_source = [self._calc_term(group, scores) for group in by_source.values()]
        return math.fsum(per_source) / len(per_source)

    def _calc_term(self, hits: Sequence[AbstractHit], scores: Mapping[str, S]) -> float:
        """
        Returns the mean over ``hits`` of ``Au.elle(hit.value) * (2 * float(score - 1)) ** 2``,
        where ``score`` is ``scores[hit.origin_inchikey]``.
        """
        # NOTE(review): 1 is subtracted from the score *before* doubling. If the intent
        # was to rescale the score as (2 * score - 1), the parentheses are misplaced --
        # confirm against the algorithm docs. Behaviour is kept as written.
        contributions = []
        for hit in hits:
            weight = Au.elle(hit.value)
            rescaled = 2 * float(scores[hit.origin_inchikey] - 1)
            contributions.append(weight * rescaled ** 2)
        return math.fsum(contributions) / len(contributions)
112
113
114
class FoldUnweightedCalc(EnrichmentCalculator[bool]):
    """
    Ratio of the count of hits with a truthy score to the count with a falsy score.
    """

    def for_pair(self, hits: Sequence[AbstractHit], scores: Mapping[str, S]) -> float:
        """
        Counts hits by the truthiness of ``scores[hit.origin_inchikey]``.

        Returns:
            ``n_truthy / n_falsy``, or infinity when no hit has a falsy score
            (including when ``hits`` is empty)
        """
        n_flagged = sum(1 for hit in hits if scores[hit.origin_inchikey])
        n_unflagged = sum(1 for hit in hits if not scores[hit.origin_inchikey])
        if n_unflagged == 0:
            return math.inf
        return n_flagged / n_unflagged
121
122
123
class FoldWeightedCalc(EnrichmentCalculator[bool]):
    """
    Ratio of summed hit values for truthy-scored hits to summed hit values for the rest.
    """

    def for_pair(self, hits: Sequence[AbstractHit], scores: Mapping[str, S]) -> float:
        """
        Splits ``hits`` by the truthiness of ``scores[hit.origin_inchikey]`` and
        sums ``hit.value`` within each group.

        Returns:
            ``sum(truthy values) / sum(falsy values)``, or infinity when the
            falsy-group sum equals zero
        """
        flagged_total = math.fsum(
            hit.value for hit in hits if scores[hit.origin_inchikey]
        )
        unflagged_total = math.fsum(
            hit.value for hit in hits if not scores[hit.origin_inchikey]
        )
        if unflagged_total == 0:
            return math.inf
        return flagged_total / unflagged_total
132
133
134
class SumWeightedCalc(EnrichmentCalculator[S]):
    """
    Mean over hits of the compound's score multiplied by the hit's value.
    """

    def for_pair(self, hits: Sequence[AbstractHit], scores: Mapping[str, S]) -> float:
        """
        Returns:
            The sum of ``scores[hit.origin_inchikey] * hit.value`` divided by ``len(hits)``

        Raises:
            ZeroDivisionError: If ``hits`` is empty
        """
        weighted = (scores[hit.origin_inchikey] * hit.value for hit in hits)
        total = math.fsum(weighted)
        return total / len(hits)
137
138
139
class SumUnweightedCalc(EnrichmentCalculator[S]):
    """
    Mean over hits of the compound's score.
    """

    def for_pair(self, hits: Sequence[AbstractHit], scores: Mapping[str, S]) -> float:
        """
        Returns:
            The sum of ``scores[hit.origin_inchikey]`` divided by ``len(hits)``

        Raises:
            ZeroDivisionError: If ``hits`` is empty
        """
        per_hit = (scores[hit.origin_inchikey] for hit in hits)
        total = math.fsum(per_hit)
        return total / len(hits)
142
143
144
class EnrichmentAlg(CleverEnum):
    """
    The available enrichment algorithms.

    Each member has a display symbol, a one-line description, and the
    calculator class that implements it.
    """

    alpha = enum.auto()
    fold = enum.auto()
    fold_w = enum.auto()
    sum = enum.auto()
    sum_w = enum.auto()

    @property
    def description(self) -> str:
        """A one-line, human-readable formula or summary, prefixed with the symbol."""
        s = self.symbol
        return {
            EnrichmentAlg.alpha: rf"[float] {s}(p) = Mean product of rescaled weights and scores; see the docs",
            EnrichmentAlg.fold: rf"[bool] {s}(p) = #(c has p and hit) / #(c has p and not hit)",
            EnrichmentAlg.fold_w: rf"[bool] {s}(p) = ∑(w(c, pair) s.t. c is hit) / ∑(w(c, pair) s.t. c not hit)",
            EnrichmentAlg.sum: rf"{s}(p) = ∑(score(c) s.t. c has p)",
            EnrichmentAlg.sum_w: rf"{s}(p) = ∑(score(c) × w(c, p) for all c)",
        }[self]

    @property
    def symbol(self) -> str:
        """The short (Greek-letter) symbol for the algorithm."""
        return {
            EnrichmentAlg.alpha: "α",
            EnrichmentAlg.fold: r"β",
            EnrichmentAlg.fold_w: "β*",
            EnrichmentAlg.sum: r"γ",
            EnrichmentAlg.sum_w: r"γ*",
        }[self]

    @property
    def clazz(self) -> Type[EnrichmentCalculator]:
        """The calculator class that implements the algorithm."""
        # The table is keyed by enum member, so it is indexed with ``self`` (as in
        # ``description`` and ``symbol``), not with the member's name string.
        return {
            EnrichmentAlg.alpha: AlphaCalculator,
            EnrichmentAlg.fold: FoldUnweightedCalc,
            EnrichmentAlg.fold_w: FoldWeightedCalc,
            EnrichmentAlg.sum: SumUnweightedCalc,
            EnrichmentAlg.sum_w: SumWeightedCalc,
        }[self]
183
184
185
class EnrichmentCalculation:
    """
    Factory for enrichment calculators.
    """

    @classmethod
    def create(cls, algorithm: Union[str, EnrichmentAlg]) -> EnrichmentCalculator:
        """
        Instantiates the calculator for an algorithm.

        Args:
            algorithm: An ``EnrichmentAlg`` member or a string naming one

        Returns:
            A new calculator of the algorithm's class, labelled with the member's name
            so that result columns read like ``alpha(score_1)``
        """
        # The lookup belongs to the algorithm enum, not to the EnrichmentDf DataFrame type.
        # NOTE(review): assumes CleverEnum provides ``of`` accepting a str or a member.
        alg = EnrichmentAlg.of(algorithm)
        # EnrichmentCalculator.__init__ requires alg_name.
        return alg.clazz(alg.name)
190
191
192
# Public names exported by this module.
__all__ = [
    "AlphaCalculator",
    "EnrichmentCalculator",
    "FoldUnweightedCalc",
    "FoldWeightedCalc",
    "SumUnweightedCalc",
    "SumWeightedCalc",
    "EnrichmentCalculation",
    "EnrichmentAlg",
    "EnrichmentDf",
    "ScoreDf",
]
204