Passed
Push — main ( a80564...ec3fe3 )
by Douglas
03:59
created

mandos.analysis.enrichment.EnrichmentAlg.clazz()   A

Complexity

Conditions 1

Size

Total Lines 9
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 9
nop 1
dl 0
loc 9
rs 9.95
c 0
b 0
f 0
1
"""
2
Scoring (regression and enrichment) calculations.
3
"""
4
import abc
5
import enum
6
import math
7
from typing import Generic, Mapping, Sequence, Type, TypeVar, Union
8
9
import pandas as pd
0 ignored issues
show
introduced by
Unable to import 'pandas'
Loading history...
10
from typeddfs import TypedDfs
0 ignored issues
show
introduced by
Unable to import 'typeddfs'
Loading history...
11
12
from mandos.analysis import AnalysisUtils as Au
13
from mandos.model import CleverEnum
14
from mandos.model.hits import AbstractHit, HitFrame, HitUtils, Pair
15
16
# Type of a single per-compound score value: numeric scores or boolean hit calls.
S = TypeVar("S", bound=Union[int, float, bool])
0 ignored issues
show
Coding Style Naming introduced by
Class name "S" doesn't conform to PascalCase naming style ('[^\\W\\da-z][^\\W_]+$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
17
18
19
# Enrichment results: requires string ``predicate`` and ``object`` columns
# and an integer ``samples`` column.
EnrichmentDf = (
    TypedDfs.typed("EnrichmentDf")
    .require("predicate", "object", dtype=str)
    .require("samples", dtype=int)
).build()
# extra cols are, e.g., alpha(score_1), alpha(is_hit)


# Per-compound scores: requires a string ``inchikey`` column; ``score`` is a reserved column name.
ScoreDf = (TypedDfs.typed("ScoreDf").require("inchikey", dtype=str).reserve("score")).build()
# extra cols are, e.g., score_1, score_hello
29
30
31
# Per-compound hit calls: requires a string ``inchikey`` column.
# ``.build()`` is needed so that this name is a DataFrame class like ``EnrichmentDf`` and
# ``ScoreDf``; without it the name is bound to the typeddfs builder, and
# ``isinstance(df, IsHitDf)`` would raise ``TypeError``.
IsHitDf = TypedDfs.typed("IsHitDf").require("inchikey", dtype=str).build()
# extra cols are is_hit, etc.
33
34
35
def _vars(self: ScoreDf) -> Sequence[str]:
    """
    Lists the names of the variable columns of a score or hit frame.

    For a ``ScoreDf`` these are ``score`` and every column starting with ``score_``.
    For an ``IsHitDf`` these are the columns starting with ``is_``.

    Returns:
        The matching column names, in the order they appear in ``self.columns``
    """
    selected = []
    for name in self.columns:
        if isinstance(self, ScoreDf) and (name == "score" or name.startswith("score_")):
            selected.append(name)
        elif isinstance(self, IsHitDf) and name.startswith("is_"):
            selected.append(name)
    return selected
42
43
44
def _var_map(self: ScoreDf) -> Mapping[str, Mapping[str, S]]:
    """
    Maps each variable column to its per-compound values.

    Returns:
        A dict from variable column name (see ``_vars``) to a dict from InChIKey to
        the value in that column. If an InChIKey occurs more than once, the last row wins.
    """
    # ``inchikey`` is declared as a required column, and the calculators look scores up by
    # ``hit.origin_inchikey``, so key the values by that column rather than by the frame index.
    # NOTE(review): assumes ``inchikey`` is a regular column, not part of the index -- confirm.
    inchikeys = self["inchikey"].tolist()
    # There is no ``score_cols`` attribute on these frames; ``_vars`` selects the columns.
    return {col: dict(zip(inchikeys, self[col].tolist())) for col in _vars(self)}
49
50
51
ScoreDf.vars = _vars
52
ScoreDf.var_map = _var_map
53
IsHitDf.vars = _vars
54
IsHitDf.var_map = _var_map
55
56
57
class EnrichmentCalculator(Generic[S], metaclass=abc.ABCMeta):
    """
    Base class for algorithms that compute one enrichment value per pair.

    Subclasses implement :meth:`for_pair`; this class groups the hits by pair
    and assembles the per-pair values into an ``EnrichmentDf``.
    """

    def __init__(self, alg_name: str):
        """
        Args:
            alg_name: Label used to name result columns, as ``alg_name(column)``
        """
        self.alg_name = alg_name

    def calc_many(self, data: HitFrame, score_df: ScoreDf):
        """
        Calculates the enrichment for every variable column of ``score_df``.

        Args:
            data: The hits, converted with ``HitUtils.df_to_hits``
            score_df: Frame whose ``var_map()`` yields, per variable column,
                      a mapping from InChIKey to score

        Returns:
            An ``EnrichmentDf`` with one row per pair, containing ``predicate``, ``object``,
            ``samples``, and one ``alg_name(column)`` column per variable column
        """
        hits = HitUtils.df_to_hits(data)
        # NOTE(review): hits are grouped by "pair" here but by "to_pair" in ``calc``;
        # ``_results_row`` looks the pairs from here up in the output of ``calc``,
        # so the two keys must produce equal pairs -- confirm against AbstractHit.
        samples_per_pair = {k: len(v) for k, v in Au.hit_multidict(hits, "pair").items()}
        # var_map() returns a mapping; iterate its items to get (column, scores) pairs
        results = {col: self.calc(hits, scores) for col, scores in score_df.var_map().items()}
        return EnrichmentDf(
            [
                pd.Series(self._results_row(pair, n_samples, results))
                for pair, n_samples in samples_per_pair.items()
            ]
        )

    def _results_row(
        self, pair: Pair, n_samples: int, results
    ) -> Mapping[str, Union[str, int, float]]:
        """
        Builds one output row for ``pair``.

        Args:
            pair: The pair the row describes; its ``pred`` and ``obj`` are written out
            n_samples: Number of hits for the pair
            results: Mapping from variable column name to a mapping from pair to value
        """
        row = {
            "predicate": pair.pred,
            "object": pair.obj,
            "samples": n_samples,
        }
        for col, values in results.items():
            row[self._col_name(col)] = values[pair]
        return row

    def _col_name(self, col: str) -> str:
        """Returns the output column name for ``col``, e.g. ``alpha(score_1)``."""
        return self.alg_name + "(" + col + ")"

    def calc(self, hits: Sequence[AbstractHit], scores: Mapping[str, S]) -> Mapping[Pair, float]:
        """
        Calculates the enrichment of every pair found in ``hits``.

        Args:
            hits: All hits; they are grouped by pair before scoring
            scores: Mapping from InChIKey to score

        Returns:
            A mapping from each pair to the value of :meth:`for_pair` on that pair's hits
        """
        pair_to_hits = Au.hit_multidict(hits, "to_pair")
        # Score each pair on its own hits only, not on the full hit list
        return {pair: self.for_pair(the_hits, scores) for pair, the_hits in pair_to_hits.items()}

    def for_pair(self, hits: Sequence[AbstractHit], scores: Mapping[str, S]) -> float:
        """
        Calculates the enrichment for the hits of a single pair.

        Args:
            hits: The hits belonging to one pair
            scores: Mapping from InChIKey to score

        Raises:
            NotImplementedError: Always; subclasses must override this method
        """
        raise NotImplementedError()
99
100
101
class AlphaCalculator(EnrichmentCalculator[S]):
    """
    Averages, over data sources, the mean weighted squared score term of each source's hits.
    """

    def for_pair(self, hits: Sequence[AbstractHit], scores: Mapping[str, S]) -> float:
        """
        Groups ``hits`` by ``data_source`` and returns the mean of the per-source terms.

        Args:
            hits: The hits belonging to one pair
            scores: Mapping from InChIKey to score
        """
        by_source = Au.hit_multidict(hits, "data_source")
        per_source = [self._calc_term(group, scores) for group in by_source.values()]
        return math.fsum(per_source) / len(per_source)

    def _calc_term(self, hits: Sequence[AbstractHit], scores: Mapping[str, S]) -> float:
        """
        Returns the mean over ``hits`` of ``Au.elle(weight) * (2 * (score - 1)) ** 2``.

        Args:
            hits: The hits of one data source
            scores: Mapping from InChIKey to score
        """
        terms = []
        for hit in hits:
            scale = Au.elle(hit.weight)
            # NOTE(review): 1 is subtracted before doubling, i.e. (2 * (s - 1)) ** 2,
            # not (2 * s - 1) ** 2 -- confirm this is the intended rescaling.
            rescaled = 2 * float(scores[hit.origin_inchikey] - 1)
            terms.append(scale * rescaled ** 2)
        return math.fsum(terms) / len(terms)
114
115
116
class FoldUnweightedCalc(EnrichmentCalculator[bool]):
    """
    Ratio of the number of hits from hit compounds to the number from non-hit compounds.
    """

    def for_pair(self, hits: Sequence[AbstractHit], scores: Mapping[str, S]) -> float:
        """
        Counts the hits whose compound score is truthy and divides by the count of the rest.

        Args:
            hits: The hits belonging to one pair
            scores: Mapping from InChIKey to a boolean-like score

        Returns:
            The ratio, or positive infinity when no hit has a falsy score
        """
        n_positive = sum(1 for hit in hits if scores[hit.origin_inchikey])
        n_negative = sum(1 for hit in hits if not scores[hit.origin_inchikey])
        return n_positive / n_negative if n_negative != 0 else float("inf")
123
124
125
class FoldWeightedCalc(EnrichmentCalculator[bool]):
    """
    Ratio of the summed weights of hits from hit compounds to that of non-hit compounds.
    """

    def for_pair(self, hits: Sequence[AbstractHit], scores: Mapping[str, S]) -> float:
        """
        Sums hit weights separately for truthy and falsy compound scores and divides them.

        Args:
            hits: The hits belonging to one pair
            scores: Mapping from InChIKey to a boolean-like score

        Returns:
            The ratio of weight sums, or positive infinity when the falsy-score sum is 0
        """
        positives = [hit for hit in hits if scores[hit.origin_inchikey]]
        negatives = [hit for hit in hits if not scores[hit.origin_inchikey]]
        weight_pos = math.fsum([hit.weight for hit in positives])
        weight_neg = math.fsum([hit.weight for hit in negatives])
        return weight_pos / weight_neg if weight_neg != 0 else float("inf")
134
135
136
class SumWeightedCalc(EnrichmentCalculator[S]):
    """
    Sum of score × weight over the hits, divided by the number of hits.
    """

    def for_pair(self, hits: Sequence[AbstractHit], scores: Mapping[str, S]) -> float:
        """
        Returns the mean over ``hits`` of the compound score multiplied by the hit weight.

        Args:
            hits: The hits belonging to one pair
            scores: Mapping from InChIKey to score
        """
        products = [scores[hit.origin_inchikey] * hit.weight for hit in hits]
        total = math.fsum(products)
        return total / len(hits)
139
140
141
class SumUnweightedCalc(EnrichmentCalculator[S]):
    """
    Sum of scores over the hits, divided by the number of hits.
    """

    def for_pair(self, hits: Sequence[AbstractHit], scores: Mapping[str, S]) -> float:
        """
        Returns the mean over ``hits`` of the score of each hit's compound.

        Args:
            hits: The hits belonging to one pair
            scores: Mapping from InChIKey to score
        """
        per_hit = [scores[hit.origin_inchikey] for hit in hits]
        total = math.fsum(per_hit)
        return total / len(hits)
144
145
146
class EnrichmentAlg(CleverEnum):
    """
    The available enrichment algorithms.

    Each member has a display symbol, a one-line description, and a calculator class.
    """

    alpha = enum.auto()
    fold = enum.auto()
    fold_w = enum.auto()
    sum = enum.auto()
    sum_w = enum.auto()

    @property
    def description(self) -> str:
        """A one-line formula-style description, prefixed with the expected score type if any."""
        s = self.symbol
        return {
            EnrichmentAlg.alpha: rf"[float] {s}(p) = Mean product of rescaled weights and scores; see the docs",
            EnrichmentAlg.fold: rf"[bool] {s}(p) = #(c has p and hit) / #(c has p and not hit)",
            EnrichmentAlg.fold_w: rf"[bool] {s}(p) = ∑(w(c, pair) s.t. c is hit) / ∑(w(c, pair) s.t. c not hit)",
            EnrichmentAlg.sum: rf"{s}(p) = ∑(score(c) s.t. c has p)",
            EnrichmentAlg.sum_w: rf"{s}(p) = ∑(score(c) × w(c, p) for all c)",
        }[self]

    @property
    def symbol(self) -> str:
        """The Greek-letter symbol used for this algorithm in descriptions."""
        return {
            EnrichmentAlg.alpha: "α",
            EnrichmentAlg.fold: r"β",
            EnrichmentAlg.fold_w: "β*",
            EnrichmentAlg.sum: r"γ",
            EnrichmentAlg.sum_w: r"γ*",
        }[self]

    @property
    def clazz(self) -> Type[EnrichmentCalculator]:
        """The calculator class that implements this algorithm."""
        # The table is keyed by enum member, so index with ``self``;
        # indexing with ``self.name`` (a str) always raised ``KeyError``.
        return {
            EnrichmentAlg.alpha: AlphaCalculator,
            EnrichmentAlg.fold: FoldUnweightedCalc,
            EnrichmentAlg.fold_w: FoldWeightedCalc,
            EnrichmentAlg.sum: SumUnweightedCalc,
            EnrichmentAlg.sum_w: SumWeightedCalc,
        }[self]
185
186
187
class EnrichmentCalculation:
    """Factory for enrichment calculators."""

    @classmethod
    def create(cls, algorithm: Union[str, EnrichmentAlg]) -> EnrichmentCalculator:
        """
        Creates the calculator for an algorithm.

        Args:
            algorithm: An ``EnrichmentAlg`` member, or a string resolved with ``EnrichmentAlg.of``

        Returns:
            A new instance of the algorithm's calculator class, labeled with the
            algorithm's name so that result columns read like ``alpha(score_1)``
        """
        # Resolve through the enum: ``EnrichmentDf`` is the result frame type and has no ``clazz``.
        # NOTE(review): relies on ``CleverEnum`` providing ``of`` -- confirm in mandos.model.
        alg = EnrichmentAlg.of(algorithm)
        # Calculators require ``alg_name`` in their constructor
        return alg.clazz(alg.name)
192
193
194
# Public API of this module, in alphabetical order.
__all__ = [
    "AlphaCalculator",
    "EnrichmentAlg",
    "EnrichmentCalculation",
    "EnrichmentCalculator",
    "EnrichmentDf",
    "FoldUnweightedCalc",
    "FoldWeightedCalc",
    "ScoreDf",
    "SumUnweightedCalc",
    "SumWeightedCalc",
]
206