mandos.analysis.regression.EnrichmentCalculator.__init__() - Code Metrics - Inspection of "build(deps-dev): bump sphinx-copybutton from 0.3.3..." - dmyersturnbull/mandos - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — dependabot/pip/sphinx-copybutt... ( c72176 )

unknown

created 2021-07-05 03:05 UTC

EnrichmentCalculator.__init__() A

↳ Parent: mandos.analysis.regression

Complexity

Conditions

Size

Total Lines	2
Code Lines	2

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	1
eloc	2
nop	2
dl	0
loc	2
rs	10
c	0
b	0
f	0

"""
Scoring (regression and enrichment) calculations.
"""
import abc
import enum
import math
from typing import Generic, Mapping, Sequence, Type, TypeVar, Union

import pandas as pd

from typeddfs import TypedDfs


from mandos.analysis import AnalysisUtils as Au
from mandos.model import CleverEnum
from mandos.model.hits import AbstractHit, HitFrame, Pair

S = TypeVar("S", bound=Union[int, float, bool])



# Typed DataFrame of enrichment results.
# "predicate" and "object" are required string columns; "samples" is a required int column.
EnrichmentDf = (
    TypedDfs.typed("EnrichmentDf")
    .require("predicate", "object", dtype=str)
    .require("samples", dtype=int)
).build()
# Extra columns are named "<algorithm>(<score column>)", e.g., alpha(score_1), alpha(is_hit)


# Typed DataFrame of scores, indexed by the required string column "inchikey".
# "score" and "is_hit" (bool) are reserved column names
# (presumably optional -- confirm against the typeddfs documentation).
ScoreDf = (
    TypedDfs.typed("ScoreDf")
    .require("inchikey", dtype=str, index=True)
    .reserve("score")
    .reserve("is_hit", dtype=bool)
).build()
# Extra columns are, e.g., score_1, score_hello, is_lethal, is_good


def _score_cols(self: ScoreDf) -> Sequence[str]:
    """
    List the names of the columns that hold scores.

    A column qualifies if it is named exactly ``score`` or if its name begins
    with ``score_`` or ``is_``. The order of ``self.columns`` is preserved.
    """

    def holds_score(name) -> bool:
        return name == "score" or name.startswith(("score_", "is_"))

    return list(filter(holds_score, self.columns))


def _all_scores(self: ScoreDf) -> Mapping[str, Mapping[str, S]]:
    """
    Collect every score column as a plain dict.

    Returns:
        A mapping from each score column name (as listed by ``score_cols()``)
        to that column's ``to_dict()`` result, i.e. index label -> value.
    """
    # ``score_cols`` is attached to ``ScoreDf`` as a method, so it has to be called;
    # iterating over the bare attribute would raise ``TypeError``.
    return {col: self[col].to_dict() for col in self.score_cols()}


# Attach the helpers above to ``ScoreDf`` as instance methods.
# Both are plain functions, so they must be called: ``df.score_cols()``, ``df.all_scores()``.
ScoreDf.score_cols = _score_cols
ScoreDf.all_scores = _all_scores


class EnrichmentCalculator(Generic[S], metaclass=abc.ABCMeta):
    """
    Base class for algorithms that compute one enrichment value per pair.

    Subclasses implement :meth:`for_pair`; :meth:`calc` groups the hits and
    applies it to each group.
    """

    def __init__(self, scores: Mapping[str, S]):
        """
        Args:
            scores: Mapping of scores; the subclasses in this module look it up
                    by each hit's ``origin_inchikey``
        """
        self.scores = scores

    def calc(self, hits: Sequence[AbstractHit]) -> Mapping[Pair, float]:
        """
        Compute the enrichment value of every pair found among ``hits``.

        Args:
            hits: The hits, grouped here via ``Au.hit_multidict(hits, "to_pair")``

        Returns:
            A dict mapping each group key to ``for_pair`` of that group's hits
        """
        pair_to_hits = Au.hit_multidict(hits, "to_pair")
        # Score each pair on its own hits only; passing the full ``hits`` list
        # would give every pair the same value.
        return {pair: self.for_pair(the_hits) for pair, the_hits in pair_to_hits.items()}

    def for_pair(self, hits: Sequence[AbstractHit]) -> float:
        """
        Compute the enrichment value for the hits of a single pair.

        Raises:
            NotImplementedError: Always; subclasses must override this method
        """
        raise NotImplementedError()


class AlphaCalculator(EnrichmentCalculator[S]):
    """
    Enrichment as the mean, over data sources, of a per-source mean term.
    """

    def for_pair(self, hits: Sequence[AbstractHit]) -> float:
        """
        Average the per-source terms of ``hits``.

        The hits are grouped via ``Au.hit_multidict(hits, "data_source")``; each
        group yields one term from :meth:`_calc_term`.

        Raises:
            ZeroDivisionError: If there are no groups
        """
        per_source = Au.hit_multidict(hits, "data_source")
        terms = [self._calc_term(group) for group in per_source.values()]
        return math.fsum(terms) / len(terms)

    def _calc_term(self, hits: Sequence[AbstractHit]) -> float:
        """
        Return the mean over ``hits`` of ``Au.elle(value) * (2 * float(score - 1)) ** 2``.

        Raises:
            ZeroDivisionError: If ``hits`` is empty
        """
        # NOTE(review): 1 is subtracted from the score *before* doubling;
        # confirm this is intended rather than ``2 * score - 1``.
        values = []
        for hit in hits:
            weight = Au.elle(hit.value)
            shifted = float(self.scores[hit.origin_inchikey] - 1)
            values.append(weight * (2 * shifted) ** 2)
        return math.fsum(values) / len(values)


class FoldUnweightedCalc(EnrichmentCalculator[bool]):
    """
    Ratio of the number of hits with a truthy score to the number with a falsy score.
    """

    def for_pair(self, hits: Sequence[AbstractHit]) -> float:
        """
        Count hits by the truthiness of ``scores[hit.origin_inchikey]`` and divide.

        Returns:
            ``#truthy / #falsy``, or ``inf`` when no hit has a falsy score
            (including when ``hits`` is empty)
        """
        flags = [bool(self.scores[hit.origin_inchikey]) for hit in hits]
        n_yes = sum(flags)
        n_no = len(flags) - n_yes
        return float("inf") if n_no == 0 else n_yes / n_no


class FoldWeightedCalc(EnrichmentCalculator[bool]):
    """
    Ratio of summed hit values: hits with a truthy score over hits with a falsy score.
    """

    def for_pair(self, hits: Sequence[AbstractHit]) -> float:
        """
        Split hits by the truthiness of ``scores[hit.origin_inchikey]`` and divide the sums.

        Returns:
            ``sum(value of truthy hits) / sum(value of falsy hits)``,
            or ``inf`` when the denominator sums to zero
        """
        yes_hits, no_hits = [], []
        for hit in hits:
            if self.scores[hit.origin_inchikey]:
                yes_hits.append(hit)
            else:
                no_hits.append(hit)
        top = math.fsum([hit.value for hit in yes_hits])
        bottom = math.fsum([hit.value for hit in no_hits])
        return float("inf") if bottom == 0 else top / bottom


class SumWeightedCalc(EnrichmentCalculator[S]):
    """
    Mean over the hits of ``score * hit.value``.
    """

    def for_pair(self, hits: Sequence[AbstractHit]) -> float:
        """
        Sum ``scores[hit.origin_inchikey] * hit.value`` over ``hits`` and divide by ``len(hits)``.

        Raises:
            ZeroDivisionError: If ``hits`` is empty
        """
        # NOTE(review): this is a mean, although ``EnrichmentAlg.description``
        # writes it as a plain sum -- confirm which is intended.
        products = (self.scores[hit.origin_inchikey] * hit.value for hit in hits)
        total = math.fsum(products)
        return total / len(hits)


class SumUnweightedCalc(EnrichmentCalculator[S]):
    """
    Mean over the hits of the score of each hit's origin compound key.
    """

    def for_pair(self, hits: Sequence[AbstractHit]) -> float:
        """
        Sum ``scores[hit.origin_inchikey]`` over ``hits`` and divide by ``len(hits)``.

        Raises:
            ZeroDivisionError: If ``hits`` is empty
        """
        # NOTE(review): this is a mean, although ``EnrichmentAlg.description``
        # writes it as a plain sum -- confirm which is intended.
        per_hit = (self.scores[hit.origin_inchikey] for hit in hits)
        total = math.fsum(per_hit)
        return total / len(hits)


class EnrichmentAlg(CleverEnum):
    """
    The available enrichment algorithms.

    Each member has a human-readable :attr:`description`, a short :attr:`symbol`,
    and the calculator class (:attr:`clazz`) that implements it.
    """

    alpha = enum.auto()
    fold = enum.auto()
    fold_w = enum.auto()
    sum = enum.auto()
    sum_w = enum.auto()

    @property
    def description(self) -> str:
        """
        A one-line formula or summary of the algorithm, prefixed with its symbol.
        """
        s = self.symbol
        return {
            EnrichmentAlg.alpha: rf"[float] {s}(p) = Mean product of rescaled weights and scores; see the docs",
            EnrichmentAlg.fold: rf"[bool] {s}(p) = #(c has p and hit) / #(c has p and not hit)",
            EnrichmentAlg.fold_w: rf"[bool] {s}(p) = ∑(w(c, pair) s.t. c is hit) / ∑(w(c, pair) s.t. c not hit)",
            EnrichmentAlg.sum: rf"{s}(p) = ∑(score(c) s.t. c has p)",
            EnrichmentAlg.sum_w: rf"{s}(p) = ∑(score(c) × w(c, p) for all c)",
        }[self]

    @property
    def symbol(self) -> str:
        """
        The Greek-letter symbol for the algorithm; weighted variants carry a ``*``.
        """
        return {
            EnrichmentAlg.alpha: "α",
            EnrichmentAlg.fold: r"β",
            EnrichmentAlg.fold_w: "β*",
            EnrichmentAlg.sum: r"γ",
            EnrichmentAlg.sum_w: r"γ*",
        }[self]

    @property
    def clazz(self) -> Type[EnrichmentCalculator]:
        """
        The :class:`EnrichmentCalculator` subclass that implements the algorithm.
        """
        # The dict is keyed by members, so look up by ``self`` (as ``description``
        # and ``symbol`` do); looking up by ``self.name`` would raise ``KeyError``.
        return {
            EnrichmentAlg.alpha: AlphaCalculator,
            EnrichmentAlg.fold: FoldUnweightedCalc,
            EnrichmentAlg.fold_w: FoldWeightedCalc,
            EnrichmentAlg.sum: SumUnweightedCalc,
            EnrichmentAlg.sum_w: SumWeightedCalc,
        }[self]


class EnrichmentCalculation:
    """
    Runs an enrichment algorithm over every score column of a ``ScoreDf``.
    """

    def calc(
        self, data: HitFrame, score_df: ScoreDf, algorithm: Union[str, EnrichmentAlg]
    ) -> EnrichmentDf:
        """
        Compute enrichment values for each pair and each score column.

        Args:
            data: The hits as a ``HitFrame`` (currently unused; see the note below)
            score_df: The scores; each column from ``score_df.all_scores()`` is processed
            algorithm: An ``EnrichmentAlg`` member, or a value that
                       ``EnrichmentAlg.of`` can resolve to one

        Returns:
            An ``EnrichmentDf`` with one row per pair, holding ``predicate``,
            ``object``, ``samples`` (the number of hits for the pair), and one
            column ``<algorithm name>(<score column>)`` per score column
        """
        # NOTE(review): placeholder kept from the original -- ``data`` is never
        # converted into hits, so this method cannot work until that is filled in.
        hits = ...
        # ``clazz`` is defined on ``EnrichmentAlg``, so resolve through that enum
        alg = EnrichmentAlg.of(algorithm)
        alg_clazz = alg.clazz
        # Use the member name so the prefix is, e.g., ``alpha`` for str and enum inputs alike
        alg_name = alg.name
        # NOTE(review): groups by "pair" here, but ``EnrichmentCalculator.calc``
        # groups by "to_pair" -- confirm that both yield the same keys.
        samples_per_pair = {k: len(v) for k, v in Au.hit_multidict(hits, "pair").items()}
        results = {}
        # ``all_scores()`` returns a mapping; ``.items()`` is needed to unpack (column, scores)
        for col, scores in score_df.all_scores().items():
            results[col] = alg_clazz(scores).calc(hits)
        rows = [
            pd.Series(
                {
                    "predicate": pair.pred,
                    "object": pair.obj,
                    "samples": n_samples,
                    **{
                        f"{alg_name}({col})": by_pair[pair]
                        for col, by_pair in results.items()
                    },
                }
            )
            for pair, n_samples in samples_per_pair.items()
        ]
        return EnrichmentDf(rows)


# Names exported by ``from <this module> import *``.
__all__ = [
    "AlphaCalculator",
    "EnrichmentCalculator",
    "FoldUnweightedCalc",
    "FoldWeightedCalc",
    "SumUnweightedCalc",
    "SumWeightedCalc",
    "EnrichmentCalculation",
    "EnrichmentAlg",
    "EnrichmentDf",
    "ScoreDf",
]


1			"""
2			Scoring (regression and enrichment) calculations.
3			"""
4			import abc
5			import enum
6			import math
7			from typing import Generic, Mapping, Sequence, Type, TypeVar, Union
8
9			import pandas as pd
			0 ignored issues – show introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report Unable to import 'pandas' Loading history...
10			from typeddfs import TypedDfs
			0 ignored issues – show introduced 2021-06-30 04:51 UTC by Report Bug Copy Issue Report Unable to import 'typeddfs' Loading history...
11
12			from mandos.analysis import AnalysisUtils as Au
13			from mandos.model import CleverEnum
14			from mandos.model.hits import AbstractHit, HitFrame, Pair
15
16			S = TypeVar("S", bound=Union[int, float, bool])
			0 ignored issues – show Coding Style Naming introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report Class name "S" doesn't conform to PascalCase naming style ('[^\\W\\da-z][^\\W_]+$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
17
18
19			EnrichmentDf = (
20			TypedDfs.typed("EnrichmentDf")
21			.require("predicate", "object", dtype=str)
22			.require("samples", dtype=int)
23			).build()
24			# extra cols are, e.g., alpha(score_1), alpha(is_hit)
25
26
27			ScoreDf = (
28			TypedDfs.typed("ScoreDf")
29			.require("inchikey", dtype=str, index=True)
30			.reserve("score")
31			.reserve("is_hit", dtype=bool)
32			).build()
33			# extra cols are, e.g., score_1, score_hello, is_lethal, is_good
34
35
36			def _score_cols(self: ScoreDf) -> Sequence[str]:
37			return [
38			col
39			for col in self.columns
40			if col == "score" or col.startswith("score_") or col.startswith("is_")
41			]
42
43
44			def _all_scores(self: ScoreDf) -> Mapping[str, Mapping[str, S]]:
45			results = {}
46			for c in self.score_cols:
			0 ignored issues – show Coding Style Naming introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report Variable name "c" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]*\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
47			results[c] = self[c].to_dict()
48			return results
49
50
51			ScoreDf.score_cols = _score_cols
52			ScoreDf.all_scores = _all_scores
53
54
55			class EnrichmentCalculator(Generic[S], metaclass=abc.ABCMeta):
			0 ignored issues – show introduced 2021-06-30 04:51 UTC by Report Bug Copy Issue Report Missing class docstring Loading history...
56			def __init__(self, scores: Mapping[str, S]):
57			self.scores = scores
58
59			def calc(self, hits: Sequence[AbstractHit]) -> Mapping[Pair, float]:
			0 ignored issues – show introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
60			pair_to_hits = Au.hit_multidict(hits, "to_pair")
61			results = {}
62			for pair, the_hits in pair_to_hits.items():
			0 ignored issues – show Unused Code introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report The variable `the_hits` seems to be unused. Loading history...
63			results[pair] = self.for_pair(hits)
64			return results
65
66			def for_pair(self, hits: Sequence[AbstractHit]) -> float:
			0 ignored issues – show introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
67			raise NotImplementedError()
68
69
70			class AlphaCalculator(EnrichmentCalculator[S]):
			0 ignored issues – show introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report Value 'EnrichmentCalculator' is unsubscriptable Loading history... introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report Missing class docstring Loading history...
71			def for_pair(self, hits: Sequence[AbstractHit]) -> float:
			0 ignored issues – show introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
72			source_to_hits = Au.hit_multidict(hits, "data_source")
73			terms = []
74			for source, source_hits in source_to_hits.items():
			0 ignored issues – show Unused Code introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report The variable `source` seems to be unused. Loading history...
75			terms.append(self._calc_term(source_hits))
76			return math.fsum(terms) / len(terms)
77
78			def _calc_term(self, hits: Sequence[AbstractHit]) -> float:
79			values = [
80			Au.elle(hit.value) * (2 * float(self.scores[hit.origin_inchikey] - 1)) ** 2
81			for hit in hits
82			]
83			return math.fsum(values) / len(values)
84
85
86			class FoldUnweightedCalc(EnrichmentCalculator[bool]):
			0 ignored issues – show introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report Value 'EnrichmentCalculator' is unsubscriptable Loading history... introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report Missing class docstring Loading history...
87			def for_pair(self, hits: Sequence[AbstractHit]) -> float:
			0 ignored issues – show introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
88			numerator = len([hit for hit in hits if self.scores[hit.origin_inchikey]])
89			denominator = len([hit for hit in hits if not self.scores[hit.origin_inchikey]])
90			if denominator == 0:
91			return float("inf")
92			return numerator / denominator
93
94
95			class FoldWeightedCalc(EnrichmentCalculator[bool]):
			0 ignored issues – show introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report Missing class docstring Loading history... introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report Value 'EnrichmentCalculator' is unsubscriptable Loading history...
96			def for_pair(self, hits: Sequence[AbstractHit]) -> float:
			0 ignored issues – show introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
97			yes = [hit for hit in hits if self.scores[hit.origin_inchikey]]
98			no = [hit for hit in hits if not self.scores[hit.origin_inchikey]]
			0 ignored issues – show Coding Style Naming introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report Variable name "no" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]*\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
99			numerator = math.fsum((hit.value for hit in yes))
			0 ignored issues – show Comprehensibility Best Practice introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report The variable `hit` does not seem to be defined. Loading history...
100			denominator = math.fsum((hit.value for hit in no))
101			if denominator == 0:
102			return float("inf")
103			return numerator / denominator
104
105
106			class SumWeightedCalc(EnrichmentCalculator[S]):
			0 ignored issues – show introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report Missing class docstring Loading history... introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report Value 'EnrichmentCalculator' is unsubscriptable Loading history...
107			def for_pair(self, hits: Sequence[AbstractHit]) -> float:
			0 ignored issues – show introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
108			return math.fsum([self.scores[hit.origin_inchikey] * hit.value for hit in hits]) / len(hits)
109
110
111			class SumUnweightedCalc(EnrichmentCalculator[S]):
			0 ignored issues – show introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report Missing class docstring Loading history... introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report Value 'EnrichmentCalculator' is unsubscriptable Loading history...
112			def for_pair(self, hits: Sequence[AbstractHit]) -> float:
			0 ignored issues – show introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
113			return math.fsum([self.scores[hit.origin_inchikey] for hit in hits]) / len(hits)
114
115
116			class EnrichmentAlg(CleverEnum):
			0 ignored issues – show Documentation introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report Empty class docstring Loading history...
117			""" """
118
119			alpha = enum.auto()
120			fold = enum.auto()
121			fold_w = enum.auto()
122			sum = enum.auto()
123			sum_w = enum.auto()
124
125			@property
126			def description(self) -> str:
			0 ignored issues – show introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
127			s = self.symbol
			0 ignored issues – show Coding Style Naming introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report Variable name "s" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]*\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
128			return {
129			EnrichmentAlg.alpha: rf"[float] {s}(p) = Mean product of rescaled weights and scores; see the docs",
			0 ignored issues – show Coding Style introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (112/100). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
130			EnrichmentAlg.fold: rf"[bool] {s}(p) = #(c has p and hit) / #(c has p and not hit)",
131			EnrichmentAlg.fold_w: rf"[bool] {s}(p) = ∑(w(c, pair) s.t. c is hit) / ∑(w(c, pair) s.t. c not hit)",
			0 ignored issues – show Coding Style introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (113/100). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
132			EnrichmentAlg.sum: rf"{s}(p) = ∑(score(c) s.t. c has p)",
133			EnrichmentAlg.sum_w: rf"{s}(p) = ∑(score(c) × w(c, p) for all c)",
134			}[self]
135
136			@property
137			def symbol(self) -> str:
			0 ignored issues – show introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
138			return {
139			EnrichmentAlg.alpha: "α",
140			EnrichmentAlg.fold: r"β",
141			EnrichmentAlg.fold_w: "β*",
142			EnrichmentAlg.sum: r"γ",
143			EnrichmentAlg.sum_w: r"γ*",
144			}[self]
145
146			@property
147			def clazz(self) -> Type[EnrichmentCalculator]:
			0 ignored issues – show introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
148			return {
149			EnrichmentAlg.alpha: AlphaCalculator,
150			EnrichmentAlg.fold: FoldUnweightedCalc,
151			EnrichmentAlg.fold_w: FoldWeightedCalc,
152			EnrichmentAlg.sum: SumUnweightedCalc,
153			EnrichmentAlg.sum_w: SumWeightedCalc,
154			}[self.name]
155
156
157			class EnrichmentCalculation:
			0 ignored issues – show introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report Missing class docstring Loading history...
158			def calc(
			0 ignored issues – show Coding Style introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report This method could be written as a function/class method. If a method does not access any attributes of the class, it could also be implemented as a function or static method. This can help improve readability. For example class Foo: def some_method(self, x, y): return x + y; could be written as class Foo: @classmethod def some_method(cls, x, y): return x + y; Loading history... introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
159			self, data: HitFrame, score_df: ScoreDf, algorithm: Union[str, EnrichmentAlg]
			0 ignored issues – show Unused Code introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report The argument `data` seems to be unused. Loading history... Coding Style introduced 2021-07-05 03:20 UTC by Report Bug Copy Issue Report Wrong hanging indentation before block (add 4 spaces). Loading history...
160			) -> EnrichmentDf:
161			hits = ...
162			alg_clazz = EnrichmentDf.of(algorithm).clazz
163			alg_name = str(algorithm)
164			samples_per_pair = {k: len(v) for k, v in Au.hit_multidict(hits, "pair").items()}
165			results = {}
166			for col, scores in score_df.all_scores():
167			calculated = alg_clazz(scores).calc(hits)
168			results[col] = calculated
169			return EnrichmentDf(
170			[
171			pd.Series(
172			{
173			**dict(
174			predicate=pair.pred,
175			object=pair.obj,
176			samples=n_samples,
177			),
178			**{f"{alg_name}({c})": vs[pair] for c, vs in results.items()},
179			}
180			)
181			for pair, n_samples in samples_per_pair.items()
182			]
183			)
184
185
186			__all__ = [
187			"AlphaCalculator",
188			"EnrichmentCalculator",
189			"FoldUnweightedCalc",
190			"FoldWeightedCalc",
191			"SumUnweightedCalc",
192			"SumWeightedCalc",
193			"EnrichmentCalculation",
194			"EnrichmentAlg",
195			"EnrichmentDf",
196			"ScoreDf",
197			]
198

dmyersturnbull / mandos

Push — dependabot/pip/sphinx-copybutt... ( c72176 )

EnrichmentCalculator.__init__() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like

EnrichmentCalculator.__init__() A