Passed
Push — main ( a80564...ec3fe3 )
by Douglas
03:59
created

mandos.analysis.concordance   A

Complexity

Total Complexity 15

Size/Duplication

Total Lines 108
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 81
dl 0
loc 108
rs 10
c 0
b 0
f 0
wmc 15

10 Methods

Rating   Name   Duplication   Size   Complexity  
A ConcordanceCalculator._calc() 0 2 1
A ConcordanceCalculator.generate() 0 10 3
A ConcordanceCalculator.__init__() 0 4 1
A TauConcordanceCalculator._i_sum() 0 3 1
A TauConcordanceCalculator._n_z() 0 3 1
A TauConcordanceCalculator._calc() 0 5 1
A ConcordanceCalculator.calc_all() 0 4 3
A ConcordanceAlg.clazz() 0 3 1
A ConcordanceCalculation.create() 0 11 1
A ConcordanceCalculator.calc() 0 12 2
1
"""
2
Calculations of concordance between annotations.
3
"""
4
import abc
5
import enum
6
import math
7
from typing import Collection, Dict, Generator, Sequence, Set, Tuple, Union, Type
0 ignored issues
show
Unused Code introduced by
Unused Collection imported from typing
Loading history...
Unused Code introduced by
Unused Dict imported from typing
Loading history...
Unused Code introduced by
Unused Set imported from typing
Loading history...
Unused Code introduced by
Unused Tuple imported from typing
Loading history...
8
9
import numpy as np
0 ignored issues
show
introduced by
Unable to import 'numpy'
Loading history...
10
import pandas as pd
0 ignored issues
show
introduced by
Unable to import 'pandas'
Loading history...
11
from typeddfs import TypedDfs
0 ignored issues
show
introduced by
Unable to import 'typeddfs'
Loading history...
12
13
from mandos.analysis import AnalysisUtils
0 ignored issues
show
Unused Code introduced by
Unused AnalysisUtils imported from mandos.analysis
Loading history...
14
from mandos.analysis import SimilarityDfLongForm, SimilarityDfShortForm
15
from mandos.model import CleverEnum
16
17
# Typed DataFrame of concordance scores.
# Required columns: the names "phi" and "psi" (str), the "sample" number (int),
# and the score "tau" (float).
ConcordanceDf = (
    TypedDfs.typed("ConcordanceDf")
    .require("phi", "psi", dtype=str)
    .require("sample", dtype=int)
    .require("tau", dtype=float)
    .build()
)
23
24
25
class ConcordanceCalculator(metaclass=abc.ABCMeta):
    """
    Base class for calculators of concordance between two similarity matrices.

    Subclasses implement :meth:`_calc`, which scores a single (phi, psi) pair.
    The public methods validate the inputs, optionally resample, and package
    the scores as a ``ConcordanceDf``.
    """

    def __init__(self, n_samples: int, seed: int):
        """
        Args:
            n_samples: Number of scores to generate per (phi, psi) pair;
                       a value of exactly 1 disables resampling
            seed: Seed for the random state used when resampling
        """
        self.n_samples = n_samples
        self.seed = seed
        self.rand = np.random.RandomState(seed)

    def calc_all(self, phis: SimilarityDfLongForm, psis: SimilarityDfLongForm) -> ConcordanceDf:
        """
        Calls :meth:`calc` for every pair of unique ``phi`` and ``psi`` names.

        NOTE(review): unfinished. The matrices handed to ``calc`` are still
        ``None`` placeholders, and the per-pair results are neither collected
        nor returned.
        """
        for phi in phis["phi"].unique():
            for psi in psis["psi"].unique():
                self.calc(None, None, phi, psi)  # TODO

    def calc(
        self, phi: SimilarityDfShortForm, psi: SimilarityDfShortForm, phi_name: str, psi_name: str
    ) -> ConcordanceDf:
        """
        Scores one (phi, psi) pair and returns one row per generated sample.

        Args:
            phi: First similarity matrix
            psi: Second similarity matrix; must have the same column labels as ``phi``
            phi_name: Value written to the ``phi`` column of the result
            psi_name: Value written to the ``psi`` column of the result

        Returns:
            A ``ConcordanceDf`` with the columns ``sample``, ``tau``, ``phi``, and ``psi``

        Raises:
            ValueError: If the column labels of ``phi`` and ``psi`` differ
        """
        phi_cols, psi_cols = phi.columns.tolist(), psi.columns.tolist()
        if phi_cols != psi_cols:
            raise ValueError(f"Mismatched compounds: {phi_cols} != {psi_cols}")
        df = pd.DataFrame(data=self.generate(phi, psi), columns=["score"])
        # reset_index() turns the 0-based row number into a column, which becomes "sample"
        df = df.reset_index()
        df["phi"] = phi_name
        df["psi"] = psi_name
        df.columns = ["sample", "tau", "phi", "psi"]
        return ConcordanceDf.convert(df)

    def generate(
        self, phi: SimilarityDfShortForm, psi: SimilarityDfShortForm
    ) -> Generator[float, None, None]:
        """
        Yields ``n_samples`` scores for the pair.

        If ``n_samples`` is exactly 1, the inputs are scored once as-is.
        Otherwise each score is computed on inputs resampled with replacement.
        """
        if self.n_samples == 1:
            yield self._calc(phi, psi)
        else:
            for _ in range(self.n_samples):
                # NOTE(review): phi and psi are drawn independently, so the two
                # resamples are not paired -- confirm that this is intended.
                phi_b = self.rand.choice(phi, replace=True)
                psi_b = self.rand.choice(psi, replace=True)
                yield self._calc(phi_b, psi_b)

    def _calc(self, phi: SimilarityDfShortForm, psi: SimilarityDfShortForm) -> float:
        """
        Scores a single (phi, psi) pair. Must be overridden by subclasses.

        Raises:
            NotImplementedError: Always, in this base class
        """
        # NotImplemented is a non-callable constant meant for binary operators;
        # the exception to raise here is NotImplementedError.
        raise NotImplementedError()
0 ignored issues
show
Bug introduced by
NotImplemented does not seem to be callable.
Loading history...
Best Practice introduced by
NotImplemented raised - should raise NotImplementedError
Loading history...
62
63
64
class TauConcordanceCalculator(ConcordanceCalculator):
    """
    Concordance as ``(concordant pairs - discordant pairs) / (number of pairs)``.

    This is the form of Kendall's tau-a: a pair that is tied in either input
    counts as neither concordant nor discordant.
    """

    def _calc(self, phi: SimilarityDfShortForm, psi: SimilarityDfShortForm) -> float:
        """
        Computes the score for one (phi, psi) pair.

        Raises:
            ValueError: If ``phi`` has fewer than 2 items, so that no pair exists
        """
        n = len(phi)
        if n < 2:
            raise ValueError(f"Need at least 2 items to form a pair; got {n}")
        numerator = self._n_z(phi, psi, 1) - self._n_z(phi, psi, -1)
        # Number of unordered pairs, n choose 2, without building huge factorials
        denominator = n * (n - 1) / 2
        return numerator / denominator

    def _n_z(self, a: Sequence[float], b: Sequence[float], z: int) -> int:
        """
        Counts the pairs ``(i, j)`` with ``j < i`` that match the sign ``z``.

        Args:
            a: First sequence of values
            b: Second sequence of values
            z: 1 to count concordant pairs, -1 to count discordant pairs
        """
        return sum(self._i_sum(a, b, i, z) for i in range(len(a)))

    def _i_sum(self, a: Sequence[float], b: Sequence[float], i: int, z: int) -> int:
        """
        Counts the indices ``j < i`` for which ``sign(a[i] - a[j])`` equals
        ``z * sign(b[i] - b[j])`` and is nonzero.
        """
        count = 0
        for j in range(i):
            a_sign = np.sign(a[i] - a[j])
            b_sign = z * np.sign(b[i] - b[j])
            # Chained comparison: the signs agree and neither difference is zero
            if a_sign == b_sign != 0:
                count += 1
        return count
78
79
80
class ConcordanceAlg(CleverEnum):
    """
    The available concordance algorithms.
    """

    tau = enum.auto()

    @property
    def clazz(self) -> Type[ConcordanceCalculator]:
        """
        The calculator class that implements this algorithm.
        """
        calculators = {ConcordanceAlg.tau: TauConcordanceCalculator}
        return calculators[self]
86
87
88
class ConcordanceCalculation:
    """
    Factory for concordance calculators.
    """

    @classmethod
    def create(
        cls,
        algorithm: Union[str, ConcordanceAlg],
        phi_name: str,
        psi_name: str,
        n_samples: int,
        seed: int,
    ) -> ConcordanceCalculator:
        """
        Builds the calculator for ``algorithm``.

        Args:
            algorithm: A ``ConcordanceAlg`` member, or a string resolved via ``ConcordanceAlg.of``
            phi_name: Accepted for backward compatibility; not passed to the calculator
            psi_name: Accepted for backward compatibility; not passed to the calculator
            n_samples: Number of scores the calculator generates per pair
            seed: Seed for the calculator's random state

        Returns:
            A new calculator instance
        """
        calculator_cls = ConcordanceAlg.of(algorithm).clazz
        # The calculator constructors take only (n_samples, seed); forwarding
        # phi_name / psi_name raised TypeError. The names are passed to
        # ConcordanceCalculator.calc() instead.
        return calculator_cls(n_samples=n_samples, seed=seed)
100
101
102
# Names exported by ``from <this module> import *``.
__all__ = [
    "ConcordanceCalculator",
    "TauConcordanceCalculator",
    "ConcordanceDf",
    "ConcordanceCalculation",
    "ConcordanceAlg",
]
109