Passed
Push — main ( 6c03a0...9ee5db )
by Douglas
01:53
created

JPrimeMatrixCalculator._calc_partial()   A

Complexity

Conditions 1

Size

Total Lines 9
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 9
nop 3
dl 0
loc 9
rs 9.95
c 0
b 0
f 0
1
"""
2
Calculations of overlap (similarity) between annotation sets.
3
"""
4
import abc
5
import enum
0 ignored issues
show
Unused Code introduced by
The import enum seems to be unused.
Loading history...
6
import math
7
import time
8
from collections import defaultdict
9
from pathlib import Path
10
from typing import Collection, Mapping, Optional, Sequence, Type, Union
11
12
import decorateme
0 ignored issues
show
introduced by
Unable to import 'decorateme'
Loading history...
13
import numpy as np
0 ignored issues
show
introduced by
Unable to import 'numpy'
Loading history...
14
from pocketutils.core.chars import Chars
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.chars'
Loading history...
15
from pocketutils.core.enums import CleverEnum
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.enums'
Loading history...
16
from pocketutils.tools.unit_tools import UnitTools
0 ignored issues
show
introduced by
Unable to import 'pocketutils.tools.unit_tools'
Loading history...
17
from typeddfs.df_errors import HashFileMissingError
0 ignored issues
show
introduced by
Unable to import 'typeddfs.df_errors'
Loading history...
18
19
from mandos.analysis import AnalysisUtils as Au
20
from mandos.analysis.io_defns import SimilarityDfLongForm, SimilarityDfShortForm
21
from mandos.model.hit_dfs import HitDf
22
from mandos.model.hits import AbstractHit
23
from mandos.model.utils import unlink
24
25
# note that most of these math functions are much faster than their numpy counterparts
26
# if we're not broadcasting, it's almost always better to use them
27
# some are more accurate, too
28
# e.g. we're using fsum rather than sum
29
from mandos.model.utils.setup import logger
30
31
32
class _Inf:
33
    def __init__(self, n: int):
34
        self.n = n
0 ignored issues
show
Coding Style Naming introduced by
Attribute name "n" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
35
        self.used, self.t0, self.nonzeros = set(), time.monotonic(), 0
0 ignored issues
show
Coding Style Naming introduced by
Attribute name "t0" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
36
37
    def is_used(self, c1: str, c2: str) -> bool:
0 ignored issues
show
Coding Style Naming introduced by
Argument name "c2" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
introduced by
Missing function or method docstring
Loading history...
Coding Style Naming introduced by
Argument name "c1" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
38
        return (c1, c2) in self.used or (c2, c1) in self.used
39
40
    def got(self, c1: str, c2: str, z: float) -> None:
0 ignored issues
show
introduced by
Missing function or method docstring
Loading history...
Coding Style Naming introduced by
Argument name "c1" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
Coding Style Naming introduced by
Argument name "c2" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
Coding Style Naming introduced by
Argument name "z" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
41
        self.used.add((c1, c2))
42
        self.nonzeros += int(c1 != c2 and not np.isnan(z) and 0 < z < 1)
43
        if self.i % 100 == 0:
44
            self.log("info")
45
46
    @property
47
    def i(self) -> int:
0 ignored issues
show
introduced by
Missing function or method docstring
Loading history...
48
        return len(self.used)
49
50
    def log(self, level: str) -> None:
0 ignored issues
show
introduced by
Missing function or method docstring
Loading history...
51
        delta = UnitTools.delta_time_to_str(time.monotonic() - self.t0, space=Chars.narrownbsp)
52
        logger.log(
53
            level.upper(),
54
            f"Processed {self.i:,}/{self.n:,} pairs in {delta};"
55
            + f" {self.nonzeros:,} ({self.nonzeros / self.i * 100:.1f}%) are nonzero",
56
        )
57
58
    def __repr__(self):
59
        return f"{self.__class__.__name__}({self.i}/{self.n})"
60
61
    def __str__(self):
62
        return repr(self)
63
64
65
@decorateme.auto_repr_str()
class MatrixCalculator(metaclass=abc.ABCMeta):
    """
    Base class for algorithms that calculate similarity matrices from hits.

    Subclasses must override ``calc_all``.
    """

    def __init__(
        self,
        *,
        min_compounds: int,
        min_nonzero: int,
        min_hits: int,
        exclude: Optional[Collection[str]] = None,
    ):
        """
        Stores the filtering thresholds; how they are applied is up to the subclass.

        Args:
            min_compounds: Minimum number of compounds
            min_nonzero: Minimum number of nonzero values
            min_hits: Minimum number of hits
            exclude: Keys to skip; an empty set is used if None
        """
        self.min_compounds = min_compounds
        self.min_nonzero = min_nonzero
        self.min_hits = min_hits
        self.exclude = set() if exclude is None else exclude

    def calc_all(self, hits: Path, to: Path, *, keep_temp: bool = False) -> SimilarityDfLongForm:
        """
        Calculates the similarities for a file of hits and writes them to ``to``.

        Raises:
            NotImplementedError: Always; subclasses must override this method
        """
        # ``NotImplemented`` is a non-callable constant meant for binary operators;
        # ``NotImplementedError`` is the exception for unimplemented methods.
        raise NotImplementedError()
0 ignored issues
show
Best Practice introduced by
NotImplemented raised - should raise NotImplementedError
Loading history...
Bug introduced by
NotImplemented does not seem to be callable.
Loading history...
82
83
84
class JPrimeMatrixCalculator(MatrixCalculator):
0 ignored issues
show
introduced by
Missing class docstring
Loading history...
85
    def calc_all(self, path: Path, to: Path, *, keep_temp: bool = False) -> SimilarityDfLongForm:
        """
        Calculates similarities for every search key in a hits file and writes the combined result.

        The result for each key is written to its own temporary file next to ``path``
        (see ``_path_of``). If such a file already exists and can be read, it is reused
        instead of being recalculated.

        Args:
            path: File of hits, read with ``HitDf.read_file``
            to: Path to write the concatenated long-form DataFrame to
            keep_temp: If False, delete the temporary files of the included keys afterward

        Returns:
            The concatenated DataFrame that was written to ``to``
        """
        hits = HitDf.read_file(path).to_hits()
        key_to_hit = Au.hit_multidict(hits, "search_key")
        logger.notice(f"Calculating J on {len(key_to_hit):,} keys from {len(hits):,} hits")
        good_keys = {}
        for key, key_hits in key_to_hit.items():
            if key in self.exclude:
                logger.caution(f"Excluding {key}")
                continue
            n_compounds_0 = len({k.origin_inchikey for k in key_hits})
            part_path = self._path_of(path, key)
            # reuse an earlier result if there is one; _read_part returns None if it is unusable
            df = self._read_part(key, part_path) if part_path.exists() else None
            if df is None and n_compounds_0 >= self.min_compounds:
                df = self._calc_partial(key, key_hits)
                df.write_file(part_path, attrs=True, file_hash=True)
                logger.debug(f"Wrote results for {key} to {part_path}")
            if df is not None and self._should_include(df):
                good_keys[key] = part_path
            # drop the reference before the next key so the partial result can be freed
            del df
        # log before concatenating, and report the number of files rather than rows
        logger.debug(f"Concatenating {len(good_keys):,} files")
        big_df = self._concat_parts(good_keys)
        big_df.write_file(to, attrs=True, file_hash=True)
        logger.notice(f"Wrote {len(big_df):,} rows to {to}")
        if not keep_temp:
            for part_path in good_keys.values():
                unlink(part_path)
        # the signature promises the DataFrame; previously nothing was returned
        return big_df
115
116
    def _calc_partial(self, key: str, key_hits: Sequence[AbstractHit]) -> SimilarityDfLongForm:
        """
        Calculates the long-form similarity DataFrame for one key and attaches summary attrs.

        Args:
            key: The search key the hits belong to
            key_hits: The hits for ``key``; passed on to ``calc_one``

        Returns:
            The result of ``set_attrs`` with these attributes:
            ``key``, ``quartiles`` (the 0, 0.25, 0.5, 0.75, and 1 quantiles of ``value``),
            ``n_hits``, ``n_values`` (distinct values), ``n_compounds``
            (distinct ``inchikey_1``), and ``n_real`` (rows with 0 < value < 1)
        """
        df = self.calc_one(key, key_hits).to_long_form(kind="psi", key=key)
        values = df["value"]
        return df.set_attrs(
            key=key,
            quartiles=[float(values.quantile(q)) for q in (0, 0.25, 0.5, 0.75, 1)],
            n_hits=len(key_hits),
            n_values=len(values.unique()),
            n_compounds=len(df["inchikey_1"].unique()),
            n_real=len(df[(values > 0) & (values < 1)]),
        )
126
127
    def _should_include(self, df: SimilarityDfLongForm) -> bool:
        """
        Decides whether the results for a key meet the configured minimums.

        Compares the ``n_compounds``, ``n_hits``, and ``n_real`` attrs of ``df``
        against ``min_compounds``, ``min_hits``, and ``min_nonzero``, in that order.
        Logs a warning for the first attribute that falls short.

        Returns:
            False if any attribute is below its minimum; True otherwise
        """
        key = df.attrs["key"]
        minimums = {
            "n_compounds": self.min_compounds,
            "n_hits": self.min_hits,
            "n_real": self.min_nonzero,
        }
        for attr_name, minimum in minimums.items():
            actual = df.attrs[attr_name]
            if actual < minimum:
                logger.warning(f"Key {key}: {attr_name} = {actual:,} < {minimum:,}")
                return False
        return True
136
137
    def _read_part(self, key: str, part_path: Path) -> Optional[SimilarityDfLongForm]:
        """
        Tries to load previously written results for a key.

        The file is read with ``file_hash=True``. If that raises ``HashFileMissingError``,
        the results are treated as incomplete: the file is deleted and None is returned.

        Args:
            key: The search key; only used in log messages
            part_path: Path of the per-key results file

        Returns:
            The DataFrame that was read, or None if it must be calculated from scratch
        """
        try:
            existing = SimilarityDfLongForm.read_file(part_path, file_hash=True)
        except HashFileMissingError:
            logger.error(f"Extant results for key {key} appear incomplete; restarting")
            logger.opt(exception=True).debug(f"Hash error for {key}")
            unlink(part_path)
            return None  # calculate from scratch
        logger.warning(f"Results for key {key} already exist ({len(existing):,} rows)")
        return existing
147
148
    def _concat_parts(self, keys: Mapping[str, Path]) -> SimilarityDfLongForm:
        """
        Reads the per-key result files, logs a summary of each, and concatenates them.

        Args:
            keys: Mapping from each included key to the path of its results file

        Returns:
            The DataFrame built by ``SimilarityDfLongForm.of`` from all of the parts
        """
        logger.notice(f"Included {len(keys):,} keys: {', '.join(keys)}")
        dfs = []
        # iterate over (key, path) pairs; iterating the mapping itself yields only the keys
        for key, part_path in keys.items():
            df = SimilarityDfLongForm.read_file(part_path, attrs=True)
            n_values = df.attrs["n_values"]
            n_real = df.attrs["n_real"]
            quartiles = df.attrs["quartiles"]
            logger.info(f"Key {key}:")
            prefix = f"    {key} {Chars.fatright}"
            logger.info(f"{prefix} unique values = {n_values}")
            # ":," is the thousands-separator format spec; "{n_real,}" would format a 1-tuple
            logger.info(f"{prefix} values in (0, 1) = {n_real:,}")
            # the quartiles are numbers, so convert them before joining
            logger.info(f"{prefix} quartiles: " + " | ".join(str(q) for q in quartiles))
            dfs.append(df)
        # pass the labels as a plain list, in the same order as dfs
        # NOTE(review): assumes `of` forwards `keys` as concatenation labels -- confirm
        return SimilarityDfLongForm.of(dfs, keys=list(keys))
163
164
    def calc_one(self, key: str, hits: Sequence[AbstractHit]) -> SimilarityDfShortForm:
        """
        Calculates the pairwise similarities between compounds for a single key.

        Hits are grouped by ``origin_inchikey``. Each unordered pair of compounds is
        computed once: a compound paired with itself gets 1, and any other pair gets
        the value from ``_j_prime``. Only one orientation of each pair is stored.

        Args:
            key: The search key; passed on to ``_j_prime`` and used in log messages
            hits: The hits for ``key``

        Returns:
            The short-form DataFrame built from the nested dict of values
        """
        ik2hits = Au.hit_multidict(hits, "origin_inchikey")
        logger.info(f"Calculating J on {key} for {len(ik2hits):,} compounds and {len(hits):,} hits")
        compounds = list(ik2hits.items())
        n_compounds = len(compounds)
        # The loop covers every unordered pair *including* each compound with itself,
        # so the expected total is n(n+1)/2 (not n(n-1)/2, which made progress exceed 100%).
        inf = _Inf(n=n_compounds * (n_compounds + 1) // 2)
        data = defaultdict(dict)
        for index, (c1, hits1) in enumerate(compounds):
            # start at `index` so that each unordered pair is visited exactly once
            for c2, hits2 in compounds[index:]:
                z = 1 if c1 == c2 else self._j_prime(key, hits1, hits2)
                data[c1][c2] = z
                inf.got(c1, c2, z)
        # the summary message includes a percentage of processed pairs; skip it if there are none
        if inf.i > 0:
            inf.log("success")
        return SimilarityDfShortForm.from_dict(data)
178
179
    def _path_of(self, path: Path, key: str):
0 ignored issues
show
Coding Style introduced by
This method could be written as a function/class method.

If a method does not access any attributes of the class, it could also be implemented as a function or static method. This can help improve readability. For example

class Foo:
    def some_method(self, x, y):
        return x + y;

could be written as

class Foo:
    @classmethod
    def some_method(cls, x, y):
        return x + y;
Loading history...
180
        return path.parent / f".{path.name}-{key}.tmp.feather"
181
182
    def _j_prime(
183
        self, key: str, hits1: Collection[AbstractHit], hits2: Collection[AbstractHit]
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
184
    ) -> float:
185
        if len(hits1) == 0 or len(hits2) == 0:
186
            return 0
187
        sources = {h.data_source for h in hits1}.intersection({h.data_source for h in hits2})
188
        if len(sources) == 0:
189
            return float("NaN")
190
        values = [
191
            self._jx(
192
                key,
193
                [h for h in hits1 if h.data_source == source],
194
                [h for h in hits2 if h.data_source == source],
195
            )
196
            for source in sources
197
        ]
198
        return float(math.fsum(values) / len(values))
199
200
    def _jx(
        self, key: str, hits1: Collection[AbstractHit], hits2: Collection[AbstractHit]
    ) -> float:
        """
        Averages the ratio ``_wedge / _vee`` over the weight pairs of two sets of hits.

        The weight pairs are the values of ``Au.weights_of_pairs(hits1, hits2)``.
        For two specific chemidplus keys, each hit's weight is first replaced
        with ``10 ** -weight`` (marked below as a temporary, testing-only measure).
        """
        # TODO -- for testing only
        # TODO: REMOVE ME!
        if key in ("core.chemidplus.effects", "extra.chemidplus.specific-effects"):

            def rescaled(hit):
                return hit.copy(weight=math.pow(10, -hit.weight))

            hits1 = [rescaled(h) for h in hits1]
            hits2 = [rescaled(h) for h in hits2]
        pair_to_weights = Au.weights_of_pairs(hits1, hits2)
        ratios = []
        for ca, cb in pair_to_weights.values():
            ratios.append(self._wedge(ca, cb) / self._vee(ca, cb))
        return float(math.fsum(ratios) / len(ratios))
211
212
    def _wedge(self, ca: float, cb: float) -> float:
        """Returns the geometric mean of ``Au.elle(ca)`` and ``Au.elle(cb)``."""
        product = Au.elle(ca) * Au.elle(cb)
        return math.sqrt(product)
214
215
    def _vee(self, ca: float, cb: float) -> float:
        """
        Returns ``Au.elle(ca) + Au.elle(cb)`` minus the geometric mean of the two.
        """
        elle_a = Au.elle(ca)
        elle_b = Au.elle(cb)
        return elle_a + elle_b - math.sqrt(elle_a * elle_b)
217
218
219
class MatrixAlg(CleverEnum):
    """
    The available algorithms for calculating similarity matrices.
    """

    j = ()

    @property
    def clazz(self) -> Type[MatrixCalculator]:
        """Returns the calculator class that implements this algorithm."""
        calculators = {MatrixAlg.j: JPrimeMatrixCalculator}
        return calculators[self]
225
226
227
@decorateme.auto_utils()
class MatrixCalculation:
    """
    Factory for ``MatrixCalculator`` instances.
    """

    @classmethod
    def create(
        cls,
        algorithm: Union[str, MatrixAlg],
        *,
        min_compounds: int,
        min_nonzero: int,
        min_hits: int,
        exclude: Optional[Collection[str]] = None,
    ) -> MatrixCalculator:
        """
        Builds the calculator for an algorithm.

        Args:
            algorithm: A ``MatrixAlg``, or a string that ``MatrixAlg.of`` accepts
            min_compounds: Passed to the calculator's constructor
            min_nonzero: Passed to the calculator's constructor
            min_hits: Passed to the calculator's constructor
            exclude: Passed to the calculator's constructor

        Returns:
            A new instance of the class given by the algorithm's ``clazz`` property
        """
        calculator_type = MatrixAlg.of(algorithm).clazz
        settings = dict(
            min_compounds=min_compounds,
            min_nonzero=min_nonzero,
            min_hits=min_hits,
            exclude=exclude,
        )
        return calculator_type(**settings)
245
246
247
__all__ = ["JPrimeMatrixCalculator", "MatrixAlg", "MatrixCalculation", "MatrixCalculator"]
248