Passed
Push — main ( 9ee5db...f62a2c )
by Douglas
01:58
created

JPrimeMatrixCalculator._read_hits()   A

Complexity

Conditions 5

Size

Total Lines 13
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 5
eloc 13
nop 2
dl 0
loc 13
rs 9.2833
c 0
b 0
f 0
1
"""
2
Calculations of overlap (similarity) between annotation sets.
3
"""
4
import abc
5
import math
6
import time
7
from collections import defaultdict
8
from pathlib import Path
9
from typing import Collection, Mapping, Optional, Sequence, Type, Union
10
11
import decorateme
0 ignored issues
show
introduced by
Unable to import 'decorateme'
Loading history...
12
import numpy as np
0 ignored issues
show
introduced by
Unable to import 'numpy'
Loading history...
13
from pocketutils.core.chars import Chars
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.chars'
Loading history...
14
from pocketutils.core.enums import CleverEnum
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.enums'
Loading history...
15
from pocketutils.core.exceptions import XValueError
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.exceptions'
Loading history...
Unused Code introduced by
Unused XValueError imported from pocketutils.core.exceptions
Loading history...
16
from pocketutils.tools.unit_tools import UnitTools
0 ignored issues
show
introduced by
Unable to import 'pocketutils.tools.unit_tools'
Loading history...
17
from typeddfs.df_errors import HashFileMissingError
0 ignored issues
show
introduced by
Unable to import 'typeddfs.df_errors'
Loading history...
18
19
from mandos.analysis import AnalysisUtils as Au
20
from mandos.analysis.io_defns import SimilarityDfLongForm, SimilarityDfShortForm
21
from mandos.model.hit_dfs import HitDf
22
from mandos.model.hits import AbstractHit
23
from mandos.model.utils import unlink
24
25
# note that most of these math functions are much faster than their numpy counterparts
26
# if we're not broadcasting, it's almost always better to use them
27
# some are more accurate, too
28
# e.g. we're using fsum rather than sum
29
from mandos.model.utils.setup import logger
30
31
32
class _Inf:
    """
    Progress tracker for the pairwise comparisons made while building one matrix.

    Remembers which pairs were already handled, counts the pairs between
    distinct items whose value lies strictly between 0 and 1,
    and logs a summary after every 1000th recorded pair.
    """

    def __init__(self, n: int):
        """
        Args:
            n: The number of pairs the caller expects to process.
               It is only used for display (log messages and ``repr``).
        """
        self.n = n
        # used: the (c1, c2) pairs recorded so far; t0: start time; nonzeros: see got()
        self.used, self.t0, self.nonzeros = set(), time.monotonic(), 0

    def is_used(self, c1: str, c2: str) -> bool:
        """Return whether the pair was already recorded, in either order."""
        return (c1, c2) in self.used or (c2, c1) in self.used

    def got(self, c1: str, c2: str, z: float) -> None:
        """
        Record that the pair ``(c1, c2)`` was processed with value ``z``.

        The pair counts toward ``nonzeros`` only if ``c1 != c2``, ``z`` is not NaN,
        and ``0 < z < 1``.
        """
        self.used.add((c1, c2))
        self.nonzeros += int(c1 != c2 and not np.isnan(z) and 0 < z < 1)
        if self.i % 1000 == 0:
            self.log("info")

    @property
    def i(self) -> int:
        """The number of pairs recorded so far."""
        return len(self.used)

    def _percent_nonzero(self) -> float:
        """
        Return ``nonzeros`` as a percentage of the recorded pairs.

        Returns 0.0 when nothing was recorded yet, so that logging a summary
        for an empty calculation does not raise ``ZeroDivisionError``.
        """
        if self.i == 0:
            return 0.0
        return self.nonzeros / self.i * 100

    def log(self, level: str) -> None:
        """Log a progress summary at the given level (name is upper-cased before use)."""
        delta = UnitTools.delta_time_to_str(time.monotonic() - self.t0, space=Chars.narrownbsp)
        logger.log(
            level.upper(),
            f"Processed {self.i:,}/{self.n:,} pairs in {delta};"
            + f" {self.nonzeros:,} ({self._percent_nonzero():.1f}%) are nonzero",
        )

    def __repr__(self):
        return f"{self.__class__.__name__}({self.i}/{self.n})"

    def __str__(self):
        return repr(self)
63
64
65
@decorateme.auto_repr_str()
0 ignored issues
show
introduced by
Missing class docstring
Loading history...
66
class MatrixCalculator(metaclass=abc.ABCMeta):
    """
    Base class for calculators that turn a file of hits into a similarity matrix.

    Subclasses must override :meth:`calc_all`.
    """

    def __init__(
        self,
        *,
        min_compounds: int,
        min_nonzero: int,
        min_hits: int,
        exclude: Optional[Collection[str]] = None,
    ):
        """
        Stores the settings on the instance; how they are applied is up to the subclass.

        Args:
            min_compounds: Minimum number of compounds
            min_nonzero: Minimum number of nonzero values
            min_hits: Minimum number of hits
            exclude: Keys to exclude; an empty set is used if ``None``
        """
        self.min_compounds = min_compounds
        self.min_nonzero = min_nonzero
        self.min_hits = min_hits
        self.exclude = set() if exclude is None else exclude

    def calc_all(self, hits: Path, to: Path, *, keep_temp: bool = False) -> SimilarityDfLongForm:
        """
        Calculates the matrix from the hits file and writes it to ``to``.

        Raises:
            NotImplementedError: Always; subclasses must override this method
        """
        # NotImplemented is a comparison sentinel and is not callable;
        # the exception type is NotImplementedError.
        raise NotImplementedError()
0 ignored issues
show
Best Practice introduced by
NotImplemented raised - should raise NotImplementedError
Loading history...
Bug introduced by
NotImplemented does not seem to be callable.
Loading history...
82
83
84
class JPrimeMatrixCalculator(MatrixCalculator):
    """
    Calculates pairwise similarities between compounds, one search key at a time.

    Results for each key are written to a temporary "part" file next to the input
    so that an interrupted run can pick up where it left off.
    The parts that pass the ``min_*`` thresholds are concatenated into the final output.
    """

    def calc_all(self, path: Path, to: Path, *, keep_temp: bool = False) -> SimilarityDfLongForm:
        """
        Calculates the similarities for every search key and writes the combined result.

        Args:
            path: The file of hits to read
                  (the base class names this parameter ``hits``; the name is kept
                  so that existing keyword callers keep working)
            to: Where to write the concatenated long-form data frame
            keep_temp: If false, delete the per-key part files afterward

        Returns:
            The concatenated data frame that was written to ``to``
        """
        hits = self._read_hits(path)
        key_to_hit = Au.hit_multidict(hits, "search_key")
        logger.notice(f"Calculating J on {len(key_to_hit):,} keys from {len(hits):,} hits")
        good_keys = {}
        # every part file that exists after the loop, whether or not its key is included
        part_paths = []
        for key, key_hits in key_to_hit.items():
            n_compounds_0 = len({k.origin_inchikey for k in key_hits})
            part_path = self._path_of(path, key)
            df = None
            if part_path.exists():
                # returns None (and removes the file) if the part looks incomplete
                df = self._read_part(key, part_path)
            if df is None and n_compounds_0 >= self.min_compounds:
                df = self._calc_partial(key, key_hits)
                df.write_file(part_path, attrs=True, file_hash=True, mkdirs=True)
                logger.debug(f"Wrote results for {key} to {part_path}")
            if df is None:
                continue
            part_paths.append(part_path)
            if self._should_include(df):
                good_keys[key] = part_path
            # only the path is needed from here on; the part is re-read in _concat_parts
            del df
        big_df = self._concat_parts(good_keys)
        big_df.write_file(to, attrs=True, file_hash=True, mkdirs=True)
        logger.notice(f"Wrote {len(big_df):,} rows to {to}")
        if not keep_temp:
            # also remove the parts of keys that failed the thresholds,
            # which would otherwise be left behind as hidden files
            for part_path in part_paths:
                unlink(part_path)
        return big_df

    def _read_hits(self, path: Path) -> Sequence[AbstractHit]:
        """
        Reads the hits, dropping excluded keys and hits with nonpositive weights.

        Logs an error for each excluded key that is absent from the file
        and for each non-excluded key that has hits with ``weight <= 0``.
        """
        hits = HitDf.read_file(path)
        keys = hits["search_key"].unique()
        known_keys = set(keys)
        bad_excludes = [e for e in self.exclude if e not in known_keys]
        if bad_excludes:
            logger.error(f"Keys to exclude are not in the input file: {', '.join(bad_excludes)}")
        for key in keys:
            if key in self.exclude:
                continue
            dfx = hits[hits["search_key"] == key]
            negatives = dfx[dfx["weight"] <= 0]
            if len(negatives) > 0:
                logger.error(f"{len(negatives)} / {len(dfx):,} hits for {key} are nonpositive")
        return [h for h in hits.to_hits() if h.search_key not in self.exclude and h.weight > 0]

    def _calc_partial(self, key: str, key_hits: Sequence[AbstractHit]) -> SimilarityDfLongForm:
        """
        Calculates the long-form matrix for one key and attaches summary attrs.

        The attrs (``n_hits``, ``n_compounds``, ``n_real``, etc.) are what
        :meth:`_should_include` and :meth:`_concat_parts` read back later.
        """
        df = self.calc_one(key, key_hits).to_long_form(kind="psi", key=key)
        values = df["value"]
        return df.set_attrs(
            key=key,
            quartiles=[float(values.quantile(x)) for x in [0, 0.25, 0.5, 0.75, 1]],
            n_hits=len(key_hits),
            n_values=len(values.unique()),
            n_compounds=len(df["inchikey_1"].unique()),
            n_real=len(df[(values > 0) & (values < 1)]),
        )

    def _should_include(self, df: SimilarityDfLongForm) -> bool:
        """
        Returns whether the part meets all of the ``min_*`` thresholds.

        Logs a warning naming the first attribute that falls short.
        """
        key = df.attrs["key"]
        reqs = dict(n_compounds=self.min_compounds, n_hits=self.min_hits, n_real=self.min_nonzero)
        for a, mn in reqs.items():
            v = df.attrs[a]
            if v < mn:
                logger.warning(f"Key {key}: {a} = {v:,} < {mn:,}")
                return False
        return True

    def _read_part(self, key: str, part_path: Path) -> Optional[SimilarityDfLongForm]:
        """
        Reads an existing part file, verifying its hash.

        Returns:
            The data frame, or ``None`` if the hash file was missing,
            in which case the part file is deleted so that it gets recalculated
        """
        try:
            df = SimilarityDfLongForm.read_file(part_path, file_hash=True, attrs=True)
            logger.warning(f"Results for key {key} already exist ({len(df):,} rows)")
            return df
        except HashFileMissingError:
            logger.error(f"Extant results for key {key} appear incomplete; restarting")
            logger.opt(exception=True).debug(f"Hash error for {key}")
            unlink(part_path)
        return None  # calculate from scratch

    def _concat_parts(self, keys: Mapping[str, Path]) -> SimilarityDfLongForm:
        """Reads the part file of each key, logs its summary attrs, and concatenates them."""
        logger.notice(f"Included {len(keys):,} keys: {', '.join(keys)}")
        dfs = []
        for key, pp in keys.items():
            df = SimilarityDfLongForm.read_file(pp, attrs=True)
            n_values = df.attrs["n_values"]
            n_real = df.attrs["n_real"]
            quartiles = df.attrs["quartiles"]
            logger.info(f"Key {key}:")
            prefix = f"    {key} {Chars.fatright}"
            logger.info(f"{prefix} unique values = {n_values}")
            logger.info(f"{prefix} values in (0, 1) = {n_real:,}")
            logger.info(f"{prefix} quartiles: " + " | ".join([str(s) for s in quartiles]))
            dfs.append(df)
        return SimilarityDfLongForm.of(dfs, keys=keys)

    def calc_one(self, key: str, hits: Sequence[AbstractHit]) -> SimilarityDfShortForm:
        """
        Calculates the similarity between every pair of compounds for one key.

        Each unordered pair is computed once; a compound paired with itself gets 1.
        """
        ik2hits = Au.hit_multidict(hits, "origin_inchikey")
        logger.info(f"Calculating J on {key} for {len(ik2hits):,} compounds and {len(hits):,} hits")
        data = defaultdict(dict)
        n_compounds = len(ik2hits)
        # unordered pairs *including* each compound with itself, since those are recorded too
        inf = _Inf(n=n_compounds * (n_compounds + 1) // 2)
        for c1, hits1 in ik2hits.items():
            for c2, hits2 in ik2hits.items():
                if inf.is_used(c1, c2):
                    continue
                z = 1 if c1 == c2 else self._j_prime(key, hits1, hits2)
                data[c1][c2] = z
                inf.got(c1, c2, z)
        inf.log("success")
        return SimilarityDfShortForm.from_dict(data)

    def _path_of(self, path: Path, key: str):
        """Returns the path of the hidden part file for ``key``, in the directory of ``path``."""
        return path.parent / f".{path.name}-{key}.tmp.feather"

    def _j_prime(
        self, key: str, hits1: Collection[AbstractHit], hits2: Collection[AbstractHit]
    ) -> float:
        """
        Averages :meth:`_jx` over the data sources that both hit collections share.

        Returns:
            0 if either collection is empty, NaN if they share no data source,
            and otherwise the mean of the per-source values
        """
        if len(hits1) == 0 or len(hits2) == 0:
            return 0.0
        sources = {h.data_source for h in hits1}.intersection({h.data_source for h in hits2})
        if len(sources) == 0:
            return float("NaN")
        values = [
            self._jx(
                key,
                [h for h in hits1 if h.data_source == source],
                [h for h in hits2 if h.data_source == source],
            )
            for source in sources
        ]
        return float(math.fsum(values) / len(values))

    def _jx(
        self, key: str, hits1: Collection[AbstractHit], hits2: Collection[AbstractHit]
    ) -> float:
        """
        Averages ``wedge / vee`` over the weight pairs from ``Au.weights_of_pairs``.

        Pairs for which ``vee`` is not positive are skipped.

        Returns:
            NaN if both collections are empty or no pair has a positive ``vee``,
            0 if exactly one collection is empty, and otherwise the mean ratio
        """
        if len(hits1) == len(hits2) == 0:
            return float("NaN")
        if len(hits1) == 0 or len(hits2) == 0:
            return 0.0
        pair_to_weights = Au.weights_of_pairs(hits1, hits2)
        values = []
        for ca, cb in pair_to_weights.values():
            wedge = self._wedge(ca, cb)
            vee = self._vee(ca, cb)
            if vee > 0:
                values.append(wedge / vee)
        if len(values) == 0:
            # nothing to average; undefined rather than a ZeroDivisionError
            return float("NaN")
        return float(math.fsum(values) / len(values))

    def _wedge(self, ca: float, cb: float) -> float:
        """Returns the geometric mean of ``Au.elle(ca)`` and ``Au.elle(cb)``."""
        return math.sqrt(Au.elle(ca) * Au.elle(cb))

    def _vee(self, ca: float, cb: float) -> float:
        """Returns ``Au.elle(ca) + Au.elle(cb)`` minus their geometric mean."""
        la, lb = Au.elle(ca), Au.elle(cb)
        return la + lb - math.sqrt(la * lb)
231
232
233
class MatrixAlg(CleverEnum):
    """The algorithms available for calculating a similarity matrix."""

    j = ()

    @property
    def clazz(self) -> Type[MatrixCalculator]:
        """The calculator class that implements this algorithm."""
        implementations = {MatrixAlg.j: JPrimeMatrixCalculator}
        return implementations[self]
239
240
241
@decorateme.auto_utils()
0 ignored issues
show
introduced by
Missing class docstring
Loading history...
242
class MatrixCalculation:
    """Factory for :class:`MatrixCalculator` instances."""

    @classmethod
    def create(
        cls,
        algorithm: Union[str, MatrixAlg],
        *,
        min_compounds: int,
        min_nonzero: int,
        min_hits: int,
        exclude: Optional[Collection[str]] = None,
    ) -> MatrixCalculator:
        """
        Builds the calculator for an algorithm, given as a ``MatrixAlg`` or its name.

        The remaining arguments are passed unchanged to the calculator's constructor.
        """
        calculator_type = MatrixAlg.of(algorithm).clazz
        return calculator_type(
            min_compounds=min_compounds,
            min_nonzero=min_nonzero,
            min_hits=min_hits,
            exclude=exclude,
        )
259
260
261
__all__ = ["JPrimeMatrixCalculator", "MatrixAlg", "MatrixCalculation", "MatrixCalculator"]
262