Passed
Push — main ( f62a2c...b1b54a )
by Douglas
02:02
created

JPrimeMatrixCalculator._part_path()   A

Complexity

Conditions 1

Size

Total Lines 2
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 2
nop 3
dl 0
loc 2
rs 10
c 0
b 0
f 0
1
"""
2
Calculations of overlap (similarity) between annotation sets.
3
"""
4
import abc
5
import math
6
import time
7
from collections import defaultdict
8
from pathlib import Path
9
from typing import Collection, Mapping, Optional, Sequence, Type, Union
10
11
import decorateme
0 ignored issues
show
introduced by
Unable to import 'decorateme'
Loading history...
12
import numpy as np
0 ignored issues
show
introduced by
Unable to import 'numpy'
Loading history...
13
from pocketutils.core.chars import Chars
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.chars'
Loading history...
14
from pocketutils.core.enums import CleverEnum
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.enums'
Loading history...
15
from pocketutils.core.exceptions import XValueError
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.exceptions'
Loading history...
Unused Code introduced by
Unused XValueError imported from pocketutils.core.exceptions
Loading history...
16
from pocketutils.tools.unit_tools import UnitTools
0 ignored issues
show
introduced by
Unable to import 'pocketutils.tools.unit_tools'
Loading history...
17
from typeddfs.df_errors import HashFileMissingError
0 ignored issues
show
introduced by
Unable to import 'typeddfs.df_errors'
Loading history...
18
19
from mandos.analysis import AnalysisUtils as Au
20
from mandos.analysis.io_defns import SimilarityDfLongForm, SimilarityDfShortForm
21
from mandos.model.hit_dfs import HitDf
22
from mandos.model.hits import AbstractHit
23
from mandos.model.utils import unlink
24
25
# note that most of these math functions are much faster than their numpy counterparts
26
# if we're not broadcasting, it's almost always better to use them
27
# some are more accurate, too
28
# e.g. we're using fsum rather than sum
29
from mandos.model.utils.setup import logger
30
31
32
class _Inf:
    """
    Progress tracker for a pairwise calculation.

    Remembers which pairs were already handled, counts how many off-diagonal
    values lie strictly between 0 and 1, and emits a progress message every
    1,000 recorded pairs.
    """

    def __init__(self, n: int):
        """
        Start tracking.

        Args:
            n: The expected number of pairs; only used in log messages and ``repr``.
        """
        self.n = n
        self.used, self.t0, self.nonzeros = set(), time.monotonic(), 0

    def is_used(self, c1: str, c2: str) -> bool:
        """Return whether the pair was already recorded, in either order."""
        return (c1, c2) in self.used or (c2, c1) in self.used

    def got(self, c1: str, c2: str, z: float) -> None:
        """
        Record that the value ``z`` was obtained for the pair ``(c1, c2)``.

        Logs progress at "info" level whenever the number of recorded pairs
        is a multiple of 1,000.
        """
        self.used.add((c1, c2))
        # only off-diagonal, non-NaN values strictly inside (0, 1) are counted
        self.nonzeros += int(c1 != c2 and not np.isnan(z) and 0 < z < 1)
        if self.i % 1000 == 0:
            self.log("info")

    @property
    def i(self) -> int:
        """The number of pairs recorded so far."""
        return len(self.used)

    @property
    def _percent_nonzero(self) -> float:
        """
        Percentage of recorded pairs that were counted in ``nonzeros``.

        Returns 0.0 when nothing was recorded yet, so that logging an empty
        calculation does not raise ``ZeroDivisionError``.
        """
        if self.i == 0:
            return 0.0
        return self.nonzeros / self.i * 100

    def log(self, level: str) -> None:
        """
        Log a progress summary.

        Args:
            level: Name of the log level (case-insensitive), e.g. "info" or "success".
        """
        delta = UnitTools.delta_time_to_str(time.monotonic() - self.t0, space=Chars.narrownbsp)
        logger.log(
            level.upper(),
            f"Processed {self.i:,}/{self.n:,} pairs in {delta};"
            + f" {self.nonzeros:,} ({self._percent_nonzero:.1f}%) are nonzero",
        )

    def __repr__(self):
        return f"{self.__class__.__name__}({self.i}/{self.n})"

    def __str__(self):
        return repr(self)
63
64
65
@decorateme.auto_repr_str()
class MatrixCalculator(metaclass=abc.ABCMeta):
    """
    Base class for calculators that turn a file of hits into a long-form similarity table.

    Subclasses must override :meth:`calc_all`.
    """

    def __init__(
        self,
        *,
        min_compounds: int,
        min_nonzero: int,
        min_hits: int,
        exclude: Optional[Collection[str]] = None,
    ):
        """
        Store the thresholds and exclusions.

        Args:
            min_compounds: Threshold stored as ``self.min_compounds``
            min_nonzero: Threshold stored as ``self.min_nonzero``
            min_hits: Threshold stored as ``self.min_hits``
            exclude: Keys to leave out; an empty set is used if ``None``
        """
        self.min_compounds = min_compounds
        self.min_nonzero = min_nonzero
        self.min_hits = min_hits
        self.exclude = set() if exclude is None else exclude

    def calc_all(self, hits: Path, to: Path, *, keep_temp: bool = False) -> SimilarityDfLongForm:
        """
        Calculate the similarity table; to be overridden by subclasses.

        Raises:
            NotImplementedError: Always, in this base class
        """
        # NotImplemented is a non-callable constant; the exception class is NotImplementedError
        raise NotImplementedError()
0 ignored issues
show
Best Practice introduced by
NotImplemented raised - should raise NotImplementedError
Loading history...
Bug introduced by
NotImplemented does not seem to be callable.
Loading history...
82
83
84
class JPrimeMatrixCalculator(MatrixCalculator):
    """
    Calculates a compound-by-compound similarity ("J") table for each search key.

    Results for each key are written to a temporary "part" file next to the output,
    so an interrupted run can pick up the parts that were already written.
    """

    def calc_all(self, path: Path, to: Path, *, keep_temp: bool = False) -> SimilarityDfLongForm:
        """
        Calculate similarities for every search key and write the combined table.

        Args:
            path: File of hits to read
            to: Output path for the concatenated long-form table
            keep_temp: If False, delete the part files of the included keys afterward

        Returns:
            The concatenated table that was written to ``to``
        """
        hits = self._read_hits(path)
        key_to_hit = Au.hit_multidict(hits, "search_key")
        logger.notice(f"Calculating J on {len(key_to_hit):,} keys from {len(hits):,} hits")
        good_keys = {}
        for key, key_hits in key_to_hit.items():
            n_compounds_0 = len({k.origin_inchikey for k in key_hits})
            part_path = self._part_path(to, key)
            df = None
            if part_path.exists():
                # may return None if the existing part file looked incomplete
                df = self._read_part(key, part_path)
            if df is None and n_compounds_0 >= self.min_compounds:
                df = self._calc_partial(key, key_hits)
                df.write_file(part_path, attrs=True, file_hash=True, mkdirs=True)
                logger.debug(f"Wrote results for {key} to {part_path}")
            if df is not None and self._should_include(df):
                good_keys[key] = part_path
            # drop the reference before the next iteration; parts are re-read when concatenating
            del df
        big_df = self._concat_parts(good_keys)
        big_df.write_file(to, attrs=True, file_hash=True, mkdirs=True)
        logger.notice(f"Wrote {len(big_df):,} rows to {to}")
        attrs_path = to.parent / (to.name + ".attrs.json")
        logger.success(f"Finished -- see {attrs_path} for statistics")
        if not keep_temp:
            for k in good_keys:
                unlink(self._part_path(to, k))
        return big_df

    def _read_hits(self, path: Path) -> Sequence[AbstractHit]:
        """
        Read hits from ``path``, dropping excluded keys and hits with nonpositive weight.

        Logs an error for excluded keys that are absent from the file and for
        keys that have nonpositive-weight hits; neither is fatal.
        """
        hits = HitDf.read_file(path)
        keys = hits["search_key"].unique()
        bad_excludes = [e for e in self.exclude if e not in keys]
        if len(bad_excludes) > 0:
            logger.error(f"Keys to exclude are not in the input file: {', '.join(bad_excludes)}")
        for key in keys:
            if key in self.exclude:
                continue
            dfx = hits[hits["search_key"] == key]
            negatives = dfx[dfx["weight"] <= 0]
            if len(negatives) > 0:
                logger.error(f"{len(negatives)} / {len(dfx):,} hits for {key} are nonpositive")
        return [h for h in hits.to_hits() if h.search_key not in self.exclude and h.weight > 0]

    def _calc_partial(self, key: str, key_hits: Sequence[AbstractHit]) -> SimilarityDfLongForm:
        """Calculate the long-form table for one key and attach summary statistics as attrs."""
        df = self.calc_one(key, key_hits).to_long_form(kind="psi", key=key)
        values = df["value"]
        return df.set_attrs(
            key=key,
            quartiles=[float(values.quantile(x)) for x in [0, 0.25, 0.5, 0.75, 1]],
            n_hits=len(key_hits),
            n_values=len(values.unique()),
            n_compounds=len(df["inchikey_1"].unique()),
            # number of values strictly between 0 and 1
            n_real=len(df[(values.notna()) & (values > 0) & (values < 1)]),
        )

    def _should_include(self, df: SimilarityDfLongForm) -> bool:
        """Return whether the attrs of ``df`` meet all minimums; logs a warning for the first failure."""
        key = df.attrs["key"]
        reqs = dict(n_compounds=self.min_compounds, n_hits=self.min_hits, n_real=self.min_nonzero)
        for a, mn in reqs.items():
            v = df.attrs[a]
            if v < mn:
                logger.warning(f"Key {key}: {a} = {v:,} < {mn:,}")
                return False
        return True

    def _read_part(self, key: str, part_path: Path) -> Optional[SimilarityDfLongForm]:
        """
        Read an existing part file.

        Returns:
            The table, or ``None`` if its hash file was missing; in that case
            the part file is deleted so that it gets recalculated.
        """
        try:
            df = SimilarityDfLongForm.read_file(part_path, file_hash=True, attrs=True)
            logger.warning(f"Results for key {key} already exist ({len(df):,} rows)")
            return df
        except HashFileMissingError:
            logger.error(f"Extant results for key {key} appear incomplete; restarting")
            logger.opt(exception=True).debug(f"Hash error for {key}")
            unlink(part_path)
        return None  # calculate from scratch

    def _concat_parts(self, keys: Mapping[str, Path]):
        """Read the part file of every key and combine them into a single table."""
        logger.notice(f"Included {len(keys):,} keys: {', '.join(keys)}")
        dfs = [SimilarityDfLongForm.read_file(pp, attrs=True) for pp in keys.values()]
        return SimilarityDfLongForm.of(dfs, keys=keys)

    def calc_one(self, key: str, hits: Sequence[AbstractHit]) -> SimilarityDfShortForm:
        """
        Calculate the similarity between every pair of compounds for one key.

        Each unordered pair is calculated once; a compound paired with itself gets 1.
        """
        ik2hits = Au.hit_multidict(hits, "origin_inchikey")
        logger.info(f"Calculating J on {key} for {len(ik2hits):,} compounds and {len(hits):,} hits")
        data = defaultdict(dict)
        inf = _Inf(n=int(len(ik2hits) * (len(ik2hits) - 1) / 2))
        for (c1, hits1) in ik2hits.items():
            for (c2, hits2) in ik2hits.items():
                if inf.is_used(c1, c2):
                    continue  # the reversed pair was already handled
                z = 1 if c1 == c2 else self._j_prime(key, hits1, hits2)
                data[c1][c2] = z
                inf.got(c1, c2, z)
        inf.log("success")
        return SimilarityDfShortForm.from_dict(data)

    def _part_path(self, path: Path, key: str):
        """Return the hidden temporary file path for ``key``, next to ``path``."""
        return path.parent / f".{path.name}-{key}.tmp.feather"

    def _j_prime(
        self, key: str, hits1: Collection[AbstractHit], hits2: Collection[AbstractHit]
    ) -> float:
        """
        Average :meth:`_jx` over the data sources shared by both sets of hits.

        Returns:
            NaN if both sets are empty or they share no data source,
            0 if exactly one set is empty, and the mean otherwise
        """
        if len(hits1) == len(hits2) == 0:
            return float("NaN")  # TODO: Can this even happen?
        if len(hits1) == 0 or len(hits2) == 0:
            return 0.0
        sources = {h.data_source for h in hits1}.intersection({h.data_source for h in hits2})
        if len(sources) == 0:
            return float("NaN")
        values = [
            self._jx(
                key,
                [h for h in hits1 if h.data_source == source],
                [h for h in hits2 if h.data_source == source],
            )
            for source in sources
        ]
        return float(math.fsum(values) / len(values))

    def _jx(
        self, key: str, hits1: Collection[AbstractHit], hits2: Collection[AbstractHit]
    ) -> float:
        """
        Average wedge/vee over the weight pairs of the two sets of hits.

        Pairs whose vee is not positive are skipped.

        Returns:
            NaN if both sets are empty or no pair had a positive vee,
            0 if exactly one set is empty, and the mean otherwise
        """
        if len(hits1) == len(hits2) == 0:
            return float("NaN")  # TODO: impossible, right?
        if len(hits1) == 0 or len(hits2) == 0:
            return 0.0
        pair_to_weights = Au.weights_of_pairs(hits1, hits2)
        values = []
        for ca, cb in pair_to_weights.values():
            wedge = self._wedge(ca, cb)
            vee = self._vee(ca, cb)
            if vee > 0:
                values.append(wedge / vee)
        if not values:
            # nothing to average; avoid ZeroDivisionError
            # NOTE(review): NaN chosen to match the other undefined cases -- confirm
            return float("NaN")
        return float(math.fsum(values) / len(values))

    def _wedge(self, ca: float, cb: float) -> float:
        """Return the square root of the product of ``Au.elle`` applied to each weight."""
        return math.sqrt(Au.elle(ca) * Au.elle(cb))

    def _vee(self, ca: float, cb: float) -> float:
        """Return the sum of ``Au.elle`` of each weight minus the square root of their product."""
        la, lb = Au.elle(ca), Au.elle(cb)
        return la + lb - math.sqrt(la * lb)
227
228
229
class MatrixAlg(CleverEnum):
    """The available similarity-matrix algorithms."""

    j = ()

    @property
    def clazz(self) -> Type[MatrixCalculator]:
        """The calculator class that implements this algorithm."""
        calculators = {MatrixAlg.j: JPrimeMatrixCalculator}
        return calculators[self]
235
236
237
@decorateme.auto_utils()
class MatrixCalculation:
    """Factory for :class:`MatrixCalculator` instances."""

    @classmethod
    def create(
        cls,
        algorithm: Union[str, MatrixAlg],
        *,
        min_compounds: int,
        min_nonzero: int,
        min_hits: int,
        exclude: Optional[Collection[str]] = None,
    ) -> MatrixCalculator:
        """
        Build the calculator for an algorithm.

        Args:
            algorithm: A ``MatrixAlg`` or a string that ``MatrixAlg.of`` accepts
            min_compounds: Passed through to the calculator
            min_nonzero: Passed through to the calculator
            min_hits: Passed through to the calculator
            exclude: Passed through to the calculator

        Returns:
            A new instance of the calculator class for ``algorithm``
        """
        calculator_type = MatrixAlg.of(algorithm).clazz
        settings = dict(
            min_compounds=min_compounds,
            min_nonzero=min_nonzero,
            min_hits=min_hits,
            exclude=exclude,
        )
        return calculator_type(**settings)
255
256
257
__all__ = ["JPrimeMatrixCalculator", "MatrixAlg", "MatrixCalculation", "MatrixCalculator"]
258