mandos.analysis.distances.JPrimeMatrixCalculator.calc_one() - Code Metrics - Inspection of "feature: skeleton for plot commands" - dmyersturnbull/mandos - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — main ( a80564...ec3fe3 )

by Douglas

created 2021-07-07 20:15 UTC

JPrimeMatrixCalculator.calc_one() A

↳ Parent: mandos.analysis.distances

Complexity

Conditions

Size

Total Lines	6
Code Lines	6

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	2
eloc	6
nop	2
dl	0
loc	6
rs	10
c	0
b	0
f	0

"""
Calculations of concordance between annotations.
"""
import abc
import enum
import math
from collections import defaultdict
from typing import Collection, Sequence, Type, Union

import numpy as np

import pandas as pd


from mandos.analysis import AnalysisUtils as Au
from mandos.analysis import SimilarityDfLongForm, SimilarityDfShortForm
from mandos.model import CleverEnum
from mandos.model.hits import AbstractHit

# note that most of these math functions are much faster than their numpy counterparts
# if we're not broadcasting, it's almost always better to use them
# some are more accurate, too
# e.g. we're using fsum rather than sum


class MatrixCalculator(metaclass=abc.ABCMeta):
    """
    Base class for calculators that turn a sequence of hits into a similarity table.

    Subclasses are expected to override :meth:`calc_all`.
    """

    def calc_all(self, hits: Sequence[AbstractHit]) -> SimilarityDfLongForm:
        """
        Calculates similarities over all of ``hits``.

        Args:
            hits: The hits to calculate over

        Returns:
            A long-form similarity DataFrame

        Raises:
            NotImplementedError: Always, in this base class
        """
        # ``NotImplemented`` is a non-callable constant meant for binary-operator
        # fallbacks; the exception type is ``NotImplementedError``.
        raise NotImplementedError()



class JPrimeMatrixCalculator(MatrixCalculator):
    """
    Calculates pairwise similarity between compounds from the weights of their hits.

    Hits are grouped by ``search_key``; within each group, every pair of
    ``origin_inchikey`` values gets a score that is the mean, over the data sources
    the two compounds share, of a per-source ratio (see :meth:`_jx`).
    """

    def calc_all(self, hits: Sequence[AbstractHit]) -> SimilarityDfLongForm:
        """
        Builds one matrix per search key and concatenates them in long form.

        Args:
            hits: The hits to calculate over; grouped here by ``search_key``

        Returns:
            The concatenation of the per-key long-form tables, each converted
            with ``psi`` set to its search key
        """
        key_to_hit = Au.hit_multidict(hits, "search_key")
        dfs = []
        for key, key_hits in key_to_hit.items():
            df = self.calc_one(key_hits)
            df = df.to_long_form(psi=key)
            dfs.append(df)
        return SimilarityDfLongForm(pd.concat(dfs))

    def calc_one(self, hits: Sequence[AbstractHit]) -> SimilarityDfShortForm:
        """
        Builds the compound-by-compound matrix for a single group of hits.

        Args:
            hits: The hits to calculate over; grouped here by ``origin_inchikey``

        Returns:
            A short-form table with one entry for every ordered pair of compounds
        """
        inchikey_to_hits = Au.hit_multidict(hits, "origin_inchikey")
        data = defaultdict(dict)
        # Nested loops cover every ordered pair. Zipping the items with themselves
        # would only ever pair each compound with itself (the diagonal).
        for c1, hits1 in inchikey_to_hits.items():
            for c2, hits2 in inchikey_to_hits.items():
                data[c1][c2] = self._j_prime(hits1, hits2)
        return SimilarityDfShortForm.from_dict(data)

    def _j_prime(self, hits1: Collection[AbstractHit], hits2: Collection[AbstractHit]) -> float:
        """
        Averages the per-source score over the data sources both hit sets share.

        Returns:
            The mean of :meth:`_jx` over the shared sources, or NaN if the two
            sets have no data source in common
        """
        sources = {h.data_source for h in hits1}.intersection({h.data_source for h in hits2})
        if not sources:
            return np.nan
        values = [
            self._jx(
                [h for h in hits1 if h.data_source == source],
                # The second argument must come from hits2; filtering hits1 twice
                # would compare the first compound against itself.
                [h for h in hits2 if h.data_source == source],
            )
            for source in sources
        ]
        return float(math.fsum(values) / len(values))

    def _jx(self, hits1: Collection[AbstractHit], hits2: Collection[AbstractHit]) -> float:
        """
        Averages ``_wedge / _vee`` over the weight pairs of the two hit sets.

        Returns:
            The mean ratio, or NaN if ``Au.weights_of_pairs`` yields no pairs
        """
        pair_to_weights = Au.weights_of_pairs(hits1, hits2)
        values = [self._wedge(ca, cb) / self._vee(ca, cb) for ca, cb in pair_to_weights.values()]
        if not values:
            # Nothing to average; NaN matches what _j_prime returns for no shared sources.
            return np.nan
        return float(math.fsum(values) / len(values))

    def _wedge(self, ca: float, cb: float) -> float:
        """
        Returns the geometric mean of ``Au.elle(ca)`` and ``Au.elle(cb)``.
        """
        return math.sqrt(Au.elle(ca) * Au.elle(cb))

    def _vee(self, ca: float, cb: float) -> float:
        """
        Returns ``Au.elle(ca) + Au.elle(cb)`` minus their geometric mean.
        """
        return Au.elle(ca) + Au.elle(cb) - math.sqrt(Au.elle(ca) * Au.elle(cb))


class MatrixAlg(CleverEnum):
    """
    The matrix algorithms that can be selected, each tied to a calculator class.
    """

    j = enum.auto()

    @property
    def clazz(self) -> Type[MatrixCalculator]:
        """
        The :class:`MatrixCalculator` subclass that implements this algorithm.
        """
        implementations = {MatrixAlg.j: JPrimeMatrixCalculator}
        return implementations[self]


class MatrixCalculation:
    """
    Factory for :class:`MatrixCalculator` instances.
    """

    @classmethod
    def create(cls, algorithm: Union[str, MatrixAlg]) -> MatrixCalculator:
        """
        Instantiates the calculator for an algorithm.

        Args:
            algorithm: A :class:`MatrixAlg` member or a string that
                ``MatrixAlg.of`` can resolve to one

        Returns:
            A new instance of the calculator class mapped to the algorithm
        """
        alg = MatrixAlg.of(algorithm)
        # The calculator classes define no __init__, so they take no constructor
        # arguments; passing the algorithm name would raise a TypeError.
        return alg.clazz()


# Names exported by a star-import of this module.
# NOTE(review): MatrixAlg and MatrixCalculation are defined in this module but are
# not listed here; confirm whether leaving them out is intentional.
__all__ = ["MatrixCalculator", "JPrimeMatrixCalculator"]


1			"""
2			Calculations of concordance between annotations.
3			"""
4			import abc
5			import enum
6			import math
7			from collections import defaultdict
8			from typing import Collection, Sequence, Type, Union
9
10			import numpy as np
			0 ignored issues – show introduced 2021-06-30 04:51 UTC by Report Bug Copy Issue Report Unable to import 'numpy' Loading history...
11			import pandas as pd
			0 ignored issues – show introduced 2021-07-07 20:19 UTC by Report Bug Copy Issue Report Unable to import 'pandas' Loading history...
12
13			from mandos.analysis import AnalysisUtils as Au
14			from mandos.analysis import SimilarityDfLongForm, SimilarityDfShortForm
15			from mandos.model import CleverEnum
16			from mandos.model.hits import AbstractHit
17
18			# note that most of these math functions are much faster than their numpy counterparts
19			# if we're not broadcasting, it's almost always better to use them
20			# some are more accurate, too
21			# e.g. we're using fsum rather than sum
22
23
24			class MatrixCalculator(metaclass=abc.ABCMeta):
			0 ignored issues – show introduced 2021-06-30 04:51 UTC by Report Bug Copy Issue Report Missing class docstring Loading history...
25			def calc_all(self, hits: Sequence[AbstractHit]) -> SimilarityDfLongForm:
			0 ignored issues – show Coding Style introduced 2021-06-30 04:51 UTC by Report Bug Copy Issue Report This method could be written as a function/class method. If a method does not access any attributes of the class, it could also be implemented as a function or static method. This can help improve readability. For example class Foo: def some_method(self, x, y): return x + y; could be written as class Foo: @classmethod def some_method(cls, x, y): return x + y; Loading history... introduced 2021-06-30 04:51 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
26			raise NotImplemented()
			0 ignored issues – show Bug introduced 2021-06-30 04:51 UTC by Report Bug Copy Issue Report `NotImplemented` does not seem to be callable. Loading history... Best Practice introduced 2021-06-30 04:51 UTC by Report Bug Copy Issue Report NotImplemented raised - should raise NotImplementedError Loading history...
27
28
29			class JPrimeMatrixCalculator(MatrixCalculator):
			0 ignored issues – show introduced 2021-06-30 04:51 UTC by Report Bug Copy Issue Report Missing class docstring Loading history...
30			def calc_all(self, hits: Sequence[AbstractHit]) -> SimilarityDfLongForm:
31			key_to_hit = Au.hit_multidict(hits, "search_key")
32			dfs = []
33			for key, key_hits in key_to_hit.items():
34			df = self.calc_one(key_hits)
			0 ignored issues – show Coding Style Naming introduced 2021-07-07 20:19 UTC by Report Bug Copy Issue Report Variable name "df" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]*\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
35			df = df.to_long_form(psi=key)
			0 ignored issues – show Coding Style Naming introduced 2021-07-07 20:19 UTC by Report Bug Copy Issue Report Variable name "df" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]*\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
36			dfs += [df]
37			return SimilarityDfLongForm(pd.concat(dfs))
38
39			def calc_one(self, hits: Sequence[AbstractHit]) -> SimilarityDfShortForm:
			0 ignored issues – show introduced 2021-07-05 18:49 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
40			inchikey_to_hits = Au.hit_multidict(hits, "origin_inchikey")
41			data = defaultdict(dict)
42			for (c1, hits1), (c2, hits2) in zip(inchikey_to_hits.items(), inchikey_to_hits.items()):
			0 ignored issues – show Coding Style Naming introduced 2021-06-30 04:51 UTC by Report Bug Copy Issue Report Variable name "c2" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history... Coding Style Naming introduced 2021-06-30 04:51 UTC by Report Bug Copy Issue Report Variable name "c1" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
43			data[c1][c2] = self._j_prime(hits1, hits2)
44			return SimilarityDfShortForm.from_dict(data)
45
46			def _j_prime(self, hits1: Collection[AbstractHit], hits2: Collection[AbstractHit]) -> float:
47			sources = {h.data_source for h in hits1}.intersection({h.data_source for h in hits2})
48			if len(sources) == 0:
49			return np.nan
50			values = [
51			self._jx(
52			[h for h in hits1 if h.data_source == source],
53			[h for h in hits1 if h.data_source == source],
54			)
55			for source in sources
56			]
57			return float(math.fsum(values) / len(values))
58
59			def _jx(self, hits1: Collection[AbstractHit], hits2: Collection[AbstractHit]) -> float:
60			pair_to_weights = Au.weights_of_pairs(hits1, hits2)
61			values = [self._wedge(ca, cb) / self._vee(ca, cb) for ca, cb in pair_to_weights.values()]
62			return float(math.fsum(values) / len(values))
63
64			def _wedge(self, ca: float, cb: float) -> float:
			0 ignored issues – show Coding Style introduced 2021-06-30 04:51 UTC by Report Bug Copy Issue Report This method could be written as a function/class method. If a method does not access any attributes of the class, it could also be implemented as a function or static method. This can help improve readability. For example class Foo: def some_method(self, x, y): return x + y; could be written as class Foo: @classmethod def some_method(cls, x, y): return x + y; Loading history... Coding Style Naming introduced 2021-06-30 04:51 UTC by Report Bug Copy Issue Report Argument name "cb" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history... Coding Style Naming introduced 2021-06-30 04:51 UTC by Report Bug Copy Issue Report Argument name "ca" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
65			return math.sqrt(Au.elle(ca) * Au.elle(cb))
66
67			def _vee(self, ca: float, cb: float) -> float:
			0 ignored issues – show Coding Style Naming introduced 2021-06-30 04:51 UTC by Report Bug Copy Issue Report Argument name "ca" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history... Coding Style Naming introduced 2021-06-30 04:51 UTC by Report Bug Copy Issue Report Argument name "cb" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history... Coding Style introduced 2021-06-30 04:51 UTC by Report Bug Copy Issue Report This method could be written as a function/class method. If a method does not access any attributes of the class, it could also be implemented as a function or static method. This can help improve readability. For example class Foo: def some_method(self, x, y): return x + y; could be written as class Foo: @classmethod def some_method(cls, x, y): return x + y; Loading history...
68			return Au.elle(ca) + Au.elle(cb) - math.sqrt(Au.elle(ca) * Au.elle(cb))
69
70
71			class MatrixAlg(CleverEnum):
			0 ignored issues – show introduced 2021-07-05 18:49 UTC by Report Bug Copy Issue Report Missing class docstring Loading history...
72			j = enum.auto()
73
74			@property
75			def clazz(self) -> Type[MatrixCalculator]:
			0 ignored issues – show introduced 2021-07-05 18:49 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
76			return {MatrixAlg.j: JPrimeMatrixCalculator}[self]
77
78
79			class MatrixCalculation:
			0 ignored issues – show introduced 2021-07-05 18:49 UTC by Report Bug Copy Issue Report Missing class docstring Loading history...
80			@classmethod
81			def create(cls, algorithm: Union[str, MatrixAlg]) -> MatrixCalculator:
			0 ignored issues – show introduced 2021-07-07 20:19 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
82			alg_name = algorithm if isinstance(algorithm, str) else algorithm.name
83			alg = MatrixAlg.of(algorithm)
84			return alg.clazz(alg_name)
85
86
87			__all__ = ["MatrixCalculator", "JPrimeMatrixCalculator"]
88

dmyersturnbull / mandos

Push — main ( a80564...ec3fe3 )

JPrimeMatrixCalculator.calc_one() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like