mandos.model.utils.mappings - Code Metrics - Inspection of "feat: add prediction search; improve taxa" - dmyersturnbull/mandos - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — main ( 9813db...5006f2 )

by Douglas

created 2021-08-07 00:13 UTC

mandos.model.utils.mappings A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	100
Duplicated Lines	0 %

Importance

Changes

Metric	Value
eloc	49
dl	0
loc	100
rs	10
c	0
b	0
f	0
wmc	11

4 Methods

Rating	Name	Size	Complexity
A	_Compiler.compile()	8	2
A	Mappings.from_path()	13	1
A	_Compiler.__init__()	2	1
A	Mappings.from_resource()	4	1

3 Functions

Rating	Name	Size	Complexity
A	_get()	9	4
A	_patterns()	2	1
A	_targets()	2	1

from pathlib import Path

from typing import Sequence

import regex

import pandas as pd

from typeddfs import TypedDfs


from mandos.model import MandosResources

# DataFrame type built by ``TypedDfs.untyped`` (presumably without column constraints -- confirm
# against typeddfs). Its docstring and the ``get``/``patterns``/``targets`` methods are attached
# further down in this module.
MappingFrame = TypedDfs.untyped("MappingFrame")


def _patterns(self: pd.DataFrame) -> Sequence[str]:
    return self[self.columns[0]].values.tolist()


def _targets(self: pd.DataFrame) -> Sequence[str]:
    return self.columns[1:].values.tolist()


def _get(self: pd.DataFrame, s: str) -> Sequence[str]:

    for irow, pattern in enumerate(self[self.columns[0]].values):
        try:
            match: regex.Match = pattern.fullmatch(s)
        except AttributeError:
            raise ValueError(f"Failed on regex {pattern}") from None
        if match is not None:
            return [pattern.sub(t, s.strip()) for t in self.T[irow] if isinstance(t, str)]
    return s


MappingFrame.__doc__ = r"""
A list of regex patterns and replacements.
The first column is the pattern, and the next n columns are the targets.
Has an important function, ``MappingFrame.get``, described below.
These DataFrames are used in a few places to clean up, simplify, or otherwise process
predicate and object names.

``MappingFrame.get(s)`` tries the patterns in row order, requiring each to match all of ``s``.
For the first row that matches, it returns a list with one element per string target in that row:
the result of substituting the target (which may contain backreferences such as ``\1``).
If no pattern matches, ``s`` itself is returned unchanged.

Example:

    For the input string "cyp450 2A3", consider we have these two rows:
    row 1: ``['^Juggle protein ([xy])', 'Juggle \1', 'J\1']``
    row 2: ``['^CYP *450 (\d+)[A-Z]\d*$', 'Cytochrome P450 \1', 'CYP\1']``
    First, we try to match against the first pattern. It doesn't match, so we try the next.
    This one does match our input string, so we return ``["Cytochrome P450 2", "CYP2"]``.
    The first returned element (here "Cytochrome P450 2"), is considered the primary,
    while the rest are -- for most usages -- considered optional extras.
"""
# The class is created by a factory call, so its methods are attached here
# from the module-level helper functions.
MappingFrame.targets = _targets
MappingFrame.patterns = _patterns
MappingFrame.get = _get


class _Compiler:
    """
    Stateful regex compiler that reports which pattern failed.

    Every pattern is wrapped in ``^`` ... ``$`` and compiled case-insensitively.
    The instance counts how many patterns it has been asked to compile so that
    an error message can point at the offending one.
    """

    def __init__(self):
        # Number of ``compile`` calls made so far on this instance.
        self._i = 0

    def compile(self, s: str) -> regex.Pattern:
        """
        Compiles one pattern after stripping surrounding whitespace and anchoring it.

        Args:
            s: The raw pattern text.

        Returns:
            The compiled pattern (``regex.V1`` behavior, ignoring case).

        Raises:
            ValueError: If anything goes wrong while preparing or compiling ``s``;
                        the message includes ``s`` and the running call count.
        """
        # Original author's note: the header is the first.
        self._i = self._i + 1
        try:
            anchored = "^" + s.strip() + "$"
            compiled = regex.compile(anchored, flags=regex.V1 | regex.IGNORECASE)
        except Exception:
            message = f"Failed to parse '{s}' on line {self._i} (excluding comments and blank lines)"
            raise ValueError(message) from None
        return compiled


class Mappings:
    """
    Factory for ``MappingFrame`` instances.
    See the ``MappingFrame`` documentation for how the frames are used.
    """

    @classmethod
    def from_resource(cls, name: str) -> MappingFrame:
        """
        Loads the mapping whose path is ``MandosResources.a_path("mappings", name)``.

        Args:
            name: The resource name passed on to ``MandosResources.a_path``.

        Returns:
            The frame produced by ``from_path`` for the resolved path.
        """
        return cls.from_path(MandosResources.a_path("mappings", name))

    @classmethod
    def from_path(cls, path: Path) -> MappingFrame:
        """
        Reads a mapping from a CSV-like file or ``.regexes`` file.
        Feather and Parquet are fine, too.
        A ``.regexes`` file is a simple extension of CSV that uses ``--->`` as the delimiter
        and ignores empty lines and lines beginning with ``#``.
        That format exists only because it is easy to edit in a text editor.

        Args:
            path: The file to read with ``MappingFrame.read_file``.

        Returns:
            The frame that was read, with every cell of its first column
            replaced by the compiled pattern.

        Raises:
            ValueError: If a cell in the first column cannot be compiled.
        """
        frame = MappingFrame.read_file(path)
        pattern_column = frame.columns[0]
        # One compiler per file so that its error messages count from this file's first pattern.
        compiler = _Compiler()
        frame[pattern_column] = frame[pattern_column].map(compiler.compile)
        return frame


# Public API of this module; the underscore-prefixed helpers are not exported.
__all__ = ["MappingFrame", "Mappings"]


1			from pathlib import Path
			0 ignored issues – show introduced 2021-04-03 02:07 UTC by Report Bug Copy Issue Report Missing module docstring Loading history...
2			from typing import Sequence
3
4			import regex
			0 ignored issues – show introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Unable to import 'regex' Loading history...
5			import pandas as pd
			0 ignored issues – show introduced 2021-08-07 00:15 UTC by Report Bug Copy Issue Report Unable to import 'pandas' Loading history...
6			from typeddfs import TypedDfs
			0 ignored issues – show introduced 2021-06-30 04:51 UTC by Report Bug Copy Issue Report Unable to import 'typeddfs' Loading history...
7
8			from mandos.model import MandosResources
9
10			MappingFrame = TypedDfs.untyped("MappingFrame")
11
12
13			def _patterns(self: pd.DataFrame) -> Sequence[str]:
14			return self[self.columns[0]].values.tolist()
15
16
17			def _targets(self: pd.DataFrame) -> Sequence[str]:
18			return self.columns[1:].values.tolist()
19
20
21			def _get(self: pd.DataFrame, s: str) -> Sequence[str]:
			0 ignored issues – show Coding Style Naming introduced 2021-04-04 05:57 UTC by Report Bug Copy Issue Report Argument name "s" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]*\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
22			for irow, pattern in enumerate(self[self.columns[0]].values):
23			try:
24			match: regex.Match = pattern.fullmatch(s)
25			except AttributeError:
26			raise ValueError(f"Failed on regex {pattern}") from None
27			if match is not None:
28			return [pattern.sub(t, s.strip()) for t in self.T[irow] if isinstance(t, str)]
29			return s
30
31
32			MappingFrame.__doc__ = r"""
33			A list of regex patterns and replacements.
34			The first column is the pattern, and the next n columns are the targets.
35			Has an important function, ``MappingFrame.get``, describe below.
36			These DataFrames are used in a few places to clean up, simplify, or otherwise process
37			predicate and object names.
38
39			Example:
40
41			For the input string "cyp450 2A3", consider we have these two rows:
42			row 1: ``['^Juggle protein [xy]', 'Juggle \1', 'J\1']``
43			row 2: ``['^CYP *450 (\d+)[A-Z]\d*$', 'Cytochrome P450 \1', 'CYP\1']``
44			First, we try to match against the first pattern. It doesn't match, so we try the next.
45			This one does match our input string, so we return ``["Cytochrome P450 2", "CYP2"]``.
46			The first returned element (here "Cytochrome P450 2"), is considered the primary,
47			while the second are -- for most usages -- considered optional extras.
48			"""
49			MappingFrame.targets = _targets
50			MappingFrame.patterns = _patterns
51			MappingFrame.get = _get
52
53
54			class _Compiler:
55			"""
56			Compiles multiple regex patterns, providing nice error messages.
57			All patterns are global (i.e. ^ and $ are affixed) and case-insensitive.
58			"""
59
60			def __init__(self):
61			self._i = 0
62
63			def compile(self, s: str) -> regex.Pattern:
			0 ignored issues – show Coding Style Naming introduced 2021-06-30 04:51 UTC by Report Bug Copy Issue Report Argument name "s" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]*\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history... introduced 2021-04-04 05:57 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
64			self._i += 1 # header is the first
65			try:
66			return regex.compile("^" + s.strip() + "$", flags=regex.V1 | regex.IGNORECASE)
67			except Exception:
68			raise ValueError(
69			f"Failed to parse '{s}' on line {self._i} (excluding comments and blank lines)"
70			) from None
71
72
73			class Mappings:
74			"""
75			Creates MappingFrames.
76			See that documentation.
77			"""
78
79			@classmethod
80			def from_resource(cls, name: str) -> MappingFrame:
			0 ignored issues – show introduced 2021-04-04 05:57 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
81			path = MandosResources.a_path("mappings", name)
82			return cls.from_path(path)
83
84			@classmethod
85			def from_path(cls, path: Path) -> MappingFrame:
86			"""
87			Reads a mapping from a CSV-like file or ``.regexes`` file.
88			Feather and Parquet are fine, too.
89			The ``.regexes`` suffix is a simple extension of CSV that uses ``--->`` as the delimiter.
90			and ignores empty lines and lines beginning with ``#``.
91			It's just nice for easily editing in a text editor.
92			"""
93			df = MappingFrame.read_file(path)
			0 ignored issues – show Coding Style Naming introduced 2021-06-30 04:51 UTC by Report Bug Copy Issue Report Variable name "df" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]*\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
94			compiler = _Compiler()
95			df[df.columns[0]] = df[df.columns[0]].map(compiler.compile)
96			return df
97
98
99			__all__ = ["MappingFrame", "Mappings"]
100

dmyersturnbull / mandos

Push — main ( 9813db...5006f2 )

mandos.model.utils.mappings A

Complexity

Size/Duplication

Importance

4 Methods

3 Functions

Duplication Side-by-Side

Filter issues like