Passed
Push — main ( fe2164...a4a582 )
by Douglas
01:37
created

mandos.model.searches.SearchError.__init__()   A

Complexity

Conditions 1

Size

Total Lines 12
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 11
nop 6
dl 0
loc 12
rs 9.85
c 0
b 0
f 0
1
from __future__ import annotations
0 ignored issues
show
introduced by
Missing module docstring
Loading history...
2
import abc
3
import dataclasses
4
import typing
5
from typing import Generic, Sequence, TypeVar
6
7
import pandas as pd
0 ignored issues
show
introduced by
Unable to import 'pandas'
Loading history...
8
9
from mandos import logger
10
from mandos.model.hits import AbstractHit, HitFrame
11
from mandos.model import CompoundNotFoundError, ReflectionUtils
12
13
# Type parameter for ``Search``: the kind of hit a search produces.
# It is bounded by ``AbstractHit`` and declared covariant.
# The single-letter name is the usual convention for a TypeVar.
H = TypeVar("H", bound=AbstractHit, covariant=True)
0 ignored issues
show
Coding Style Naming introduced by
Class name "H" doesn't conform to PascalCase naming style ('[^\\W\\da-z][^\\W_]+$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
14
15
16
class SearchError(Exception):
    """
    Wrapper for any exception raised in ``find`` except for ``CompoundNotFoundError``.

    Attributes:
        inchikey: InChI Key of the compound being searched when the failure occurred, if known
        search_key: Key of the search that failed, if known
        search_class: Class name of the search that failed, if known
        extra: Any other keyword arguments supplied by the caller
    """

    def __init__(
        self,
        *args: typing.Any,
        inchikey: typing.Optional[str] = None,
        search_key: typing.Optional[str] = None,
        search_class: typing.Optional[str] = None,
        **kwargs: typing.Any,
    ):
        """
        Args:
            *args: Positional arguments for ``Exception`` (normally just the message)
            inchikey: InChI Key of the compound that was being searched
            search_key: Key of the search that failed
            search_class: Class name of the search that failed
            **kwargs: Extra context; ``compound`` is accepted as an alias for ``inchikey``,
                      and everything else is stored in ``extra``
        """
        # Forward only the positional arguments: ``BaseException.__init__`` rejects
        # keyword arguments, and unpacking the dict with a single ``*`` would append
        # its *keys* to ``args`` and corrupt ``str(error)``.
        super().__init__(*args)
        extra = dict(kwargs)
        # ``compound=`` is honored so that callers using that keyword still get
        # ``inchikey`` populated; an explicit ``inchikey`` always wins.
        alias = extra.pop("compound", None)
        self.inchikey = alias if inchikey is None else inchikey
        self.search_key = search_key
        self.search_class = search_class
        self.extra = extra
33
34
35
class Search(Generic[H], metaclass=abc.ABCMeta):
    """
    Something to search and how to do it.

    Subclasses bind ``H`` to a concrete hit class and override :py:meth:`find`
    and :py:attr:`data_source`.
    """

    def __init__(self, key: str):
        """
        Args:
            key: Identifier of this search; it is included in error messages
        """
        self.key = key

    @property
    def search_class(self) -> str:
        """
        The name of this object's class.
        """
        return self.__class__.__name__

    @property
    def search_name(self) -> str:
        """
        The lowercased class name with every occurrence of ``search`` removed.
        """
        return self.__class__.__name__.lower().replace("search", "")

    @property
    def data_source(self) -> str:
        """
        Where the data originally came from; e.g. ``the Human Metabolome Database (HMDB)``.
        To override.
        """
        raise NotImplementedError()

    def get_params(self) -> typing.Mapping[str, typing.Any]:
        """
        Returns the *parameters* of this ``Search`` and their values.
        Parameters are attributes that do not begin with an underscore.
        """
        return {key: value for key, value in vars(self).items() if not key.startswith("_")}

    def find_to_df(self, inchikeys: Sequence[str]) -> HitFrame:
        """
        Calls :py:meth:`find_all` and returns a :py:class:`HitFrame` DataFrame subclass.
        Compounds that were not found are skipped (see :py:meth:`find_all`).

        Args:
            inchikeys: A list of InChI key strings

        Returns:
            One row per hit, with a column per hit field plus ``universal_id``
        """
        hits = self.find_all(inchikeys)
        # The field list depends only on the class, so resolve it once rather than per hit
        fields = self.hit_fields()
        rows = [
            pd.Series({**{f: getattr(h, f) for f in fields}, "universal_id": h.universal_id})
            for h in hits
        ]
        return HitFrame(rows)

    def find_all(self, inchikeys: Sequence[str]) -> Sequence[H]:
        """
        Loops over every compound and calls ``find``.
        Comes with better logging.
        Writes a logging INFO message for each compound that was not found and skips it.

        Args:
            inchikeys: A list of InChI key strings

        Returns:
            The list of :py:class:`mandos.model.hits.AbstractHit`

        Raises:
            SearchError: If ``find`` raised anything other than ``CompoundNotFoundError``;
                         the original exception is attached as the cause
        """
        lst = []
        n_total = len(inchikeys)
        # Stays 0 if we never iterate, so an empty input is reported as "0 of 0"
        n_done = 0
        for n_done, compound in enumerate(inchikeys, start=1):
            try:
                found = self.find(compound)
            except CompoundNotFoundError:
                logger.info(f"NOT FOUND: {compound}. Skipping.")
            except Exception as err:
                raise SearchError(
                    f"Failed {self.key} [{self.search_class}] on compound {compound}",
                    inchikey=compound,
                    search_key=self.key,
                    search_class=self.search_class,
                ) from err
            else:
                lst.extend(found)
                logger.debug(f"Found {len(found)} {self.search_name} annotations for {compound}")
            # Report progress every 10 compounds, whether or not the 10th one was found
            if n_done % 10 == 0:
                self._log_progress(len(lst), n_done, n_total)
        self._log_progress(len(lst), n_done, n_total)
        return lst

    def _log_progress(self, n_hits: int, n_done: int, n_total: int) -> None:
        """
        Writes a NOTICE-level message summarizing how far ``find_all`` has gotten.
        """
        logger.notice(
            f"Found {n_hits} {self.search_name} annotations"
            f" for {n_done} of {n_total} compounds"
        )

    def find(self, inchikey: str) -> Sequence[H]:
        """
        To override.
        Finds the annotations for a single compound.

        Args:
            inchikey: An InChI Key

        Returns:
            A list of annotations

        Raises:
            CompoundNotFoundError
        """
        raise NotImplementedError()

    @classmethod
    def hit_fields(cls) -> Sequence[str]:
        """
        Gets the fields in the Hit type parameter, excluding ``search_class``.
        """
        # Okay, there's a lot of magic going on here
        # We need to access the _parameter_ H on cls -- raw `H` doesn't work
        # get_h resolves it for us,
        # then dataclasses.fields gives us the dataclass fields
        # there's also actual_h.__annotations__, but that doesn't include ClassVar and InitVar
        # (not that we're using those)
        # If this magic is too magical, we can make this an abstract method
        # But that would be a lot of excess code and it might be less modular
        hit_type = cls.get_h()
        return [f.name for f in dataclasses.fields(hit_type) if f.name != "search_class"]

    @classmethod
    def get_h(cls):
        """
        Returns the hit type that ``H`` is bound to for this class,
        as resolved by ``ReflectionUtils.get_generic_arg``.
        """
        return ReflectionUtils.get_generic_arg(cls, AbstractHit)

    def __repr__(self) -> str:
        """
        Returns the parameters formatted as comma-separated ``name=value`` pairs.
        """
        return ", ".join(k + "=" + str(v) for k, v in self.get_params().items())

    def __str__(self) -> str:
        return repr(self)

    def __eq__(self, other: Search) -> bool:
        """
        Returns True iff all of the parameters match, thereby excluding attributes with underscores.
        Multiversal equality.

        The comparison is made on ``repr``, so the class itself is not compared.
        Note that defining ``__eq__`` without ``__hash__`` makes instances unhashable.

        Raises:
            TypeError: If ``other`` is not a :py:class:`Search`
        """
        if not isinstance(other, Search):
            raise TypeError(f"{type(other)} not comparable")
        return repr(self) == repr(other)
181
182
183
# Public API of this module. ``SearchError`` is listed because ``Search.find_all`` raises it,
# so callers need to be able to import it; ``HitFrame`` is re-exported from ``mandos.model.hits``.
__all__ = ["Search", "SearchError", "HitFrame"]
184