Passed
Push — dependabot/pip/flake8-bugbear-... ( 5c5892...6076c0 )
by
unknown
01:34
created

mandos.model.searches.Search.find_all()   B

Complexity

Conditions 5

Size

Total Lines 34
Code Lines 20

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 5
eloc 20
nop 2
dl 0
loc 34
rs 8.9332
c 0
b 0
f 0
1
from __future__ import annotations
0 ignored issues
show
introduced by
Missing module docstring
Loading history...
2
import abc
3
import dataclasses
4
import typing
5
from typing import Generic, Sequence, TypeVar
6
7
import pandas as pd
0 ignored issues
show
introduced by
Unable to import 'pandas'
Loading history...
8
9
from mandos import logger
10
from mandos.model.hits import AbstractHit, HitFrame
11
from mandos.model import CompoundNotFoundError, ReflectionUtils
12
13
# Type variable for the hit type a ``Search`` yields: bound to AbstractHit and declared covariant.
H = TypeVar("H", bound=AbstractHit, covariant=True)
0 ignored issues
show
Coding Style Naming introduced by
Class name "H" doesn't conform to PascalCase naming style ('[^\\W\\da-z][^\\W_]+$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
14
15
16
class Search(Generic[H], metaclass=abc.ABCMeta):
17
    """
18
    Something to search and how to do it.
19
    """
20
21
    def __init__(self, key: str):
22
        self.key = key
23
24
    @property
25
    def search_class(self) -> str:
0 ignored issues
show
introduced by
Missing function or method docstring
Loading history...
26
        return self.__class__.__name__
27
28
    @property
29
    def search_name(self) -> str:
0 ignored issues
show
introduced by
Missing function or method docstring
Loading history...
30
        return self.__class__.__name__.lower().replace("search", "")
31
32
    @property
33
    def data_source(self) -> str:
34
        """
35
        Where the data originally came from; e.g. ``the Human Metabolome Database (HMDB)``"
36
        """
37
        raise NotImplementedError()
38
39
    def get_params(self) -> typing.Mapping[str, typing.Any]:
40
        """
41
        Returns the *parameters* of this ``Search`` their values.
42
        Parameters are attributes that do not begin with an underscore.
43
        """
44
        return {key: value for key, value in vars(self).items() if not key.startswith("_")}
45
46
    def find_to_df(self, inchikeys: Sequence[str]) -> HitFrame:
        """
        Calls :py:meth:`find_all` and wraps the resulting hits in a :py:class:`HitFrame`.
        Each hit becomes one ``pd.Series`` holding the attributes named by
        :py:meth:`hit_fields` plus ``universal_id``.

        Args:
            inchikeys: A list of InChI key strings

        Returns:
            A :py:class:`HitFrame` constructed from the list of per-hit ``pd.Series``
        """
        rows = []
        for hit in self.find_all(inchikeys):
            record = {name: getattr(hit, name) for name in self.hit_fields()}
            # universal_id is added explicitly, after the fields from hit_fields
            record["universal_id"] = hit.universal_id
            rows.append(pd.Series(record))
        return HitFrame(rows)
66
67
    def find_all(self, inchikeys: Sequence[str]) -> Sequence[H]:
        """
        Loops over every compound, calls :py:meth:`find` on each, and collects the results.
        A compound that raises ``CompoundNotFoundError`` is logged at INFO level and skipped.
        Any other exception is logged with its traceback and re-raised.
        A progress notice is logged after every 10th compound, and a summary notice at the end.

        Args:
            inchikeys: A list of InChI key strings

        Returns:
            The list of :py:class:`mandos.model.hits.AbstractHit`, in input order

        Raises:
            Exception: Whatever :py:meth:`find` raises, other than ``CompoundNotFoundError``
        """
        hits = []
        n_total = len(inchikeys)
        # stays 0 when inchikeys is empty, so the summary reads "0 of 0" rather than "1 of 0"
        n_done = 0
        for n_done, compound in enumerate(inchikeys, start=1):
            try:
                found = self.find(compound)
            except CompoundNotFoundError:
                logger.info(f"NOT FOUND: {compound}. Skipping.")
                continue
            except Exception:
                logger.exception(f"Failed on {compound}")
                raise
            hits.extend(found)
            logger.debug(f"Found {len(found)} {self.search_name} annotations for {compound}")
            # skip the progress notice on the last compound: the summary below reports the same
            if n_done % 10 == 0 and n_done != n_total:
                logger.notice(
                    f"Found {len(hits)} {self.search_name} annotations"
                    f" for {n_done} of {n_total} compounds"
                )
        logger.notice(
            f"Found {len(hits)} {self.search_name} annotations"
            f" for {n_done} of {n_total} compounds"
        )
        return hits
101
102
    def find(self, inchikey: str) -> Sequence[H]:
103
        """
104
        To override.
105
        Finds the annotations for a single compound.
106
107
        Args:
108
            inchikey: An InChI Key
109
110
        Returns:
111
            A list of annotations
112
113
        Raises:
114
            CompoundNotFoundError
115
        """
116
        raise NotImplementedError()
117
118
    @classmethod
    def hit_fields(cls) -> Sequence[str]:
        """
        Gets the names of the dataclass fields of the hit type returned by :py:meth:`get_h`,
        leaving out ``search_class``.
        """
        # The hit type has to be looked up on ``cls`` by reflection (see get_h);
        # the bare module-level ``H`` cannot be used for that.
        # dataclasses.fields is used in preference to reading ``__annotations__`` directly.
        # If the reflection turns out to be too magical, this could become an abstract method,
        # though that would add code to every subclass and might be less modular.
        hit_type = cls.get_h()
        all_names = (field.name for field in dataclasses.fields(hit_type))
        return [name for name in all_names if name != "search_class"]
133
134
    @classmethod
    def get_h(cls):
        """
        Returns the hit type for this class, as resolved by
        ``ReflectionUtils.get_generic_arg(cls, AbstractHit)``.
        Presumably this is the type substituted for ``H`` when the class was declared;
        confirm against ``ReflectionUtils``.
        """
        hit_type = ReflectionUtils.get_generic_arg(cls, AbstractHit)
        return hit_type
140
141
    def __repr__(self) -> str:
142
        return ", ".join([k + "=" + str(v) for k, v in self.get_params().items()])
143
144
    def __str__(self) -> str:
145
        return repr(self)
146
147
    def __eq__(self, other: Search) -> bool:
148
        """
149
        Returns True iff all of the parameters match, thereby excluding attributes with underscores.
150
        Multiversal equality.
151
152
        Raises:
153
            TypeError: If ``other`` is not a :py:class:`Search`
154
        """
155
        if not isinstance(other, Search):
156
            raise TypeError(f"{type(other)} not comparable")
157
        return repr(self) == repr(other)
158
159
160
# Names exported by a star-import of this module: Search, plus HitFrame (imported here from mandos.model.hits).
__all__ = ["Search", "HitFrame"]
161