1
|
|
|
from __future__ import annotations |
|
|
|
|
2
|
|
|
|
3
|
|
|
import abc |
4
|
|
|
import dataclasses |
5
|
|
|
from typing import Any, Generic, Mapping, Sequence, TypeVar |
6
|
|
|
|
7
|
|
|
from pocketutils.core.exceptions import XTypeError |
|
|
|
|
8
|
|
|
from suretime import Suretime |
|
|
|
|
9
|
|
|
|
10
|
|
|
from mandos.model.hit_dfs import HitDf |
11
|
|
|
from mandos.model.hits import AbstractHit |
12
|
|
|
from mandos.model.utils.reflection_utils import ReflectionUtils |
13
|
|
|
from mandos.model.utils.resources import MandosResources |
14
|
|
|
|
15
|
|
|
# Type parameter of ``Search``: the hit type that ``Search.find`` returns and
# ``Search._create_hit`` builds. Bounded by ``AbstractHit`` and declared covariant.
H = TypeVar(
    "H",
    bound=AbstractHit,
    covariant=True,
)
|
|
|
|
16
|
|
|
|
17
|
|
|
|
18
|
|
|
class SearchError(Exception):
    """
    Wrapper for any exception raised in ``find`` except for ``CompoundNotFoundError``.

    Attributes:
        inchikey: Value of the ``inchikey`` keyword argument (``None`` if not given).
        search_key: Value of the ``search_key`` keyword argument (``None`` if not given).
        search_class: Value of the ``search_class`` keyword argument (``None`` if not given).
        extra: Dict of any additional keyword arguments that were supplied.
    """

    def __init__(
        self,
        *args,
        inchikey: str = None,
        search_key: str = None,
        search_class: str = None,
        **kwargs,
    ):
        """
        Args:
            *args: Positional arguments forwarded to :class:`Exception`
                (they become ``self.args``; typically just the message).
            inchikey: Stored as ``self.inchikey``.
            search_key: Stored as ``self.search_key``.
            search_class: Stored as ``self.search_class``.
            **kwargs: Any further keyword arguments; accepted so that existing
                callers keep working, and stored in ``self.extra``.
        """
        # Only positional arguments go to Exception. Unpacking ``kwargs`` with a
        # single ``*`` would append the keyword *names* to ``self.args`` (polluting
        # the message), and ``**kwargs`` is not an option because
        # ``Exception.__init__`` rejects keyword arguments.
        super().__init__(*args)
        self.inchikey = inchikey
        self.search_key = search_key
        self.search_class = search_class
        # Keep the extra context instead of silently dropping the values.
        self.extra = dict(kwargs)
35
|
|
|
|
36
|
|
|
|
37
|
|
|
class Search(Generic[H], metaclass=abc.ABCMeta):
    """
    Something to search and how to do it.

    ``H`` is the hit type returned by :meth:`find` and built by :meth:`_create_hit`.
    Public instance attributes (those not starting with an underscore) are treated
    as the *parameters* of the search; see :meth:`get_params`.
    """

    def __init__(self, key: str):
        """
        Args:
            key: Stored as ``self.key``; copied into created hits as ``search_key``.
        """
        self.key = key

    @classmethod
    def primary_data_source(cls) -> str:
        """
        Returns the portion of this class's ``"source"`` resource string
        that precedes the first ``:``.
        """
        source = MandosResources.strings[cls.__name__]["source"]
        # TODO: really?
        prefix, _, _ = source.partition(":")
        return prefix

    @property
    def search_class(self) -> str:
        """
        The name of the concrete class of this instance.
        """
        return type(self).__name__

    @classmethod
    def search_name(cls) -> str:
        """
        Returns the lowercased class name with every occurrence of ``search`` removed.
        """
        lowered = cls.__name__.lower()
        return lowered.replace("search", "")

    def get_params(self) -> Mapping[str, Any]:
        """
        Returns the *parameters* of this ``Search`` and their values.
        Parameters are attributes that do not begin with an underscore.
        """
        params = {}
        for name, value in vars(self).items():
            if name.startswith("_"):
                continue
            params[name] = value
        return params

    def find(self, inchikey: str) -> Sequence[H]:
        """
        Performs the search for ``inchikey``. Subclasses must override this.

        Raises:
            NotImplementedError: Always, in this base implementation
        """
        # override this
        raise NotImplementedError()

    @classmethod
    def hit_fields(cls) -> Sequence[str]:
        """
        Gets the names of the dataclass fields of the Hit type parameter,
        leaving out ``search_class``.
        """
        # This relies on some reflection magic:
        # the raw ``H`` is useless here; we need the type that *cls* bound to it.
        # ``get_h`` resolves that (via get_args / __orig_bases__),
        # and dataclasses.fields then lists the dataclass fields.
        # actual_h.__annotations__ would be an alternative, but that doesn't include
        # ClassVar and InitVar (not that we're using those).
        # If this proves too magical, it could become an abstract method instead,
        # at the cost of a lot of excess code and possibly some modularity.
        hit_type = cls.get_h()
        names = []
        # noinspection PyDataclass
        for field in dataclasses.fields(hit_type):
            if field.name == "search_class":
                continue
            names.append(field.name)
        return names

    @classmethod
    def get_h(cls):
        """
        Returns the underlying hit TypeVar, ``H``, as resolved for ``cls``
        by ``ReflectionUtils.get_generic_arg``.
        """
        # noinspection PyTypeChecker
        return ReflectionUtils.get_generic_arg(cls, AbstractHit)

    @staticmethod
    def _fill_placeholders(template: str, values: Mapping[str, Any]) -> str:
        """
        Replaces each ``{name}`` in ``template`` with ``str(value)``,
        one entry of ``values`` at a time, in iteration order.
        """
        filled = template
        for name, value in values.items():
            filled = filled.replace("{" + name + "}", str(value))
        return filled

    def _format_source(self, **kwargs) -> str:
        """
        Returns this search's ``"source"`` resource string with ``{name}``
        placeholders substituted from ``kwargs``.
        """
        template = MandosResources.strings[self.search_class]["source"]
        return self._fill_placeholders(template, kwargs)

    def _format_predicate(self, **kwargs) -> str:
        """
        Returns this search's ``"predicate"`` resource string with ``{name}``
        placeholders substituted from ``kwargs``.
        """
        template = MandosResources.strings[self.search_class]["predicate"]
        return self._fill_placeholders(template, kwargs)

    def _create_hit(
        self,
        c_origin: str,
        c_matched: str,
        c_id: str,
        c_name: str,
        data_source: str,
        predicate: str,
        object_id: str,
        object_name: str,
        **kwargs,
    ) -> H:
        """
        Builds a hit of this search's hit type.

        Args:
            c_origin: Passed to the hit as ``origin_inchikey``
            c_matched: Passed to the hit as ``matched_inchikey``
            c_id: Passed to the hit as ``compound_id``
            c_name: Passed to the hit as ``compound_name``
            data_source: Passed to the hit as ``data_source``
            predicate: Passed to the hit as ``predicate``
            object_id: Passed to the hit as ``object_id``
            object_name: Passed to the hit as ``object_name``
            **kwargs: Additional hit constructor arguments;
                      these override the standard entries on a name clash

        Returns:
            A new instance of the type returned by :meth:`get_h`
        """
        # ignore statement -- we've removed it for now
        standard = {
            "record_id": None,
            "search_key": self.key,
            "search_class": self.search_class,
            "data_source": data_source,
            "run_date": Suretime.tagged.now_utc_sys().iso_with_zone,
            "cache_date": None,
            "weight": 1,
            "compound_id": c_id,
            "origin_inchikey": c_origin,
            "matched_inchikey": c_matched,
            "compound_name": c_name,
            "predicate": predicate,
            "object_id": object_id,
            "object_name": object_name,
        }
        # Caller-supplied values win over the standard ones.
        arguments = {**standard, **kwargs}
        hit_type = type(self).get_h()
        # noinspection PyArgumentList
        return hit_type(**arguments)

    def __repr__(self) -> str:
        """
        Returns the parameters formatted as comma-separated ``name=value`` pairs.
        """
        pairs = (f"{name}={value!s}" for name, value in self.get_params().items())
        return ", ".join(pairs)

    def __str__(self) -> str:
        """
        Same as :meth:`__repr__`.
        """
        return repr(self)

    def __eq__(self, other: Search) -> bool:
        """
        Returns True iff all of the parameters match, thereby excluding attributes with underscores.
        Multiversal equality: the comparison is made on ``repr``, which does not contain
        the class name.

        Raises:
            XTypeError: If ``other`` is not a :class:`Search`
        """
        if isinstance(other, Search):
            return repr(self) == repr(other)
        raise XTypeError(f"{type(other)} not comparable")
158
|
|
|
|
159
|
|
|
|
160
|
|
|
# Names exported by ``from <this module> import *``.
__all__ = [
    "Search",
    "HitDf",
    "SearchError",
]
161
|
|
|
|