mandos.model   A
last analyzed

Complexity

Total Complexity 36

Size/Duplication

Total Lines 358
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 180
dl 0
loc 358
rs 9.52
c 0
b 0
f 0
wmc 36

21 Methods

Rating   Name   Duplication   Size   Complexity  
A Triple.tab_header() 0 15 1
A Search.compound_dot_dict_to_obj() 0 20 2
A Search.get_compound() 0 12 1
A Search._get_compound() 0 8 3
A Search.find() 0 14 1
B Search.find_all() 0 25 6
A Triple.tabs() 0 11 2
A AbstractHit.predicate() 0 8 1
A Search.get_h() 0 10 1
A Search.search_name() 0 3 1
A MolStructureType.of() 0 3 1
A AbstractHit.fields() 0 8 1
A AbstractHit.to_triple() 0 8 1
A Search.get_query_type() 0 11 1
A Search.get_target() 0 13 1
A Search.hit_fields() 0 18 1
A Search.__init__() 0 11 1
A Search._get_compound_from_smiles() 0 15 4
A AbstractHit.__hash__() 0 2 1
A Triple.statement() 0 12 1
A Search.get_compound_dot_dict() 0 22 4
1
from __future__ import annotations
0 ignored issues
show
introduced by
Missing module docstring
Loading history...
2
3
import abc
4
import dataclasses
5
import enum
6
import logging
7
import typing
8
from dataclasses import dataclass
9
from typing import Generic, Optional, Sequence, TypeVar
10
11
from urllib3.exceptions import HTTPError
0 ignored issues
show
introduced by
Unable to import 'urllib3.exceptions'
Loading history...
12
from requests.exceptions import RequestException
0 ignored issues
show
introduced by
Unable to import 'requests.exceptions'
Loading history...
13
from pocketutils.core.dot_dict import NestedDotDict
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.dot_dict'
Loading history...
14
15
from mandos import MandosUtils, QueryType
16
from mandos.chembl_api import ChemblApi
17
from mandos.model.settings import Settings
18
from mandos.model.taxonomy import Taxonomy
19
20
# Module-level logger. It uses the fixed name "mandos" rather than this module's ``__name__``.
logger = logging.getLogger("mandos")
21
22
23
class CompoundNotFoundError(ValueError):
    """
    Raised when a compound lookup fails or does not yield exactly one non-null result.
    """
25
26
27
class MolStructureType(enum.Enum):
    """
    The ``structure_type`` values recognized for a molecule record.
    """

    mol = enum.auto()
    both = enum.auto()
    none = enum.auto()

    @classmethod
    def of(cls, s: str) -> MolStructureType:
        """
        Looks up a member by name, ignoring case.

        Args:
            s: The member name in any letter case, e.g. ``"MOL"`` or ``"none"``

        Returns:
            The member whose name equals ``s.lower()``

        Raises:
            KeyError: If no member has that name
        """
        member_name = s.lower()
        return cls[member_name]
35
36
37
@dataclass(frozen=True, order=True, repr=True)
class ChemblCompound:
    """
    Identifying information for one compound, as built by ``Search.compound_dot_dict_to_obj``.
    """

    # ChEMBL ID (read from ``molecule_chembl_id``)
    chid: str
    # Standard InChI key, or "N/A" when the record's structure type is ``none``
    inchikey: str
    # Preferred name (read from ``pref_name``)
    name: str
44
45
46
@dataclass(frozen=True, order=True, repr=True)
class AbstractHit:
    """
    Base record for a single search result that links a compound to an object.

    The base class leaves ``predicate`` unimplemented.
    """

    record_id: Optional[str]
    compound_id: str
    inchikey: str
    compound_lookup: str
    compound_name: str
    object_id: str
    object_name: str

    def to_triple(self) -> Triple:
        """
        Converts this hit into a ``Triple``.

        Returns:
            A ``Triple`` built from the compound fields, ``predicate``, and the object fields
        """
        return Triple(
            compound_lookup=self.compound_lookup,
            compound_id=self.compound_id,
            compound_name=self.compound_name,
            predicate=self.predicate,
            object_id=self.object_id,
            object_name=self.object_name,
        )

    @property
    def predicate(self) -> str:
        """
        The predicate of the statement this hit makes.

        Returns:
            Nothing in this base class

        Raises:
            NotImplementedError: Always, in this base class
        """
        raise NotImplementedError

    def __hash__(self):
        # Hash on the record ID alone; equality still compares every field.
        key = self.record_id
        return hash(key)

    @classmethod
    def fields(cls) -> Sequence[str]:
        """
        Lists the dataclass field names of this class.

        Returns:
            The field names, in declaration order
        """
        names = []
        for field in dataclasses.fields(cls):
            names.append(field.name)
        return names
88
89
90
# Type parameter of ``Search``: the hit type, bounded by ``AbstractHit`` and declared covariant.
H = TypeVar("H", bound=AbstractHit, covariant=True)
0 ignored issues
show
Coding Style Naming introduced by
Class name "H" doesn't conform to PascalCase naming style ('[^\\W\\da-z][^\\W_]+$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
91
92
93
class Search(Generic[H], metaclass=abc.ABCMeta):
    """
    Something to search and how to do it.

    Subclasses are parameterized by their hit type and override ``find``.
    """

    def __init__(self, chembl_api: ChemblApi, config: Settings, tax: Taxonomy):
        """
        Constructor.

        Args:
            chembl_api: The ChEMBL API; its ``target`` and ``molecule`` resources are queried here
            config: Settings, stored as ``self.config``
            tax: Taxonomy, stored as ``self.tax``
        """
        self.api = chembl_api
        self.config = config
        self.tax = tax

    @property
    def search_name(self) -> str:
        """
        The lowercased class name with every occurrence of "search" removed.
        """
        return self.__class__.__name__.lower().replace("search", "")

    def find_all(self, compounds: Sequence[str]) -> Sequence[H]:
        """
        Loops over every compound and calls ``find``.
        Just comes with better logging.

        Compounds for which ``find`` raises ``CompoundNotFoundError`` are logged and skipped.

        Args:
            compounds: The compound identifiers, each passed to ``find`` unchanged

        Returns:
            All hits from all compounds that were found, in input order
        """
        hits = []
        n_total = len(compounds)
        for i, compound in enumerate(compounds):
            try:
                found = self.find(compound)
            except CompoundNotFoundError:
                logger.error("Failed to find compound %s. Skipping.", compound)
            else:
                hits.extend(found)
                logger.debug(
                    "Found %d %s annotations for %s", len(found), self.search_name, compound
                )
            # Report the number of compounds processed (1-based), and always emit the final
            # progress line, even if the last compound was skipped.
            n_done = i + 1
            if n_done % 20 == 0 or n_done == n_total:
                logger.info(
                    "Found %d %s annotations for %d of %d compounds",
                    len(hits),
                    self.search_name,
                    n_done,
                    n_total,
                )
        return hits

    def find(self, compound: str) -> Sequence[H]:
        """
        To override.

        Args:
            compound: The compound identifier to search for

        Returns:
            The hits for this compound

        Raises:
            CompoundNotFoundError: If the compound could not be found
        """
        raise NotImplementedError()

    @classmethod
    def hit_fields(cls) -> Sequence[str]:
        """
        Gets the fields in the Hit type parameter.

        Returns:
            The dataclass field names of the hit type, in declaration order
        """
        # Okay, there's a lot of magic going on here
        # We need to access the _parameter_ H on cls -- raw `H` doesn't work
        # get_args and __orig_bases__ do this for us
        # then dataclasses.fields gives us the dataclass fields
        # there's also actual_h.__annotations__, but that doesn't include ClassVar and InitVar
        # (not that we're using those)
        # If this magic is too magical, we can make this an abstract method
        # But that would be a lot of excess code and it might be less modular
        actual_h = typing.get_args(cls.get_h())[0]
        return [f.name for f in dataclasses.fields(actual_h)]

    @classmethod
    def get_h(cls):
        """
        What is my hit type?

        Returns:
            The first entry of ``cls.__orig_bases__``; ``hit_fields`` takes its first type
            argument as the hit type
        """
        # noinspection PyUnresolvedReferences
        return cls.__orig_bases__[0]

    def get_target(self, chembl: str) -> NestedDotDict:
        """
        Queries for the target.

        Args:
            chembl: The value passed as the ``target_chembl_id`` filter

        Returns:
            The single matching target record

        Raises:
            AssertionError: If the query does not return exactly one target
        """
        targets = self.api.target.filter(target_chembl_id=chembl)
        assert len(targets) == 1, f"Got {len(targets)} targets for {chembl}; expected 1"
        return NestedDotDict(targets[0])

    def get_compound(self, inchikey: str) -> ChemblCompound:
        """
        Calls ``get_compound_dot_dict`` and then ``compound_dot_dict_to_obj``.

        Args:
            inchikey: The compound identifier to look up

        Returns:
            The compound as a ``ChemblCompound``

        Raises:
            CompoundNotFoundError: If the lookup fails
        """
        ch = self.get_compound_dot_dict(inchikey)
        return self.compound_dot_dict_to_obj(ch)

    def compound_dot_dict_to_obj(self, ch: NestedDotDict) -> ChemblCompound:
        """
        Turn results from ``get_compound_dot_dict`` into a ``ChemblCompound``.

        Args:
            ch: A molecule record; ``molecule_chembl_id``, ``structure_type``, and ``pref_name``
                are read, plus ``molecule_structures`` unless the structure type is ``none``

        Returns:
            The compound; its InChI key is "N/A" when the structure type is ``none``
        """
        chid = ch["molecule_chembl_id"]
        mol_type = MolStructureType.of(ch["structure_type"])
        if mol_type == MolStructureType.none:
            logger.info(
                "No structure found for compound %s of type %s.", chid, mol_type.name
            )
            logger.debug("No structure found for compound %s.", ch)
            inchikey = "N/A"
        else:
            inchikey = ch["molecule_structures"]["standard_inchi_key"]
        name = ch["pref_name"]
        return ChemblCompound(chid, inchikey, name)

    def get_query_type(self, inchikey: str) -> QueryType:
        """
        Returns the type of query.

        Args:
            inchikey: The compound identifier to classify

        Returns:
            Whatever ``MandosUtils.get_query_type`` returns for the identifier
        """
        return MandosUtils.get_query_type(inchikey)

    def get_compound_dot_dict(self, inchikey: str) -> NestedDotDict:
        """
        Fetches info and put into a dict.

        If the record names a parent compound other than itself, the parent's record is
        fetched and returned instead.

        Args:
            inchikey: The compound identifier; SMILES queries are matched by structure,
                      and anything else is passed directly to the molecule lookup

        Returns:
            **Only** ``molecule_chembl_id``, ``pref_name``, and ``molecule_structures``
            are guaranteed to exist

        Raises:
            CompoundNotFoundError: If the compound (or its parent) could not be fetched
        """
        # CHEMBL
        kind = self.get_query_type(inchikey)
        if kind == QueryType.smiles:
            ch = self._get_compound_from_smiles(inchikey)
        else:
            ch = self._get_compound(inchikey)
        # molecule_hierarchy can have the actual value None
        if ch.get("molecule_hierarchy") is not None:
            parent = ch["molecule_hierarchy"]["parent_chembl_id"]
            if parent != ch["molecule_chembl_id"]:
                # Look up the parent itself; re-querying the original key would just
                # return the same child record again.
                ch = self._get_compound(parent)
        return ch

    def _get_compound_from_smiles(self, smiles: str) -> NestedDotDict:
        """
        Fetches the single molecule record whose canonical SMILES flex-matches ``smiles``.

        Raises:
            CompoundNotFoundError: On a request error, if the number of matches is not
                                   exactly one, or if the match is null
        """
        # NOTE(review): only these three fields are requested, but ``compound_dot_dict_to_obj``
        # also reads ``structure_type`` -- confirm whether it should be requested here too.
        try:
            results = list(
                self.api.molecule.filter(
                    molecule_structures__canonical_smiles__flexmatch=smiles
                ).only(["molecule_chembl_id", "pref_name", "molecule_structures"])
            )
        except (HTTPError, RequestException) as e:
            raise CompoundNotFoundError(f"Failed to find compound {smiles}") from e
        if len(results) != 1:
            raise CompoundNotFoundError(f"Got {len(results)} for compound {smiles}")
        result = results[0]
        if result is None:
            raise CompoundNotFoundError(f"Result for compound {smiles} is null!")
        return NestedDotDict(result)

    def _get_compound(self, inchikey: str) -> NestedDotDict:
        """
        Fetches one molecule record via ``self.api.molecule.get``.

        Raises:
            CompoundNotFoundError: On a request error or if the result is null
        """
        try:
            result = self.api.molecule.get(inchikey)
        except (HTTPError, RequestException) as e:
            raise CompoundNotFoundError(f"Failed to find compound {inchikey}") from e
        if result is None:
            raise CompoundNotFoundError(f"Result for compound {inchikey} is null!")
        return NestedDotDict(result)
292
293
294
@dataclass(frozen=True, repr=True, order=True)
class Triple:
    """
    Compound, predicate, object.
    """

    compound_id: str
    compound_lookup: str
    compound_name: str
    predicate: str
    object_name: str
    object_id: str

    # Column order shared by ``tab_header`` and ``tabs`` so the two cannot drift apart.
    # This is the order the data rows have always used (lookup first).
    # It has no annotation, so the dataclass does not treat it as a field.
    _COLUMNS = (
        "compound_lookup",
        "compound_id",
        "compound_name",
        "predicate",
        "object_name",
        "object_id",
    )

    @classmethod
    def tab_header(cls) -> str:
        """
        Returns the tab-separated column names, in the same order as ``tabs``.

        Returns:
            A single line without a trailing newline
        """
        return "\t".join(cls._COLUMNS)

    @property
    def tabs(self) -> str:
        """
        Returns the tab-separated values of this triple, in the column order of ``tab_header``.

        Returns:
            A single line without a trailing newline; ``None`` values are written as "-"
        """
        values = (getattr(self, column) for column in self._COLUMNS)
        return "\t".join("-" if value is None else str(value) for value in values)

    @property
    def statement(self) -> str:
        """
        Returns a simple text statement with brackets.

        Returns:
            Subject, predicate, and object, each wrapped in ``<...>`` and separated by tabs
        """
        sub = f"<{self.compound_lookup} [{self.compound_id}] [{self.compound_name}]>"
        pred = f"<{self.predicate}>"
        obj = f"<{self.object_name} [{self.object_id}]>"
        return "\t".join([sub, pred, obj])
350
351
# Public API of this module. ``QueryType`` is not defined here; it is imported from ``mandos``
# and re-exported.
# NOTE(review): ``MolStructureType`` is defined in this module but not listed -- confirm intent.
__all__ = [
    "ChemblCompound",
    "AbstractHit",
    "QueryType",
    "Search",
    "Triple",
    "CompoundNotFoundError",
]
359