Passed
Push — main ( 9813db...5006f2 )
by Douglas
01:43
created

ChemblUtils._get_compound()   B

Complexity

Conditions 6

Size

Total Lines 14
Code Lines 12

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 6
eloc 12
nop 2
dl 0
loc 14
rs 8.6666
c 0
b 0
f 0
1
from __future__ import annotations
0 ignored issues
show
introduced by
Missing module docstring
Loading history...
2
3
import enum
4
from json.decoder import JSONDecodeError
0 ignored issues
show
Unused Code introduced by
Unused JSONDecodeError imported from json.decoder
Loading history...
5
6
import numpy as np
0 ignored issues
show
introduced by
Unable to import 'numpy'
Loading history...
Unused Code introduced by
Unused numpy imported as np
Loading history...
7
from pocketutils.core.dot_dict import NestedDotDict
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.dot_dict'
Loading history...
8
from pocketutils.tools.common_tools import CommonTools
0 ignored issues
show
introduced by
Unable to import 'pocketutils.tools.common_tools'
Loading history...
9
from requests.exceptions import RequestException
0 ignored issues
show
introduced by
Unable to import 'requests.exceptions'
Loading history...
10
from urllib3.exceptions import HTTPError
0 ignored issues
show
introduced by
Unable to import 'urllib3.exceptions'
Loading history...
11
12
from mandos import logger
13
from mandos.model import CompoundNotFoundError
14
from mandos.model.utils import CleverEnum
15
from mandos.model.apis.chembl_api import ChemblApi
16
from mandos.model.apis.chembl_support import ChemblCompound
17
18
19
class MolStructureType(CleverEnum):
    """
    Kinds of structure record attached to a ChEMBL molecule.

    ``ChemblUtils.compound_dot_dict_to_obj`` parses the ``structure_type`` field
    of a molecule record into one of these members via ``of``.
    """

    # NOTE(review): presumably mirrors the values ChEMBL reports in ``structure_type`` -- confirm
    mol = enum.auto()
    both = enum.auto()
    # ``ChemblUtils`` treats this member as "no structure available"
    none = enum.auto()
23
24
25
class ChemblUtils:
0 ignored issues
show
introduced by
Missing class docstring
Loading history...
26
    def __init__(self, api: ChemblApi):
        """
        Stores the API handle used by every query method of this class.

        Args:
            api: The ChEMBL API object; its ``target`` and ``molecule`` attributes are queried
        """
        self.api = api
28
29
    def get_target(self, chembl: str) -> NestedDotDict:
        """
        Queries for a single target.

        Args:
            chembl: Passed as ``target_chembl_id`` to the target filter

        Returns:
            The one matching record, wrapped in a ``NestedDotDict``

        Raises:
            AssertionError: If the filter did not match exactly one target
        """
        matches = self.api.target.filter(target_chembl_id=chembl)
        if len(matches) == 1:
            return NestedDotDict(matches[0])
        # zero or several hits: report how many, along with the hits themselves
        raise AssertionError(f"There are {len(matches)} targets: {matches}")
43
44
    def get_compound(self, inchikey: str) -> ChemblCompound:
        """
        Fetches a compound and converts it to a ``ChemblCompound``.

        Feeds the result of ``get_compound_dot_dict`` into ``compound_dot_dict_to_obj``.

        Args:
            inchikey: Forwarded unchanged to ``get_compound_dot_dict``

        Returns:
            The ``ChemblCompound`` built from the fetched record
        """
        return self.compound_dot_dict_to_obj(self.get_compound_dot_dict(inchikey))
50
51
    def compound_dot_dict_to_obj(self, ch: NestedDotDict) -> ChemblCompound:
        """
        Builds a ``ChemblCompound`` from a record returned by ``get_compound_dot_dict``.

        Args:
            ch: A molecule record; ``molecule_chembl_id``, ``structure_type``, and ``pref_name``
                are always read, and ``molecule_structures`` is read unless the structure
                type is ``MolStructureType.none``

        Returns:
            A ``ChemblCompound`` of the ChEMBL ID, InChIKey, preferred name, and InChI;
            the InChI and InChIKey are both ``"N/A"`` when the record has no structure
        """
        chembl_id = ch["molecule_chembl_id"]
        structure_kind = MolStructureType.of(ch["structure_type"])
        if structure_kind != MolStructureType.none:
            inchi = ch["molecule_structures"]["standard_inchi"]
            inchikey = ch["molecule_structures"]["standard_inchi_key"]
        else:
            # no structure record to read from, so fall back to placeholder identifiers
            logger.info(
                f"No structure found for compound {chembl_id} of type {structure_kind.name}."
            )
            logger.debug(f"No structure found for compound {ch}.")
            inchikey = "N/A"
            inchi = "N/A"
        pref_name = ch["pref_name"]
        return ChemblCompound(chembl_id, inchikey, pref_name, inchi)
67
68
    def get_compound_dot_dict(self, inchikey: str) -> NestedDotDict:
        """
        Fetches a compound record, preferring its parent compound when it has one.

        The record for ``inchikey`` is fetched first. If it carries a
        ``molecule_hierarchy`` whose ``parent_chembl_id`` differs from the record's
        own ``molecule_chembl_id``, the parent is fetched and returned instead.

        Args:
            inchikey: Identifier handed to ``_get_compound``

        Returns:
            The record as a ``NestedDotDict``. **Only** ``molecule_chembl_id``,
            ``pref_name``, and ``molecule_structures`` are guaranteed to exist
            (contract stated by the original author; it is not enforced here).
        """
        record = self._get_compound(inchikey)
        # molecule_hierarchy can have the actual value None
        if record.get("molecule_hierarchy") is None:
            logger.caution(f"Missing hierarchy for {record}")
            return record
        parent_id = record["molecule_hierarchy"]["parent_chembl_id"]
        if parent_id == record["molecule_chembl_id"]:
            # the compound is its own parent, so there is nothing more to fetch
            return record
        return NestedDotDict(self._get_compound(parent_id))
87
88
    def _get_compound_from_smiles(self, smiles: str) -> NestedDotDict:
        """
        Looks up exactly one compound by a flexmatch search on its canonical SMILES.

        Only ``molecule_chembl_id``, ``pref_name``, and ``molecule_structures``
        are requested from the API.

        Args:
            smiles: Passed as ``molecule_structures__canonical_smiles__flexmatch``

        Returns:
            The single matching record, wrapped in a ``NestedDotDict``

        Raises:
            CompoundNotFoundError: If the request fails, if the number of matches
                                   is not exactly one, or if the match is ``None``
        """
        try:
            results = list(
                self.api.molecule.filter(
                    molecule_structures__canonical_smiles__flexmatch=smiles
                ).only(["molecule_chembl_id", "pref_name", "molecule_structures"])
            )
        except (HTTPError, RequestException) as e:
            # chain explicitly so the underlying network error stays attached as the cause
            raise CompoundNotFoundError(f"NOT FOUND: ChEMBL compound {smiles}") from e
        if len(results) != 1:
            raise CompoundNotFoundError(f"Got {len(results)} for compound {smiles}")
        result = results[0]
        if result is None:
            raise CompoundNotFoundError(f"Result for compound {smiles} is null!")
        return NestedDotDict(result)
103
104
    def _get_compound(self, inchikey: str) -> NestedDotDict:
        """
        Fetches one molecule record from ``self.api.molecule.get``.

        Args:
            inchikey: The identifier handed to ``molecule.get``
                      (``get_compound_dot_dict`` also passes a parent ChEMBL ID here)

        Returns:
            The record, wrapped in a ``NestedDotDict``; never ``None``

        Raises:
            TypeError: If ``inchikey`` is null or the string ``"nan"``
            CompoundNotFoundError: If the request fails with an HTTP/requests error,
                                   or if the API returns ``None``
            Exception: Any other error from the query is logged and re-raised unchanged
        """
        # saves a slow query
        if CommonTools.is_null(inchikey) or str(inchikey) == "nan":
            raise TypeError(f"Cannot get ChEMBL compound from {inchikey} (type {type(inchikey)})")
        # noinspection PyBroadException
        try:
            result = self.api.molecule.get(inchikey)
        except (HTTPError, RequestException) as e:
            raise CompoundNotFoundError(f"Failed to find compound {inchikey}") from e
        except Exception:
            # Log which compound triggered the unexpected failure, then propagate it;
            # swallowing it would make this method return None to callers that
            # dereference the result immediately.
            logger.error(f"Error on ChEMBL query for compound {inchikey}")
            raise
        # Checked outside the ``try`` so this error cannot be caught by the handlers above
        if result is None:
            raise CompoundNotFoundError(f"Result for compound {inchikey} is null!")
        return NestedDotDict(result)
118
119
120
# Names exported by ``from <this module> import *``
__all__ = ["MolStructureType", "ChemblUtils"]
121