Passed
Push — main ( 3a0c28...4b9dc0 )
by Douglas
01:51
created

mandos.model.apis.hmdb_api   A

Complexity

Total Complexity 17

Size/Duplication

Total Lines 118
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 92
dl 0
loc 118
rs 10
c 0
b 0
f 0
wmc 17

9 Methods

Rating   Name   Duplication   Size   Complexity  
A QueryingHmdbApi.__init__() 0 2 1
A CachingHmdbApi.fetch() 0 11 2
A CachingHmdbApi._write_links() 0 12 2
A CachingHmdbApi.__init__() 0 5 1
A HmdbApi.fetch() 0 2 1
A QueryingHmdbApi._query() 0 7 1
A QueryingHmdbApi._to_json() 0 8 3
A CachingHmdbApi.path() 0 2 1
B QueryingHmdbApi.fetch() 0 24 5
1
import abc
0 ignored issues
show
introduced by
Missing module docstring
Loading history...
2
import time
3
import urllib
4
from pathlib import Path
5
from typing import Optional
6
from urllib import request
0 ignored issues
show
Unused Code introduced by
Unused request imported from urllib
Loading history...
7
8
import decorateme
0 ignored issues
show
introduced by
Unable to import 'decorateme'
Loading history...
9
from pocketutils.core.dot_dict import NestedDotDict
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.dot_dict'
Loading history...
10
from pocketutils.core.query_utils import QueryExecutor
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.query_utils'
Loading history...
11
12
from mandos.model import Api, CompoundNotFoundError
13
from mandos.model.apis.hmdb_support.hmdb_data import HmdbData
14
from mandos.model.settings import QUERY_EXECUTORS, SETTINGS
15
from mandos.model.utils.setup import logger
16
17
18
class HmdbCompoundLookupError(CompoundNotFoundError):
    """Raised when no HMDB record can be matched to, or downloaded for, a compound."""
20
21
22
@decorateme.auto_repr_str()
class HmdbApi(Api, metaclass=abc.ABCMeta):
    """
    Base interface for looking up HMDB metabolite records.

    Subclasses are expected to override :meth:`fetch`; the implementation here
    always raises ``NotImplementedError``.
    """

    def fetch(self, hmdb_id: str) -> HmdbData:
        """
        Retrieves the HMDB record for one compound.

        Args:
            hmdb_id: Identifier of the compound to look up

        Returns:
            The record wrapped in an ``HmdbData``

        Raises:
            NotImplementedError: Always, unless overridden by a subclass
        """
        raise NotImplementedError()
26
27
28
@decorateme.auto_repr_str()
class QueryingHmdbApi(HmdbApi):
    """
    HMDB API that downloads metabolite records from hmdb.ca.

    Accepts either an HMDB ID (any string starting with ``HMDB``) or another
    search term such as an InChIKey. A non-HMDB term is first resolved to an
    HMDB ID by following the redirect of the site's search endpoint.
    """

    def __init__(self, executor: QueryExecutor = QUERY_EXECUTORS.hmdb):
        """
        Args:
            executor: Callable invoked with a URL to download a record
        """
        self._executor = executor

    # NOTE(review): the parameter is named ``inchikey_or_hmdb_id`` here but ``hmdb_id``
    # in ``HmdbApi.fetch``; kept as-is so existing keyword callers do not break.
    def fetch(self, inchikey_or_hmdb_id: str) -> HmdbData:
        """
        Downloads the HMDB record for a compound.

        Args:
            inchikey_or_hmdb_id: An HMDB ID (starts with ``HMDB``), or a term that is
                                 passed to the HMDB metabolite search to find one

        Returns:
            The record, converted from XML into an ``HmdbData``

        Raises:
            HmdbCompoundLookupError: If the search or the download fails for any reason
        """
        logger.debug(f"Downloading HMDB data for {inchikey_or_hmdb_id}")
        if inchikey_or_hmdb_id.startswith("HMDB"):
            cid = inchikey_or_hmdb_id
        else:
            cid = self._resolve_cid(inchikey_or_hmdb_id)
        # e.g. https://hmdb.ca/metabolites/HMDB0001925.xml
        url = f"https://hmdb.ca/metabolites/{cid}.xml"
        try:
            data = self._executor(url)
        except Exception as err:
            # chain the cause so the underlying network/parse error is not lost
            raise HmdbCompoundLookupError(
                f"No HMDB match for {inchikey_or_hmdb_id} ({cid})"
            ) from err
        # NOTE(review): ``_to_json`` reads ``.tag`` / ``.text`` from the children of
        # ``data``, so this assumes the executor returns a parsed XML element, while
        # ``_query`` annotates the same call as returning ``str`` -- confirm.
        return HmdbData(self._to_json(data))

    def _resolve_cid(self, search_term: str) -> str:
        """Finds the HMDB ID for a search term from the URL the HMDB search redirects to."""
        # wait the configured minimum query delay before hitting the search endpoint
        time.sleep(SETTINGS.hmdb_query_delay_min)  # TODO
        url = f"https://hmdb.ca/unearth/q?query={search_term}&searcher=metabolites"
        try:
            # ``with`` closes the HTTP response; only the final URL is needed.
            # Uses the file-level ``from urllib import request`` import.
            with request.urlopen(url) as response:
                final_url = response.geturl()
            logger.trace(f"Got URL {final_url} from {url}")
            cid = final_url.split("/")[-1]
            if not cid.startswith("HMDB"):
                raise ValueError(f"Invalid CID {cid} from URL {final_url}")
        except Exception as err:
            raise HmdbCompoundLookupError(f"No HMDB match for {search_term}") from err
        return cid

    def _to_json(self, xml) -> NestedDotDict:
        """
        Recursively converts an XML element into a nested dict keyed by child tag.

        A child that has children of its own is converted recursively; any other child
        maps to its text, or ``""`` when the text is empty or ``None``.
        If several children share a tag, the last one wins.
        """
        response = {}
        for child in xml:
            if list(child):
                response[child.tag] = self._to_json(child)
            else:
                response[child.tag] = child.text or ""
        return NestedDotDict(response)

    def _query(self, url: str) -> str:
        """
        Runs the executor on ``url`` and trace-logs the size and timings of the query.

        Not called by ``fetch`` in this class.
        """
        data = self._executor(url)
        time_taken = self._executor.last_time_taken
        wait_secs = time_taken.wait.total_seconds()
        query_secs = time_taken.query.total_seconds()
        # len * 8 / 1024: kilobits, counting each element of ``data`` as one byte
        kilobits = int(len(data) * 8 / 1024)
        # ``.1f`` (not ``.1``): one decimal place instead of one significant digit
        logger.trace(
            f"Queried {kilobits} kb from {url} in {query_secs:.1f} s"
            f" with {wait_secs:.1f} s of wait"
        )
        return data
74
75
76
@decorateme.auto_repr_str()
class CachingHmdbApi(HmdbApi):
    """
    HMDB API that keeps records as ``<id>.json.gz`` files in a cache directory.

    A record is read from the cache when its file exists; otherwise it is fetched
    through the wrapped querying API, saved under its HMDB ID, and hard-linked under
    its other identifiers so later lookups by any of them hit the cache.
    """

    def __init__(
        self, query: Optional[QueryingHmdbApi], cache_dir: Path = SETTINGS.hmdb_cache_path
    ):
        """
        Args:
            query: API used on a cache miss; if ``None``, only cached records can be fetched
            cache_dir: Directory that holds the cached files
        """
        self._query = query
        self._cache_dir = cache_dir

    def path(self, inchikey_or_hmdb_id: str) -> Path:
        """Returns the cache file path for an identifier (the file may not exist)."""
        return self._cache_dir / f"{inchikey_or_hmdb_id}.json.gz"

    # NOTE(review): the parameter is named ``inchikey_or_hmdb_id`` here but ``hmdb_id``
    # in ``HmdbApi.fetch``; kept as-is so existing keyword callers do not break.
    def fetch(self, inchikey_or_hmdb_id: str) -> HmdbData:
        """
        Returns the record for a compound, downloading and caching it if needed.

        Args:
            inchikey_or_hmdb_id: Identifier used both as the cache key and,
                                 on a miss, as the argument to the querying API

        Returns:
            The cached or freshly downloaded ``HmdbData``

        Raises:
            HmdbCompoundLookupError: If the record is not cached and no querying API
                                     was given (or the querying API raises it)
        """
        path = self.path(inchikey_or_hmdb_id)
        if path.exists():
            return HmdbData(NestedDotDict.read_json(path))
        if self._query is None:
            # ``query`` is Optional; fail with a lookup error, not an AttributeError
            raise HmdbCompoundLookupError(
                f"{inchikey_or_hmdb_id} is not cached, and no querying API was provided"
            )
        data = self._query.fetch(inchikey_or_hmdb_id)
        # the canonical cache file is always named after the HMDB ID
        path = self.path(data.cid)
        # NOTE(review): reaches into the protected ``HmdbData._data``;
        # switch to a public accessor if ``HmdbData`` offers one.
        data._data.write_json(path, mkdirs=True)
        logger.info(f"Saved HMDB metabolite {data.cid}")
        self._write_links(data)
        return data

    def _write_links(self, data: HmdbData) -> None:
        """Hard-links the record's cache file under each of its alternative identifiers."""
        # local import: ``os`` is not among this module's top-level imports
        import os

        path = self.path(data.cid)
        # these all have different prefixes, so it's ok
        aliases = [
            data.inchikey,
            *[ell for ell in [data.cas, data.pubchem_id, data.drugbank_id] if ell is not None],
        ]
        for alias in aliases:
            link = self.path(alias)
            # replace any stale link left over from an earlier download
            link.unlink(missing_ok=True)
            # same as the former ``path.link_to(link)``, which Python 3.12 removed
            os.link(path, link)
        logger.debug(f"Added aliases {','.join([str(s) for s in aliases])} ⇌ {data.cid} ({path})")
111
112
113
# Public names exported by this module.
__all__ = ["HmdbApi", "QueryingHmdbApi", "CachingHmdbApi", "HmdbCompoundLookupError"]
119