Passed
Push — main ( 4b9dc0...1b55d1 )
by Douglas
06:16 queued 02:32
created

QueryingHmdbApi.executor()   A

Complexity

Conditions 1

Size

Total Lines 3
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 3
nop 1
dl 0
loc 3
rs 10
c 0
b 0
f 0
1
import abc
0 ignored issues
show
introduced by
Missing module docstring
Loading history...
2
import time
3
import urllib
4
from pathlib import Path
5
from typing import Optional
6
from urllib import request
0 ignored issues
show
Unused Code introduced by
Unused request imported from urllib
Loading history...
7
8
import decorateme
0 ignored issues
show
introduced by
Unable to import 'decorateme'
Loading history...
9
from pocketutils.core.dot_dict import NestedDotDict
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.dot_dict'
Loading history...
10
from pocketutils.core.query_utils import QueryExecutor, QueryMixin
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.query_utils'
Loading history...
11
12
from mandos.model import Api, CompoundNotFoundError
13
from mandos.model.apis.hmdb_support.hmdb_data import HmdbData
14
from mandos.model.settings import QUERY_EXECUTORS, SETTINGS
15
from mandos.model.utils import unlink
16
from mandos.model.utils.setup import logger
17
18
19
class HmdbCompoundLookupError(CompoundNotFoundError):
    """Raised when no HMDB record could be resolved or retrieved for a compound."""
21
22
23
@decorateme.auto_repr_str()
class HmdbApi(Api, metaclass=abc.ABCMeta):
    """Abstract interface for retrieving HMDB metabolite records."""

    def fetch(self, hmdb_id: str) -> HmdbData:
        """
        Fetch the HMDB record for a compound.

        Args:
            hmdb_id: Identifier of the compound to look up.

        Returns:
            The record wrapped in an ``HmdbData``.

        Raises:
            NotImplementedError: Always, in this base class; subclasses
                are expected to override this method.
        """
        raise NotImplementedError()
27
28
29
@decorateme.auto_repr_str()
class QueryingHmdbApi(HmdbApi, QueryMixin):
    """HMDB API implementation that downloads records from hmdb.ca."""

    def __init__(self, executor: QueryExecutor = QUERY_EXECUTORS.hmdb):
        """
        Args:
            executor: Callable invoked with a metabolite XML URL; its result
                is converted by ``_to_json``.
        """
        self._executor = executor

    @property
    def executor(self) -> QueryExecutor:
        """The query executor supplied at construction."""
        return self._executor

    def fetch(self, inchikey_or_hmdb_id: str) -> HmdbData:
        """
        Download the HMDB record for a compound.

        Args:
            inchikey_or_hmdb_id: Either an HMDB ID (anything starting with
                ``"HMDB"``) or another search key such as an InChIKey, which
                is first resolved to an HMDB ID through HMDB's search page.

        Returns:
            The downloaded record wrapped in an ``HmdbData``.

        Raises:
            HmdbCompoundLookupError: If the ID cannot be resolved or the
                record cannot be downloaded. The underlying exception is
                attached as ``__cause__``.
        """
        logger.debug(f"Downloading HMDB data for {inchikey_or_hmdb_id}")
        cid = self._resolve_hmdb_id(inchikey_or_hmdb_id)
        # e.g. https://hmdb.ca/metabolites/HMDB0001925.xml
        url = f"https://hmdb.ca/metabolites/{cid}.xml"
        try:
            data = self._executor(url)
        except Exception as e:
            raise HmdbCompoundLookupError(
                f"No HMDB match for {inchikey_or_hmdb_id} ({cid})"
            ) from e
        return HmdbData(self._to_json(data))

    def _resolve_hmdb_id(self, inchikey_or_hmdb_id: str) -> str:
        """Return the HMDB ID for the input, searching hmdb.ca if it is not one already."""
        if inchikey_or_hmdb_id.startswith("HMDB"):
            return inchikey_or_hmdb_id
        time.sleep(SETTINGS.hmdb_query_delay_min)  # TODO
        url = f"https://hmdb.ca/unearth/q?query={inchikey_or_hmdb_id}&searcher=metabolites"
        try:
            # Close the response as soon as the final URL has been read.
            with urllib.request.urlopen(url) as res:
                url_ = res.geturl()
            logger.trace(f"Got URL {url_} from {url}")
            # The last path segment of the final URL is taken as the HMDB ID.
            cid = url_.split("/")[-1]
            if not cid.startswith("HMDB"):
                raise ValueError(f"Invalid CID {cid} from URL {url_}")
        except Exception as e:
            raise HmdbCompoundLookupError(f"No HMDB match for {inchikey_or_hmdb_id}") from e
        return cid

    def _to_json(self, xml) -> NestedDotDict:
        """
        Recursively convert an XML element into a nested dict keyed by tag.

        Elements with children become nested dicts; leaf elements map to
        their text, or ``""`` when the text is empty or missing. If sibling
        elements share a tag, the last one wins.
        """
        response = {}
        for child in xml:
            if list(child):
                response[child.tag] = self._to_json(child)
            else:
                response[child.tag] = child.text or ""
        return NestedDotDict(response)
71
72
73
@decorateme.auto_repr_str()
class CachingHmdbApi(HmdbApi):
    """HMDB API that serves records from an on-disk cache, querying on a miss."""

    def __init__(
        self,
        query: Optional[QueryingHmdbApi],
        cache_dir: Path = SETTINGS.hmdb_cache_path,
    ):
        """
        Args:
            query: API used to download records that are not cached yet;
                ``None`` restricts this instance to already-cached records.
            cache_dir: Directory holding the cached ``.json.gz`` files.
        """
        self._query = query
        self._cache_dir = cache_dir

    def path(self, inchikey_or_hmdb_id: str) -> Path:
        """Return the cache file path for the given identifier."""
        return self._cache_dir / f"{inchikey_or_hmdb_id}.json.gz"

    def fetch(self, inchikey_or_hmdb_id: str) -> HmdbData:
        """
        Return the HMDB record for a compound, using the cache when possible.

        On a cache miss the record is downloaded through the wrapped query
        API, written under its HMDB ID, and hard-linked under its other
        identifiers so later lookups by any of them hit the cache.

        Args:
            inchikey_or_hmdb_id: Identifier used to name the cache file and,
                on a miss, passed to the query API.

        Returns:
            The cached or freshly downloaded record.

        Raises:
            HmdbCompoundLookupError: If the record is not cached and no
                query API was provided; the query API may raise it too.
        """
        path = self.path(inchikey_or_hmdb_id)
        if path.exists():
            return HmdbData(NestedDotDict.read_json(path))
        if self._query is None:
            raise HmdbCompoundLookupError(
                f"{inchikey_or_hmdb_id} is not cached at {path} and no query API is configured"
            )
        data = self._query.fetch(inchikey_or_hmdb_id)
        path = self.path(data.cid)
        # NOTE(review): this reaches into HmdbData's protected ``_data``;
        # switch to a public accessor if HmdbData provides one.
        data._data.write_json(path, mkdirs=True)
        logger.info(f"Saved HMDB metabolite {data.cid}")
        self._write_links(data)
        return data

    def _write_links(self, data: HmdbData) -> None:
        """Hard-link the cached record under each of its alternative identifiers."""
        # Local import: os.link replaces Path.link_to, which was removed in Python 3.12.
        import os

        path = self.path(data.cid)
        # these all have different prefixes, so it's ok
        aliases = [
            ell
            for ell in (data.inchikey, data.cas, data.pubchem_id, data.drugbank_id)
            if ell is not None
        ]
        for alias in aliases:
            link = self.path(alias)
            if link == path:
                # Never unlink the primary cache file itself.
                continue
            unlink(link, missing_ok=True)
            os.link(path, link)
        logger.debug(f"Added aliases {','.join([str(s) for s in aliases])} ⇌ {data.cid} ({path})")
108
109
110
# Names exported by ``from <module> import *``.
__all__ = [
111
    "HmdbApi",
112
    "QueryingHmdbApi",
113
    "CachingHmdbApi",
114
    "HmdbCompoundLookupError",
115
]
116