Passed
Push — main ( 4074a3...6b8d16 )
by Douglas
01:56
created

mandos.model.apis.caching_pubchem_api.CachingPubchemApi.similarity_path()   A

Complexity

Conditions 2

Size

Total Lines 6
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 6
nop 3
dl 0
loc 6
rs 10
c 0
b 0
f 0
1
"""
2
PubChem caching API.
3
"""
4
from __future__ import annotations
5
6
import gzip
0 ignored issues
show
Unused Code introduced by
The import gzip seems to be unused.
Loading history...
7
import os
0 ignored issues
show
Unused Code introduced by
The import os seems to be unused.
Loading history...
8
from pathlib import Path
9
from typing import FrozenSet, Optional, Union
0 ignored issues
show
Unused Code introduced by
Unused FrozenSet imported from typing
Loading history...
10
11
import orjson
0 ignored issues
show
introduced by
Unable to import 'orjson'
Loading history...
Unused Code introduced by
The import orjson seems to be unused.
Loading history...
12
import pandas as pd
0 ignored issues
show
introduced by
Unable to import 'pandas'
Loading history...
Unused Code introduced by
Unused pandas imported as pd
Loading history...
13
from pocketutils.core.dot_dict import NestedDotDict
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.dot_dict'
Loading history...
14
from pocketutils.core.exceptions import XValueError
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.exceptions'
Loading history...
Unused Code introduced by
Unused XValueError imported from pocketutils.core.exceptions
Loading history...
15
16
from mandos.model.apis.pubchem_api import PubchemApi, PubchemCompoundLookupError
17
from mandos.model.apis.pubchem_support.pubchem_data import PubchemData
18
from mandos.model.apis.querying_pubchem_api import QueryingPubchemApi
19
from mandos.model.settings import SETTINGS
20
from mandos.model.utils.setup import logger
21
22
23
class CachingPubchemApi(PubchemApi):
    """
    PubChem API backed by an on-disk cache.

    Each compound is stored at ``<cache_dir>/data/<key>.json.gz`` (see ``data_path``).
    Downloaded data is written under its ``parent_or_self`` CID and hard-linked under
    the compound's InChIKey, its siblings, and the identifier that was queried.
    A compound that could not be found is recorded as an empty JSON document, so a
    later lookup of the same identifier fails from the cache instead of re-querying.
    """

    def __init__(
        self,
        query: Optional[QueryingPubchemApi],
        cache_dir: Path = SETTINGS.pubchem_cache_path,
    ):
        """
        Args:
            query: API used to download data that is not in the cache.
                   If ``None``, a cache miss raises ``PubchemCompoundLookupError``.
            cache_dir: Root directory of the cache.
        """
        self._cache_dir = cache_dir
        self._query = query

    def fetch_data(self, inchikey_or_cid: Union[str, int]) -> Optional[PubchemData]:
        """
        Returns the data for a compound, reading the cache if a file for it exists
        and downloading (then caching) it otherwise.

        Args:
            inchikey_or_cid: An InChIKey or a PubChem CID

        Returns:
            The compound data

        Raises:
            PubchemCompoundLookupError: If the cached file is empty (the compound was
                previously not found), or if the download fails (see ``_download``)
        """
        path = self.data_path(inchikey_or_cid)
        if not path.exists():
            logger.debug(f"Did NOT find cached PubChem data for {inchikey_or_cid}")
            return self._download(inchikey_or_cid)
        logger.debug(f"Found cached PubChem data for {inchikey_or_cid}")
        data = self._read_json(path)
        if data is None:
            # an empty file is the marker written by _download for a failed lookup
            raise PubchemCompoundLookupError(
                f"{inchikey_or_cid} previously not found in PubChem"
            )
        self._write_siblings(data)  # TODO: remove
        return data

    def _download(self, inchikey_or_cid: Union[int, str]) -> PubchemData:
        """
        Downloads the data for a compound and writes it to the cache.

        Args:
            inchikey_or_cid: An InChIKey or a PubChem CID

        Returns:
            The downloaded data

        Raises:
            PubchemCompoundLookupError: If no querying API was supplied, or if the
                querying API could not find the compound. In the latter case an
                empty JSON document is cached for the identifier before re-raising.
        """
        if self._query is None:
            raise PubchemCompoundLookupError(f"{inchikey_or_cid} not cached")
        try:
            data: PubchemData = self._query.fetch_data(inchikey_or_cid)
        except PubchemCompoundLookupError:
            # remember the failure so that fetch_data does not query again
            path = self.data_path(inchikey_or_cid)
            NestedDotDict({}).write_json(path, mkdirs=True)
            logger.debug(f"Wrote empty PubChem data to {path}")
            raise
        cid = data.parent_or_self  # if there's ever a parent of a parent, this will NOT work
        path = self.data_path(cid)
        if path.exists():
            logger.debug(f"PubChem data for {inchikey_or_cid} parent CID {cid} exists")
            logger.warning(f"Writing over {path} for {inchikey_or_cid}")
        else:
            logger.debug(f"PubChem data for {inchikey_or_cid} parent CID {cid} does not exist")
        # NOTE(review): reaches into PubchemData's protected ``_data``; switch to a
        # public accessor if PubchemData offers one
        data._data.write_json(path, mkdirs=True)
        self._write_siblings(data, inchikey_or_cid)
        logger.debug(f"Wrote PubChem data to {path.absolute()}")
        logger.info(f"Got PubChem data for {inchikey_or_cid}")
        return data

    def _write_siblings(self, data: PubchemData, *others: Union[int, str]):
        """
        Hard-links the cache file of ``data.parent_or_self`` under every alias.

        The aliases are the compound's InChIKey, its siblings, and ``others``.
        An alias whose cache path is already the main file is skipped; any other
        existing alias file is replaced by the link.

        Args:
            data: Compound data whose main cache file already exists
            others: Additional identifiers (InChIKeys or CIDs) to link
        """
        cid = data.parent_or_self
        path = self.data_path(cid)
        # Aliases are *identifiers*; data_path is applied exactly once, below.
        # (Seeding this set with data_path(data.inchikey) mapped the InChIKey twice
        # and linked it as "<key>.json.gz.json.gz", which fetch_data never finds.)
        aliases = {data.inchikey, *data.siblings, *others}
        for sibling in aliases:
            link = self.data_path(sibling)
            if link != path and link.resolve() != path.resolve():
                link.unlink(missing_ok=True)
                # same as path.link_to(link), which was removed in Python 3.12
                os.link(path, link)
        alias_str = ",".join(str(s) for s in aliases)
        logger.debug(f"Added aliases {alias_str} ⇌ {cid} ({path})")

    def data_path(self, inchikey_or_cid: Union[int, str]) -> Path:
        """
        Returns the cache file path for an identifier:
        ``<cache_dir>/data/<inchikey_or_cid>.json.gz``.
        """
        return self._cache_dir / "data" / f"{inchikey_or_cid}.json.gz"

    def _read_json(self, path: Path) -> Optional[PubchemData]:
        """
        Reads a cached JSON file.

        Returns:
            The data wrapped in ``PubchemData``, or ``None`` if the document is empty
        """
        dot = NestedDotDict.read_json(path)
        return PubchemData(dot) if len(dot) > 0 else None
88
89
90
# Names exported by ``from <this module> import *``.
__all__ = ["CachingPubchemApi"]
91