Passed
Push — dependabot/pip/selenium-4.0.0 ( d9e630...707494 )
by
unknown
01:47
created

QueryingPubchemSimilarityApi.search()   A

Complexity

Conditions 3

Size

Total Lines 14
Code Lines 12

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 12
nop 3
dl 0
loc 14
rs 9.8
c 0
b 0
f 0
1
"""
2
API for PubChem similarity search.
3
"""
4
from __future__ import annotations
5
6
import time
7
from pathlib import Path
8
from typing import FrozenSet
9
10
import orjson
0 ignored issues
show
introduced by
Unable to import 'orjson'
Loading history...
11
import pandas as pd
0 ignored issues
show
introduced by
Unable to import 'pandas'
Loading history...
12
from pocketutils.core.dot_dict import NestedDotDict
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.dot_dict'
Loading history...
13
from pocketutils.core.exceptions import DownloadTimeoutError, XValueError
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.exceptions'
Loading history...
14
from pocketutils.core.query_utils import QueryExecutor
0 ignored issues
show
introduced by
Unable to import 'pocketutils.core.query_utils'
Loading history...
15
from typeddfs import TypedDfs
0 ignored issues
show
introduced by
Unable to import 'typeddfs'
Loading history...
16
17
from mandos.model.apis.similarity_api import SimilarityApi
18
from mandos.model.settings import QUERY_EXECUTORS, SETTINGS
19
from mandos.model.utils.setup import logger
20
21
# DataFrame type for similarity hits; it requires an integer ``cid`` column.
SimilarityDf = (
    TypedDfs.typed("SimilarityDf")
    .require("cid", dtype=int)
    .secure()
    .build()
)
22
23
24
class QueryingPubchemSimilarityApi(SimilarityApi):
    """
    Similarity search that sends its queries to the PubChem PUG REST service.
    """

    _pug = "https://pubchem.ncbi.nlm.nih.gov/rest/pug"

    def __init__(self, executor: QueryExecutor = QUERY_EXECUTORS.pubchem):
        """
        Args:
            executor: Callable used to perform every HTTP request.
        """
        self._executor = executor

    def search(self, inchi: str, min_tc: float) -> FrozenSet[int]:
        """
        Submits a similarity query and polls for its result.

        Args:
            inchi: Structure identifier placed in the ``inchikey`` URL segment.
            min_tc: Value sent as the ``Threshold`` query parameter.

        Returns:
            The compound IDs listed under ``IdentifierList.CID`` in the response.

        Raises:
            DownloadTimeoutError: If no CID list is available within 5 seconds
                of submitting the query.
        """
        submit_url = f"{self._pug}/compound/similarity/inchikey/{inchi}/JSON?Threshold={min_tc}"
        submitted = orjson.loads(self._executor(submit_url, method="post"))
        # The initial response only carries a key under which results appear later.
        key = submitted["Waiting"]["ListKey"]
        poll_url = f"{self._pug}/compound/listkey/{key}/cids/JSON"
        started = time.monotonic()
        while True:
            if time.monotonic() - started >= 5:
                raise DownloadTimeoutError(f"Search for {inchi} using key {key} timed out")
            # the executor waits as needed between requests
            listing = NestedDotDict(orjson.loads(self._executor(poll_url)))
            if listing.get("IdentifierList.CID") is None:
                continue
            return frozenset(listing.req_list_as("IdentifierList.CID", int))
44
45
46
class CachingPubchemSimilarityApi(SimilarityApi):
    """
    Similarity search that stores the results of a querying API on disk.

    Each (structure, threshold) pair maps to one file under
    ``<cache_dir>/similarity/``; see :meth:`path`.
    """

    def __init__(self, query: QueryingPubchemSimilarityApi, cache_dir: Path | None = None):
        """
        Args:
            query: API used to run the search when no cached file exists.
            cache_dir: Root directory of the cache. When omitted, ``_cache_dir``
                must be supplied some other way (for example by a subclass)
                before :meth:`path` or :meth:`search` is called.
        """
        self._query = query
        # Previously ``_cache_dir`` was read in ``path`` but never assigned here.
        if cache_dir is not None:
            self._cache_dir = Path(cache_dir)

    def path(self, inchi: str, min_tc: float) -> Path:
        """
        Returns the cache file path for a structure and threshold.

        Args:
            inchi: Structure identifier; used verbatim in the filename.
            min_tc: Threshold as a fraction; must be a whole number of percent.

        Raises:
            XValueError: If ``min_tc`` is not an increment of 1%.
            AttributeError: If no cache directory has been configured.
        """
        scaled = min_tc * 100
        try:
            percent = round(scaled)
        except (OverflowError, ValueError):
            # NaN and infinity can never be a whole percentage.
            percent = None
        # Compare with a tolerance: e.g. 0.29 * 100 == 28.999999999999996, so an
        # exact ``is_integer`` test would reject valid 1% increments.
        if percent is None or abs(scaled - percent) > 1e-9:
            raise XValueError(f"min_tc {min_tc} is not an increment of 1%")
        cache_dir = getattr(self, "_cache_dir", None)
        if cache_dir is None:
            raise AttributeError(
                f"{type(self).__name__} has no cache directory; pass cache_dir to the constructor"
            )
        path = cache_dir / "similarity" / f"{inchi}_{percent}"
        return path.with_suffix(SETTINGS.archive_filename_suffix)

    def search(self, inchi: str, min_tc: float) -> FrozenSet[int]:
        """
        Returns the compound IDs for a similarity search, using the cache if possible.

        If the file given by :meth:`path` exists, its ``cid`` column is returned.
        Otherwise the wrapped API is queried and the result is written to that
        file before being returned.

        Args:
            inchi: Structure identifier, passed through to the wrapped API.
            min_tc: Threshold as a fraction; must be a whole number of percent.

        Raises:
            XValueError: If ``min_tc`` is not an increment of 1%.
        """
        logger.info(f"Searching for {inchi} with min TC {min_tc}")
        path = self.path(inchi, min_tc)
        if path.exists():
            return self._cids(SimilarityDf.read_file(path))
        found = self._query.search(inchi, min_tc)
        df = SimilarityDf.of([pd.Series(dict(cid=cid)) for cid in found])
        df.write_file(path, mkdirs=True, dir_hash=True)
        logger.info(f"Wrote {len(df)} values for {inchi} with min TC {min_tc}")
        return self._cids(df)

    @staticmethod
    def _cids(df) -> FrozenSet[int]:
        """Extracts the ``cid`` column as plain Python ints rather than numpy scalars."""
        return frozenset(int(cid) for cid in df["cid"].values)
68
69
70
# Names exported by ``from <module> import *``.
__all__ = [
    "QueryingPubchemSimilarityApi",
    "CachingPubchemSimilarityApi",
]
71