| 1 |  |  | """ | 
            
                                                        
            
                                    
            
            
                | 2 |  |  | API for PubChem similarity search. | 
            
                                                        
            
                                    
            
            
                | 3 |  |  | """ | 
            
                                                        
            
                                    
            
            
                | 4 |  |  | from __future__ import annotations | 
            
                                                        
            
                                    
            
            
                | 5 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 6 |  |  | import time | 
            
                                                        
            
                                    
            
            
                | 7 |  |  | from pathlib import Path | 
            
                                                        
            
                                    
            
            
                | 8 |  |  | from typing import FrozenSet | 
            
                                                        
            
                                    
            
            
                | 9 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 10 |  |  | import orjson | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                        
            
                                    
            
            
                | 11 |  |  | import pandas as pd | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                        
            
                                    
            
            
                | 12 |  |  | from pocketutils.core.dot_dict import NestedDotDict | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                        
            
                                    
            
            
                | 13 |  |  | from pocketutils.core.exceptions import DownloadTimeoutError, XValueError | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                        
            
                                    
            
            
                | 14 |  |  | from pocketutils.core.query_utils import QueryExecutor | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                        
            
                                    
            
            
                | 15 |  |  | from typeddfs import TypedDfs | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                        
            
                                    
            
            
                | 16 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 17 |  |  | from mandos.model.apis.similarity_api import SimilarityApi | 
            
                                                        
            
                                    
            
            
                | 18 |  |  | from mandos.model.settings import QUERY_EXECUTORS, SETTINGS | 
            
                                                        
            
                                    
            
            
                | 19 |  |  | from mandos.model.utils.setup import logger | 
            
                                                        
            
                                    
            
            
                | 20 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 21 |  |  | SimilarityDf = (TypedDfs.typed("SimilarityDf").require("cid", dtype=int).secure()).build() | 
            
                                                        
            
                                    
            
            
                | 22 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 23 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 24 |  |  | class QueryingPubchemSimilarityApi(SimilarityApi): | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                        
            
                                    
            
            
                | 25 |  |  |     def __init__(self, executor: QueryExecutor = QUERY_EXECUTORS.pubchem): | 
            
                                                        
            
                                    
            
            
                | 26 |  |  |         self._executor = executor | 
            
                                                        
            
                                    
            
            
                | 27 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 28 |  |  |     _pug = "https://pubchem.ncbi.nlm.nih.gov/rest/pug" | 
            
                                                        
            
                                    
            
            
                | 29 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 30 |  |  |     def search(self, inchi: str, min_tc: float) -> FrozenSet[int]: | 
            
                                                        
            
                                    
            
            
                | 31 |  |  |         req = self._executor( | 
            
                                                        
            
                                    
            
            
                | 32 |  |  |             f"{self._pug}/compound/similarity/inchikey/{inchi}/JSON?Threshold={min_tc}", | 
            
                                                        
            
                                    
            
            
                | 33 |  |  |             method="post", | 
            
                                                        
            
                                    
            
            
                | 34 |  |  |         ) | 
            
                                                        
            
                                    
            
            
                | 35 |  |  |         key = orjson.loads(req)["Waiting"]["ListKey"] | 
            
                                                        
            
                                    
            
            
                | 36 |  |  |         t0 = time.monotonic() | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                        
            
                                    
            
            
                | 37 |  |  |         while time.monotonic() - t0 < 5: | 
            
                                                        
            
                                    
            
            
                | 38 |  |  |             # it'll wait as needed here | 
            
                                                        
            
                                    
            
            
                | 39 |  |  |             resp = self._executor(f"{self._pug}/compound/listkey/{key}/cids/JSON") | 
            
                                                        
            
                                    
            
            
                | 40 |  |  |             resp = NestedDotDict(orjson.loads(resp)) | 
            
                                                        
            
                                    
            
            
                | 41 |  |  |             if resp.get("IdentifierList.CID") is not None: | 
            
                                                        
            
                                    
            
            
                | 42 |  |  |                 return frozenset(resp.req_list_as("IdentifierList.CID", int)) | 
            
                                                        
            
                                    
            
            
                | 43 |  |  |         raise DownloadTimeoutError(f"Search for {inchi} using key {key} timed out") | 
            
                                                        
            
                                    
            
            
                | 44 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 45 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 46 |  |  | class CachingPubchemSimilarityApi(SimilarityApi): | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                        
            
                                    
            
            
                | 47 |  |  |     def __init__(self, query: QueryingPubchemSimilarityApi): | 
            
                                                        
            
                                    
            
            
                | 48 |  |  |         self._query = query | 
            
                                                        
            
                                    
            
            
                | 49 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 50 |  |  |     def path(self, inchi: str, min_tc: float) -> Path: | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                        
            
                                    
            
            
                | 51 |  |  |         if not (min_tc * 100).is_integer(): | 
            
                                                        
            
                                    
            
            
                | 52 |  |  |             raise XValueError(f"min_tc {min_tc} is not an increment of 1%") | 
            
                                                        
            
                                    
            
            
                | 53 |  |  |         percent = int(min_tc * 100) | 
            
                                                        
            
                                    
            
            
                | 54 |  |  |         path = self._cache_dir / "similarity" / f"{inchi}_{percent}" | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                        
            
                                    
            
            
                | 55 |  |  |         return path.with_suffix(SETTINGS.archive_filename_suffix) | 
            
                                                        
            
                                    
            
            
                | 56 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 57 |  |  |     def search(self, inchi: str, min_tc: float) -> FrozenSet[int]: | 
            
                                                        
            
                                    
            
            
                | 58 |  |  |         logger.info(f"Searching for {inchi} with min TC {min_tc}") | 
            
                                                        
            
                                    
            
            
                | 59 |  |  |         path = self.path(inchi, min_tc) | 
            
                                                        
            
                                    
            
            
                | 60 |  |  |         if path.exists(): | 
            
                                                        
            
                                    
            
            
                | 61 |  |  |             df = SimilarityDf.read_file(path) | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                        
            
                                    
            
            
                | 62 |  |  |             return frozenset(set(df["cid"].values)) | 
            
                                                        
            
                                    
            
            
                | 63 |  |  |         found = self._query.search(inchi, min_tc) | 
            
                                                        
            
                                    
            
            
                | 64 |  |  |         df: SimilarityDf = SimilarityDf.of([pd.Series(dict(cid=cid)) for cid in found]) | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                        
            
                                    
            
            
                | 65 |  |  |         df.write_file(path, mkdirs=True, dir_hash=True) | 
            
                                                        
            
                                    
            
            
                | 66 |  |  |         logger.info(f"Wrote {len(df)} values for {inchi} with min TC {min_tc}") | 
            
                                                        
            
                                    
            
            
                | 67 |  |  |         return frozenset(set(df["cid"].values)) | 
            
                                                        
            
                                    
            
            
                | 68 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 69 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 70 |  |  | __all__ = ["QueryingPubchemSimilarityApi", "CachingPubchemSimilarityApi"] | 
            
                                                        
            
                                    
            
            
                | 71 |  |  |  |