| 1 |  |  | from __future__ import annotations | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  | import abc | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | import dataclasses | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  | import typing | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  | from typing import Generic, Sequence, TypeVar | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  | import pandas as pd | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  | from mandos import logger | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  | from mandos.model.hits import AbstractHit, HitFrame | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  | from mandos.model import CompoundNotFoundError, ReflectionUtils | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
# Type variable for the hit type a search produces: bounded by AbstractHit and declared covariant.
H = TypeVar("H", bound=AbstractHit, covariant=True)
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  | class SearchError(Exception): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  |     Wrapper for any exception raised in ``find`` except for ``CompoundNotFoundError``. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 |  |  |     """ | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 20 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 21 |  |  |     def __init__( | 
            
                                                                        
                            
            
                                    
            
            
                | 22 |  |  |         self, | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                        
                            
            
                                    
            
            
                | 23 |  |  |         *args, | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                        
                            
            
                                    
            
            
                | 24 |  |  |         inchikey: str = None, | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                        
                            
            
                                    
            
            
                | 25 |  |  |         search_key: str = None, | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                        
                            
            
                                    
            
            
                | 26 |  |  |         search_class: str = None, | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                        
                            
            
                                    
            
            
                | 27 |  |  |         **kwargs, | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                        
                            
            
                                    
            
            
                | 28 |  |  |     ): | 
            
                                                                        
                            
            
                                    
            
            
                | 29 |  |  |         super().__init__(*args, *kwargs) | 
            
                                                                        
                            
            
                                    
            
            
                | 30 |  |  |         self.inchikey = inchikey | 
            
                                                                        
                            
            
                                    
            
            
                | 31 |  |  |         self.search_key = search_key | 
            
                                                                        
                            
            
                                    
            
            
                | 32 |  |  |         self.search_class = search_class | 
            
                                                                                                            
                            
            
                                    
            
            
                | 33 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 34 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 35 |  |  | class Search(Generic[H], metaclass=abc.ABCMeta): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 36 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 37 |  |  |     Something to search and how to do it. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 38 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 39 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 40 |  |  |     def __init__(self, key: str): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 41 |  |  |         self.key = key | 
            
                                                                                                            
                            
            
                                    
            
            
                | 42 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 43 |  |  |     @property | 
            
                                                                                                            
                            
            
                                    
            
            
                | 44 |  |  |     def search_class(self) -> str: | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 45 |  |  |         return self.__class__.__name__ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 46 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 47 |  |  |     @property | 
            
                                                                                                            
                            
            
                                    
            
            
                | 48 |  |  |     def search_name(self) -> str: | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 49 |  |  |         return self.__class__.__name__.lower().replace("search", "") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 50 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 51 |  |  |     @property | 
            
                                                                                                            
                            
            
                                    
            
            
                | 52 |  |  |     def data_source(self) -> str: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 53 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 54 |  |  |         Where the data originally came from; e.g. ``the Human Metabolome Database (HMDB)``" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 55 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 56 |  |  |         raise NotImplementedError() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 57 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 58 |  |  |     def get_params(self) -> typing.Mapping[str, typing.Any]: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 59 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 60 |  |  |         Returns the *parameters* of this ``Search`` their values. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 61 |  |  |         Parameters are attributes that do not begin with an underscore. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 62 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 63 |  |  |         return {key: value for key, value in vars(self).items() if not key.startswith("_")} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 64 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 65 |  |  |     def find_to_df(self, inchikeys: Sequence[str]) -> HitFrame: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 66 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 67 |  |  |         Calls :py:meth:`find_all` and returns a :py:class:`HitFrame` DataFrame subclass. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 68 |  |  |         Writes a logging ERROR for each compound that was not found. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 69 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 70 |  |  |         Args: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 71 |  |  |             inchikeys: A list of InChI key strings | 
            
                                                                                                            
                            
            
                                    
            
            
                | 72 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 73 |  |  |         hits = self.find_all(inchikeys) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 74 |  |  |         return HitFrame( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 75 |  |  |             [ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 76 |  |  |                 pd.Series( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 77 |  |  |                     { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 78 |  |  |                         **{f: getattr(h, f) for f in self.hit_fields()}, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 79 |  |  |                         **dict(universal_id=h.universal_id), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 80 |  |  |                     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 81 |  |  |                 ) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 82 |  |  |                 for h in hits | 
            
                                                                                                            
                            
            
                                    
            
            
                | 83 |  |  |             ] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 84 |  |  |         ) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 85 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 86 |  |  |     def find_all(self, inchikeys: Sequence[str]) -> Sequence[H]: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 87 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 88 |  |  |         Loops over every compound and calls ``find``. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 89 |  |  |         Comes with better logging. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 90 |  |  |         Writes a logging ERROR for each compound that was not found. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 91 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 92 |  |  |         Args: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 93 |  |  |             inchikeys: A list of InChI key strings | 
            
                                                                                                            
                            
            
                                    
            
            
                | 94 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 95 |  |  |         Returns: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 96 |  |  |             The list of :py:class:`mandos.model.hits.AbstractHit` | 
            
                                                                                                            
                            
            
                                    
            
            
                | 97 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 98 |  |  |         lst = [] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 99 |  |  |         # set just in case we never iterate | 
            
                                                                                                            
                            
            
                                    
            
            
                | 100 |  |  |         i = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 101 |  |  |         for i, compound in enumerate(inchikeys): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 102 |  |  |             try: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 103 |  |  |                 x = self.find(compound) | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 104 |  |  |             except CompoundNotFoundError: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 105 |  |  |                 logger.info(f"NOT FOUND: {compound}. Skipping.") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 106 |  |  |                 continue | 
            
                                                                                                            
                            
            
                                    
            
            
                | 107 |  |  |             except Exception: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 108 |  |  |                 raise SearchError( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 109 |  |  |                     f"Failed {self.key} [{self.search_class}] on compound {compound}", | 
            
                                                                                                            
                            
            
                                    
            
            
                | 110 |  |  |                     compound=compound, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 111 |  |  |                     search_key=self.key, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 112 |  |  |                     search_class=self.search_class, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 113 |  |  |                 ) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 114 |  |  |             lst.extend(x) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 115 |  |  |             logger.debug(f"Found {len(x)} {self.search_name} annotations for {compound}") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 116 |  |  |             if i % 10 == 9: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 117 |  |  |                 logger.notice( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 118 |  |  |                     f"Found {len(lst)} {self.search_name} annotations for {i+1} of {len(inchikeys)} compounds" | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 119 |  |  |                 ) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 120 |  |  |         logger.notice( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 121 |  |  |             f"Found {len(lst)} {self.search_name} annotations for {i+1} of {len(inchikeys)} compounds" | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 122 |  |  |         ) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 123 |  |  |         return lst | 
            
                                                                                                            
                            
            
                                    
            
            
                | 124 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 125 |  |  |     def find(self, inchikey: str) -> Sequence[H]: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 126 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 127 |  |  |         To override. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 128 |  |  |         Finds the annotations for a single compound. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 129 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 130 |  |  |         Args: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 131 |  |  |             inchikey: An InChI Key | 
            
                                                                                                            
                            
            
                                    
            
            
                | 132 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 133 |  |  |         Returns: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 134 |  |  |             A list of annotations | 
            
                                                                                                            
                            
            
                                    
            
            
                | 135 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 136 |  |  |         Raises: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 137 |  |  |             CompoundNotFoundError | 
            
                                                                                                            
                            
            
                                    
            
            
                | 138 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 139 |  |  |         raise NotImplementedError() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 140 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 141 |  |  |     @classmethod | 
            
                                                                                                            
                            
            
                                    
            
            
                | 142 |  |  |     def hit_fields(cls) -> Sequence[str]: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 143 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 144 |  |  |         Gets the fields in the Hit type parameter. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 145 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 146 |  |  |         # Okay, there's a lot of magic going on here | 
            
                                                                                                            
                            
            
                                    
            
            
                | 147 |  |  |         # We need to access the _parameter_ H on cls -- raw `H` doesn't work | 
            
                                                                                                            
                            
            
                                    
            
            
                | 148 |  |  |         # get_args and __orig_bases__ do this for us | 
            
                                                                                                            
                            
            
                                    
            
            
                | 149 |  |  |         # then dataclasses.fields gives us the dataclass fields | 
            
                                                                                                            
                            
            
                                    
            
            
                | 150 |  |  |         # there's also actual_h.__annotations__, but that doesn't include ClassVar and InitVar | 
            
                                                                                                            
                            
            
                                    
            
            
                | 151 |  |  |         # (not that we're using those) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 152 |  |  |         # If this magic is too magical, we can make this an abstract method | 
            
                                                                                                            
                            
            
                                    
            
            
                | 153 |  |  |         # But that would be a lot of excess code and it might be less modular | 
            
                                                                                                            
                            
            
                                    
            
            
                | 154 |  |  |         x = cls.get_h() | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 155 |  |  |         return [f.name for f in dataclasses.fields(x) if f.name != "search_class"] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 156 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 157 |  |  |     @classmethod | 
            
                                                                                                            
                            
            
                                    
            
            
                | 158 |  |  |     def get_h(cls): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 159 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 160 |  |  |         Returns the underlying hit TypeVar, ``H``. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 161 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 162 |  |  |         return ReflectionUtils.get_generic_arg(cls, AbstractHit) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 163 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 164 |  |  |     def __repr__(self) -> str: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 165 |  |  |         return ", ".join([k + "=" + str(v) for k, v in self.get_params().items()]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 166 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 167 |  |  |     def __str__(self) -> str: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 168 |  |  |         return repr(self) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 169 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 170 |  |  |     def __eq__(self, other: Search) -> bool: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 171 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 172 |  |  |         Returns True iff all of the parameters match, thereby excluding attributes with underscores. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 173 |  |  |         Multiversal equality. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 174 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 175 |  |  |         Raises: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 176 |  |  |             TypeError: If ``other`` is not a :py:class:`Search` | 
            
                                                                                                            
                            
            
                                    
            
            
                | 177 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 178 |  |  |         if not isinstance(other, Search): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 179 |  |  |             raise TypeError(f"{type(other)} not comparable") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 180 |  |  |         return repr(self) == repr(other) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 181 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 182 |  |  |  | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 183 |  |  | __all__ = ["Search", "HitFrame"] | 
            
                                                        
            
                                    
            
            
                | 184 |  |  |  |