| 1 |  |  | import abc | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  | import logging | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | import re | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  | from dataclasses import dataclass | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  | from typing import Sequence, TypeVar, Set, Union | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  | from pocketutils.core.dot_dict import NestedDotDict | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  | from mandos.model.chembl_api import ChemblApi | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  | from mandos.model.chembl_support import ChemblCompound | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  | from mandos.model.chembl_support.chembl_targets import TargetFactory | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  | from mandos.model.chembl_support.chembl_target_graphs import ( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  |     ChemblTargetGraph, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  |     ChemblTargetGraphFactory, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  | ) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  | from mandos.model.chembl_support.chembl_utils import ChemblUtils | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  | from mandos.model.taxonomy import Taxonomy | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  | from mandos.search.chembl import ChemblHit, ChemblSearch | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 |  |  | from mandos.search.chembl.target_traversal import TargetTraversalStrategies, TargetTraversalStrategy | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
# Module-level logger; it is looked up under the fixed name "mandos" rather than this module's own name.
logger = logging.getLogger("mandos")
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 23 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 24 |  |  | @dataclass(frozen=True, order=True, repr=True) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 25 |  |  | class ProteinHit(ChemblHit, metaclass=abc.ABCMeta): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 26 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 27 |  |  |     A protein target entry for a compound. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 28 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 29 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 30 |  |  |     exact_target_id: str | 
            
                                                                                                            
                            
            
                                    
            
            
                | 31 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 32 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
# Covariant type variable for the concrete hit type; restricted to ProteinHit and its subclasses.
H = TypeVar("H", bound=ProteinHit, covariant=True)
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 34 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 35 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 36 |  |  | class ProteinSearch(ChemblSearch[H], metaclass=abc.ABCMeta): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 37 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 38 |  |  |     Abstract search. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 39 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 40 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 41 |  |  |     def __init__( | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 42 |  |  |         self, | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 43 |  |  |         key: str, | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 44 |  |  |         api: ChemblApi, | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 45 |  |  |         taxa: Sequence[Taxonomy], | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 46 |  |  |         traversal_strategy: str, | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 47 |  |  |         allowed_target_types: Set[str], | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 48 |  |  |     ): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 49 |  |  |         super().__init__(key, api) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 50 |  |  |         self.taxa = taxa | 
            
                                                                                                            
                            
            
                                    
            
            
                | 51 |  |  |         self.traversal_strategy = TargetTraversalStrategies.by_name(traversal_strategy, self.api) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 52 |  |  |         self.allowed_target_types = allowed_target_types | 
            
                                                                                                            
                            
            
                                    
            
            
                | 53 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 54 |  |  |     def is_in_taxa(self, species: Union[int, str]) -> bool: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 55 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 56 |  |  |         Returns true if the ChEMBL species is contained in any of our taxonomies. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 57 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 58 |  |  |         return any((taxon.contains(species) for taxon in self.taxa)) | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 59 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 60 |  |  |     def _set_to_regex(self, values) -> str: | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                        
                            
            
                                    
            
            
                | 61 |  |  |         return "(" + "|".join([f"(?:{re.escape(v)})" for v in values]) + ")" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 62 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 63 |  |  |     def query(self, parent_form: ChemblCompound) -> Sequence[NestedDotDict]: | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 64 |  |  |         raise NotImplementedError() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 65 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 66 |  |  |     def should_include( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 67 |  |  |         self, lookup: str, compound: ChemblCompound, data: NestedDotDict, target: ChemblTargetGraph | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 68 |  |  |     ) -> bool: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 69 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 70 |  |  |         Filter based on the returned (activity/mechanism) data. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 71 |  |  |         IGNORE filters about the target itself, including whether it's a valid target. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 72 |  |  |         Return True in these cases (better yet, don't check). | 
            
                                                                                                            
                            
            
                                    
            
            
                | 73 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 74 |  |  |         Args: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 75 |  |  |             lookup: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 76 |  |  |             compound: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 77 |  |  |             data: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 78 |  |  |             target: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 79 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 80 |  |  |         Returns: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 81 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 82 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 83 |  |  |         raise NotImplementedError() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 84 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 85 |  |  |     def to_hit( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 86 |  |  |         self, | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 87 |  |  |         lookup: str, | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 88 |  |  |         compound: ChemblCompound, | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 89 |  |  |         data: NestedDotDict, | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 90 |  |  |         best_target: ChemblTargetGraph, | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 91 |  |  |     ) -> Sequence[H]: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 92 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 93 |  |  |         Gets the desired data as a NestedDotDict from the data from a single element | 
            
                                                                                                            
                            
            
                                    
            
            
                | 94 |  |  |         returned by ``api_endpoint.filter``. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 95 |  |  |         This MUST MATCH the constructor, EXCEPT for object_id and object_name, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 96 |  |  |         which come from traversal and should be added by ``ProteinSearch.to_hit`` (parameter ``best_target``). | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 97 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 98 |  |  |         Turns the final data into ``H``. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 99 |  |  |         Note that this has a default behavior but could be overridden to split into multiple hits | 
            
                                                                                                            
                            
            
                                    
            
            
                | 100 |  |  |         and/or to add additional attributes that might come from ``best_target``. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 101 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 102 |  |  |         Args: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 103 |  |  |             lookup: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 104 |  |  |             compound: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 105 |  |  |             data: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 106 |  |  |             best_target: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 107 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 108 |  |  |         Returns: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 109 |  |  |             A sequence of hits. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 110 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 111 |  |  |         raise NotImplementedError() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 112 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def find(self, lookup: str) -> Sequence[H]:
                    """
                    Finds every hit for a single compound.

                    The lookup is resolved to a compound with ``ChemblUtils(self.api).get_compound``,
                    that compound is passed to ``self.query``, and each returned result is wrapped
                    in a ``NestedDotDict`` and handed to ``self.process``.
                    The hits from all results are concatenated in the order the results were returned.

                    Args:
                        lookup: The compound identifier, passed as-is to ``get_compound``
                                and forwarded to ``process``.

                    Returns:
                        A list of the hits produced by ``process`` over all query results.
                    """
                    compound = ChemblUtils(self.api).get_compound(lookup)
                    return [
                        hit
                        for raw in self.query(compound)
                        for hit in self.process(lookup, compound, NestedDotDict(raw))
                    ]
            
                                                                                                            
                            
            
                                    
            
            
                | 129 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def process(self, lookup: str, compound: ChemblCompound, data: NestedDotDict) -> Sequence[H]:
                    """
                    Converts one query result into zero or more hits.

                    A result without a ``target_chembl_id`` is logged at debug level and dropped.
                    Otherwise the target is fetched through a ``TargetFactory``, a target graph is
                    built at that target, and ``self.should_include`` decides whether to continue.
                    If it does, each target yielded by ``self.traversal_strategy`` is passed to
                    ``self.to_hit`` and the resulting hits are concatenated.

                    Args:
                        lookup: The original lookup string; used in the log message and forwarded
                                to ``should_include`` and ``to_hit``.
                        compound: The compound, forwarded to ``should_include`` and ``to_hit``.
                        data: A single query result; its ``target_chembl_id`` entry is read here.

                    Returns:
                        A list of hits; empty if the target ID is missing
                        or ``should_include`` rejects the result.
                    """
                    if data.get("target_chembl_id") is None:
                        logger.debug(f"target_chembl_id missing from mechanism '{data}' for compound {lookup}")
                        return []
                    target_id = data["target_chembl_id"]
                    targets = TargetFactory(self.api)
                    source_target = targets.find(target_id)
                    target_graph = ChemblTargetGraphFactory.create(self.api, targets).at_target(source_target)
                    if self.should_include(lookup, compound, data, target_graph):
                        # For a non-traversable target type (like DNA), traverse() hands back
                        # the source target itself. Whether to keep those was already decided
                        # by the subclass, so no extra filtering is needed at this point.
                        return [
                            hit
                            for matched in self.traversal_strategy(target_graph)
                            for hit in self.to_hit(lookup, compound, data, matched)
                        ]
                    return []
            
                                                                                                            
                            
            
                                    
            
            
                | 159 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 160 |  |  |  | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 161 |  |  | __all__ = ["ProteinHit", "ProteinSearch"] | 
            
                                                        
            
                                    
            
            
                | 162 |  |  |  |