| 1 |  |  | import logging | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  | import abc | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | from dataclasses import dataclass | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  | from typing import Sequence, Set, Optional | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  | from pocketutils.core.dot_dict import NestedDotDict | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  | from mandos.model.chembl_api import ChemblApi | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  | from mandos.model.chembl_support import ChemblCompound, AssayType | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  | from mandos.model.chembl_support.chembl_target_graphs import ChemblTargetGraph | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  | from mandos.model.taxonomy import Taxonomy | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  | from mandos.search.chembl._protein_search import ProteinHit, ProteinSearch, H | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  | logger = logging.getLogger("mandos") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  | @dataclass(frozen=True, order=True, repr=True) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  | class _ActivityHit(ProteinHit): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 |  |  |     taxon_id: int | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 |  |  |     taxon_name: str | 
            
                                                                                                            
                            
            
                                    
            
            
                | 21 |  |  |     src_id: str | 
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 23 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 24 |  |  | class _ActivitySearch(ProteinSearch[H], metaclass=abc.ABCMeta): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 25 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 26 |  |  |     Search for ``activity``. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 27 |  |  |     """ | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 28 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 29 |  |  |     def __init__( | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                        
                            
            
                                    
            
            
                | 30 |  |  |         self, | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                        
                            
            
                                    
            
            
                | 31 |  |  |         key: str, | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                        
                            
            
                                    
            
            
                | 32 |  |  |         api: ChemblApi, | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                        
                            
            
                                    
            
            
                | 33 |  |  |         taxa: Sequence[Taxonomy], | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                        
                            
            
                                    
            
            
                | 34 |  |  |         traversal_strategy: str, | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                        
                            
            
                                    
            
            
                | 35 |  |  |         allowed_target_types: Set[str], | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                        
                            
            
                                    
            
            
                | 36 |  |  |         min_confidence_score: Optional[int], | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                        
                            
            
                                    
            
            
                | 37 |  |  |         allowed_relations: Set[str], | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                        
                            
            
                                    
            
            
                | 38 |  |  |         min_pchembl: Optional[float], | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                        
                            
            
                                    
            
            
                | 39 |  |  |         banned_flags: Set[str], | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                        
                            
            
                                    
            
            
                | 40 |  |  |     ): | 
            
                                                                        
                            
            
                                    
            
            
                | 41 |  |  |         super().__init__(key, api, taxa, traversal_strategy, allowed_target_types) | 
            
                                                                        
                            
            
                                    
            
            
                | 42 |  |  |         self.min_confidence_score = min_confidence_score | 
            
                                                                        
                            
            
                                    
            
            
                | 43 |  |  |         self.allowed_relations = allowed_relations | 
            
                                                                        
                            
            
                                    
            
            
                | 44 |  |  |         self.min_pchembl = min_pchembl | 
            
                                                                        
                            
            
                                    
            
            
                | 45 |  |  |         self.banned_flags = banned_flags | 
            
                                                                                                            
                            
            
                                    
            
            
                | 46 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 47 |  |  |     @classmethod | 
            
                                                                                                            
                            
            
                                    
            
            
                | 48 |  |  |     def allowed_assay_types(cls) -> Set[str]: | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 49 |  |  |         raise NotImplementedError() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 50 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 51 |  |  |     def query(self, parent_form: ChemblCompound) -> Sequence[NestedDotDict]: | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 52 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 53 |  |  |         filters = dict( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 54 |  |  |             parent_molecule_chembl_id=parent_form.chid, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 55 |  |  |             assay_type__iregex=self._set_to_regex(self.allowed_assay_types()), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 56 |  |  |             standard_relation__iregex=self._set_to_regex(self.allowed_relations), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 57 |  |  |             pchembl_value__isnull=None if self.min_pchembl is None else False, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 58 |  |  |             target_organism__isnull=None if len(self.taxa) == 0 else False, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 59 |  |  |         ) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 60 |  |  |         # I'd rather not figure out how the API interprets None, so remove them | 
            
                                                                                                            
                            
            
                                    
            
            
                | 61 |  |  |         filters = {k: v for k, v in filters.items() if v is not None} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 62 |  |  |         return list(self.api.activity.filter(**filters)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 63 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 64 |  |  |     def should_include( | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 65 |  |  |         self, lookup: str, compound: ChemblCompound, data: NestedDotDict, target: ChemblTargetGraph | 
                            
                    |  |  |  | 
                                                                                        
                                                                                            
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 66 |  |  |     ) -> bool: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 67 |  |  |         if ( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 68 |  |  |             ( | 
                            
                    |  |  |  | 
                                                                                        
                                                                                            
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 69 |  |  |                 data.get_as("data_validity_comment", lambda s: s.lower()) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 70 |  |  |                 in {s.lower() for s in self.banned_flags} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 71 |  |  |             ) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 72 |  |  |             or (data.req_as("standard_relation", str) not in self.allowed_relations) | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 73 |  |  |             or (data.req_as("assay_type", str) not in self.allowed_assay_types()) | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 74 |  |  |             or (len(self.taxa) > 0 and not self.is_in_taxa(data.get_as("target_tax_id", int))) | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 75 |  |  |             or (self.min_pchembl is not None and data.get("pchembl_value") is None) | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 76 |  |  |             or self.min_pchembl is not None | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 77 |  |  |             and data.req_as("pchembl_value", float) < self.min_pchembl | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 78 |  |  |         ): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 79 |  |  |             return False | 
            
                                                                                                            
                            
            
                                    
            
            
                | 80 |  |  |         if data.get("data_validity_comment") is not None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 81 |  |  |             logger.warning( | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 82 |  |  |                 f"Activity annotation for {lookup} has flag '{data.get('data_validity_comment')} (ok)" | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 83 |  |  |             ) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 84 |  |  |         # The `target_organism` doesn't always match the `assay_organism` | 
            
                                                                                                            
                            
            
                                    
            
            
                | 85 |  |  |         # Ex: see assay CHEMBL823141 / document CHEMBL1135642 for homo sapiens in xenopus laevis | 
            
                                                                                                            
                            
            
                                    
            
            
                | 86 |  |  |         # However, it's often something like yeast expressing a human / mouse / etc receptor | 
            
                                                                                                            
                            
            
                                    
            
            
                | 87 |  |  |         # So there's no need to filter by it | 
            
                                                                                                            
                            
            
                                    
            
            
                | 88 |  |  |         assay = self.api.assay.get(data.req_as("assay_chembl_id", str)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 89 |  |  |         if target.type.name.lower() not in {s.lower() for s in self.allowed_target_types}: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 90 |  |  |             logger.warning(f"Excluding {target} with type {target.type}") | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 91 |  |  |             return False | 
            
                                                                                                            
                            
            
                                    
            
            
                | 92 |  |  |         confidence_score = assay.get("confidence_score") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 93 |  |  |         if self.min_confidence_score is not None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 94 |  |  |             if confidence_score is None or confidence_score < self.min_confidence_score: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 95 |  |  |                 return False | 
            
                                                                                                            
                            
            
                                    
            
            
                | 96 |  |  |         # Some of these are non-protein types | 
            
                                                                                                            
                            
            
                                    
            
            
                | 97 |  |  |         # And if it's unknown, we don't know what to do with it | 
            
                                                                                                            
                            
            
                                    
            
            
                | 98 |  |  |         return True | 
            
                                                                                                            
                            
            
                                    
            
            
                | 99 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def _extract(self, lookup: str, compound: ChemblCompound, data: NestedDotDict) -> NestedDotDict:
                    """
                    Build the flat output record for a single activity entry.

                    Args:
                        lookup: The identifier the search started from; stored as ``origin_inchikey``.
                        compound: The matched compound; supplies ``inchikey``, ``chid``, and ``name``.
                        data: The activity record. Required keys are read with ``req_as``;
                            the assay-context keys (tissue, cell type, subcellular region)
                            are optional and read with ``get_as``.

                    Returns:
                        A ``NestedDotDict`` holding the record id, compound identifiers, the chosen
                        taxon id and name, the activity values, and the optional assay context.
                    """
                    # we know these exist from the query
                    organism = data.req_as("target_organism", str)
                    tax_id = data.req_as("target_tax_id", int)
                    if len(self.taxa) == 0:
                        # No taxa configured: report the organism exactly as the record names it
                        tax_name = organism
                    else:
                        taxes = {taxonomy.req(tax_id) for taxonomy in self.taxa if taxonomy.contains(tax_id)}
                        # NOTE(review): this raises StopIteration when no configured taxonomy contains
                        # tax_id; presumably such records are filtered out before we get here -- confirm
                        tax = next(iter(taxes))
                        if len(taxes) > 1:
                            logger.warning(f"Multiple matches for taxon {tax_id}: {taxes}; using {tax}")
                        if organism != tax.name:
                            logger.warning(f"Target organism {organism} is not {tax.name}")
                        # Report the id and name from the matched taxon rather than the raw record values
                        tax_id = tax.id
                        tax_name = tax.name
                    return NestedDotDict(
                        dict(
                            record_id=data.req_as("activity_id", str),
                            origin_inchikey=lookup,
                            matched_inchikey=compound.inchikey,
                            compound_id=compound.chid,
                            compound_name=compound.name,
                            taxon_id=tax_id,
                            taxon_name=tax_name,
                            pchembl=data.req_as("pchembl_value", float),
                            std_type=data.req_as("standard_type", str),
                            src_id=data.req_as("src_id", str),
                            exact_target_id=data.req_as("target_chembl_id", str),
                            tissue=data.get_as("tissue", str),
                            cell_type=data.get_as("cell_type", str),
                            # Must be get_as like its two siblings: with plain get(), `str` is
                            # passed as a second argument instead of being used as the type to read
                            subcellular_region=data.get_as("subcellular_region", str),
                        )
                    )
            
                                                                                                            
                            
            
                                    
            
            
                | 133 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 134 |  |  |     assay_type: AssayType | 
            
                                                                                                            
                            
            
                                    
            
            
                | 135 |  |  |     tissue: Optional[str] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 136 |  |  |     cell_type: Optional[str] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 137 |  |  |     subcellular_region: Optional[str] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 138 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 139 |  |  |  | 
            
                                                                                                            
                                                                
            
                                    
            
            
                # Names exported by a star-import of this module.
                __all__ = [
                    "_ActivitySearch",
                    "_ActivityHit",
                ]
            
                                                        
            
                                    
            
            
                | 141 |  |  |  |