| 1 |  |  | """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  | Runner. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  | from __future__ import annotations | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  | from dataclasses import dataclass | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  | from pathlib import Path | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  | from typing import Dict, Sequence, List, Type, Union, Optional | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  | import pandas as pd | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  | import typer | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  | from typeddfs import TypedDfs | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  | from pocketutils.core.dot_dict import NestedDotDict | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  | from mandos import logger | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  | from mandos.entries.api_singletons import Apis | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  | from mandos.entries.entries import Entries, Entry | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 |  |  | from mandos.model import InjectionError | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 |  |  | from mandos.model.hits import HitFrame | 
            
                                                                                                            
                            
            
                                    
            
            
                | 21 |  |  | from mandos.model.settings import MANDOS_SETTINGS | 
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                # Typer application object for this module.
                cli = typer.Typer()

                # Runs at import time, before the two API handles are aliased below.
                Apis.set_default()
                Chembl = Apis.Chembl
                Pubchem = Apis.Pubchem

                # Lookup table from each entry's command name (``cmd()``) to the entry class.
                EntriesByCmd: Dict[str, Type[Entry]] = {entry.cmd(): entry for entry in Entries}

                # these are only permitted in 'meta', not individual searches
                meta_keys = {"verbose", "quiet", "check", "log", "to"}
                forbidden_keys = {"dir", "out-dir", "out_dir"}

                # Typed data frame with six required string columns, one row per search
                # (see ``MultiSearch.to_table``).
                SearchExplainDf = (
                    TypedDfs.typed("SearchExplainDf")
                    .require("key", "search", "source", "category", "desc", "args", dtype=str)
                    .build()
                )
            
                                                                                                            
                            
            
                                    
            
            
                | 38 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 39 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 40 |  |  | @dataclass(frozen=True, repr=True) | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 41 |  |  | class MultiSearch: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 42 |  |  |     # 'meta' allows us to set defaults for things like --to | 
            
                                                                                                            
                            
            
                                    
            
            
                | 43 |  |  |     meta: NestedDotDict | 
            
                                                                                                            
                            
            
                                    
            
            
                | 44 |  |  |     searches: Sequence[NestedDotDict] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 45 |  |  |     toml_path: Path | 
            
                                                                                                            
                            
            
                                    
            
            
                | 46 |  |  |     input_path: Path | 
            
                                                                                                            
                            
            
                                    
            
            
                | 47 |  |  |     out_dir: Path | 
            
                                                                                                            
                            
            
                                    
            
            
                | 48 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 49 |  |  |     @property | 
            
                                                                                                            
                            
            
                                    
            
            
                | 50 |  |  |     def final_path(self) -> Path: | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 51 |  |  |         if "to" in self.meta: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 52 |  |  |             fmt = Path(self.meta["to"]).suffix | 
            
                                                                                                            
                            
            
                                    
            
            
                | 53 |  |  |         else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 54 |  |  |             fmt = MANDOS_SETTINGS.default_table_suffix | 
            
                                                                                                            
                            
            
                                    
            
            
                | 55 |  |  |         return self.out_dir / ("search_" + self.input_path.name + "_" + self.toml_path.name + fmt) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 56 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 57 |  |  |     def __post_init__(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 58 |  |  |         to = self.meta.get_as("to", str) | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 59 |  |  |         if to is not None and not to.startswith("."): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 60 |  |  |             raise ValueError(f"Argument 'to' ({to})' must start with '.'.") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 61 |  |  |         for key, value in self.meta.items(): | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 62 |  |  |             if key not in meta_keys: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 63 |  |  |                 raise ValueError(f"{key} in 'meta' not supported.") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 64 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                @classmethod
                def build(cls, input_path: Path, out_dir: Path, toml_path: Path) -> MultiSearch:
                    """
                    Construct an instance from a TOML file.

                    The file is read with ``NestedDotDict.read_toml``. Its ``search`` entry is
                    fetched as a list (with ``[]`` passed as the default) and its ``meta``
                    sub-table is taken via ``sub("meta")``.

                    Args:
                        input_path: Stored as ``input_path`` on the instance.
                        out_dir: Stored as ``out_dir`` on the instance.
                        toml_path: TOML file to read; also stored as ``toml_path``.

                    Returns:
                        A new instance. Its ``__post_init__`` validation runs on construction.
                    """
                    toml = NestedDotDict.read_toml(toml_path)
                    searches = toml.get_as("search", list, [])
                    meta = toml.sub("meta")
                    # Go through ``cls`` rather than the hard-coded class name so that a
                    # subclass calling ``build`` receives an instance of its own type.
                    return cls(
                        meta=meta,
                        searches=searches,
                        toml_path=toml_path,
                        input_path=input_path,
                        out_dir=out_dir,
                    )
            
                                                                                                            
                                                                
            
                                    
            
            
                | 71 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                def to_table(self) -> SearchExplainDf:
                    """
                    Summarize the commands from ``_build_commands`` as a ``SearchExplainDf``.

                    Each command contributes one row with the fields ``key``, ``search``,
                    ``category``, ``source``, ``desc`` and ``args``. ``args`` is the
                    command's ``params`` rendered as comma-separated ``name=value`` pairs.
                    """
                    records = []
                    for runner in self._build_commands():
                        entry = runner.cmd
                        search_name = entry.get_search_type().search_name
                        category = runner.category
                        # The search type is instantiated with the key only to read its data source.
                        source = entry.get_search_type()(runner.key).data_source
                        description = entry.describe()
                        rendered = ", ".join(
                            [f"{name}={value}" for name, value in runner.params.items()]
                        )
                        record = {
                            "key": runner.key,
                            "search": search_name,
                            "category": category,
                            "source": source,
                            "desc": description,
                            "args": rendered,
                        }
                        records.append(pd.Series(record))
                    return SearchExplainDf(records)
            
                                                                                                            
                            
            
                                    
            
            
                | 83 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def run(self) -> None:
                    """
                    Check every search, run them all, then write one concatenated table.

                    All commands are built and ``test()`` is called on each before any of
                    them is run, so that bad parameters surface before any search starts.
                    Afterwards each command's ``output_path`` is read back as a
                    ``HitFrame``, the frames are concatenated, and the result is written
                    to ``final_path``.
                    """
                    runners = self._build_commands()
                    # Phase 1: validate everything up front.
                    for runner in runners:
                        runner.test()
                        logger.info(f"Search {runner.key} looks ok.")
                    logger.notice("All searches look ok.")
                    # Phase 2: run the searches.
                    for runner in runners:
                        runner.run()
                    logger.notice("Done with all searches!")
                    # Phase 3: merge the per-search outputs into a single file.
                    frames = [HitFrame.read_file(runner.output_path) for runner in runners]
                    merged = HitFrame(pd.concat(frames))
                    merged.write_file(self.final_path)
                    logger.notice(f"Concatenated file to {self.final_path}")
            
                                                                                                            
                            
            
                                    
            
            
                | 98 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 99 |  |  |     def _build_commands(self) -> Sequence[CmdRunner]: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 100 |  |  |         commands = {} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 101 |  |  |         skipping = [] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 102 |  |  |         for search in self.searches: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 103 |  |  |             cmd = CmdRunner.build(search, self.meta, self.input_path, self.out_dir) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 104 |  |  |             if cmd.was_run: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 105 |  |  |                 skipping += [cmd] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 106 |  |  |             else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 107 |  |  |                 commands[cmd.key] = cmd | 
            
                                                                                                            
                            
            
                                    
            
            
                | 108 |  |  |                 if cmd.key in commands: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 109 |  |  |                     raise ValueError(f"Repeated search key '{cmd.key}'") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 110 |  |  |         if len(skipping) > 0: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 111 |  |  |             skipping = ", ".join([c.key for c in skipping]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 112 |  |  |             logger.notice(f"Skipping searches {skipping} (already run).") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 113 |  |  |         return list(commands.values()) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 114 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 115 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 116 |  |  | @dataclass(frozen=True, repr=True) | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 117 |  |  | class CmdRunner: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 118 |  |  |     cmd: Type[Entry] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 119 |  |  |     params: Dict[str, Union[int, str, float]] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 120 |  |  |     input_path: Path | 
            
                                                                                                            
                            
            
                                    
            
            
                | 121 |  |  |     category: Optional[str] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 122 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 123 |  |  |     @property | 
            
                                                                                                            
                            
            
                                    
            
            
                | 124 |  |  |     def key(self) -> str: | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 125 |  |  |         return self.params["key"] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 126 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 127 |  |  |     @property | 
            
                                                                                                            
                            
            
                                    
            
            
                | 128 |  |  |     def output_path(self) -> Path: | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 129 |  |  |         return Path(self.params["to"]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 130 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 131 |  |  |     @property | 
            
                                                                                                            
                            
            
                                    
            
            
                | 132 |  |  |     def was_run(self) -> bool: | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 133 |  |  |         return self.done_path.exists() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 134 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 135 |  |  |     @property | 
            
                                                                                                            
                            
            
                                    
            
            
                | 136 |  |  |     def done_path(self) -> Path: | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 137 |  |  |         return self.output_path.with_suffix(self.output_path.suffix + ".done") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 138 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 139 |  |  |     def test(self) -> None: | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 140 |  |  |         self.cmd.test(self.input_path, **{**self.params, **dict(quiet=True)}) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 141 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 142 |  |  |     def run(self) -> None: | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 143 |  |  |         self.cmd.run(self.input_path, **{**self.params, **dict(no_setup=True, check=False)}) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 144 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 145 |  |  |     @classmethod | 
            
                                                                                                            
                            
            
                                    
            
            
                | 146 |  |  |     def build(cls, e: NestedDotDict, meta: NestedDotDict, input_path: Path, out_dir: Path): | 
                            
                    |  |  |  | 
                                                                                        
                                                                                            
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 147 |  |  |         cmd = e.req_as("source", str) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 148 |  |  |         key = e.get_as("key", str, cmd) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 149 |  |  |         to = meta.get_as("to", str, MANDOS_SETTINGS.default_table_suffix) | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 150 |  |  |         try: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 151 |  |  |             cmd = EntriesByCmd[cmd] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 152 |  |  |         except KeyError: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 153 |  |  |             raise InjectionError(f"Search command {cmd} (key {key}) does not exist") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 154 |  |  |         # use defaults | 
            
                                                                                                            
                            
            
                                    
            
            
                | 155 |  |  |         params = dict(meta) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 156 |  |  |         # they shouldn't pass any of these args | 
            
                                                                                                            
                            
            
                                    
            
            
                | 157 |  |  |         bad = {b for b in {*meta_keys, "path", "no_setup", "out_dir"} if b in e} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 158 |  |  |         if len(bad) > 0: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 159 |  |  |             raise ValueError(f"Forbidden keys in [[search]] ({cmd}): {','.join(bad)}") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 160 |  |  |         # update the defaults from 'meta' (e.g. 'verbose') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 161 |  |  |         # skip the source -- it's the command name | 
            
                                                                                                            
                            
            
                                    
            
            
                | 162 |  |  |         # stupidly, we need to explicitly add the defaults from the OptionInfo instances | 
            
                                                                                                            
                            
            
                                    
            
            
                | 163 |  |  |         params.update(cmd.default_param_values().items()) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 164 |  |  |         # do this after: the defaults had path, key, and to | 
            
                                                                                                            
                            
            
                                    
            
            
                | 165 |  |  |         params["key"] = key | 
            
                                                                                                            
                            
            
                                    
            
            
                | 166 |  |  |         params["path"] = e.req_as("path", Path) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 167 |  |  |         params["out_dir"] = out_dir | 
            
                                                                                                            
                            
            
                                    
            
            
                | 168 |  |  |         params.setdefault("to", to) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 169 |  |  |         # now add the params we got for this command's section | 
            
                                                                                                            
                            
            
                                    
            
            
                | 170 |  |  |         params.update({k: v for k, v in e.items() if k != "source" and k != "category"}) | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 171 |  |  |         del params["check"] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 172 |  |  |         category = e.get_as("category", str) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 173 |  |  |         return CmdRunner(cmd, params, input_path, category) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 174 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 175 |  |  |  | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 176 |  |  | __all__ = ["MultiSearch"] | 
            
                                                        
            
                                    
            
            
                | 177 |  |  |  |