| 1 |  |  | from __future__ import annotations | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | import logging | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  | from dataclasses import dataclass | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  | from functools import total_ordering | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  | from pathlib import Path | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  | from typing import List, Mapping, Optional, Sequence, Set, Union | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  | import pandas as pd | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  | from typeddfs import TypedDfs | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  | logger = logging.getLogger(__package__) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  | TaxonomyDf = ( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  |     TypedDfs.typed("TaxonomyDf").require("taxon").require("parent").require("scientific_name") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  | ).build() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 |  |  | @total_ordering | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 |  |  | @dataclass() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 21 |  |  | class Taxon: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  |     """""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 23 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 24 |  |  |     # we can't use frozen=True because we have both parents and children | 
            
                                                                                                            
                            
            
                                    
            
            
                | 25 |  |  |     # instead, just use properties | 
            
                                                                                                            
                            
            
                                    
            
            
                | 26 |  |  |     __id: int | 
            
                                                                                                            
                            
            
                                    
            
            
                | 27 |  |  |     __name: str | 
            
                                                                                                            
                            
            
                                    
            
            
                | 28 |  |  |     __parent: Optional[Taxon] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 29 |  |  |     __children: Set[Taxon] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 30 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 31 |  |  |     @property | 
            
                                                                                                            
                            
            
                                    
            
            
                | 32 |  |  |     def id(self) -> int: | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 33 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 34 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 35 |  |  |         Returns: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 36 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 37 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 38 |  |  |         return self.__id | 
            
                                                                                                            
                            
            
                                    
            
            
                | 39 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 40 |  |  |     @property | 
            
                                                                                                            
                            
            
                                    
            
            
                | 41 |  |  |     def name(self) -> str: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 42 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 43 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 44 |  |  |         Returns: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 45 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 46 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 47 |  |  |         return self.__name | 
            
                                                                                                            
                            
            
                                    
            
            
                | 48 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 49 |  |  |     @property | 
            
                                                                                                            
                            
            
                                    
            
            
                | 50 |  |  |     def parent(self) -> Taxon: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 51 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 52 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 53 |  |  |         Returns: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 54 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 55 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 56 |  |  |         return self.__parent | 
            
                                                                                                            
                            
            
                                    
            
            
                | 57 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 58 |  |  |     @property | 
            
                                                                                                            
                            
            
                                    
            
            
                | 59 |  |  |     def children(self) -> Set[Taxon]: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 60 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 61 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 62 |  |  |         Returns: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 63 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 64 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 65 |  |  |         return set(self.__children) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 66 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 67 |  |  |     @property | 
            
                                                                                                            
                            
            
                                    
            
            
                | 68 |  |  |     def ancestors(self) -> Sequence[Taxon]: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 69 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 70 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 71 |  |  |         Returns: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 72 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 73 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 74 |  |  |         lst = [] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 75 |  |  |         self._ancestors(lst) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 76 |  |  |         return lst | 
            
                                                                                                            
                            
            
                                    
            
            
                | 77 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 78 |  |  |     @property | 
            
                                                                                                            
                            
            
                                    
            
            
                | 79 |  |  |     def descendents(self) -> Sequence[Taxon]: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 80 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 81 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 82 |  |  |         Returns: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 83 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 84 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 85 |  |  |         lst = [] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 86 |  |  |         self._descendents(lst) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 87 |  |  |         return lst | 
            
                                                                                                            
                            
            
                                    
            
            
                | 88 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 89 |  |  |     def _ancestors(self, values: List[Taxon]) -> None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 90 |  |  |         values.append(self.parent) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 91 |  |  |         self.parent._ancestors(values) | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 92 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 93 |  |  |     def _descendents(self, values: List[Taxon]) -> None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 94 |  |  |         values.extend(self.children) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 95 |  |  |         for child in self.children: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 96 |  |  |             child._descendents(values) | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 97 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 98 |  |  |     def __str__(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 99 |  |  |         return repr(self) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 100 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 101 |  |  |     def __repr__(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 102 |  |  |         return f"{self.__class__.__name__}({self.id}: {self.name} (parent={self.parent.id if self.parent else 'none'}))" | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 103 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 104 |  |  |     def __hash__(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 105 |  |  |         return hash(self.id) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 106 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 107 |  |  |     def __eq__(self, other): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 108 |  |  |         return self.id == other.id | 
            
                                                                                                            
                            
            
                                    
            
            
                | 109 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 110 |  |  |     def __lt__(self, other): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 111 |  |  |         return self.id < other.id | 
            
                                                                                                            
                            
            
                                    
            
            
                | 112 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 113 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 114 |  |  | @dataclass() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 115 |  |  | class _Taxon(Taxon): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 116 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 117 |  |  |     An internal, modifiable taxon for building the tree. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 118 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 119 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 120 |  |  |     def set_name(self, name: str): | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 121 |  |  |         self.__name = name | 
            
                                                                                                            
                            
            
                                    
            
            
                | 122 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 123 |  |  |     def set_parent(self, parent: _Taxon): | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 124 |  |  |         self.__parent = parent | 
            
                                                                                                            
                            
            
                                    
            
            
                | 125 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 126 |  |  |     def add_child(self, child: _Taxon): | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 127 |  |  |         self.__children.add(child) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 128 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 129 |  |  |     # weirdly these are required again -- probably an issue with dataclass | 
            
                                                                                                            
                            
            
                                    
            
            
                | 130 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 131 |  |  |     def __str__(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 132 |  |  |         return repr(self) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 133 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 134 |  |  |     def __repr__(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 135 |  |  |         return f"{self.__class__.__name__}({self.id}: {self.name} (parent={self.parent.id if self.parent else 'none'}))" | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 136 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 137 |  |  |     def __hash__(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 138 |  |  |         return hash(self.id) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 139 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 140 |  |  |     def __eq__(self, other): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 141 |  |  |         return self.id == other.id | 
            
                                                                                                            
                            
            
                                    
            
            
                | 142 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 143 |  |  |     def __lt__(self, other): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 144 |  |  |         return self.id < other.id | 
            
                                                                                                            
                            
            
                                    
            
            
                | 145 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 146 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 147 |  |  | class Taxonomy: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 148 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 149 |  |  |     A taxonomic tree of organisms from UniProt. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 150 |  |  |     Elements in the tree can be looked up by name or ID using ``__getitem__`` and ``get``. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 151 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 152 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 153 |  |  |     def __init__(self, by_id: Mapping[int, Taxon]): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 154 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 155 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 156 |  |  |         Args: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 157 |  |  |             by_id: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 158 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 159 |  |  |         # constructor provided for consistency with the members | 
            
                                                                                                            
                            
            
                                    
            
            
                | 160 |  |  |         self._by_id = dict(by_id) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 161 |  |  |         # this probably isn't actually possible | 
            
                                                                                                            
                            
            
                                    
            
            
                | 162 |  |  |         if len(self) == 0: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 163 |  |  |             logger.warning(f"{self} contains 0 taxa") | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 164 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 165 |  |  |     @classmethod | 
            
                                                                                                            
                            
            
                                    
            
            
                | 166 |  |  |     def from_list(cls, taxa: Sequence[Taxon]) -> Taxonomy: | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 167 |  |  |         tax = Taxonomy({x.id: x for x in taxa}) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 168 |  |  |         # catch duplicate values | 
            
                                                                                                            
                            
            
                                    
            
            
                | 169 |  |  |         assert len(tax._by_id) == len(taxa), f"{len(tax._by_id)} != {len(taxa)}" | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 170 |  |  |         return tax | 
            
                                                                                                            
                            
            
                                    
            
            
                | 171 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 172 |  |  |     @classmethod | 
            
                                                                                                            
                            
            
                                    
            
            
                | 173 |  |  |     def from_path(cls, path: Path) -> Taxonomy: | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 174 |  |  |         df = pd.read_csv(path, sep="\t", header=0) | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 175 |  |  |         return cls.from_df(df) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 176 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 177 |  |  |     @classmethod | 
            
                                                                                                            
                            
            
                                    
            
            
                | 178 |  |  |     def from_df(cls, df: TaxonomyDf) -> Taxonomy: | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 179 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 180 |  |  |         Reads from a DataFrame from a CSV file provided by a UniProt download. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 181 |  |  |         Strips any entries with missing or empty-string scientific names. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 182 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 183 |  |  |         Args: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 184 |  |  |             df: A dataframe with columns (at least) "taxon", "scientific_name", and "parent" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 185 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 186 |  |  |         Returns: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 187 |  |  |             The corresponding taxonomic tree | 
            
                                                                                                            
                            
            
                                    
            
            
                | 188 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 189 |  |  |         df["taxon"] = df["taxon"].astype(int) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 190 |  |  |         # TODO fillna(0) should not be needed | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 191 |  |  |         df["parent"] = df["parent"].fillna(0).astype(int) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 192 |  |  |         # just build up a tree, sticking the elements in by_id | 
            
                                                                                                            
                            
            
                                    
            
            
                | 193 |  |  |         tax = {} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 194 |  |  |         for row in df.itertuples(): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 195 |  |  |             child = tax.setdefault(row.taxon, _Taxon(row.taxon, row.scientific_name, None, set())) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 196 |  |  |             child.set_name(row.scientific_name) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 197 |  |  |             if row.parent != 0: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 198 |  |  |                 parent = tax.setdefault(row.parent, _Taxon(row.parent, "", None, set())) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 199 |  |  |                 child.set_parent(parent) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 200 |  |  |                 parent.add_child(child) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 201 |  |  |         bad = [t for t in tax.values() if t.name.strip() == ""] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 202 |  |  |         if len(bad) > 0: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 203 |  |  |             raise ValueError(f"There are taxa with missing or empty names: {bad}.") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 204 |  |  |         for v in tax.values(): | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 205 |  |  |             v.__class__ = Taxon | 
            
                                                                                                            
                            
            
                                    
            
            
                | 206 |  |  |         return Taxonomy(tax) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 207 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 208 |  |  |     @property | 
            
                                                                                                            
                            
            
                                    
            
            
                | 209 |  |  |     def taxa(self) -> Sequence[Taxon]: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 210 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 211 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 212 |  |  |         Returns: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 213 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 214 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 215 |  |  |         return list(self._by_id.values()) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 216 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 217 |  |  |     @property | 
            
                                                                                                            
                            
            
                                    
            
            
                | 218 |  |  |     def roots(self) -> Sequence[Taxon]: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 219 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 220 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 221 |  |  |         Returns: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 222 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 223 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 224 |  |  |         return [k for k in self.taxa if k.parent is None or k.parent not in self] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 225 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 226 |  |  |     @property | 
            
                                                                                                            
                            
            
                                    
            
            
                | 227 |  |  |     def leaves(self) -> Sequence[Taxon]: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 228 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 229 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 230 |  |  |         Returns: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 231 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 232 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 233 |  |  |         return [k for k in self.taxa if len(k.children) == 0] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 234 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def subtree(self, item: int) -> Taxonomy:
                    """
                    Builds a new taxonomy from one taxon and its descendents.

                    Args:
                        item: The ID of the taxon at the top of the subtree; resolved with ``self[item]``

                    Returns:
                        A new Taxonomy made of that taxon plus everything in its ``descendents``,
                        keyed by each taxon's ``id``

                    Raises:
                        KeyError: If the taxon was not found
                    """
                    top = self[item]
                    # Collect into a set so a taxon that appears more than once is kept only once.
                    members = {top}
                    members.update(top.descendents)
                    return Taxonomy({member.id: member for member in members})
            
                                                                                                            
                            
            
                                    
            
            
                | 247 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def req(self, item: int) -> Taxon:
                    """
                    Looks up a taxon, failing if it is absent; delegates to ``self[...]``.

                    Args:
                        item: The ID, or a Taxon whose ``id`` is used for the lookup

                    Returns:
                        The taxon

                    Raises:
                        KeyError: If the taxon was not found
                    """
                    key = item.id if isinstance(item, Taxon) else item
                    return self[key]
            
                                                                                                            
                            
            
                                    
            
            
                | 252 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def get(self, item: int) -> Optional[Taxon]:
                    """
                    Corresponds to ``dict.get``.

                    Args:
                        item: The UniProt ID as an int, or a Taxon whose ``id`` is used instead

                    Returns:
                        The taxon, or None if it was not found

                    Raises:
                        TypeError: If the item (after unwrapping a Taxon to its ``id``) is not an int
                    """
                    if isinstance(item, Taxon):
                        item = item.id
                    # Only integer IDs can be looked up; reject anything else up front.
                    if not isinstance(item, int):
                        raise TypeError(f"Type {type(item)} of {item} not applicable")
                    return self._by_id.get(item)
            
                                                                                                            
                            
            
                                    
            
            
                | 269 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def __getitem__(self, item: int) -> Taxon:
                    """
                    Corresponds to ``dict[_]``.

                    Args:
                        item: The UniProt ID; passed straight to ``get``

                    Returns:
                        The taxon

                    Raises:
                        KeyError: If the taxon was not found
                    """
                    found = self.get(item)
                    if found is not None:
                        return found
                    raise KeyError(f"{item} not found in {self}")
            
                                                                                                            
                                                                
            
                                    
            
            
                | 287 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                def contains(self, item: Union[Taxon, int, str]):
                    """
                    Tests whether a taxon is present, using ``get``.

                    Args:
                        item: The ID or Taxon to look for

                    Returns:
                        True if ``get`` returned a taxon, False if it returned None

                    Raises:
                        TypeError: Propagated from ``get``, which rejects anything other than an int or a Taxon
                    """
                    found = self.get(item)
                    return found is not None
            
                                                                                                            
                            
            
                                    
            
            
                | 290 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 291 |  |  |     def n_taxa(self) -> int: | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 292 |  |  |         return len(self._by_id) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 293 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def __contains__(self, item: Union[Taxon, int, str]):
                    """
                    Supports ``item in taxonomy``, using ``get``.

                    Args:
                        item: The ID or Taxon to look for

                    Returns:
                        True if ``get`` returned a taxon, False if it returned None

                    Raises:
                        TypeError: Propagated from ``get``, which rejects anything other than an int or a Taxon
                    """
                    missing = self.get(item) is None
                    return not missing
            
                                                                                                            
                            
            
                                    
            
            
                | 296 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 297 |  |  |     def __len__(self) -> int: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 298 |  |  |         return len(self._by_id) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 299 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 300 |  |  |     def __str__(self) -> str: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 301 |  |  |         return repr(self) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 302 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 303 |  |  |     def __repr__(self) -> str: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 304 |  |  |         roots = ", ".join(r.name for r in self.roots) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 305 |  |  |         return f"{self.__class__.__name__}(n={len(self._by_id)} (roots={roots}) @ {hex(id(self))})" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 306 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 307 |  |  |  | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 308 |  |  | __all__ = ["Taxon", "Taxonomy"] | 
            
                                                        
            
                                    
            
            
                | 309 |  |  |  |