Passed
Push — main ( 7b3fbc...cf9f8c )
by Douglas
01:44
created

mandos.model.taxonomy.Taxonomy.from_trees()   A

Complexity

Conditions 1

Size

Total Lines 8
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 6
nop 2
dl 0
loc 8
rs 10
c 0
b 0
f 0
1
from __future__ import annotations
0 ignored issues
show
introduced by
Missing module docstring
Loading history...
2
3
from collections import defaultdict
4
from dataclasses import dataclass
5
from functools import total_ordering
6
from pathlib import Path
7
from typing import List, Mapping, Optional, Sequence, Set, Union, FrozenSet, Iterable
8
9
import pandas as pd
0 ignored issues
show
introduced by
Unable to import 'pandas'
Loading history...
10
from typeddfs import TypedDfs
0 ignored issues
show
introduced by
Unable to import 'typeddfs'
Loading history...
11
12
from mandos import logger
13
14
# Typed DataFrame schema for taxonomy tables.
# "taxon", "parent", and "scientific_name" are required columns;
# "common_name" is reserved (recognized, but optional).
_REQUIRED_TAXONOMY_COLUMNS = ("taxon", "parent", "scientific_name")
TaxonomyDf = (
    TypedDfs.typed("TaxonomyDf")
    .require(*_REQUIRED_TAXONOMY_COLUMNS)  # assumes the builder accepts several names per call
    .reserve("common_name")
    .build()
)
21
22
23
@total_ordering
@dataclass()
class Taxon:
    """
    A single node of a taxonomic tree.

    Equality, hashing, and ordering are based solely on the taxon ID.
    """

    # we can't use frozen=True because we have both parents and children
    # instead, just use properties
    # NOTE: these names are mangled to ``_Taxon__id`` etc., so the generated constructor
    # should be called positionally: ``Taxon(id, name, parent, children)``.
    __id: int
    __name: str
    __parent: Optional[Taxon]
    __children: Set[Taxon]

    @property
    def id(self) -> int:
        """
        Returns:
            The ID of this taxon
        """
        return self.__id

    @property
    def name(self) -> str:
        """
        Returns:
            The name of this taxon
        """
        return self.__name

    @property
    def parent(self) -> Optional[Taxon]:
        """
        Returns:
            The parent taxon, or None if this taxon has no parent
        """
        return self.__parent

    @property
    def children(self) -> Set[Taxon]:
        """
        Returns:
            A copy of the set of direct children; modifying it does not affect this taxon
        """
        return set(self.__children)

    @property
    def ancestors(self) -> Sequence[Taxon]:
        """
        Returns:
            All ancestors, starting with the direct parent and ending with the root;
            empty if this taxon has no parent
        """
        lst = []
        self._ancestors(lst)
        return lst

    @property
    def descendents(self) -> Sequence[Taxon]:
        """
        Returns:
            All descendents: the direct children first, followed by each child's descendents
        """
        lst = []
        self._descendents(lst)
        return lst

    def _ancestors(self, values: List[Taxon]) -> None:
        # Walk upward until a taxon without a parent is reached.
        # (Recursing unconditionally would append None and then fail on ``None._ancestors``.)
        node = self.parent
        while node is not None:
            values.append(node)
            node = node.parent

    def _descendents(self, values: List[Taxon]) -> None:
        # ``children`` returns a fresh copy on every access, so take one snapshot
        kids = self.children
        values.extend(kids)
        for child in kids:
            child._descendents(values)

    def __str__(self):
        return repr(self)

    def __repr__(self):
        parent = self.parent.id if self.parent else "none"
        return f"{self.__class__.__name__}({self.id}: {self.name} (parent={parent}))"

    def __hash__(self):
        return hash(self.id)

    def __eq__(self, other):
        # let Python fall back to its default handling for non-taxon operands
        if not isinstance(other, Taxon):
            return NotImplemented
        return self.id == other.id

    def __lt__(self, other):
        if not isinstance(other, Taxon):
            return NotImplemented
        return self.id < other.id
116
117
118
# A single taxon reference: a taxon ID, a scientific name, or a Taxon instance.
TaxonIdOrName = Union[int, str, Taxon]
# Either one taxon reference or an iterable of them.
TaxaIdsAndNames = Union[TaxonIdOrName, Iterable[TaxonIdOrName]]
120
121
122
@dataclass()
class _Taxon(Taxon):
    """
    An internal, modifiable taxon for building the tree.
    """

    # NOTE: inside this class ``self.__name`` is mangled to ``self._Taxon__name``, because
    # leading underscores of the class name are stripped when private names are mangled.
    # That is exactly the attribute ``Taxon`` uses, which is what makes these setters work.
    # Renaming this class would silently break them.

    def set_name(self, name: str) -> None:
        """
        Replaces the name of this taxon.
        """
        self.__name = name

    def set_parent(self, parent: _Taxon) -> None:
        """
        Replaces the parent of this taxon. Does not update the parent's children.
        """
        self.__parent = parent

    def add_child(self, child: _Taxon) -> None:
        """
        Adds a direct child to this taxon. Does not update the child's parent.
        """
        self.__children.add(child)

    # These must be repeated here: ``@dataclass`` on this subclass would otherwise generate
    # its own ``__repr__`` and ``__eq__`` and, because ``__hash__`` is not defined in this
    # class body, set ``__hash__`` to None.

    def __str__(self):
        return repr(self)

    def __repr__(self):
        parent = self.parent.id if self.parent else "none"
        return f"{self.__class__.__name__}({self.id}: {self.name} (parent={parent}))"

    def __hash__(self):
        return hash(self.id)

    def __eq__(self, other):
        # let Python fall back to its default handling for non-taxon operands
        if not isinstance(other, Taxon):
            return NotImplemented
        return self.id == other.id

    def __lt__(self, other):
        if not isinstance(other, Taxon):
            return NotImplemented
        return self.id < other.id
153
154
155
class Taxonomy:
    """
    A taxonomic tree of organisms from UniProt.
    Taxa can be looked up by ID using ``__getitem__`` and ``get``, and by scientific name
    (case-insensitively) using ``get_by_name`` and the related methods.
    """

    def __init__(self, by_id: Mapping[int, Taxon], by_name: Mapping[str, FrozenSet[Taxon]]):
        """

        Args:
            by_id: Maps each taxon ID to its taxon
            by_name: Maps each scientific name to the taxa with that name;
                     keys are lower-cased here so that name lookups are case-insensitive
        """
        # constructor provided for consistency with the members
        self._by_id = dict(by_id)
        # normalize the keys so that they always agree with the lower-cased lookups
        merged = defaultdict(set)
        for name, taxa in by_name.items():
            merged[name.lower()].update(taxa)
        self._by_name = {name: frozenset(taxa) for name, taxa in merged.items()}
        # this probably isn't actually possible
        if len(self) == 0:
            logger.warning(f"{self} contains 0 taxa")

    @classmethod
    def from_trees(cls, taxonomies: Sequence[Taxonomy]) -> Taxonomy:
        """
        Merges several taxonomies into a new one.

        Args:
            taxonomies: The taxonomies to merge; must not be empty

        Returns:
            A new taxonomy built from the de-duplicated rows of all the inputs
        """
        # we need to rewrite the ancestors, which from_df already does
        # so we'll just use that
        dfs = [tree.to_df() for tree in taxonomies]
        df = TaxonomyDf(pd.concat(dfs, ignore_index=True))
        df = df.drop_duplicates().sort_values("taxon")
        return cls.from_df(df)

    @classmethod
    def from_list(cls, taxa: Sequence[Taxon]) -> Taxonomy:
        """
        Builds a taxonomy from already-linked taxa.

        Args:
            taxa: The taxa; their IDs must be unique

        Raises:
            AssertionError: If two taxa share an ID
        """
        by_id = {x.id: x for x in taxa}
        # catch duplicate values
        if len(by_id) != len(taxa):
            raise AssertionError(f"{len(by_id)} != {len(taxa)}")
        return Taxonomy(by_id, cls._build_by_name(by_id.values()))

    @classmethod
    def from_path(cls, path: Path) -> Taxonomy:
        """
        Reads a taxonomy from a tab-separated file with a header row.
        See ``from_df`` for the required columns.
        """
        df = pd.read_csv(path, sep="\t", header=0)
        return cls.from_df(df)

    @classmethod
    def from_df(cls, df: TaxonomyDf) -> Taxonomy:
        """
        Reads from a DataFrame from a CSV file provided by a UniProt download.
        The DataFrame itself is not modified.

        Args:
            df: A dataframe with columns (at least) "taxon", "scientific_name", and "parent";
                a missing or 0 parent marks a taxon without a parent

        Returns:
            The corresponding taxonomic tree

        Raises:
            ValueError: If any taxon (including a parent that never appears in the "taxon"
                        column) has a missing or empty scientific name
        """
        # work on converted copies of the columns rather than assigning into the caller's frame
        taxon_ids = df["taxon"].astype(int).tolist()
        # TODO fillna(0) should not be needed
        parent_ids = df["parent"].fillna(0).astype(int).tolist()
        names = df["scientific_name"].tolist()
        # just build up a tree, sticking the elements in by_id
        tax = {}
        for taxon_id, parent_id, name in zip(taxon_ids, parent_ids, names):
            child = tax.get(taxon_id)
            if child is None:
                child = tax[taxon_id] = _Taxon(taxon_id, name, None, set())
            else:
                # may have been created earlier as a nameless placeholder parent
                child.set_name(name)
            if parent_id != 0:
                parent = tax.get(parent_id)
                if parent is None:
                    # placeholder; it gets its name when (if) its own row is reached
                    parent = tax[parent_id] = _Taxon(parent_id, "", None, set())
                child.set_parent(parent)
                parent.add_child(child)
        # a missing name arrives as NaN (a float), so check the type before stripping
        bad = [t for t in tax.values() if not isinstance(t.name, str) or t.name.strip() == ""]
        if len(bad) > 0:
            raise ValueError(f"There are taxa with missing or empty names: {bad}.")
        # the tree is complete, so drop the setters by switching to the read-only class
        for v in tax.values():
            v.__class__ = Taxon
        by_name = cls._build_by_name(tax.values())
        return Taxonomy(tax, by_name)

    def to_df(self) -> TaxonomyDf:
        """
        Converts to a DataFrame with one row per taxon.

        Returns:
            A dataframe with columns "taxon", "scientific_name", and "parent";
            "parent" is 0 for taxa without a parent, which ``from_df`` reads back as no parent
        """
        return TaxonomyDf(
            [
                pd.Series(
                    dict(
                        taxon=taxon.id,
                        scientific_name=taxon.name,
                        parent=0 if taxon.parent is None else taxon.parent.id,
                    )
                )
                for taxon in self.taxa
            ]
        )

    @property
    def taxa(self) -> Sequence[Taxon]:
        """

        Returns:
            A new list of all taxa in this taxonomy
        """
        return list(self._by_id.values())

    @property
    def roots(self) -> Sequence[Taxon]:
        """

        Returns:
            The taxa that have no parent, or whose parent is not part of this taxonomy
        """
        return [k for k in self.taxa if k.parent is None or k.parent.id not in self._by_id]

    @property
    def leaves(self) -> Sequence[Taxon]:
        """

        Returns:
            The taxa that have no children
        """
        return [k for k in self.taxa if len(k.children) == 0]

    def exclude_subtree(self, item: Union[int, Taxon]) -> Taxonomy:
        """
        Returns a new taxonomy without ``item`` and all of its descendents.
        If ``item`` is not in this taxonomy, nothing is removed.
        """
        return self.exclude_subtrees_by_ids_or_names(item)

    def exclude_subtrees_by_ids_or_names(self, items: TaxaIdsAndNames) -> Taxonomy:
        """
        Returns a new taxonomy without the taxa matching any of ``items`` and their descendents.

        Args:
            items: A single ID, name, or taxon, or an iterable of them
        """
        # compare by ID: the keys of _by_id are ints, not taxa
        excluded_ids = {t.id for t in self._subtree_taxa(items)}
        return self._from_taxa(t for i, t in self._by_id.items() if i not in excluded_ids)

    def subtree(self, item: int) -> Taxonomy:
        """
        Returns the taxonomy rooted at the taxon with ID ``item``.

        Raises:
            KeyError: If the taxon was not found
        """
        item = self[item]
        return self._from_taxa({item, *item.descendents})

    def subtrees_by_ids_or_names(self, items: TaxaIdsAndNames) -> Taxonomy:
        """
        Returns the taxonomy made of the taxa matching any of ``items`` and their descendents.

        Args:
            items: A single ID, name, or taxon, or an iterable of them
        """
        return self._from_taxa(self._subtree_taxa(items))

    def subtrees_by_name(self, item: str) -> Taxonomy:
        """
        Returns the taxonomy that rooted at each of the taxa with the specified scientific name.
        """
        # wrap the name: a bare string would be iterated character by character
        return self.subtrees_by_names([item])

    def subtrees_by_names(self, items: Iterable[str]) -> Taxonomy:
        """
        Returns the taxonomy rooted at each of the taxa with any of the specified scientific names.
        Names without a match are ignored.
        """
        if isinstance(items, str):
            items = [items]
        descendents: Set[Taxon] = set()
        for item in items:
            for taxon in self.get_by_name(item):
                descendents.add(taxon)
                descendents.update(taxon.descendents)
        return self._from_taxa(descendents)

    def req_one_by_name(self, item: str) -> Taxon:
        """
        Gets a single taxon by its name.
        If there are multiple, returns the first (lowest ID).
        Raises an error if there are no matches.
        """
        one = self.get_one_by_name(item)
        if one is None:
            raise LookupError(f"No taxa for {item}")
        return one

    def req_only_by_name(self, item: str) -> Taxon:
        """
        Gets a single taxon by its name.
        Raises an error if there are multiple matches for the name, or if there are no matches.
        """
        taxa = self.get_by_name(item)
        if len(taxa) > 1:
            ids = ",".join(str(i) for i in sorted(t.id for t in taxa))
            raise ValueError(f"Got multiple results for {item}: {ids}")
        if len(taxa) == 0:
            raise LookupError(f"No taxa for {item}")
        return next(iter(taxa))

    def get_one_by_name(self, item: str) -> Optional[Taxon]:
        """
        Gets a single taxon by its name.
        If there are multiple, returns the first (lowest ID).
        If there are none, returns ``None``.
        """
        taxa = self.get_by_name(item)
        if len(taxa) == 0:
            return None
        if len(taxa) > 1:
            ids = ",".join(str(i) for i in sorted(t.id for t in taxa))
            logger.warning(f"Got multiple results for {item}: {ids}")
        # sets are unordered, so pick the lowest ID explicitly
        return min(taxa, key=lambda t: t.id)

    def get_by_name(self, item: str) -> FrozenSet[Taxon]:
        """
        Gets all taxa that match a scientific name, ignoring case.
        If a taxon is passed instead, its name is used.
        """
        if isinstance(item, Taxon):
            item = item.name
        # the keys of _by_name are lower-cased
        return self._by_name.get(item.lower(), frozenset())

    def get_all_by_id_or_name(self, items: Iterable[Union[int, str, Taxon]]) -> FrozenSet[Taxon]:
        """
        Gets all taxa that match any number of IDs or names.
        """
        matching: Set[Taxon] = set()
        for item in items:
            matching.update(self.get_by_id_or_name(item))
        # the set de-duplicates (making this fn useful)
        return frozenset(matching)

    def get_by_id_or_name(self, item: Union[int, str, Taxon]) -> FrozenSet[Taxon]:
        """
        Gets all taxa that match an ID or name.
        A taxon is matched by its ID; names are matched ignoring case.

        Raises:
            TypeError: If ``item`` is not an int, a str, or a taxon
        """
        if isinstance(item, Taxon):
            item = item.id
        if isinstance(item, int):
            taxon = self._by_id.get(item)
            return frozenset() if taxon is None else frozenset([taxon])
        if isinstance(item, str):
            return self._by_name.get(item.lower(), frozenset())
        raise TypeError(f"Unknown type {type(item)} of {item}")

    def req(self, item: int) -> Taxon:
        """
        Gets a single taxon by its ID.
        Raises an error if it is not found.
        """
        if isinstance(item, Taxon):
            item = item.id
        return self[item]

    def get(self, item: Union[int, Taxon]) -> Optional[Taxon]:
        """
        Corresponds to ``dict.get``.

        Args:
            item: The UniProt ID, or a taxon (which is looked up by its ID)

        Returns:
            The taxon, or None if it was not found

        Raises:
            TypeError: If ``item`` is neither an int nor a taxon (names are not accepted here)
        """
        if isinstance(item, Taxon):
            item = item.id
        if isinstance(item, int):
            return self._by_id.get(item)
        raise TypeError(f"Type {type(item)} of {item} not applicable")

    def __getitem__(self, item: int) -> Taxon:
        """
        Corresponds to ``dict[_]``.

        Args:
            item: The UniProt ID

        Returns:
            The taxon

        Raises:
            KeyError: If the taxon was not found
        """
        got = self.get(item)
        if got is None:
            raise KeyError(f"{item} not found in {self}")
        return got

    def contains(self, item: Union[Taxon, int, str]):
        """
        Returns whether any taxon matches the ID, name, or taxon.
        """
        # ``get`` rejects names, so go through the lookup that handles all three types
        return len(self.get_by_id_or_name(item)) > 0

    def n_taxa(self) -> int:
        """
        Returns the number of taxa in this taxonomy.
        """
        return len(self._by_id)

    def __contains__(self, item: Union[Taxon, int, str]):
        return self.contains(item)

    def __len__(self) -> int:
        return len(self._by_id)

    def __str__(self) -> str:
        return repr(self)

    def __repr__(self) -> str:
        roots = ", ".join(r.name for r in self.roots)
        return f"{self.__class__.__name__}(n={len(self._by_id)} (roots={roots}) @ {hex(id(self))})"

    def _subtree_taxa(self, items: TaxaIdsAndNames) -> Set[Taxon]:
        # Collects the taxa matching any of ``items`` together with all of their descendents.
        if isinstance(items, (int, str, Taxon)):
            items = [items]
        found: Set[Taxon] = set()
        for item in items:
            for taxon in self.get_by_id_or_name(item):
                found.add(taxon)
                found.update(taxon.descendents)
        return found

    @classmethod
    def _from_taxa(cls, taxa: Iterable[Taxon]) -> Taxonomy:
        # Builds a taxonomy containing exactly ``taxa``, indexed by ID and by name.
        by_id = {t.id: t for t in taxa}
        return Taxonomy(by_id, cls._build_by_name(by_id.values()))

    @classmethod
    def _build_by_name(cls, tax: Iterable[Taxon]) -> Mapping[str, FrozenSet[Taxon]]:
        by_name = defaultdict(set)
        for t in tax:
            # NOTE: lower-casing the keys for lookup
            # (done before grouping so that names differing only in case share one entry)
            by_name[t.name.lower()].add(t)
        return {k: frozenset(v) for k, v in by_name.items()}
449
450
451
# Public names exported by this module.
__all__ = [
    "Taxon",
    "Taxonomy",
    "TaxonomyDf",
]
452