Passed
Push — main ( 9f1476...29b393 )
by Douglas
02:18
created

pocketutils.tools.sort_tools   A

Complexity

Total Complexity 25

Size/Duplication

Total Lines 173
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 25
eloc 74
dl 0
loc 173
rs 10
c 0
b 0
f 0

6 Methods

Rating   Name   Duplication   Size   Complexity  
D SortUtils.exact_natsort_alg() 0 43 12
A SortUtils.core_natsort_flags() 0 15 1
A SortUtils.natsort() 0 27 2
B SortUtils.get_natsort_alg() 0 32 8
A SortUtils._ns_info_from_int_flag() 0 4 1
A SortUtils.all_natsort_flags() 0 13 1
1
# SPDX-FileCopyrightText: Copyright 2020-2023, Contributors to pocketutils
2
# SPDX-PackageHomePage: https://github.com/dmyersturnbull/pocketutils
3
# SPDX-License-Identifier: Apache-2.0
4
"""
5
Tools for sorting.
6
"""
7
8
from collections.abc import Collection, Iterable, Mapping, Sequence
9
from typing import (
10
    NamedTuple,
11
    Self,
12
    TypeVar,
13
)
14
15
from natsort import natsorted, ns
16
from natsort.ns_enum import ns as ns_enum
17
18
# Public API of this module: the utility class and its ready-made instance.
__all__ = ["SortUtils", "SortTools"]
19
20
# Shared immutable "no flags" set; returned by SortUtils.exact_natsort_alg for empty input.
_empty_frozenset = frozenset()
21
22
# Element type of the iterable handed to SortUtils.natsort (and of its `dtype` argument).
T = TypeVar("T")
23
24
25
class NatsortFlagsAndValue(NamedTuple):
    """
    Pairs a set of natsort flag names with their combined integer value.

    Being a `NamedTuple`, it unpacks as `(flags, value)`.
    """

    # Upper-case natsort flag names, e.g. "FLOAT" or "SIGNED"
    flags: frozenset[str]
    # Integer form of the flags, suitable for natsort's `alg=` argument
    value: int
28
29
30
class SortUtils:
    """
    Helpers for natural sorting, built on `natsort <https://pypi.org/project/natsort>`_.

    None of the methods read or write instance state.
    """

    def natsort(
        self: Self,
        lst: Iterable[T],
        dtype: type[T],
        *,
        alg: int | Collection[int | str] | None = None,
        reverse: bool = False,
    ) -> Sequence[T]:
        """
        Perform a natural sort consistent with the type `dtype`.
        Uses `natsort <https://pypi.org/project/natsort>`_.

        See Also:
            [`get_natsort_alg`](pocketutils.tools.sort_tools.SortUtils.get_natsort_alg)

        Args:
            lst: The things to sort; consumed once and copied into a list
            dtype: The element type; only used to choose flags when `alg` is `None`
            alg: A specific natsort algorithm (int) or a collection of flag names/ints;
                 if `None`, the flags are guessed from `dtype`
            reverse: Sort in reverse (e.g. Z to A or 9 to 1)

        Returns:
            A new sorted sequence; `lst` itself is not modified
        """
        if alg is None:
            _, alg = self.get_natsort_alg(dtype)
        else:
            _, alg = self.exact_natsort_alg(alg)
        return natsorted(list(lst), alg=alg, reverse=reverse)

    def all_natsort_flags(self: Self) -> Mapping[str, int]:
        """
        Returns the flags defined by natsort, including combined and default flags.
        Combined flags are bitwise ORs of other flags, e.g. `ns.REAL == ns.FLOAT | ns.SIGNED`.
        Default flags are those whose value is 0.

        Note:
            The mapping is built by iterating over the enum. Python enum iteration
            yields canonical members only, so a name that is merely an alias of
            another member is not included.

        See Also:
            [`core_natsort_flags`](pocketutils.tools.sort_tools.SortUtils.core_natsort_flags)

        Returns:
            A mapping from flag name to int value
        """
        return {e.name: e.value for e in ns_enum}

    def core_natsort_flags(self: Self) -> Mapping[str, int]:
        """
        Returns natsort flags that are not combinations or defaults.

        See Also:
            [`all_natsort_flags`](pocketutils.tools.sort_tools.SortUtils.all_natsort_flags)

        Returns:
            A mapping from flag name to int value
        """
        # Keep only single-bit values:
        #   - 0 is never a power of 2, so defaults are excluded
        #   - combinations have more than one bit set, so they are excluded too
        # len(ns_enum) is more than the number of core values, but that's fine:
        # it only needs to be an upper bound on the number of bits in use.
        single_bits = {1 << i for i in range(len(ns_enum))}
        return {e.name: e.value for e in ns_enum if e.value in single_bits}

    def get_natsort_alg(self: Self, dtype: type[T]) -> NatsortFlagsAndValue:
        """
        Guesses a good natsorted flag for the dtype.

        Here are some specifics:
            - integers and booleans ⇒ INT and SIGNED
            - floating-point        ⇒ FLOAT and SIGNED
            - strings               ⇒ COMPATIBILITYNORMALIZE and GROUPLETTERS
            - anything else         ⇒ no flags (this includes datetime types)

        NumPy scalar types are recognized by their `repr`
        (`<class 'numpy.int…'>`, `<class 'numpy.float…'>`, `<class 'numpy.bool_'>`),
        so NumPy does not need to be imported here.

        Args:
            dtype: The element type, e.g. `int`, `float`, `str`, or a NumPy scalar type

        Returns:
            A tuple of (set of flag names, int) -- see
            [`exact_natsort_alg`](pocketutils.tools.sort_tools.SortUtils.exact_natsort_alg)
        """
        names: set[str] = set()
        value = 0
        type_repr = repr(dtype)
        # NOTE: `==` (not `is`) is kept on purpose so that objects which merely
        # compare equal to a builtin type are still matched.
        if dtype == str:
            names.update(("COMPATIBILITYNORMALIZE", "GROUPLETTERS"))
            value |= ns_enum.COMPATIBILITYNORMALIZE | ns_enum.GROUPLETTERS
        if (
            dtype == int
            or dtype == bool
            or type_repr.startswith("<class 'numpy.int")
            or type_repr == "<class 'numpy.bool_'>"
        ):
            names.update(("INT", "SIGNED"))
            value |= ns_enum.INT | ns_enum.SIGNED
        if dtype == float or type_repr.startswith("<class 'numpy.float"):
            names.update(("FLOAT", "SIGNED"))
            value |= ns_enum.FLOAT | ns_enum.SIGNED  # same as ns_enum.REAL
        return NatsortFlagsAndValue(frozenset(names), value)

    def exact_natsort_alg(
        self: Self,
        flags: int | str | Collection[int | str] | None,
    ) -> NatsortFlagsAndValue:
        """
        Gets the flag names and combined `alg=` argument for natsort.

        Examples:
            - `exact_natsort_alg({"REAL"}) == ({"FLOAT", "SIGNED"}, ns.FLOAT | ns.SIGNED)`
            - `exact_natsort_alg({}) == ({}, 0)`
            - `exact_natsort_alg(ns.LOWERCASEFIRST) == ({"LOWERCASEFIRST"}, ns.LOWERCASEFIRST)`
            - `exact_natsort_alg({"localenum", "numafter"})`
              `== ({"LOCALENUM", "NUMAFTER"}, ns.LOCALENUM | ns.NUMAFTER)`

        Args:
            flags: Can be any of:
                   - `None`, `0`, or an empty collection (meaning no flags)
                   - a single integer `alg` argument
                   - a single flag name (case-insensitive)
                   - a collection of flag ints and/or names in `natsort.ns`

        Returns:
            A tuple of the set of flag names, and the corresponding input to `natsorted`.
            Only uses standard flag names, never the "combined" ones.
            (E.g. `exact_natsort_alg({"REAL"})`
            will return `({"FLOAT", "SIGNED"}, ns.FLOAT | ns.SIGNED)`.)

        Raises:
            TypeError: If `flags`, or an element of it, is neither a str nor an int
            AttributeError: If a flag name is not an attribute of `natsort.ns`
        """
        # A str is itself a Collection (of characters), so wrap it before the
        # Collection checks below treat it as a set of one-letter flag names.
        if isinstance(flags, str):
            flags = {flags}
        if (
            flags is None
            or (isinstance(flags, Collection) and len(flags) == 0)
            or (isinstance(flags, int) and flags == 0)
        ):
            return NatsortFlagsAndValue(_empty_frozenset, 0)
        if isinstance(flags, int):
            return self._ns_info_from_int_flag(flags)
        if isinstance(flags, Collection):
            combined = 0
            for f in flags:
                if isinstance(f, str):
                    combined |= getattr(ns, f.upper())
                elif isinstance(f, int):
                    combined |= f
                else:
                    # Report the offending element, not the whole collection
                    raise TypeError(f"Unknown type {type(f)} for {f}")
            return self._ns_info_from_int_flag(combined)
        raise TypeError(f"Unknown type {type(flags)} for {flags}")

    def _ns_info_from_int_flag(self: Self, val: int) -> NatsortFlagsAndValue:
        """
        Decomposes an integer `alg` value into the names of the core flags it contains.

        Args:
            val: The combined integer flag value

        Returns:
            The names of all core flags whose bit is set in `val`, plus `val` unchanged
        """
        core = self.core_natsort_flags()
        names = frozenset(name for name, bit in core.items() if (bit & val) != 0)
        return NatsortFlagsAndValue(names, val)
170
171
172
# Ready-made module-level instance, so callers can use `SortTools.natsort(...)` directly.
SortTools = SortUtils()
173