1
|
|
|
# SPDX-FileCopyrightText: Copyright 2020-2023, Contributors to pocketutils |
2
|
|
|
# SPDX-PackageHomePage: https://github.com/dmyersturnbull/pocketutils |
3
|
|
|
# SPDX-License-Identifier: Apache-2.0 |
4
|
|
|
""" |
5
|
|
|
Tools for sorting. |
6
|
|
|
""" |
7
|
|
|
|
8
|
|
|
from collections.abc import Collection, Iterable, Mapping, Sequence |
9
|
|
|
from typing import ( |
10
|
|
|
NamedTuple, |
11
|
|
|
Self, |
12
|
|
|
TypeVar, |
13
|
|
|
) |
14
|
|
|
|
15
|
|
|
from natsort import natsorted, ns |
16
|
|
|
from natsort.ns_enum import ns as ns_enum |
17
|
|
|
|
18
|
|
|
# Names exported by `from <this module> import *`.
__all__ = ["SortUtils", "SortTools"]

# Shared immutable empty set, returned when no natsort flags apply.
_empty_frozenset = frozenset()

# Element type of the collections being sorted.
T = TypeVar("T")
23
|
|
|
|
24
|
|
|
|
25
|
|
|
class NatsortFlagsAndValue(NamedTuple):
    """
    A natsort algorithm, given both as flag names and as the combined integer.
    """

    # Names of the natsort flags that are set, e.g. "FLOAT" and "SIGNED"
    flags: frozenset[str]
    # Combined integer; this is what gets passed as `alg=` to `natsorted`
    value: int
28
|
|
|
|
29
|
|
|
|
30
|
|
|
class SortUtils:
    """
    Helpers for natural sorting, built on `natsort <https://pypi.org/project/natsort>`_.
    """

    def natsort(
        self: Self,
        lst: Iterable[T],
        dtype: type[T],
        *,
        alg: int | set[str] | frozenset[str] | None = None,
        reverse: bool = False,
    ) -> Sequence[T]:
        """
        Perform a natural sort consistent with the type `dtype`.
        Uses `natsort <https://pypi.org/project/natsort>`_.

        See Also:
            [`get_natsort_alg`](pocketutils.tools.sort_tools.SortUtils.get_natsort_alg)

        Args:
            lst: The items to sort (any iterable)
            dtype: The type of the elements of `lst`; only used to guess `alg` when `alg` is `None`
            alg: A specific natsort algorithm or set of flags;
                 if `None`, one is chosen from `dtype` via `get_natsort_alg`
            reverse: Sort in reverse (e.g. Z to A or 9 to 1)

        Returns:
            The sorted items, as returned by `natsorted`
        """
        if alg is None:
            _, alg_value = self.get_natsort_alg(dtype)
        else:
            _, alg_value = self.exact_natsort_alg(alg)
        # natsorted accepts any iterable, so no intermediate copy is needed
        return natsorted(lst, alg=alg_value, reverse=reverse)

    def all_natsort_flags(self: Self) -> Mapping[str, int]:
        """
        Returns the flags defined by natsort, including combined and default flags.
        A combined flag is, e.g., `ns.REAL`, which is `ns.FLOAT | ns.SIGNED`.
        A default flag is one whose value is 0.

        See Also:
            [`core_natsort_flags`](pocketutils.tools.sort_tools.SortUtils.core_natsort_flags)

        Returns:
            A mapping from flag name to int value
        """
        # NOTE(review): iterating an Enum yields canonical members only, so alias
        # names (additional names bound to an existing value) are not included;
        # confirm that this is intended.
        return {e.name: e.value for e in ns_enum}

    def core_natsort_flags(self: Self) -> Mapping[str, int]:
        """
        Returns natsort flags that are not combinations or defaults.

        See Also:
            [`all_natsort_flags`](pocketutils.tools.sort_tools.SortUtils.all_natsort_flags)

        Returns:
            A mapping from flag name to int value
        """
        # A core flag has exactly one bit set:
        # - 0 is excluded -- those are defaults
        # - values with more than one bit set are excluded -- those are combinations
        return {e.name: e.value for e in ns_enum if e.value > 0 and e.value & (e.value - 1) == 0}

    def get_natsort_alg(self: Self, dtype: type[T]) -> NatsortFlagsAndValue:
        """
        Guesses a good natsorted flag for the dtype.

        Here are some specifics:
            - integers and booleans ⇒ INT and SIGNED
            - floating-point ⇒ FLOAT and SIGNED
            - strings ⇒ COMPATIBILITYNORMALIZE and GROUPLETTERS
            - anything else (including datetime) ⇒ no flags

        Args:
            dtype: Probably from `pd.Series.dtype`

        Returns:
            A tuple of (set of flags, int) -- see :meth:`exact_natsort_alg`
        """
        names: set[str] = set()
        value = 0
        # numpy scalar classes are recognized by their repr, so numpy is never imported here
        type_repr = repr(dtype)
        # `==` (not `is`) is kept on purpose: objects that define equality against
        # the builtin types still match -- presumably numpy/pandas dtypes; TODO confirm
        if dtype == str:
            names.update(["COMPATIBILITYNORMALIZE", "GROUPLETTERS"])
            value |= ns_enum.COMPATIBILITYNORMALIZE | ns_enum.GROUPLETTERS
        if (
            dtype == int
            or dtype == bool
            or type_repr.startswith("<class 'numpy.int")
            or type_repr == "<class 'numpy.bool_'>"
        ):
            names.update(["INT", "SIGNED"])
            value |= ns_enum.INT | ns_enum.SIGNED
        if dtype == float or type_repr.startswith("<class 'numpy.float"):
            names.update(["FLOAT", "SIGNED"])
            value |= ns_enum.FLOAT | ns_enum.SIGNED  # same as ns_enum.REAL
        return NatsortFlagsAndValue(frozenset(names), value)

    def exact_natsort_alg(self: Self, flags: int | str | Collection[int | str] | None) -> NatsortFlagsAndValue:
        """
        Gets the flag names and combined `alg=` argument for natsort.

        Examples:
            - `exact_natsort_alg({"REAL"}) == ({"FLOAT", "SIGNED"}, ns.FLOAT | ns.SIGNED)`
            - `exact_natsort_alg({}) == ({}, 0)`
            - `exact_natsort_alg(ns.LOWERCASEFIRST) == ({"LOWERCASEFIRST"}, ns.LOWERCASEFIRST)`
            - `exact_natsort_alg({"localenum", "numafter"})`
              `== ({"LOCALENUM", "NUMAFTER"}, ns.LOCALENUM | ns.NUMAFTER)`

        Args:
            flags: Can be either:
                   - `None`, 0, or an empty collection, meaning no flags
                   - a single integer `alg` argument
                   - a single flag name (case-insensitive)
                   - a collection of flag ints and/or names in `natsort.ns`

        Returns:
            A tuple of the set of flag names, and the corresponding input to `natsorted`
            Only uses standard flag names, never the "combined" ones.
            (E.g. `exact_natsort_alg({"REAL"})`
            will return `({"FLOAT", "SIGNED"}, ns.FLOAT | ns.SIGNED)`.

        Raises:
            TypeError: If `flags`, or an element of `flags`, has an unsupported type
            AttributeError: If a flag name is not defined on `natsort.ns`
        """
        if isinstance(flags, str):
            # a lone name is treated as a one-element collection
            flags = {flags}
        if (
            flags is None
            or (isinstance(flags, Collection) and len(flags) == 0)
            or (isinstance(flags, int) and flags == 0)
        ):
            return NatsortFlagsAndValue(_empty_frozenset, 0)
        if isinstance(flags, int):
            return self._ns_info_from_int_flag(flags)
        if not isinstance(flags, Collection):
            raise TypeError(f"Unknown type {type(flags)} for {flags}")
        combined = 0
        for f in flags:
            if isinstance(f, str):
                combined |= getattr(ns, f.upper())
            elif isinstance(f, int):
                combined |= f
            else:
                # report the offending element, not the whole collection
                raise TypeError(f"Unknown type {type(f)} for {f}")
        return self._ns_info_from_int_flag(combined)

    def _ns_info_from_int_flag(self: Self, val: int) -> NatsortFlagsAndValue:
        """
        Builds the (flag names, value) pair for a combined integer.

        Args:
            val: The combined `alg` integer

        Returns:
            The names of the core flags whose bit is set in `val`, together with `val` unchanged
        """
        core = self.core_natsort_flags()
        names = frozenset(name for name, bit in core.items() if bit & val)
        return NatsortFlagsAndValue(names, val)
170
|
|
|
|
171
|
|
|
|
172
|
|
|
# Module-level instance of SortUtils; exported via `__all__`.
SortTools = SortUtils()
173
|
|
|
|