|
1
|
|
|
# SPDX-FileCopyrightText: Copyright 2020-2023, Contributors to pocketutils
# SPDX-PackageHomePage: https://github.com/dmyersturnbull/pocketutils
# SPDX-License-Identifier: Apache-2.0
|
4
|
|
|
"""
Tools for natural sorting, built on `natsort`.
"""
|
7
|
|
|
|
|
8
|
|
|
from collections.abc import Collection, Iterable, Mapping, Sequence |
|
9
|
|
|
from typing import ( |
|
10
|
|
|
NamedTuple, |
|
11
|
|
|
Self, |
|
12
|
|
|
TypeVar, |
|
13
|
|
|
) |
|
14
|
|
|
|
|
15
|
|
|
from natsort import natsorted, ns |
|
16
|
|
|
from natsort.ns_enum import ns as ns_enum |
|
17
|
|
|
|
|
18
|
|
|
# Public API of this module: the utility class and its ready-made instance.
__all__ = [
    "SortUtils",
    "SortTools",
]
|
19
|
|
|
|
|
20
|
|
|
# Shared immutable "no flags" result; safe to reuse because a frozenset cannot be mutated.
_empty_frozenset: frozenset[str] = frozenset()

# Element type of the items being sorted.
T = TypeVar("T")
|
23
|
|
|
|
|
24
|
|
|
|
|
25
|
|
|
class NatsortFlagsAndValue(NamedTuple):
    """
    A natsort algorithm described two ways: by flag names and by its combined int.

    Unpacks as `(flags, value)`.
    """

    # Names of the natsort flags that make up the algorithm
    flags: frozenset[str]
    # Combined integer, as passed to `natsorted(..., alg=value)`
    value: int
|
28
|
|
|
|
|
29
|
|
|
|
|
30
|
|
|
class SortUtils:
    """
    Helpers for natural sorting and for working with natsort's `alg=` flags.

    None of the methods read instance state.
    """

    def natsort(
        self: Self,
        lst: Iterable[T],
        dtype: type[T],
        *,
        alg: int | set[str] | frozenset[str] | None = None,
        reverse: bool = False,
    ) -> Sequence[T]:
        """
        Perform a natural sort consistent with the type `dtype`.
        Uses [natsort](https://pypi.org/project/natsort).

        See Also:
            [`get_natsort_alg`](pocketutils.tools.sort_tools.SortUtils.get_natsort_alg)

        Args:
            lst: The items to sort; any iterable
            dtype: The element type; every item in `lst` is expected to be an instance of it.
                   Only consulted when `alg` is `None`.
            alg: A specific natsort algorithm (int) or a set of flag names;
                 if `None`, the flags are guessed from `dtype`
            reverse: Sort in reverse (e.g. Z to A or 9 to 1)

        Returns:
            The sorted items, as returned by `natsorted`
        """
        if alg is None:
            resolved = self.get_natsort_alg(dtype)
        else:
            resolved = self.exact_natsort_alg(alg)
        # natsorted accepts any iterable, so no intermediate copy is needed
        return natsorted(lst, alg=resolved.value, reverse=reverse)

    def all_natsort_flags(self: Self) -> Mapping[str, int]:
        """
        Returns every flag name defined by natsort, including combined flags, default flags, and aliases.
        Combined flags are, e.g., `ns.REAL` (which is `ns.FLOAT | ns.SIGNED`).
        Default flags are, e.g., `ns.UNSIGNED`.

        See Also:
            [`core_natsort_flags`](pocketutils.tools.sort_tools.SortUtils.core_natsort_flags)

        Returns:
            A mapping from flag name to int value
        """
        # Iterating an Enum class skips aliases (names whose value duplicates an earlier member),
        # which would silently drop names; `__members__` lists every defined name.
        return {name: member.value for name, member in ns_enum.__members__.items()}

    def core_natsort_flags(self: Self) -> Mapping[str, int]:
        """
        Returns natsort flags that are not combinations or defaults.

        See Also:
            [`all_natsort_flags`](pocketutils.tools.sort_tools.SortUtils.all_natsort_flags)

        Returns:
            A mapping from (canonical) flag name to int value; every value is a single bit
        """
        # A core flag has exactly one bit set:
        #   - nonzero, because 0-valued flags are defaults
        #   - a power of 2, because anything else is a combination
        # Iterating the enum (rather than `__members__`) keeps one canonical name per value.
        return {e.name: e.value for e in ns_enum if e.value > 0 and (e.value & (e.value - 1)) == 0}

    def get_natsort_alg(self: Self, dtype: type[T]) -> NatsortFlagsAndValue:
        """
        Guesses a good natsorted flag for the dtype.

        Here are some specifics:
            - integers and booleans ⇒ INT and SIGNED
            - floating-point ⇒ FLOAT and SIGNED
            - strings ⇒ COMPATIBILITYNORMALIZE and GROUPLETTERS
            - anything else (including datetime types) ⇒ no flags

        Args:
            dtype: A type such as `str`, `int`, `bool`, or `float`;
                   NumPy integer, float, and bool scalar types are recognized by their `repr`

        Returns:
            A tuple of (set of flag names, int) -- see `exact_natsort_alg`
        """
        names: set[str] = set()
        value = 0
        type_repr = repr(dtype)
        # NOTE(review): `==` rather than `is` is kept from the original; presumably so that
        # dtype-like objects that compare equal to a builtin type also match -- confirm.
        if dtype == str:  # noqa: E721
            names.update(["COMPATIBILITYNORMALIZE", "GROUPLETTERS"])
            value |= ns_enum.COMPATIBILITYNORMALIZE | ns_enum.GROUPLETTERS
        if (
            dtype == int  # noqa: E721
            or dtype == bool  # noqa: E721
            or type_repr.startswith("<class 'numpy.int")
            # NumPy scalar bool type; presumably spelled `numpy.bool` (no underscore) on NumPy 2 -- verify
            or type_repr in ("<class 'numpy.bool_'>", "<class 'numpy.bool'>")
        ):
            names.update(["INT", "SIGNED"])
            value |= ns_enum.INT | ns_enum.SIGNED
        if dtype == float or type_repr.startswith("<class 'numpy.float"):  # noqa: E721
            names.update(["FLOAT", "SIGNED"])
            value |= ns_enum.FLOAT | ns_enum.SIGNED  # same as ns_enum.REAL
        return NatsortFlagsAndValue(frozenset(names), value)

    def exact_natsort_alg(self: Self, flags: int | str | Collection[int | str] | None) -> NatsortFlagsAndValue:
        """
        Gets the flag names and combined `alg=` argument for natsort.

        Examples:
            - `exact_natsort_alg({"REAL"}) == ({"FLOAT", "SIGNED"}, ns.FLOAT | ns.SIGNED)`
            - `exact_natsort_alg({}) == (frozenset(), 0)`
            - `exact_natsort_alg(ns.LOWERCASEFIRST) == ({"LOWERCASEFIRST"}, ns.LOWERCASEFIRST)`
            - `exact_natsort_alg({"localenum", "numafter"})`
              `== ({"LOCALENUM", "NUMAFTER"}, ns.LOCALENUM | ns.NUMAFTER)`

        Args:
            flags: Can be any of:
                   - `None` (no flags)
                   - a single integer `alg` argument
                   - a single flag name in `natsort.ns` (case-insensitive)
                   - a collection of flag ints and/or names in `natsort.ns`

        Returns:
            A tuple of the set of flag names, and the corresponding input to `natsorted`.
            Only uses standard flag names, never the "combined" ones.
            (E.g. `exact_natsort_alg({"REAL"})`
            will return `({"FLOAT", "SIGNED"}, ns.FLOAT | ns.SIGNED)`.)

        Raises:
            TypeError: If `flags`, or an element of it, is neither an int nor a str
            AttributeError: If a flag name is not defined in `natsort.ns`
        """
        if isinstance(flags, str):
            # A bare name is shorthand for a one-element set
            flags = {flags}
        if flags is None:
            return NatsortFlagsAndValue(_empty_frozenset, 0)
        if isinstance(flags, int):
            if flags == 0:
                return NatsortFlagsAndValue(_empty_frozenset, 0)
            return self._ns_info_from_int_flag(flags)
        if not isinstance(flags, Collection):
            raise TypeError(f"Unknown type {type(flags)} for {flags}")
        if len(flags) == 0:
            return NatsortFlagsAndValue(_empty_frozenset, 0)
        combined = 0
        for flag in flags:
            if isinstance(flag, str):
                combined |= getattr(ns, flag.upper())
            elif isinstance(flag, int):
                combined |= flag
            else:
                # Report the offending element, not the whole collection
                raise TypeError(f"Unknown type {type(flag)} for {flag}")
        return self._ns_info_from_int_flag(combined)

    def _ns_info_from_int_flag(self: Self, val: int) -> NatsortFlagsAndValue:
        """
        Decomposes `val` into the names of the core (single-bit) flags set in it.

        Args:
            val: A combined natsort `alg` value

        Returns:
            The names of the core flags whose bit is set in `val`, paired with `val` unchanged
        """
        core = self.core_natsort_flags()
        names = frozenset(name for name, bit in core.items() if (bit & val) != 0)
        return NatsortFlagsAndValue(names, val)
|
170
|
|
|
|
|
171
|
|
|
|
|
172
|
|
|
# Ready-made instance, so callers can write `SortTools.natsort(...)` without instantiating.
SortTools = SortUtils()
|
173
|
|
|
|