1
|
|
|
from __future__ import annotations |
|
|
|
|
2
|
|
|
|
3
|
|
|
import pickle |
4
|
|
|
import sys |
5
|
|
|
from copy import copy |
6
|
|
|
from datetime import date, datetime |
7
|
|
|
from pathlib import Path, PurePath |
8
|
|
|
from typing import Any, ByteString, Callable, Mapping, Optional, Sequence, Iterable, Collection |
|
|
|
|
9
|
|
|
from typing import Tuple as Tup |
10
|
|
|
from typing import Type, TypeVar, Union |
11
|
|
|
|
12
|
|
|
import orjson |
|
|
|
|
13
|
|
|
import toml |
|
|
|
|
14
|
|
|
|
15
|
|
|
from pocketutils.core import PathLike |
|
|
|
|
16
|
|
|
from pocketutils.core._internal import read_txt_or_gz, write_txt_or_gz |
|
|
|
|
17
|
|
|
from pocketutils.core.exceptions import XKeyError, XTypeError, XValueError |
|
|
|
|
18
|
|
|
|
19
|
|
|
# Pickle protocol version passed to ``pickle.dumps`` by ``NestedDotDict.write_pickle``.
PICKLE_PROTOCOL = 5
# Generic type variable used in the annotations of the typed accessors
# (``exactly``, ``get_as``, ``req_as``, ``get_list_as``, ``req_list_as``).
T = TypeVar("T")
|
|
|
|
21
|
|
|
|
22
|
|
|
|
23
|
|
|
def _json_encode_default(obj: Any) -> Any:
    """
    Fallback serializer passed as ``default`` to ``orjson.dumps``.

    Args:
        obj: An object that orjson could not serialize natively

    Returns:
        A plain ``dict`` copy of the wrapped mapping if ``obj`` is a ``NestedDotDict``

    Raises:
        TypeError: For any other type. Falling through would implicitly return ``None``,
                   which orjson would silently write out as ``null``.
    """
    if isinstance(obj, NestedDotDict):
        # noinspection PyProtectedMember
        return dict(obj._x)
    raise TypeError(f"Type {type(obj).__name__} is not JSON serializable")
|
|
|
|
27
|
|
|
|
28
|
|
|
|
29
|
|
|
class NestedDotDict(Mapping):
    """
    A thin wrapper around a nested dict to make getting values easier.
    This was designed as a wrapper for TOML, but it works more generally too.

    Keys must be strings that do not contain a dot (.).
    A dot is reserved for splitting values to traverse the tree.
    For example, ``dotdict["pet.species.name"]``.
    """

    @classmethod
    def read_toml(cls, path: PathLike) -> NestedDotDict:
        """
        Reads TOML from a file, into a NestedDotDict.
        The text is loaded with ``read_txt_or_gz`` and parsed with ``toml.loads``.
        """
        return cls(toml.loads(read_txt_or_gz(path)))

    @classmethod
    def read_json(cls, path: PathLike) -> NestedDotDict:
        """
        Reads JSON from a file, into a NestedDotDict.
        If the JSON data is a list type, converts into a dict with keys ``"0", "1", ...`` .
        Can read .json or .json.gz.
        """
        return cls(cls._list_to_dict(orjson.loads(read_txt_or_gz(path))))

    @classmethod
    def read_pickle(cls, path: Union[PurePath, str]) -> NestedDotDict:
        """
        Reads a pickled mapping from a file, into a NestedDotDict.

        Note that this function has potential security concerns.
        This is because it relies on the pickle module.
        Only use it on files from a trusted source.
        """
        data = Path(path).read_bytes()
        data = pickle.loads(data)  # nosec
        return cls(data)

    @classmethod
    def parse_toml(cls, data: str) -> NestedDotDict:
        """
        Parses TOML from a string, into a NestedDotDict.
        """
        return cls(toml.loads(data))

    @classmethod
    def parse_json(cls, data: str) -> NestedDotDict:
        """
        Parses JSON from a string, into a NestedDotDict.
        If the JSON data is a list type, converts into a dict with keys ``"0", "1", ...`` .
        """
        return cls(cls._list_to_dict(orjson.loads(data.encode(encoding="utf8"))))

    @classmethod
    def parse_pickle(cls, data: ByteString) -> NestedDotDict:
        """
        Unpickles a mapping from bytes-like data, into a NestedDotDict.

        Note that this function has potential security concerns.
        This is because it relies on the pickle module.
        Only use it on data from a trusted source.
        """
        if not isinstance(data, bytes):
            data = bytes(data)
        return cls(pickle.loads(data))  # nosec

    @staticmethod
    def _list_to_dict(data: Any) -> Any:
        """
        Converts a top-level list into a dict keyed by ``"0", "1", ...``.
        Anything that is not a list is returned unchanged.
        """
        if isinstance(data, list):
            # The keys must be str: the constructor rejects the int keys of a bare ``enumerate``.
            return {str(i): value for i, value in enumerate(data)}
        return data

    def __init__(self, x: Mapping[str, Any]) -> None:
        """
        Constructor.

        Args:
            x: The dict-like object to wrap; it is stored as-is, not copied

        Raises:
            XTypeError: If ``x`` lacks ``items``, ``keys``, or ``values``
            XValueError: If a key (in this dict or a sub-dict) is not a str or contains a dot
        """
        if not (hasattr(x, "items") and hasattr(x, "keys") and hasattr(x, "values")):
            raise XTypeError(
                f"Type {type(x)} for value {x} appears not to be dict-like", actual=str(type(x))
            )
        bad = [k for k in x if not isinstance(k, str)]
        if bad:
            raise XValueError(f"Keys were not strings for these values: {bad}", value=bad)
        bad = [k for k in x if "." in k]
        if bad:
            raise XValueError(f"Keys contained dots (.) for these values: {bad}", value=bad)
        self._x = x
        # Let's make sure this constructor gets called on sub-dicts:
        self.leaves()

    def write_json(self, path: PathLike, *, indent: bool = False, mkdirs: bool = False) -> str:
        """
        Writes to a json or .json.gz file.

        Args:
            path: The output path
            indent: Passed to :meth:`to_json`
            mkdirs: Passed through to ``write_txt_or_gz``

        Returns:
            The JSON text
        """
        return write_txt_or_gz(self.to_json(indent=indent), path, mkdirs=mkdirs)

    def write_toml(self, path: PathLike, mkdirs: bool = False) -> str:
        """
        Writes to a toml or .toml.gz file.

        Args:
            path: The output path
            mkdirs: Passed through to ``write_txt_or_gz``

        Returns:
            The TOML text
        """
        return write_txt_or_gz(self.to_toml(), path, mkdirs=mkdirs)

    def write_pickle(self, path: PathLike) -> None:
        """
        Writes the wrapped mapping to a pickle file, using ``PICKLE_PROTOCOL``.
        """
        Path(path).write_bytes(pickle.dumps(self._x, protocol=PICKLE_PROTOCOL))

    def to_json(self, *, indent: bool = False) -> str:
        """
        Returns JSON text.

        Args:
            indent: If true, serialize with ``orjson.OPT_INDENT_2``
        """
        kwargs = dict(option=orjson.OPT_INDENT_2) if indent else {}
        encoded = orjson.dumps(self._x, default=_json_encode_default, **kwargs)
        return encoded.decode(encoding="utf8")

    def to_toml(self) -> str:
        """
        Returns TOML text.
        """
        return toml.dumps(self._x)

    def n_elements_total(self) -> int:
        """
        Counts the elements in the whole tree; see :meth:`_all_elements` for what counts.
        """
        return len(self._all_elements())

    def n_bytes_total(self) -> int:
        """
        Sums ``sys.getsizeof`` over every element in the tree.
        ``sys.getsizeof`` is shallow, so this is only a rough estimate.
        """
        return sum(sys.getsizeof(x) for x in self._all_elements())

    def _all_elements(self) -> Sequence[Any]:
        """
        Flattens the tree into a list of values.
        Sub-dicts are recursed into, and non-string collections contribute one element
        per item (one level deep); everything else counts as a single element.
        """
        elements = []
        for value in self._x.values():
            if isinstance(value, Mapping):
                elements.extend(NestedDotDict(value)._all_elements())
            elif isinstance(value, Collection) and not isinstance(value, (str, bytes, bytearray)):
                # str / bytes / bytearray are collections too, but they count as single values.
                # (Checked directly rather than via the deprecated ``ByteString`` ABC.)
                elements.extend(value)
            else:
                elements.append(value)
        return elements

    def leaves(self) -> Mapping[str, Any]:
        """
        Gets the leaves in this tree.

        Returns:
            A dict mapping dot-joined keys to their values
        """
        mp = {}
        for key, value in self._x.items():
            if isinstance(value, Mapping):
                # Wrapping the sub-dict also validates its keys.
                for k, v in NestedDotDict(value).leaves().items():
                    mp[key + "." + k] = v
            else:
                mp[key] = value
        return mp

    def sub(self, items: str) -> NestedDotDict:
        """
        Returns the dictionary under (dotted) keys ``items``.

        See Also:
            :meth:`sub_opt`
        """
        return NestedDotDict(self[items])

    def sub_opt(self, items: str) -> NestedDotDict:
        """
        Returns the dictionary under (dotted) keys ``items``, or empty if a key is not found.

        See Also:
            :meth:`sub`
        """
        try:
            return NestedDotDict(self[items])
        except XKeyError:
            return NestedDotDict({})

    def exactly(self, items: str, astype: Type[T]) -> T:
        """
        Gets the key ``items`` from the dict if it has type ``astype``.

        Args:
            items: The key hierarchy, with a dot (.) as a separator
            astype: The type, which will be checked using ``isinstance``

        Returns:
            The value in the required type

        Raises:
            XTypeError: If not ``isinstance(value, astype)``
        """
        z = self[items]
        if not isinstance(z, astype):
            raise XTypeError(
                f"Value {z} from {items} is a {type(z)}, not {astype}",
                actual=str(type(z)),
                expected=str(astype),
            )
        return z

    def get_as(
        self, items: str, astype: Callable[[Any], T], default: Optional[T] = None
    ) -> Optional[T]:
        """
        Gets the value of an *optional* key, or ``default`` if it doesn't exist.
        Calls ``astype(value)`` on the value before returning.
        ``date`` and ``datetime`` are special-cased: existing instances are passed through,
        and strings are parsed as ISO 8601.

        See Also:
            :meth:`req_as`
            :meth:`exactly`

        Args:
            items: The key hierarchy, with a dot (.) as a separator.
                   Ex: ``animal.species.name``.
            astype: Any function that converts the found value to type ``T``.
                    Can be a ``Type``, such as ``int``.
                    Despite the annotated type, this function only needs to accept the actual value of the key
                    as input, not ``Any``.
            default: Return this value if the key is not found (at any level) or its value is ``None``

        Returns:
            The value of found key in this dot-dict, or ``default``.

        Raises:
            XValueError: Likely exception raised if calling ``astype`` fails
        """
        x = self.get(items)
        if x is None:
            return default
        return self._convert(x, astype)

    def req_as(self, items: str, astype: Optional[Callable[[Any], T]]) -> T:
        """
        Gets the value of a *required* key.
        Calls ``astype(value)`` on the value before returning.
        ``date`` and ``datetime`` are handled as in :meth:`get_as`.

        See Also:
            :meth:`get_as`
            :meth:`exactly`

        Args:
            items: The key hierarchy, with a dot (.) as a separator.
                   Ex: ``animal.species.name``.
            astype: Any function that converts the found value to type ``T``.
                    Can be a ``Type``, such as ``int``.
                    Despite the annotated type, this function only needs to accept the actual value of the key
                    as input, not ``Any``.
                    If ``None``, the value is returned unconverted.

        Returns:
            The value of found key in this dot-dict.

        Raises:
            XKeyError: If the key is not found (at any level).
            XValueError: Likely exception raised if calling ``astype`` fails
        """
        return self._convert(self[items], astype)

    def get_list_as(
        self, items: str, astype: Callable[[Any], T], default: Optional[Sequence[T]] = None
    ) -> Optional[Sequence[T]]:
        """
        Gets list values from an *optional* key.
        Note that ``astype`` here converts elements *within* the list, not the whole list.
        Also see ``req_list_as``.

        Args:
            items: The key hierarchy, with a dot (.) as a separator. Ex: ``animal.species.name``.
            astype: Any function that converts the found value to type ``T``. Ex: ``int``.
            default: Return this value if the key wasn't found

        Returns:
            ``[astype(v) for v in self[items]]``, or ``default`` if ``items`` was not found.

        Raises:
            XValueError: Likely exception raised if calling ``astype`` fails
            XTypeError: If the found value is not a (non-``str``) ``Sequence``
        """
        x = self.get(items)
        if x is None:
            return default
        return self._convert_list(x, items, astype)

    def req_list_as(self, items: str, astype: Optional[Callable[[Any], T]]) -> Sequence[T]:
        """
        Gets list values from a *required* key.
        Note that ``astype`` here converts elements *within* the list, not the whole list.
        Also see ``get_list_as``.

        Args:
            items: The key hierarchy, with a dot (.) as a separator. Ex: ``animal.species.name``.
            astype: Any function that converts the found value to type ``T``. Ex: ``int``.
                    If ``None``, the elements are returned unconverted.

        Returns:
            ``[astype(v) for v in self[items]]``

        Raises:
            XValueError: Likely exception raised if calling ``astype`` fails
            XTypeError: If the found value is not a (non-``str``) ``Sequence``
            XKeyError: If the key was not found (at any level)
        """
        return self._convert_list(self[items], items, astype)

    def _convert(self, value: Any, astype: Optional[Callable[[Any], T]]) -> T:
        """
        Applies ``astype`` to ``value``; shared by the ``*_as`` accessors.
        ``None`` means no conversion; ``date`` and ``datetime`` get ISO 8601 parsing.
        """
        if astype is None:
            return value
        if astype is date:
            return self._to_date(value)
        if astype is datetime:
            return self._to_datetime(value)
        return astype(value)

    def _convert_list(
        self, value: Any, items: str, astype: Optional[Callable[[Any], T]]
    ) -> Sequence[T]:
        """
        Checks that ``value`` is a non-``str`` ``Sequence`` and converts each element.
        ``items`` is only used in the error message.
        """
        if not isinstance(value, Sequence) or isinstance(value, str):
            raise XTypeError(
                f"Value {value} is not a list for lookup {items}", actual=str(type(value))
            )
        return [self._convert(y, astype) for y in value]

    def get(self, items: str, default: Any = None) -> Any:
        """
        Gets a value from an optional key.
        Returns ``default`` if the lookup raises ``KeyError``.
        Also see ``__getitem__``.
        """
        try:
            return self[items]
        # NOTE(review): relies on XKeyError (raised by __getitem__) subclassing KeyError -- confirm
        except KeyError:
            return default

    def __getitem__(self, items: str) -> Any:
        """
        Gets a value from a required key.
        Analogous to ``dict.__getitem__``, but this can operate on dot-joined strings.
        Sub-dicts are returned wrapped in a ``NestedDotDict``; other values are shallow-copied.

        **NOTE:** The number of keys for which this returns a value can be different from ``len(self)``.

        Example:
            >>> d = NestedDotDict(dict(a=dict(b=1)))
            >>> assert d["a.b"] == 1

        Raises:
            XKeyError: If any part of the path does not exist,
                       including when the path runs through a value that is not a mapping
        """
        at = self._x
        for item in items.split("."):
            # Only mappings can be descended into. Without the isinstance check, a scalar
            # mid-path raises TypeError, and a str or list does a substring/index lookup.
            if not isinstance(at, Mapping) or item not in at:
                raise XKeyError(f"{items} not found: {item} does not exist")
            at = at[item]
        # Same ``Mapping`` test that ``leaves`` uses to decide what is a subtree.
        return NestedDotDict(at) if isinstance(at, Mapping) else copy(at)

    def items(self) -> Sequence[Tup[str, Any]]:
        """
        Returns a list of the top-level ``(key, value)`` pairs; values are not wrapped.
        """
        return list(self._x.items())

    def keys(self) -> Sequence[str]:
        """
        Returns a list of the top-level keys.
        """
        return list(self._x.keys())

    def values(self) -> Sequence[Any]:
        """
        Returns a list of the top-level values; values are not wrapped.
        """
        return list(self._x.values())

    def pretty_str(self) -> str:
        """
        Pretty-prints the leaves of this dict using ``orjson.dumps``,
        with sorted keys and 2-space indentation.

        Returns:
            A multi-line string
        """
        return orjson.dumps(
            self.leaves(), option=orjson.OPT_SORT_KEYS | orjson.OPT_INDENT_2 | orjson.OPT_UTC_Z
        ).decode(encoding="utf8")

    def __len__(self) -> int:
        """
        Returns the number of values in this dict.
        Does **NOT** include nested values.
        """
        return len(self._x)

    def is_empty(self) -> bool:
        """
        Returns whether this dict has no top-level keys.
        """
        return len(self._x) == 0

    def __iter__(self):
        """
        Iterates over the top-level keys in this dict.
        Does **NOT** include nested items.
        """
        return iter(self._x)

    def __repr__(self):
        return repr(self._x)

    def __str__(self):
        return str(self._x)

    def __eq__(self, other):
        """
        Compares by content against another ``NestedDotDict`` or any mapping.
        Key order is irrelevant; non-mappings (such as a str that happens to
        match ``str(self)``) are never equal.
        """
        if isinstance(other, NestedDotDict):
            # noinspection PyProtectedMember
            other = other._x
        if not isinstance(other, Mapping):
            return NotImplemented
        return dict(self.items()) == dict(other.items())

    def _to_date(self, s) -> date:
        """
        Passes through a ``date`` (or ``datetime``, its subclass) and parses a str as an ISO 8601 date.

        Raises:
            XTypeError: If ``s`` is neither a ``date`` nor a str
        """
        if isinstance(s, date):
            return s
        if isinstance(s, str):
            # This is MUCH faster than tomlkit's
            return date.fromisoformat(s)
        raise XTypeError(f"Invalid type {type(s)} for {s}", actual=str(type(s)))

    def _to_datetime(self, s) -> datetime:
        """
        Passes through a ``datetime`` and parses a str as an ISO 8601 datetime.
        A ``Z`` suffix (in either case) is read as ``+00:00``.

        Raises:
            XValueError: If the str has fewer than two colons (no hours, minutes, and seconds)
            XTypeError: If ``s`` is neither a ``datetime`` nor a str
        """
        if isinstance(s, datetime):
            return s
        if isinstance(s, str):
            # This is MUCH faster than tomlkit's
            if s.count(":") < 2:
                raise XValueError(
                    f"Datetime {s} does not contain hours, minutes, and seconds", value=s
                )
            return datetime.fromisoformat(s.upper().replace("Z", "+00:00"))
        raise XTypeError(f"Invalid type {type(s)} for {s}", actual=str(type(s)))
434
|
|
|
|
435
|
|
|
|
436
|
|
|
# Public API of this module: only the wrapper class is exported.
__all__ = ["NestedDotDict"]
437
|
|
|
|