1
|
|
|
# src/ittools/core.py |
2
|
|
|
"""A collection of iterable utilities. |
3
|
|
|
|
4
|
|
|
Used to grow on the fly. Aims to provide general purpose abstract |
5
|
|
|
functionalities. |
6
|
|
|
|
7
|
|
|
.. autosummary:: |
8
|
|
|
:nosignatures: |
9
|
|
|
|
10
|
|
|
depth |
11
|
|
|
nestify |
12
|
|
|
itrify |
13
|
|
|
is_empty |
14
|
|
|
Stringcrementor |
15
|
|
|
enum_to_2dix |
16
|
|
|
Index2D |
17
|
|
|
zip_split |
18
|
|
|
group |
19
|
|
|
""" |
20
|
|
|
|
21
|
|
|
import collections |
22
|
|
|
import logging |
23
|
|
|
import math |
24
|
|
|
from itertools import zip_longest |
25
|
|
|
|
26
|
|
|
from pandas import Series |
27
|
|
|
|
28
|
|
|
logger = logging.getLogger(__name__) |
29
|
|
|
|
30
|
|
|
|
31
|
|
|
def depth(arg, exclude=None):
    r"""Determine the nesting depth of an iterable.

    Credit goes to:
    https://stackoverflow.com/a/35698158

    Parameters
    ----------
    arg : ~collections.abc.Iterable
        Iterable of which nested depth is to be determined. Objects that
        can not be iterated have a depth of ``0``. If
        :paramref:`depth.arg` provides a ``values()`` method (as mappings
        do), the values are inspected instead of the keys.

    exclude : ~collections.abc.Iterable, default=None
        Iterable of iterable types that should be ignored.
        If None, str are excluded.

    Returns
    -------
    int
        Depth of :paramref:`depth.arg`

    Note
    ----
    The first item is drawn from ``iter(arg)`` to detect objects that
    yield themselves. One-shot iterators (i.e. generators) therefore lose
    their first item during the inspection.

    Example
    -------
    >>> depth([[2, 2], [2, [3, 3]], 1])
    3

    Using exclude to ignore tuples:

    >>> depth([[2, 2], [2, (3, 3)], 1], exclude=(tuple,))
    2

    Empty iterables are containers of depth 1:

    >>> depth([])
    1
    >>> depth([[], [1]])
    2
    """
    if exclude is None:
        exclude = (str,)
    # Convert once, so a one-shot ``exclude`` iterable is still complete
    # when handed down to the recursive calls.
    exclude = tuple(exclude)

    if isinstance(arg, exclude):
        return 0

    # A sentinel default keeps ``next`` from raising StopIteration on empty
    # iterables. Without it, ``depth([])`` crashed and a nested empty
    # container silently cut the recursion short.
    missing = object()
    try:
        first = next(iter(arg), missing)
    except TypeError:
        # not iterable at all
        return 0

    if first is arg:  # object yields itself; avoid infinite recursion
        return 1

    try:
        # mapping-like objects: the nesting lives in the values
        children = arg.values()
    except AttributeError:
        children = arg

    # ``default=0`` covers empty containers, which have no inner depth.
    return 1 + max((depth(child, exclude) for child in children), default=0)
93
|
|
|
|
94
|
|
|
|
95
|
|
|
def nestify(obj, target_depth, container=list):
    """Wrap obj into containers until it reaches the target depth.

    The nesting depth is measured using :func:`depth`, with ``str`` and
    :class:`pandas.Series` objects counting as non-nested (depth 0).

    Parameters
    ----------
    obj
        Object which is to be put in a container.

    target_depth : ~numbers.Number
        Keep nesting :paramref:`nestify.obj` until its nesting
        depth >= target_depth.

    container : ~typing.Container
        Container (list, tuple, ...) the :paramref:`nestify.obj` is nested
        with.

    Returns
    -------
    ~typing.Container
        Nested container object with the depth of
        :paramref:`nestify.target_depth`. If :paramref:`nestify.obj` is
        already nested deeply enough, it is returned unchanged.

    Examples
    --------
    Standard use case:

    >>> nestify([1, 2, 3], 3)
    [[[1, 2, 3]]]

    Specifying the container to nest with:

    >>> nestify([1, 2, 3], 3, tuple)
    (([1, 2, 3],),)

    Not all containers work with all objects. Lists are not hashable,
    hence they can not be put into a set:

    >>> nestify([1, 2, 3], 3, set)
    Traceback (most recent call last):
        ...
    TypeError: unhashable type: 'list'

    Frozensets however can be nested:

    >>> nestify(frozenset([1, 2, 2]), 3, frozenset)
    frozenset({frozenset({frozenset({1, 2})})})
    """
    # types treated as atomic when measuring the current depth
    atomic_types = (str, Series)
    nested = obj
    while True:
        if not depth(nested, exclude=atomic_types) < target_depth:
            return nested
        # add one more level of nesting and measure again
        nested = container([nested])
162
|
|
|
|
163
|
|
|
|
164
|
|
|
def itrify(obj, container=list):
    """Turn object into an iterable container if not already.

    Strings will be itrified without splitting!

    Objects of type :class:`collections.abc.Sequence` (except for ``str``)
    are returned unchanged. Everything else is put into
    :paramref:`itrify.container`.

    Parameters
    ----------
    obj
        Object to be itrified:

        - ``str`` objects are wrapped as a whole (``container([obj])``).
        - Any other :class:`~collections.abc.Sequence` is returned as is.
        - Other iterables are converted (``container(obj)``).
        - Objects that are not iterable (numbers, ``None``, ...) are
          wrapped as a whole (``container([obj])``).

    container : ~typing.Container, default=list
        Iterable container designed to house :paramref:`itrify.obj`.

    Returns
    -------
    ~collections.abc.Container
        Containered :paramref:`~itrify.obj` (i.e. ``list(obj)``), or
        :paramref:`~itrify.obj` itself if it is a non-string sequence.

    Examples
    --------
    Pretty much the same as ``list(('String',))``:

    >>> itrify('String')
    ['String']

    A list is already iterable so this is futile:

    >>> itrify([1, 2, 3], tuple)
    [1, 2, 3]

    Strings although iterable will be itrified as whole:

    >>> itrify('String', tuple)
    ('String',)

    The :paramref:`itrify.container` of course, can be any callable
    container type:

    >>> itrify('String', set)
    {'String'}

    Objects that can not be iterated are wrapped as well:

    >>> itrify(42)
    [42]

    Pandas is awesome they support out of the box data type transformation:

    >>> import pandas as pd
    >>> itrify(pd.Series([1,2,3]), set)
    {1, 2, 3}
    """
    if isinstance(obj, str):
        return container([obj])

    if isinstance(obj, collections.abc.Sequence):
        return obj

    try:
        iter(obj)
    except TypeError:
        # Not iterable: ``container(obj)`` would raise, so wrap the object
        # as a whole, the same way strings are handled.
        return container([obj])

    return container(obj)
226
|
|
|
|
227
|
|
|
|
228
|
|
|
def is_empty(lst):
    """Check recursively if a list is empty.

    A list counts as empty if it holds no items at all, or if every item
    it holds is an empty list in this very sense. Works based on
    ``all([])`` being ``True``.

    Parameters
    ----------
    lst: list
        List to be checked for emptiness.

    Returns
    -------
    bool
        ``True`` if :paramref:`~is_empty.lst` is an empty
        :class:`~typing.List`. ``False`` otherwise, in particular for
        every object that is not a list.

    Examples
    --------
    >>> is_empty([])
    True

    >>> is_empty([[], [1,2,3]])
    False

    >>> is_empty([[[[]]]])
    True

    Tuple is not a list (u dont say):

    >>> is_empty(([], []))
    False
    """
    # anything that is not a list is never considered empty
    if not isinstance(lst, list):
        return False

    # a single non-empty item makes the whole list non-empty
    for item in lst:
        if not is_empty(item):
            return False

    return True
267
|
|
|
|
268
|
|
|
|
269
|
|
|
class Stringcrementor:
    """
    Endless iterator yielding a label followed by a running number.

    Each call of next() on the Stringcrementor object returns the string
    with the current number appended. The number is increased by one
    afterwards.

    Parameters
    ----------
    string: str
        String/tag/label of what you want to be incremented i.e "Category".
        Default: ``Stringcrementor``
    start: ~numbers.Number
        Starting number which is to be incremented. Default: 0

    Returns
    -------
    str
        string + integer of which the integer is incremented.

    Example
    -------
    >>> strementor = Stringcrementor('The Answer is: ')

    >>> for i in range(42):
    ...     pass  # just kidding

    >>> for i in range(3):
    ...     print(next(strementor))
    The Answer is: 0
    The Answer is: 1
    The Answer is: 2
    """

    def __init__(self, string="Stringcrementor ", start=0):
        # label prepended to every item
        self.string = string
        # number appended to the next item
        self.value = start

    def __iter__(self):
        """Return the object itself, since it is its own iterator."""
        return self

    def __next__(self):
        """Return label + current number and advance the number by one."""
        current = self.value
        self.value += 1
        suffix = str(current)
        return self.string + suffix
316
|
|
|
|
317
|
|
|
|
318
|
|
|
def enum_to_2dix(number, shape):
    """Map a 1d range to a 2d index.

    Parameters
    ----------
    number : int
        Number to be mapped to a 2D index. Usually used within some form of
        iteration.

    shape : tuple
        2 dimensional tuple defining an arrays 2d shape as in ``(rows, columns)``.

    Returns
    -------
    tuple
        the 1d enumerate position mapped to a (row, column) 2d tuple

    Raises
    ------
    ZeroDivisionError
        If the number of columns in :paramref:`~enum_to_2dix.shape` is 0.

    Note
    ----
    Only the number of columns is actually used. Since this is designed to
    be used with 2D-Matrices however, it is left as 2D-shape for convenience.

    This implies however, that you can actually use infinite
    :paramref:`~enum_to_2dix.number` arguments although your
    :paramref:`~enum_to_2dix.shape` might imply only 3 rows.

    Examples
    --------
    Mapping ``range(6)`` to a 3,2 dimension array:

    >>> for i in range(6):
    ...     print(i, '->', enum_to_2dix(i, (3,2)))
    0 -> (0, 0)
    1 -> (0, 1)
    2 -> (1, 0)
    3 -> (1, 1)
    4 -> (2, 0)
    5 -> (2, 1)

    Mapping ``range(12)`` to a 3,4 dimension array:

    >>> for i in range(12):
    ...     print(i, '->', enum_to_2dix(i, (3,4)))
    0 -> (0, 0)
    1 -> (0, 1)
    2 -> (0, 2)
    3 -> (0, 3)
    4 -> (1, 0)
    5 -> (1, 1)
    6 -> (1, 2)
    7 -> (1, 3)
    8 -> (2, 0)
    9 -> (2, 1)
    10 -> (2, 2)
    11 -> (2, 3)
    """
    columns = shape[1]
    # divmod stays in integer arithmetic for int input, so large numbers
    # are not distorted by float division, and the row always agrees with
    # the remainder.
    row, column = divmod(number, columns)
    # math.floor keeps the row an int, even if a float was passed in.
    return (math.floor(row), column)
376
|
|
|
|
377
|
|
|
|
378
|
|
|
class Index2D:
    """Callable mapping a running number to a 2d index of a fixed shape.

    Parameters
    ----------
    shape : 2-tuple
        tuple defining an array's 2d shape as in ``(rows, columns)``

    Returns
    -------
    tuple
        When called: the 1d enumerate position mapped to a (row, column)
        2d tuple

    Examples
    --------
    >>> idx2d = Index2D((3, 2))
    >>> for i in range(6):
    ...     print(i, '->', idx2d(i))
    ...
    0 -> (0, 0)
    1 -> (0, 1)
    2 -> (1, 0)
    3 -> (1, 1)
    4 -> (2, 0)
    5 -> (2, 1)
    """

    def __init__(self, shape):
        # stored privately; exposed read-only via the ``shape`` property
        self._shape = shape

    @property
    def shape(self):
        """Shape tuple the Index2D object was created with."""
        return self._shape

    def __call__(self, number):
        """Map a number to its 2d index by calling :func:`enum_to_2dix`.

        Parameters
        ----------
        number : ~numbers.Number
            The 1 d index/number to be mapped to a 2d index.

        Returns
        -------
        tuple
            the 1d enumerate position mapped to a (row, column) 2d tuple

        """
        shape = self.shape
        return enum_to_2dix(number, shape)
428
|
|
|
|
429
|
|
|
|
430
|
|
|
def zip_split(sequence, chunks):
    r"""Deal the items of sequence out to chunks, one item at a time.

    Chunk :math:`i` holds every ``chunks``-th item of
    :paramref:`~zip_split.sequence`, starting with item :math:`i`
    (``sequence[i::chunks]``).

    If the number of items in :paramref:`~zip_split.sequence` is not an
    integer multiple of :paramref:`~zip_split.chunks`, the last :math:`n`
    chunks will be one item short of the rest. With :math:`n` being:

    :math:`n = \text{chunks} -
    \left[\text{len}\left(\text{sequence}\right) \% \text{chunks}\right]`.

    Note
    ----
    Credit to https://www.garyrobinson.net/2008/04/splitting-a-pyt.html
    (Garry Robinson)

    Parameters
    ----------
    sequence: ~collections.abc.Sequence
        The sequence to split into chunks. Needs to support extended
        slicing.
    chunks: int
        The number of split sequences created.

    Yields
    ------
    :class:`~collections.abc.Sequence`
        The chunks of items in zip like order, one slice of
        :paramref:`~zip_split.sequence` per chunk.

    Examples
    --------
    Simple demonstration:

    >>> import ittools
    >>> hi10 = 10 * ['hi']
    >>> print(list(ittools.zip_split(hi10, 3)))
    [['hi', 'hi', 'hi', 'hi'], ['hi', 'hi', 'hi'], ['hi', 'hi', 'hi']]

    Use case for turning a (supposedly) long iterable into a
    :class:`pandas.DataFrame` of 3 rows:

    >>> import ittools
    >>> import pandas as pd
    >>> print(pd.DataFrame(list(ittools.zip_split(hi10, 3))).to_string(
    ...     index=False, header=False))
    hi hi hi   hi
    hi hi hi None
    hi hi hi None
    """
    # every offset below ``chunks`` starts its own stride through sequence
    yield from (sequence[offset::chunks] for offset in range(chunks))
479
|
|
|
|
480
|
|
|
|
481
|
|
|
def group(iterable, chunks, fillvalue=None):
    """Split iterable into chunks.

    The items are grouped in order, each group holding
    ``ceil(len(iterable) / chunks)`` items. If the number of items in
    :paramref:`~group.iterable` is not an integer multiple of the group
    size, the last chunk is filled using :paramref:`~group.fillvalue`.

    Parameters
    ----------
    iterable: ~collections.abc.Iterable
        The iterable to split into groups. Iterables without a length
        (i.e. generators) are consumed entirely to determine the number
        of items.

    chunks: int
        The number of groups aimed for. At most this many groups are
        created.

    fillvalue: ~numbers.Number, None, default = None
        The last chunk is filled with this in case the number of items in
        :paramref:`~group.iterable` is not an integer multiple of the
        group size.

    Returns
    -------
    :class:`~collections.abc.Iterator`
        An iterator (:func:`itertools.zip_longest`) yielding the groups as
        tuples.

    Raises
    ------
    ZeroDivisionError
        If :paramref:`~group.chunks` is 0.

    Note
    ----
    Credit to https://stackoverflow.com/a/434411
    (Boris)

    Examples
    --------
    Simple example:

    >>> import ittools
    >>> print(list(ittools.group(range(10), chunks=3)))
    [(0, 1, 2, 3), (4, 5, 6, 7), (8, 9, None, None)]

    Use case for turning a (supposedly) long iterable into a
    :class:`pandas.DataFrame` of 3 columns:

    >>> import ittools
    >>> import pandas as pd
    >>> print(pd.DataFrame(list(zip(*ittools.group(range(10), 3)))).to_string(
    ...     index=False, header=False))
    0 4 8.0
    1 5 9.0
    2 6 NaN
    3 7 NaN
    """
    try:
        size = len(iterable)
    except TypeError:
        # Generators and other one-shot iterables have no length;
        # materialize them once so the group size can be computed.
        iterable = tuple(iterable)
        size = len(iterable)

    length = math.ceil(size / chunks)
    # The very same iterator repeated ``length`` times: zip_longest draws
    # ``length`` consecutive items for every group.
    args = [iter(iterable)] * length
    return zip_longest(*args, fillvalue=fillvalue)
534
|
|
|
|