scripts.config_generator.hdf_utils._add_branches()   B
last analyzed

Complexity

Conditions 7

Size

Total Lines 26
Code Lines 20

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 7
eloc 20
nop 10
dl 0
loc 26
rs 8
c 0
b 0
f 0

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand; their parameter lists also tend to become inconsistent as soon as you need more or different data.

There are several approaches to avoid long parameter lists:

1
# Copyright 2014 Diamond Light Source Ltd.
2
#
3
# Licensed under the Apache License, Version 2.0 (the "License");
4
# you may not use this file except in compliance with the License.
5
# You may obtain a copy of the License at
6
#
7
#     http://www.apache.org/licenses/LICENSE-2.0
8
#
9
# Unless required by applicable law or agreed to in writing, software
10
# distributed under the License is distributed on an "AS IS" BASIS,
11
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
# See the License for the specific language governing permissions and
13
# limitations under the License.
14
15
"""
16
.. module:: hdf_utils
17
   :platform: Unix
18
   :synopsis: Utilities for checking hdf/nxs files
19
20
.. moduleauthor:: Nghia Vo <[email protected]>
21
22
"""
23
24
import h5py
25
import numpy as np
26
from collections import deque
27
28
# Box-drawing fragments used to render the tree view of a hdf/nxs file.
PIPE = "│"
ELBOW = "└──"
TEE = "├──"
# Prefixes added per nesting level; both are four characters wide so that
# deeper levels stay aligned.
PIPE_PREFIX = "│   "
SPACE_PREFIX = "    "
# Dataset paths looked up by check_tomo_data().
TOMO_DATA = "entry1/tomo_entry/data/data"
ROTATION_ANGLE = "entry1/tomo_entry/data/rotation_angle"
IMAGE_KEY = "entry1/tomo_entry/instrument/detector/image_key"
36
37
38
def get_hdf_information(file_path, display=False):
    """
    Get information of datasets in a hdf/nxs file.

    Parameters
    ----------
    file_path : str
        Path to the file.
    display : bool
        Print the results onto the screen if True.

    Returns
    -------
    list_key : list of str
        Keys collected from the file. For every visited group the keys of
        its children are listed (or the group key itself if it is empty);
        for every other visited object its own ``name`` is listed.
    list_shape : list
        Shape of each key in `list_key`; None if the key is not a dataset
        or cannot be resolved.
    list_type : list
        Data type of each key in `list_key`; None if the key is not a
        dataset or cannot be resolved.
    """
    list_key, list_shape, list_type = [], [], []
    # The context manager guarantees the file is closed even if reading
    # it fails part-way through.
    with h5py.File(file_path, 'r') as hdf_object:
        keys = []
        hdf_object.visit(keys.append)
        for key in keys:
            try:
                data = hdf_object[key]
                if isinstance(data, h5py.Group):
                    children = list(data.keys())
                    if children:
                        list_key.extend(
                            key + "/" + child for child in children)
                    else:
                        list_key.append(key)
                else:
                    list_key.append(data.name)
            except KeyError:
                # Unresolvable entry: keep the key so it is still reported.
                list_key.append(key)
        for key in list_key:
            try:
                data = hdf_object[key]
            except KeyError:
                shape, dtype = None, None
            else:
                if isinstance(data, h5py.Dataset):
                    shape, dtype = data.shape, data.dtype
                else:
                    shape, dtype = None, None
            list_shape.append(shape)
            list_type.append(dtype)
    if display:
        if list_key:
            for key, shape, dtype in zip(list_key, list_shape, list_type):
                print(key + " : " + str(shape) + " : " + str(dtype))
        else:
            print("Empty file !!!")
    return list_key, list_shape, list_type
104
105
106
def find_hdf_key(file_path, pattern, display=False):
    """
    Find datasets matching the name-pattern in a hdf/nxs file.

    Parameters
    ----------
    file_path : str
        Path to the file.
    pattern : str
        Pattern to find the full names of the datasets. A key matches if
        it contains `pattern` as a substring.
    display : bool
        Print the results onto the screen if True.

    Returns
    -------
    list_key : list of str
        Keys containing the pattern.
    list_shape : list
        Shape of each matched key; None if the key is not a dataset or
        cannot be resolved.
    list_type : list
        Data type of each matched key; None if the key is not a dataset
        or cannot be resolved.
    """
    list_dkey, list_dshape, list_dtype = [], [], []
    # The context manager guarantees the file is closed even if reading
    # it fails part-way through.
    with h5py.File(file_path, 'r') as hdf_object:
        list_key, keys = [], []
        hdf_object.visit(keys.append)
        for key in keys:
            try:
                data = hdf_object[key]
                if isinstance(data, h5py.Group):
                    children = list(data.keys())
                    if children:
                        list_key.extend(
                            key + "/" + child for child in children)
                    else:
                        list_key.append(key)
                else:
                    list_key.append(data.name)
            except KeyError:
                # Unresolvable entries are left out of the search.
                pass
        for key in list_key:
            if pattern not in key:
                continue
            list_dkey.append(key)
            try:
                data = hdf_object[key]
            except KeyError:
                shape, dtype = None, None
            else:
                if isinstance(data, h5py.Dataset):
                    shape, dtype = data.shape, data.dtype
                else:
                    shape, dtype = None, None
            list_dtype.append(dtype)
            list_dshape.append(shape)
    if display:
        if list_dkey:
            for key, shape, dtype in zip(list_dkey, list_dshape, list_dtype):
                print(key + " : " + str(shape) + " : " + str(dtype))
        else:
            print("Can't find datasets with keys matching the "
                  "pattern: {}".format(pattern))
    return list_dkey, list_dshape, list_dtype
176
177
178
def _get_subgroups(hdf_object, key=None):
179
    """
180
    Supplementary method for building the tree view of a hdf5 file.
181
    Return the name of subgroups.
182
    """
183
    list_group = []
184
    if key is None:
185
        for group in hdf_object.keys():
186
            list_group.append(group)
187
        if len(list_group) == 1:
188
            key = list_group[0]
189
        else:
190
            key = ""
191
    else:
192
        if key in hdf_object:
193
            try:
194
                obj = hdf_object[key]
195
                if isinstance(obj, h5py.Group):
196
                    for group in hdf_object[key].keys():
197
                        list_group.append(group)
198
            except KeyError:
199
                pass
200
    if len(list_group) > 0:
201
        list_group = sorted(list_group)
202
    return list_group, key
203
204
205
def _add_branches(tree, hdf_object, key, key1, index, last_index, prefix,
                  connector, level, add_shape):
    """
    Supplementary method for building the tree view of a hdf5 file.
    Add branches to the tree.

    Appends one line for the entry `key1` (located under `key`) to `tree`,
    optionally followed by the dataset shape, then descends into that
    entry with a prefix extended according to whether the entry is the
    last one (`index == last_index`) at its level.
    """
    full_key = key + "/" + key1
    shape_text = None
    if add_shape is True and full_key in hdf_object:
        try:
            node = hdf_object[full_key]
            if isinstance(node, h5py.Dataset):
                shape_text = str(node.shape)
        except KeyError:
            # Listed in the file but not resolvable.
            shape_text = "-> ???External-link???"
    line = f"{prefix}{connector} {key1}"
    if shape_text is not None:
        line = f"{line} {shape_text}"
    tree.append(line)
    # The last entry needs no vertical bar beneath it.
    child_prefix = prefix + (
        SPACE_PREFIX if index == last_index else PIPE_PREFIX)
    _make_tree_body(tree, hdf_object, prefix=child_prefix, key=full_key,
                    level=level, add_shape=add_shape)
231
232
233
def _make_tree_body(tree, hdf_object, prefix="", key=None, level=0,
                    add_shape=True):
    """
    Supplementary method for building the tree view of a hdf5 file.
    Create the tree body.

    Lists the entries under `key` (the top level if `key` is None) and
    adds a branch to `tree` for each of them via `_add_branches`, which
    in turn descends into the entry.
    """
    entries, key = _get_subgroups(hdf_object, key)
    if not entries:
        return
    level = level + 1
    last_index = len(entries) - 1
    if last_index == 0:
        # Single entry: at the top level it is drawn without a connector
        # and its key is reset so the child path starts from the root.
        if level == 1:
            key, connector = "", ""
        else:
            connector = ELBOW
        _add_branches(tree, hdf_object, key, entries[0], 0, 0, prefix,
                      connector, level, add_shape)
        return
    # Several entries: a lone vertical bar separates them from the parent.
    tree.append(prefix + PIPE)
    for index, key1 in enumerate(entries):
        connector = ELBOW if index == last_index else TEE
        _add_branches(tree, hdf_object, key, key1, index, last_index,
                      prefix, connector, level, add_shape)
259
260
261
def get_hdf_tree(file_path, add_shape=True, display=True):
    """
    Get the tree view of a hdf/nxs file.

    Parameters
    ----------
    file_path : str
        Path to the file.
    add_shape : bool
        Including the shape of a dataset to the tree if True.
    display : bool
        Print the tree onto the screen if True.

    Returns
    -------
    collections.deque of str
        Lines of the tree view, in display order.
    """
    tree = deque()
    # The context manager closes the file once the tree has been built,
    # also when building it raises.
    with h5py.File(file_path, 'r') as hdf_object:
        _make_tree_body(tree, hdf_object, add_shape=add_shape)
    if display:
        for entry in tree:
            print(entry)
    return tree
285
286
287
def _find_first_axis_length(file_path, pattern, label, msg):
    """
    Look up `pattern` in the file and return ``(key, length)`` where `key`
    is the first matching key and `length` the size of its first axis.
    Either value is None if it cannot be determined; the reason is
    appended to `msg`.
    """
    paths, shapes, _ = find_hdf_key(file_path, pattern)
    if not paths:
        msg.append(" -> Can't find the path: '{0}' to {1}".format(
            pattern, label))
        return None, None
    shape = shapes[0]
    # shape is None when the key is not a dataset and () for a scalar
    # dataset; neither has a first axis to compare.
    if not shape:
        msg.append(" -> Empty data in: '{0}'".format(pattern))
        return paths[0], None
    return paths[0], shape[0]


def check_tomo_data(file_path):
    """
    To check:
    - If paths to datasets in a hdf/nxs file following the Diamond-tomo data
      convention.
    - Shapes between datasets are consistent.

    The outcome is printed to the screen: either the paths found together
    with the size of their first axis, or a warning listing every problem.

    Parameters
    ----------
    file_path : str
        Path to the file.
    """
    msg = []
    path1, num_proj = _find_first_axis_length(
        file_path, TOMO_DATA, "tomo-data", msg)
    path2, num_angle = _find_first_axis_length(
        file_path, ROTATION_ANGLE, "rotation angles", msg)
    path3, num_key = _find_first_axis_length(
        file_path, IMAGE_KEY, "image-keys", msg)
    comparisons = (
        ("projections", num_proj, "rotation-angles", num_angle),
        ("projections", num_proj, "image-keys", num_key),
        ("rotation-angles", num_angle, "image-keys", num_key),
    )
    for name_a, count_a, name_b, count_b in comparisons:
        # A missing or empty dataset has already been reported; comparing
        # it again would only add a misleading mismatch message.
        if count_a is None or count_b is None:
            continue
        if count_a != count_b:
            msg.append(" -> Number of {0}: {1} is not the same as the"
                       " number of {2}: {3}".format(
                           name_a, count_a, name_b, count_b))
    if not msg:
        print("=============================================================")
        print("Paths to datasets following the default names used by "
              "NxTomoLoader:")
        print("   Path to tomo-data: '{0}'. Shape: {1}".format(
            path1, num_proj))
        print("   Path to rotation-angles: '{0}'. Shape: {1}".format(
            path2, num_angle))
        print("   Path to image-keys: '{0}'. Shape: {1}".format(
            path3, num_key))
        print("=============================================================")
    else:
        print("=========================!!!WARNING!!!=======================")
        for entry in msg:
            print("  " + entry)
        print("=============================================================")
357