# Copyright 2014 Diamond Light Source Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
.. module:: hdf_utils
   :platform: Unix
   :synopsis: Utilities for checking hdf/nxs files

.. moduleauthor:: Nghia Vo <[email protected]>

"""
import h5py
import numpy as np
from collections import deque

# Box-drawing fragments used to render the tree view of a hdf file.
PIPE = "│"
ELBOW = "└──"
TEE = "├──"
# Prefixes continuing (or closing) the vertical rule for nested levels.
# NOTE(review): the widths below look collapsed by text extraction; a
# tree renderer normally pads these to the connector width
# (e.g. "│   " / "    ") — confirm against the original file.
PIPE_PREFIX = "│ "
SPACE_PREFIX = " "
# Default dataset paths following the Diamond tomography (NxTomoLoader)
# data convention.
TOMO_DATA = "entry1/tomo_entry/data/data"
ROTATION_ANGLE = "entry1/tomo_entry/data/rotation_angle"
IMAGE_KEY = "entry1/tomo_entry/instrument/detector/image_key"


def get_hdf_information(file_path, display=False):
    """
    Get information of datasets in a hdf/nxs file.

    Parameters
    ----------
    file_path : str
        Path to the file.
    display : bool
        Print the results onto the screen if True.

    Returns
    -------
    list_key : str
        Keys to the datasets.
    list_shape : tuple of int
        Shapes of the datasets.
    list_type : str
        Types of the datasets.
    """
    list_key, list_shape, list_type = [], [], []
    # Fix: use a context manager so the file is closed even if a lookup
    # raises; the original only closed it on the success path.
    with h5py.File(file_path, 'r') as hdf_object:
        keys = []
        hdf_object.visit(keys.append)
        for key in keys:
            try:
                data = hdf_object[key]
                if isinstance(data, h5py.Group):
                    list_tmp = list(data.items())
                    if list_tmp:
                        for key2, _ in list_tmp:
                            list_key.append(key + "/" + key2)
                    else:
                        # Empty group: keep the group path itself.
                        list_key.append(key)
                else:
                    list_key.append(data.name)
            except KeyError:
                # Unresolvable entry (e.g. broken external link): keep the
                # key so the caller still sees it listed.
                list_key.append(key)
        for key in list_key:
            try:
                data = hdf_object[key]
                if isinstance(data, h5py.Dataset):
                    shape, dtype = data.shape, data.dtype
                else:
                    shape, dtype = None, None
                # NOTE(review): hdf_object[...] never yields a plain list,
                # so this branch looks unreachable — kept for safety.
                if isinstance(data, list) and len(data) == 1:
                    if not isinstance(data, np.ndarray):
                        # Fix: str.replace returns a new string; the
                        # original discarded the result (no-op).
                        dtype = str(list(data)[0]).replace("b'", "'")
                list_shape.append(shape)
                list_type.append(dtype)
            except KeyError:
                list_shape.append(None)
                list_type.append(None)
    if display:
        if list_key:
            for i, key in enumerate(list_key):
                print(key + " : " + str(list_shape[i]) + " : "
                      + str(list_type[i]))
        else:
            print("Empty file !!!")
    return list_key, list_shape, list_type


def find_hdf_key(file_path, pattern, display=False):
    """
    Find datasets matching the name-pattern in a hdf/nxs file.

    Parameters
    ----------
    file_path : str
        Path to the file.
    pattern : str
        Pattern to find the full names of the datasets.
    display : bool
        Print the results onto the screen if True.

    Returns
    -------
    list_key : str
        Keys to the datasets.
    list_shape : tuple of int
        Shapes of the datasets.
    list_type : str
        Types of the datasets.
    """
    list_key, keys = [], []
    list_dkey, list_dshape, list_dtype = [], [], []
    # Fix: context manager closes the file even on error; the original
    # leaked the handle if anything raised before close().
    with h5py.File(file_path, 'r') as hdf_object:
        hdf_object.visit(keys.append)
        for key in keys:
            try:
                data = hdf_object[key]
                if isinstance(data, h5py.Group):
                    list_tmp = list(data.items())
                    if list_tmp:
                        for key2, _ in list_tmp:
                            list_key.append(key + "/" + key2)
                    else:
                        # Empty group: keep the group path itself.
                        list_key.append(key)
                else:
                    list_key.append(data.name)
            except KeyError:
                # Unresolvable entry (e.g. broken external link): skip.
                pass
        for key in list_key:
            # Idiom fix: iterate directly; the index from enumerate was
            # never used.
            if pattern not in key:
                continue
            list_dkey.append(key)
            try:
                data = hdf_object[key]
                if isinstance(data, h5py.Dataset):
                    shape, dtype = data.shape, data.dtype
                else:
                    shape, dtype = None, None
                # NOTE(review): hdf_object[...] never yields a plain list,
                # so this branch looks unreachable — kept for safety.
                if isinstance(data, list) and len(data) == 1:
                    if not isinstance(data, np.ndarray):
                        # Fix: str.replace returns a new string; the
                        # original discarded the result (no-op).
                        dtype = str(list(data)[0]).replace("b'", "'")
                list_dtype.append(dtype)
                list_dshape.append(shape)
            except KeyError:
                list_dtype.append(None)
                list_dshape.append(None)
    if display:
        if list_dkey:
            for i, key in enumerate(list_dkey):
                print(key + " : " + str(list_dshape[i]) + " : "
                      + str(list_dtype[i]))
        else:
            print("Can't find datasets with keys matching the "
                  "pattern: {}".format(pattern))
    return list_dkey, list_dshape, list_dtype


178
|
|
|
def _get_subgroups(hdf_object, key=None): |
179
|
|
|
""" |
180
|
|
|
Supplementary method for building the tree view of a hdf5 file. |
181
|
|
|
Return the name of subgroups. |
182
|
|
|
""" |
183
|
|
|
list_group = [] |
184
|
|
|
if key is None: |
185
|
|
|
for group in hdf_object.keys(): |
186
|
|
|
list_group.append(group) |
187
|
|
|
if len(list_group) == 1: |
188
|
|
|
key = list_group[0] |
189
|
|
|
else: |
190
|
|
|
key = "" |
191
|
|
|
else: |
192
|
|
|
if key in hdf_object: |
193
|
|
|
try: |
194
|
|
|
obj = hdf_object[key] |
195
|
|
|
if isinstance(obj, h5py.Group): |
196
|
|
|
for group in hdf_object[key].keys(): |
197
|
|
|
list_group.append(group) |
198
|
|
|
except KeyError: |
199
|
|
|
pass |
200
|
|
|
if len(list_group) > 0: |
201
|
|
|
list_group = sorted(list_group) |
202
|
|
|
return list_group, key |
203
|
|
|
|
204
|
|
|
|
def _add_branches(tree, hdf_object, key, key1, index, last_index, prefix,
                  connector, level, add_shape):
    """
    Supplementary method for building the tree view of a hdf5 file.
    Add branches to the tree.
    """
    child_path = key + "/" + key1
    shape_text = None
    if add_shape is True and child_path in hdf_object:
        try:
            node = hdf_object[child_path]
            if isinstance(node, h5py.Dataset):
                shape_text = str(node.shape)
        except KeyError:
            # Entry exists but cannot be resolved (e.g. external link).
            shape_text = "-> ???External-link???"
    entry = f"{prefix}{connector} {key1}"
    if shape_text is not None:
        entry = entry + " " + shape_text
    tree.append(entry)
    # Continue the vertical rule only while siblings remain below this one.
    child_prefix = prefix + (PIPE_PREFIX if index != last_index
                             else SPACE_PREFIX)
    _make_tree_body(tree, hdf_object, prefix=child_prefix, key=child_path,
                    level=level, add_shape=add_shape)


def _make_tree_body(tree, hdf_object, prefix="", key=None, level=0,
                    add_shape=True):
    """
    Supplementary method for building the tree view of a hdf5 file.
    Create the tree body.
    """
    entries, key = _get_subgroups(hdf_object, key)
    count = len(entries)
    last = count - 1
    level += 1
    if count == 0:
        return
    if last == 0:
        # Single child: at the root level drop the group prefix entirely.
        if level == 1:
            key = ""
        # NOTE(review): with exactly one entry, `count > 1` can never hold
        # here, so the PIPE branch is unreachable — kept for parity with
        # the original.
        connector = PIPE if count > 1 else (ELBOW if level > 1 else "")
        _add_branches(tree, hdf_object, key, entries[0], 0, 0, prefix,
                      connector, level, add_shape)
    else:
        for index, name in enumerate(entries):
            if index == 0:
                # Open the sibling group with a bare vertical rule.
                tree.append(prefix + PIPE)
            connector = TEE if index != last else ELBOW
            _add_branches(tree, hdf_object, key, name, index, last,
                          prefix, connector, level, add_shape)


def get_hdf_tree(file_path, add_shape=True, display=True):
    """
    Get the tree view of a hdf/nxs file.

    Parameters
    ----------
    file_path : str
        Path to the file.
    add_shape : bool
        Including the shape of a dataset to the tree if True.
    display : bool
        Print the tree onto the screen if True.

    Returns
    -------
    list of string
    """
    tree = deque()
    # Fix: the original opened the file and never closed it; the context
    # manager guarantees the handle is released.
    with h5py.File(file_path, 'r') as hdf_object:
        _make_tree_body(tree, hdf_object, add_shape=add_shape)
    if display:
        for entry in tree:
            print(entry)
    return tree


def check_tomo_data(file_path):
    """
    To check:
    - If paths to datasets in a hdf/nxs file following the Diamond-tomo data
    convention.
    - Shapes between datasets are consistent.
    """
    data_path, data_shape, _ = find_hdf_key(file_path, TOMO_DATA)
    angle_path, angle_shape, _ = find_hdf_key(file_path, ROTATION_ANGLE)
    key_path, key_shape, _ = find_hdf_key(file_path, IMAGE_KEY)
    problems = []
    ok = True
    # Each dataset: require the path, require non-empty data, then reduce
    # the shape to its leading dimension (number of frames/entries).
    if not data_path:
        problems.append(f" -> Can't find the path: '{TOMO_DATA}' "
                        f"to tomo-data")
        ok = False
    elif not data_shape:
        problems.append(f" -> Empty data in: '{TOMO_DATA}'")
        ok = False
    else:
        data_shape = data_shape[0][0]
    if not angle_path:
        problems.append(f" -> Can't find the path: '{ROTATION_ANGLE}' to "
                        f"rotation angles")
        ok = False
    elif not angle_shape:
        problems.append(f" -> Empty data in: '{ROTATION_ANGLE}'")
        ok = False
    else:
        angle_shape = list(angle_shape)[0][0]
    if not key_path:
        problems.append(f" -> Can't find the path: '{IMAGE_KEY}' to "
                        f"image-keys")
        ok = False
    elif not key_shape:
        problems.append(f" -> Empty data in: '{IMAGE_KEY}'")
        ok = False
    else:
        key_shape = list(key_shape)[0][0]
    # Cross-check that the three leading dimensions agree.
    if data_shape != angle_shape:
        problems.append(f" -> Number of projections: {data_shape} is not"
                        f" the same as the number of rotation-angles:"
                        f" {angle_shape}")
        ok = False
    if data_shape != key_shape:
        problems.append(f" -> Number of projections: {data_shape} is not"
                        f" the same as the number of image-keys:"
                        f" {key_shape}")
        ok = False
    if angle_shape != key_shape:
        problems.append(f" -> Number of rotation-angles: {angle_shape} is"
                        f" not the same as the number of image-keys:"
                        f" {key_shape}")
        ok = False
    if ok:
        print("=============================================================")
        print("Paths to datasets following the default names used by "
              "NxTomoLoader:")
        print(f" Path to tomo-data: '{data_path[0]}'. Shape: {data_shape}")
        print(f" Path to rotation-angles: '{angle_path[0]}'. "
              f"Shape: {angle_shape}")
        print(f" Path to image-keys: '{key_path[0]}'. Shape: {key_shape}")
        print("=============================================================")
    else:
        print("=========================!!!WARNING!!!=======================")
        for line in problems:
            print(" " + line)
        print("=============================================================")