hawc_hal.maptree.from_root_file   A
last analyzed

Complexity

Total Complexity 12

Size/Duplication

Total Lines 201
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 84
dl 0
loc 201
rs 10
c 0
b 0
f 0
wmc 12

3 Functions

Rating   Name   Duplication   Size   Complexity  
A _get_bin_object() 0 23 3
A _read_partial_tree() 0 54 3
B from_root_file() 0 105 6
1
import os
0 ignored issues
show
Coding Style introduced by
This module should have a docstring.

The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods:

class SomeClass:
    def some_method(self):
        """Do x and return foo."""

If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions.

Loading history...
2
import socket
3
import collections
4
import numpy as np
5
6
from threeML.io.file_utils import file_existing_and_readable, sanitize_filename
7
from threeML.exceptions.custom_exceptions import custom_warnings
8
9
from ..region_of_interest import HealpixROIBase
10
from data_analysis_bin import DataAnalysisBin
0 ignored issues
show
Coding Style introduced by
Relative import 'data_analysis_bin', should be 'hawc_hal.maptree.data_analysis_bin'
Loading history...
introduced by
first party import "from data_analysis_bin import DataAnalysisBin" should be placed before "from ..region_of_interest import HealpixROIBase"
Loading history...
11
12
from ..healpix_handling import SparseHealpix, DenseHealpix
13
14
15
def _get_bin_object(f, bin_name, suffix):
0 ignored issues
show
Coding Style Naming introduced by
Argument name "f" doesn't conform to snake_case naming style ('(([a-z_][a-z0-9_]2,30)|(_[a-z0-9_]*)|(__[a-z][a-z0-9_]+__))$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
16
17
    # kludge: some maptrees have bin labels in BinInfo like "0", "1", "2", but then
18
    # the nHit bin is actually "nHit00, nHit01, nHit02... others instead have
19
    # labels in BinInfo like "00", "01", "02", and still the nHit bin is nHit00, nHit01
20
    # thus we need to add a 0 to the former case, but not add it to the latter case
21
22
    bin_label = "nHit0%s/%s" % (bin_name, suffix)
23
24
    bin_tobject = f.Get(bin_label)
25
26
    if not bin_tobject:
27
28
        # Try the other way
29
        bin_label = "nHit%s/%s" % (bin_name, suffix)
30
31
        bin_tobject = f.Get(bin_label)
32
33
        if not bin_tobject:
34
35
            raise IOError("Could not read bin %s" % bin_label)
36
37
    return bin_tobject
38
39
40
def from_root_file(map_tree_file, roi):
0 ignored issues
show
Comprehensibility introduced by
This function exceeds the maximum number of variables (24/15).
Loading history...
41
    """
42
    Create a MapTree object from a ROOT file and a ROI. Do not use this directly, use map_tree_factory instead.
43
44
    :param map_tree_file:
45
    :param roi:
46
    :return:
47
    """
48
49
    from ..root_handler import open_ROOT_file, root_numpy, tree_to_ndarray
50
51
    map_tree_file = sanitize_filename(map_tree_file)
52
53
    # Check that they exists and can be read
54
55
    if not file_existing_and_readable(map_tree_file):  # pragma: no cover
56
        raise IOError("MapTree %s does not exist or is not readable" % map_tree_file)
57
58
    # Make sure we have a proper ROI (or None)
59
60
    assert isinstance(roi, HealpixROIBase) or roi is None, "You have to provide an ROI choosing from the " \
61
                                                           "available ROIs in the region_of_interest module"
62
63
    if roi is None:
64
        custom_warnings.warn("You have set roi=None, so you are reading the entire sky")
65
66
    # Read map tree
67
68
    with open_ROOT_file(map_tree_file) as f:
0 ignored issues
show
Coding Style Naming introduced by
Variable name "f" doesn't conform to snake_case naming style ('(([a-z_][a-z0-9_]2,30)|(_[a-z0-9_]*)|(__[a-z][a-z0-9_]+__))$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
69
70
        data_bins_labels = list(root_numpy.tree2array(f.Get("BinInfo"), "name"))
71
72
        # A transit is defined as 1 day, and totalDuration is in hours
73
        # Get the number of transit from bin 0 (as LiFF does)
74
75
        n_transits = root_numpy.tree2array(f.Get("BinInfo"), "totalDuration") / 24.0
76
77
        # The map-maker underestimate the livetime of bins with low statistic by removing time intervals with
78
        # zero events. Therefore, the best estimate of the livetime is the maximum of n_transits, which normally
79
        # happen in the bins with high statistic
80
        n_transits = max(n_transits)
81
82
        n_bins = len(data_bins_labels)
83
84
        # These are going to be Healpix maps, one for each data analysis bin_name
85
86
        data_analysis_bins = collections.OrderedDict()
87
88
        for i in range(n_bins):
89
90
            name = data_bins_labels[i]
91
92
            data_tobject = _get_bin_object(f, name, "data")
93
94
            bkg_tobject = _get_bin_object(f, name, "bkg")
95
96
            # Get ordering scheme
97
            nside = data_tobject.GetUserInfo().FindObject("Nside").GetVal()
98
            nside_bkg = bkg_tobject.GetUserInfo().FindObject("Nside").GetVal()
99
100
            assert nside == nside_bkg
101
102
            scheme = data_tobject.GetUserInfo().FindObject("Scheme").GetVal()
103
            scheme_bkg = bkg_tobject.GetUserInfo().FindObject("Scheme").GetVal()
104
105
            assert scheme == scheme_bkg
106
107
            assert scheme == 0, "NESTED scheme is not supported yet"
108
109
            if roi is not None:
110
111
                # Only read the elements in the ROI
112
113
                active_pixels = roi.active_pixels(nside, system='equatorial', ordering='RING')
114
115
                counts = _read_partial_tree(data_tobject, active_pixels)
116
                bkg = _read_partial_tree(bkg_tobject, active_pixels)
117
118
                counts_hpx = SparseHealpix(counts, active_pixels, nside)
119
                bkg_hpx = SparseHealpix(bkg, active_pixels, nside)
120
121
                this_data_analysis_bin = DataAnalysisBin(name,
122
                                                         counts_hpx,
123
                                                         bkg_hpx,
124
                                                         active_pixels_ids=active_pixels,
125
                                                         n_transits=n_transits,
126
                                                         scheme='RING')
127
128
            else:
129
130
                # Read the entire sky.
131
132
                counts = tree_to_ndarray(data_tobject, "count").astype(np.float64)
133
                bkg = tree_to_ndarray(bkg_tobject, "count").astype(np.float64)
134
135
                this_data_analysis_bin = DataAnalysisBin(name,
136
                                                         DenseHealpix(counts),
137
                                                         DenseHealpix(bkg),
138
                                                         active_pixels_ids=None,
139
                                                         n_transits=n_transits,
140
                                                         scheme='RING')
141
142
            data_analysis_bins[name] = this_data_analysis_bin
143
144
    return data_analysis_bins
145
146
147
def _read_partial_tree(ttree_instance, elements_to_read):
148
149
    # Decide whether to use a smart loading scheme, or just loading the whole thing, based on the
150
    # number of elements to be read
151
152
    from ..root_handler import ROOT, root_numpy, tree_to_ndarray
153
154
    if elements_to_read.shape[0] < 500000:
155
156
        # Use a smart loading scheme, where we read only the pixels we need
157
158
        # The fastest method that I found is to create a TEventList, apply it to the
159
        # tree, get a copy of the subset and then use ttree2array
160
161
        # Create TEventList
162
        entrylist = ROOT.TEntryList()
163
164
        # Add the selections
165
        _ = map(entrylist.Enter, elements_to_read)
166
167
        # Apply the EntryList to the tree
168
        ttree_instance.SetEntryList(entrylist)
169
170
        # Get copy of the subset
171
        # We need to create a dumb TFile to silence a lot of warnings from ROOT
172
        # Get a filename for this process
173
        dumb_tfile_name = "__dumb_tfile_%s_%s.root" % (os.getpid(), socket.gethostname())
174
        dumb_tfile = ROOT.TFile(dumb_tfile_name, "RECREATE")
175
        new_tree = ttree_instance.CopyTree("")
176
177
        # Actually read it from disk
178
        partial_map = root_numpy.tree2array(new_tree, "count").astype(np.float64)
179
180
        # Now reset the entry list
181
        ttree_instance.SetEntryList(0)
182
183
        dumb_tfile.Close()
184
        os.remove(dumb_tfile_name)
185
186
    else:
187
188
        # The smart scheme starts to become slower than the brute force approach, so let's read the whole thing
189
        partial_map = tree_to_ndarray(ttree_instance, "count").astype(np.float64)
190
191
        assert partial_map.shape[0] >= elements_to_read.shape[0], "Trying to read more pixels than present in TTree"
192
193
        # Unless we have read the whole sky, let's remove the pixels we shouldn't have read
194
195
        if elements_to_read.shape[0] != partial_map.shape[0]:
196
197
            # Now let's remove the pixels we shouldn't have read
198
            partial_map = partial_map[elements_to_read]
199
200
    return partial_map.astype(np.float64)
201