Completed
Push — main ( 153081...bc6b78 )
by Chaitanya
23s queued 15s
created

asgardpy.io.io_dl4.DL4Files.get_dl4_files()   B

Complexity

Conditions 6

Size

Total Lines 36
Code Lines 17

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 6
eloc 17
nop 2
dl 0
loc 36
rs 8.6166
c 0
b 0
f 0
"""
Basic classes defining Input Config for DL4 files and some functions to
retrieve information for the DL4 to DL5 processes.
"""
5
6
import logging
7
import re
8
from enum import Enum
9
from pathlib import Path
10
11
from gammapy.datasets import DATASET_REGISTRY, Datasets
12
from gammapy.modeling.models import Models
13
14
from asgardpy.base.base import BaseConfig, PathType
15
from asgardpy.base.geom import MapAxesConfig, get_energy_axis
16
17
# Names exported by ``from <module> import *``.
__all__ = [
    "InputDL4Config",
    "DL4Files",
    "DL4InputFilePatterns",
    "DL4BaseConfig",
    "get_reco_energy_bins",
]
24
25
26
class DatasetTypeEnum(str, Enum):
    """
    Config section for list of Dataset types in Gammapy.

    Every member is also a ``str`` whose value equals the member name, so a
    member compares equal to the plain string it carries.
    """

    MapDataset = "MapDataset"
    MapDatasetOnOff = "MapDatasetOnOff"
    SpectrumDataset = "SpectrumDataset"
    SpectrumDatasetOnOff = "SpectrumDatasetOnOff"
    FluxPointsDataset = "FluxPointsDataset"
36
37
38
class DL4FormatEnum(str, Enum):
    """
    Config section for list of formats for Datasets in Gammapy.

    Member names use underscores, whereas the carried string values use
    hyphens (e.g. ``ogip_sherpa`` -> ``"ogip-sherpa"``).
    """

    ogip = "ogip"
    ogip_sherpa = "ogip-sherpa"
    gadf = "gadf"
    gadf_sed = "gadf-sed"
47
48
49
class DL4InputFilePatterns(BaseConfig):
    """
    Config section for list of file patterns to use for fetching relevant DL4
    files.
    """

    # Default pattern for the DL4 dataset files.
    dl4_files: str = "pha*.fits*"
    # Default pattern for the model files accompanying the datasets.
    dl4_model_files: str = "model*yaml"
57
58
59
class InputDL4Config(BaseConfig):
    """
    Config section for main information on getting the relevant DL4 files.
    """

    # Gammapy dataset type of the DL4 files.
    type: DatasetTypeEnum = DatasetTypeEnum.MapDataset
    # Location of the DL4 input; the string "None" is the unset default.
    input_dir: PathType = "None"
    # Can be OGIP format (Stacked or unstacked obs) or fits format (stacked obs)
    # Mapping of pattern names to glob strings; DL4Files reads the keys
    # "dl4_files" and, optionally, "dl4_model_files".
    glob_pattern: dict = {}
    # Serialization format handed over when reading the dataset files.
    dl4_format: DL4FormatEnum = DL4FormatEnum.gadf
69
70
71
class DL4BaseConfig(BaseConfig):
    """Config section for DL4 Dataset for a given instrument."""

    # Where and how to find the DL4 files.
    dl4_dataset: InputDL4Config = InputDL4Config()
    # Energy axis settings read by DL4Files.get_spectral_energies().
    spectral_energy_range: MapAxesConfig = MapAxesConfig()
76
77
78
# Main class for DL4 I/O
79
class DL4Files:
    """
    A general class to retrieve information from given DL4 files.

    The ``input_dir`` of the DL4 dataset config is stored as ``dl4_file``
    when it points to an existing file, and as ``dl4_path`` otherwise, in
    which case it is searched with the configured glob patterns.
    """

    def __init__(self, dl4_dataset_info, log=None):
        """
        Store the DL4 config and work out where the input is located.

        Parameters
        ----------
        dl4_dataset_info : object
            Config object providing ``dl4_dataset`` (with ``type`` and
            ``input_dir``) and, for `get_spectral_energies`,
            ``spectral_energy_range``.
        log : logging.Logger, optional
            Logger to use. When not given, the module logger is used and
            its level is set to INFO.
        """
        self.dl4_dataset_info = dl4_dataset_info
        self.dl4_dataset = dl4_dataset_info.dl4_dataset
        self.dl4_type = self.dl4_dataset.type
        self.dl4_path = None
        self.dl4_file = None
        self.dl4_model = None

        # Exactly one of dl4_file / dl4_path is filled in.
        input_location = Path(self.dl4_dataset.input_dir)
        if input_location.is_file():
            self.dl4_file = input_location
        else:
            self.dl4_path = input_location

        if not log:
            self._set_logging()
        else:
            self.log = log

    def _set_logging(self):
        """
        Use the module logger, with its level set to INFO.
        """
        self.log = logging.getLogger(__name__)
        self.log.setLevel(logging.INFO)

    def get_dl4_files(self, observation_config):
        """
        Fetch the required DL4 files from the given directory path, file glob
        search and possible list of observation ids to select the dataset files
        from the full list in the directory.

        If Model files are also given, fetch them as well

        Parameters
        ----------
        observation_config : object or None
            Object with an ``obs_ids`` sequence of integers. An empty
            sequence, or ``None`` instead of the object, selects every file
            matched by the glob pattern.

        Returns
        -------
        dl4_file_list : list of pathlib.Path
            Sorted dataset files, filtered by observation id if requested.
        dl4_model_files : list of pathlib.Path
            Sorted model files; empty when the glob patterns have no
            "dl4_model_files" entry.
        """
        dl4_file_list = []
        dl4_model_files = []
        glob_pattern = self.dl4_dataset.glob_pattern

        all_dl4_files = sorted(self.dl4_path.glob(glob_pattern["dl4_files"]))
        # Get model files as well
        if "dl4_model_files" in glob_pattern:
            dl4_model_files = sorted(self.dl4_path.glob(glob_pattern["dl4_model_files"]))

        if not all_dl4_files:
            self.log.error("No datasets found in %s", self.dl4_path)

        # get_dl4_dataset() may be called without any observation config;
        # treat that the same as an empty list of observation ids.
        obs_ids = [] if observation_config is None else observation_config.obs_ids
        if len(obs_ids) == 0:
            # No filtering required based on observation ids
            dl4_file_list = all_dl4_files
        else:
            for dl4_file in all_dl4_files:
                # Assuming a simple nomenclature from gammapy on storing DL4
                # datasets names as pha_obs[OBS_ID].fits or obs_[OBS_ID].fits
                # i.e. a single integer in the filename, being the OBS_ID or
                # the DL4 dataset name.
                numbers = re.findall(r"\d+", dl4_file.name)
                # A name without any integer cannot match an observation id,
                # so it is left out instead of raising IndexError.
                if numbers and int(numbers[0]) in obs_ids:
                    dl4_file_list.append(dl4_file)

        self.log.info("List of DL4 files are: %s", dl4_file_list)

        return dl4_file_list, dl4_model_files

    def read_dl4_file(self, filename):
        """
        Read a single file, which may be serialized in FITS or yaml format.

        Returns a ``Datasets`` object, or ``None`` when the file name ends
        with neither "yaml", "fits" nor "s.gz".
        """
        name = str(filename)

        if name.endswith("yaml"):
            return Datasets.read(filename=filename)

        # "s.gz" is how gzip-compressed FITS files (e.g. "*.fits.gz") are
        # recognised.
        if name.endswith(("fits", "s.gz")):
            dataset_ = DATASET_REGISTRY.get_cls(self.dl4_type)().read(
                filename=filename, format=self.dl4_dataset.dl4_format
            )
            return Datasets(dataset_)

        return None

    def get_dl4_dataset(self, observation_config=None):
        """
        Read the corresponding DL4 dataset with the list of files provided,
        along with the dataset format and stack them in a Datasets object.

        Parameters
        ----------
        observation_config : object, optional
            Passed on to `get_dl4_files` to select files by observation id.
            Only used when the input is not a single file.

        Returns
        -------
        datasets : Datasets
            The datasets that were read. It is empty when no input location
            is set or when no DL4 file is selected.
        """
        if self.dl4_file:
            return Datasets.read(filename=self.dl4_file)

        # Bound on every remaining path, so the method never returns an
        # undefined variable.
        datasets = Datasets()

        if not self.dl4_path:
            self.log.error("No DL4 file or directory is set to read the datasets from")
            return datasets

        dl4_file_list, dl4_model_files = self.get_dl4_files(observation_config)

        if not dl4_file_list:
            self.log.warning("No DL4 files selected, returning empty Datasets")
            return datasets

        if not dl4_model_files:
            for dl4_file in dl4_file_list:
                dataset = self.read_dl4_file(dl4_file)
                if dataset is None:
                    # The glob pattern can match files read_dl4_file() does
                    # not recognise; skip them instead of indexing None.
                    self.log.warning("Skipping %s, unsupported file type", dl4_file)
                    continue
                datasets.append(dataset[0])
        else:
            # Assuming a single DL4 file and model
            datasets = self.read_dl4_file(dl4_file_list[0])
            datasets.models = Models.read(dl4_model_files[0])

        return datasets

    def get_spectral_energies(self):
        """
        Get the spectral energy information for each Instrument Dataset.

        Returns the result of ``get_energy_axis`` called with
        ``only_edges=True``; ``custom_range=True`` is added when the config
        has a non-empty list of custom edges.
        """
        energy_axes = self.dl4_dataset_info.spectral_energy_range

        if len(energy_axes.axis_custom.edges) > 0:
            return get_energy_axis(energy_axes, only_edges=True, custom_range=True)

        return get_energy_axis(energy_axes, only_edges=True)
195
196
197
def get_reco_energy_bins(dataset, en_bins):
    """
    Calculate the total number of fit reco energy bins in the given dataset
    and add to the total value.

    Parameters
    ----------
    dataset : object
        Dataset whose ``mask.geom.axes["energy"].nbin`` gives the number of
        energy bins to add.
    en_bins : int
        Running total of energy bins counted so far.

    Returns
    -------
    en_bins : int
        The running total increased by the bins of this dataset.
    """
    energy_axis = dataset.mask.geom.axes["energy"]
    en_bins += energy_axis.nbin

    return en_bins
205