Passed
Pull Request — main (#190)
by Chaitanya
01:26
created

asgardpy.io.io_dl4.DL4Files.get_dl4_files()   B

Complexity

Conditions 6

Size

Total Lines 36
Code Lines 17

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 6
eloc 17
nop 2
dl 0
loc 36
rs 8.6166
c 0
b 0
f 0
"""
Basic classes defining Input Config for DL4 files and some functions to
retrieve information for the DL4 to DL5 processes.
"""
5
6
import logging
7
import re
8
from enum import Enum
9
from pathlib import Path
10
11
from gammapy.datasets import DATASET_REGISTRY, Datasets
12
from gammapy.modeling.models import Models
13
14
from asgardpy.base.base import BaseConfig, PathType
15
from asgardpy.base.geom import MapAxesConfig, get_energy_axis
16
17
# Public names exported by ``from <this module> import *``.
__all__ = [
    "InputDL4Config",
    "DL4Files",
    "DL4InputFilePatterns",
    "DL4BaseConfig",
    "get_reco_energy_bins",
]
24
25
26
class DatasetTypeEnum(str, Enum):
    """
    Config section for list of Dataset types in Gammapy.

    Every member is also a plain ``str`` whose value equals its name. The
    selected value is what ``DL4Files`` hands to ``DATASET_REGISTRY.get_cls``
    to pick the dataset class used for reading FITS files.
    """

    MapDataset = "MapDataset"
    MapDatasetOnOff = "MapDatasetOnOff"
    SpectrumDataset = "SpectrumDataset"
    SpectrumDatasetOnOff = "SpectrumDatasetOnOff"
    FluxPointsDataset = "FluxPointsDataset"
36
37
38
class DL4FormatEnum(str, Enum):
    """
    Config section for list of formats for Datasets in Gammapy.

    Every member is also a plain ``str``. The selected value is passed as the
    ``format`` argument when ``DL4Files`` reads a FITS dataset. Note that the
    member names use underscores while two of the values use hyphens.
    """

    ogip = "ogip"
    ogip_sherpa = "ogip-sherpa"
    gadf = "gadf"
    gadf_sed = "gadf-sed"
47
48
49
class DL4InputFilePatterns(BaseConfig):
    """
    Config section for list of file patterns to use for fetching relevant DL4
    files.
    """

    # Glob pattern for the DL4 dataset files.
    dl4_files: str = "pha*.fits*"
    # Glob pattern for the accompanying model files.
    dl4_model_files: str = "model*yaml"
57
58
59
class InputDL4Config(BaseConfig):
    """
    Config section for main information on getting the relevant DL4 files.
    """

    # Dataset class to read the DL4 FITS files with.
    type: DatasetTypeEnum = DatasetTypeEnum.MapDataset
    # Either a single DL4 file or a directory to search in; DL4Files treats
    # anything that is not an existing file as a directory.
    input_dir: PathType = "None"
    # Can be OGIP format (Stacked or unstacked obs) or fits format (stacked obs)
    # DL4Files.get_dl4_files reads the key "dl4_files" and, when present,
    # the key "dl4_model_files" from this mapping.
    glob_pattern: dict = {}
    # Format handed to the dataset reader for FITS files.
    dl4_format: DL4FormatEnum = DL4FormatEnum.gadf
69
70
71
class DL4BaseConfig(BaseConfig):
    """Config section for DL4 Dataset for a given instrument."""

    # Where and how to find the DL4 files.
    dl4_dataset: InputDL4Config = InputDL4Config()
    # Energy axis definition read by DL4Files.get_spectral_energies.
    spectral_energy_range: MapAxesConfig = MapAxesConfig()
76
77
78
# Main class for DL4 I/O
79
class DL4Files:
    """
    A general class to retrieve information from given DL4 files.

    Parameters
    ----------
    dl4_dataset_info : object
        Config object providing ``dl4_dataset`` (with the attributes ``type``,
        ``input_dir``, ``glob_pattern`` and ``dl4_format``) and
        ``spectral_energy_range``.
    log : logging.Logger, optional
        Logger to report to. When not given, the module logger is used and
        its level is set to INFO.
    """

    def __init__(self, dl4_dataset_info, log=None):
        self.dl4_dataset_info = dl4_dataset_info
        self.dl4_dataset = dl4_dataset_info.dl4_dataset
        self.dl4_type = self.dl4_dataset.type
        self.dl4_path = None
        self.dl4_file = None
        self.dl4_model = None

        # An existing file is read directly; anything else is treated as a
        # directory that is searched with the configured glob patterns.
        input_path = Path(self.dl4_dataset.input_dir)
        if input_path.is_file():
            self.dl4_file = input_path
        else:
            self.dl4_path = input_path

        if not log:
            self._set_logging()
        else:
            self.log = log

    def _set_logging(self):
        """Use the module logger, with its level set to INFO."""
        self.log = logging.getLogger(__name__)
        self.log.setLevel(logging.INFO)

    def get_dl4_files(self, observation_config):
        """
        Fetch the required DL4 files from the given directory path, file glob
        search and possible list of observation ids to select the dataset files
        from the full list in the directory.

        If Model files are also given, fetch them as well

        Parameters
        ----------
        observation_config : object or None
            Object with an ``obs_ids`` sequence. ``None``, or an empty or
            missing list of ids, means that no filtering is applied.

        Returns
        -------
        dl4_file_list : list of pathlib.Path
            Sorted dataset files, filtered by observation id when requested.
        dl4_model_files : list of pathlib.Path
            Sorted model files; empty when no model pattern is configured.
        """
        if self.dl4_path is None:
            # The input was a single file, so there is no directory to search.
            self.log.error("No DL4 directory is set, cannot search for DL4 files")
            return [], []

        glob_pattern = self.dl4_dataset.glob_pattern
        all_dl4_files = sorted(self.dl4_path.glob(glob_pattern["dl4_files"]))

        # Get model files as well
        dl4_model_files = []
        if "dl4_model_files" in glob_pattern:
            dl4_model_files = sorted(self.dl4_path.glob(glob_pattern["dl4_model_files"]))

        if not all_dl4_files:
            self.log.error("No datasets found in %s", self.dl4_path)

        obs_ids = None if observation_config is None else observation_config.obs_ids

        if obs_ids is None or len(obs_ids) == 0:
            # No filtering required based on observation ids
            dl4_file_list = all_dl4_files
        else:
            dl4_file_list = []
            for dl4_file in all_dl4_files:
                # Assuming a simple nomenclature from gammapy on storing DL4
                # datasets names as pha_obs[OBS_ID].fits or obs_[OBS_ID].fits
                # i.e. a single integer in the filename, being the OBS_ID or
                # the DL4 dataset name.
                obs_match = re.search(r"\d+", dl4_file.name)
                if obs_match is None:
                    # Without an integer in the name there is no id to compare.
                    self.log.warning(
                        "Skipping %s: no observation id found in the file name", dl4_file.name
                    )
                    continue
                if int(obs_match.group()) in obs_ids:
                    dl4_file_list.append(dl4_file)

        self.log.info("List of DL4 files are: %s", dl4_file_list)

        return dl4_file_list, dl4_model_files

    def read_dl4_file(self, filename):
        """
        Read a single DL4 file, choosing the reader from the file name ending.

        Names ending in ``yaml`` are read with ``Datasets.read``. Names ending
        in ``fits`` or ``fits.gz`` are read with the dataset class selected by
        the configured dataset type and the configured format, and wrapped in
        a ``Datasets`` object.

        Returns
        -------
        Datasets or None
            ``None`` when the file name has none of the supported endings.
        """
        name = str(filename)

        if name.endswith("yaml"):
            return Datasets.read(filename=filename)

        # Only FITS files, optionally gzipped, are handed to the FITS reader.
        if name.endswith(("fits", "fits.gz")):
            dataset_ = DATASET_REGISTRY.get_cls(self.dl4_type)().read(
                filename=filename, format=self.dl4_dataset.dl4_format
            )
            return Datasets(dataset_)

        return None

    def get_dl4_dataset(self, observation_config=None):
        """
        Read the corresponding DL4 dataset with the list of files provided,
        along with the dataset format and stack them in a Datasets object.

        Parameters
        ----------
        observation_config : object or None, optional
            Forwarded to ``get_dl4_files`` to select files by observation id.

        Returns
        -------
        Datasets
            The datasets read. It is empty when nothing could be read; the
            reason is reported through the logger.
        """
        if self.dl4_file:
            return Datasets.read(filename=self.dl4_file)

        # Created up front so that a value is defined on every execution path.
        datasets = Datasets()

        if not self.dl4_path:
            self.log.error("Neither a DL4 file nor a DL4 directory is set")
            return datasets

        dl4_file_list, dl4_model_files = self.get_dl4_files(observation_config)

        if len(dl4_model_files) == 0:
            for dl4_file in dl4_file_list:
                dataset = self.read_dl4_file(dl4_file)
                if dataset is None or len(dataset) == 0:
                    self.log.warning("Skipping %s: no dataset could be read from it", dl4_file)
                    continue
                datasets.append(dataset[0])
            return datasets

        # Assuming a single DL4 file and model
        if len(dl4_file_list) == 0:
            self.log.error("No DL4 file found to use with the model %s", dl4_model_files[0])
            return datasets

        dataset = self.read_dl4_file(dl4_file_list[0])
        if dataset is None:
            self.log.error("Unsupported DL4 file type: %s", dl4_file_list[0])
            return datasets

        dataset.models = Models.read(dl4_model_files[0])
        return dataset

    def get_spectral_energies(self):
        """
        Get the spectral energy information for each Instrument Dataset.

        The configured custom axis edges are used when any are given;
        otherwise ``get_energy_axis`` is called without ``custom_range``.

        Returns
        -------
        The result of ``get_energy_axis(..., only_edges=True)``, presumably
        the energy bin edges.
        """
        energy_axes = self.dl4_dataset_info.spectral_energy_range

        if len(energy_axes.axis_custom.edges) > 0:
            energy_bin_edges = get_energy_axis(energy_axes, only_edges=True, custom_range=True)
        else:
            energy_bin_edges = get_energy_axis(
                energy_axes,
                only_edges=True,
            )

        return energy_bin_edges
192
193
194
def get_reco_energy_bins(dataset, en_bins):
    """
    Calculate the total number of fit reco energy bins in the given dataset
    and add to the total value.

    Parameters
    ----------
    dataset : object
        Dataset whose ``mask.geom.axes["energy"].nbin`` gives the number of
        energy bins to add.
    en_bins : int
        Running total of energy bins collected so far.

    Returns
    -------
    int
        ``en_bins`` increased by the number of energy bins of ``dataset``.
    """
    en_bins += dataset.mask.geom.axes["energy"].nbin

    return en_bins
202