asgardpy.io.io_dl4   A
last analyzed

Complexity

Total Complexity 22

Size/Duplication

Total Lines 210
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 107
dl 0
loc 210
rs 10
c 0
b 0
f 0
wmc 22

7 Methods

Rating   Name   Duplication   Size   Complexity  
A DL4Files.__init__() 0 17 3
A DL4Files._set_logging() 0 3 1
A DL4Files.get_dl4_dataset() 0 22 5
A DL4Files.fetch_dl4_files_by_filenames() 0 12 3
A DL4Files.get_dl4_files() 0 28 4
A DL4Files.get_spectral_energies() 0 15 2
A DL4Files.read_dl4_file() 0 13 3

1 Function

Rating   Name   Duplication   Size   Complexity  
A get_reco_energy_bins() 0 8 1
1
"""
2
Basic classes defining Input Config for DL4 files and some functions to
3
retrieve information for the DL4 to DL5 processes.
4
"""
5
6
import logging
7
import re
8
from enum import Enum
9
from pathlib import Path
10
11
from gammapy.datasets import DATASET_REGISTRY, Datasets
12
from gammapy.modeling.models import Models
13
14
from asgardpy.base.base import BaseConfig, PathType
15
from asgardpy.base.geom import MapAxesConfig, get_energy_axis
16
17
# Names exported by a star-import of this module.
# NOTE(review): DatasetTypeEnum and DL4FormatEnum are defined in this module
# but not listed here - confirm that leaving them out is intentional.
__all__ = [
    "InputDL4Config",
    "DL4Files",
    "DL4InputFilePatterns",
    "DL4BaseConfig",
    "get_reco_energy_bins",
]
24
25
26
class DatasetTypeEnum(str, Enum):
    """
    Names of the Gammapy Dataset types that can be selected in the config.

    Each member is also a plain ``str``. ``DL4Files.read_dl4_file`` hands the
    selected member to ``DATASET_REGISTRY.get_cls`` to look up the dataset
    class to read with.
    """

    # 3D (map) datasets
    MapDataset = "MapDataset"
    MapDatasetOnOff = "MapDatasetOnOff"

    # 1D (spectrum) datasets
    SpectrumDataset = "SpectrumDataset"
    SpectrumDatasetOnOff = "SpectrumDatasetOnOff"

    # Flux points
    FluxPointsDataset = "FluxPointsDataset"
36
37
38
class DL4FormatEnum(str, Enum):
    """
    Names of the serialisation formats that can be selected for DL4 datasets.

    Each member is also a plain ``str``. ``DL4Files.read_dl4_file`` passes the
    selected member as the ``format`` argument when reading a FITS dataset.
    """

    # OGIP flavours
    ogip = "ogip"
    ogip_sherpa = "ogip-sherpa"

    # GADF flavours
    gadf = "gadf"
    gadf_sed = "gadf-sed"
47
48
49
class DL4InputFilePatterns(BaseConfig):
    """
    Config section holding the glob patterns used to find the relevant DL4
    files.
    """

    # Pattern for the DL4 dataset files.
    dl4_files: str = "pha*.fits*"
    # Pattern for the model files stored next to the datasets.
    dl4_model_files: str = "model*yaml"
57
58
59
class InputDL4Config(BaseConfig):
    """
    Config section with the main information needed to locate and read the
    relevant DL4 files.
    """

    # Dataset type the DL4 files are read as.
    type: DatasetTypeEnum = DatasetTypeEnum.MapDataset

    # Either a single dataset file or a directory to search; DL4Files checks
    # which of the two it is. The default is the literal string "None".
    input_dir: PathType = "None"

    # Can be OGIP format (Stacked or unstacked obs) or fits format (stacked obs)
    # DL4Files.get_dl4_files reads the key "dl4_files" and, when present,
    # "dl4_model_files" from this mapping.
    glob_pattern: dict = {}

    # Serialisation format passed on when reading FITS datasets.
    dl4_format: DL4FormatEnum = DL4FormatEnum.gadf
69
70
71
class DL4BaseConfig(BaseConfig):
    """
    Config section for the DL4 Dataset of a given instrument.
    """

    # Where the DL4 files are and how to read them.
    dl4_dataset: InputDL4Config = InputDL4Config()

    # Energy axis settings read by DL4Files.get_spectral_energies.
    spectral_energy_range: MapAxesConfig = MapAxesConfig()
76
77
78
# Main class for DL4 I/O
79
class DL4Files:
    """
    A general class to retrieve information from given DL4 files.

    Parameters
    ----------
    dl4_dataset_info : config object
        Must expose ``dl4_dataset`` (providing ``type``, ``input_dir``,
        ``glob_pattern`` and ``dl4_format``) and ``spectral_energy_range``.
    log : logging.Logger, optional
        Logger to report to. When not given, the module logger is used and
        its level is set to INFO.

    Attributes
    ----------
    dl4_file : pathlib.Path or None
        Set when ``input_dir`` points to an existing file.
    dl4_path : pathlib.Path or None
        Set otherwise; used as the directory to glob DL4 files from.
    """

    def __init__(self, dl4_dataset_info, log=None):
        self.dl4_dataset_info = dl4_dataset_info
        self.dl4_dataset = dl4_dataset_info.dl4_dataset
        self.dl4_type = self.dl4_dataset.type
        self.dl4_path = None
        self.dl4_file = None
        self.dl4_model = None

        # Exactly one of dl4_file / dl4_path ends up set.
        input_path = Path(self.dl4_dataset.input_dir)
        if input_path.is_file():
            self.dl4_file = input_path
        else:
            self.dl4_path = input_path

        if not log:
            self._set_logging()
        else:
            self.log = log

    def _set_logging(self):
        """Use the module logger and set its level to INFO."""
        self.log = logging.getLogger(__name__)
        self.log.setLevel(logging.INFO)

    def fetch_dl4_files_by_filenames(self, all_dl4_files, obs_ids):
        """
        Select the DL4 files whose name carries one of the requested ids.

        Assuming a simple nomenclature from gammapy on storing DL4 datasets
        names as pha_obs[OBS_ID].fits or obs_[OBS_ID].fits i.e. a single integer
        in the filename, being the OBS_ID or the DL4 dataset name. Only the
        first run of digits in the file name is considered.

        Parameters
        ----------
        all_dl4_files : iterable of pathlib.Path
            Candidate files.
        obs_ids : container of int
            Ids to keep.

        Returns
        -------
        list of pathlib.Path
            The matching files, in the order they were given. Files without
            any digit in their name are skipped with a warning.
        """
        dl4_file_list = []
        for dl4_file in all_dl4_files:
            id_match = re.search(r"\d+", dl4_file.name)
            if id_match is None:
                # Nothing to compare against; skip instead of failing the whole selection.
                self.log.warning("Skipping %s: no integer id found in the file name", dl4_file)
                continue
            if int(id_match.group()) in obs_ids:
                dl4_file_list.append(dl4_file)
        return dl4_file_list

    def read_dl4_file(self, filename):
        """
        Read a single file, which may be serialized in FITS or yaml format.

        Parameters
        ----------
        filename : str or pathlib.Path
            File to read. The format is chosen from the end of the name:
            ``yaml``, ``fits`` or ``fits.gz``.

        Returns
        -------
        Datasets or None
            The datasets read from the file, or None (with a warning logged)
            when the file name has none of the supported endings.
        """
        name = str(filename)

        if name.endswith("yaml"):
            return Datasets.read(filename=filename)

        # Only gzipped FITS counts as FITS, not every name that ends in "s.gz".
        if name.endswith(("fits", "fits.gz")):
            dataset_ = DATASET_REGISTRY.get_cls(self.dl4_type)().read(
                filename=filename, format=self.dl4_dataset.dl4_format
            )
            return Datasets(dataset_)

        self.log.warning("Unsupported DL4 file type, not read: %s", filename)
        return None

    def get_dl4_files(self, observation_config):
        """
        Fetch the required DL4 files from the given directory path, file glob
        search and possible list of observation ids to select the dataset files
        from the full list in the directory.

        If Model files are also given, fetch them as well

        Parameters
        ----------
        observation_config : object with an ``obs_ids`` attribute, or None
            When None, or when ``obs_ids`` is empty, no filtering by
            observation id is applied.

        Returns
        -------
        dl4_file_list : list of pathlib.Path
            Sorted (and possibly filtered) dataset files.
        dl4_model_files : list of pathlib.Path
            Sorted model files; empty when no "dl4_model_files" pattern is
            configured.
        """
        glob_pattern = self.dl4_dataset.glob_pattern

        all_dl4_files = sorted(self.dl4_path.glob(glob_pattern["dl4_files"]))

        # Get model files as well
        dl4_model_files = []
        if "dl4_model_files" in glob_pattern:
            dl4_model_files = sorted(self.dl4_path.glob(glob_pattern["dl4_model_files"]))

        if len(all_dl4_files) == 0:
            self.log.error("No datasets found in %s", self.dl4_path)

        # get_dl4_dataset() may be called without an observation config.
        obs_ids = [] if observation_config is None else observation_config.obs_ids
        if len(obs_ids) == 0:
            # No filtering required based on observation ids
            dl4_file_list = all_dl4_files
        else:
            dl4_file_list = self.fetch_dl4_files_by_filenames(all_dl4_files, obs_ids)

        self.log.info("List of DL4 files are: %s", dl4_file_list)

        return dl4_file_list, dl4_model_files

    def get_dl4_dataset(self, observation_config=None):
        """
        Read the corresponding DL4 dataset with the list of files provided,
        along with the dataset format and stack them in a Datasets object.

        Parameters
        ----------
        observation_config : object with an ``obs_ids`` attribute, optional
            Used to select files by observation id when reading from a
            directory.

        Returns
        -------
        Datasets
            The datasets read. An empty Datasets is returned, with an error
            logged, when nothing could be read.
        """
        if self.dl4_file:
            return Datasets.read(filename=self.dl4_file)

        # Always bound, so every path below returns a Datasets object.
        datasets = Datasets()

        if not self.dl4_path:
            self.log.error("Neither a DL4 file nor a DL4 directory is set")
            return datasets

        dl4_file_list, dl4_model_files = self.get_dl4_files(observation_config)

        if len(dl4_model_files) == 0:
            for dl4_file in dl4_file_list:
                dataset = self.read_dl4_file(dl4_file)
                if dataset is None or len(dataset) == 0:
                    # Unsupported or empty file: nothing to add.
                    continue
                # Only the first dataset of each file is kept.
                datasets.append(dataset[0])
            return datasets

        # Assuming a single DL4 file and model
        if len(dl4_file_list) == 0:
            self.log.error("No DL4 file selected to go with the model file %s", dl4_model_files[0])
            return datasets

        single_datasets = self.read_dl4_file(dl4_file_list[0])
        if single_datasets is None:
            self.log.error("Could not read the DL4 file %s", dl4_file_list[0])
            return datasets

        single_datasets.models = Models.read(dl4_model_files[0])
        return single_datasets

    def get_spectral_energies(self):
        """
        Get the spectral energy information for each Instrument Dataset.

        Returns
        -------
        The energy bin edges computed by ``get_energy_axis`` from the
        ``spectral_energy_range`` config; the custom range is requested when
        custom edges are configured.
        """
        energy_axes = self.dl4_dataset_info.spectral_energy_range

        if len(energy_axes.axis_custom.edges) > 0:
            return get_energy_axis(energy_axes, only_edges=True, custom_range=True)

        return get_energy_axis(energy_axes, only_edges=True)
200
201
202
def get_reco_energy_bins(dataset, en_bins):
    """
    Add the number of bins on the "energy" axis of the dataset's mask
    geometry to the running total and return the new total.
    """
    energy_axis = dataset.mask.geom.axes["energy"]
    en_bins += energy_axis.nbin
    return en_bins
210