Passed
Pull Request — main (#165)
created by Chaitanya, 01:21

asgardpy.stats.stats.fetch_pivot_energy()   A

Complexity
  Conditions: 3

Size
  Total Lines: 36
  Code Lines: 13

Duplication
  Lines: 0
  Ratio: 0 %

Importance
  Changes: 0
Metric   Value
cc       3
eloc     13
nop      1
dl       0
loc      36
rs       9.75
c        0
b        0
f        0
"""
Module for performing some statistical functions.
"""

import numpy as np
from gammapy.stats.fit_statistics import cash, wstat
from scipy.stats import chi2, norm

__all__ = [
    "check_model_preference_aic",
    "check_model_preference_lrt",
    "get_chi2_sig_pval",
    "get_goodness_of_fit_stats",
    "get_ts_target",
]

def get_chi2_sig_pval(test_stat, ndof):
    """
    Using the test statistic value from fitting a model to data, along with the
    total number of degrees of freedom, evaluate the significance of the fit in
    Gaussian sigmas, along with the one-tailed p-value of the fit statistic.

    In Gammapy, the cash statistic is used for 3D analysis, while the wstat
    statistic is used for 1D analysis. Check the documentation for more details:
    https://docs.gammapy.org/1.2/user-guide/stats/index.html

    Parameters
    ----------
    test_stat: float
        The test statistic (-2 ln L) value of the fit.
    ndof: int
        Total number of degrees of freedom.

    Returns
    -------
    chi2_sig: float
        Significance of the fit, converted from the chi-squared p-value into
        Gaussian sigmas.
    pval: float
        p-value of the model fit.
    """
    pval = chi2.sf(test_stat, ndof)
    chi2_sig = norm.isf(pval / 2)

    return chi2_sig, pval

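As a quick check of how chi2.sf and norm.isf combine here, the following is a minimal usage sketch; the test-statistic and DoF values are made up for illustration only.

# Minimal usage sketch of get_chi2_sig_pval; the numbers are made up for illustration.
from asgardpy.stats.stats import get_chi2_sig_pval

test_stat = 25.0  # hypothetical fit statistic (-2 ln L) difference
ndof = 10         # hypothetical total degrees of freedom

sig, pval = get_chi2_sig_pval(test_stat, ndof)
print(f"p-value = {pval:.3e}, significance = {sig:.2f} sigma")
# For these inputs, chi2.sf gives p ~ 5.3e-3, i.e. roughly a 2.8 sigma significance.
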
def check_model_preference_lrt(test_stat_1, test_stat_2, ndof_1, ndof_2):
    """
    Log-likelihood ratio test (LRT), checking the preference of a "nested"
    spectral model (model 2) over a primary model (model 1).

    Parameters
    ----------
    test_stat_1: float
        The test statistic (-2 ln L) of the fit result of the primary spectral model.
    test_stat_2: float
        The test statistic (-2 ln L) of the fit result of the nested spectral model.
    ndof_1: int
        Number of degrees of freedom of the primary model.
    ndof_2: int
        Number of degrees of freedom of the nested model.

    Returns
    -------
    p_value: float
        p-value of the likelihood ratio.
    gaussian_sigmas: float
        Significance of the likelihood ratio, expressed in Gaussian sigmas.
    n_dof: int
        Difference in the number of degrees of freedom (free parameters)
        between the primary and nested models.
    """
    n_dof = ndof_1 - ndof_2

    if n_dof < 1:
        print(f"DoF of the primary model ({ndof_1}) is not larger than that of the nested model ({ndof_2})")

        return np.nan, np.nan, n_dof

    gaussian_sigmas, p_value = get_chi2_sig_pval(test_stat_1 - test_stat_2, n_dof)

    return p_value, gaussian_sigmas, n_dof

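For example, to test whether an additional spectral parameter is preferred, the two fit results would be compared as in this sketch; all fit values below are hypothetical.

# LRT usage sketch; all fit values are hypothetical.
from asgardpy.stats.stats import check_model_preference_lrt

# Assumed fit outputs for a primary model (1) and a "nested" model (2) with one
# extra free parameter, hence one fewer degree of freedom.
ts_1, ndof_1 = 112.0, 40
ts_2, ndof_2 = 101.0, 39

p_value, sigmas, n_dof = check_model_preference_lrt(ts_1, ts_2, ndof_1, ndof_2)
print(f"Delta TS = {ts_1 - ts_2:.1f} over {n_dof} DoF -> {sigmas:.2f} sigma (p = {p_value:.3e})")
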
def check_model_preference_aic(list_stat, list_dof):
    """
    Akaike Information Criterion (AIC) preference over a list of stat and DoF
    (degrees of freedom) values, to get the relative likelihood of a given list
    of best-fit models.

    Parameters
    ----------
    list_stat: list
        List of stat or -2 ln L values, for a list of models.
    list_dof: list
        List of degrees of freedom, or numbers of free parameters, for a list of models.

    Returns
    -------
    list_rel_p: list
        List of relative likelihood probabilities, for a list of models.
    """
    # AIC = -2 ln L + 2 k, with k free parameters (here taken as the DoF value)
    list_aic_stat = []
    for stat, dof in zip(list_stat, list_dof, strict=True):
        aic_stat = stat + 2 * dof
        list_aic_stat.append(aic_stat)
    list_aic_stat = np.array(list_aic_stat)

    aic_stat_min = np.min(list_aic_stat)

    # Relative likelihood of each model with respect to the minimum-AIC model
    list_b_stat = []
    for aic in list_aic_stat:
        b_stat = np.exp((aic_stat_min - aic) / 2)
        list_b_stat.append(b_stat)
    list_b_stat = np.array(list_b_stat)

    # Normalize to get relative probabilities (Akaike weights)
    list_rel_p = []
    for b_stat in list_b_stat:
        rel_p = b_stat / np.sum(list_b_stat)
        list_rel_p.append(rel_p)
    list_rel_p = np.array(list_rel_p)

    return list_rel_p

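For more than two (not necessarily nested) models, the returned Akaike weights can be compared directly; a minimal sketch with made-up fit values:

# AIC-preference usage sketch; the stat and DoF values are made up.
from asgardpy.stats.stats import check_model_preference_aic

list_stat = [120.0, 112.0, 111.5]  # hypothetical -2 ln L values for three models
list_dof = [2, 3, 4]               # hypothetical free-parameter counts

rel_p = check_model_preference_aic(list_stat, list_dof)
for i, p in enumerate(rel_p):
    print(f"Model {i}: relative likelihood = {p:.3f}")
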
def get_goodness_of_fit_stats(datasets, instrument_spectral_info):
    """
    Evaluate the goodness-of-fit statistics of the model fit to the dataset.

    We first use the get_ts_target function to get the total test statistic for
    the (observed) best fit of the model to the data, and for the (expected)
    perfect fit of model and data (model = data), in the given target source
    region/pixel.

    We then evaluate the total number of degrees of freedom of the fit as the
    difference between the number of relevant energy bins used in the evaluation
    and the number of free model parameters.

    The fit statistic difference is used as the test statistic value for the
    get_chi2_sig_pval function, along with the total number of degrees of freedom,
    to get the final goodness-of-fit statistics.

    The fit statistics information is updated in the dict object provided and
    a logging message is returned.

    Parameters
    ----------
    datasets: `gammapy.datasets.Datasets`
        List of Datasets object, which can contain 3D and/or 1D datasets.
    instrument_spectral_info: dict
        Dict of information for storing relevant fit stats.

    Returns
    -------
    instrument_spectral_info: dict
        Filled dict of information with relevant fit statistics.
    stat_message: str
        String for logging the fit statistics.
    """
    stat_best_fit, stat_max_fit = get_ts_target(datasets)

    instrument_spectral_info["max_fit_stat"] = stat_max_fit
    instrument_spectral_info["best_fit_stat"] = stat_best_fit
    ndof = instrument_spectral_info["DoF"]
    stat_diff_gof = stat_best_fit - stat_max_fit

    fit_chi2_sig, fit_pval = get_chi2_sig_pval(stat_diff_gof, ndof)

    instrument_spectral_info["fit_chi2_sig"] = fit_chi2_sig
    instrument_spectral_info["fit_pval"] = fit_pval

    stat_message = "The Chi2/DoF value of the goodness of fit is "
    stat_message += f"{stat_diff_gof:.2f}/{ndof}\nand the p-value is {fit_pval:.3e} "
    stat_message += f"and in significance {fit_chi2_sig:.2f} sigmas"
    stat_message += f"\nwith best fit TS (Observed) as {stat_best_fit:.3f} "
    stat_message += f"and max fit TS (Expected) as {stat_max_fit:.3f}"

    return instrument_spectral_info, stat_message

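The dict passed in is expected to already carry the total "DoF" entry (relevant energy bins minus free model parameters). The following sketch mirrors the bookkeeping done in this function, with hypothetical test-statistic totals standing in for the get_ts_target(datasets) call:

# Sketch of the bookkeeping done in get_goodness_of_fit_stats, with hypothetical
# totals standing in for the get_ts_target(datasets) call.
from asgardpy.stats.stats import get_chi2_sig_pval

stat_best_fit, stat_max_fit = 310.4, 285.1  # assumed observed / expected TS totals
info = {"DoF": 25}  # assumed: relevant energy bins minus free model parameters

stat_diff_gof = stat_best_fit - stat_max_fit
info["fit_chi2_sig"], info["fit_pval"] = get_chi2_sig_pval(stat_diff_gof, info["DoF"])
print(f"Chi2/DoF = {stat_diff_gof:.2f}/{info['DoF']}, p-value = {info['fit_pval']:.3e}")
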
def get_ts_target(datasets):
    """
    From a given list of DL4 datasets, with associated models assumed, estimate
    the total test statistic values, in the given target source region/pixel,
    for the (observed) best fit of the model to the data, and for the (expected)
    perfect fit of model and data (model = data).

    For consistency in the evaluation of the statistic values, we will use the
    basic fit statistic functions in Gammapy for Poisson data:

    * `cash <https://docs.gammapy.org/1.2/api/gammapy.stats.cash.html>`_

    * `wstat <https://docs.gammapy.org/1.2/api/gammapy.stats.wstat.html>`_

    For the different types of statistics used in Gammapy for 3D/1D datasets,
    and for our use case of getting the best fit and the perfect fit, we will pass
    the appropriate values, by adapting to the following methods,

    * Best Fit (Observed):

        * `Cash stat_array <https://docs.gammapy.org/1.2/api/gammapy.datasets.MapDataset.html#gammapy.datasets.MapDataset.stat_array # noqa>`_

        * `Wstat stat_array <https://docs.gammapy.org/1.2/api/gammapy.datasets.MapDatasetOnOff.html#gammapy.datasets.MapDatasetOnOff.stat_array # noqa>`_

    * Perfect Fit (Expected):

        * `Cash stat_max <https://docs.gammapy.org/1.2/api/gammapy.stats.CashCountsStatistic.html#gammapy.stats.CashCountsStatistic.stat_max # noqa>`_

        * `Wstat stat_max <https://docs.gammapy.org/1.2/api/gammapy.stats.WStatCountsStatistic.html#gammapy.stats.WStatCountsStatistic.stat_max # noqa>`_

    Parameters
    ----------
    datasets: `gammapy.datasets.Datasets`
        List of Datasets object, which can contain 3D and/or 1D datasets.

    Returns
    -------
    stat_best_fit: float
        Total test statistic of the best fit of the model to data, summed
        over all energy bins.
    stat_max_fit: float
        Total test statistic of the perfect fit of the model to data, summed
        over all energy bins.
    """  # noqa
    stat_best_fit = 0
    stat_max_fit = 0

    for data in datasets:
        if data.stat_type != "chi2":
            # Assuming that the Counts Map is created with the target source as its center
            region = data.counts.geom.center_skydir

            if data.stat_type == "cash":
                counts_on = (data.counts.copy() * data.mask).get_spectrum(region).data
                mu_on = (data.npred() * data.mask).get_spectrum(region).data

                stat_best_fit += np.nansum(cash(n_on=counts_on, mu_on=mu_on).ravel())
                stat_max_fit += np.nansum(cash(n_on=counts_on, mu_on=counts_on).ravel())

            elif data.stat_type == "wstat":
                counts_on = (data.counts.copy() * data.mask).get_spectrum(region).data
                counts_off = np.nan_to_num((data.counts_off * data.mask).get_spectrum(region)).data

                # alpha is normally evaluated from the acceptance ratios, and the
                # background from the given alpha and counts_off. For alpha to have
                # the same shape (in the target region), it is re-evaluated here
                # from the background and counts_off spectra.
                bkg = np.nan_to_num((data.background * data.mask).get_spectrum(region))

                with np.errstate(invalid="ignore", divide="ignore"):
                    alpha = bkg / counts_off
                mu_signal = np.nan_to_num((data.npred_signal() * data.mask).get_spectrum(region)).data
                max_pred = counts_on - bkg

                stat_best_fit += np.nansum(wstat(n_on=counts_on, n_off=counts_off, alpha=alpha, mu_sig=mu_signal))
                stat_max_fit += np.nansum(wstat(n_on=counts_on, n_off=counts_off, alpha=alpha, mu_sig=max_pred))
        else:
            # For FluxPointsDataset
            stat_best_fit += np.nansum(data.stat_array())
            stat_max_fit += len(data.data.dnde.data)

    return stat_best_fit, stat_max_fit
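As a standalone illustration of the best-fit versus perfect-fit (model = data) totals computed above, the same Gammapy cash statistic can be evaluated directly on plain arrays; all counts below are made up:

# Standalone sketch of the "best fit" vs "perfect fit" (model = data) totals,
# using the Gammapy cash statistic on made-up count arrays.
import numpy as np
from gammapy.stats.fit_statistics import cash

counts_on = np.array([12.0, 30.0, 7.0, 3.0])  # hypothetical ON counts per energy bin
mu_on = np.array([10.5, 33.0, 6.2, 2.4])      # hypothetical best-fit predicted counts

stat_best_fit = np.nansum(cash(n_on=counts_on, mu_on=mu_on))
stat_max_fit = np.nansum(cash(n_on=counts_on, mu_on=counts_on))  # perfect fit: model = data
# The difference (best fit minus perfect fit) is the goodness-of-fit test statistic used above.
print(f"Cash: best fit = {stat_best_fit:.2f}, perfect fit = {stat_max_fit:.2f}")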