"""
Module for performing some statistic functions.
"""
import numpy as np
from gammapy.stats.fit_statistics import cash, wstat
from scipy.stats import chi2, norm
__all__ = [
"check_model_preference_aic",
"check_model_preference_lrt",
"fetch_pivot_energy",
"get_chi2_sig_pval",
"get_goodness_of_fit_stats",
"get_ts_target",
]
[docs]
def get_chi2_sig_pval(test_stat, ndof):
"""
Using the log-likelihood value for a model fitting to data, along with the
total degrees of freedom, evaluate the significance value in terms of gaussian
distribution along with one-tailed p-value for the fitting statistics.
In Gammapy, for 3D analysis, cash statistics is used, while for 1D analysis,
wstat statistics is used. Check the documentation for more details
https://docs.gammapy.org/1.3/user-guide/stats/index.html
Parameters
----------
test_stat: float
The test statistic (-2 ln L) value of the fitting.
ndof: int
Total number of degrees of freedom.
Returns
-------
chi2_sig: float
significance (Chi2) of the likelihood of fit model estimated in
Gaussian distribution.
pval: float
p-value for the model fitting
"""
pval = chi2.sf(test_stat, ndof)
chi2_sig = norm.isf(pval / 2)
return chi2_sig, pval
[docs]
def check_model_preference_lrt(test_stat_1, test_stat_2, ndof_1, ndof_2):
"""
Log-likelihood ratio test. Checking the preference of a "nested" spectral
model2 (observed), over a primary model1.
Parameters
----------
test_stat_1: float
The test statistic (-2 ln L) of the Fit result of the primary spectral model.
test_stat_2: float
The test statistic (-2 ln L) of the Fit result of the nested spectral model.
ndof_1: int
Number of degrees of freedom for the primary model
ndof_2: int
Number of degrees of freedom for the nested model
Returns
-------
p_value: float
p-value for the ratio of the likelihoods
gaussian_sigmas: float
significance (Chi2) of the ratio of the likelihoods estimated in
Gaussian distribution.
n_dof: int
number of degrees of freedom or free parameters between primary and
nested model.
"""
n_dof = ndof_1 - ndof_2
if n_dof < 1:
print(f"DoF is lower in {ndof_1} compared to {ndof_2}")
return np.nan, np.nan, n_dof
gaussian_sigmas, p_value = get_chi2_sig_pval(test_stat_1 - test_stat_2, n_dof)
return p_value, gaussian_sigmas, n_dof
[docs]
def check_model_preference_aic(list_stat, list_dof):
"""
Akaike Information Criterion (AIC) preference over a list of stat and DoF
(degree of freedom) to get relative likelihood of a given list of best-fit
models.
Parameters
----------
list_wstat: list
List of stat or -2 Log likelihood values for a list of models.
list_dof: list
List of degrees of freedom or list of free parameters, for a list of models.
Returns
-------
list_rel_p: list
List of relative likelihood probabilities, for a list of models.
"""
list_aic_stat = []
for stat, dof in zip(list_stat, list_dof, strict=True):
aic_stat = stat + 2 * dof
list_aic_stat.append(aic_stat)
list_aic_stat = np.array(list_aic_stat)
aic_stat_min = np.min(list_aic_stat)
list_b_stat = []
for aic in list_aic_stat:
b_stat = np.exp((aic_stat_min - aic) / 2)
list_b_stat.append(b_stat)
list_b_stat = np.array(list_b_stat)
list_rel_p = []
for b_stat in list_b_stat:
rel_p = b_stat / np.sum(list_b_stat)
list_rel_p.append(rel_p)
list_rel_p = np.array(list_rel_p)
return list_rel_p
[docs]
def get_goodness_of_fit_stats(datasets, instrument_spectral_info):
"""
Evaluating the Goodness of Fit statistics of the fitting of the model to
the dataset.
We first use the get_ts_target function to get the total test statistic for
the (observed) best fit of the model to the data, and the (expected)
perfect fit of model and data (model = data), for the given target source
region/pixel.
We then evaluate the total number of Degrees of Freedom for the Fit as the
difference between the number of relevant energy bins used in the evaluation
and the number of free model parameters.
The fit statistics difference is used as the test statistic value for
get_chi2_sig_pval function along with the total number of degrees of freedom
to get the final statistics for the goodness of fit.
The fit statistics information is updated in the dict object provided and
a logging message is passed.
Parameter
---------
datasets: `gammapy.datasets.Datasets`
List of Datasets object, which can contain 3D and/or 1D datasets
instrument_spectral_info: dict
Dict of information for storing relevant fit stats
Return
------
instrument_spectral_info: dict
Filled Dict of information with relevant fit statistics
stat_message: str
String for logging the fit statistics
"""
stat_best_fit, stat_max_fit = get_ts_target(datasets)
instrument_spectral_info["max_fit_stat"] = stat_max_fit
instrument_spectral_info["best_fit_stat"] = stat_best_fit
ndof = instrument_spectral_info["DoF"]
stat_diff_gof = stat_best_fit - stat_max_fit
fit_chi2_sig, fit_pval = get_chi2_sig_pval(stat_diff_gof, ndof)
instrument_spectral_info["fit_chi2_sig"] = fit_chi2_sig
instrument_spectral_info["fit_pval"] = fit_pval
stat_message = "The Chi2/dof value of the goodness of Fit is "
stat_message += f"{stat_diff_gof:.2f}/{ndof}\nand the p-value is {fit_pval:.3e} "
stat_message += f"and in Significance {fit_chi2_sig:.2f} sigmas"
stat_message += f"\nwith best fit TS (Observed) as {stat_best_fit:.3f} "
stat_message += f"and max fit TS (Expected) as {stat_max_fit:.3f}"
return instrument_spectral_info, stat_message
[docs]
def get_ts_target(datasets):
"""
From a given list of DL4 datasets, with assumed associated models, estimate
the total test statistic values, in the given target source region/pixel,
for the (observed) best fit of the model to the data, and the (expected)
perfect fit of model and data (model = data).
For consistency in the evaluation of the statistic values, we will use the
basic Fit Statistic functions in Gammapy for Poisson Data:
* `cash <https://docs.gammapy.org/1.3/api/gammapy.stats.cash.html>`_
* `wstat <https://docs.gammapy.org/1.3/api/gammapy.stats.wstat.html>`_
For the different type of Statistics used in Gammapy for 3D/1D datasets,
and for our use case of getting the best fit and perfect fit, we will pass
the appropriate values, by adapting to the following methods,
* Best Fit (Observed):
* `Cash stat_array <https://docs.gammapy.org/1.3/api/gammapy.datasets.MapDataset.html#gammapy.datasets.MapDataset.stat_array # noqa>`_
* `Wstat stat_array <https://docs.gammapy.org/1.3/api/gammapy.datasets.MapDatasetOnOff.html#gammapy.datasets.MapDatasetOnOff.stat_array # noqa>`_
* Perfect Fit (Expected):
* `Cash stat_max <https://docs.gammapy.org/1.3/api/gammapy.stats.CashCountsStatistic.html#gammapy.stats.CashCountsStatistic.stat_max # noqa>`_
* `Wstat stat_max <https://docs.gammapy.org/1.3/api/gammapy.stats.WStatCountsStatistic.html#gammapy.stats.WStatCountsStatistic.stat_max # noqa>`_
Parameter
---------
datasets: `gammapy.datasets.Datasets`
List of Datasets object, which can contain 3D and/or 1D datasets
Return
------
stat_best_fit: float
Total sum of test statistic of the best fit of model to data, summed
over all energy bins.
stat_max_fit: float
Test statistic difference of the perfect fit of model to data summed
over all energy bins.
""" # noqa
stat_best_fit = 0
stat_max_fit = 0
for data in datasets:
match data.stat_type:
case "cash":
# Assuming that the Counts Map is created with the target source as its center
region = data.counts.geom.center_skydir
counts_on = (data.counts.copy() * data.mask).get_spectrum(region).data
mu_on = (data.npred() * data.mask).get_spectrum(region).data
stat_best_fit += np.nansum(cash(n_on=counts_on, mu_on=mu_on).ravel())
stat_max_fit += np.nansum(cash(n_on=counts_on, mu_on=counts_on).ravel())
case "wstat":
# Assuming that the Counts Map is created with the target source as its center
region = data.counts.geom.center_skydir
counts_on = (data.counts.copy() * data.mask).get_spectrum(region).data
counts_off = np.nan_to_num((data.counts_off * data.mask).get_spectrum(region)).data
# alpha is evaluated by acceptance ratios, and
# Background is evaluated with given alpha and counts_off,
# but for alpha to be of the same shape (in the target region),
# it will be reevaluated
bkg = np.nan_to_num((data.background * data.mask).get_spectrum(region))
with np.errstate(invalid="ignore", divide="ignore"):
alpha = bkg / counts_off
mu_signal = np.nan_to_num((data.npred_signal() * data.mask).get_spectrum(region)).data
max_pred = counts_on - bkg
stat_best_fit += np.nansum(wstat(n_on=counts_on, n_off=counts_off, alpha=alpha, mu_sig=mu_signal))
stat_max_fit += np.nansum(wstat(n_on=counts_on, n_off=counts_off, alpha=alpha, mu_sig=max_pred))
case "chi2":
# For FluxxPointsDataset
stat_best_fit += np.nansum(data.stat_array())
stat_max_fit += len(data.data.dnde.data)
return stat_best_fit, stat_max_fit
[docs]
def fetch_pivot_energy(analysis):
"""
Using an 'AsgardpyAnalysis' object to get the pivot energy for a given dataset
and fit model, using the pivot_energy function.
Returns
-------
pivot energy : `~astropy.units.Quantity`
The energy at which the statistical error in the computed flux is smallest.
If no minimum is found, NaN will be returned.
"""
# Check if DL4 datasets are created, and if not, only run steps till Fit
if len(analysis.datasets) == 0:
steps = [step for step in analysis.config.general.steps if step != "flux-points"]
analysis.run(steps)
else:
analysis.run(["fit"])
analysis.get_correct_intrinsic_model()
pivot = analysis.model_deabs.spectral_model.pivot_energy
return pivot