Hide code cell content

import mmf_setup

mmf_setup.nbinit()
import logging

logging.getLogger("matplotlib").setLevel(logging.CRITICAL)
%matplotlib inline
import numpy as np, matplotlib.pyplot as plt

This cell adds /home/docs/checkouts/readthedocs.org/user_builds/wsu-phys-581-computation/checkouts/latest/src to your path, and contains some definitions for equations and some CSS for styling the notebook. If things look a bit strange, please try the following:

  • Choose "Trust Notebook" from the "File" menu.
  • Re-execute this cell.
  • Reload the notebook.

Model Fitting E.g. 1: Cosine#

Here we go through the step-by-step procedure for fitting data to the following model:

\[\begin{gather*} y_n = f(t_n, \vect{a}) + e_n, \qquad f(t, \vect{a}) = A\cos(\omega t + \phi) + c, \qquad \vect{a} = \begin{pmatrix} \omega\\ c\\ A\\ \phi \end{pmatrix}. \end{gather*}\]

We shall assume that the errors are independently distributed, considering both the case of gaussian errors, and non-gaussian:

\[\begin{gather*} p_n(e_n) = \frac{p_e(e_n/\sigma_n)}{\sigma_n}, \qquad p_e(x) = \overbrace{\frac{e^{-x^2/2}}{\sqrt{2\pi}}}^{\text{gaussian}}. \end{gather*}\]

We start with the gaussian case, and assume that the errors are sufficiently small that the standard analysis techniques based on \(\chi^2(\vect{a})\) apply as discussed in chapter 15 of [Press et al., 2007]. We then test these using Monte Carlo (MC) and then use an MCMC approach to solve the general case.

To make our examples mode modular, I am going to use a class to represent the mock experiment. We can specify the nature of the errors, and generate sample experiments for our analysis.

from IPython.display import Latex
from collections import namedtuple
from functools import partial
import uncertainties
from uncertainties import unumpy as unp
from scipy.optimize import least_squares
import scipy.stats
sp = scipy


class Experiment:
    """Class representing a mock experiment.
    
    Attributes
    ----------
    a : Params
        Exact parameter values.
    ts : array-like
        Times at which to sample the function.
    ys : array-like
        Exact values sampled at the specified times.
    ydata : array-like
        Simulated measurement.
    """ 
    Params = namedtuple("Params", ["w", "c", "A", "phi"])
    labels = [r"$\omega$", r"$c$", r"$A$", r"$\phi$"]

    def __init__(self, 
                 random=np.random.default_rng(seed=2).normal, 
                 ts=np.linspace(0, 10.0, 7),
                 sigmas=0.5,
                 a=Params(w=2 * np.pi / 5, c=2.1, A=3.4, phi=5.6)):
        """Constructor.
        
        Arguments
        ---------
        random : function
            Random generator for the errors.  Should return 
            sample errors from a normalized distribution.  Will
            be scaled by sigmas to get the actual errors.
        ts : array-like
            Range of times at which to sample the function.
        sigmas : float or array-like
            Errors.
        """
        self.random = random
        self.ts = np.asarray(ts)
        self.a = self.Params(*a)
        self.ys = self.f(self.ts, *self.a)
        
        # Allow sigmas to be a float
        self.sigmas = np.zeros_like(ts) + sigmas
        self.ydata = self.measure()
    
    def f(self, ts=None, *a, np=np):
        """Model function."""
        if not a:
            a = self.a
        if ts is None:
            ts = self.ts

        a = self.Params(*a)
            
        return a.c + a.A * np.cos(a.w * ts + a.phi)
        
    def measure(self, ys=None, a=None, sigmas=None):
        """Return `ydata` corresponding to a simulated measuement."""
        if ys is None:
            if a is None:
                ys = self.ys
            else:
                ys = self.f(self.ts, *a)
        if sigmas is None:
            sigmas = self.sigmas
        return self.random(loc=ys, scale=sigmas)
        
    def residuals(self, a, ydata=None, sigmas=None):
        """Return the residuals for least_square.
        
        May be overloaded to implement robust fitting models.
        
        Arguments
        ---------
        a : Params
            Parameter estimate.
        """
        if ydata is None:
            ydata = self.ydata
        if sigmas is None:
            sigmas = self.sigmas
        return (ydata - self.f(self.ts, *a))/sigmas
    
    FitResults = namedtuple("FitResults", 
                            ['a', 'C', 'chi2_r', 'nu', 'a_'])
    
    def fit(self, ydata=None, sigmas=None):
        """Return `FitResults` from least_squares fit.
        
        Arguments
        ---------
        ydata : array-like, optional
            Experimental data.  Uses `self.ydata` if not provided.

        Returns
        -------
        a : Params
            Parameter estimates.
        C : array
            Covariance matrix.
        chi2_r : float
            Sum of the residuals normalized by `nu`.
        nu : int
            Effectve number of degrees of freedom.
        a_ : Params
            Correlated parameter estimates using
            `uncertainties.correlated_values`.
        """
        if ydata is None:
            ydata = self.ydata
        if sigmas is None:
            sigmas = self.sigmas
            
        # Using simple numpy functions, we can compute the jacobian
        # with the complex-step method.
        fun = partial(self.residuals, ydata=ydata, sigmas=sigmas)
        res = least_squares(fun=fun, x0=self.a, jac='cs')
        a = self.Params(*res.x)
        C = np.linalg.inv(res.jac.T @ res.jac)
        a_ = self.correlated_values(a, C)
        r = self.residuals(a, ydata=ydata, sigmas=sigmas)
        nu = len(r) - len(a)
        chi2_r = np.sum(abs(r)**2) / nu
        
        return self.FitResults(a=a, C=C, chi2_r=chi2_r, nu=nu, a_=a_)

    def correlated_values(self, a, C):
        """Return `Params()` as correlated values using 
        `uncertainties.correlated_values`
        """
        return self.Params(*uncertainties.correlated_values(a, C))
        
    def sample_chi2_r(self, Nsamples=2000, a=None):
        """Return samples of the chi2_r obtained by repeated fitting.
        
        Arguments
        ---------
        a : Params, optional:
            Parameter values.  If not provided, `self.a` will be
            used, otherwise we will assume that the supplied
            parameters are to be used as a proxy model.
        Nsamples : int
            Number of samples.
        """
        if a is None:
            a = self.a
        chi2_rs = [self.fit(ydata=self.measure(a=a)).chi2_r
                   for n in range(Nsamples)]
        return chi2_rs
        
    def plot(self, a_=None, a_sigmas=[1, 2, 3, 4], ax=None):
        """Display the current experiment.
        
        Arguments
        ---------
        a_ : Params
            Correlated best fit parameters.  If provided, this fit 
            will be shown with the corresponding confidence regions.
        a_sigmas : array-like
            Bands to show.
        """
        if ax is None:
            fig, ax = plt.subplots()

        ts = np.linspace(self.ts.min(), self.ts.max())
        ax.errorbar(self.ts, self.ydata, yerr=self.sigmas, 
                    fmt="C0.", ecolor="C1", label="data")
        ax.plot(ts, self.f(ts), "-C2", label="exact")
        
        if a_ is not None:
            ys_ = self.f(ts, *a_, np=unp)
            fc = "C3"
            alphas = np.linspace(0.5, 0.1, len(a_sigmas))
            for sigma, alpha in zip(a_sigmas, alphas):
                self.plot_band(ts, ys_, sigma=sigma, ax=ax, 
                               fc=fc, alpha=alpha,
                               label=fr"${sigma}\sigma$ band")

        ax.set(xlabel="$t$", ylabel="$f(t)$")
        
        ax.legend()
        return ax
    
    def plot_band(self, t, y_, sigma=1, ax=None, **kw):
        """Plot a band using correlated errors in y_."""
        if ax is None:
            ax = plt.gca()
        y = unp.nominal_values(y_)
        dy = unp.std_devs(y_)
        ax.fill_between(t, y-dy*sigma, y+dy*sigma, **kw)
        return ax

Linear Gaussian Approximation#

We start with an “exact” solution, then generate some data to analyze at \(N_t\) equally spaced points for \(t \in [0, t_\max]\).

expt = Experiment(ts=np.linspace(0, 10.0, 7), 
                  random=np.random.default_rng(seed=2).normal)

fig, ax = plt.subplots(figsize=(10, 5))
expt.plot(ax=ax);
../_images/e0adab5bc3152bc4204e5b6fd602dca03c5cbf88e2edf4c5446bf9a4e75434b2.png

Chi Square Fit#

Now we do the fit. We use scipy.optimize.least_squares() which requires a function that returns the list of weighted residuals:

\[\begin{gather*} r_n = \frac{y_n - f(x_n, \vect{a})}{\sigma_n}. \end{gather*}\]

This minimizes the following cost function, for which the Hessian is the inverse covariance matrix:

\[\begin{gather*} \frac{\chi^2}{2} = \frac{1}{2}\sum_{n} r_n^2, \qquad \mat{C}^{-1} = \mat{J}^T\cdot \mat{J}. \end{gather*}\]
res = expt.fit()

# Since our errors are gaussian and small, we expect that the 
# standard chi2 distribution will hold, so we can compute the
# Q values using the corresponding CDF:
Q = 1 - sp.stats.chi2.cdf(res.chi2_r*res.nu, df=res.nu)

Latex(rf"$\chi^2_r = {res.chi2_r:.2g}, \qquad Q = {Q:.2g}$")
\[\chi^2_r = 2.3, \qquad Q = 0.08\]

Here we have used the Chi-Squared Distribution \(P_{\nu,\chi^2}(\chi^2)\) to calculate the chi-square probability \(Q\), sometimes called the tail distribution:

\[\begin{gather*} Q &= \int_{\chi^2}^{\infty} P_{\nu, \chi^2}(\chi^2)\d{\chi^2} = 1 - \underbrace{\int_{0}^{\chi^2} P_{\nu, \chi^2}(\chi^2)\d{\chi^2}}_{\text{CDF}} &= \int_{\chi^2_r}^{\infty} P_{\nu, \chi^2_r}(\chi^2_r)\d{\chi^2_r}. \end{gather*}\]

This is the probability that \(\chi^2\) exceeds the value found, and is the complement of the cumulative distribution function (CDF). This is sometimes called a one-sided \(p\)-value and is used as a test statistic to assess whether or not the model is reasonable. As discussed in section 15.1 of [Press et al., 2007], values of \(Q> 0.001\) are not too bad:

Truly wrong models have values will often be rejected with vastly smaller values of \(Q\), \(10^{-18}\), say.

Here we plot the distribution of the reduced chi squared for our dataset with \(\nu = N - M\) degrees of freedom:

\[\begin{gather*} \chi^2_r = \frac{\chi^2}{\nu},\qquad P_{\nu,\chi^2_r}(\chi^2_r) = \nu P_{\nu, \chi^2}(\nu\chi^2_r). \end{gather*}\]

Hide code cell source

# Generate actual distribution of chi2_r using MC

Ns = 2000  # number of samples
chi2_rs = expt.sample_chi2_r()

res = expt.fit()
nu, chi2_r = res.nu, res.chi2_r

_chi2_r = np.linspace(0, 3, 500)
_i = np.where(_chi2_r >= chi2_r)[0][0]
fig, ax = plt.subplots()
ax.plot(_chi2_r, nu*sp.stats.chi2.pdf(nu*_chi2_r, df=nu), 
        "-C0", label=rf"PDF ($\nu={nu}$)")
ax.plot(_chi2_r, nu*sp.stats.chi2.pdf(nu*_chi2_r, df=nu-1), 
        ":C0", label=rf"$\nu={nu}\pm 1$")
ax.plot(_chi2_r, nu*sp.stats.chi2.pdf(nu*_chi2_r, df=nu+1), 
        ":C0")
ax.plot(_chi2_r, sp.stats.chi2.cdf(nu*_chi2_r, df=nu), 
        "--C1", label="CDF")
kw = dict(bins=50, histtype='step', alpha=0.8, density=True)
ax.hist(chi2_rs, ec="C2", label=f"{Ns} samples", **kw)
ax.fill_between(_chi2_r[_i:], nu*sp.stats.chi2.pdf(nu*_chi2_r[_i:], df=nu), 
                fc="C0", alpha=0.5)
ax.axvline([chi2_r], ls=":", c="y")
ax.axhline([1-Q], ls=":", c="y")
ax.set(xlim=(0, 4),
       xlabel=r"$\chi^2_r=\chi^2/\nu$", 
       title=fr"Distribution of $\chi^2_r$ with " + 
             fr"$\nu={nu} = {len(expt.ts)}-{len(res.a)}$ degrees of freedom")
ax.legend();
../_images/09d3a0503663031d663d22de2cf20f312139a15a449f05f309b17d82f789203d.png

Parameter Estimates and Uncertainties#

The results of the fit are characterized by the best-fit parameter values \(\bar{\vect{a}}\) and the covariance matrix \(\mat{C}\) such that, to lowest order, \(\chi^2(\vect{a})\) is quadratic:

\[\begin{gather*} \overbrace{\Delta \chi^2(\vect{a})}^{\chi^2(\vect{a})- \chi^2(\bar{\vect{a}})} \approx \delta\vect{a}^T \mat{C}^{-1} \overbrace{\delta\vect{a}}^{\vect{a}-\bar{\vect{a}}}. \end{gather*}\]

The confidence region corresponding to confidence level \(p\) is given by the region within the contour

\[\begin{gather*} \Delta \chi^2(\vect{a}) \approx \delta\vect{a}^T \mat{C}^{-1} \delta\vect{a} < \chi^2_{p} \end{gather*}\]

where the contour \(\chi^2_{p}\) is carefully chosen so that this region contains fraction \(p\) of the samples. This can bed computed from the inverse of the cumulative distribution function (CDF) for the chi squared distribution with \(\nu\) degrees of freedom:

\[\begin{gather*} \int_{\rlap{P(\vect{a}) < P_p}}\d^{\nu}\vect{a}\; P(\vect{a}) = \int_0^{\chi^2_p}\d{\chi^2}P_{\nu, \chi^2}(\chi^2) = p(\chi^2_p) \end{gather*}\]

if:

  1. the fit is good,

  2. the posterior distribution is well approximated by a multivariate normal distribution (i.e. if the experimental errors are gaussian and the model is linear in the region of these errors), and

  3. you use the correct value of \(\nu\) corresponding to the number of free parameters you wish to consider. If you want to consider a marginal distribution of \(\tilde{\nu} < \nu\) parameters, then just extract the corresponding \(\tilde{nu}\) rows and columns from \(\mat{C}\) (not \(\mat{C}^{-1}\)).

In our case, the errors are gaussian as we saw above, so we can proceed with the simplified approach. What we do here is use the uncertainties package to perform a forward error propagation of the parameter covariance matrix \(\mat{C}\) through to the actual function values for the model \(y = f(x, \vect{a})\). We then treat each \(y\) as a single parameter (\(\tilde{\nu}=1\)) and use the \(n\sigma\) values where \(\sigma\) is the standard deviation computed by the uncertainties package to produce a \(1\sigma\) and \(2\sigma\) uncertainty band corresponding to our parameter values:

a_ = expt.correlated_values(res.a, res.C)
ar_ = expt.correlated_values(res.a, res.C*res.chi2_r)

fig, axs = plt.subplots(2, 1, figsize=(10, 10))
for _a_, ax in zip((a_, ar_), axs):
    expt.plot(a_=_a_, ax=ax)
axs[0].set(title=fr"$\chi^2_r={res.chi2_r:.2f}$, $Q={Q:.2g}$");
axs[1].set(title=r"Rescaled covariance so $\chi^2_r=1$");
../_images/0c9caa145487c8caf3dc302ca30739ecbb13817184c312cfd7e4ddd713e68cbf.png

In the lower plot, we have rescaled the covariance matrix \(\mat{C} \rightarrow \chi^2_r \mat{C}\) as if we did not trust our error estimates, and interpreted the large \(\chi^2_r\) value as a sign that we underestimated the errors.

We can now look at some of the marginal distributions. One can use a set of corner plots to summarize this information, but we first do this explicitly.

Single Parameter Distributions#

We start with the individual parameter marginal distributions. In this case \(\tilde{\nu} = 1\). Since our posterior is well approximated by a gaussian, we can simply look at the standard deviations given as the square root of the corresponding diagonal entry of the covariance matrix: \(\sigma_i = \sqrt{C_{ii}}\). This is nicely summarized by printing the parameters using the uncertainties package as we show below:

res = expt.fit()
a_, C = res.a_, res.C

fig, ax = plt.subplots()
_x = np.linspace(0, 7, 200)
for _name, _a, _Cii, a in zip(a_._fields, a_, np.diag(C), expt.a):
    print(f"{_name}: {_a:.2uS}, (sqrt(C_ii) = {np.sqrt(_Cii):.2g})")
    l, = ax.plot(_x, sp.stats.norm.pdf(_x, loc=_a.n, scale=_a.s), label=_name)
    ax.axvline([a], ls='--', c=l.get_c())
ax.legend();
w: 1.231(26), (sqrt(C_ii) = 0.026)
c: 2.15(19), (sqrt(C_ii) = 0.19)
A: 3.13(27), (sqrt(C_ii) = 0.27)
phi: 5.60(15), (sqrt(C_ii) = 0.15)
../_images/45b7e1a0f3cfec69046f89c6da8b6f3815d8b3fbac2d447c1d4998d82fd23995.png

Pairwise Distributions#

Supposed we are most interested in the frequency \(\omega\) and phase \(\phi\). We can consider the marginal distribution of these two parameters by extracting the appropriate columns and rows. Note that we must now choose our contours using \(\tilde{\nu} = 2\). We display these as contours of \(\chi^2\):

res = expt.fit()

a_ = res.a_

i, j = map(a_._fields.index, ['w', 'phi'])
w_, phi_ = a_.w, a_.phi

# Extract rows and colums.  Note: C[[i, j], [i, j]] does not work.
Cij = C[[i, j], :][:, [i, j]]

# We can also do this with the uncertainties package
from uncertainties import covariance_matrix
assert np.allclose(Cij, covariance_matrix([w_, phi_]))

_sigma = 5
ws, phis = np.meshgrid(
  np.linspace(w_.n-_sigma*w_.s, w_.n+_sigma*w_.s, 200),
  np.linspace(phi_.n-_sigma*phi_.s, phi_.n+_sigma*phi_.s, 200))

# Deviation matrix to calculate chi2
dws_phis = np.array([ws-w_.n, phis-phi_.n])

# This is just the matrix product, but we want to do this
# over a bunch of parameter values, so we use einsum.
dchi2 = np.einsum('ij,ixy,jxy->xy', 
                   np.linalg.inv(Cij), dws_phis, dws_phis)

# Use a normal distribution to compute the confidence levels
sigmas_ = np.array([1, 2, 3, 4])
ps = sp.stats.norm.cdf(sigmas_) - sp.stats.norm.cdf(-sigmas_)

# Now use chi2 distribution to get the corresponding contours
levels = sp.stats.chi2.ppf(ps, df=2)
fig, ax = plt.subplots(figsize=(10, 6))
_cs = ax.contour(ws, phis, dchi2, levels=levels, cmap="winter")
fmt = dict([(_l, fr"${_n}\sigma$")
            for _l, _n, _p in zip(levels, sigmas_, ps)])

# Draw single-parameter confidence limits to show that
# they are smaller.
for _n, _sigma in enumerate(sigmas_):
    kw = dict(c=_cs.get_edgecolors()[_n], 
              alpha=0.5, zorder=-100)
    for _s in [1, -1]:
        ax.axhline(phi_.n + _s*_sigma*phi_.s, ls=":", **kw)
        ax.axvline(w_.n + _s*_sigma*w_.s, ls="--", **kw)
ax.clabel(_cs, _cs.levels, inline=True, fmt=fmt, fontsize=10)
ax.set(xlabel="$\omega$", ylabel="$\phi$");
../_images/707095c95fe1657f8022f130fcc4b501a61ae660541ea1a35281eeb4a60be1fa.png

We show the \(1\sigma\), \(2\sigma\), \(3\sigma\), and \(4\sigma\) \(\tilde{\nu} = 2\)-parameter confidence regions demonstrating the correlations between \(\omega\) and \(\phi\). For comparison, we show the corresponding 1-parameter regions as dotted lines. E.g. for the \(1\sigma\) confidence regions, the same amount of posterior probability (68.27%) lies within the center blue ellipse as lies between the central vertical blue dashed lines, or between the central horizontal blue dotted lines.

In other words, the single-parameter confidence regions include points outside of the 2-parameter ellipse, hence must be smaller as shown to keep the same probability.

Here is the full corner plot using phys_581.plotting.corner_plot():

from phys_581.plotting import corner_plot
res = expt.fit()

fig, axs = corner_plot(res.a_, labels=expt.labels)
../_images/9ed6de6e77f7491200d2b200581a11bcfd47f7316dae53f2c58618ca980830ee.png

Change of Variables#

To compute a change of variables, we need to keep track of the Jacobian of the transformation so we can properly transform the covariance matrix. The uncertainties package does this automatically, computing the derivatives with automatic differentiation. This is only valid if the errors are small enough that the model is approximately linear, but is very convenient. Here we replace \(\phi\) with the location of the peak \(t_0\):

\[\begin{gather*} \omega t + \phi \equiv \omega(t - t_0) \mod 2\pi, \qquad t_0 = -\phi/\omega. \end{gather*}\]
# make sure t0_ is positive and small by making phi0_ between -2pi and 0
phi0_ = phi_ - phi_.n + phi_.n % (2*np.pi) - 2*np.pi
t0_ = - phi0_ / w_

Cij = covariance_matrix([w_, t0_])

_sigma = 5
ws, t0s = np.meshgrid(
  np.linspace(w_.n-_sigma*w_.s, w_.n+_sigma*w_.s, 200),
  np.linspace(t0_.n-_sigma*t0_.s, t0_.n+_sigma*t0_.s, 200))

# Deviation matrix to calculate chi2
dws_t0s = np.array([ws-w_.n, t0s-t0_.n])
dchi2 = np.einsum('ij,ixy,jxy->xy', 
                   np.linalg.inv(Cij), dws_t0s, dws_t0s)

# Now use chi2 distribution to get the corresponding contours
fig, ax = plt.subplots(figsize=(10, 6))
_cs = ax.contour(ws, t0s, dchi2, levels=levels, cmap="winter")
fmt = dict([(_l, fr"${_n}\sigma$")
            for _l, _n, _p in zip(levels, sigmas_, ps)])

# Draw single-parameter confidence limits to show that
# they are smaller.
for _n, _sigma in enumerate(sigmas_):
    kw = dict(c=_cs.get_edgecolors()[_n], 
              alpha=0.5, zorder=-100)
    for _s in [1, -1]:
        ax.axhline(t0_.n + _s*_sigma*t0_.s, ls=":", **kw)
        ax.axvline(w_.n + _s*_sigma*w_.s, ls="--", **kw)
ax.clabel(_cs, _cs.levels, inline=True, fmt=fmt, fontsize=10)
ax.set(xlabel="$\omega$", ylabel="$t_0$");
../_images/a346f70f6df66e2fa13043829da20855d875f2eb4594ea9537be59446eab67eb.png

Non-Gaussian Errors#

We now repeat the analysis, but with non-gaussian errors. Instead, we use the Gumbel distribution:

\[\begin{gather*} P_n(y_n) = \frac{1}{\sigma_n} e^{-z-e^{-z}}, \qquad z = \frac{y_n - f(x_n, \vect{a})}{\sigma_n}. \end{gather*}\]

If we consider the normalized errors \(\tilde{e}_n\), they are distributed as:

\[\begin{gather*} \tilde{e}_n = \frac{y_n - f(x_n, \bar{\vect{a}})}{\sigma_n}, \qquad P_n(\tilde{e}_n) = e^{-\tilde{e}_n-e^{-\tilde{e}_n}}. \end{gather*}\]

We can use this to generate the various simulated \(P_{\nu}(\chi^2)\) distributions for Gumbel distributed errors.

en = np.linspace(-3, 10, 100)
plt.plot(en, en + np.exp(-en));
../_images/f4b8ee2a0c6258784d1b9ecf93b60683b6bc2d4c16d78ac6793d88441f3786d1.png
from scipy.interpolate import InterpolatedUnivariateSpline, UnivariateSpline
rng = np.random.default_rng(seed=2)

def get_cdf_ppf(samples=None):
    """Return interpolated cdf and ppf functions.

    Results
    -------
    cdf : function
        Cumulative distribution function `q=cdf(chi2_r)`.
    ppf : function
        Percent point function (inverse of `cdf`).
    """
    Ns = len(samples)
    ps = np.linspace(0, 1, Ns)
    x = sorted(samples)
    cdf = InterpolatedUnivariateSpline(x, ps, k=1, ext='const')
    ppf = InterpolatedUnivariateSpline(ps, x, k=1, ext='const')
    return cdf, ppf

def get_chi2_cdf_ppf(nu=1, random=rng.normal, Ns=10000):
    """Return `(cdf, pdf, samples)` for the chi2 distribution.
    
    Arguments
    ---------
    nu : int
        Degrees of freedom.
    random : function
        Random number generator for errors.
    Ns : int
        Number of samples.
    """
    en = random(size=(Ns, nu))
    chi2 = (en**2).sum(axis=-1)
    cdf, ppf = get_cdf_ppf(chi2)
    return cdf, ppf, chi2

Hide code cell source

import scipy.stats
sp = scipy

chi2r_ = np.linspace(0, 4, 100)

hist_kw = dict(bins=np.linspace(0, 4, 100), histtype='step',
               alpha=0.8, density=True)
fig, axs = plt.subplots(1, 2, figsize=(10, 4))
for ax, label, random in zip(axs, 
                             ["normal", "gumbel"],
                             [rng.normal, rng.gumbel]):
    for _n, nu in enumerate([1, 2, 3, 10, 50]):
        cdf, ppf, chi2r_samples = get_chi2_cdf_ppf(nu=nu, random=random)
        chi2r_samples /= nu
        c = f"C{_n}"
        l, = ax.plot(chi2r_, cdf(nu*chi2r_), 
                     "--", lw=1, c=c, label=fr"CDF $\nu={nu}$")
        if label == "normal":
            ax.plot(chi2r_, nu*sp.stats.chi2.pdf(nu*chi2r_, df=nu), 
                    ls=":", c=c)
            ax.plot(chi2r_, sp.stats.chi2.cdf(nu*chi2r_, df=nu), 
                    ls=":", c=c)
            ax.set(xlim=(0, 2))
        else:
            ax.set(xlim=(0, 4))
        ax.hist(chi2r_samples, ec=c, **hist_kw)
    ax.set(title=rf"$\chi^2_r$ distribution for {label} errors")
    ax.set(ylim=(0, 2), xlabel=r"$\chi^2_r$")
    ax.legend(loc='upper right')
    
plt.tight_layout();
../_images/0214eda2ea598b416af31e42b075a9223db9ab915f4bcb2648dab5c5ce4bb1dc.png

On the left we show the CDF and PDF histogram for \(\chi^2_r\) generated from a sample of random numbers for a standard normal distribution, comparing with the analytic forms (dotted lines). On the right we use the same method to compute the CDF for the standard Gumbel distribution.

We now generate experimental data and proceed with an analysis:

# Randomly generated data...
# An "Experiment" with non-gaussian errors.

expt = Experiment(ts=np.linspace(0, 10.0, 7), 
                  random=np.random.default_rng(seed=2).gumbel)

fig, ax = plt.subplots(figsize=(10, 5))
expt.plot(ax=ax);
../_images/7382c32131f808ab9fc16624bf1bcf9756218f45a7c9cec11db5a5f24f12b0aa.png

Chi Square Fit#

Hide code cell source

# Generate actual distribution of chi2_r using MC

Ns = 2000  # number of samples
chi2_rs = expt.sample_chi2_r()

res = expt.fit()
nu, chi2_r = res.nu, res.chi2_r

_chi2_r = np.linspace(0, 3, 500)
_i = np.where(_chi2_r >= chi2_r)[0][0]
fig, ax = plt.subplots()
ax.plot(_chi2_r, nu*sp.stats.chi2.pdf(nu*_chi2_r, df=nu), 
        "-C0", label=rf"PDF ($\nu={nu}$)")
ax.plot(_chi2_r, nu*sp.stats.chi2.pdf(nu*_chi2_r, df=nu-1), 
        ":C0", label=rf"$\nu={nu}\pm 1$")
ax.plot(_chi2_r, nu*sp.stats.chi2.pdf(nu*_chi2_r, df=nu+1), 
        ":C0")
ax.plot(_chi2_r, sp.stats.chi2.cdf(nu*_chi2_r, df=nu), 
        "--C1", label="CDF")
kw = dict(bins=50, histtype='step', alpha=0.8, density=True)
ax.hist(chi2_rs, ec="C2", label=f"{Ns} samples", **kw)
ax.fill_between(_chi2_r[_i:], nu*sp.stats.chi2.pdf(nu*_chi2_r[_i:], df=nu), 
                fc="C0", alpha=0.5)
ax.axvline([chi2_r], ls=":", c="y")
ax.axhline([1-Q], ls=":", c="y")
ax.set(xlim=(0, 4),
       xlabel=r"$\chi^2_r=\chi^2/\nu$", 
       title=fr"Distribution of $\chi^2_r$ with " + 
             fr"$\nu={nu} = {len(expt.ts)}-{len(res.a)}$ degrees of freedom")
ax.legend();
../_images/3318d8f5230206c351abd8cee26eefc2e31f9e1c231ced2e636725980b386f6f.png

Parameter Estimates and Uncertainties#

a_ = expt.correlated_values(res.a, res.C)
ar_ = expt.correlated_values(res.a, res.C*res.chi2_r)

fig, axs = plt.subplots(2, 1, figsize=(10, 10))
for _a_, ax in zip((a_, ar_), axs):
    expt.plot(a_=_a_, ax=ax)
axs[0].set(title=fr"$\chi^2_r={res.chi2_r:.2f}$, $Q={Q:.2g}$");
axs[1].set(title=r"Rescaled covariance so $\chi^2_r=1$");
../_images/a48bc9ab032f5551e9ad53e2faddf229a617bc4629e8c775a97e6aca3dc42493.png

Single Parameter Distributions#

Unlike the case with gaussian errors, the parameter estimates are no longer consistent with the estimates.

res = expt.fit()
a_, C = res.a_, res.C

fig, ax = plt.subplots()
_x = np.linspace(0, 7, 200)
for _name, _a, _Cii, a in zip(a_._fields, a_, np.diag(C), expt.a):
    print(f"{_name}: {_a:.2uS}, (sqrt(C_ii) = {np.sqrt(_Cii):.2g})")
    l, = ax.plot(_x, sp.stats.norm.pdf(_x, loc=_a.n, scale=_a.s), label=_name)
    ax.axvline([a], ls='--', c=l.get_c())
ax.legend();
w: 1.272(22), (sqrt(C_ii) = 0.022)
c: 2.39(20), (sqrt(C_ii) = 0.2)
A: 4.01(27), (sqrt(C_ii) = 0.27)
phi: 5.56(12), (sqrt(C_ii) = 0.12)
../_images/b3e2871f7bd9dee6f86952108869a98c71b499fb281c8147003b5a434b1ad6f6.png

Chi Square Fit#

res = least_squares(fun=partial(get_residuals, ydata=ydata),
                    x0=a_exact, jac='cs')
p = Params(*res.x)
C = np.linalg.inv(res.jac.T @ res.jac)
r = get_residuals(p)
nu = len(r) - len(p)
chi2_r = np.sum(abs(r)**2) / nu
Q_wrong = 1 - sp.stats.chi2.cdf(chi2_r*nu, df=nu)

cdf_gumbel, pdf_gumbel, chi2s_gumbel = get_chi2_cdf_ppf(nu=nu, random=rng.gumbel)
Q = 1 - cdf_gumbel(chi2_r*nu)

Latex(r", \qquad ".join([
    rf"$\chi^2_r = {chi2_r:.2g}",
    rf"Q = {Q:.2g}",
    rf"Q_{{\mathrm{{wrong}}}} = {Q_wrong:.2g}$"]))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[18], line 1
----> 1 res = least_squares(fun=partial(get_residuals, ydata=ydata),
      2                     x0=a_exact, jac='cs')
      3 p = Params(*res.x)
      4 C = np.linalg.inv(res.jac.T @ res.jac)

NameError: name 'get_residuals' is not defined

With the non-gaussian errors, estimating the \(Q\) value with the chi squared distribution gives a very wrong interpretation about the quality of the fit. Instead, we must use the corresponding CDF for our Gumbel-distributed errors. To obtain the confidence region with confidence level \(p\), we must find the value of \(\chi^2_p\) where:

\[\begin{gather*} \int_0^{\chi^2_p}P(\chi^2)\d{\chi^2} = p. \end{gather*}\]

This is just the inverse CDF. We can check our method (slowly) by actually fitting the data repeatedly to generate the distribution of \(\chi^2\):

Hide code cell source

# Generate actual distribution of chi2_r using MC
from functools import partial

Ns = 2000  # number of samples
chi2s_data = np.array([nu*get_chi2_r(random=rng.gumbel) for n in range(Ns)])
chi2s_normal = np.array([nu*get_chi2_r(random=rng.normal) for n in range(Ns)])

cdf_data, ppf_data = get_cdf_ppf(chi2s_data)

_chi2_r = np.linspace(0, np.max(chi2s_gumbel)/nu, 500)
fig, ax = plt.subplots()
ax.plot(_chi2_r, cdf_data(nu*_chi2_r), '-C0', label="data")
ax.plot(_chi2_r, cdf_gumbel(nu*_chi2_r), ':C1', label="gumbel")
ax.plot(_chi2_r, sp.stats.chi2.cdf(nu*_chi2_r, df=nu), '--C2', label="normal")
ax.set(xlabel="$\chi^2_r$", ylabel="CDF", 
       title=fr"CDF for $P(\chi^2_r)$ for $\nu={nu}$ Gumbel-distributed errors.")

kw = dict(bins=100, histtype='step', alpha=0.8, density=True)
ax.hist(chi2s_data/nu, ec="C0", **kw)
ax.hist(chi2s_gumbel/nu, ec="C1", **kw)
ax.legend();
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[19], line 5
      2 from functools import partial
      4 Ns = 2000  # number of samples
----> 5 chi2s_data = np.array([nu*get_chi2_r(random=rng.gumbel) for n in range(Ns)])
      6 chi2s_normal = np.array([nu*get_chi2_r(random=rng.normal) for n in range(Ns)])
      8 cdf_data, ppf_data = get_cdf_ppf(chi2s_data)

NameError: name 'get_chi2_r' is not defined
fig, ax = plt.subplots()
random = rng.gumbel
for Ns in [500, 1000]:
    chi2s_data = np.array([nu*get_chi2_r(random=random) for n in range(Ns)])
    cdf_data, ppf_data = get_cdf_ppf(chi2s_data)
    cdf_gumbel, pdf_gumbel, chi2s_gumbel = get_chi2_cdf_ppf(nu=nu, random=random)
    ax.plot(_chi2_r, cdf_data(nu*_chi2_r), '-', label=f"data {Ns}")
ax.plot(_chi2_r, cdf_gumbel(nu*_chi2_r), ':', label="gumbel")
ax.legend()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[20], line 4
      2 random = rng.gumbel
      3 for Ns in [500, 1000]:
----> 4     chi2s_data = np.array([nu*get_chi2_r(random=random) for n in range(Ns)])
      5     cdf_data, ppf_data = get_cdf_ppf(chi2s_data)
      6     cdf_gumbel, pdf_gumbel, chi2s_gumbel = get_chi2_cdf_ppf(nu=nu, random=random)

NameError: name 'get_chi2_r' is not defined
../_images/b116d57a62f73100763cfd98956c774aff52846104e7aa97c2cb75189e8d31a0.png

Hide code cell source

res = expt.fit()
nu, chi2_r = res.nu, res.chi2_r

_chi2_r = np.linspace(0, 5, 500)
_i = np.where(_chi2_r >= chi2_r)[0][0]
fig, ax = plt.subplots()
ax.plot(_chi2_r, nu*sp.stats.chi2.pdf(nu*_chi2_r, df=nu), "-C1", label=rf"$\nu={nu}$")
ax.plot(_chi2_r, nu*sp.stats.chi2.pdf(nu*_chi2_r, df=nu-1), ":C1", label=rf"$\nu={nu}\pm 1$")
ax.plot(_chi2_r, nu*sp.stats.chi2.pdf(nu*_chi2_r, df=nu+1), ":C1")

ax.plot(_chi2_r, cdf_gumbel(nu*_chi2_r), "--C0", label=rf"CDF (Gumbel)")
ax.plot(_chi2_r, sp.stats.chi2.cdf(nu*_chi2_r, df=nu), "--C1", label=rf"CDF $\nu={nu}$")

# Make sure bins end on chi2_r
bins = np.linspace(0, chi2_r, 20)
_d = np.diff(bins).mean()
bins = np.concatenate([bins, np.arange(chi2_r+_d, 4+_d, _d)])
kw = dict(bins=bins, histtype='step', alpha=0.8, density=True)
res = ax.hist(chi2s_gumbel/nu, ec="C0", label=f"{Ns} samples (Gumbel)", **kw)

# Add filled region
xy = res[2][0].xy[res[2][0].xy[:, 0] >= chi2_r, :]
xy[0, 1] = 0
ax.add_patch(plt.Polygon(xy, fc="C0"))
ax.hist(chi2s_normal/nu, ls=":", ec="C1", label=fr"{Ns} samples ($\chi^2_{{\nu=3}}$)", **kw)

ax.axvline([chi2_r], ls=":", c="y")
ax.axhline([1-Q], ls=":", c="y")
ax.set(xlim=(0, 5),
       xlabel=r"$\chi^2_r=\chi^2/\nu$", 
       title=fr"Distribution of $\chi^2_r$ for $\nu={nu}$ gumbel errors; Q={Q:.2g}")
ax.legend();
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[21], line 11
      8 ax.plot(_chi2_r, nu*sp.stats.chi2.pdf(nu*_chi2_r, df=nu-1), ":C1", label=rf"$\nu={nu}\pm 1$")
      9 ax.plot(_chi2_r, nu*sp.stats.chi2.pdf(nu*_chi2_r, df=nu+1), ":C1")
---> 11 ax.plot(_chi2_r, cdf_gumbel(nu*_chi2_r), "--C0", label=rf"CDF (Gumbel)")
     12 ax.plot(_chi2_r, sp.stats.chi2.cdf(nu*_chi2_r, df=nu), "--C1", label=rf"CDF $\nu={nu}$")
     14 # Make sure bins end on chi2_r

NameError: name 'cdf_gumbel' is not defined
../_images/a8aecd7489f62327a8013bb25523a00f2f3b86efd1f0146cb18c8ac4907b5776.png

References#