Commit 2f6d63e9 authored by Gregory Ashton's avatar Gregory Ashton

Update hyper parameter example and add documentation

parent 0489e6d4

.. _hyperparameters:

================
Hyper-parameters
================

This short guide illustrates how to estimate hyper-parameters from
posterior samples with :code:`tupak`, using a simple problem.

Setting up the problem
----------------------

We are given three data sets labelled a, b, and c. Each data set consists of
:math:`N` observations of a variable :math:`y` taken at values of an
independent variable :math:`x`. Notationally, we can write these three data
sets as :math:`(y_i^a, x_i^a), (y_i^b, x_i^b), (y_i^c, x_i^c)`, where
:math:`i \in [0, N]` indexes the observations within each data set.
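
As a concrete illustration, here is a minimal sketch of how such data could
be simulated (it mirrors the example script included in this commit: each
intercept is drawn from a common normal population, while the slopes and the
noise are independent; the name :code:`data_sets` is ours):

.. code-block:: python

   import numpy as np

   N = 10
   x = np.linspace(0, 10, N)
   sigma = 1  # known noise standard deviation
   data_sets = {}
   for label in ['a', 'b', 'c']:
       c0 = np.random.normal(5, 1)    # population model: c0 ~ N(mu=5, sigma=1)
       c1 = np.random.uniform(-1, 1)  # each data set has its own slope
       data_sets[label] = c0 + c1 * x + np.random.normal(0, sigma, N)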

Plotting the data, we see that all three look like they could be modelled by
a linear function with some slope and some intercept:

.. image:: images/hyper_parameter_data.png

Given any individual data set, you could write down a linear model
:math:`y(x) = c_0 + c_1 x` and infer the intercept and gradient. For example,
given the a data set, you could calculate :math:`P(c_0|y_i^a, x_i^a)` by
fitting the model to the data. Here is a figure demonstrating the posteriors
on the intercept for the three data sets given above:

.. image:: images/hyper_parameter_combined_posteriors.png
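
Condensed from the example script included in this commit, one such
individual fit looks like the sketch below (:code:`GaussianLikelihood` and
:code:`model` are defined in that script; :code:`data_sets` comes from the
simulation sketch above):

.. code-block:: python

   import tupak

   likelihood = GaussianLikelihood(x, data_sets['a'], model, sigma)
   priors = dict(c0=tupak.core.prior.Uniform(-10, 10, 'c0'),
                 c1=tupak.core.prior.Uniform(-10, 10, 'c1'))
   result = tupak.core.sampler.run_sampler(
       likelihood=likelihood, priors=priors, sampler='dynesty',
       npoints=1000, outdir='outdir', label='individual_a')
   result.plot_corner()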

While the data look noisy, the overlap in the :math:`c_0` posteriors might
(especially given context about how the data were produced and the physical
setting) lead you to believe that all three data sets share a common
intercept. How would you go about estimating it? You could simply take the
mean of the means of the three posteriors, and that would give a reasonably
good point estimate. However, we'll now discuss how to do this properly with
hyper-parameters.

Understanding the population using hyper-parameters
---------------------------------------------------

We first need to define our hyper-parameterised model. In this case, it is
as simple as

.. math::

   c_0 \sim \mathcal{N}(\mu_{c_0}, \sigma_{c_0}).

That is, we model the population of :math:`c_0` values (from which each of
the data sets was drawn) as coming from a normal distribution with some mean
:math:`\mu_{c_0}` and standard deviation :math:`\sigma_{c_0}`.
Each individual fit returns samples from :math:`P(c_0|y_i, x_i) \propto
\mathcal{L}(y_i, x_i|c_0)\, \pi(c_0)`, where :math:`\pi(c_0)` is the prior
used in the individual analyses. To infer the hyper-parameters
:math:`\Lambda = (\mu_{c_0}, \sigma_{c_0})`, we replace that per-event prior
with the population model and marginalise over each :math:`c_0`.
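
In practice, the marginalisation is approximated by recycling the posterior
samples from the individual fits. This is the standard importance-sampling
estimator for hierarchical inference and, judging by its arguments (the
samples, a hyper-prior, and the run prior), is what the
:code:`HyperparameterLikelihood` used below implements:

.. math::

   \mathcal{L}(\{y\}|\Lambda) \propto \prod_{j \in \{a, b, c\}}
   \frac{1}{n_j} \sum_{k=1}^{n_j}
   \frac{p(c_0^{jk}|\Lambda)}{\pi(c_0^{jk})},

where :math:`c_0^{jk}` is the :math:`k`-th posterior sample for data set
:math:`j`, :math:`p(c_0|\Lambda)` is the population model, and
:math:`\pi(c_0)` is the prior used in the individual fits.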

For the samples in the figure above, the posterior on these hyper-parameters
is given by

.. image:: images/hyper_parameter_corner.png
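
This corner plot is produced by the hyper-parameter step of the example
script included in this commit, condensed here (:code:`samples` is the list
of arrays of :math:`c_0` posterior samples, one per data set):

.. code-block:: python

   run_prior = tupak.core.prior.Uniform(minimum=-10, maximum=10, name='mu_c0')
   hyper_prior = tupak.core.prior.Gaussian(mu=0, sigma=1, name='hyper')
   hp_likelihood = tupak.gw.likelihood.HyperparameterLikelihood(
       samples, hyper_prior, run_prior)
   hp_priors = dict(
       mu=tupak.core.prior.Uniform(-10, 10, 'mu', r'$\mu_{c0}$'),
       sigma=tupak.core.prior.Uniform(0, 10, 'sigma', r'$\sigma_{c0}$'))
   result = tupak.core.sampler.run_sampler(
       likelihood=hp_likelihood, priors=hp_priors, sampler='dynesty',
       npoints=1000, outdir='outdir', label='hyper_parameter')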

New images added in this commit:

- docs/images/hyper_parameter_combined_posteriors.png (11.6 KiB)
- docs/images/hyper_parameter_corner.png (121 KiB)
- docs/images/hyper_parameter_data.png (30.1 KiB)

@@ -15,5 +15,6 @@ Welcome to tupak's documentation!
    samplers
    tupak-output
    writing-documentation
+   hyperparameters
#!/bin/python
"""
An example of how to use tupak to perform parameter estimation for
hyper-parameters
"""
from __future__ import division
import tupak
import numpy as np
import inspect
import matplotlib.pyplot as plt
tupak.core.utils.setup_logger()
outdir = 'outdir'


class GaussianLikelihood(tupak.Likelihood):
    def __init__(self, x, y, function, sigma=None):
        """
        A general Gaussian likelihood for known or unknown noise - the model
        parameters are inferred from the arguments of function

        Parameters
        ----------
        x, y: array_like
            The data to analyse
        function:
            The python function to fit to the data. Note, this must take the
            independent variable as its first argument. The other arguments
            will require a prior and will be sampled over (unless a fixed
            value is given).
        sigma: None, float, array_like
            If None, the standard deviation of the noise is unknown and will
            be estimated (note: this requires a prior to be given for sigma).
            If not None, this defines the standard deviation of the data
            points. This can either be a single float, or an array with
            length equal to that of `x` and `y`.
        """
        self.x = x
        self.y = y
        self.N = len(x)
        self.sigma = sigma
        self.function = function

        # Infer the model parameters from the argument names of `function`,
        # skipping its first argument (the independent variable)
        parameters = inspect.getargspec(function).args
        parameters.pop(0)
        self.parameters = dict.fromkeys(parameters)
        self.function_keys = self.parameters.keys()
        if self.sigma is None:
            self.parameters['sigma'] = None
    def log_likelihood(self):
        sigma = self.parameters.get('sigma', self.sigma)
        model_parameters = {k: self.parameters[k] for k in self.function_keys}
        res = self.y - self.function(self.x, **model_parameters)
        return -0.5 * (np.sum((res / sigma)**2)
                       + self.N*np.log(2*np.pi*sigma**2))


# Define a model to fit to each data set
def model(x, c0, c1):
    return c0 + c1*x


N = 10
x = np.linspace(0, 10, N)
sigma = 1
Nevents = 3
labels = ['a', 'b', 'c']

true_mu_c0 = 5
true_sigma_c0 = 1

fig1, ax1 = plt.subplots()
fig2, ax2 = plt.subplots()

# Make the sample sets
samples = []
for i in range(Nevents):
    c0 = np.random.normal(true_mu_c0, true_sigma_c0)
    c1 = np.random.uniform(-1, 1)
    injection_parameters = dict(c0=c0, c1=c1)

    data = model(x, **injection_parameters) + np.random.normal(0, sigma, N)
    line = ax1.plot(x, data, '-x', label=labels[i])

    likelihood = GaussianLikelihood(x, data, model, sigma)
    priors = dict(c0=tupak.core.prior.Uniform(-10, 10, 'c0'),
                  c1=tupak.core.prior.Uniform(-10, 10, 'c1'),
                  )

    result = tupak.core.sampler.run_sampler(
        likelihood=likelihood, priors=priors, sampler='dynesty', npoints=1000,
        outdir=outdir, verbose=False, label='individual_{}'.format(i))
    ax2.hist(result.posterior.c0, color=line[0].get_color(), normed=True,
             alpha=0.5, label=labels[i])
    samples.append(result.posterior.c0.values)
ax1.set_xlabel('x')
ax1.set_ylabel('y(x)')
ax1.legend()
fig1.savefig('outdir/hyper_parameter_data.png')
ax2.set_xlabel('c0')
ax2.set_ylabel('density')
ax2.legend()
fig2.savefig('outdir/hyper_parameter_combined_posteriors.png')

# Now run the hyper-parameter inference
run_prior = tupak.core.prior.Uniform(minimum=-10, maximum=10, name='mu_c0')
hyper_prior = tupak.core.prior.Gaussian(mu=0, sigma=1, name='hyper')

hp_likelihood = tupak.gw.likelihood.HyperparameterLikelihood(
    samples, hyper_prior, run_prior)

hp_priors = dict(
    mu=tupak.core.prior.Uniform(-10, 10, 'mu', r'$\mu_{c0}$'),
    sigma=tupak.core.prior.Uniform(0, 10, 'sigma', r'$\sigma_{c0}$'))

# And run sampler
result = tupak.core.sampler.run_sampler(
    likelihood=hp_likelihood, priors=hp_priors, sampler='dynesty',
    npoints=1000, outdir=outdir, label='hyper_parameter', verbose=True)
result.plot_corner(truth=dict(mu=true_mu_c0, sigma=true_sigma_c0))