From fcfa34684430680508c44b3fae4df6afaf8ea84b Mon Sep 17 00:00:00 2001
From: Gregory Ashton <gregory.ashton@ligo.org>
Date: Wed, 6 Jun 2018 14:22:23 +1000
Subject: [PATCH] Add example on Gaussian noise with unknown variance

---
 .../linear_regression_unknown_noise.py        | 117 ++++++++++++++++++
 1 file changed, 117 insertions(+)
 create mode 100644 examples/other_examples/linear_regression_unknown_noise.py

diff --git a/examples/other_examples/linear_regression_unknown_noise.py b/examples/other_examples/linear_regression_unknown_noise.py
new file mode 100644
index 000000000..1d9da4bf6
--- /dev/null
+++ b/examples/other_examples/linear_regression_unknown_noise.py
@@ -0,0 +1,117 @@
+#!/bin/python
+"""
+An example of how to use tupak to perform paramater estimation for
+non-gravitational wave data. In this case, fitting a linear function to
+data with background Gaussian noise with unknown variance.
+
+"""
+from __future__ import division
+import tupak
+import numpy as np
+import matplotlib.pyplot as plt
+import inspect
+
+# A few simple setup steps
+tupak.utils.setup_logger()
+label = 'linear_regression_unknown_noise'
+outdir = 'outdir'
+
+
+# First, we define our "signal model", in this case a simple linear function
+def model(time, m, c):
+    return time * m + c
+
+
+# New we define the injection parameters which we make simulated data with
+injection_parameters = dict(m=0.5, c=0.2)
+
+# For this example, we'll inject standard Gaussian noise
+sigma = 1
+
+# These lines of code generate the fake data. Note the ** just unpacks the
+# contents of the injection_paramsters when calling the model function.
+sampling_frequency = 10
+time_duration = 10
+time = np.arange(0, time_duration, 1/sampling_frequency)
+N = len(time)
+data = model(time, **injection_parameters) + np.random.normal(0, sigma, N)
+
+# We quickly plot the data to check it looks sensible
+fig, ax = plt.subplots()
+ax.plot(time, data, 'o', label='data')
+ax.plot(time, model(time, **injection_parameters), '--r', label='signal')
+ax.set_xlabel('time')
+ax.set_ylabel('y')
+ax.legend()
+fig.savefig('{}/{}_data.png'.format(outdir, label))
+
+
+# Parameter estimation: we now define a Gaussian Likelihood class relevant for
+# our model.
+
+class GaussianLikelihood(tupak.Likelihood):
+    def __init__(self, x, y, function, sigma=None):
+        """
+        A general Gaussian likelihood for known or unknown noise - the model
+        parameters are inferred from the arguments of function
+
+        Parameters
+        ----------
+        x, y: array_like
+            The data to analyse
+        function:
+            The python function to fit to the data. Note, this must take the
+            dependent variable as its first argument. The other arguments are
+            will require a prior and will be sampled over (unless a fixed
+            value is given).
+        sigma: None, float, array_like
+            If None, the standard deviation of the noise is unknown and will be
+            estimated (note: this requires a prior to be given for sigma). If
+            not None, this defined the standard-deviation of the data points.
+            This can either be a single float, or an array with length equal
+            to that for `x` and `y`.
+        """
+        self.x = x
+        self.y = y
+        self.N = len(x)
+        self.function = function
+
+        # These lines of code infer the parameters from the provided function
+        parameters = inspect.getargspec(function).args
+        parameters.pop(0)
+        self.parameters = dict.fromkeys(parameters)
+        self.function_keys = self.parameters.keys()
+        self.parameters['sigma'] = None
+
+    def log_likelihood(self):
+        model_parameters = {k: self.parameters[k] for k in self.function_keys}
+        res = self.y - self.function(self.x, **model_parameters)
+        sigma = self.parameters['sigma']
+        return -0.5 * (np.sum((res / sigma)**2)
+                       + self.N*np.log(2*np.pi*sigma**2))
+
+    def noise_log_likelihood(self):
+        return np.nan
+        sigma = self.parameters['sigma']
+        return -0.5 * (np.sum((self.y / sigma)**2)
+                       + self.N*np.log(2*np.pi*sigma**2))
+
+
+# Now lets instantiate a version of our GaussianLikelihood, giving it
+# the time, data and signal model
+likelihood = GaussianLikelihood(time, data, model)
+
+# From hereon, the syntax is exactly equivalent to other tupak examples
+# We make a prior
+priors = {}
+priors['m'] = tupak.prior.Uniform(0, 5, 'm')
+priors['c'] = tupak.prior.Uniform(-2, 2, 'c')
+priors['sigma'] = tupak.prior.Uniform(0, 10, 'sigma')
+
+# And run sampler
+result = tupak.run_sampler(
+    likelihood=likelihood, priors=priors, sampler='dynesty', npoints=500,
+    walks=10, injection_parameters=injection_parameters, outdir=outdir,
+    label=label)
+result.plot_corner()
+print(result)
-- 
GitLab