%% Cell type:markdown id: tags:
# Fitting a model to data with both x and y errors using `BILBY`
Usually, when we fit a model to data with a Gaussian likelihood, we assume that we know the x values exactly. This is almost never the case. Here, we show how to fit a model with errors in both x and y.
%% Cell type:code id: tags:
``` python
import inspect

import bilby
import matplotlib.pyplot as plt
import numpy as np

%matplotlib inline
```
%% Cell type:markdown id: tags:
### First we create the data and plot it
%% Cell type:code id: tags:
``` python
# Define our model, a line
def model(x, m, c, **kwargs):
    y = m * x + c
    return y


# Make a function to create and plot our data
def make_data(points, m, c, xerr, yerr, seed):
    np.random.seed(int(seed))
    xtrue = np.linspace(0, 100, points)
    ytrue = model(x=xtrue, m=m, c=c)

    # Draw the Gaussian noise realisations added to x and y
    xerr = xerr * np.random.randn(points)
    yerr = yerr * np.random.randn(points)
    xobs = xtrue + xerr
    yobs = ytrue + yerr

    # Plot the observed data over the true line; pass the magnitudes of
    # the noise draws, since errorbar does not accept negative values
    plt.errorbar(xobs, yobs, xerr=np.abs(xerr), yerr=np.abs(yerr), fmt='x')
    plt.errorbar(xtrue, ytrue, yerr=np.abs(yerr), color='black', alpha=0.5)
    plt.xlim(0, 100)
    plt.show()

    data = {'xtrue': xtrue, 'ytrue': ytrue, 'xobs': xobs, 'yobs': yobs,
            'xerr': xerr, 'yerr': yerr}
    return data


data = make_data(points=30, m=5, c=10, xerr=5, yerr=5, seed=123)
```
%% Cell type:markdown id: tags:
### Now let's set up the priors and the bilby output directory
%% Cell type:code id: tags:
``` python
# Set up the bilby priors
priors = dict(m=bilby.core.prior.Uniform(0, 30, 'm'),
              c=bilby.core.prior.Uniform(0, 30, 'c'))
outdir = 'outdir'
livepoints = 100
walks = 100
```
%% Cell type:markdown id: tags:
### Our first step is to recover the straight line using a simple Gaussian likelihood that only takes the y errors into account, under the assumption that we know x exactly. In this case, we pass in xtrue for x
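This is the standard `bilby.core.likelihood.GaussianLikelihood`. For known noise, it evaluates the familiar Gaussian log-likelihood

$$ \ln L = -\frac{1}{2} \sum_i \left[ \frac{\left(y_i - f(x_i; m, c)\right)^2}{\sigma_{y,i}^2} + \ln\left(2 \pi \sigma_{y,i}^2\right) \right]. $$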
%% Cell type:code id: tags:
``` python
OneDGaussian_knownx = bilby.core.likelihood.GaussianLikelihood(
    x=data['xtrue'], y=data['yobs'], func=model, sigma=data['yerr'])
result_1D_xtrue = bilby.run_sampler(
    likelihood=OneDGaussian_knownx, priors=priors, sampler='dynesty',
    npoints=livepoints, walks=walks, outdir=outdir, label='xtrue_1D_Gaussian')
```
%% Cell type:code id: tags:
``` python
result_1D_xtrue.plot_corner(truth=dict(m=5, c=10), titles=True)
result_1D_xtrue.plot_with_data(
    model=model, x=data['xtrue'], y=data['yobs'], ndraws=1000, npoints=100)
plt.show()
```
%% Cell type:markdown id: tags:
### As expected, this is easy to recover and the sampler does a good job. However, this was made too easy by passing in the 'true' values of x. Let's see what happens when we pass in the observed values of x
%% Cell type:code id: tags:
``` python
OneDGaussian_unknownx = bilby.core.likelihood.GaussianLikelihood(
    x=data['xobs'], y=data['yobs'], func=model, sigma=data['yerr'])
result_1D_xobs = bilby.run_sampler(
    likelihood=OneDGaussian_unknownx, priors=priors, sampler='dynesty',
    npoints=livepoints, walks=walks, outdir=outdir, label='xobs_1D_Gaussian')
```
%% Cell type:code id: tags:
``` python
result_1D_xobs.plot_corner(truth=dict(m=5, c=10), titles=True)
result_1D_xobs.plot_with_data(
    model=model, x=data['xobs'], y=data['yobs'], ndraws=1000, npoints=100)
plt.show()
```
%% Cell type:markdown id: tags:
### As expected, this is significantly worse. Let us now define a new likelihood that takes the x errors into account, but to which we still pass in xtrue
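Since xtrue is known, the likelihood factorises into independent Gaussians in x and y. Up to additive constants,

$$ \ln L = -\frac{1}{2} \left[ \sum_i \frac{\left(y_{\mathrm{obs},i} - f(x_{\mathrm{true},i}; m, c)\right)^2}{\sigma_{y,i}^2} + \sum_i \frac{\left(x_{\mathrm{obs},i} - x_{\mathrm{true},i}\right)^2}{\sigma_{x,i}^2} \right], $$

which is exactly what the class below implements.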
%% Cell type:code id: tags:
``` python
class TwoDGaussianLikelihood_knownxtrue(bilby.Likelihood):
    def __init__(self, xtrue, xobs, yobs, xerr, yerr, function):
        """
        Parameters
        ----------
        xtrue: array_like
            The true injected x values
        xobs, yobs: array_like
            The data to analyse
        xerr, yerr: array_like
            The standard deviation of the noise
        function:
            The python function to fit to the data
        """
        self.xtrue = xtrue
        self.xobs = xobs
        self.yobs = yobs
        self.xerr = xerr
        self.yerr = yerr
        self.function = function

        # Infer the parameter names from the function signature,
        # dropping the first argument (the x values)
        parameters = inspect.getfullargspec(function).args
        parameters.pop(0)
        super().__init__(parameters=dict.fromkeys(parameters))

    def log_likelihood(self):
        # Independent Gaussian residuals in y (about the model evaluated
        # at xtrue) and in x (about xtrue itself)
        resy = self.yobs - self.function(self.xtrue, **self.parameters)
        resx = self.xobs - self.xtrue
        return -0.5 * (np.sum((resy / self.yerr) ** 2)
                       + np.sum((resx / self.xerr) ** 2))
```
%% Cell type:code id: tags:
``` python
TwoDGaussian_knownx = TwoDGaussianLikelihood_knownxtrue(
    xtrue=data['xtrue'], xobs=data['xobs'], yobs=data['yobs'],
    xerr=data['xerr'], yerr=data['yerr'], function=model)
result_2D_knownx = bilby.run_sampler(
    likelihood=TwoDGaussian_knownx, priors=priors, sampler='dynesty',
    npoints=livepoints, walks=walks, outdir=outdir, label='knownx_2D_Gaussian')
```
%% Cell type:code id: tags:
``` python
result_2D_knownx.plot_corner(truth=dict(m=5, c=10), titles=True)
result_2D_knownx.plot_with_data(
    model=model, x=data['xobs'], y=data['yobs'], ndraws=1000, npoints=100)
plt.show()
```
%% Cell type:markdown id: tags:
### This works well; however, it is still not realistic, as one still needs to 'know' the true x values. Getting around this requires marginalising over the true x values or sampling over them. See the discussion in section 7 of https://arxiv.org/pdf/1008.4686.pdf
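For a linear model, this marginalisation can be done analytically. Writing $x_{\mathrm{obs},i} = x_{\mathrm{true},i} + \epsilon_{x,i}$ and $y_{\mathrm{obs},i} = m x_{\mathrm{true},i} + c + \epsilon_{y,i}$ with independent zero-mean Gaussian errors, the residual about the line evaluated at the observed x values is

$$ y_{\mathrm{obs},i} - \left(m x_{\mathrm{obs},i} + c\right) = \epsilon_{y,i} - m \epsilon_{x,i} \sim \mathcal{N}\left(0, \Sigma_i^2\right), \qquad \Sigma_i^2 = \left(m \sigma_{x,i}\right)^2 + \sigma_{y,i}^2, $$

so that, up to constants,

$$ \ln L = -\frac{1}{2} \sum_i \left[ \frac{\left(y_{\mathrm{obs},i} - m x_{\mathrm{obs},i} - c\right)^2}{\Sigma_i^2} + \ln \Sigma_i^2 \right]. $$

The $\ln \Sigma_i^2$ term must be kept because $\Sigma_i^2$ depends on the slope $m$.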
%% Cell type:code id: tags:
``` python
class TwoDGaussianLikelihood_unknownx(bilby.Likelihood):
    def __init__(self, xobs, yobs, xerr, yerr, function):
        """
        Parameters
        ----------
        xobs, yobs: array_like
            The data to analyse
        xerr, yerr: array_like
            The standard deviation of the noise
        function:
            The python function to fit to the data
        """
        self.xobs = xobs
        self.yobs = yobs
        self.xerr = xerr
        self.yerr = yerr
        self.function = function

        # Infer the parameter names from the function signature,
        # dropping the first argument (the x values)
        parameters = inspect.getfullargspec(function).args
        parameters.pop(0)
        super().__init__(parameters=dict.fromkeys(parameters))

    def log_likelihood(self):
        m = self.parameters['m']
        # Effective variance after analytically marginalising over the
        # true x values: an error sigma_x in x maps onto m * sigma_x in y
        Sigma2 = (self.xerr * m) ** 2 + self.yerr ** 2
        model_y = self.function(self.xobs, **self.parameters)
        Delta = self.yobs - model_y
        # Keep the log(Sigma2) term since Sigma2 depends on m
        return -0.5 * np.sum(Delta ** 2 / Sigma2 + np.log(Sigma2))
```
%% Cell type:code id: tags:
``` python
TwoDGaussian_unknownx = TwoDGaussianLikelihood_unknownx(
    xobs=data['xobs'], yobs=data['yobs'],
    xerr=data['xerr'], yerr=data['yerr'], function=model)
result_2D_unknownx = bilby.run_sampler(
    likelihood=TwoDGaussian_unknownx, priors=priors, sampler='dynesty',
    npoints=livepoints, walks=walks, outdir=outdir, label='unknownx_2D_Gaussian')
```
%% Cell type:code id: tags:
``` python
result_2D_unknownx.plot_corner(truth=dict(m=5, c=10), titles=True)
result_2D_unknownx.plot_with_data(
    model=model, x=data['xobs'], y=data['yobs'], ndraws=1000, npoints=100)
plt.show()
```
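%% Cell type:markdown id: tags:
### Finally, let us compare the recovered parameters from the four runs against the injected values (m = 5, c = 10)
A minimal sketch using the `posterior` DataFrame attached to each bilby result object:
%% Cell type:code id: tags:
``` python
# Print the median and standard deviation of the recovered slope and
# intercept for each of the four analyses
results = {'1D, xtrue': result_1D_xtrue,
           '1D, xobs': result_1D_xobs,
           '2D, known x': result_2D_knownx,
           '2D, unknown x': result_2D_unknownx}
for label, result in results.items():
    post = result.posterior
    print('{}: m = {:.2f} +/- {:.2f}, c = {:.2f} +/- {:.2f}'.format(
        label, post['m'].median(), post['m'].std(),
        post['c'].median(), post['c'].std()))
```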