diff --git a/examples/tutorials/fitting_with_x_and_y_errors.ipynb b/examples/tutorials/fitting_with_x_and_y_errors.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..17f6414da7145374b2ca7302406e7291dd8adb6b --- /dev/null +++ b/examples/tutorials/fitting_with_x_and_y_errors.ipynb @@ -0,0 +1,332 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Fitting a model to data with both x and y errors with `BILBY`\n", + "\n", + "Usually, when we fit a model to data with a Gaussian likelihood, we assume that we know the x values exactly. This is almost never the case. Here we show how to fit a model to data with errors in both x and y." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import inspect\n", + "\n", + "import bilby\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### First we create the data and plot it" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Define our model: a straight line\n", + "def model(x, m, c, **kwargs):\n", + " y = m * x + c\n", + " return y\n", + "\n", + "\n", + "# Make a function to create and plot our data\n", + "def make_data(points, m, c, xerr, yerr, seed):\n", + " np.random.seed(int(seed))\n", + " xtrue = np.linspace(0, 100, points)\n", + " ytrue = model(x=xtrue, m=m, c=c)\n", + "\n", + " # Draw the noise realisations separately from the error bars, so that\n", + " # the stored uncertainties are the standard deviations of the noise\n", + " xnoise = xerr * np.random.randn(points)\n", + " ynoise = yerr * np.random.randn(points)\n", + " xobs = xtrue + xnoise\n", + " yobs = ytrue + ynoise\n", + "\n", + " plt.errorbar(xobs, yobs, xerr=xerr, yerr=yerr, fmt='x')\n", + " plt.errorbar(xtrue, ytrue, yerr=yerr, color='black', alpha=0.5)\n", + " plt.xlim(0, 100)\n", + " plt.show()\n", + "\n", + " data = {'xtrue': xtrue, 'ytrue': ytrue, 'xobs': xobs, 'yobs': yobs,\n", + " 'xerr': xerr * np.ones(points), 'yerr': yerr * np.ones(points)}\n", + "\n", + " return data\n", + "\n", + "\n", + "data = make_data(points=30, m=5, c=10, xerr=5, yerr=5, seed=123)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Now let's set up the priors and the bilby output directory" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Set up the bilby priors\n", + "priors = dict(m=bilby.core.prior.Uniform(0, 30, 'm'),\n", + " c=bilby.core.prior.Uniform(0, 30, 'c'))\n", + "\n", + "outdir = 'outdir'\n", + "livepoints = 100\n", + "walks = 100" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Our first step is to recover the straight line using a simple Gaussian likelihood that only accounts for the y errors, under the assumption that we know x exactly. In this case, we pass in `xtrue` for x." + ] + },
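 + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The simple Gaussian likelihood used in the next cell treats the x values as exact: with model $f(x; m, c)$ and per-point uncertainties $\\sigma_{y,i}$, it has the standard form\n", + "\n", + "$$\\ln \\mathcal{L} = -\\frac{1}{2} \\sum_{i} \\left[ \\frac{\\left(y_i - f(x_i; m, c)\\right)^2}{\\sigma_{y,i}^2} + \\ln\\left(2 \\pi \\sigma_{y,i}^2\\right) \\right]$$\n", + "\n", + "which is what bilby's built-in `GaussianLikelihood` evaluates. Only the y uncertainties enter." + ] + },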
 + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "OneDGaussian_knownx = bilby.core.likelihood.GaussianLikelihood(\n", + " x=data['xtrue'], y=data['yobs'], func=model, sigma=data['yerr'])\n", + "result_1D_xtrue = bilby.run_sampler(\n", + " likelihood=OneDGaussian_knownx, priors=priors, sampler='dynesty', npoints=livepoints,\n", + " walks=walks, outdir=outdir, label='xtrue_1D_Gaussian')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "result_1D_xtrue.plot_corner(truth=dict(m=5, c=10), titles=True)\n", + "result_1D_xtrue.plot_with_data(model=model, x=data['xtrue'], y=data['yobs'], ndraws=1000, npoints=100)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### As expected, this is easy to recover and the sampler does a good job. However, this was made too easy: we passed in the 'true' values of x. Let's see what happens when we pass in the observed values of x" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "OneDGaussian_unknownx = bilby.core.likelihood.GaussianLikelihood(\n", + " x=data['xobs'], y=data['yobs'], func=model, sigma=data['yerr'])\n", + "result_1D_xobs = bilby.run_sampler(\n", + " likelihood=OneDGaussian_unknownx, priors=priors, sampler='dynesty', npoints=livepoints,\n", + " walks=walks, outdir=outdir, label='xobs_1D_Gaussian')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "result_1D_xobs.plot_corner(truth=dict(m=5, c=10), titles=True)\n", + "result_1D_xobs.plot_with_data(model=model, x=data['xobs'], y=data['yobs'], ndraws=1000, npoints=100)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### As expected, this is significantly worse. Let us now define a new likelihood that accounts for the x errors, but still requires the true x values to be passed in. The form of this likelihood is sketched below." + ] + },
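 + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Treating the x and y uncertainties as independent Gaussians, the joint log-likelihood implemented below is (dropping normalisation constants, which do not depend on $m$ or $c$)\n", + "\n", + "$$\\ln \\mathcal{L} = -\\frac{1}{2} \\sum_{i} \\left[ \\frac{\\left(y_{{\\rm obs},i} - f(x_{{\\rm true},i}; m, c)\\right)^2}{\\sigma_{y,i}^2} + \\frac{\\left(x_{{\\rm obs},i} - x_{{\\rm true},i}\\right)^2}{\\sigma_{x,i}^2} \\right]$$\n", + "\n", + "Note that with $x_{\\rm true}$ known and fixed, the second term is a constant: it shifts the evidence but not the posterior on $m$ and $c$." + ] + },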
 + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class TwoDGaussianLikelihood_knownxtrue(bilby.Likelihood):\n", + " def __init__(self, xtrue, xobs, yobs, xerr, yerr, function):\n", + " \"\"\"\n", + " A Gaussian likelihood with errors in both x and y, for the\n", + " (unrealistic) case where the true x values are known.\n", + "\n", + " Parameters\n", + " ----------\n", + " xtrue: array_like\n", + " The true injected x values\n", + " xobs, yobs: array_like\n", + " The data to analyse\n", + " xerr, yerr: array_like\n", + " The standard deviations of the noise\n", + " function:\n", + " The python function to fit to the data\n", + " \"\"\"\n", + " self.xtrue = xtrue\n", + " self.xobs = xobs\n", + " self.yobs = yobs\n", + " self.xerr = xerr\n", + " self.yerr = yerr\n", + " self.function = function\n", + " # Infer the model parameters from the function signature,\n", + " # dropping the first argument, which is x\n", + " parameters = inspect.getfullargspec(function).args\n", + " parameters.pop(0)\n", + " super().__init__(parameters=dict.fromkeys(parameters))\n", + "\n", + " def log_likelihood(self):\n", + " resy = self.yobs - self.function(self.xtrue, **self.parameters)\n", + " resx = self.xobs - self.xtrue\n", + " return -0.5 * (np.sum((resy / self.yerr) ** 2) + np.sum((resx / self.xerr) ** 2))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "TwoDGaussian_knownx = TwoDGaussianLikelihood_knownxtrue(xtrue=data['xtrue'], xobs=data['xobs'],\n", + " yobs=data['yobs'], xerr=data['xerr'],\n", + " yerr=data['yerr'], function=model)\n", + "result_2D_knownx = bilby.run_sampler(\n", + " likelihood=TwoDGaussian_knownx, priors=priors, sampler='dynesty', npoints=livepoints,\n", + " walks=walks, outdir=outdir, label='knownx_2D_Gaussian')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "result_2D_knownx.plot_corner(truth=dict(m=5, c=10), titles=True)\n", + "result_2D_knownx.plot_with_data(model=model, x=data['xobs'], y=data['yobs'], ndraws=1000, npoints=100)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### This works well, but it is still not realistic: one still needs to 'know' the true x values. Getting around this requires marginalising over the true x values, or sampling over them. See the discussion in section 7 of https://arxiv.org/pdf/1008.4686.pdf; the result for a straight line is sketched below." + ] + },
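 + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For a straight line the marginalisation over $x_{\\rm true}$ can be done analytically. A sketch, assuming an effectively flat prior on each $x_{{\\rm true},i}$: because $y = mx + c$ is linear, the x uncertainty is projected onto y by the slope, so $y_{{\\rm obs},i}$ given $x_{{\\rm obs},i}$ is Gaussian with effective variance\n", + "\n", + "$$\\Sigma_i^2 = \\sigma_{y,i}^2 + m^2 \\sigma_{x,i}^2$$\n", + "\n", + "and, up to an additive constant,\n", + "\n", + "$$\\ln \\mathcal{L} = -\\frac{1}{2} \\sum_{i} \\left[ \\frac{\\left(y_{{\\rm obs},i} - m x_{{\\rm obs},i} - c\\right)^2}{\\Sigma_i^2} + \\ln \\Sigma_i^2 \\right]$$\n", + "\n", + "This is what the likelihood below implements. Because $\\Sigma_i$ depends on $m$, the $\\ln \\Sigma_i^2$ term must be kept." + ] + },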
 + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class TwoDGaussianLikelihood_unknownx(bilby.Likelihood):\n", + " def __init__(self, xobs, yobs, xerr, yerr, function):\n", + " \"\"\"\n", + " A Gaussian likelihood with errors in both x and y, with the true\n", + " x values marginalised out (exact for a linear model).\n", + "\n", + " Parameters\n", + " ----------\n", + " xobs, yobs: array_like\n", + " The data to analyse\n", + " xerr, yerr: array_like\n", + " The standard deviations of the noise\n", + " function:\n", + " The python function to fit to the data\n", + " \"\"\"\n", + " self.xobs = xobs\n", + " self.yobs = yobs\n", + " self.xerr = xerr\n", + " self.yerr = yerr\n", + " self.function = function\n", + " # Infer the model parameters from the function signature,\n", + " # dropping the first argument, which is x\n", + " parameters = inspect.getfullargspec(function).args\n", + " parameters.pop(0)\n", + " super().__init__(parameters=dict.fromkeys(parameters))\n", + "\n", + " def log_likelihood(self):\n", + " m = self.parameters['m']\n", + "\n", + " # The x uncertainty is projected onto y by the slope, giving an\n", + " # effective variance for each data point\n", + " Sigma2 = (self.xerr * m) ** 2 + self.yerr ** 2\n", + " model_y = self.function(self.xobs, **self.parameters)\n", + " Delta = self.yobs - model_y\n", + "\n", + " # The log(Sigma2) term must be kept, as Sigma2 depends on m\n", + " return -0.5 * np.sum(Delta ** 2 / Sigma2 + np.log(Sigma2))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "TwoDGaussian_unknownx = TwoDGaussianLikelihood_unknownx(xobs=data['xobs'], yobs=data['yobs'],\n", + " xerr=data['xerr'], yerr=data['yerr'],\n", + " function=model)\n", + "result_2D_unknownx = bilby.run_sampler(\n", + " likelihood=TwoDGaussian_unknownx, priors=priors, sampler='dynesty', npoints=livepoints,\n", + " walks=walks, outdir=outdir, label='unknownx_2D_Gaussian')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "result_2D_unknownx.plot_corner(truth=dict(m=5, c=10), titles=True)\n", + "result_2D_unknownx.plot_with_data(model=model, x=data['xobs'], y=data['yobs'], ndraws=1000, npoints=100)\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}