diff --git a/bilby/core/__init__.py b/bilby/core/__init__.py
index 9d1dd51e24d6debec2c663f0128742a77337e370..3946f24a4339cc4fa456df4c53dd50bd2fde75c9 100644
--- a/bilby/core/__init__.py
+++ b/bilby/core/__init__.py
@@ -1,2 +1,2 @@
 from __future__ import absolute_import
-from . import likelihood, prior, result, sampler, series, utils
+from . import grid, likelihood, prior, result, sampler, series, utils
diff --git a/bilby/core/grid.py b/bilby/core/grid.py
new file mode 100644
index 0000000000000000000000000000000000000000..1f782ba48b3cc9103935b9c6778dbf64eecba857
--- /dev/null
+++ b/bilby/core/grid.py
@@ -0,0 +1,295 @@
+from __future__ import division
+
+import numpy as np
+
+from .prior import Prior, PriorDict
+from .utils import logtrapzexp
+
+
+class Grid(object):
+
+    def __init__(self, likelihood, priors, grid_size=101):
+        """
+        Evaluate the prior, likelihood and posterior of a model on a grid of
+        points covering the prior space.
+
+        Parameters
+        ----------
+        likelihood: bilby.likelihood.Likelihood
+        priors: bilby.prior.PriorDict
+        grid_size: int, list, dict
+            Size of the grid, can be any of
+            - int: all dimensions will have equal numbers of points
+            - list: dimensions will use these points/this number of points in
+              order of priors
+            - dict: as for list, keyed by parameter name
+        """
+        self.likelihood = likelihood
+        self.priors = PriorDict(priors)
+        self.n_dims = len(priors)
+        self.parameter_names = list(self.priors.keys())
+
+        self.sample_points = dict()
+        self._get_sample_points(grid_size)
+        # evaluate the prior on the grid points
+        self._ln_prior = self.priors.ln_prob(
+            {key: self.mesh_grid[i].flatten() for i, key in
+             enumerate(self.parameter_names)}, axis=0).reshape(
+            self.mesh_grid[0].shape)
+        self._ln_likelihood = None
+
+        # evaluate the likelihood on the grid points
+        self._evaluate()
+
+    @property
+    def ln_prior(self):
+        return self._ln_prior
+
+    @property
+    def prior(self):
+        return np.exp(self.ln_prior)
+
+    @property
+    def ln_likelihood(self):
+        if self._ln_likelihood is None:
+            self._evaluate()
+        return self._ln_likelihood
+
+    @property
+    def ln_posterior(self):
+        return self.ln_likelihood + self.ln_prior
+
+    def marginalize(self, log_array, parameters=None, not_parameters=None):
+        """
+        Marginalize over a list of parameters.
+
+        Parameters
+        ----------
+        log_array: array_like
+            A :class:`numpy.ndarray` of log likelihood/posterior values.
+        parameters: list, str
+            A list, or single string, of parameters to marginalize over. If None
+            then all parameters will be marginalized over.
+        not_parameters: list, str
+            Instead of a list of parameters to marginalize over you can list
+            the set of parameters to *not* marginalize over.
+
+        Returns
+        -------
+        out_array: array_like
+            An array containing the marginalized log likelihood/posterior.
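+
+        Examples
+        --------
+        A usage sketch (with ``grid`` an instance of this class): marginalize
+        the log posterior over everything except ``'m'``:
+
+        >>> ln_post_m = grid.marginalize(grid.ln_posterior, not_parameters='m')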
+ """ + + if parameters is None: + params = list(self.parameter_names) + + if not_parameters is not None: + if isinstance(not_parameters, str): + not_params = [not_parameters] + elif isinstance(not_parameters, list): + not_params = not_parameters + else: + raise TypeError("Parameters names must be a list or string") + + for name in list(params): + if name in not_params: + params.remove(name) + elif isinstance(parameters, str): + params = [parameters] + elif isinstance(parameters, list): + params = parameters + else: + raise TypeError("Parameters names must be a list or string") + + out_array = log_array.copy() + names = list(self.parameter_names) + + for name in params: + out_array = self._marginalize_single(out_array, name, names) + + return out_array + + def _marginalize_single(self, log_array, name, non_marg_names=None): + """ + Marginalize the log likelihood/posterior over a single given parameter. + + Parameters + ---------- + log_array: array_like + A :class:`numpy.ndarray` of log likelihood/posterior values. + name: str + The name of the parameter to marginalize over. + non_marg_names: list + A list of parameter names that have not been marginalized over. + + Returns + ------- + out: array_like + An array containing the marginalized log likelihood/posterior. + """ + + if name not in self.parameter_names: + raise ValueError("'{}' is not a recognised " + "parameter".format(name)) + + if non_marg_names is None: + non_marg_names = list(self.parameter_names) + + axis = non_marg_names.index(name) + non_marg_names.remove(name) + + places = self.sample_points[name] + + if len(places) > 1: + out = np.apply_along_axis( + logtrapzexp, axis, log_array, places[1] - places[0]) + else: + # no marginalisation required, just remove the singleton dimension + z = log_array.shape + q = np.arange(0, len(z)).astype(int) != axis + out = np.reshape(log_array, tuple((np.array(list(z)))[q])) + + return out + + @property + def ln_evidence(self): + return self.marginalize(self.ln_posterior) + + @property + def log_evidence(self): + return self.ln_evidence + + def marginalize_ln_likelihood(self, parameter=None, not_parameter=None): + """ + Marginalize the ln likelihood over either the specified parameter or + all but the specified "not_parameter". If neither is specified the + ln likelihood will be fully marginalized over. + + Parameters + ---------- + parameter: str, optional + Name of the parameter to marginalize over. + not_parameter: str, optional + Name of the parameter to not marginalize over. + Returns + ------- + array-like: + The marginalized ln likelihood. + """ + return self.marginalize(self.ln_likelihood, parameters=parameter, + not_parameters=not_parameter) + + def marginalize_ln_posterior(self, parameter=None, not_parameter=None): + """ + Marginalize the ln posterior over either the specified parameter or all + but the specified "not_parameter". If neither is specified the + ln posterior will be fully marginalized over. + + Parameters + ---------- + parameter: str, optional + Name of the parameter to marginalize over. + not_parameter: str, optional + Name of the parameter to not marginalize over. + Returns + ------- + array-like: + The marginalized ln posterior. + """ + return self.marginalize(self.ln_posterior, parameters=parameter, + not_parameters=not_parameter) + + def marginalize_likelihood(self, parameter=None, not_parameter=None): + """ + Marginalize the likelihood over either the specified parameter or all + but the specified "not_parameter". 
If neither is specified the + likelihood will be fully marginalized over. + + Parameters + ---------- + parameter: str, optional + Name of the parameter to marginalize over. + not_parameter: str, optional + Name of the parameter to not marginalize over. + Returns + ------- + array-like: + The marginalized likelihood. + """ + ln_like = self.marginalize(self.ln_likelihood, parameters=parameter, + not_parameters=not_parameter) + # NOTE: this outputs will not be properly normalised + return np.exp(ln_like - np.max(ln_like)) + + def marginalize_posterior(self, parameter=None, not_parameter=None): + """ + Marginalize the posterior over either the specified parameter or all + but the specified "not_parameter". If neither is specified the + posterior will be fully marginalized over. + + Parameters + ---------- + parameter: str, optional + Name of the parameter to marginalize over. + not_parameter: str, optional + Name of the parameter to not marginalize over. + Returns + ------- + array-like: + The marginalized posterior. + """ + ln_post = self.marginalize(self.ln_posterior, parameters=parameter, + not_parameters=not_parameter) + # NOTE: this outputs will not be properly normalised + return np.exp(ln_post - np.max(ln_post)) + + def _evaluate(self): + self._ln_likelihood = np.empty(self.mesh_grid[0].shape) + self._evaluate_recursion(0) + + def _evaluate_recursion(self, dimension): + if dimension == self.n_dims: + current_point = tuple([[int(np.where( + self.likelihood.parameters[name] == + self.sample_points[name])[0])] for name in self.parameter_names]) + self._ln_likelihood[current_point] = self.likelihood.log_likelihood() + else: + name = self.parameter_names[dimension] + for ii in range(self._ln_likelihood.shape[dimension]): + self.likelihood.parameters[name] = self.sample_points[name][ii] + self._evaluate_recursion(dimension + 1) + + def _get_sample_points(self, grid_size): + for ii, key in enumerate(self.parameter_names): + if isinstance(self.priors[key], Prior): + if isinstance(grid_size, int): + self.sample_points[key] = self.priors[key].rescale( + np.linspace(0, 1, grid_size)) + elif isinstance(grid_size, list): + if isinstance(grid_size[ii], int): + self.sample_points[key] = self.priors[key].rescale( + np.linspace(0, 1, grid_size[ii])) + else: + self.sample_points[key] = grid_size[ii] + elif isinstance(grid_size, dict): + if isinstance(grid_size[key], int): + self.sample_points[key] = self.priors[key].rescale( + np.linspace(0, 1, grid_size[key])) + else: + self.sample_points[key] = grid_size[key] + + # set the mesh of points + self.mesh_grid = np.meshgrid( + *(self.sample_points[key] for key in self.parameter_names), + indexing='ij') diff --git a/bilby/core/utils.py b/bilby/core/utils.py index dfb2117f84689cc06618ae6e393432a644aac701..5e97a641bf314671dac813a9129194a8261ebaac 100644 --- a/bilby/core/utils.py +++ b/bilby/core/utils.py @@ -11,6 +11,7 @@ import json import numpy as np from scipy.interpolate import interp2d +from scipy.special import logsumexp import pandas as pd logger = logging.getLogger('bilby') @@ -683,6 +684,25 @@ def derivatives(vals, func, releps=1e-3, abseps=None, mineps=1e-9, reltol=1e-3, return grads +def logtrapzexp(lnf, dx): + """ + Perform trapezium rule integration for the logarithm of a function on a regular grid. 
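+
+    The returned value is ``ln(dx / 2) + logsumexp([logsumexp(lnf[:-1]),
+    logsumexp(lnf[1:])])``, i.e. the trapezium rule evaluated in log-space
+    using the log-sum-exp trick to avoid numerical under/overflow.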
+
+    Parameters
+    ----------
+    lnf: array_like
+        A :class:`numpy.ndarray` of values that are the natural logarithm of a function.
+    dx: float
+        The step size between the function values; the function is assumed to
+        be evaluated on a regular grid.
+
+    Returns
+    -------
+    The natural logarithm of the area under the function.
+    """
+    return np.log(dx / 2.) + logsumexp([logsumexp(lnf[:-1]), logsumexp(lnf[1:])])
+
+
 def run_commandline(cl, log_level=20, raise_error=True, return_output=True):
     """Run a string cmd as a subprocess, check for errors and return output.
 
diff --git a/examples/other_examples/grid_example.py b/examples/other_examples/grid_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..af38eb259aab5feda4e726bbec49397cde018e8f
--- /dev/null
+++ b/examples/other_examples/grid_example.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+"""
+An example of how to use bilby to perform parameter estimation for
+non-gravitational wave data. In this case, fitting a linear function to
+data with background Gaussian noise, and then evaluating the likelihood
+and posterior on a grid.
+
+"""
+from __future__ import division
+import bilby
+import numpy as np
+import matplotlib.pyplot as plt
+
+# A few simple setup steps
+label = 'grid_example'
+outdir = 'outdir'
+bilby.utils.check_directory_exists_and_if_not_mkdir(outdir)
+
+
+# First, we define our "signal model", in this case a simple linear function
+def model(time, m, c):
+    return time * m + c
+
+
+# Now we define the injection parameters which we make simulated data with
+injection_parameters = dict(m=2.5, c=0.0)
+
+# For this example, we'll use Gaussian noise with a known standard deviation
+# that is drawn for each data point
+
+# These lines of code generate the fake data. Note the ** just unpacks the
+# contents of the injection_parameters when calling the model function.
+sampling_frequency = 10
+time_duration = 10
+time = np.arange(0, time_duration, 1 / sampling_frequency)
+N = len(time)
+sigma = np.random.normal(1, 0.01, N)
+data = model(time, **injection_parameters) + np.random.normal(0, sigma, N)
+
+# We quickly plot the data to check it looks sensible
+fig, ax = plt.subplots()
+ax.plot(time, data, 'o', label='data')
+ax.plot(time, model(time, **injection_parameters), '--r', label='signal')
+ax.set_xlabel('time')
+ax.set_ylabel('y')
+ax.legend()
+fig.savefig('{}/{}_data.png'.format(outdir, label))
+plt.close()
+
+# Now let's instantiate a version of our GaussianLikelihood, giving it
+# the time, data and signal model
+likelihood = bilby.likelihood.GaussianLikelihood(time, data, model, sigma)
+
+# From here on, the syntax is exactly equivalent to other bilby examples
+# We make a prior
+priors = dict()
+priors['m'] = bilby.core.prior.Uniform(0, 5, 'm')
+priors['c'] = bilby.core.prior.Uniform(-2, 2, 'c')
+
+grid = bilby.core.grid.Grid(likelihood=likelihood, priors=priors)
diff --git a/examples/other_examples/linear_regression_grid.py b/examples/other_examples/linear_regression_grid.py
new file mode 100644
index 0000000000000000000000000000000000000000..6549c860a3cc9ddb6862a0426c153894347a5682
--- /dev/null
+++ b/examples/other_examples/linear_regression_grid.py
@@ -0,0 +1,91 @@
+#!/usr/bin/env python
+"""
+An example of how to use bilby to perform parameter estimation for
+fitting a linear function to data with background Gaussian noise.
+This will compare the output of using a stochastic sampling method
+to evaluating the posterior on a grid.
+""" +from __future__ import division +import numpy as np +import matplotlib.pyplot as plt + +import bilby + +# A few simple setup steps +label = 'linear_regression_grid' +outdir = 'outdir' +bilby.utils.check_directory_exists_and_if_not_mkdir(outdir) + + +# First, we define our "signal model", in this case a simple linear function +def model(time, m, c): + return time * m + c + + +# Now we define the injection parameters which we make simulated data with +injection_parameters = dict() +injection_parameters['c'] = 0.2 +injection_parameters['m'] = 0.5 + +# For this example, we'll use standard Gaussian noise + +# These lines of code generate the fake data. Note the ** just unpacks the +# contents of the injection_parameters when calling the model function. +sampling_frequency = 10 +time_duration = 10 +time = np.arange(0, time_duration, 1 / sampling_frequency) +N = len(time) +sigma = 3. +data = model(time, **injection_parameters) + np.random.normal(0, sigma, N) + +# We quickly plot the data to check it looks sensible +fig, ax = plt.subplots() +ax.plot(time, data, 'o', label='data') +ax.plot(time, model(time, **injection_parameters), '--r', label='signal') +ax.set_xlabel('time') +ax.set_ylabel('y') +ax.legend() +fig.savefig('{}/{}_data.png'.format(outdir, label)) + +# Now lets instantiate a version of our GaussianLikelihood, giving it +# the time, data and signal model +likelihood = bilby.likelihood.GaussianLikelihood(time, data, model, sigma) + +# From hereon, the syntax is exactly equivalent to other bilby examples +# We make a prior +priors = bilby.core.prior.PriorDict() +priors['m'] = bilby.core.prior.Uniform(0, 5, 'm') +priors['c'] = bilby.core.prior.Uniform(-2, 2, 'c') + +# And run sampler +result = bilby.run_sampler( + likelihood=likelihood, priors=priors, sampler='dynesty', nlive=500, + sample='unif', injection_parameters=injection_parameters, outdir=outdir, + label=label) +fig = result.plot_corner(parameters=injection_parameters, save=False) + +grid = bilby.core.grid.Grid(likelihood, priors, grid_size={'m': 200, 'c': 100}) + +# overplot the grid estimates +grid_evidence = grid.log_evidence +axes = fig.get_axes() +axes[0].plot(grid.sample_points['c'], np.exp(grid.marginalize_ln_posterior( + not_parameter='c') - grid_evidence), 'k--') +axes[3].plot(grid.sample_points['m'], np.exp(grid.marginalize_ln_posterior( + not_parameter='m') - grid_evidence), 'k--') +axes[2].contour(grid.mesh_grid[1], grid.mesh_grid[0], + np.exp(grid.ln_posterior - np.max(grid.ln_posterior))) + +fig.savefig('{}/{}_corner.png'.format(outdir, label), dpi=300) + +# compare evidences +print('Dynesty log(evidence): {}'.format(result.log_evidence)) +print('Grid log(evidence): {}'.format(grid_evidence))