diff --git a/bilby/core/result.py b/bilby/core/result.py index b6a8e7c83086032bf126b9a13fd45a02298568cd..42cf236e192b36495058a92440639bf1de660b53 100644 --- a/bilby/core/result.py +++ b/bilby/core/result.py @@ -1,18 +1,20 @@ from __future__ import division import os -from distutils.version import LooseVersion from collections import OrderedDict, namedtuple +from copy import copy +from distutils.version import LooseVersion from itertools import product -import numpy as np -import pandas as pd import corner import json -import scipy.stats import matplotlib import matplotlib.pyplot as plt from matplotlib import lines as mpllines +import numpy as np +import pandas as pd +import scipy.stats +from scipy.special import logsumexp from . import utils from .utils import (logger, infer_parameters_from_function, @@ -99,7 +101,7 @@ class Result(object): log_evidence_err=np.nan, log_noise_evidence=np.nan, log_bayes_factor=np.nan, log_likelihood_evaluations=None, log_prior_evaluations=None, sampling_time=None, nburn=None, - walkers=None, max_autocorrelation_time=None, + walkers=None, max_autocorrelation_time=None, use_ratio=None, parameter_labels=None, parameter_labels_with_unit=None, gzip=False, version=None): """ A class to store the results of the sampling run @@ -138,6 +140,9 @@ class Result(object): The samplers taken by a ensemble MCMC samplers max_autocorrelation_time: float The estimated maximum autocorrelation time for MCMC samplers + use_ratio: bool + A boolean stating whether the likelihood ratio, as opposed to the + likelihood was used during sampling parameter_labels, parameter_labels_with_unit: list Lists of the latex-formatted parameter labels gzip: bool @@ -170,6 +175,7 @@ class Result(object): self.nested_samples = nested_samples self.walkers = walkers self.nburn = nburn + self.use_ratio = use_ratio self.log_evidence = log_evidence self.log_evidence_err = log_evidence_err self.log_noise_evidence = log_noise_evidence @@ -389,7 +395,7 @@ class Result(object): 'log_noise_evidence', 'log_bayes_factor', 'priors', 'posterior', 'injection_parameters', 'meta_data', 'search_parameter_keys', 'fixed_parameter_keys', 'constraint_parameter_keys', - 'sampling_time', 'sampler_kwargs', + 'sampling_time', 'sampler_kwargs', 'use_ratio', 'log_likelihood_evaluations', 'log_prior_evaluations', 'samples', 'nested_samples', 'walkers', 'nburn', 'parameter_labels', 'parameter_labels_with_unit', 'version'] @@ -1240,6 +1246,119 @@ class Result(object): return weights +class ResultList(list): + + def __init__(self, results=None): + """ A class to store a list of :class:`bilby.core.result.Result` objects + from equivalent runs on the same data. This provides methods for + outputing combined results. + + Parameters + ---------- + results: list + A list of `:class:`bilby.core.result.Result`. + """ + list.__init__(self) + for result in results: + self.append(result) + + def append(self, result): + """ + Append a :class:`bilby.core.result.Result`, or set of results, to the + list. + + Parameters + ---------- + result: :class:`bilby.core.result.Result` or filename + pointing to a result object, to append to the list. + """ + + if isinstance(result, Result): + super(ResultList, self).append(result) + elif isinstance(result, str): + super(ResultList, self).append(read_in_result(result)) + else: + raise TypeError("Could not append a non-Result type") + + def combine(self): + """ + Return the combined results in a :class:bilby.core.result.Result` + object. + """ + if len(self) == 0: + return Result() + elif len(self) == 1: + return copy(self[0]) + else: + result = copy(self[0]) + + if result.label is not None: + result.label += 'combined' + + self._check_consistent_sampler() + self._check_consistent_data() + self._check_consistent_parameters() + self._check_consistent_priors() + + # check which kind of sampler was used: MCMC or Nested Sampling + if result.nested_samples is not None: + posteriors, result = self._combine_nested_sampled_runs(result) + else: + posteriors = [res.posterior for res in self] + + combined_posteriors = pd.concat(posteriors, ignore_index=True) + result.posterior = combined_posteriors.sample(len(combined_posteriors)) # shuffle + return result + + def _combine_nested_sampled_runs(self, result): + self._check_nested_samples() + log_evidences = np.array([res.log_evidence for res in self]) + result.log_evidence = logsumexp(log_evidences, b=1. / len(self)) + if result.use_ratio: + result.log_bayes_factor = result.log_evidence + result.log_evidence = result.log_evidence + result.log_noise_evidence + else: + result.log_bayes_factor = result.log_evidence - result.log_noise_evidence + log_errs = [res.log_evidence_err for res in self if np.isfinite(res.log_evidence_err)] + result.log_evidence_err = logsumexp(2 * np.array(log_errs), b=1. / len(self)) + result_weights = np.exp(log_evidences - np.max(log_evidences)) + posteriors = [] + for res, frac in zip(self, result_weights): + selected_samples = (np.random.uniform(size=len(res.posterior)) < frac) + posteriors.append(res.posterior[selected_samples]) + # remove original nested_samples + result.nested_samples = None + result.sampler_kwargs = None + return posteriors, result + + def _check_nested_samples(self): + for res in self: + try: + res.nested_samples + except ValueError: + raise CombineResultError("Cannot combine results: No nested samples available " + "in all results") + + def _check_consistent_priors(self): + for res in self: + for p in self[0].priors.keys(): + if not self[0].priors[p] == res.priors[p] or len(self[0].priors) != len(res.priors): + raise CombineResultError("Cannot combine results: inconsistent priors") + + def _check_consistent_parameters(self): + if not np.all([set(self[0].search_parameter_keys) == set(res.search_parameter_keys) for res in self]): + raise CombineResultError("Cannot combine results: inconsistent parameters") + + def _check_consistent_data(self): + if not np.all([res.log_noise_evidence == self[0].log_noise_evidence for res in self])\ + and not np.all([np.isnan(res.log_noise_evidence) for res in self]): + raise CombineResultError("Cannot combine results: inconsistent data") + + def _check_consistent_sampler(self): + if not np.all([res.sampler == self[0].sampler for res in self]): + raise CombineResultError("Cannot combine results: inconsistent samplers") + + def plot_multiple(results, filename=None, labels=None, colours=None, save=True, evidences=False, **kwargs): """ Generate a corner plot overlaying two sets of results @@ -1409,5 +1528,9 @@ class ResultError(Exception): """ Base exception for all Result related errors """ +class CombineResultError(ResultError): + """ For Errors occuring during combining results. """ + + class FileMovedError(ResultError): """ Exceptions that occur when files have been moved """ diff --git a/bilby/core/sampler/base_sampler.py b/bilby/core/sampler/base_sampler.py index a120dda1920daf6dd74e12e22451665d38dbb7c2..c81a692eadd88cd8d93a03977d09c6b16d552304 100644 --- a/bilby/core/sampler/base_sampler.py +++ b/bilby/core/sampler/base_sampler.py @@ -218,7 +218,7 @@ class Sampler(object): constraint_parameter_keys=self._constraint_parameter_keys, priors=self.priors, meta_data=self.meta_data, injection_parameters=self.injection_parameters, - sampler_kwargs=self.kwargs) + sampler_kwargs=self.kwargs, use_ratio=self.use_ratio) if result_class is None: result = Result(**result_kwargs) diff --git a/bilby/core/sampler/cpnest.py b/bilby/core/sampler/cpnest.py index 6acd33689f2c6dbcf6204387f52d2737bd6ef9b7..2f11d94ff54c2c9c8e82bc9aca505e20b3e5b0e0 100644 --- a/bilby/core/sampler/cpnest.py +++ b/bilby/core/sampler/cpnest.py @@ -53,6 +53,7 @@ class Cpnest(NestedSampler): def run_sampler(self): from cpnest import model as cpmodel, CPNest from cpnest.parameter import LivePoint + from cpnest.nest2pos import compute_weights class Model(cpmodel.Model): """ A wrapper class to pass our log_likelihood into cpnest """ @@ -100,8 +101,13 @@ class Cpnest(NestedSampler): out.plot() self.result.posterior = DataFrame(out.posterior_samples) - self.result.posterior.rename(columns=dict( - logL='log_likelihood', logPrior='log_prior'), inplace=True) + self.result.nested_samples = DataFrame(out.get_nested_samples(filename='')) + self.result.nested_samples.rename(columns=dict(logL='log_likelihood'), inplace=True) + self.result.posterior.rename(columns=dict(logL='log_likelihood', logPrior='log_prior'), + inplace=True) + _, log_weights = compute_weights(np.array(self.result.nested_samples.log_likelihood), + np.array(out.NS.state.nlive)) + self.result.nested_samples.weights = np.exp(log_weights) self.result.log_evidence = out.NS.state.logZ self.result.log_evidence_err = np.sqrt(out.NS.state.info / out.NS.state.nlive) return self.result diff --git a/test/result_test.py b/test/result_test.py index a18a44ac6bb088253bfe014f99b01de87f33bddc..269afaa8e37164f422eca10fd784e272bd596b03 100644 --- a/test/result_test.py +++ b/test/result_test.py @@ -391,5 +391,149 @@ class TestResult(unittest.TestCase): self.result.kde([[0, 0.1], [0.8, 0]]))) +class TestResultList(unittest.TestCase): + + def setUp(self): + np.random.seed(7) + bilby.utils.command_line_args.test = False + self.priors = bilby.prior.PriorDict(dict( + x=bilby.prior.Uniform(0, 1, 'x', latex_label='$x$', unit='s'), + y=bilby.prior.Uniform(0, 1, 'y', latex_label='$y$', unit='m'), + c=1, + d=2)) + + # create two cpnest results + self.nested_results = bilby.result.ResultList([]) + self.mcmc_results = bilby.result.ResultList([]) + self.expected_nested = [] + + self.outdir = 'outdir' + self.label = 'label' + n = 100 + for i in range(2): + result = bilby.core.result.Result( + label=self.label + str(i), outdir=self.outdir, sampler='cpnest', + search_parameter_keys=['x', 'y'], fixed_parameter_keys=['c', 'd'], + priors=self.priors, sampler_kwargs=dict(test='test', func=lambda x: x, nlive=10), + injection_parameters=dict(x=0.5, y=0.5), + meta_data=dict(test='test')) + + posterior = pd.DataFrame(dict(x=np.random.normal(0, 1, n), + y=np.random.normal(0, 1, n), + log_likelihood=sorted(np.random.normal(0, 1, n)))) + result.posterior = posterior[-10:] # use last 10 samples as posterior + result.nested_samples = posterior + result.log_evidence = 10 + result.log_evidence_err = 11 + result.log_bayes_factor = 12 + result.log_noise_evidence = 13 + self.nested_results.append(result) + self.expected_nested.append(result) + for i in range(2): + result = bilby.core.result.Result( + label=self.label + str(i) + 'mcmc', outdir=self.outdir, sampler='emcee', + search_parameter_keys=['x', 'y'], fixed_parameter_keys=['c', 'd'], + priors=self.priors, sampler_kwargs=dict(test='test', func=lambda x: x, nlive=10), + injection_parameters=dict(x=0.5, y=0.5), + meta_data=dict(test='test')) + + posterior = pd.DataFrame(dict(x=np.random.normal(0, 1, n), + y=np.random.normal(0, 1, n), + log_likelihood=sorted(np.random.normal(0, 1, n)))) + result.posterior = posterior[-10:] + result.log_evidence = 10 + result.log_evidence_err = 11 + result.log_bayes_factor = 12 + result.log_noise_evidence = 13 + self.mcmc_results.append(result) + for res in self.nested_results: + res.save_to_file() + + def tearDown(self): + bilby.utils.command_line_args.test = True + try: + shutil.rmtree(self.nested_results[0].outdir) + except OSError: + pass + + del self.nested_results + del self.label + del self.outdir + del self.expected_nested + del self.mcmc_results + del self.priors + + def test_append_illegal_type(self): + with self.assertRaises(TypeError): + _ = bilby.core.result.ResultList(1) + + def test_append_from_string(self): + self.nested_results.append(self.outdir + '/' + self.label + '1_result.json') + self.assertEqual(3, len(self.nested_results)) + + def test_append_result_type(self): + self.nested_results.append(self.nested_results[1]) + self.expected_nested.append(self.expected_nested[1]) + self.assertListEqual(self.nested_results, self.nested_results) + + def test_combine_inconsistent_samplers(self): + self.nested_results[0].sampler = 'dynesty' + with self.assertRaises(bilby.result.CombineResultError): + self.nested_results.combine() + + def test_combine_inconsistent_priors_length(self): + self.nested_results[0].priors = bilby.prior.PriorDict(dict( + x=bilby.prior.Uniform(0, 1, 'x', latex_label='$x$', unit='s'), + y=bilby.prior.Uniform(0, 1, 'y', latex_label='$y$', unit='m'), + c=1)) + with self.assertRaises(bilby.result.CombineResultError): + self.nested_results.combine() + + def test_combine_inconsistent_priors_types(self): + self.nested_results[0].priors = bilby.prior.PriorDict(dict( + x=bilby.prior.Uniform(0, 1, 'x', latex_label='$x$', unit='s'), + y=bilby.prior.Uniform(0, 1, 'y', latex_label='$y$', unit='m'), + c=1, + d=bilby.core.prior.Cosine())) + with self.assertRaises(bilby.result.CombineResultError): + self.nested_results.combine() + + def test_combine_inconsistent_search_parameters(self): + self.nested_results[0].search_parameter_keys = ['y'] + with self.assertRaises(bilby.result.CombineResultError): + self.nested_results.combine() + + def test_combine_inconsistent_data(self): + self.nested_results[0].log_noise_evidence = -7 + with self.assertRaises(bilby.result.CombineResultError): + self.nested_results.combine() + + def test_combine_inconsistent_data_nan(self): + self.nested_results[0].log_noise_evidence = np.nan + self.nested_results[1].log_noise_evidence = np.nan + self.nested_results.combine() + + def test_combine_inconsistent_sampling_data(self): + result = bilby.core.result.Result( + label=self.label, outdir=self.outdir, sampler='cpnest', + search_parameter_keys=['x', 'y'], fixed_parameter_keys=['c', 'd'], + priors=self.priors, sampler_kwargs=dict(test='test', func=lambda x: x, nlive=10), + injection_parameters=dict(x=0.5, y=0.5), + meta_data=dict(test='test')) + + posterior = pd.DataFrame(dict(x=np.random.normal(0, 1, 100), + y=np.random.normal(0, 1, 100), + log_likelihood=sorted(np.random.normal(0, 1, 100)))) + result.posterior = posterior[-10:] # use last 10 samples as posterior + result.log_evidence = 10 + result.log_evidence_err = 11 + result.log_bayes_factor = 12 + result.log_noise_evidence = 13 + result._nested_samples = None + self.nested_results.append(result) + with self.assertRaises(bilby.result.CombineResultError): + self.nested_results.combine() + + if __name__ == '__main__': unittest.main()