diff --git a/bilby/core/sampler/dynesty.py b/bilby/core/sampler/dynesty.py index a7f45cda21b4cfd37b4c00f9fab67cec3e7d5bc5..dc748792a6d265c86a9b48b8188a373bfe1a9d70 100644 --- a/bilby/core/sampler/dynesty.py +++ b/bilby/core/sampler/dynesty.py @@ -3,7 +3,6 @@ import inspect import os import sys import time -import warnings import numpy as np from pandas import DataFrame @@ -13,12 +12,19 @@ from ..utils import ( check_directory_exists_and_if_not_mkdir, latex_plot_format, logger, - reflect, safe_file_dump, ) from .base_sampler import NestedSampler, Sampler, _SamplingContainer, signal_wrapper +def _set_sampling_kwargs(args): + nact, maxmcmc, proposals, naccept = args + _SamplingContainer.nact = nact + _SamplingContainer.maxmcmc = maxmcmc + _SamplingContainer.proposals = proposals + _SamplingContainer.naccept = naccept + + def _prior_transform_wrapper(theta): """Wrapper to the prior transformation. Needed for multiprocessing.""" from .base_sampler import _sampling_convenience_dump @@ -101,38 +107,42 @@ class Dynesty(NestedSampler): If true, resume run from checkpoint (if available) maxmcmc: int (5000) The maximum length of the MCMC exploration to find a new point - nact: int (5) - The number of "autocorrelation" times to continue the MCMC for. - Note that this is a very poor approximation to the true ACT and should - be interpreted very loosely. - rejection_sample_posterior: bool + nact: int (2) + The number of autocorrelation lengths for MCMC exploration. + For use with the :code:`act-walk` and :code:`rwalk` sample methods. + See the dynesty guide in the Bilby docs for more details. + naccept: int (60) + The expected number of accepted steps for MCMC exploration when using + the :code:`acceptance-walk` sampling method. + rejection_sample_posterior: bool (True) Whether to form the posterior by rejection sampling the nested samples. If False, the nested samples are resampled with repetition. This was the default behaviour in :code:`Bilby<=1.4.1` and leads to non-independent samples being produced. + proposals: iterable (None) + The proposal methods to use during MCMC. This can be some combination + of :code:`"diff", "volumetric"`. See the dynesty guide in the Bilby docs + for more details. default=:code:`["diff"]`. Other Parameters ================ nlive: int, (1000) The number of live points, note this can also equivalently be given as one of [nlive, nlives, n_live_points, npoints] - bound: {'none', 'single', 'multi', 'balls', 'cubes'}, ('multi') + bound: {'live', 'live-multi', 'none', 'single', 'multi', 'balls', 'cubes'}, ('live') Method used to select new points - sample: {'unif', 'rwalk', 'slice', 'rslice', 'hslice'}, ('rwalk') + sample: {'act-walk', 'acceptance-walk', 'unif', 'rwalk', 'slice', + 'rslice', 'hslice', 'rwalk_dynesty'}, ('act-walk') Method used to sample uniformly within the likelihood constraints, conditioned on the provided bounds - walks: int - Number of walks taken if using `sample='rwalk'`, defaults to `100`. + walks: int (100) + Number of walks taken if using the dynesty implemented sample methods Note that the default `walks` in dynesty itself is 25, although using `ndim * 10` can be a reasonable rule of thumb for new problems. + For :code:`sample="act-walk"` and :code:`sample="rwalk"` this parameter + has no impact on the sampling. dlogz: float, (0.1) Stopping criteria - facc: float, (0.2) - The target acceptance fraction for the rwalk evolution. The proposal - scale is tuned to meet this fraction. - save_bounds: bool, (False) - Whether to save the dynesty bounding ellipse objects. This is disabled - by default as it can lead to extremely large memory usage. """ @property @@ -143,7 +153,9 @@ class Dynesty(NestedSampler): for key, param in params.items() if param.default != param.empty } - kwargs["sample"] = "rwalk" + kwargs["sample"] = "act-walk" + kwargs["bound"] = "live" + kwargs["update_interval"] = 600 kwargs["facc"] = 0.2 return kwargs @@ -184,12 +196,16 @@ class Dynesty(NestedSampler): exit_code=130, print_method="tqdm", maxmcmc=5000, - nact=5, + nact=2, + naccept=60, rejection_sample_posterior=True, + proposals=None, **kwargs, ): - _SamplingContainer.maxmcmc = maxmcmc - _SamplingContainer.nact = nact + self.nact = nact + self.naccept = naccept + self.maxmcmc = maxmcmc + self.proposals = proposals self.print_method = print_method self._translate_kwargs(kwargs) super(Dynesty, self).__init__( @@ -214,7 +230,9 @@ class Dynesty(NestedSampler): self.nestcheck = nestcheck if self.n_check_point is None: - self.n_check_point = 1000 + self.n_check_point = max( + int(check_point_delta_t / self._log_likelihood_eval_time / 10), 10 + ) self.check_point_delta_t = check_point_delta_t logger.info(f"Checkpoint every check_point_delta_t = {check_point_delta_t}s") @@ -372,29 +390,104 @@ class Dynesty(NestedSampler): return Sampler - @signal_wrapper - def run_sampler(self): + def _set_sampling_method(self): + """ + Resolve the sampling method and sampler to use from the provided + :code:`bound` and :code:`sample` arguments. + + This requires registering the :code:`bilby` specific methods in the + appropriate locations within :code:`dynesty`. + + Additionally, some combinations of bound/sample/proposals are not + compatible and so we either warn the user or raise an error. + """ import dynesty - logger.info(f"Using dynesty version {dynesty.__version__}") + _set_sampling_kwargs((self.nact, self.maxmcmc, self.proposals, self.naccept)) + + sample = self.kwargs["sample"] + bound = self.kwargs["bound"] - if self.kwargs.get("sample", "rwalk") == "rwalk": + if sample not in ["rwalk", "act-walk", "acceptance-walk"] and bound in [ + "live", + "live-multi", + ]: logger.info( - "Using the bilby-implemented rwalk sample method with ACT estimated walks" + "Live-point based bound method requested with dynesty sample " + f"'{sample}', overwriting to 'multi'" ) - dynesty.dynesty._SAMPLING["rwalk"] = sample_rwalk_bilby - dynesty.nestedsamplers._SAMPLING["rwalk"] = sample_rwalk_bilby - if self.kwargs["walks"] > _SamplingContainer.maxmcmc: - raise DynestySetupError("You have maxmcmc > walks (minimum mcmc)") - if _SamplingContainer.nact < 1: + self.kwargs["bound"] = "multi" + elif bound == "live": + from .dynesty_utils import LivePointSampler + + dynesty.dynamicsampler._SAMPLERS["live"] = LivePointSampler + elif bound == "live-multi": + from .dynesty_utils import MultiEllipsoidLivePointSampler + + dynesty.dynamicsampler._SAMPLERS[ + "live-multi" + ] = MultiEllipsoidLivePointSampler + elif sample == "acceptance-walk": + raise DynestySetupError( + "bound must be set to live or live-multi for sample=acceptance-walk" + ) + elif self.proposals is None: + logger.warning( + "No proposals specified using dynesty sampling, defaulting " + "to 'volumetric'." + ) + self.proposals = ["volumetric"] + _SamplingContainer.proposals = self.proposals + elif "diff" in self.proposals: + raise DynestySetupError( + "bound must be set to live or live-multi to use differential " + "evolution proposals" + ) + + if sample == "rwalk": + logger.info( + "Using the bilby-implemented rwalk sample method with ACT estimated walks. " + f"An average of {2 * self.nact} steps will be accepted up to chain length " + f"{self.maxmcmc}." + ) + from .dynesty_utils import sample_rwalk_bilby + + if self.kwargs["walks"] > self.maxmcmc: + raise DynestySetupError("You have maxmcmc < walks (minimum mcmc)") + if self.nact < 1: raise DynestySetupError("Unable to run with nact < 1") - elif self.kwargs.get("sample") == "rwalk_dynesty": - self._kwargs["sample"] = "rwalk" - logger.info("Using the dynesty-implemented rwalk sample method") - elif self.kwargs.get("sample") == "rstagger_dynesty": - self._kwargs["sample"] = "rstagger" - logger.info("Using the dynesty-implemented rstagger sample method") + dynesty.nestedsamplers._SAMPLING["rwalk"] = sample_rwalk_bilby + elif sample == "acceptance-walk": + logger.info( + "Using the bilby-implemented rwalk sampling with an average of " + f"{self.naccept} accepted steps per MCMC and maximum length {self.maxmcmc}" + ) + from .dynesty_utils import fixed_length_rwalk_bilby + + dynesty.nestedsamplers._SAMPLING[ + "acceptance-walk" + ] = fixed_length_rwalk_bilby + elif sample == "act-walk": + logger.info( + "Using the bilby-implemented rwalk sampling tracking the " + f"autocorrelation function and thinning by " + f"{self.nact} with maximum length {self.nact * self.maxmcmc}" + ) + from .dynesty_utils import bilby_act_rwalk + + dynesty.nestedsamplers._SAMPLING["act-walk"] = bilby_act_rwalk + elif sample == "rwalk_dynesty": + sample = sample.strip("_dynesty") + self.kwargs["sample"] = sample + logger.info(f"Using the dynesty-implemented {sample} sample method") + @signal_wrapper + def run_sampler(self): + import dynesty + + logger.info(f"Using dynesty version {dynesty.__version__}") + + self._set_sampling_method() self._setup_pool() if self.resume: @@ -446,7 +539,32 @@ class Dynesty(NestedSampler): return self.result + def _setup_pool(self): + """ + In addition to the usual steps, we need to set the sampling kwargs on + every process. To make sure we get every process, run the kwarg setting + more times than we have processes. + """ + super(Dynesty, self)._setup_pool() + if self.pool is not None: + args = ( + [(self.nact, self.maxmcmc, self.proposals, self.naccept)] + * self.npool + * 10 + ) + self.pool.map(_set_sampling_kwargs, args) + def _generate_result(self, out): + """ + Extract the information we need from the dynesty output. This includes + the evidence, nested samples, run statistics. In addition, we generate + the posterior samples from the nested samples. + + Parameters + ========== + out: dynesty.result.Result + The dynesty output. + """ import dynesty from scipy.special import logsumexp @@ -500,6 +618,14 @@ class Dynesty(NestedSampler): sampler_kwargs["add_live"] = True def _run_external_sampler_with_checkpointing(self): + """ + In order to access the checkpointing, we run the sampler for short + periods of time (less than the checkpoint time) and if sufficient + time has passed, write a checkpoint before continuing. To get the most + informative checkpoint plots, the current live points are added to the + chain of nested samples within dynesty and have to be removed before + restarting the sampler. + """ logger.debug("Running sampler with checkpointing") old_ncall = self.sampler.ncall @@ -646,6 +772,11 @@ class Dynesty(NestedSampler): self.sampler.M = self.sampler.pool.map def dump_samples_to_dat(self): + """ + Save the current posterior samples to a space-separated plain-text + file. These are unbiased posterior samples, however, there will not + be many of them until the analysis is nearly over. + """ sampler = self.sampler ln_weights = sampler.saved_logwt - sampler.saved_logz[-1] @@ -664,10 +795,20 @@ class Dynesty(NestedSampler): df.to_csv(filename, index=False, header=True, sep=" ") def plot_current_state(self): - import matplotlib.pyplot as plt + """ + Make diagonstic plots of the history and current state of the sampler. + + These plots are a mixture of :code:`dynesty` implemented run and trace + plots and our custom stats plot. We also make a copy of the trace plot + using the unit hypercube samples to reflect the internal state of the + sampler. + Any errors during plotting should be handled so that sampling can + continue. + """ if self.check_point_plot: import dynesty.plotting as dyplot + import matplotlib.pyplot as plt labels = [label.replace("_", " ") for label in self.search_parameter_keys] try: @@ -757,6 +898,7 @@ class Dynesty(NestedSampler): plt.close("all") def _run_test(self): + """Run the sampler very briefly as a sanity test that it works.""" import pandas as pd self.sampler = self.sampler_init( @@ -804,168 +946,17 @@ class Dynesty(NestedSampler): return self.priors.rescale(self._search_parameter_keys, theta) -def sample_rwalk_bilby(args): - """Modified bilby-implemented version of dynesty.sampling.sample_rwalk""" - from dynesty.utils import get_random_generator, unitcheck - - # Unzipping. - (u, loglstar, axes, scale, prior_transform, loglikelihood, rseed, kwargs) = args - rstate = get_random_generator(rseed) - - # Bounds - nonbounded = kwargs.get("nonbounded", None) - if nonbounded is not None and sum(nonbounded) == 0: - nonbounded = None - periodic = kwargs.get("periodic", None) - reflective = kwargs.get("reflective", None) - - # Setup. - n = len(u) - walks = kwargs.get("walks", 100) # minimum number of steps - maxmcmc = _SamplingContainer.maxmcmc - nact = _SamplingContainer.nact - old_act = getattr(_SamplingContainer, "old_act", walks) - - # Initialize internal variables - accept = 0 - reject = 0 - nfail = 0 - act = np.inf - u_list = [] - v_list = [] - logl_list = [] - - ii = 0 - while ii < nact * act: - ii += 1 - - # Propose a direction on the unit n-sphere. - drhat = rstate.normal(0, 1, n) - drhat /= np.linalg.norm(drhat) - - # Scale based on dimensionality. - dr = drhat * rstate.uniform(0, 1) ** (1.0 / n) - - # Transform to proposal distribution. - du = np.dot(axes, dr) - u_prop = u + scale * du - - # Wrap periodic parameters - if periodic is not None: - u_prop[periodic] = np.mod(u_prop[periodic], 1) - # Reflect - if reflective is not None: - u_prop[reflective] = reflect(u_prop[reflective]) - - # Check unit cube constraints. - if unitcheck(u_prop, nonbounded): - pass - else: - nfail += 1 - # Only start appending to the chain once a single jump is made - if accept > 0: - u_list.append(u_list[-1]) - v_list.append(v_list[-1]) - logl_list.append(logl_list[-1]) - continue - - # Check proposed point. - v_prop = prior_transform(np.array(u_prop)) - logl_prop = loglikelihood(np.array(v_prop)) - if logl_prop > loglstar: - u = u_prop - v = v_prop - logl = logl_prop - accept += 1 - u_list.append(u) - v_list.append(v) - logl_list.append(logl) - else: - reject += 1 - # Only start appending to the chain once a single jump is made - if accept > 0: - u_list.append(u_list[-1]) - v_list.append(v_list[-1]) - logl_list.append(logl_list[-1]) - - # If we've taken the minimum number of steps, calculate the ACT - if accept + reject > walks: - act = estimate_nmcmc( - accept_ratio=accept / (accept + reject + nfail), - old_act=old_act, - maxmcmc=maxmcmc, - ) - - # If we've taken too many likelihood evaluations then break - if accept + reject > maxmcmc: - warnings.warn( - f"Hit maximum number of walks {maxmcmc} with accept={accept}," - f" reject={reject}, and nfail={nfail} try increasing maxmcmc" - ) - break - - # If the act is finite, pick randomly from within the chain - if np.isfinite(act) and int(0.5 * nact * act) < len(u_list): - idx = np.random.randint(int(0.5 * nact * act), len(u_list)) - u = u_list[idx] - v = v_list[idx] - logl = logl_list[idx] - else: - logger.debug("Unable to find a new point using walk: returning a random point") - u = np.random.uniform(size=n) - v = prior_transform(u) - logl = loglikelihood(v) - - blob = {"accept": accept, "reject": reject, "fail": nfail, "scale": scale} - _SamplingContainer.old_act = act - - ncall = accept + reject - return u, v, logl, ncall, blob - - -def estimate_nmcmc(accept_ratio, old_act, maxmcmc, safety=5, tau=None): - """Estimate autocorrelation length of chain using acceptance fraction - - Using ACL = (2/acc) - 1 multiplied by a safety margin. Code adapted from CPNest: - - - https://github.com/johnveitch/cpnest/blob/master/cpnest/sampler.py - - http://github.com/farr/Ensemble.jl - - Parameters - ========== - accept_ratio: float [0, 1] - Ratio of the number of accepted points to the total number of points - old_act: int - The ACT of the last iteration - maxmcmc: int - The maximum length of the MCMC chain to use - safety: int - A safety factor applied in the calculation - tau: int (optional) - The ACT, if given, otherwise estimated. - - """ - if tau is None: - tau = maxmcmc / safety - - if accept_ratio == 0.0: - Nmcmc_exact = (1 + 1 / tau) * old_act - else: - Nmcmc_exact = (1.0 - 1.0 / tau) * old_act + (safety / tau) * ( - 2.0 / accept_ratio - 1.0 - ) - Nmcmc_exact = float(min(Nmcmc_exact, maxmcmc)) - return max(safety, Nmcmc_exact) - - @latex_plot_format def dynesty_stats_plot(sampler): """ Plot diagnostic statistics from a dynesty run The plotted quantities per iteration are: + - nc: the number of likelihood calls - - scale: the scale applied to the MCMC steps + - scale: the number of accepted MCMC steps if using :code:`bound="live"` + or :code:`bound="live-multi"`, otherwise, the scale applied to the MCMC + steps - lifetime: the number of iterations a point stays in the live set There is also a histogram of the lifetime compared with the theoretical @@ -973,7 +964,8 @@ def dynesty_stats_plot(sampler): Parameters ---------- - sampler + sampler: dynesty.sampler.Sampler + The sampler object containing the run history. Returns ------- diff --git a/bilby/core/sampler/dynesty_utils.py b/bilby/core/sampler/dynesty_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..8c71c6a0d82fa55959d4b90955597e576886908f --- /dev/null +++ b/bilby/core/sampler/dynesty_utils.py @@ -0,0 +1,716 @@ +import warnings + +import numpy as np +from dynesty.nestedsamplers import MultiEllipsoidSampler, UnitCubeSampler +from dynesty.utils import apply_reflect, get_random_generator + +from ...bilby_mcmc.chain import calculate_tau +from ..utils.log import logger +from .base_sampler import _SamplingContainer + + +class LivePointSampler(UnitCubeSampler): + """ + Modified version of dynesty UnitCubeSampler that adapts the MCMC + length in addition to the proposal scale, this corresponds to + :code:`bound=live`. + + In order to support live-point based proposals, e.g., differential + evolution (:code:`diff`), the live points are added to the + :code:`kwargs` passed to the evolve method. + + Note that this does not perform ellipsoid clustering as with the + :code:`bound=multi` option, if ellipsoid-based proposals are used, e.g., + :code:`volumetric`, consider using the + :code:`MultiEllipsoidLivePointSampler` (:code:`sample=live-multi`). + """ + + rebuild = False + + def update_user(self, blob, update=True): + """ + Update the proposal parameters based on the number of accepted steps + and MCMC chain length. + + There are a number of logical checks performed: + - if the ACT tracking rwalk method is being used and any parallel + process has an empty cache, set the :code:`rebuild` flag to force + the cache to rebuild at the next call. This improves the efficiency + when using parallelisation. + - update the :code:`walks` parameter to asymptotically approach the + desired number of accepted steps for the :code:`FixedRWalk` proposal. + - update the ellipsoid scale if the ellipsoid proposals are being used. + """ + # do we need to trigger rebuilding the cache + if blob.get("remaining", 0) == 1: + self.rebuild = True + if update: + self.kwargs["rebuild"] = self.rebuild + self.rebuild = False + + # update walks to match target naccept + accept_prob = max(0.5, blob["accept"]) / self.kwargs["walks"] + delay = self.nlive // 10 - 1 + n_target = getattr(_SamplingContainer, "naccept", 60) + self.walks = (self.walks * delay + n_target / accept_prob) / (delay + 1) + self.kwargs["walks"] = min(int(np.ceil(self.walks)), _SamplingContainer.maxmcmc) + + self.scale = blob["accept"] + + update_rwalk = update_user + + def propose_live(self, *args): + """ + We need to make sure the live points are passed to the proposal + function if we are using live point-based proposals. + """ + self.kwargs["nlive"] = self.nlive + self.kwargs["live"] = self.live_u + i = self.rstate.integers(self.nlive) + u = self.live_u[i, :] + return u, np.identity(self.npdim) + + +class MultiEllipsoidLivePointSampler(MultiEllipsoidSampler): + """ + Modified version of dynesty MultiEllipsoidSampler that adapts the MCMC + length in addition to the proposal scale, this corresponds to + :code:`bound=live-multi`. + + Additionally, in order to support live point-based proposals, e.g., + differential evolution (:code:`diff`), the live points are added to the + :code:`kwargs` passed to the evolve method. + + When just using the :code:`diff` proposal method, consider using the + :code:`LivePointSampler` (:code:`sample=live`). + """ + + rebuild = False + + def update_user(self, blob, update=True): + super(MultiEllipsoidLivePointSampler, self).update_rwalk( + blob=blob, update=update + ) + LivePointSampler.update_user(self, blob=blob, update=update) + + update_rwalk = update_user + + def propose_live(self, *args): + """ + We need to make sure the live points are passed to the proposal + function if we are using ensemble proposals. + """ + self.kwargs["nlive"] = self.nlive + self.kwargs["live"] = self.live_u + return super(MultiEllipsoidLivePointSampler, self).propose_live(*args) + + +class FixedRWalk: + """ + Run the MCMC walk for a fixed length. This is nearly equivalent to + :code:`bilby.sampling.sample_rwalk` except that different proposal + distributions can be used. + """ + + def __call__(self, args): + current_u = args.u + naccept = 0 + ncall = 0 + + periodic = args.kwargs["periodic"] + reflective = args.kwargs["reflective"] + boundary_kwargs = dict(periodic=periodic, reflective=reflective) + + proposals, common_kwargs, proposal_kwargs = _get_proposal_kwargs(args) + walks = len(proposals) + + accepted = list() + + for prop in proposals: + u_prop = proposal_funcs[prop]( + u=current_u, **common_kwargs, **proposal_kwargs[prop] + ) + u_prop = apply_boundaries_(u_prop=u_prop, **boundary_kwargs) + if u_prop is None: + accepted.append(0) + continue + + v_prop = args.prior_transform(u_prop) + logl_prop = args.loglikelihood(v_prop) + ncall += 1 + + if logl_prop > args.loglstar: + current_u = u_prop + current_v = v_prop + logl = logl_prop + naccept += 1 + accepted.append(1) + else: + accepted.append(0) + + if naccept == 0: + logger.debug( + "Unable to find a new point using walk: returning a random point. " + "If this warning occurs often, increase naccept." + ) + # Technically we can find out the likelihood value + # stored somewhere + # But I'm currently recomputing it + current_u = common_kwargs["rstate"].uniform(0, 1, len(current_u)) + current_v = args.prior_transform(current_u) + logl = args.loglikelihood(current_v) + + blob = { + "accept": naccept, + "reject": walks - naccept, + "scale": args.scale, + } + + return current_u, current_v, logl, ncall, blob + + +class ACTTrackingRWalk: + """ + Run the MCMC sampler for many iterations in order to reliably estimate + the autocorrelation time. + + This builds a cache of :code:`nact / thin` points consistent with the + likelihood constraint. + While this approach is the most robust, it is not well optimized for + parallelised sampling as the length of the MCMC will be different for each + parallel process. + """ + + _cache = list() + + def __init__(self): + self.act = 1 + self.thin = getattr(_SamplingContainer, "nact", 2) + self.maxmcmc = getattr(_SamplingContainer, "maxmcmc", 5000) * 50 + + def __call__(self, args): + self.args = args + if args.kwargs.get("rebuild", False): + logger.debug("Force rebuilding cache") + self.build_cache() + while self.cache[0][2] < args.loglstar: + self.cache.pop(0) + current_u, current_v, logl, ncall, blob = self.cache.pop(0) + blob["remaining"] = len(self.cache) + return current_u, current_v, logl, ncall, blob + + @property + def cache(self): + if len(self._cache) == 0: + self.build_cache() + else: + logger.debug(f"Not rebuilding cache, remaining size {len(self._cache)}") + return self._cache + + def build_cache(self): + args = self.args + # Bounds + periodic = args.kwargs.get("periodic", None) + reflective = args.kwargs.get("reflective", None) + boundary_kwargs = dict(periodic=periodic, reflective=reflective) + + proposals, common_kwargs, proposal_kwargs = _get_proposal_kwargs(args) + + # Setup + current_u = args.u + check_interval = int(np.ceil(self.act)) + target_nact = 50 + next_check = check_interval + n_checks = 0 + + # Initialize internal variables + current_v = args.prior_transform(np.array(current_u)) + logl = args.loglikelihood(np.array(current_v)) + accept = 0 + reject = 0 + nfail = 0 + ncall = 0 + act = np.inf + u_list = [current_u] + v_list = [current_v] + logl_list = [logl] + most_failures = 0 + current_failures = 0 + + iteration = 0 + while iteration < min(target_nact * act, self.maxmcmc): + iteration += 1 + + prop = proposals[iteration % len(proposals)] + u_prop = proposal_funcs[prop]( + u=current_u, **common_kwargs, **proposal_kwargs[prop] + ) + u_prop = apply_boundaries_(u_prop=u_prop, **boundary_kwargs) + success = False + if u_prop is not None: + v_prop = args.prior_transform(np.array(u_prop)) + logl_prop = args.loglikelihood(np.array(v_prop)) + ncall += 1 + if logl_prop > args.loglstar: + success = True + current_u = u_prop + current_v = v_prop + logl = logl_prop + + if success: + accept += 1 + most_failures = max(most_failures, current_failures) + current_failures = 0 + else: + nfail += 1 + current_failures += 1 + continue + + # If we've taken the minimum number of steps, calculate the ACT + if iteration > next_check and accept > target_nact: + n_checks += 1 + most_failures = max(most_failures, current_failures) + act = self._calculate_act( + accept=accept, + iteration=iteration, + samples=np.array(u_list), + most_failures=most_failures, + ) + to_next_update = (act * target_nact - iteration) // 2 + to_next_update = max(to_next_update, iteration // 100) + next_check += to_next_update + logger.debug( + f"it: {iteration}, accept: {accept}, reject: {reject}, " + f"fail: {nfail}, act: {act:.2f}, nact: {iteration / act:.2f} " + ) + elif iteration > next_check: + next_check += check_interval + + u_list.append(current_u) + v_list.append(current_v) + logl_list.append(logl) + + most_failures = max(most_failures, current_failures) + self.act = self._calculate_act( + accept=accept, + iteration=iteration, + samples=np.array(u_list), + most_failures=most_failures, + ) + reject += nfail + blob = {"accept": accept, "reject": reject, "scale": args.scale} + + if accept == 0: + logger.debug( + "Unable to find a new point using walk: returning a random point" + ) + u = common_kwargs["rstate"].uniform(size=len(current_u)) + v = args.prior_transform(u) + logl = args.loglikelihood(v) + self._cache.append((u, v, logl, ncall, blob)) + elif not np.isfinite(act): + logger.warning( + "Unable to find a new point using walk: try increasing maxmcmc" + ) + self._cache.append((current_u, current_v, logl, ncall, blob)) + elif self.thin == -1: + self._cache.append((current_u, current_v, logl, ncall, blob)) + else: + iact = int(np.ceil(self.act)) + thin = self.thin * iact + u_list = u_list[thin::thin] + v_list = v_list[thin::thin] + logl_list = logl_list[thin::thin] + n_found = len(u_list) + accept = max(accept // n_found, 1) + reject //= n_found + nfail //= n_found + ncall_list = [ncall // n_found] * n_found + blob_list = [ + dict(accept=accept, reject=reject, fail=nfail, scale=args.scale) + ] * n_found + self._cache.extend(zip(u_list, v_list, logl_list, ncall_list, blob_list)) + logger.debug( + f"act: {self.act:.2f}, max failures: {most_failures}, thin: {thin}, " + f"iteration: {iteration}, n_found: {n_found}" + ) + logger.debug( + f"Finished building cache with length {len(self._cache)} after " + f"{iteration} iterations with {ncall} likelihood calls and ACT={self.act:.2f}" + ) + + @staticmethod + def _calculate_act(accept, iteration, samples, most_failures): + """ + Take the maximum of three ACT estimates, leading to a conservative estimate: + + - a full ACT estimate as done in :code:`bilby_mcmc`. This is almost always the + longest estimator, and is the most computationally expensive. The other + methods mostly catch cases where the estimated ACT is very small. + - the naive ACT used for the acceptance tracking walk. + - the most failed proposals between any pair of accepted steps. This is a strong + lower bound, because we know that if we thin by less than this, there will be + duplicate points. + """ + if accept > 0: + naive_act = 2 / accept * iteration - 1 + else: + naive_act = np.inf + return max(calculate_tau(samples), naive_act, most_failures) + + +class AcceptanceTrackingRWalk: + """ + This is a modified version of dynesty.sampling.sample_rwalk that runs the + MCMC random walk for a user-specified number of a crude approximation to + the autocorrelation time. + + This is the proposal method used by default for :code:`Bilby<2` and + corresponds to specifying :code:`sample="rwalk"` + """ + + def __init__(self, old_act=None): + self.maxmcmc = getattr(_SamplingContainer, "maxmcmc", 5000) + self.nact = getattr(_SamplingContainer, "nact", 40) + self.old_act = old_act + + def __call__(self, args): + rstate = get_random_generator(args.rseed) + + periodic = args.kwargs.get("periodic", None) + reflective = args.kwargs.get("reflective", None) + boundary_kwargs = dict(periodic=periodic, reflective=reflective) + + u = args.u + nlive = args.kwargs.get("nlive", args.kwargs.get("walks", 100)) + + proposals, common_kwargs, proposal_kwargs = _get_proposal_kwargs(args) + + accept = 0 + reject = 0 + nfail = 0 + act = np.inf + + iteration = 0 + while iteration < self.nact * act: + iteration += 1 + + prop = proposals[iteration % len(proposals)] + u_prop = proposal_funcs[prop](u, **common_kwargs, **proposal_kwargs[prop]) + u_prop = apply_boundaries_(u_prop, **boundary_kwargs) + + if u_prop is None: + nfail += 1 + continue + + # Check proposed point. + v_prop = args.prior_transform(np.array(u_prop)) + logl_prop = args.loglikelihood(np.array(v_prop)) + if logl_prop > args.loglstar: + u = u_prop + v = v_prop + logl = logl_prop + accept += 1 + else: + reject += 1 + + # If we've taken the minimum number of steps, calculate the ACT + if iteration > self.nact: + act = self.estimate_nmcmc( + accept_ratio=accept / (accept + reject + nfail), + safety=1, + tau=nlive, + ) + + # If we've taken too many likelihood evaluations then break + if accept + reject > self.maxmcmc: + warnings.warn( + f"Hit maximum number of walks {self.maxmcmc} with accept={accept}," + f" reject={reject}, and nfail={nfail} try increasing maxmcmc" + ) + break + + if not (np.isfinite(act) and accept > 0): + logger.debug( + "Unable to find a new point using walk: returning a random point" + ) + u = rstate.uniform(size=len(u)) + v = args.prior_transform(u) + logl = args.loglikelihood(v) + + blob = {"accept": accept, "reject": reject + nfail, "scale": args.scale} + self.old_act = act + + ncall = accept + reject + return u, v, logl, ncall, blob + + def estimate_nmcmc(self, accept_ratio, safety=5, tau=None): + """Estimate autocorrelation length of chain using acceptance fraction + + Using ACL = (2/acc) - 1 multiplied by a safety margin. Code adapted from: + + - https://github.com/farr/Ensemble.jl + - https://github.com/johnveitch/cpnest/blob/master/cpnest/sampler.py + + Parameters + ========== + accept_ratio: float [0, 1] + Ratio of the number of accepted points to the total number of points + old_act: int + The ACT of the last iteration + maxmcmc: int + The maximum length of the MCMC chain to use + safety: int + A safety factor applied in the calculation + tau: int (optional) + The ACT, if given, otherwise estimated. + + Notes + ===== + This method does not compute a reliable estimate of the autocorrelation + length for our proposal distributions. + """ + if tau is None: + tau = self.maxmcmc / safety + + if accept_ratio == 0.0: + if self.old_act is None: + Nmcmc_exact = np.inf + else: + Nmcmc_exact = (1 + 1 / tau) * self.old_act + else: + estimated_act = 2 / accept_ratio - 1 + Nmcmc_exact = safety * estimated_act + if self.old_act is not None: + Nmcmc_exact = (1 - 1 / tau) * self.old_act + Nmcmc_exact / tau + Nmcmc_exact = float(min(Nmcmc_exact, self.maxmcmc)) + return max(safety, Nmcmc_exact) + + +def _get_proposal_kwargs(args): + """ + Resolve the proposal cycle from the provided keyword arguments. + + The steps involved are: + + - extract the requested proposal types from the :code:`_SamplingContainer`. + If none are specified, only differential evolution will be used. + - differential evolution requires the live points to be passed. If they are + not present, raise an error. + - if a dictionary, e.g., :code:`dict(diff=5, volumetric=1)` is specified, + the keys will be used weighted by the values, e.g., 5:1 differential + evolution to volumetric. + - each proposal needs different keyword arguments, see the specific functions + for what requires which. + + + Parameters + ========== + args: dynesty.sampler.SamplerArgument + Object that carries around various pieces of information about the + analysis. + """ + rstate = get_random_generator(args.rseed) + walks = args.kwargs.get("walks", 100) + current_u = args.u + n_cluster = args.axes.shape[0] + + proposals = getattr(_SamplingContainer, "proposals", None) + if proposals is None: + proposals = ["diff"] + if "diff" in proposals: + live = args.kwargs.get("live", None) + if live is None: + raise ValueError( + "Live points not passed for differential evolution, specify " + "bound='live' to use differential evolution proposals." + ) + live = np.unique(live, axis=0) + matches = np.where(np.equal(current_u, live).all(axis=1))[0] + np.delete(live, matches, 0) + + if isinstance(proposals, (list, set, tuple)): + proposals = rstate.choice(proposals, int(walks)) + elif isinstance(proposals, dict): + props, weights = zip(*proposals.items()) + weights = np.array(weights) / sum(weights) + proposals = rstate.choice(list(props), int(walks), p=weights) + + common_kwargs = dict( + n=len(current_u), + n_cluster=n_cluster, + rstate=rstate, + ) + proposal_kwargs = dict() + if "diff" in proposals: + proposal_kwargs["diff"] = dict( + live=live[:, :n_cluster], + mix=0.5, + scale=2.38 / (2 * n_cluster) ** 0.5, + ) + if "volumetric" in proposals: + proposal_kwargs["volumetric"] = dict( + axes=args.axes, + scale=args.scale, + ) + return proposals, common_kwargs, proposal_kwargs + + +def propose_differetial_evolution( + u, + live, + n, + n_cluster, + rstate, + mix=0.5, + scale=1, +): + r""" + Propose a new point using ensemble differential evolution + (`ter Braak + (2006) <https://doi.org/10.1007/s11222-006-8769-1>`_). + + .. math:: + + u_{\rm prop} = u + \gamma (v_{a} - v_{b}) + + We consider two choices for :math:`\gamma`: weighted by :code:`mix`. + + - :math:`\gamma = 1`: this is a mode-hopping mode for efficiently + exploring multi-modal spaces + - :math:`\gamma \sim \Gamma\left(\gamma; k=4, \theta=\frac{\kappa}{4}\right)` + + Here :math:`\kappa = 2.38 / \sqrt{2 n}` unless specified by the user and + we scale by a random draw from a Gamma distribution. The specific + distribution was chosen somewhat arbitrarily to have mean and mode close to + :math:`\kappa` and give good acceptance and autocorrelation times on a subset + of problems. + + Parameters + ---------- + u: np.ndarray + The current point. + live: np.ndarray + The ensemble of live points to select :math:`v` from. + n: int + The number of dimensions being explored + n_cluster: int + The number of dimensions to run the differential evolution over, the + first :code:`n_cluster` dimensions are used. The rest are randomly + sampled from the prior. + rstate: np.random.RandomState + The random state to use to generate random numbers. + mix: float + The fraction of proposed points that should follow the specified scale + rather than mode hopping. :code:`default=0.5` + scale: float + The amount to scale the difference vector by. + :code:`default = 2.38 / (2 * n_cluster)**0.5)` + + Returns + ------- + u_prop: np.ndarray + The proposed point. + """ + delta = np.diff(rstate.choice(live, 2, replace=False), axis=0)[0] + if rstate.uniform(0, 1) < mix: + if scale is None: + scale = 2.38 / (2 * n_cluster) ** 0.5 + scale *= rstate.gamma(4, 0.25) + else: + scale = 1 + u_prop = u.copy() + u_prop[:n_cluster] += delta * scale + u_prop[n_cluster:] = rstate.uniform(0, 1, n - n_cluster) + return u_prop + + +def propose_volumetric( + u, + axes, + scale, + n, + n_cluster, + rstate, +): + """ + Propose a new point using the default :code:`dynesty` proposal. + + The new difference vector is scaled by a vector isotropically drawn + from an ellipsoid centered on zero with covariance given by the + provided axis. Note that the magnitude of this proposal is heavily + skewed to the size of the ellipsoid. + + Parameters + ---------- + u: np.ndarray + The current point. + n: int + The number of dimensions being explored. + scale: float + The amount to scale the proposed point by. + n_cluster: int + The number of dimensions to run the differential evolution over, the + first :code:`n_cluster` dimensions are used. The rest are randomly + sampled from the prior. + rstate: np.random.RandomState + The random state to use to generate random numbers. + + Returns + ------- + u_prop: np.ndarray + The proposed point. + """ + # Propose a direction on the unit n-sphere. + drhat = rstate.normal(0, 1, n_cluster) + drhat /= np.linalg.norm(drhat) + + # Scale based on dimensionality. + dr = drhat * rstate.uniform(0, 1) ** (1.0 / n_cluster) + + # Transform to proposal distribution. + delta = scale * np.dot(axes, dr) + u_prop = u.copy() + u_prop[:n_cluster] += delta + u_prop[n_cluster:] = rstate.uniform(0, 1, n - n_cluster) + return u_prop + + +def apply_boundaries_(u_prop, periodic, reflective): + """ + Apply the periodic and reflective boundaries and test if we are inside the + unit cube. + + Parameters + ---------- + u_prop: np.ndarray + The proposed point in the unit hypercube space. + periodic: np.ndarray + Indices of the parameters with periodic boundaries. + reflective: np.ndarray + Indices of the parameters with reflective boundaries. + + Returns + ======= + [np.ndarray, None]: + Either the remapped proposed point, or None if the proposed point + lies outside the unit cube. + """ + # Wrap periodic parameters + if periodic is not None: + u_prop[periodic] = np.mod(u_prop[periodic], 1) + + # Reflect + if reflective is not None: + u_prop[reflective] = apply_reflect(u_prop[reflective]) + + if u_prop.min() < 0 or u_prop.max() > 1: + return None + else: + return u_prop + + +proposal_funcs = dict(diff=propose_differetial_evolution, volumetric=propose_volumetric) + +fixed_length_rwalk_bilby = FixedRWalk() +bilby_act_rwalk = ACTTrackingRWalk() +sample_rwalk_bilby = AcceptanceTrackingRWalk() diff --git a/docs/dynesty-guide.txt b/docs/dynesty-guide.txt index 073ee941be1a07ecd33a91a9c2b8875fe474b7d8..61b6a33be7efd56097a9c64cc573ca95c5d2e031 100644 --- a/docs/dynesty-guide.txt +++ b/docs/dynesty-guide.txt @@ -20,51 +20,70 @@ call. For example, to set the number of live points to 1000 Bilby checks the kwargs input through run_sampler. If you miss-spell a word, you will see a warning "Supplied argument X not an argument of dynesty, removing." -Overview of dynesty -------------------- +Bilby-specific implementation details +------------------------------------- -Like many nested samplers, dynesty operates on the unit cube with a -prior_tranform function to transform a point in the unit cube to the target -space. When it begins sampling, it draws samples from the unit cube (uniform -sampling) until reaching the :code:`first_update` criteria. As of v1.0.0, this -defaults to +In Bilby, we have implemented custom stepping methods for use within dynesty. +We have implemented three methods for performing an MCMC evolution to find a +new point from the constrained prior. These can be specified using the +:code:`sample` argument. -.. code-block:: python +1. :code:`sample="act-walk"` (default): with this method, the MCMC evolution is + performed until the autocorrelation length of the chain can be accurately determined. + Following `this guide <https://emcee.readthedocs.io/en/stable/tutorials/autocorr/>`_ + we run for 50 times the autocorrelation length. This chain is then thinned by + a user-specified number of autocorrelation times :code:`nact` to yield a cache + of :code:`N = 50 / nact` points. These points are then returned the next :code:`N` + iterations of :code:`dynesty`. With this method, :code:`nact=2` often gives good + results, however, in some cases, a larger value may be required. - first_update = dict(min_ncall=2*nlive, min_eff=10) +2. :code:`sample="acceptance-walk"`: with this method, the length of each MCMC + chain is predetermined. The specific length evolves during the run to yield an + average of :code:`naccept` accepted jumps during each chain. This method is well + suited to parallelised applications, as each MCMC chain will run for the same + amount of time. The value of :code:`naccept` should be tuned based on the + application. For example, one could run a single analysis with + :code:`sample="act-walk"` to estimate a viable number of accepted steps. -That is, the first update happens when either it has called the likelihood -function twice as many times as there are live points or the sampling -efficiency hits 10%. You can change any of these by passing -:code:`first_update` as a dictionary to :code:`run_sampler`. +3. :code:`sample="rwalk"`: this method is similar to the :code:`"acceptance-walk"` + method, however, the adaptation of the MCMC length happens within the chain. + This method is primarily included for historical reasons and was the default + method prior to :code:`Bilby<2`. For this application, :code:`nact` is half + the average accepted number of jumps per chain. -Once the first update occurs, dynesty switches to the bounding method given by -the keyword :code:`bound`. After this, new points are proposed by taking an -existing live point and sampling using the `sample` keyword. This continues -until one of the stopping criteria are reached: +You can revert to the original dynesty implementation by specifying +:code:`sample="rwalk_dynesty"`. -1. The estimated remaining evidence is below the kwarg :code:`dlogz` (default is 0.1) -2. The effective number of samples exceeds the kwarg :code:`n_effective` (default is 5000) +There are a number of keyword arguments that influence these sampling methods: -Bilby-specific implementation details -------------------------------------- +#. :code:`nact/naccept` as described above, this varies based on the method. -In Bilby, we have re-implemented the :code:`sample="rwalk"` sample method (you -can see exact details by looking at the function -:code:`bilby.core.sampler.dynesty.sample_rwalk_bilby`. In dynesty, this method -took an argument :code:`walks` which was the fixed number of walks to take each -time a new point was proposed. In the bilby implementation, we still take an -argument :code:`walks` which has the new meaning: the minimum number of walks -to take (this ensures backward compatibility). Meanwhile, we add two new -arguments +#. :code:`maxmcmc`: the maximum number of walks to use. This naming is chosen + for consistency with other codes. Default is 5000. If this limit is reached, + a warning will be printed during sampling. -1. :code:`maxmcmc`: the maximum number of walks to use. This naming is chosen for consistency with other codes. Default is 5000. If this limit is reached, a warning will be printed during sampling. +#. :code:`proposals`: a list of the desired proposals. The allowed values are -2. :code:`nact`: the number of auto-correlation times to use before accepting a point. + * :code:`diff`: `ter Braak + (2006) <https://doi.org/10.1007/s11222-006-8769-1>`_ + differential evolution. This is the default for :code:`bound="live"` and + :code:`bound="live-multi"`. -In general, poor convergence can be resolved by increasing :code:`nact`. For GW -events, we find a value of 10 is typically okay. You can revert to the -original dynesty implementation by specifying :code:`sample="rwalk_dynesty"`. + * :code:`volumetric`: sample from an ellipsoid centered on the current point. + This is the proposal distribution implemented in :code:`dynesty` and the + default for all other :code:`bound` options. + +Finally, we implement two custom :code:`dynesty.sampler.Sampler` classes to +facilitate the differential evolution proposal and average acceptance tracking. + +#. :code:`bound="live"` uses the :code:`LivePointSampler` which adapts the + :code:`walks` to average :code:`naccept` accepted steps when in + :code:`acceptance-walk` mode and passes the current live points to the + sample method. + +#. :code:`bound="live-multi"` combines the functionality of :code:`"live"` with + the :code:`dynesty` implemented :code:`multi` method for multi-ellipsoid + :code:`volumetric` sampling. This method is intended when using both the + :code:`diff` and :code:`volumetric` proposals. Understanding the output ------------------------ @@ -73,21 +92,20 @@ Before sampling begins, you will see a message like this .. code-block:: console - 10:42 bilby INFO : Single likelihood evaluation took 2.977e-03 s - 10:42 bilby INFO : Using sampler Dynesty with kwargs {'bound': 'multi', 'sample': 'rwalk', 'verbose': True, 'periodic': None, 'reflective': None, 'check_point_delta_t': 600, 'nlive': 1000, 'first_update': {'min_eff': 20}, 'walks': 10, 'npdim': None, 'rstate': None, 'queue_size': None, 'pool': None, 'use_pool': None, 'live_points': None, 'logl_args': None, 'logl_kwargs': None, 'ptform_args': None, 'ptform_kwargs': None, 'enlarge': 1.25, 'bootstrap': None, 'vol_dec': 0.5, 'vol_check': 2.0, 'facc': 0.2, 'slices': 5, 'update_interval': 600, 'print_func': <bound method Dynesty._print_func of <bilby.core.sampler.dynesty.Dynesty object at 0x7f29e1c47e10>>, 'dlogz': 0.1, 'maxiter': None, 'maxcall': None, 'logl_max': inf, 'add_live': True, 'print_progress': True, 'save_bounds': False, 'n_effective': None, 'maxmcmc': 2000, 'nact': 10, 'jacobian': <function jacobian at 0x7f29ba0411e0>} - 10:42 bilby INFO : Checkpoint every n_check_point = 200000 - 10:42 bilby INFO : Using dynesty version 1.0.0 - 10:42 bilby INFO : Using the bilby-implemented rwalk sample method with ACT estimated walks - + 14:06 bilby INFO : Single likelihood evaluation took 3.273e-03 s + 14:06 bilby INFO : Using sampler Dynesty with kwargs {'nlive': 500, 'bound': 'live', 'sample': 'act-walk', 'periodic': None, 'reflective': None, 'update_interval': 600, 'first_update': None, 'npdim': None, 'rstate': None, 'queue_size': 1, 'pool': None, 'use_pool': None, 'live_points': None, 'logl_args': None, 'logl_kwargs': None, 'ptform_args': None, 'ptform_kwargs': None, 'gradient': None, 'grad_args': None, 'grad_kwargs': None, 'compute_jac': False, 'enlarge': None, 'bootstrap': None, 'walks': 100, 'facc': 0.2, 'slices': None, 'fmove': 0.9, 'max_move': 100, 'update_func': None, 'ncdim': None, 'blob': False, 'save_history': False, 'history_filename': None, 'maxiter': None, 'maxcall': None, 'dlogz': 0.1, 'logl_max': inf, 'n_effective': None, 'add_live': True, 'print_progress': True, 'print_func': <bound method Dynesty._print_func of <bilby.core.sampler.dynesty.Dynesty object at 0x16a3be860>>, 'save_bounds': False, 'checkpoint_file': None, 'checkpoint_every': 60, 'resume': False} + 14:06 bilby INFO : Checkpoint every check_point_delta_t = 600s + 14:06 bilby INFO : Using dynesty version 2.1.0 + 14:06 bilby INFO : Using the bilby-implemented rwalk sampling tracking the autocorrelation function and thinning by 2 with maximum length 250000 This tells you that a typical likelihood evaluation takes a few milliseconds. You can use this to gauge how long the run might take: if a typical likelihood evaluation takes more than a fraction of a second, it is unlikely your run will complete in a reasonable amount of time using serial bilby. After this, is a list of all the kwargs passed in to dynesty. Note, where a :code:`None` is given, -dynesty will fill in its own defaults. Then, we get a message about how often -checkpointing will be done, the version of dynesty, and which sample method -will be used. +dynesty will fill in its own defaults. The Bilby specific arguments are printed +on their onw. Then, we get a message about how often checkpointing will be done, +the version of dynesty, and which sample method will be used. During the sampling, dynesty writes an update of its progress to the terminal (specifally, this is writtent to STDOUT). Here is an example: @@ -110,6 +128,40 @@ unnormalised evidence :code:`logz`. The :code:`logz-ratio` and :code:`dlogz` gives an estimate of the final expected evidence. You can compare this to your expectations to help diagnose -problems before completing a run. However, be aware the `dlogz` does not +problems before completing a run. However, be aware the :code:`dlogz` does not monotonically decrease: if a region of large likelihood is subsequently found, the :code:`dlogz` will increase. + +Diagnostic plots +---------------- + +At each checkpoint, we make a number of plots. Three of these are produced +by :code:`dynesty` and users should consult the +`relevant documentation <https://dynesty.readthedocs.io/en/stable/quickstart.html>`_ +for those. (We note that we produce a checkpoint trace with the unit hypercube +samples in addition to the default :code:`dynesty` plots.) + +Finally, we produce a :code:`"stats"` plot as shown below. + +.. image:: images/stats-plot.png + +The panels here show: + +* The number of likelihood evaluations per iteration. The sampling here used + the :code:`act-walk` method and so the flat portions of the curve correspond + to points that come from the same MCMC exploration. We can clearly see that + in this case, the sampling became much more efficient after 4000 iterations, + most likely due to the allowed prior region becoming unimodal at this point. + +* The number of accepted steps per MCMC chain per iteration. Before the MCMC + evolution begins, this number is 1 and after the sampling stops, the final + value is repeated in the plot. + +* The number of iterations between a live point being chosen and removed + from the ensemble of live points for the point removed at each iteration. + This value reaches a stationary distribution after some initial warm-up + (shown in grey) and before the final live points are removed (shown in red.) + +* A histogram of the lifetimes of the blue colored points in the third panel. + The red curve shows the expected distribution and the p value compares the + lifetimes with the expected distribution. diff --git a/docs/images/stats-plot.png b/docs/images/stats-plot.png new file mode 100644 index 0000000000000000000000000000000000000000..ec897f4b1503bced92a304f1f60488389d33b5fd Binary files /dev/null and b/docs/images/stats-plot.png differ diff --git a/examples/core_examples/hyper_parameter_example.py b/examples/core_examples/hyper_parameter_example.py index 288e9c21b07aa254da6725f57d95b7e3eb05fa99..bda92e59aaf5cc531f1e0cc5110474022c34d5a7 100644 --- a/examples/core_examples/hyper_parameter_example.py +++ b/examples/core_examples/hyper_parameter_example.py @@ -103,8 +103,6 @@ result = run_sampler( likelihood=hp_likelihood, priors=hp_priors, sampler="dynesty", - walks=10, - nact=3, nlive=1000, use_ratio=False, outdir=outdir, diff --git a/examples/tutorials/compare_samplers.ipynb b/examples/tutorials/compare_samplers.ipynb index ad214ebb8c0e9a7321b809fe7b1cd983c79ecdfe..07856a2a7f48cddaa3147a81c8cefd9d5a503efe 100644 --- a/examples/tutorials/compare_samplers.ipynb +++ b/examples/tutorials/compare_samplers.ipynb @@ -147,11 +147,7 @@ " ntemps=10,\n", " printdt=10,\n", " ),\n", - " dynesty=dict(\n", - " npoints=500,\n", - " walks=5,\n", - " nact=3,\n", - " ),\n", + " dynesty=dict(npoints=500, sample=\"acceptance-walk\", naccept=20),\n", " pymultinest=dict(nlive=500),\n", " nestle=dict(nlive=500),\n", " emcee=dict(nwalkers=20, iterations=500),\n", diff --git a/requirements.txt b/requirements.txt index f199d07f19bf96a5dde54b8c5d73657ff8ae4c5b..c668c9d4a5b12fe7b1459dc3e575b48233307446 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ bilby.cython>=0.3.0 -dynesty>=2 +dynesty>=2.0.1 emcee corner numpy diff --git a/test/core/sampler/dynamic_dynesty_test.py b/test/core/sampler/dynamic_dynesty_test.py index f837644157f7463a8ae5bbbe408384c280d57aeb..f5119affcf63a1d6d23de69a6325e63c487bc8c4 100644 --- a/test/core/sampler/dynamic_dynesty_test.py +++ b/test/core/sampler/dynamic_dynesty_test.py @@ -27,7 +27,13 @@ class TestDynamicDynesty(unittest.TestCase): def test_default_kwargs(self): """Only test the kwargs where we specify different defaults to dynesty""" - expected = dict(sample="rwalk", facc=0.2, save_bounds=False) + expected = dict( + sample="act-walk", + bound="live", + facc=0.2, + save_bounds=False, + update_interval=600, + ) for key in expected: self.assertEqual(expected[key], self.sampler.kwargs[key]) diff --git a/test/core/sampler/dynesty_test.py b/test/core/sampler/dynesty_test.py index c78f1a4fc6af4af32f2f10f631da3a23d271160c..1e84cb67ccf46b598779f5596f6b65a22f050c69 100644 --- a/test/core/sampler/dynesty_test.py +++ b/test/core/sampler/dynesty_test.py @@ -2,7 +2,24 @@ import unittest from copy import deepcopy from unittest.mock import MagicMock +from attr import define import bilby +import numpy as np +import parameterized +from bilby.core.sampler import dynesty_utils +from scipy.stats import gamma, ks_1samp, uniform, powerlaw + + +@define +class Dummy: + u: np.ndarray + axes: np.ndarray + scale: float = 1 + rseed: float = 1234 + kwargs: dict = dict(walks=500, live=np.zeros((2, 4)), periodic=None, reflective=None) + prior_transform: callable = lambda x: x + loglikelihood: callable = lambda x: 0 + loglstar: float = -1 class TestDynesty(unittest.TestCase): @@ -28,7 +45,14 @@ class TestDynesty(unittest.TestCase): def test_default_kwargs(self): """Only test the kwargs where we specify different defaults to dynesty""" - expected = dict(sample="rwalk", facc=0.2, save_bounds=False, dlogz=0.1) + expected = dict( + sample="act-walk", + facc=0.2, + save_bounds=False, + dlogz=0.1, + bound="live", + update_interval=600, + ) for key in expected: self.assertEqual(expected[key], self.sampler.kwargs[key]) @@ -60,5 +84,167 @@ class TestDynesty(unittest.TestCase): self.assertEqual([1, 3], self.sampler.kwargs["reflective"]) +class ProposalsTest(unittest.TestCase): + + def test_boundaries(self): + inputs = np.array([0.1, 1.1, -1.3]) + expected = np.array([0.1, 0.1, 0.7]) + periodic = [1] + reflective = [2] + self.assertLess(max(abs( + dynesty_utils.apply_boundaries_(inputs, periodic, reflective) - expected + )), 1e-10) + + def test_boundaries_returns_none_outside_bound(self): + inputs = np.array([0.1, 1.1, -1.3]) + self.assertIsNone(dynesty_utils.apply_boundaries_(inputs, None, None)) + + def test_propose_volumetric(self): + proposal_func = dynesty_utils.proposal_funcs["volumetric"] + rng = np.random.default_rng(12345) + axes = np.array([[2, 0], [0, 2]]) + start = np.zeros(4) + new_samples = list() + for _ in range(1000): + new_samples.append(proposal_func(start, axes, 1, 4, 2, rng)) + new_samples = np.array(new_samples) + self.assertGreater(ks_1samp(new_samples[:, 2:].flatten(), uniform(0, 1).cdf).pvalue, 0.01) + self.assertGreater(ks_1samp(np.linalg.norm(new_samples[:, :2], axis=-1), powerlaw(2, scale=2).cdf).pvalue, 0.01) + + def test_propose_differential_evolution_mode_hopping(self): + proposal_func = dynesty_utils.proposal_funcs["diff"] + rng = np.random.default_rng(12345) + live = np.array([[1, 1], [0, 0]]) + start = np.zeros(4) + new_samples = list() + for _ in range(1000): + new_samples.append(proposal_func(start, live, 4, 2, rng, mix=0)) + new_samples = np.array(new_samples) + self.assertGreater(ks_1samp(new_samples[:, 2:].flatten(), uniform(0, 1).cdf).pvalue, 0.01) + self.assertLess(np.max(abs(new_samples[:, :2]) - np.array([1, 1])), 1e-10) + + @parameterized.parameterized.expand(((1,), (None,), (5,))) + def test_propose_differential_evolution(self, scale): + proposal_func = dynesty_utils.proposal_funcs["diff"] + rng = np.random.default_rng(12345) + live = np.array([[1, 1], [0, 0]]) + start = np.zeros(4) + new_samples = list() + for _ in range(1000): + new_samples.append(proposal_func(start, live, 4, 2, rng, mix=1, scale=scale)) + new_samples = np.array(new_samples) + if scale is None: + scale = 1.17 + self.assertGreater(ks_1samp(new_samples[:, 2:].flatten(), uniform(0, 1).cdf).pvalue, 0.01) + self.assertGreater(ks_1samp(np.abs(new_samples[:, :2].flatten()), gamma(4, scale=scale / 4).cdf).pvalue, 0.01) + + def test_get_proposal_kwargs_diff(self): + args = Dummy(u=-np.ones(4), axes=np.zeros((2, 2)), scale=4) + dynesty_utils._SamplingContainer.proposals = ["diff"] + proposals, common, specific = dynesty_utils._get_proposal_kwargs(args) + del common["rstate"] + self.assertTrue(np.array_equal(proposals, np.array(["diff"] * args.kwargs["walks"]))) + self.assertDictEqual(common, dict(n=len(args.u), n_cluster=len(args.axes))) + assert np.array_equal(args.kwargs["live"][:1, :2], specific["diff"]["live"]) + del specific["diff"]["live"] + self.assertDictEqual(specific, dict(diff=dict(mix=0.5, scale=1.19))) + + def test_get_proposal_kwargs_volumetric(self): + args = Dummy(u=-np.ones(4), axes=np.zeros((2, 2)), scale=4) + dynesty_utils._SamplingContainer.proposals = ["volumetric"] + proposals, common, specific = dynesty_utils._get_proposal_kwargs(args) + del common["rstate"] + self.assertTrue(np.array_equal(proposals, np.array(["volumetric"] * args.kwargs["walks"]))) + self.assertDictEqual(common, dict(n=len(args.u), n_cluster=len(args.axes))) + self.assertDictEqual(specific, dict(volumetric=dict(axes=args.axes, scale=args.scale))) + + def test_proposal_functions_run(self): + args = Dummy(u=np.ones(4) / 2, axes=np.ones((2, 2))) + args.kwargs["live"][0] += 1 + for proposals in [ + ["diff"], + ["volumetric"], + ["diff", "volumetric"], + {"diff": 5, "volumetric": 1}, + ]: + dynesty_utils._SamplingContainer.proposals = proposals + dynesty_utils.FixedRWalk()(args) + dynesty_utils.AcceptanceTrackingRWalk()(args) + dynesty_utils.ACTTrackingRWalk()(args) + + +@parameterized.parameterized_class(("kind", ), [("live",), ("live-multi",)]) +class TestCustomSampler(unittest.TestCase): + def setUp(self): + if self.kind == "live": + cls = dynesty_utils.LivePointSampler + elif self.kind == "live-multi": + cls = dynesty_utils.MultiEllipsoidLivePointSampler + else: + raise ValueError(f"Unknown sampler class {self.kind}") + + self.sampler = cls( + loglikelihood=lambda x: 1, + prior_transform=lambda x: x, + npdim=4, + live_points=(np.zeros((1000, 4)), np.zeros((1000, 4)), np.zeros(1000)), + update_interval=None, + first_update=dict(), + queue_size=1, + pool=None, + use_pool=dict(), + ncdim=2, + method="rwalk", + rstate=np.random.default_rng(1234), + kwargs=dict(walks=100) + ) + self.blob = dict(accept=5, reject=35, scale=1) + + def tearDown(self): + dynesty_utils._SamplingContainer.proposals = None + + def test_update_with_update(self): + """ + If there is only one element left in the cache for ACT tracking we + need to add the flag to rebuild the cache. After rebuilding, this is + then reset to False. + """ + self.sampler.rebuild = False + self.blob["remaining"] = 1 + self.sampler.update_user(blob=self.blob, update=True) + self.assertEqual(self.sampler.kwargs["rebuild"], True) + self.blob["remaining"] = 2 + self.sampler.update_user(blob=self.blob, update=True) + self.assertEqual(self.sampler.kwargs["rebuild"], False) + + def test_diff_update(self): + dynesty_utils._SamplingContainer.proposals = ["diff"] + dynesty_utils._SamplingContainer.maxmcmc = 10000 + dynesty_utils._SamplingContainer.naccept = 10 + self.sampler.update_user(blob=self.blob, update=False) + self.assertEqual(self.sampler.scale, self.blob["accept"]) + self.assertEqual(self.sampler.walks, 101) + + +class TestEstimateNMCMC(unittest.TestCase): + def test_converges_to_correct_value(self): + """ + NMCMC convergence should convergence to + safety * (2 / accept_ratio - 1) + """ + sampler = dynesty_utils.AcceptanceTrackingRWalk() + for _ in range(10): + accept_ratio = np.random.uniform() + safety = np.random.randint(2, 8) + expected = safety * (2 / accept_ratio - 1) + for _ in range(1000): + estimated = sampler.estimate_nmcmc( + accept_ratio=accept_ratio, + safety=safety, + tau=1000, + ) + self.assertAlmostEqual(estimated, expected) + + if __name__ == "__main__": unittest.main() diff --git a/test/integration/sampler_run_test.py b/test/integration/sampler_run_test.py index f9304971da4b5d91bc3475a1b3280576f96ef525..adfd9f3e1149097ef684acd7f66e2a3054aef035 100644 --- a/test/integration/sampler_run_test.py +++ b/test/integration/sampler_run_test.py @@ -28,14 +28,14 @@ _sampler_kwargs = dict( num_particles=50, max_pool=1, ), - dynesty=dict(nlive=100), + dynesty=dict(nlive=10, sample="acceptance-walk", nact=5, proposals=["diff"]), dynamic_dynesty=dict( - nlive_init=100, - nlive_batch=100, + nlive_init=10, + nlive_batch=10, dlogz_init=1.0, maxbatch=0, maxcall=100, - bound="single", + sample="act-walk", ), emcee=dict(iterations=1000, nwalkers=10), kombine=dict(iterations=200, nwalkers=10, autoburnin=False), @@ -57,6 +57,7 @@ _sampler_kwargs = dict( pymultinest=dict(nlive=100), pypolychord=dict(nlive=100), ultranest=dict(nlive=100, temporary_directory=False), + zeus=dict(nwalkers=10, iterations=100) ) sampler_imports = dict(