From 22c294017212313769231e9cae8075921484c963 Mon Sep 17 00:00:00 2001 From: Gregory Ashton <gregory.ashton@ligo.org> Date: Sun, 1 Dec 2019 19:01:38 -0600 Subject: [PATCH] Update ACT method and other changes - Use CPNest ACT estimation - Change scaling - Add chains starting after accept > 0 --- bilby/core/sampler/dynesty.py | 246 ++++++++++++++++++++++++---------- docs/dynesty-guide.txt | 121 +++++++++++++++++ docs/index.txt | 1 + docs/requirements.txt | 1 + requirements.txt | 1 + setup.py | 1 + test/sampler_test.py | 14 +- 7 files changed, 310 insertions(+), 75 deletions(-) create mode 100644 docs/dynesty-guide.txt diff --git a/bilby/core/sampler/dynesty.py b/bilby/core/sampler/dynesty.py index 982f0cba0..cedc5aca6 100644 --- a/bilby/core/sampler/dynesty.py +++ b/bilby/core/sampler/dynesty.py @@ -83,17 +83,18 @@ class Dynesty(NestedSampler): default_kwargs = dict(bound='multi', sample='rwalk', verbose=True, periodic=None, reflective=None, check_point_delta_t=600, nlive=1000, - first_update=None, walks=None, + first_update=None, walks=100, npdim=None, rstate=None, queue_size=None, pool=None, use_pool=None, live_points=None, logl_args=None, logl_kwargs=None, ptform_args=None, ptform_kwargs=None, - enlarge=None, bootstrap=None, vol_dec=0.5, vol_check=2.0, - facc=0.5, slices=5, + enlarge=1.5, bootstrap=None, vol_dec=0.5, vol_check=8.0, + facc=0.2, slices=5, update_interval=None, print_func=None, dlogz=0.1, maxiter=None, maxcall=None, logl_max=np.inf, add_live=True, print_progress=True, - save_bounds=False, n_effective=None) + save_bounds=False, n_effective=None, + maxmcmc=5000, nact=5) def __init__(self, likelihood, priors, outdir='outdir', label='label', use_ratio=False, plot=False, skip_import_verification=False, @@ -198,6 +199,7 @@ class Dynesty(NestedSampler): # Constructing output. string = [] string.append("bound:{:d}".format(bounditer)) + string.append("nc:{:d}".format(nc)) string.append("ncall:{:d}".format(ncall)) string.append("eff:{:0.1f}%".format(eff)) string.append("{}={:0.2f}+/-{:0.2f}".format(key, logz, logzerr)) @@ -231,8 +233,23 @@ class Dynesty(NestedSampler): self.get_initial_points_from_prior( self.kwargs['nlive'])) - dynesty.dynesty._SAMPLING["rwalk"] = sample_rwalk_bilby - dynesty.nestedsamplers._SAMPLING["rwalk"] = sample_rwalk_bilby + if self.kwargs.get("sample", "rwalk") == "rwalk": + logger.info( + "Using the bilby-implemented rwalk sample method with ACT estimated walks") + dynesty.dynesty._SAMPLING["rwalk"] = sample_rwalk_bilby + dynesty.nestedsamplers._SAMPLING["rwalk"] = sample_rwalk_bilby + if self.kwargs.get("walks", 25) > self.kwargs.get("maxmcmc"): + raise DynestySetupError("You have maxmcmc > walks (minimum mcmc)") + if self.kwargs.get("nact", 5) < 1: + raise DynestySetupError("Unable to run with nact < 1") + elif self.kwargs.get("sample") == "rwalk_dynesty": + self._kwargs["sample"] = "rwalk" + logger.info( + "Using the dynesty-implemented rwalk sample method") + elif self.kwargs.get("sample") == "rstagger_dynesty": + self._kwargs["sample"] = "rstagger" + logger.info( + "Using the dynesty-implemented rstagger sample method") self.sampler = dynesty.NestedSampler( loglikelihood=self.log_likelihood, @@ -524,10 +541,7 @@ class Dynesty(NestedSampler): def sample_rwalk_bilby(args): - """ - Modified version of dynesty.sampling.sample_rwalk - - """ + """ Modified bilby-implemented version of dynesty.sampling.sample_rwalk """ # Unzipping. (u, loglstar, axes, scale, @@ -541,66 +555,76 @@ def sample_rwalk_bilby(args): # Setup. n = len(u) - walks = kwargs.get('walks', 25) # number of steps + walks = kwargs.get('walks', 25) # minimum number of steps + maxmcmc = kwargs.get('maxmcmc', 2000) # Maximum number of steps + nact = kwargs.get('nact', 5) # Number of ACT + + # Initialize internal variables accept = 0 reject = 0 - fail = 0 nfail = 0 - nc = 0 - ncall = 0 + act = np.inf + u_list = [u] + v_list = [prior_transform(u)] + logl_list = [loglikelihood(v_list[-1])] + max_walk_warning = True drhat, dr, du, u_prop, logl_prop = np.nan, np.nan, np.nan, np.nan, np.nan - while nc + nfail < walks or accept == 0: - while True: - - # Check scale-factor. - if scale == 0.: - raise RuntimeError("The random walk sampling is stuck! " - "Some useful output quantities:\n" - "u: {0}\n" - "drhat: {1}\n" - "dr: {2}\n" - "du: {3}\n" - "u_prop: {4}\n" - "loglstar: {5}\n" - "logl_prop: {6}\n" - "axes: {7}\n" - "scale: {8}." - .format(u, drhat, dr, du, u_prop, - loglstar, logl_prop, axes, scale)) - - # Propose a direction on the unit n-sphere. - drhat = rstate.randn(n) - drhat /= linalg.norm(drhat) - - # Scale based on dimensionality. - dr = drhat * rstate.rand()**(1. / n) - - # Transform to proposal distribution. - du = np.dot(axes, dr) - u_prop = u + scale * du - - # Wrap periodic parameters - if periodic is not None: - u_prop[periodic] = np.mod(u_prop[periodic], 1) - # Reflect - if reflective is not None: - u_prop[reflective] = reflect(u_prop[reflective]) - - # Check unit cube constraints. - if unitcheck(u_prop, nonbounded): - break - else: - fail += 1 - nfail += 1 - - # Check if we're stuck generating bad numbers. - if fail > 100 * walks: - warnings.warn("Random number generation appears to be " - "extremely inefficient. Adjusting the " - "scale-factor accordingly.") - fail = 0 - scale *= math.exp(-1. / n) + while len(u_list) < nact * act: + + if scale == 0.: + raise RuntimeError("The random walk sampling is stuck! " + "Some useful output quantities:\n" + "u: {0}\n" + "drhat: {1}\n" + "dr: {2}\n" + "du: {3}\n" + "u_prop: {4}\n" + "loglstar: {5}\n" + "logl_prop: {6}\n" + "axes: {7}\n" + "scale: {8}." + .format(u, drhat, dr, du, u_prop, + loglstar, logl_prop, axes, scale)) + + # Propose a direction on the unit n-sphere. + drhat = rstate.randn(n) + drhat /= linalg.norm(drhat) + + # Scale based on dimensionality. + # dr = drhat * rstate.rand()**(1. / n) # CHANGED FROM DYNESTY 1.0 + dr = drhat * rstate.rand(n) + + # Transform to proposal distribution. + du = np.dot(axes, dr) + u_prop = u + scale * du + + # Wrap periodic parameters + if periodic is not None: + u_prop[periodic] = np.mod(u_prop[periodic], 1) + # Reflect + if reflective is not None: + u_prop[reflective] = reflect(u_prop[reflective]) + + # Check unit cube constraints. + if unitcheck(u_prop, nonbounded): + pass + else: + nfail += 1 + # Only start appending to the chain once a single jump is made + if accept > 0: + u_list.append(u_list[-1]) + v_list.append(v_list[-1]) + logl_list.append(logl_list[-1]) + continue + + # Check if we're stuck generating bad numbers. + if nfail > 100 * walks: + warnings.warn("Random number generation appears to be " + "extremely inefficient. Adjusting the " + "scale-factor accordingly.") + nfail = 0 + scale *= math.exp(-1. / n) # Check proposed point. v_prop = prior_transform(np.array(u_prop)) @@ -610,19 +634,103 @@ def sample_rwalk_bilby(args): v = v_prop logl = logl_prop accept += 1 + u_list.append(u) + v_list.append(v) + logl_list.append(logl) else: reject += 1 - nc += 1 - ncall += 1 + # Only start appending to the chain once a single jump is made + if accept > 0: + u_list.append(u_list[-1]) + v_list.append(v_list[-1]) + logl_list.append(logl_list[-1]) + + # If we've taken the minimum number of steps, calculate the ACT + if accept + reject > walks: + act = estimate_nmcmc( + accept / (accept + reject + nfail), walks, maxmcmc) + + # If we've taken too many likelihood evaluations then break + if accept + reject > maxmcmc and accept > 0: + if max_walk_warning: + warnings.warn( + "Hit maximum number of walks {} with accept={}, reject={}, " + "and nfail={} try increasing maxmcmc" + .format(maxmcmc, accept, reject, nfail)) + max_walk_warning = False + if accept > 0: + # Break if we are above maxmcmc and have at least one accepted point + break # Check if we're stuck generating bad points. - if nc > 50 * walks: + if accept + reject > 50 * walks: scale *= math.exp(-1. / n) warnings.warn("Random walk proposals appear to be " "extremely inefficient. Adjusting the " "scale-factor accordingly.") - nc, accept, reject = 0, 0, 0 # reset values + + # If the act is finite, pick randomly from within the chain + if np.isfinite(act) and act < len(u_list): + idx = np.random.randint(act, len(u_list)) + u = u_list[idx] + v = v_list[idx] + logl = logl_list[idx] + elif len(u_list) == 1: + logger.warning("Returning the only point in the chain") + u = u_list[-1] + v = v_list[-1] + logl = logl_list[-1] + else: + idx = np.random.randint(int(len(u_list) / 2), len(u_list)) + logger.warning("Returning random point in second half of the chain") + u = u_list[idx] + v = v_list[idx] + logl = logl_list[idx] blob = {'accept': accept, 'reject': reject, 'fail': nfail, 'scale': scale} + ncall = accept + reject return u, v, logl, ncall, blob + + +def estimate_nmcmc(accept_ratio, minmcmc, maxmcmc, safety=5, tau=None): + """ Estimate autocorrelation length of chain using acceptance fraction + + Using ACL = (2/acc) - 1 multiplied by a safety margin. Code adapated from + CPNest: + - https://github.com/johnveitch/cpnest/blob/master/cpnest/sampler.py + - http://github.com/farr/Ensemble.jl + + Parameters + ---------- + accept_ratio: float [0, 1] + Ratio of the number of accepted points to the total number of points + minmcmc: int + The minimum length of the MCMC chain to use + maxmcmc: int + The maximum length of the MCMC chain to use + safety: int + A safety factor applied in the calculation + tau: int (optional) + The ACT, if given, otherwise estimated. + + """ + if tau is None: + tau = maxmcmc / safety + + if accept_ratio == 0.0: + Nmcmc_exact = (1. + 1. / tau) * minmcmc + else: + Nmcmc_exact = ( + (1. - 1. / tau) * minmcmc + + (safety / tau) * (2. / accept_ratio - 1.) + ) + + Nmcmc_exact = float(min(Nmcmc_exact, maxmcmc)) + Nmcmc = max(safety, int(Nmcmc_exact)) + + return Nmcmc + + +class DynestySetupError(Exception): + pass diff --git a/docs/dynesty-guide.txt b/docs/dynesty-guide.txt new file mode 100644 index 000000000..fb74c7c77 --- /dev/null +++ b/docs/dynesty-guide.txt @@ -0,0 +1,121 @@ +.. _dynesty-guide: + +============= +Dynesty Guide +============= + +The Dynesty sampler is just one of the samplers available in bilby, but it is +well-used and found to be fast and accurate. Here, we provide a short guide to +its implementation. This will not be a complete guide, additional help can be +found in the `Dynesty documentation <https://dynesty.readthedocs.io/en/latest/>`_. + +All of the options discussed herein can be set in the :code:`bilby.run_sampler()` +call. For example, to set the number of live points to 1000 + +.. code-block:: python + + >>> bilby.run_sampler(likelihood, priors, sampler="dynesty", nlive=1000) + +.. note:: + Bilby checks the kwargs input through run_sampler. If you miss-spell a word, + you will see a warning "Supplied argument X not an argument of dynesty, removing." + +Overview of dynesty +------------------- + +Like many nested samplers, dynesty operates on the unit cube with a +prior_tranform function to transform a point in the unit cube to the target +space. When it begins sampling, it draws samples from the unit cube (uniform +sampling) until reaching the :code:`first_update` criteria. As of v1.0.0, this +defaults to + +.. code-block:: python + + first_update = dict(min_ncall=2*nlive, min_eff=10) + +That is, the first update happens when either it has called the likelihood +function twice as many times as there are live points or the sampling +efficiency hits 10%. You can change any of these by passing +:code:`first_update` as a dictionary to :code:`run_sampler`. + +Once the first update occurs, dynesty switches to the bounding method given by +the keyword :code:`bound`. After this, new points are proposed by taking an +existing live point and sampling using the `sample` keyword. This continues +until one of the stopping criteria are reached: + +1. The estimated remaining evidence is below the kwarg :code:`dlogz` (default is 0.1) +2. The effective number of samples exceeds the kwarg :code:`n_effective` (default is 5000) + +Bilby-specific implementation details +------------------------------------- + +In Bilby, we have re-implemented the :code:`sample="rwalk"` sample method. In +dynesty, this method took an argument :code:`walks` which was the fixed number +of walks to take each time a new point was proposed. In the bilby implementation, +we still take an argument :code:`walks` which has the new meaning: the minimum +number of walks to take (this ensures backward compatibility). Meanwhile, we +add two new arguments +1. :code:`maxmcmc`: the maximum number of walks to use. This naming is chosen for consistency with other codes. Default is 5000. If this limit is reached, a warning will be printed during sampling. +2. :code:`nact`: the number of auto-correlation times to use before accepting a point. Default is 5. + +The autocorrelation time calculation uses the `emcee +<https://emcee.readthedocs.io/en/stable/>`_ autocorr methods. For a detailed +discussion on the topics, see `this post by Dan Foreman-Mackey +<https://dfm.io/posts/autocorr/>`_. + +You can revert to the original dynesty implementation by specifying +:code:`sample="rwalk_dynesty"`. + +In addition, we also set several kwargs by default + +1. :code:`nwalk` (alias of :code:`nlive`), the number of live poinst. This defaults to 1000. +2. :code:`facc`: The target acceptance fraction for the 'rwalk'. In dynesty, this defaults 0.5, but in testing we found that a value of 0.2 produced better behaviour at boundaries. + +Understanding the output +------------------------ + +Before sampling begins, you will see a message like this + + +.. code-block:: console + + 10:42 bilby INFO : Single likelihood evaluation took 2.977e-03 s + 10:42 bilby INFO : Using sampler Dynesty with kwargs {'bound': 'multi', 'sample': 'rwalk', 'verbose': True, 'periodic': None, 'reflective': None, 'check_point_delta_t': 600, 'nlive': 1000, 'first_update': {'min_eff': 20}, 'walks': 10, 'npdim': None, 'rstate': None, 'queue_size': None, 'pool': None, 'use_pool': None, 'live_points': None, 'logl_args': None, 'logl_kwargs': None, 'ptform_args': None, 'ptform_kwargs': None, 'enlarge': 1.25, 'bootstrap': None, 'vol_dec': 0.5, 'vol_check': 2.0, 'facc': 0.2, 'slices': 5, 'update_interval': 600, 'print_func': <bound method Dynesty._print_func of <bilby.core.sampler.dynesty.Dynesty object at 0x7f29e1c47e10>>, 'dlogz': 0.1, 'maxiter': None, 'maxcall': None, 'logl_max': inf, 'add_live': True, 'print_progress': True, 'save_bounds': False, 'n_effective': None, 'maxmcmc': 2000, 'nact': 10, 'jacobian': <function jacobian at 0x7f29ba0411e0>} + 10:42 bilby INFO : Checkpoint every n_check_point = 200000 + 10:42 bilby INFO : Using dynesty version 1.0.0 + 10:42 bilby INFO : Using the bilby-implemented rwalk sample method with ACT estimated walks + + +This tells you that a typical likelihood evaluation takes a few milliseconds. +You can use this to gauge how long the run might take: if a typical likelihood +evaluation takes more than a fraction of a second, it is unlikely your run will +complete in a reasonable amount of time using serial bilby. After this, is a list +of all the kwargs passed in to dynesty. Note, where a :code:`None` is given, +dynesty will fill in its own defaults. Then, we get a message about how often +checkpointing will be done, the version of dynesty, and which sample method +will be used. + +During the sampling, dynesty writes an update of its progress to the terminal +(specifally, this is writtent to STDOUT). Here is an example: + + +.. code-block:: console + + 1015it [00:08, 138.49it/s, bound:0 nc:2 ncall:2714 eff:37.4% logz-ratio=-67.89+/-0.09 dlogz:181.166>0.10] + +From left to right, this gives the number of iterations, the sampling time, +the iterations per second, the bound (while :code:`bound=0` dynesty samples +from the unit cube), the number of likelihood calls per sample :code:`nc`, the +total number of likelihood calls :code:`ncall`, the sampling efficiency, the +current estimate of the logz-ratio (monotonically increases) and the estimated +remaining log evidence. + +If the likelihood calculates the :code:`log_noise_evidence`, then this output +will give the :code:`logz-ratio`. If it doesn't it instead uses just the +unnormalised evidence :code:`logz`. + +The :code:`logz-ratio` and :code:`dlogz` gives an estimate of the final +expected evidence. You can compare this to your expectations to help diagnose +problems before completing a run. However, be aware the `dlogz` does not +monotonically decrease: if a region of large likelihood is subsequently found, +the :code:`dlogz` will increase. diff --git a/docs/index.txt b/docs/index.txt index e4d2ce7ed..eb0cb84ac 100644 --- a/docs/index.txt +++ b/docs/index.txt @@ -15,6 +15,7 @@ Welcome to bilby's documentation! prior likelihood samplers + dynesty-guide bilby-output compact-binary-coalescence-parameter-estimation transient-gw-data diff --git a/docs/requirements.txt b/docs/requirements.txt index 706fb75a9..01869f484 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,5 +1,6 @@ html5lib==1.0b8 # Hack to ensure documentation generated sphinx +sphinx_tabs numpydoc nbsphinx autodoc diff --git a/requirements.txt b/requirements.txt index 849fd3f93..bf8b8babc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ future dynesty +emcee corner numpy>=1.9 matplotlib>=2.0 diff --git a/setup.py b/setup.py index beca2119f..4ac99cd23 100644 --- a/setup.py +++ b/setup.py @@ -80,6 +80,7 @@ setup(name='bilby', install_requires=[ 'future', 'dynesty>=1.0.0', + 'emcee', 'corner', 'dill', 'numpy>=1.9', diff --git a/test/sampler_test.py b/test/sampler_test.py index 63237d1c1..79a1a09d7 100644 --- a/test/sampler_test.py +++ b/test/sampler_test.py @@ -152,10 +152,11 @@ class TestDynesty(unittest.TestCase): npdim=None, rstate=None, queue_size=None, pool=None, use_pool=None, live_points=None, logl_args=None, logl_kwargs=None, ptform_args=None, ptform_kwargs=None, - enlarge=None, bootstrap=None, vol_dec=0.5, vol_check=2.0, - facc=0.5, slices=5, dlogz=0.1, maxiter=None, maxcall=None, + enlarge=1.5, bootstrap=None, vol_dec=0.5, vol_check=8.0, + facc=0.2, slices=5, dlogz=0.1, maxiter=None, maxcall=None, logl_max=np.inf, add_live=True, print_progress=True, save_bounds=False, - walks=20, update_interval=600, print_func='func', n_effective=None) + walks=100, update_interval=600, print_func='func', n_effective=None, + maxmcmc=5000, nact=5) self.sampler.kwargs['print_func'] = 'func' # set this manually as this is not testable otherwise # DictEqual can't handle lists so we check these separately self.assertEqual([], self.sampler.kwargs['periodic']) @@ -173,10 +174,11 @@ class TestDynesty(unittest.TestCase): npdim=None, rstate=None, queue_size=None, pool=None, use_pool=None, live_points=None, logl_args=None, logl_kwargs=None, ptform_args=None, ptform_kwargs=None, - enlarge=None, bootstrap=None, vol_dec=0.5, vol_check=2.0, - facc=0.5, slices=5, dlogz=0.1, maxiter=None, maxcall=None, + enlarge=1.5, bootstrap=None, vol_dec=0.5, vol_check=8.0, + facc=0.2, slices=5, dlogz=0.1, maxiter=None, maxcall=None, logl_max=np.inf, add_live=True, print_progress=True, save_bounds=False, - walks=20, update_interval=600, print_func='func', n_effective=None) + walks=100, update_interval=600, print_func='func', n_effective=None, + maxmcmc=5000, nact=5) for equiv in bilby.core.sampler.base_sampler.NestedSampler.npoints_equiv_kwargs: new_kwargs = self.sampler.kwargs.copy() -- GitLab