From 22c294017212313769231e9cae8075921484c963 Mon Sep 17 00:00:00 2001
From: Gregory Ashton <gregory.ashton@ligo.org>
Date: Sun, 1 Dec 2019 19:01:38 -0600
Subject: [PATCH] Update ACT method and other changes

- Use CPNest ACT estimation
- Change scaling
- Add chains starting after accept > 0
---
 bilby/core/sampler/dynesty.py | 246 ++++++++++++++++++++++++----------
 docs/dynesty-guide.txt        | 121 +++++++++++++++++
 docs/index.txt                |   1 +
 docs/requirements.txt         |   1 +
 requirements.txt              |   1 +
 setup.py                      |   1 +
 test/sampler_test.py          |  14 +-
 7 files changed, 310 insertions(+), 75 deletions(-)
 create mode 100644 docs/dynesty-guide.txt

diff --git a/bilby/core/sampler/dynesty.py b/bilby/core/sampler/dynesty.py
index 982f0cba0..cedc5aca6 100644
--- a/bilby/core/sampler/dynesty.py
+++ b/bilby/core/sampler/dynesty.py
@@ -83,17 +83,18 @@ class Dynesty(NestedSampler):
     default_kwargs = dict(bound='multi', sample='rwalk',
                           verbose=True, periodic=None, reflective=None,
                           check_point_delta_t=600, nlive=1000,
-                          first_update=None, walks=None,
+                          first_update=None, walks=100,
                           npdim=None, rstate=None, queue_size=None, pool=None,
                           use_pool=None, live_points=None,
                           logl_args=None, logl_kwargs=None,
                           ptform_args=None, ptform_kwargs=None,
-                          enlarge=None, bootstrap=None, vol_dec=0.5, vol_check=2.0,
-                          facc=0.5, slices=5,
+                          enlarge=1.5, bootstrap=None, vol_dec=0.5, vol_check=8.0,
+                          facc=0.2, slices=5,
                           update_interval=None, print_func=None,
                           dlogz=0.1, maxiter=None, maxcall=None,
                           logl_max=np.inf, add_live=True, print_progress=True,
-                          save_bounds=False, n_effective=None)
+                          save_bounds=False, n_effective=None,
+                          maxmcmc=5000, nact=5)
 
     def __init__(self, likelihood, priors, outdir='outdir', label='label',
                  use_ratio=False, plot=False, skip_import_verification=False,
@@ -198,6 +199,7 @@ class Dynesty(NestedSampler):
         # Constructing output.
         string = []
         string.append("bound:{:d}".format(bounditer))
+        string.append("nc:{:d}".format(nc))
         string.append("ncall:{:d}".format(ncall))
         string.append("eff:{:0.1f}%".format(eff))
         string.append("{}={:0.2f}+/-{:0.2f}".format(key, logz, logzerr))
@@ -231,8 +233,23 @@ class Dynesty(NestedSampler):
                 self.get_initial_points_from_prior(
                     self.kwargs['nlive']))
 
-        dynesty.dynesty._SAMPLING["rwalk"] = sample_rwalk_bilby
-        dynesty.nestedsamplers._SAMPLING["rwalk"] = sample_rwalk_bilby
+        if self.kwargs.get("sample", "rwalk") == "rwalk":
+            logger.info(
+                "Using the bilby-implemented rwalk sample method with ACT estimated walks")
+            dynesty.dynesty._SAMPLING["rwalk"] = sample_rwalk_bilby
+            dynesty.nestedsamplers._SAMPLING["rwalk"] = sample_rwalk_bilby
+            if self.kwargs.get("walks", 25) > self.kwargs.get("maxmcmc"):
+                raise DynestySetupError("You have walks (minimum mcmc) > maxmcmc")
+            if self.kwargs.get("nact", 5) < 1:
+                raise DynestySetupError("Unable to run with nact < 1")
+        elif self.kwargs.get("sample") == "rwalk_dynesty":
+            self._kwargs["sample"] = "rwalk"
+            logger.info(
+                "Using the dynesty-implemented rwalk sample method")
+        elif self.kwargs.get("sample") == "rstagger_dynesty":
+            self._kwargs["sample"] = "rstagger"
+            logger.info(
+                "Using the dynesty-implemented rstagger sample method")
 
         self.sampler = dynesty.NestedSampler(
             loglikelihood=self.log_likelihood,
@@ -524,10 +541,7 @@ class Dynesty(NestedSampler):
 
 
 def sample_rwalk_bilby(args):
-    """
-    Modified version of dynesty.sampling.sample_rwalk
-
-    """
+    """ Modified bilby-implemented version of dynesty.sampling.sample_rwalk """
 
     # Unzipping.
     (u, loglstar, axes, scale,
@@ -541,66 +555,76 @@ def sample_rwalk_bilby(args):
 
     # Setup.
     n = len(u)
-    walks = kwargs.get('walks', 25)  # number of steps
+    walks = kwargs.get('walks', 25)  # minimum number of steps
+    maxmcmc = kwargs.get('maxmcmc', 2000)  # Maximum number of steps
+    nact = kwargs.get('nact', 5)  # Number of ACT
+
+    # Initialize internal variables
     accept = 0
     reject = 0
-    fail = 0
     nfail = 0
-    nc = 0
-    ncall = 0
+    act = np.inf
+    u_list = [u]
+    v_list = [prior_transform(u)]
+    logl_list = [loglikelihood(v_list[-1])]
+    max_walk_warning = True
 
     drhat, dr, du, u_prop, logl_prop = np.nan, np.nan, np.nan, np.nan, np.nan
-    while nc + nfail < walks or accept == 0:
-        while True:
-
-            # Check scale-factor.
-            if scale == 0.:
-                raise RuntimeError("The random walk sampling is stuck! "
-                                   "Some useful output quantities:\n"
-                                   "u: {0}\n"
-                                   "drhat: {1}\n"
-                                   "dr: {2}\n"
-                                   "du: {3}\n"
-                                   "u_prop: {4}\n"
-                                   "loglstar: {5}\n"
-                                   "logl_prop: {6}\n"
-                                   "axes: {7}\n"
-                                   "scale: {8}."
-                                   .format(u, drhat, dr, du, u_prop,
-                                           loglstar, logl_prop, axes, scale))
-
-            # Propose a direction on the unit n-sphere.
-            drhat = rstate.randn(n)
-            drhat /= linalg.norm(drhat)
-
-            # Scale based on dimensionality.
-            dr = drhat * rstate.rand()**(1. / n)
-
-            # Transform to proposal distribution.
-            du = np.dot(axes, dr)
-            u_prop = u + scale * du
-
-            # Wrap periodic parameters
-            if periodic is not None:
-                u_prop[periodic] = np.mod(u_prop[periodic], 1)
-            # Reflect
-            if reflective is not None:
-                u_prop[reflective] = reflect(u_prop[reflective])
-
-            # Check unit cube constraints.
-            if unitcheck(u_prop, nonbounded):
-                break
-            else:
-                fail += 1
-                nfail += 1
-
-            # Check if we're stuck generating bad numbers.
-            if fail > 100 * walks:
-                warnings.warn("Random number generation appears to be "
-                              "extremely inefficient. Adjusting the "
-                              "scale-factor accordingly.")
-                fail = 0
-                scale *= math.exp(-1. / n)
+    while len(u_list) < nact * act:
+
+        if scale == 0.:
+            raise RuntimeError("The random walk sampling is stuck! "
+                               "Some useful output quantities:\n"
+                               "u: {0}\n"
+                               "drhat: {1}\n"
+                               "dr: {2}\n"
+                               "du: {3}\n"
+                               "u_prop: {4}\n"
+                               "loglstar: {5}\n"
+                               "logl_prop: {6}\n"
+                               "axes: {7}\n"
+                               "scale: {8}."
+                               .format(u, drhat, dr, du, u_prop,
+                                       loglstar, logl_prop, axes, scale))
+
+        # Propose a direction on the unit n-sphere.
+        drhat = rstate.randn(n)
+        drhat /= linalg.norm(drhat)
+
+        # Scale based on dimensionality.
+        # dr = drhat * rstate.rand()**(1. / n)  # CHANGED FROM DYNESTY 1.0
+        dr = drhat * rstate.rand(n)
+
+        # Transform to proposal distribution.
+        du = np.dot(axes, dr)
+        u_prop = u + scale * du
+
+        # Wrap periodic parameters
+        if periodic is not None:
+            u_prop[periodic] = np.mod(u_prop[periodic], 1)
+        # Reflect
+        if reflective is not None:
+            u_prop[reflective] = reflect(u_prop[reflective])
+
+        # Check unit cube constraints.
+        if unitcheck(u_prop, nonbounded):
+            pass
+        else:
+            nfail += 1
+            # Only start appending to the chain once a single jump is made
+            if accept > 0:
+                u_list.append(u_list[-1])
+                v_list.append(v_list[-1])
+                logl_list.append(logl_list[-1])
+            continue
+
+        # Check if we're stuck generating bad numbers.
+        if nfail > 100 * walks:
+            warnings.warn("Random number generation appears to be "
+                          "extremely inefficient. Adjusting the "
+                          "scale-factor accordingly.")
+            nfail = 0
+            scale *= math.exp(-1. / n)
 
         # Check proposed point.
         v_prop = prior_transform(np.array(u_prop))
@@ -610,19 +634,103 @@ def sample_rwalk_bilby(args):
             v = v_prop
             logl = logl_prop
             accept += 1
+            u_list.append(u)
+            v_list.append(v)
+            logl_list.append(logl)
         else:
             reject += 1
-        nc += 1
-        ncall += 1
+            # Only start appending to the chain once a single jump is made
+            if accept > 0:
+                u_list.append(u_list[-1])
+                v_list.append(v_list[-1])
+                logl_list.append(logl_list[-1])
+
+        # If we've taken the minimum number of steps, calculate the ACT
+        if accept + reject > walks:
+            act = estimate_nmcmc(
+                accept / (accept + reject + nfail), walks, maxmcmc)
+
+        # If we've taken too many likelihood evaluations then break
+        if accept + reject > maxmcmc and accept > 0:
+            if max_walk_warning:
+                warnings.warn(
+                    "Hit maximum number of walks {} with accept={}, reject={}, "
+                    "and nfail={} try increasing maxmcmc"
+                    .format(maxmcmc, accept, reject, nfail))
+                max_walk_warning = False
+            if accept > 0:
+                # Break if we are above maxmcmc and have at least one accepted point
+                break
 
         # Check if we're stuck generating bad points.
-        if nc > 50 * walks:
+        if accept + reject > 50 * walks:
             scale *= math.exp(-1. / n)
             warnings.warn("Random walk proposals appear to be "
                           "extremely inefficient. Adjusting the "
                           "scale-factor accordingly.")
-            nc, accept, reject = 0, 0, 0  # reset values
+
+    # If the act is finite, pick randomly from within the chain
+    if np.isfinite(act) and act < len(u_list):
+        idx = np.random.randint(act, len(u_list))
+        u = u_list[idx]
+        v = v_list[idx]
+        logl = logl_list[idx]
+    elif len(u_list) == 1:
+        logger.warning("Returning the only point in the chain")
+        u = u_list[-1]
+        v = v_list[-1]
+        logl = logl_list[-1]
+    else:
+        idx = np.random.randint(int(len(u_list) / 2), len(u_list))
+        logger.warning("Returning random point in second half of the chain")
+        u = u_list[idx]
+        v = v_list[idx]
+        logl = logl_list[idx]
 
     blob = {'accept': accept, 'reject': reject, 'fail': nfail, 'scale': scale}
 
+    ncall = accept + reject
     return u, v, logl, ncall, blob
+
+
+def estimate_nmcmc(accept_ratio, minmcmc, maxmcmc, safety=5, tau=None):
+    """ Estimate autocorrelation length of chain using acceptance fraction
+
+    Using ACL = (2/acc) - 1 multiplied by a safety margin. Code adapted from
+    CPNest:
+        - https://github.com/johnveitch/cpnest/blob/master/cpnest/sampler.py
+        - http://github.com/farr/Ensemble.jl
+
+    Parameters
+    ----------
+    accept_ratio: float [0, 1]
+        Ratio of the number of accepted points to the total number of points
+    minmcmc: int
+        The minimum length of the MCMC chain to use
+    maxmcmc: int
+        The maximum length of the MCMC chain to use
+    safety: int
+        A safety factor applied in the calculation
+    tau: int (optional)
+        The ACT, if given, otherwise estimated.
+
+    """
+    if tau is None:
+        tau = maxmcmc / safety
+
+    if accept_ratio == 0.0:
+        Nmcmc_exact = (1. + 1. / tau) * minmcmc
+    else:
+        Nmcmc_exact = (
+            (1. - 1. / tau) * minmcmc +
+            (safety / tau) * (2. / accept_ratio - 1.)
+        )
+
+    Nmcmc_exact = float(min(Nmcmc_exact, maxmcmc))
+    Nmcmc = max(safety, int(Nmcmc_exact))
+
+    return Nmcmc
+
+
+class DynestySetupError(Exception):
+    pass
diff --git a/docs/dynesty-guide.txt b/docs/dynesty-guide.txt
new file mode 100644
index 000000000..fb74c7c77
--- /dev/null
+++ b/docs/dynesty-guide.txt
@@ -0,0 +1,121 @@
+.. _dynesty-guide:
+
+=============
+Dynesty Guide
+=============
+
+The Dynesty sampler is just one of the samplers available in bilby, but it is
+well-used and found to be fast and accurate. Here, we provide a short guide to
+its implementation. This will not be a complete guide, additional help can be
+found in the `Dynesty documentation <https://dynesty.readthedocs.io/en/latest/>`_.
+
+All of the options discussed herein can be set in the :code:`bilby.run_sampler()`
+call. For example, to set the number of live points to 1000
+
+.. code-block:: python
+
+   >>> bilby.run_sampler(likelihood, priors, sampler="dynesty", nlive=1000)
+
+.. note::
+   Bilby checks the kwargs input through run_sampler. If you misspell a word,
+   you will see a warning "Supplied argument X not an argument of dynesty, removing."
+
+Overview of dynesty
+-------------------
+
+Like many nested samplers, dynesty operates on the unit cube with a
+prior_tranform function to transform a point in the unit cube to the target
+space. When it begins sampling, it draws samples from the unit cube (uniform
+sampling) until reaching the :code:`first_update` criteria. As of v1.0.0, this
+defaults to
+
+.. code-block:: python
+
+   first_update = dict(min_ncall=2*nlive, min_eff=10)
+
+That is, the first update happens when either it has called the likelihood
+function twice as many times as there are live points or the sampling
+efficiency hits 10%. You can change any of these by passing
+:code:`first_update` as a dictionary to :code:`run_sampler`.
+
+Once the first update occurs, dynesty switches to the bounding method given by
+the keyword :code:`bound`. After this, new points are proposed by taking an
+existing live point and sampling using the `sample` keyword. This continues
+until one of the stopping criteria are reached:
+
+1. The estimated remaining evidence is below the kwarg :code:`dlogz` (default is 0.1)
+2. The effective number of samples exceeds the kwarg :code:`n_effective` (default is 5000)
+
+Bilby-specific implementation details
+-------------------------------------
+
+In Bilby, we have re-implemented the :code:`sample="rwalk"` sample method. In
+dynesty, this method took an argument :code:`walks` which was the fixed number
+of walks to take each time a new point was proposed. In the bilby implementation,
+we still take an argument :code:`walks` which has the new meaning: the minimum
+number of walks to take (this ensures backward compatibility). Meanwhile, we
+add two new arguments
+1. :code:`maxmcmc`: the maximum number of walks to use. This naming is chosen for consistency with other codes. Default is 5000. If this limit is reached, a warning will be printed during sampling.
+2. :code:`nact`: the number of auto-correlation times to use before accepting a point. Default is 5.
+
+The autocorrelation time is estimated from the chain's acceptance ratio using
+a method adapted from `CPNest
+<https://github.com/johnveitch/cpnest>`_. For a detailed discussion of
+autocorrelation times, see `this post <https://dfm.io/posts/autocorr/>`_.
+
+You can revert to the original dynesty implementation by specifying
+:code:`sample="rwalk_dynesty"`.
+
+In addition, we also set several kwargs by default
+
+1. :code:`nlive`: the number of live points. This defaults to 1000.
+2. :code:`facc`: The target acceptance fraction for the 'rwalk'. In dynesty, this defaults to 0.5, but in testing we found that a value of 0.2 produced better behaviour at boundaries.
+
+Understanding the output
+------------------------
+
+Before sampling begins, you will see a message like this
+
+
+.. code-block:: console
+
+   10:42 bilby INFO    : Single likelihood evaluation took 2.977e-03 s
+   10:42 bilby INFO    : Using sampler Dynesty with kwargs {'bound': 'multi', 'sample': 'rwalk', 'verbose': True, 'periodic': None, 'reflective': None, 'check_point_delta_t': 600, 'nlive': 1000, 'first_update': {'min_eff': 20}, 'walks': 10, 'npdim': None, 'rstate': None, 'queue_size': None, 'pool': None, 'use_pool': None, 'live_points': None, 'logl_args': None, 'logl_kwargs': None, 'ptform_args': None, 'ptform_kwargs': None, 'enlarge': 1.25, 'bootstrap': None, 'vol_dec': 0.5, 'vol_check': 2.0, 'facc': 0.2, 'slices': 5, 'update_interval': 600, 'print_func': <bound method Dynesty._print_func of <bilby.core.sampler.dynesty.Dynesty object at 0x7f29e1c47e10>>, 'dlogz': 0.1, 'maxiter': None, 'maxcall': None, 'logl_max': inf, 'add_live': True, 'print_progress': True, 'save_bounds': False, 'n_effective': None, 'maxmcmc': 2000, 'nact': 10, 'jacobian': <function jacobian at 0x7f29ba0411e0>}
+   10:42 bilby INFO    : Checkpoint every n_check_point = 200000
+   10:42 bilby INFO    : Using dynesty version 1.0.0
+   10:42 bilby INFO    : Using the bilby-implemented rwalk sample method with ACT estimated walks
+
+
+This tells you that a typical likelihood evaluation takes a few milliseconds.
+You can use this to gauge how long the run might take: if a typical likelihood
+evaluation takes more than a fraction of a second, it is unlikely your run will
+complete in a reasonable amount of time using serial bilby. After this is a list
+of all the kwargs passed in to dynesty. Note, where a :code:`None` is given,
+dynesty will fill in its own defaults. Then, we get a message about how often
+checkpointing will be done, the version of dynesty, and which sample method
+will be used.
+
+During the sampling, dynesty writes an update of its progress to the terminal
+(specifically, this is written to STDOUT). Here is an example:
+
+
+.. code-block:: console
+
+   1015it [00:08, 138.49it/s, bound:0 nc:2 ncall:2714 eff:37.4% logz-ratio=-67.89+/-0.09 dlogz:181.166>0.10]
+
+From left to right, this gives the number of iterations, the sampling time,
+the iterations per second, the bound (while :code:`bound=0` dynesty samples
+from the unit cube), the number of likelihood calls per sample :code:`nc`, the
+total number of likelihood calls :code:`ncall`, the sampling efficiency, the
+current estimate of the logz-ratio (monotonically increases) and the estimated
+remaining log evidence.
+
+If the likelihood calculates the :code:`log_noise_evidence`, then this output
+will give the :code:`logz-ratio`. If it doesn't it instead uses just the
+unnormalised evidence :code:`logz`.
+
+The :code:`logz-ratio` and :code:`dlogz` gives an estimate of the final
+expected evidence. You can compare this to your expectations to help diagnose
+problems before completing a run. However, be aware the `dlogz` does not
+monotonically decrease: if a region of large likelihood is subsequently found,
+the :code:`dlogz` will increase.
diff --git a/docs/index.txt b/docs/index.txt
index e4d2ce7ed..eb0cb84ac 100644
--- a/docs/index.txt
+++ b/docs/index.txt
@@ -15,6 +15,7 @@ Welcome to bilby's documentation!
    prior
    likelihood
    samplers
+   dynesty-guide
    bilby-output
    compact-binary-coalescence-parameter-estimation
    transient-gw-data
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 706fb75a9..01869f484 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,5 +1,6 @@
 html5lib==1.0b8  # Hack to ensure documentation generated
 sphinx
+sphinx_tabs
 numpydoc
 nbsphinx
 autodoc
diff --git a/requirements.txt b/requirements.txt
index 849fd3f93..bf8b8babc 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
 future
 dynesty
+emcee
 corner
 numpy>=1.9
 matplotlib>=2.0
diff --git a/setup.py b/setup.py
index beca2119f..4ac99cd23 100644
--- a/setup.py
+++ b/setup.py
@@ -80,6 +80,7 @@ setup(name='bilby',
       install_requires=[
           'future',
           'dynesty>=1.0.0',
+          'emcee',
           'corner',
           'dill',
           'numpy>=1.9',
diff --git a/test/sampler_test.py b/test/sampler_test.py
index 63237d1c1..79a1a09d7 100644
--- a/test/sampler_test.py
+++ b/test/sampler_test.py
@@ -152,10 +152,11 @@ class TestDynesty(unittest.TestCase):
                         npdim=None, rstate=None, queue_size=None, pool=None,
                         use_pool=None, live_points=None, logl_args=None, logl_kwargs=None,
                         ptform_args=None, ptform_kwargs=None,
-                        enlarge=None, bootstrap=None, vol_dec=0.5, vol_check=2.0,
-                        facc=0.5, slices=5, dlogz=0.1, maxiter=None, maxcall=None,
+                        enlarge=1.5, bootstrap=None, vol_dec=0.5, vol_check=8.0,
+                        facc=0.2, slices=5, dlogz=0.1, maxiter=None, maxcall=None,
                         logl_max=np.inf, add_live=True, print_progress=True, save_bounds=False,
-                        walks=20, update_interval=600, print_func='func', n_effective=None)
+                        walks=100, update_interval=600, print_func='func', n_effective=None,
+                        maxmcmc=5000, nact=5)
         self.sampler.kwargs['print_func'] = 'func'  # set this manually as this is not testable otherwise
         # DictEqual can't handle lists so we check these separately
         self.assertEqual([], self.sampler.kwargs['periodic'])
@@ -173,10 +174,11 @@ class TestDynesty(unittest.TestCase):
                         npdim=None, rstate=None, queue_size=None, pool=None,
                         use_pool=None, live_points=None, logl_args=None, logl_kwargs=None,
                         ptform_args=None, ptform_kwargs=None,
-                        enlarge=None, bootstrap=None, vol_dec=0.5, vol_check=2.0,
-                        facc=0.5, slices=5, dlogz=0.1, maxiter=None, maxcall=None,
+                        enlarge=1.5, bootstrap=None, vol_dec=0.5, vol_check=8.0,
+                        facc=0.2, slices=5, dlogz=0.1, maxiter=None, maxcall=None,
                         logl_max=np.inf, add_live=True, print_progress=True, save_bounds=False,
-                        walks=20, update_interval=600, print_func='func', n_effective=None)
+                        walks=100, update_interval=600, print_func='func', n_effective=None,
+                        maxmcmc=5000, nact=5)
 
         for equiv in bilby.core.sampler.base_sampler.NestedSampler.npoints_equiv_kwargs:
             new_kwargs = self.sampler.kwargs.copy()
-- 
GitLab