Commit 5629325d authored by Gregory Ashton, committed by Moritz Huebner

Add a mean-log-likelihood method to improve the ACT estimation

parent 9affbf04
......@@ -8,10 +8,12 @@ import sys
import time
import dill
from collections import namedtuple
import logging
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.signal
from ..utils import logger, check_directory_exists_and_if_not_mkdir
from .base_sampler import SamplerError, MCMCSampler
......@@ -23,10 +25,14 @@ ConvergenceInputs = namedtuple(
"autocorr_c",
"autocorr_tol",
"autocorr_tau",
"gradient_tau",
"gradient_mean_log_posterior",
"Q_tol",
"safety",
"burn_in_nact",
"burn_in_fixed_discard",
"mean_logl_frac",
"thin_by_nact",
"frac_threshold",
"nsamples",
"ignore_keys_for_tau",
"min_tau",
......@@ -53,6 +59,10 @@ class Ptemcee(MCMCSampler):
The number of burn-in autocorrelation times to discard and the thin-by
factor. Increasing burn_in_nact increases the time required for burn-in.
Increasing thin_by_nact increases the time required to obtain nsamples.
burn_in_fixed_discard: int (0)
A fixed number of samples to discard for burn-in
mean_logl_frac: float, (0.01)
The maximum fractional change in the mean log-likelihood to accept
autocorr_tol: int, (50)
The minimum number of autocorrelation times needed to trust the
estimate of the autocorrelation time.
......@@ -62,14 +72,18 @@ class Ptemcee(MCMCSampler):
A multiplicative factor for the estimated autocorrelation. Useful for
cases where non-convergence can be observed by eye but the automated
tools are failing.
autocorr_tau:
autocorr_tau: int, (1)
The number of autocorrelation times to use in assessing if the
autocorrelation time is stable.
frac_threshold: float, (0.01)
The maximum fractional change in the autocorrelation for the last
autocorr_tau steps. If the fractional change exceeds this value,
sampling will continue until the estimate of the autocorrelation time
can be trusted.
gradient_tau: float, (0.1)
The maximum (smoothed) local gradient of the ACT estimate to allow.
This ensures the ACT estimate is stable before finishing sampling.
gradient_mean_log_posterior: float, (0.1)
The maximum (smoothed) local gradient of the mean log-posterior to allow.
This ensures the mean log-posterior is stable before finishing sampling.
Q_tol: float (1.02)
The maximum between-chain to within-chain tolerance allowed (akin to
the Gelman-Rubin statistic).
min_tau: int, (1)
A minimum tau (autocorrelation time) to accept.
check_point_deltaT: float, (600)
......@@ -79,7 +93,7 @@ class Ptemcee(MCMCSampler):
exit_code: int, (77)
The code on which the sampler exits.
store_walkers: bool (False)
If true, store the unthinned, unburnt chaines in the result. Note, this
If true, store the unthinned, unburnt chains in the result. Note, this
is not recommended for cases where tau is large.
ignore_keys_for_tau: str
A pattern used to ignore keys in estimating the autocorrelation time.
......@@ -90,6 +104,12 @@ class Ptemcee(MCMCSampler):
The walkers are then initialized from the range of values obtained.
If a list, for the keys in the list the optimization step is applied,
otherwise the initial points are drawn from the prior.
niterations_per_check: int (5)
The number of iteration steps to take before checking the ACT. This
effectively pre-thins the chains. Larger values reduce the per-evaluation
overhead due to improved efficiency, but if it is made too large the
pre-thinning may be overly aggressive, effectively wasting compute time.
If you see tau=1, then niterations_per_check is likely too large.
Other Parameters
......@@ -98,7 +118,7 @@ class Ptemcee(MCMCSampler):
The number of walkers
nsteps: int, (100)
The number of steps to take
ntemps: int (2)
ntemps: int (10)
The number of temperatures used by ptemcee
Tmax: float
The maximum temperature
......@@ -107,15 +127,15 @@ class Ptemcee(MCMCSampler):
# Arguments used by ptemcee
default_kwargs = dict(
ntemps=20,
nwalkers=200,
ntemps=10,
nwalkers=100,
Tmax=None,
betas=None,
a=2.0,
adaptation_lag=10000,
adaptation_time=100,
random=None,
adapt=True,
adapt=False,
swap_ratios=False,
)
......@@ -130,13 +150,17 @@ class Ptemcee(MCMCSampler):
skip_import_verification=False,
resume=True,
nsamples=5000,
burn_in_nact=10,
burn_in_nact=50,
burn_in_fixed_discard=0,
mean_logl_frac=0.01,
thin_by_nact=0.5,
autocorr_tol=50,
autocorr_c=5,
safety=1,
autocorr_tau=5,
frac_threshold=0.01,
autocorr_tau=1,
gradient_tau=0.1,
gradient_mean_log_posterior=0.1,
Q_tol=1.02,
min_tau=1,
check_point_deltaT=600,
threads=1,
......@@ -145,7 +169,8 @@ class Ptemcee(MCMCSampler):
store_walkers=False,
ignore_keys_for_tau=None,
pos0="prior",
niterations_per_check=10,
niterations_per_check=5,
log10beta_min=None,
**kwargs
):
super(Ptemcee, self).__init__(
......@@ -184,14 +209,19 @@ class Ptemcee(MCMCSampler):
autocorr_tau=autocorr_tau,
safety=safety,
burn_in_nact=burn_in_nact,
burn_in_fixed_discard=burn_in_fixed_discard,
mean_logl_frac=mean_logl_frac,
thin_by_nact=thin_by_nact,
frac_threshold=frac_threshold,
gradient_tau=gradient_tau,
gradient_mean_log_posterior=gradient_mean_log_posterior,
Q_tol=Q_tol,
nsamples=nsamples,
ignore_keys_for_tau=ignore_keys_for_tau,
min_tau=min_tau,
niterations_per_check=niterations_per_check,
)
self.convergence_inputs = ConvergenceInputs(**convergence_inputs_dict)
logger.info("Using convergence inputs: {}".format(self.convergence_inputs))
# Check if threads was given as an equivalent arg
if threads == 1:
......@@ -206,6 +236,23 @@ class Ptemcee(MCMCSampler):
self.store_walkers = store_walkers
self.pos0 = pos0
self._periodic = [
self.priors[key].boundary == "periodic" for key in self.search_parameter_keys
]
self.priors.sample()
self._minima = np.array([
self.priors[key].minimum for key in self.search_parameter_keys
])
self._range = np.array([
self.priors[key].maximum for key in self.search_parameter_keys
]) - self._minima
self.log10beta_min = log10beta_min
if self.log10beta_min is not None:
betas = np.logspace(0, self.log10beta_min, self.ntemps)
logger.warning("Using betas {}".format(betas))
self.kwargs["betas"] = betas
@property
def sampler_function_kwargs(self):
""" Kwargs passed to samper.sampler() """
......@@ -322,7 +369,7 @@ class Ptemcee(MCMCSampler):
return pos0
def setup_sampler(self):
""" Either initialize the sampelr or read in the resume file """
""" Either initialize the sampler or read in the resume file """
import ptemcee
if os.path.isfile(self.resume_file) and self.resume is True:
......@@ -335,11 +382,13 @@ class Ptemcee(MCMCSampler):
self.iteration = data["iteration"]
self.chain_array = data["chain_array"]
self.log_likelihood_array = data["log_likelihood_array"]
self.log_posterior_array = data["log_posterior_array"]
self.pos0 = data["pos0"]
self.beta_list = data["beta_list"]
self.sampler._betas = np.array(self.beta_list[-1])
self.tau_list = data["tau_list"]
self.tau_list_n = data["tau_list_n"]
self.Q_list = data["Q_list"]
self.time_per_check = data["time_per_check"]
# Initialize the pool
......@@ -376,10 +425,12 @@ class Ptemcee(MCMCSampler):
# Initialize storing results
self.iteration = 0
self.chain_array = self.get_zero_chain_array()
self.log_likelihood_array = self.get_zero_log_likelihood_array()
self.log_likelihood_array = self.get_zero_array()
self.log_posterior_array = self.get_zero_array()
self.beta_list = []
self.tau_list = []
self.tau_list_n = []
self.Q_list = []
self.time_per_check = []
self.pos0 = self.get_pos0()
......@@ -388,7 +439,7 @@ class Ptemcee(MCMCSampler):
def get_zero_chain_array(self):
return np.zeros((self.nwalkers, self.max_steps, self.ndim))
def get_zero_log_likelihood_array(self):
def get_zero_array(self):
return np.zeros((self.ntemps, self.nwalkers, self.max_steps))
def get_pos0(self):
......@@ -425,18 +476,27 @@ class Ptemcee(MCMCSampler):
self.pos0, storechain=False,
iterations=self.convergence_inputs.niterations_per_check,
**self.sampler_function_kwargs):
pass
pos0[:, :, self._periodic] = np.mod(
pos0[:, :, self._periodic] - self._minima[self._periodic],
self._range[self._periodic]
) + self._minima[self._periodic]
if self.iteration == self.chain_array.shape[1]:
self.chain_array = np.concatenate((
self.chain_array, self.get_zero_chain_array()), axis=1)
self.log_likelihood_array = np.concatenate((
self.log_likelihood_array, self.get_zero_log_likelihood_array()),
self.log_likelihood_array, self.get_zero_array()),
axis=2)
self.log_posterior_array = np.concatenate((
self.log_posterior_array, self.get_zero_array()),
axis=2)
self.pos0 = pos0
self.chain_array[:, self.iteration, :] = pos0[0, :, :]
self.log_likelihood_array[:, :, self.iteration] = log_likelihood
self.log_posterior_array[:, :, self.iteration] = log_posterior
self.mean_log_posterior = np.mean(
self.log_posterior_array[:, :, :self.iteration], axis=1)
# Calculate time per iteration
self.time_per_check.append((datetime.datetime.now() - t0).total_seconds())
......@@ -444,6 +504,28 @@ class Ptemcee(MCMCSampler):
self.iteration += 1
# Calculate minimum iteration step to discard
minimum_iteration = get_minimum_stable_itertion(
self.mean_log_posterior,
frac=self.convergence_inputs.mean_logl_frac
)
logger.debug("Minimum iteration = {}".format(minimum_iteration))
# Calculate the maximum discard number
discard_max = np.max(
[self.convergence_inputs.burn_in_fixed_discard,
minimum_iteration]
)
if self.iteration > discard_max + self.nwalkers:
# If we have taken more than nwalkers steps after the discard
# then set the discard
self.discard = discard_max
else:
# Otherwise, discard everything so far (avoid initialisation bias)
logger.debug("Too few steps to calculate convergence")
self.discard = self.iteration
(
stop,
self.nburn,
......@@ -451,7 +533,8 @@ class Ptemcee(MCMCSampler):
self.tau_int,
self.nsamples_effective,
) = check_iteration(
self.chain_array[:, :self.iteration + 1, :],
self.iteration,
self.chain_array[:, self.discard:self.iteration, :],
sampler,
self.convergence_inputs,
self.search_parameter_keys,
......@@ -459,6 +542,8 @@ class Ptemcee(MCMCSampler):
self.beta_list,
self.tau_list,
self.tau_list_n,
self.Q_list,
self.mean_log_posterior,
)
if stop:
......@@ -479,19 +564,21 @@ class Ptemcee(MCMCSampler):
# Get 0-likelihood samples and store in the result
self.result.samples = self.chain_array[
:, self.nburn : self.iteration : self.thin, :
:, self.discard + self.nburn : self.iteration : self.thin, :
].reshape((-1, self.ndim))
loglikelihood = self.log_likelihood_array[
0, :, self.nburn : self.iteration : self.thin
0, :, self.discard + self.nburn : self.iteration : self.thin
] # nwalkers, nsteps
self.result.log_likelihood_evaluations = loglikelihood.reshape((-1))
if self.store_walkers:
self.result.walkers = self.sampler.chain
self.result.nburn = self.nburn
self.result.discard = self.discard
log_evidence, log_evidence_err = compute_evidence(
sampler, self.log_likelihood_array, self.outdir, self.label, self.nburn,
sampler, self.log_likelihood_array, self.outdir,
self.label, self.discard, self.nburn,
self.thin, self.iteration,
)
self.result.log_evidence = log_evidence
......@@ -524,43 +611,79 @@ class Ptemcee(MCMCSampler):
self.label,
self.nsamples_effective,
self.sampler,
self.discard,
self.nburn,
self.thin,
self.search_parameter_keys,
self.resume_file,
self.log_likelihood_array,
self.log_posterior_array,
self.chain_array,
self.pos0,
self.beta_list,
self.tau_list,
self.tau_list_n,
self.Q_list,
self.time_per_check,
)
if plot and not np.isnan(self.nburn):
# Generate the walkers plot diagnostic
plot_walkers(
self.chain_array[:, : self.iteration, :],
self.nburn,
self.thin,
self.search_parameter_keys,
self.outdir,
self.label,
)
if plot:
try:
# Generate the walkers plot diagnostic
plot_walkers(
self.chain_array[:, : self.iteration, :],
self.nburn,
self.thin,
self.search_parameter_keys,
self.outdir,
self.label,
self.discard,
)
except Exception as e:
logger.info("Walkers plot failed with exception {}".format(e))
# Generate the tau plot diagnostic
plot_tau(
self.tau_list_n,
self.tau_list,
self.search_parameter_keys,
self.outdir,
self.label,
self.tau_int,
self.convergence_inputs.autocorr_tau,
)
try:
# Generate the tau plot diagnostic if DEBUG
if logger.level < logging.INFO:
plot_tau(
self.tau_list_n,
self.tau_list,
self.search_parameter_keys,
self.outdir,
self.label,
self.tau_int,
self.convergence_inputs.autocorr_tau,
)
except Exception as e:
logger.info("tau plot failed with exception {}".format(e))
try:
plot_mean_log_posterior(
self.mean_log_posterior,
self.outdir,
self.label,
)
except Exception as e:
logger.info("mean_logl plot failed with exception {}".format(e))
def get_minimum_stable_itertion(mean_array, frac, nsteps_min=10):
nsteps = mean_array.shape[1]
if nsteps < nsteps_min:
return 0
min_it = 0
for x in mean_array:
maxl = np.max(x)
fracdiff = (maxl - x) / np.abs(maxl)
idxs = fracdiff < frac
if np.sum(idxs) > 0:
min_it = np.max([min_it, np.min(np.arange(len(idxs))[idxs])])
return min_it
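The helper above picks, per temperature, the earliest iteration at which the mean log-posterior is within frac of its maximum, and returns the latest of those indices across temperatures; everything before it is treated as burn-in. A minimal usage sketch with toy values (illustrative only):

import numpy as np

# Two temperatures, 20 iterations: the mean log-posterior climbs for 10 steps
# and then plateaus, so the first "stable" iteration is step 9.
mean_log_posterior = np.vstack([
    np.concatenate([np.linspace(-100, -10, 10), np.full(10, -10.0)]),
    np.concatenate([np.linspace(-80, -10, 10), np.full(10, -10.0)]),
])
discard = get_minimum_stable_itertion(mean_log_posterior, frac=0.01)  # -> 9
# run_sampler then uses max(discard, burn_in_fixed_discard) as the adaptive
# burn-in offset, before the ACT-based nburn is applied on top.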
def check_iteration(
iteration,
samples,
sampler,
convergence_inputs,
......@@ -569,6 +692,8 @@ def check_iteration(
beta_list,
tau_list,
tau_list_n,
Q_list,
mean_log_posterior,
):
""" Per-iteration logic to calculate the convergence check
......@@ -594,24 +719,16 @@ def check_iteration(
nsamples_effective: int
The effective number of samples after burning and thinning
"""
import emcee
ci = convergence_inputs
nwalkers, iteration, ndim = samples.shape
# Compute ACT tau for 0-temperature chains
tau_array = np.zeros((nwalkers, ndim))
for ii in range(nwalkers):
for jj, key in enumerate(search_parameter_keys):
if ci.ignore_keys_for_tau and ci.ignore_keys_for_tau in key:
continue
try:
tau_array[ii, jj] = emcee.autocorr.integrated_time(
samples[ii, :, jj], c=ci.autocorr_c, tol=0)[0]
except emcee.autocorr.AutocorrError:
tau_array[ii, jj] = np.inf
# Note: nsteps is the number of steps in the samples, while iteration is
# the current iteration number, so iteration > nsteps by the number of
# discards
nwalkers, nsteps, ndim = samples.shape
# Maximum over paramters, mean over walkers
tau_array = calculate_tau_array(samples, search_parameter_keys, ci)
# Maximum over parameters, mean over walkers
tau = np.max(np.mean(tau_array, axis=0))
# Apply multiplicative safety factor
......@@ -622,37 +739,80 @@ def check_iteration(
tau_list.append(list(np.mean(tau_array, axis=0)))
tau_list_n.append(iteration)
# Convert to an integer
tau_int = int(np.ceil(tau)) if not np.isnan(tau) else tau
Q = get_Q_convergence(samples)
Q_list.append(Q)
if np.isnan(tau_int) or np.isinf(tau_int):
if np.isnan(tau) or np.isinf(tau):
print_progress(
iteration, sampler, time_per_check, np.nan, np.nan,
np.nan, np.nan, False, convergence_inputs,
np.nan, np.nan, np.nan, False, convergence_inputs, Q,
)
return False, np.nan, np.nan, np.nan, np.nan
# Convert to an integer
tau_int = int(np.ceil(tau))
# Calculate the effective number of samples available
nburn = int(ci.burn_in_nact * tau_int)
thin = int(np.max([1, ci.thin_by_nact * tau_int]))
samples_per_check = nwalkers / thin
nsamples_effective = int(nwalkers * (iteration - nburn) / thin)
nsamples_effective = int(nwalkers * (nsteps - nburn) / thin)
# Calculate convergence boolean
converged = ci.nsamples < nsamples_effective
# Calculate fractional change in tau from previous iteration
check_taus = np.array(tau_list[-tau_int * ci.autocorr_tau :])
taus_per_parameter = check_taus[-1, :]
if not np.any(np.isnan(check_taus)):
frac = (taus_per_parameter - check_taus) / taus_per_parameter
max_frac = np.max(frac)
tau_usable = np.all(frac < ci.frac_threshold)
converged = Q < ci.Q_tol and ci.nsamples < nsamples_effective
logger.debug("Convergence: Q<Q_tol={}, nsamples<nsamples_effective={}"
.format(Q < ci.Q_tol, ci.nsamples < nsamples_effective))
GRAD_WINDOW_LENGTH = nwalkers + 1
nsteps_to_check = ci.autocorr_tau * np.max([2 * GRAD_WINDOW_LENGTH, tau_int])
lower_tau_index = np.max([0, len(tau_list) - nsteps_to_check])
check_taus = np.array(tau_list[lower_tau_index :])
if not np.any(np.isnan(check_taus)) and check_taus.shape[0] > GRAD_WINDOW_LENGTH:
gradient_tau = get_max_gradient(
check_taus, axis=0, window_length=11)
if gradient_tau < ci.gradient_tau:
logger.debug(
"tau usable as {} < gradient_tau={}"
.format(gradient_tau, ci.gradient_tau)
)
tau_usable = True
else:
logger.debug(
"tau not usable as {} > gradient_tau={}"
.format(gradient_tau, ci.gradient_tau)
)
tau_usable = False
check_mean_log_posterior = mean_log_posterior[:, -nsteps_to_check:]
gradient_mean_log_posterior = get_max_gradient(
check_mean_log_posterior, axis=1, window_length=GRAD_WINDOW_LENGTH,
smooth=True)
if gradient_mean_log_posterior < ci.gradient_mean_log_posterior:
logger.debug(
"tau usable as {} < gradient_mean_log_posterior={}"
.format(gradient_mean_log_posterior, ci.gradient_mean_log_posterior)
)
tau_usable *= True
else:
logger.debug(
"tau not usable as {} > gradient_mean_log_posterior={}"
.format(gradient_mean_log_posterior, ci.gradient_mean_log_posterior)
)
tau_usable = False
else:
max_frac = np.nan
logger.debug("ACT is nan")
gradient_tau = np.nan
gradient_mean_log_posterior = np.nan
tau_usable = False
if iteration < tau_int * ci.autocorr_tol or tau_int < ci.min_tau:
if nsteps < tau_int * ci.autocorr_tol:
logger.debug("ACT less than autocorr_tol")
tau_usable = False
elif tau_int < ci.min_tau:
logger.debug("ACT less than min_tau")
tau_usable = False
# Print an update on the progress
......@@ -663,14 +823,39 @@ def check_iteration(
nsamples_effective,
samples_per_check,
tau_int,
max_frac,
gradient_tau,
gradient_mean_log_posterior,
tau_usable,
convergence_inputs,
Q
)
stop = converged and tau_usable
return stop, nburn, thin, tau_int, nsamples_effective
def get_max_gradient(x, axis=0, window_length=11, polyorder=2, smooth=False):
if smooth:
x = scipy.signal.savgol_filter(
x, axis=axis, window_length=window_length, polyorder=3)
return np.max(scipy.signal.savgol_filter(
x, axis=axis, window_length=window_length, polyorder=polyorder,
deriv=1))
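A sketch of what the smoothed-gradient check measures, assuming a tau estimate that is still rising linearly: savgol_filter with deriv=1 returns the first derivative of a local polynomial fit, so the maximum over the trace is the steepest remaining trend and sampling continues while it exceeds gradient_tau.

import numpy as np
import scipy.signal

# tau grows by 0.5 per iteration over the last 21 checks, shape (nsteps, ndim)
tau_trace = np.linspace(10, 20, 21).reshape(-1, 1)
grad = np.max(scipy.signal.savgol_filter(
    tau_trace, axis=0, window_length=11, polyorder=2, deriv=1))
# grad is approximately 0.5, well above the default gradient_tau=0.1, so the
# ACT estimate would not yet be treated as usable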
def get_Q_convergence(samples):
nwalkers, nsteps, ndim = samples.shape
if nsteps > 1:
W = np.mean(np.var(samples, axis=1), axis=0)
per_walker_mean = np.mean(samples, axis=1)
mean = np.mean(per_walker_mean, axis=0)
B = nsteps / (nwalkers - 1.) * np.sum((per_walker_mean - mean)**2, axis=0)
Vhat = (nsteps - 1) / nsteps * W + (nwalkers + 1) / (nwalkers * nsteps) * B
Q_per_dim = np.sqrt(Vhat / W)
return np.max(Q_per_dim)
else:
return np.inf
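In the notation of get_Q_convergence, with W the within-walker variance averaged over walkers and B the between-walker variance of the per-walker means (the variables W, B and Vhat in the code), the statistic is a Gelman-Rubin-style ratio taken per dimension and maximised:

W = \frac{1}{n_w}\sum_{k=1}^{n_w}\mathrm{Var}[x_k], \qquad
B = \frac{n_s}{n_w-1}\sum_{k=1}^{n_w}\left(\bar{x}_k-\bar{x}\right)^2,

\hat{V} = \frac{n_s-1}{n_s}\,W + \frac{n_w+1}{n_w n_s}\,B, \qquad
Q = \max_{\mathrm{dim}}\sqrt{\hat{V}/W},

where n_w is nwalkers and n_s is nsteps; check_iteration declares convergence only once Q < Q_tol in addition to the effective-sample-size requirement.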
def print_progress(
iteration,
sampler,
......@@ -678,17 +863,19 @@ def print_progress(
nsamples_effective,
samples_per_check,
tau_int,
max_frac,
gradient_tau,
gradient_mean_log_posterior,
tau_usable,
convergence_inputs,
Q,
):
# Setup acceptance string
acceptance = sampler.acceptance_fraction[0, :]
acceptance_str = "{:1.2f}->{:1.2f}".format(np.min(acceptance), np.max(acceptance))
acceptance_str = "{:1.2f}-{:1.2f}".format(np.min(acceptance), np.max(acceptance))
# Setup tswap acceptance string
tswap_acceptance_fraction = sampler.tswap_acceptance_fraction
tswap_acceptance_str = "{:1.2f}->{:1.2f}".format(
tswap_acceptance_str = "{:1.2f}-{:1.2f}".format(
np.min(tswap_acceptance_fraction), np.max(tswap_acceptance_fraction)
)
......@@ -701,37 +888,59 @@ def print_progress(
sampling_time = datetime.timedelta(seconds=np.sum(time_per_check))
if max_frac >= 0:
tau_str = "{}(+{:0.2f})".format(tau_int, max_frac)
else:
tau_str = "{}({:0.2f})".format(tau_int, max_frac)
tau_str = "{}(+{:0.2f},+{:0.2f})".format(
tau_int, gradient_tau, gradient_mean_log_posterior
)
if tau_usable:
tau_str = "={}".format(tau_str)
else:
tau_str = "!{}".format(tau_str)
Q_str = "{:0.2f}".format(Q)
evals_per_check = sampler.nwalkers * sampler.ntemps * convergence_inputs.niterations_per_check
ncalls = "{:1.1e}".format(
convergence_inputs.niterations_per_check * iteration * sampler.nwalkers * sampler.ntemps)
eval_timing = "{:1.2f}ms/ev".format(1e3 * ave_time_per_check / evals_per_check)
samp_timing = "{:1.1f}ms/sm".format(1e3 * ave_time_per_check / samples_per_check)
print(
"{}| {}| nc:{}| a0:{}| swp:{}| n:{}<{}| tau{}| {}| {}".format(
iteration,
str(sampling_time).split(".")[0],
ncalls,
acceptance_str,
tswap_acceptance_str,
nsamples_effective,
convergence_inputs.nsamples,
tau_str,
eval_timing,
samp_timing,
),
flush=True,
)
try:
print(
"{}|{}|nc:{}|a0:{}|swp:{}|n:{}<{}|t{}|q:{}|{}".format(
iteration,
str(sampling_time).split(".")[0],
ncalls,
acceptance_str,
tswap_acceptance_str,
nsamples_effective,
convergence_inputs.nsamples,
tau_str,
Q_str,
eval_timing,
),
flush=True,
)
except OSError as e:
logger.debug("Failed to print iteration due to :{}".format(e))
def calculate_tau_array(samples, search_parameter_keys, ci):
""" Compute ACT tau for 0-temperature chains """
import emcee
nwalkers, nsteps, ndim = samples.shape
tau_array = np.zeros((nwalkers, ndim)) + np.inf
if nsteps > 1:
for ii in range(nwalkers):
for jj, key in enumerate(search_parameter_keys):
if ci.ignore_keys_for_tau and ci.ignore_keys_for_tau in key:
continue
try:
tau_array[ii, jj] = emcee.autocorr.integrated_time(
samples[ii, :, jj], c=ci.autocorr_c, tol=0)[0]
except emcee.autocorr.AutocorrError:
tau_array[ii, jj] = np.inf
return tau_array
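A minimal sketch of the underlying emcee call on a single chain, assuming emcee >= 3: with tol=0 the chain-length check is disabled, so short chains return an estimate rather than raising AutocorrError, and the try/except above acts mainly as a safety net.

import numpy as np
import emcee

# AR(1) chain with coefficient 0.9, whose true integrated ACT is
# (1 + 0.9) / (1 - 0.9) = 19
rng = np.random.default_rng(42)
x = np.zeros(5000)
for t in range(1, len(x)):
    x[t] = 0.9 * x[t - 1] + rng.normal()
tau = emcee.autocorr.integrated_time(x, c=5, tol=0)[0]
# tau is roughly 19; check_iteration then takes the mean over walkers and the
# maximum over parameters of such per-chain estimates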
def checkpoint(
......@@ -740,16 +949,19 @@ def checkpoint(
label,
nsamples_effective,
sampler,
discard,
nburn,
thin,
search_parameter_keys,
resume_file,
log_likelihood_array,
log_posterior_array,
chain_array,
pos0,
beta_list,
tau_list,
tau_list_n,
Q_list,
time_per_check,
):
logger.info("Writing checkpoint and diagnostics")
......@@ -758,7 +970,7 @@ def checkpoint(
# Store the samples if possible
if nsamples_effective > 0:
filename = "{}/{}_samples.txt".format(outdir, label)
samples = np.array(chain_array)[:, nburn : iteration : thin, :].reshape(
samples = np.array(chain_array)[:, discard + nburn : iteration : thin, :].reshape(
(-1, ndim)
)
df = pd.DataFrame(samples, columns=search_parameter_keys)
......@@ -774,8 +986,10 @@ def checkpoint(
beta_list=beta_list,
tau_list=tau_list,
tau_list_n=tau_list_n,
Q_list=Q_list,
time_per_check=time_per_check,
log_likelihood_array=log_likelihood_array,
log_posterior_array=log_posterior_array,
chain_array=chain_array,
pos0=pos0,
)
......@@ -786,17 +1000,33 @@ def checkpoint(
logger.info("Finished writing checkpoint")
def plot_walkers(walkers, nburn, thin, parameter_labels, outdir, label):
def plot_walkers(walkers, nburn, thin, parameter_labels, outdir, label,
discard=0):
""" Method to plot the trace of the walkers in an ensemble MCMC plot """
nwalkers, nsteps, ndim = walkers.shape
if np.isnan(nburn):
nburn = nsteps
if np.isnan(thin):
thin = 1
idxs = np.arange(nsteps)
fig, axes = plt.subplots(nrows=ndim, ncols=2, figsize=(8, 3 * ndim))
scatter_kwargs = dict(lw=0, marker="o", markersize=1, alpha=0.05,)
scatter_kwargs = dict(lw=0, marker="o", markersize=1, alpha=0.1,)
# Plot the fixed burn-in
if discard > 0:
for i, (ax, axh) in enumerate(axes):
ax.plot(
idxs[: discard],
walkers[:, : discard, i].T,
color="gray",
**scatter_kwargs
)
# Plot the burn-in
for i, (ax, axh) in enumerate(axes):
ax.plot(
idxs[: nburn + 1],
walkers[:, : nburn + 1, i].T,
idxs[discard: discard + nburn + 1],
walkers[:, discard: discard + nburn + 1, i].T,
color="C1",
**scatter_kwargs
)
......@@ -804,12 +1034,14 @@ def plot_walkers(walkers, nburn, thin, parameter_labels, outdir, label):
# Plot the thinned posterior samples
for i, (ax, axh) in enumerate(axes):
ax.plot(
idxs[nburn::thin],
walkers[:, nburn::thin, i].T,
idxs[discard + nburn::thin],
walkers[:, discard + nburn::thin, i].T,
color="C0",
**scatter_kwargs
)
axh.hist(walkers[:, nburn::thin, i].reshape((-1)), bins=50, alpha=0.8)
axh.hist(walkers[:, discard + nburn::thin, i].reshape((-1)), bins=50, alpha=0.8)
for i, (ax, axh) in enumerate(axes):
axh.set_xlabel(parameter_labels[i])
ax.set_ylabel(parameter_labels[i])
......@@ -820,25 +1052,43 @@ def plot_walkers(walkers, nburn, thin, parameter_labels, outdir, label):
def plot_tau(
tau_list_n, tau_list, search_parameter_keys, outdir, label, tau, autocorr_tau
tau_list_n, tau_list, search_parameter_keys, outdir, label, tau, autocorr_tau,
):
fig, ax = plt.subplots()
for i, key in enumerate(search_parameter_keys):
ax.plot(tau_list_n, np.array(tau_list)[:, i], label=key)
ax.axvline(tau_list_n[-1] - tau * autocorr_tau)
ax.set_xlabel("Iteration")
ax.set_ylabel(r"$\langle \tau \rangle$")
ax.legend()
fig.tight_layout()
fig.savefig("{}/{}_checkpoint_tau.png".format(outdir, label))
plt.close(fig)
def compute_evidence(sampler, log_likelihood_array, outdir, label, nburn, thin,
def plot_mean_log_posterior(mean_log_posterior, outdir, label):
ntemps, nsteps = mean_log_posterior.shape
ymax = np.max(mean_log_posterior)
ymin = np.min(mean_log_posterior[:, -100:])
ymax += 0.1 * (ymax - ymin)
ymin -= 0.1 * (ymax - ymin)
fig, ax = plt.subplots()
idxs = np.arange(nsteps)
ax.plot(idxs, mean_log_posterior.T)
ax.set(xlabel="Iteration", ylabel=r"$\langle\mathrm{log-posterior}\rangle$",
ylim=(ymin, ymax))
fig.tight_layout()
fig.savefig("{}/{}_checkpoint_meanlogposterior.png".format(outdir, label))
plt.close(fig)
def compute_evidence(sampler, log_likelihood_array, outdir, label, discard, nburn, thin,
iteration, make_plots=True):
""" Computes the evidence using thermodynamic integration """
betas = sampler.betas
# We compute the evidence without the burnin samples, but we do not thin
lnlike = log_likelihood_array[:, :, nburn : iteration]
lnlike = log_likelihood_array[:, :, discard + nburn : iteration]
mean_lnlikes = np.mean(np.mean(lnlike, axis=1), axis=1)
mean_lnlikes = mean_lnlikes[::-1]
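The identity behind compute_evidence, assuming a normalised prior so that ln Z(beta=0) = 0, is the standard thermodynamic-integration result

\ln Z = \int_0^1 \left\langle \ln \mathcal{L} \right\rangle_\beta \, \mathrm{d}\beta

where the angle brackets denote the mean log-likelihood of the chain at inverse temperature beta; the mean_lnlikes computed above are exactly these averages (burnt-in but unthinned), and the integral is approximated numerically over the sampled betas.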
......@@ -911,6 +1161,29 @@ class LikePriorEvaluator(object):
def __init__(self, search_parameter_keys, use_ratio=False):
self.search_parameter_keys = search_parameter_keys
self.use_ratio = use_ratio
self.periodic_set = False
def _setup_periodic(self):
self._periodic = [
priors[key].boundary == "periodic" for key in self.search_parameter_keys
]
priors.sample()
self._minima = np.array([
priors[key].minimum for key in self.search_parameter_keys
])
self._range = np.array([
priors[key].maximum for key in self.search_parameter_keys
]) - self._minima
self.periodic_set = True
def _wrap_periodic(self, array):
if not self.periodic_set:
self._setup_periodic()
array[self._periodic] = np.mod(
array[self._periodic] - self._minima[self._periodic],
self._range[self._periodic]
) + self._minima[self._periodic]
return array
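A minimal sketch of the modular wrap applied in _wrap_periodic, assuming a single periodic parameter with prior range [0, 2*pi): a proposal that drifts past the boundary is mapped back into range without changing the posterior value.

import numpy as np

minimum, prior_range = 0.0, 2 * np.pi
proposed = 7.0  # just above the upper boundary
wrapped = np.mod(proposed - minimum, prior_range) + minimum
# wrapped is about 0.717 (= 7.0 - 2*pi), so the walker re-enters the range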
def logl(self, v_array):
parameters = {key: v for key, v in zip(self.search_parameter_keys, v_array)}
......
......@@ -28,36 +28,36 @@ class TestPTEmcee(unittest.TestCase):
def test_default_kwargs(self):
expected = dict(
ntemps=20,
nwalkers=200,
ntemps=10,
nwalkers=100,
Tmax=None,
betas=None,
a=2.0,
adaptation_lag=10000,
adaptation_time=100,
random=None,
adapt=True,
adapt=False,
swap_ratios=False,
)
self.assertDictEqual(expected, self.sampler.kwargs)
def test_translate_kwargs(self):
expected = dict(
ntemps=20,
nwalkers=200,
ntemps=10,
nwalkers=100,
Tmax=None,
betas=None,
a=2.0,
adaptation_lag=10000,
adaptation_time=100,
random=None,
adapt=True,
adapt=False,
swap_ratios=False,
)
for equiv in bilby.core.sampler.base_sampler.MCMCSampler.nwalkers_equiv_kwargs:
new_kwargs = self.sampler.kwargs.copy()
del new_kwargs["nwalkers"]
new_kwargs[equiv] = 200
new_kwargs[equiv] = 100
self.sampler.kwargs = new_kwargs
self.assertDictEqual(expected, self.sampler.kwargs)
......