diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 9c6d0878810b7d8955aeb118e9a0286f27fbb1b0..d1572e049c48385afb8d9a75148da796cffb07a8 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -43,7 +43,7 @@ basic-3.7:
 # test example on python 3.7
 python-3.7:
   stage: test
-  image: bilbydev/v2-dockerfile-test-suite-python37
+  image: quay.io/bilbydev/v2-dockerfile-test-suite-python37
   script:
     - python -m pip install .
 
@@ -69,7 +69,7 @@ python-3.7:
 # test example on python 3.8
 python-3.8:
   stage: test
-  image: bilbydev/v2-dockerfile-test-suite-python38
+  image: quay.io/bilbydev/v2-dockerfile-test-suite-python38
   script:
     - python -m pip install .
 
@@ -78,7 +78,7 @@ python-3.8:
 # test example on python 3.6
 python-3.6:
   stage: test
-  image: bilbydev/v2-dockerfile-test-suite-python36
+  image: quay.io/bilbydev/v2-dockerfile-test-suite-python36
   script:
     - python -m pip install .
 
@@ -87,7 +87,7 @@ python-3.6:
 # test samplers on python 3.7
 python-3.7-samplers:
   stage: test
-  image: bilbydev/v2-dockerfile-test-suite-python37
+  image: quay.io/bilbydev/v2-dockerfile-test-suite-python37
   script:
     - python -m pip install .
 
@@ -97,7 +97,7 @@ python-3.7-samplers:
 # test samplers on python 3.6
 python-3.6-samplers:
   stage: test
-  image: bilbydev/v2-dockerfile-test-suite-python36
+  image: quay.io/bilbydev/v2-dockerfile-test-suite-python36
   script:
     - python -m pip install .
 
@@ -106,7 +106,7 @@ python-3.6-samplers:
 # Test containers are up to date
 containers:
   stage: test
-  image: bilbydev/v2-dockerfile-test-suite-python37
+  image: quay.io/bilbydev/v2-dockerfile-test-suite-python37
   script:
     - cd containers
     - python write_dockerfiles.py
@@ -117,7 +117,7 @@ containers:
 # Tests run at a fixed schedule rather than on push
 scheduled-python-3.7:
   stage: test
-  image: bilbydev/v2-dockerfile-test-suite-python37
+  image: quay.io/bilbydev/v2-dockerfile-test-suite-python37
   only:
     - schedules
   script:
@@ -129,7 +129,7 @@ scheduled-python-3.7:
 
 plotting:
   stage: test
-  image: bilbydev/bilby-test-suite-python37
+  image: quay.io/bilbydev/v2-dockerfile-test-suite-python37
   only:
     - schedules
   script:
@@ -140,7 +140,7 @@ plotting:
 
 authors:
   stage: test
-  image: bilbydev/bilby-test-suite-python37
+  image: quay.io/bilbydev/v2-dockerfile-test-suite-python37
   script:
     - python test/check_author_list.py
 
@@ -162,7 +162,7 @@ pages:
 
 deploy_release:
   stage: deploy
-  image: bilbydev/v2-dockerfile-test-suite-python37
+  image: quay.io/bilbydev/v2-dockerfile-test-suite-python37
   variables:
     TWINE_USERNAME: $PYPI_USERNAME
     TWINE_PASSWORD: $PYPI_PASSWORD
@@ -177,7 +177,7 @@ deploy_release:
 
 precommits-py3.7:
   stage: test
-  image: bilbydev/v2-dockerfile-test-suite-python37
+  image: quay.io/bilbydev/v2-dockerfile-test-suite-python37
   script:
     - source activate python37
     - mkdir -p .pip37
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 612437929298e18624c5d170b790075909a6e7c6..ddbd703b36f04a438a61bcc14338310d13c0ef43 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,14 @@
 # All notable changes will be documented in this file
 
+## [1.0.4] 2020-11-23
+Version 1.0.4 release of bilby
+
+### Added
+- Added a chirp-mass and mass-ratio prior which is uniform in component masses (!891)
+
+### Changes
+- Fixed an issue in the CI: the test-suite Docker images are now pulled from quay.io
+
 ## [1.0.3] 2020-10-23
 
 Version 1.0.3 release of bilby
diff --git a/bilby/__init__.py b/bilby/__init__.py
index 4b6ac0f52bfae1798ca6bf338f92294c0a96606b..30e2531b5a51b892836461372a54cbf8c8907f58 100644
--- a/bilby/__init__.py
+++ b/bilby/__init__.py
@@ -16,7 +16,6 @@ https://lscsoft.docs.ligo.org/bilby/installation.html.
 """
 
 
-from __future__ import absolute_import
 import sys
 
 from . import core, gw, hyper
diff --git a/bilby/core/__init__.py b/bilby/core/__init__.py
index 3946f24a4339cc4fa456df4c53dd50bd2fde75c9..7446bd24f1a771ce015674d3e72c3803ea997c34 100644
--- a/bilby/core/__init__.py
+++ b/bilby/core/__init__.py
@@ -1,2 +1 @@
-from __future__ import absolute_import
 from . import grid, likelihood, prior, result, sampler, series, utils
diff --git a/bilby/core/grid.py b/bilby/core/grid.py
index 42fb58dd0f1f9cd46fd115027bbc95f67b0899a4..c469464bd7df9084ebf53cc3eca7b19fda86a789 100644
--- a/bilby/core/grid.py
+++ b/bilby/core/grid.py
@@ -1,5 +1,3 @@
-from __future__ import division
-
 import numpy as np
 import os
 import json
diff --git a/bilby/core/likelihood.py b/bilby/core/likelihood.py
index 3441a1a6de5a295e90619b589644bc7135f24e5d..06438a21e364b9d07ef6952434e21a3678fff4e0 100644
--- a/bilby/core/likelihood.py
+++ b/bilby/core/likelihood.py
@@ -1,4 +1,3 @@
-from __future__ import division, print_function
 import copy
 
 import numpy as np
diff --git a/bilby/core/prior/analytical.py b/bilby/core/prior/analytical.py
index 67063aa9b2538dd582d9b2bd0134d3b01deae9f3..a2aa4cac57a9ea2c44f0768fc697bdbca5ef520a 100644
--- a/bilby/core/prior/analytical.py
+++ b/bilby/core/prior/analytical.py
@@ -389,7 +389,7 @@ class Cosine(Prior):
         boundary: str
             See superclass
         """
-        super(Cosine, self).__init__(minimum=minimum, maximum=maximum,  name=name,
+        super(Cosine, self).__init__(minimum=minimum, maximum=maximum, name=name,
                                      latex_label=latex_label, unit=unit, boundary=boundary)
 
     def rescale(self, val):
diff --git a/bilby/core/prior/dict.py b/bilby/core/prior/dict.py
index 3ec1a0593b44c6ea007febbd7c33ed11c021da49..3d4371a92943911f6e52b28b50299b351ed23437 100644
--- a/bilby/core/prior/dict.py
+++ b/bilby/core/prior/dict.py
@@ -3,7 +3,6 @@ from io import open as ioopen
 import json
 import os
 
-from future.utils import iteritems
 from matplotlib.cbook import flatten
 import numpy as np
 
@@ -185,7 +184,7 @@ class PriorDict(dict):
 
     def from_dictionary(self, dictionary):
         eval_dict = dict(inf=np.inf)
-        for key, val in iteritems(dictionary):
+        for key, val in dictionary.items():
             if isinstance(val, Prior):
                 continue
             elif isinstance(val, (int, float)):
diff --git a/bilby/core/result.py b/bilby/core/result.py
index d604e52d1e2aa3990f777f306f352b78b08a660b..e2855797a79e81be860a06376367dad7765d8766 100644
--- a/bilby/core/result.py
+++ b/bilby/core/result.py
@@ -1,5 +1,3 @@
-from __future__ import division
-
 import inspect
 import os
 from collections import OrderedDict, namedtuple
@@ -238,8 +236,9 @@ class Result(object):
                  sampler_kwargs=None, injection_parameters=None,
                  meta_data=None, posterior=None, samples=None,
                  nested_samples=None, log_evidence=np.nan,
-                 log_evidence_err=np.nan, log_noise_evidence=np.nan,
-                 log_bayes_factor=np.nan, log_likelihood_evaluations=None,
+                 log_evidence_err=np.nan, information_gain=np.nan,
+                 log_noise_evidence=np.nan, log_bayes_factor=np.nan,
+                 log_likelihood_evaluations=None,
                  log_prior_evaluations=None, sampling_time=None, nburn=None,
                  num_likelihood_evaluations=None, walkers=None,
                  max_autocorrelation_time=None, use_ratio=None,
@@ -269,6 +268,8 @@ class Result(object):
             An array of the output posterior samples and the unweighted samples
         log_evidence, log_evidence_err, log_noise_evidence, log_bayes_factor: float
             Natural log evidences
+        information_gain: float
+            The Kullback-Leibler divergence between the posterior and prior,
+            as estimated by the nested sampler
         log_likelihood_evaluations: array_like
             The evaluations of the likelihood for each sample point
         num_likelihood_evaluations: int
@@ -321,6 +322,7 @@ class Result(object):
         self.use_ratio = use_ratio
         self.log_evidence = log_evidence
         self.log_evidence_err = log_evidence_err
+        self.information_gain = information_gain
         self.log_noise_evidence = log_noise_evidence
         self.log_bayes_factor = log_bayes_factor
         self.log_likelihood_evaluations = log_likelihood_evaluations
@@ -573,7 +575,7 @@ class Result(object):
             'log_noise_evidence', 'log_bayes_factor', 'priors', 'posterior',
             'injection_parameters', 'meta_data', 'search_parameter_keys',
             'fixed_parameter_keys', 'constraint_parameter_keys',
-            'sampling_time', 'sampler_kwargs', 'use_ratio',
+            'sampling_time', 'sampler_kwargs', 'use_ratio', 'information_gain',
             'log_likelihood_evaluations', 'log_prior_evaluations',
             'num_likelihood_evaluations', 'samples', 'nested_samples',
             'walkers', 'nburn', 'parameter_labels', 'parameter_labels_with_unit',
diff --git a/bilby/core/sampler/base_sampler.py b/bilby/core/sampler/base_sampler.py
index fa05b4f83adbe59ff5c8444fc8d43f5ab26d722a..7d8d4b4dbc038df18b1393af01ae58e1b9db9041 100644
--- a/bilby/core/sampler/base_sampler.py
+++ b/bilby/core/sampler/base_sampler.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import
 import datetime
 import distutils.dir_util
 import numpy as np
diff --git a/bilby/core/sampler/cpnest.py b/bilby/core/sampler/cpnest.py
index 25183bd07eb53938b122bb0305b89b0c548d718d..a1c1ab08768a326d741551e5d4896130a53fc63c 100644
--- a/bilby/core/sampler/cpnest.py
+++ b/bilby/core/sampler/cpnest.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import
 
 import array
 import copy
@@ -89,8 +88,8 @@ class Cpnest(NestedSampler):
                 prior_samples = self.priors.sample()
                 self._update_bounds()
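+                # Store the live-point values in double-precision ('d') arrays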
                 point = LivePoint(
-                    self.names, array.array(
-                        'f', [prior_samples[name] for name in self.names]))
+                    self.names, array.array('d', [prior_samples[name] for name in self.names])
+                )
                 return point
 
         self._resolve_proposal_functions()
@@ -132,6 +131,7 @@ class Cpnest(NestedSampler):
         self.result.nested_samples['weights'] = np.exp(log_weights)
         self.result.log_evidence = out.NS.state.logZ
         self.result.log_evidence_err = np.sqrt(out.NS.state.info / out.NS.state.nlive)
+        self.result.information_gain = out.NS.state.info
         return self.result
 
     def _verify_kwargs_against_default_kwargs(self):
diff --git a/bilby/core/sampler/dynamic_dynesty.py b/bilby/core/sampler/dynamic_dynesty.py
index 48d726e4aea958f4914ce4ea33a15608e5454b29..c2b143e106de2b4ec3db38abbcae23e480e70b6a 100644
--- a/bilby/core/sampler/dynamic_dynesty.py
+++ b/bilby/core/sampler/dynamic_dynesty.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import
 
 import os
 import dill as pickle
diff --git a/bilby/core/sampler/dynesty.py b/bilby/core/sampler/dynesty.py
index c841478be875f46658b189881908ede3d92cf0d4..5dc4cca32fcdc72b797160b5a6c91a52283d75a4 100644
--- a/bilby/core/sampler/dynesty.py
+++ b/bilby/core/sampler/dynesty.py
@@ -6,7 +6,7 @@ import pickle
 import signal
 import time
 
-import tqdm
+from tqdm.auto import tqdm
 import matplotlib.pyplot as plt
 import numpy as np
 from pandas import DataFrame
@@ -224,7 +224,7 @@ class Dynesty(NestedSampler):
             self.kwargs['update_interval'] = int(0.6 * self.kwargs['nlive'])
         if self.kwargs['print_func'] is None:
             self.kwargs['print_func'] = self._print_func
-            self.pbar = tqdm.tqdm(file=sys.stdout)
+            self.pbar = tqdm(file=sys.stdout)
         Sampler._verify_kwargs_against_default_kwargs(self)
 
     def _print_func(self, results, niter, ncall=None, dlogz=None, *args, **kwargs):
@@ -401,6 +401,7 @@ class Dynesty(NestedSampler):
             sorted_samples=self.result.samples)
         self.result.log_evidence = out.logz[-1]
         self.result.log_evidence_err = out.logzerr[-1]
+        self.result.information_gain = out.information[-1]
 
     def _run_nested_wrapper(self, kwargs):
         """ Wrapper function to run_nested
@@ -612,7 +613,7 @@ class Dynesty(NestedSampler):
                 fig = dyplot.traceplot(self.sampler.results, labels=labels)[0]
                 fig.tight_layout()
                 fig.savefig(filename)
-            except (RuntimeError, np.linalg.linalg.LinAlgError, ValueError) as e:
+            except Exception as e:
                 logger.warning(e)
                 logger.warning('Failed to create dynesty state plot at checkpoint')
             finally:
@@ -690,6 +691,16 @@ class Dynesty(NestedSampler):
         """
         return self.priors.rescale(self._search_parameter_keys, theta)
 
+    def calc_likelihood_count(self):
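+        """ Store the number of likelihood calls reported by dynesty, if benchmarking """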
+        if self.likelihood_benchmark:
+            if hasattr(self, 'sampler'):
+                self.result.num_likelihood_evaluations = \
+                    getattr(self.sampler, 'ncall', 0)
+            else:
+                self.result.num_likelihood_evaluations = 0
+
 
 def sample_rwalk_bilby(args):
     """ Modified bilby-implemented version of dynesty.sampling.sample_rwalk """
diff --git a/bilby/core/sampler/emcee.py b/bilby/core/sampler/emcee.py
index 17867251d37165ab8a8495c123b930b64ae2a123..5e83178bd98a8c9586f2eaebd8a3582753c404fe 100644
--- a/bilby/core/sampler/emcee.py
+++ b/bilby/core/sampler/emcee.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, print_function
-
 from collections import namedtuple
 import os
 import signal
@@ -12,8 +10,7 @@ from pandas import DataFrame
 from distutils.version import LooseVersion
 import dill as pickle
 
-from ..utils import (
-    logger, get_progress_bar, check_directory_exists_and_if_not_mkdir)
+from ..utils import logger, check_directory_exists_and_if_not_mkdir
 from .base_sampler import MCMCSampler, SamplerError
 
 
@@ -353,7 +350,7 @@ class Emcee(MCMCSampler):
         self.pos0 = self.sampler.chain[:, -1, :]
 
     def run_sampler(self):
-        tqdm = get_progress_bar()
+        from tqdm.auto import tqdm
         sampler_function_kwargs = self.sampler_function_kwargs
         iterations = sampler_function_kwargs.pop('iterations')
         iterations -= self._previous_iterations
diff --git a/bilby/core/sampler/fake_sampler.py b/bilby/core/sampler/fake_sampler.py
index 8033958ddbc043ca3ae03cfa9327a9789022736b..55015eef625e2189b70df3b48e8768c486799344 100644
--- a/bilby/core/sampler/fake_sampler.py
+++ b/bilby/core/sampler/fake_sampler.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import
 
 import numpy as np
 from .base_sampler import Sampler
diff --git a/bilby/core/sampler/kombine.py b/bilby/core/sampler/kombine.py
index f7c7768eca02588b803dbd036656357dbdcd39dc..e82129d9f58c81a39fbefc9b6716473d450809ad 100644
--- a/bilby/core/sampler/kombine.py
+++ b/bilby/core/sampler/kombine.py
@@ -1,5 +1,4 @@
-from __future__ import absolute_import, print_function
-from ..utils import logger, get_progress_bar
+from ..utils import logger
 import numpy as np
 import os
 from .emcee import Emcee
@@ -141,7 +140,7 @@ class Kombine(Emcee):
                 logger.info("Kombine auto-burnin complete. Removing {} samples from chains".format(self.nburn))
                 self._set_pos0_for_resume()
 
-        tqdm = get_progress_bar()
+        from tqdm.auto import tqdm
         sampler_function_kwargs = self.sampler_function_kwargs
         iterations = sampler_function_kwargs.pop('iterations')
         iterations -= self._previous_iterations
diff --git a/bilby/core/sampler/nestle.py b/bilby/core/sampler/nestle.py
index 0b97daf72b342a124183758fe212ea41bd1e32db..b366805598eccc212892acc9d946f4fd69bec0c3 100644
--- a/bilby/core/sampler/nestle.py
+++ b/bilby/core/sampler/nestle.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import
 
 import numpy as np
 from pandas import DataFrame
@@ -74,6 +73,7 @@ class Nestle(NestedSampler):
             sorted_samples=self.result.samples)
         self.result.log_evidence = out.logz
         self.result.log_evidence_err = out.logzerr
+        self.result.information_gain = out.h
         self.calc_likelihood_count()
         return self.result
 
diff --git a/bilby/core/sampler/polychord.py b/bilby/core/sampler/polychord.py
index 3c26821b470a27d35b484970ae6d7e079199ee54..22bcc7e3a92830ee90184ac0c9b19c310dbb3b4b 100644
--- a/bilby/core/sampler/polychord.py
+++ b/bilby/core/sampler/polychord.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import
 
 import numpy as np
 
diff --git a/bilby/core/sampler/ptemcee.py b/bilby/core/sampler/ptemcee.py
index 1ee756c4084ffff4209897e9c95d94ebdecdaf21..84c95f2502dbb2efcd0790361bf56c352143cc30 100644
--- a/bilby/core/sampler/ptemcee.py
+++ b/bilby/core/sampler/ptemcee.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, print_function
 
 import os
 import datetime
@@ -8,10 +7,12 @@ import sys
 import time
 import dill
 from collections import namedtuple
+import logging
 
 import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
+import scipy.signal
 
 from ..utils import logger, check_directory_exists_and_if_not_mkdir
 from .base_sampler import SamplerError, MCMCSampler
@@ -23,10 +24,14 @@ ConvergenceInputs = namedtuple(
         "autocorr_c",
         "autocorr_tol",
         "autocorr_tau",
+        "gradient_tau",
+        "gradient_mean_log_posterior",
+        "Q_tol",
         "safety",
         "burn_in_nact",
+        "burn_in_fixed_discard",
+        "mean_logl_frac",
         "thin_by_nact",
-        "frac_threshold",
         "nsamples",
         "ignore_keys_for_tau",
         "min_tau",
@@ -53,6 +58,10 @@ class Ptemcee(MCMCSampler):
         The number of burn-in autocorrelation times to discard and the thin-by
         factor. Increasing burn_in_nact increases the time required for burn-in.
         Increasing thin_by_nact increases the time required to obtain nsamples.
+    burn_in_fixed_discard: int (0)
+        A fixed number of samples to discard for burn-in
+    mean_logl_frac: float, (0.01)
+        The maximum fractional change in the mean log-posterior to accept
     autocorr_tol: int, (50)
         The minimum number of autocorrelation times needed to trust the
         estimate of the autocorrelation time.
@@ -62,14 +71,18 @@ class Ptemcee(MCMCSampler):
         A multiplicitive factor for the estimated autocorrelation. Useful for
         cases where non-convergence can be observed by eye but the automated
         tools are failing.
-    autocorr_tau:
+    autocorr_tau: int, (1)
         The number of autocorrelation times to use in assessing if the
         autocorrelation time is stable.
-    frac_threshold: float, (0.01)
-        The maximum fractional change in the autocorrelation for the last
-        autocorr_tau steps. If the fractional change exceeds this value,
-        sampling will continue until the estimate of the autocorrelation time
-        can be trusted.
+    gradient_tau: float, (0.1)
+        The maximum (smoothed) local gradient of the ACT estimate to allow.
+        This ensures the ACT estimate is stable before finishing sampling.
+    gradient_mean_log_posterior: float, (0.1)
+        The maximum (smoothed) local gradient of the mean log-posterior to
+        allow. This ensures the mean log-posterior is stable before finishing
+        sampling.
+    Q_tol: float (1.02)
+        The maximum allowed ratio of between-chain to within-chain variance
+        (akin to the Gelman-Rubin statistic).
     min_tau: int, (1)
         A minimum tau (autocorrelation time) to accept.
     check_point_deltaT: float, (600)
@@ -79,7 +92,7 @@ class Ptemcee(MCMCSampler):
     exit_code: int, (77)
         The code on which the sampler exits.
     store_walkers: bool (False)
-        If true, store the unthinned, unburnt chaines in the result. Note, this
+        If true, store the unthinned, unburnt chains in the result. Note, this
         is not recommended for cases where tau is large.
     ignore_keys_for_tau: str
         A pattern used to ignore keys in estimating the autocorrelation time.
@@ -90,6 +103,12 @@ class Ptemcee(MCMCSampler):
         The walkers are then initialized from the range of values obtained.
         If a list, for the keys in the list the optimization step is applied,
         otherwise the initial points are drawn from the prior.
+    niterations_per_check: int (5)
+        The number of iteration steps to take before checking the ACT. This
+        effectively pre-thins the chains. Larger values reduce the time per
+        evaluation due to improved efficiency, but if made too large the
+        pre-thinning may be overly aggressive, wasting compute time.
+        If you see tau=1, then niterations_per_check is likely too large.
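+    log10beta_min: float, (None)
+        If given, the inverse temperatures (betas) are fixed to ntemps values
+        spaced log-uniformly between 1 and 10**log10beta_min.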
 
 
     Other Parameters
@@ -98,7 +117,7 @@ class Ptemcee(MCMCSampler):
         The number of walkers
     nsteps: int, (100)
         The number of steps to take
-    ntemps: int (2)
+    ntemps: int (10)
         The number of temperatures used by ptemcee
     Tmax: float
         The maximum temperature
@@ -107,15 +126,15 @@ class Ptemcee(MCMCSampler):
 
     # Arguments used by ptemcee
     default_kwargs = dict(
-        ntemps=20,
-        nwalkers=200,
+        ntemps=10,
+        nwalkers=100,
         Tmax=None,
         betas=None,
         a=2.0,
         adaptation_lag=10000,
         adaptation_time=100,
         random=None,
-        adapt=True,
+        adapt=False,
         swap_ratios=False,
     )
 
@@ -130,13 +149,17 @@ class Ptemcee(MCMCSampler):
         skip_import_verification=False,
         resume=True,
         nsamples=5000,
-        burn_in_nact=10,
+        burn_in_nact=50,
+        burn_in_fixed_discard=0,
+        mean_logl_frac=0.01,
         thin_by_nact=0.5,
         autocorr_tol=50,
         autocorr_c=5,
         safety=1,
-        autocorr_tau=5,
-        frac_threshold=0.01,
+        autocorr_tau=1,
+        gradient_tau=0.1,
+        gradient_mean_log_posterior=0.1,
+        Q_tol=1.02,
         min_tau=1,
         check_point_deltaT=600,
         threads=1,
@@ -145,7 +168,8 @@ class Ptemcee(MCMCSampler):
         store_walkers=False,
         ignore_keys_for_tau=None,
         pos0="prior",
-        niterations_per_check=10,
+        niterations_per_check=5,
+        log10beta_min=None,
         **kwargs
     ):
         super(Ptemcee, self).__init__(
@@ -184,14 +208,19 @@ class Ptemcee(MCMCSampler):
             autocorr_tau=autocorr_tau,
             safety=safety,
             burn_in_nact=burn_in_nact,
+            burn_in_fixed_discard=burn_in_fixed_discard,
+            mean_logl_frac=mean_logl_frac,
             thin_by_nact=thin_by_nact,
-            frac_threshold=frac_threshold,
+            gradient_tau=gradient_tau,
+            gradient_mean_log_posterior=gradient_mean_log_posterior,
+            Q_tol=Q_tol,
             nsamples=nsamples,
             ignore_keys_for_tau=ignore_keys_for_tau,
             min_tau=min_tau,
             niterations_per_check=niterations_per_check,
         )
         self.convergence_inputs = ConvergenceInputs(**convergence_inputs_dict)
+        logger.info("Using convergence inputs: {}".format(self.convergence_inputs))
 
         # Check if threads was given as an equivalent arg
         if threads == 1:
@@ -206,6 +235,23 @@ class Ptemcee(MCMCSampler):
         self.store_walkers = store_walkers
         self.pos0 = pos0
 
+        self._periodic = [
+            self.priors[key].boundary == "periodic" for key in self.search_parameter_keys
+        ]
+        self.priors.sample()
+        self._minima = np.array([
+            self.priors[key].minimum for key in self.search_parameter_keys
+        ])
+        self._range = np.array([
+            self.priors[key].maximum for key in self.search_parameter_keys
+        ]) - self._minima
+
+        self.log10beta_min = log10beta_min
+        if self.log10beta_min is not None:
+            betas = np.logspace(0, self.log10beta_min, self.ntemps)
+            logger.warning("Using betas {}".format(betas))
+            self.kwargs["betas"] = betas
+
     @property
     def sampler_function_kwargs(self):
         """ Kwargs passed to samper.sampler() """
@@ -322,7 +368,7 @@ class Ptemcee(MCMCSampler):
         return pos0
 
     def setup_sampler(self):
-        """ Either initialize the sampelr or read in the resume file """
+        """ Either initialize the sampler or read in the resume file """
         import ptemcee
 
         if os.path.isfile(self.resume_file) and self.resume is True:
@@ -335,11 +381,13 @@ class Ptemcee(MCMCSampler):
             self.iteration = data["iteration"]
             self.chain_array = data["chain_array"]
             self.log_likelihood_array = data["log_likelihood_array"]
+            self.log_posterior_array = data["log_posterior_array"]
             self.pos0 = data["pos0"]
             self.beta_list = data["beta_list"]
             self.sampler._betas = np.array(self.beta_list[-1])
             self.tau_list = data["tau_list"]
             self.tau_list_n = data["tau_list_n"]
+            self.Q_list = data["Q_list"]
             self.time_per_check = data["time_per_check"]
 
             # Initialize the pool
@@ -376,10 +424,12 @@ class Ptemcee(MCMCSampler):
             # Initialize storing results
             self.iteration = 0
             self.chain_array = self.get_zero_chain_array()
-            self.log_likelihood_array = self.get_zero_log_likelihood_array()
+            self.log_likelihood_array = self.get_zero_array()
+            self.log_posterior_array = self.get_zero_array()
             self.beta_list = []
             self.tau_list = []
             self.tau_list_n = []
+            self.Q_list = []
             self.time_per_check = []
             self.pos0 = self.get_pos0()
 
@@ -388,7 +438,7 @@ class Ptemcee(MCMCSampler):
     def get_zero_chain_array(self):
         return np.zeros((self.nwalkers, self.max_steps, self.ndim))
 
-    def get_zero_log_likelihood_array(self):
+    def get_zero_array(self):
         return np.zeros((self.ntemps, self.nwalkers, self.max_steps))
 
     def get_pos0(self):
@@ -425,18 +475,27 @@ class Ptemcee(MCMCSampler):
                     self.pos0, storechain=False,
                     iterations=self.convergence_inputs.niterations_per_check,
                     **self.sampler_function_kwargs):
-                pass
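+                # Wrap periodic parameters back into their prior domain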
+                pos0[:, :, self._periodic] = np.mod(
+                    pos0[:, :, self._periodic] - self._minima[self._periodic],
+                    self._range[self._periodic]
+                ) + self._minima[self._periodic]
 
             if self.iteration == self.chain_array.shape[1]:
                 self.chain_array = np.concatenate((
                     self.chain_array, self.get_zero_chain_array()), axis=1)
                 self.log_likelihood_array = np.concatenate((
-                    self.log_likelihood_array, self.get_zero_log_likelihood_array()),
+                    self.log_likelihood_array, self.get_zero_array()),
+                    axis=2)
+                self.log_posterior_array = np.concatenate((
+                    self.log_posterior_array, self.get_zero_array()),
                     axis=2)
 
             self.pos0 = pos0
             self.chain_array[:, self.iteration, :] = pos0[0, :, :]
             self.log_likelihood_array[:, :, self.iteration] = log_likelihood
+            self.log_posterior_array[:, :, self.iteration] = log_posterior
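+            # Mean log-posterior over walkers, per temperature, used to assess burn-in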
+            self.mean_log_posterior = np.mean(
+                self.log_posterior_array[:, :, :self.iteration], axis=1)
 
             # Calculate time per iteration
             self.time_per_check.append((datetime.datetime.now() - t0).total_seconds())
@@ -444,6 +503,28 @@ class Ptemcee(MCMCSampler):
 
             self.iteration += 1
 
+            # Calculate minimum iteration step to discard
+            minimum_iteration = get_minimum_stable_iteration(
+                self.mean_log_posterior,
+                frac=self.convergence_inputs.mean_logl_frac
+            )
+            logger.debug("Minimum iteration = {}".format(minimum_iteration))
+
+            # Calculate the maximum discard number
+            discard_max = np.max(
+                [self.convergence_inputs.burn_in_fixed_discard,
+                 minimum_iteration]
+            )
+
+            if self.iteration > discard_max + self.nwalkers:
+                # If we have taken more than nwalkers steps after the discard
+                # then set the discard
+                self.discard = discard_max
+            else:
+                # If haven't discard everything (avoid initialisation bias)
+                logger.debug("Too few steps to calculate convergence")
+                self.discard = self.iteration
+
             (
                 stop,
                 self.nburn,
@@ -451,7 +532,8 @@ class Ptemcee(MCMCSampler):
                 self.tau_int,
                 self.nsamples_effective,
             ) = check_iteration(
-                self.chain_array[:, :self.iteration + 1, :],
+                self.iteration,
+                self.chain_array[:, self.discard:self.iteration, :],
                 sampler,
                 self.convergence_inputs,
                 self.search_parameter_keys,
@@ -459,6 +541,8 @@ class Ptemcee(MCMCSampler):
                 self.beta_list,
                 self.tau_list,
                 self.tau_list_n,
+                self.Q_list,
+                self.mean_log_posterior,
             )
 
             if stop:
@@ -479,19 +563,21 @@ class Ptemcee(MCMCSampler):
 
         # Get 0-likelihood samples and store in the result
         self.result.samples = self.chain_array[
-            :, self.nburn : self.iteration : self.thin, :
+            :, self.discard + self.nburn : self.iteration : self.thin, :
         ].reshape((-1, self.ndim))
         loglikelihood = self.log_likelihood_array[
-            0, :, self.nburn : self.iteration : self.thin
+            0, :, self.discard + self.nburn : self.iteration : self.thin
         ]  # nwalkers, nsteps
         self.result.log_likelihood_evaluations = loglikelihood.reshape((-1))
 
         if self.store_walkers:
             self.result.walkers = self.sampler.chain
         self.result.nburn = self.nburn
+        self.result.discard = self.discard
 
         log_evidence, log_evidence_err = compute_evidence(
-            sampler, self.log_likelihood_array, self.outdir, self.label, self.nburn,
+            sampler, self.log_likelihood_array, self.outdir,
+            self.label, self.discard, self.nburn,
             self.thin, self.iteration,
         )
         self.result.log_evidence = log_evidence
@@ -524,43 +610,79 @@ class Ptemcee(MCMCSampler):
             self.label,
             self.nsamples_effective,
             self.sampler,
+            self.discard,
             self.nburn,
             self.thin,
             self.search_parameter_keys,
             self.resume_file,
             self.log_likelihood_array,
+            self.log_posterior_array,
             self.chain_array,
             self.pos0,
             self.beta_list,
             self.tau_list,
             self.tau_list_n,
+            self.Q_list,
             self.time_per_check,
         )
 
-        if plot and not np.isnan(self.nburn):
-            # Generate the walkers plot diagnostic
-            plot_walkers(
-                self.chain_array[:, : self.iteration, :],
-                self.nburn,
-                self.thin,
-                self.search_parameter_keys,
-                self.outdir,
-                self.label,
-            )
+        if plot:
+            try:
+                # Generate the walkers plot diagnostic
+                plot_walkers(
+                    self.chain_array[:, : self.iteration, :],
+                    self.nburn,
+                    self.thin,
+                    self.search_parameter_keys,
+                    self.outdir,
+                    self.label,
+                    self.discard,
+                )
+            except Exception as e:
+                logger.info("Walkers plot failed with exception {}".format(e))
 
-            # Generate the tau plot diagnostic
-            plot_tau(
-                self.tau_list_n,
-                self.tau_list,
-                self.search_parameter_keys,
-                self.outdir,
-                self.label,
-                self.tau_int,
-                self.convergence_inputs.autocorr_tau,
-            )
+            try:
+                # Generate the tau plot diagnostic if DEBUG
+                if logger.level < logging.INFO:
+                    plot_tau(
+                        self.tau_list_n,
+                        self.tau_list,
+                        self.search_parameter_keys,
+                        self.outdir,
+                        self.label,
+                        self.tau_int,
+                        self.convergence_inputs.autocorr_tau,
+                    )
+            except Exception as e:
+                logger.info("tau plot failed with exception {}".format(e))
+
+            try:
+                plot_mean_log_posterior(
+                    self.mean_log_posterior,
+                    self.outdir,
+                    self.label,
+                )
+            except Exception as e:
+                logger.info("mean_logl plot failed with exception {}".format(e))
+
+
+def get_minimum_stable_iteration(mean_array, frac, nsteps_min=10):
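+    """ Find the earliest iteration at which the mean log-posterior is stable
+
+    For each temperature, find the first step whose mean log-posterior lies
+    within a fraction frac of the maximum; the latest such step over
+    temperatures is returned, or 0 if fewer than nsteps_min steps exist.
+    """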
+    nsteps = mean_array.shape[1]
+    if nsteps < nsteps_min:
+        return 0
+
+    min_it = 0
+    for x in mean_array:
+        maxl = np.max(x)
+        fracdiff = (maxl - x) / np.abs(maxl)
+        idxs = fracdiff < frac
+        if np.sum(idxs) > 0:
+            min_it = np.max([min_it, np.min(np.arange(len(idxs))[idxs])])
+    return min_it
 
 
 def check_iteration(
+    iteration,
     samples,
     sampler,
     convergence_inputs,
@@ -569,6 +691,8 @@ def check_iteration(
     beta_list,
     tau_list,
     tau_list_n,
+    Q_list,
+    mean_log_posterior,
 ):
     """ Per-iteration logic to calculate the convergence check
 
@@ -594,24 +718,16 @@ def check_iteration(
     nsamples_effective: int
         The effective number of samples after burning and thinning
     """
-    import emcee
 
     ci = convergence_inputs
-    nwalkers, iteration, ndim = samples.shape
-
-    # Compute ACT tau for 0-temperature chains
-    tau_array = np.zeros((nwalkers, ndim))
-    for ii in range(nwalkers):
-        for jj, key in enumerate(search_parameter_keys):
-            if ci.ignore_keys_for_tau and ci.ignore_keys_for_tau in key:
-                continue
-            try:
-                tau_array[ii, jj] = emcee.autocorr.integrated_time(
-                    samples[ii, :, jj], c=ci.autocorr_c, tol=0)[0]
-            except emcee.autocorr.AutocorrError:
-                tau_array[ii, jj] = np.inf
+    # Note: nsteps is the number of steps in the samples, while iteration is
+    # the current iteration number; iteration exceeds nsteps by the number of
+    # discarded steps
+    nwalkers, nsteps, ndim = samples.shape
 
-    # Maximum over paramters, mean over walkers
+    tau_array = calculate_tau_array(samples, search_parameter_keys, ci)
+
+    # Maximum over parameters, mean over walkers
     tau = np.max(np.mean(tau_array, axis=0))
 
     # Apply multiplicitive safety factor
@@ -622,37 +738,80 @@ def check_iteration(
     tau_list.append(list(np.mean(tau_array, axis=0)))
     tau_list_n.append(iteration)
 
-    # Convert to an integer
-    tau_int = int(np.ceil(tau)) if not np.isnan(tau) else tau
+    Q = get_Q_convergence(samples)
+    Q_list.append(Q)
 
-    if np.isnan(tau_int) or np.isinf(tau_int):
+    if np.isnan(tau) or np.isinf(tau):
         print_progress(
             iteration, sampler, time_per_check, np.nan, np.nan,
-            np.nan, np.nan, False, convergence_inputs,
+            np.nan, np.nan, np.nan, False, convergence_inputs, Q,
         )
         return False, np.nan, np.nan, np.nan, np.nan
 
+    # Convert to an integer
+    tau_int = int(np.ceil(tau))
+
     # Calculate the effective number of samples available
     nburn = int(ci.burn_in_nact * tau_int)
     thin = int(np.max([1, ci.thin_by_nact * tau_int]))
     samples_per_check = nwalkers / thin
-    nsamples_effective = int(nwalkers * (iteration - nburn) / thin)
+    nsamples_effective = int(nwalkers * (nsteps - nburn) / thin)
 
     # Calculate convergence boolean
-    converged = ci.nsamples < nsamples_effective
-
-    # Calculate fractional change in tau from previous iteration
-    check_taus = np.array(tau_list[-tau_int * ci.autocorr_tau :])
-    taus_per_parameter = check_taus[-1, :]
-    if not np.any(np.isnan(check_taus)):
-        frac = (taus_per_parameter - check_taus) / taus_per_parameter
-        max_frac = np.max(frac)
-        tau_usable = np.all(frac < ci.frac_threshold)
+    converged = Q < ci.Q_tol and ci.nsamples < nsamples_effective
+    logger.debug("Convergence: Q<Q_tol={}, nsamples<nsamples_effective={}"
+                 .format(Q < ci.Q_tol, ci.nsamples < nsamples_effective))
+
+    GRAD_WINDOW_LENGTH = nwalkers + 1
+    nsteps_to_check = ci.autocorr_tau * np.max([2 * GRAD_WINDOW_LENGTH, tau_int])
+    lower_tau_index = np.max([0, len(tau_list) - nsteps_to_check])
+    check_taus = np.array(tau_list[lower_tau_index:])
+    if not np.any(np.isnan(check_taus)) and check_taus.shape[0] > GRAD_WINDOW_LENGTH:
+        gradient_tau = get_max_gradient(
+            check_taus, axis=0, window_length=11)
+
+        if gradient_tau < ci.gradient_tau:
+            logger.debug(
+                "tau usable as {} < gradient_tau={}"
+                .format(gradient_tau, ci.gradient_tau)
+            )
+            tau_usable = True
+        else:
+            logger.debug(
+                "tau not usable as {} > gradient_tau={}"
+                .format(gradient_tau, ci.gradient_tau)
+            )
+            tau_usable = False
+
+        check_mean_log_posterior = mean_log_posterior[:, -nsteps_to_check:]
+        gradient_mean_log_posterior = get_max_gradient(
+            check_mean_log_posterior, axis=1, window_length=GRAD_WINDOW_LENGTH,
+            smooth=True)
+
+        if gradient_mean_log_posterior < ci.gradient_mean_log_posterior:
+            logger.debug(
+                "tau usable as {} < gradient_mean_log_posterior={}"
+                .format(gradient_mean_log_posterior, ci.gradient_mean_log_posterior)
+            )
+        else:
+            logger.debug(
+                "tau not usable as {} > gradient_mean_log_posterior={}"
+                .format(gradient_mean_log_posterior, ci.gradient_mean_log_posterior)
+            )
+            tau_usable = False
+
     else:
-        max_frac = np.nan
+        logger.debug("ACT is nan")
+        gradient_tau = np.nan
+        gradient_mean_log_posterior = np.nan
         tau_usable = False
 
-    if iteration < tau_int * ci.autocorr_tol or tau_int < ci.min_tau:
+    if nsteps < tau_int * ci.autocorr_tol:
+        logger.debug("ACT less than autocorr_tol")
+        tau_usable = False
+    elif tau_int < ci.min_tau:
+        logger.debug("ACT less than min_tau")
         tau_usable = False
 
     # Print an update on the progress
@@ -663,14 +822,39 @@ def check_iteration(
         nsamples_effective,
         samples_per_check,
         tau_int,
-        max_frac,
+        gradient_tau,
+        gradient_mean_log_posterior,
         tau_usable,
         convergence_inputs,
+        Q
     )
     stop = converged and tau_usable
     return stop, nburn, thin, tau_int, nsamples_effective
 
 
+def get_max_gradient(x, axis=0, window_length=11, polyorder=2, smooth=False):
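+    """ Maximum of a Savitzky-Golay estimate of the gradient of x along axis
+
+    If smooth, the data is first smoothed with a cubic Savitzky-Golay filter
+    before the derivative is estimated.
+    """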
+    if smooth:
+        x = scipy.signal.savgol_filter(
+            x, axis=axis, window_length=window_length, polyorder=3)
+    return np.max(scipy.signal.savgol_filter(
+        x, axis=axis, window_length=window_length, polyorder=polyorder,
+        deriv=1))
+
+
+def get_Q_convergence(samples):
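+    """ Calculate the maximum (over dimensions) Gelman-Rubin-like statistic
+
+    Q is the square root of the ratio of the pooled variance estimate Vhat
+    to the mean within-chain variance W; values close to 1 indicate
+    convergence.
+    """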
+    nwalkers, nsteps, ndim = samples.shape
+    if nsteps > 1:
+        W = np.mean(np.var(samples, axis=1), axis=0)
+        per_walker_mean = np.mean(samples, axis=1)
+        mean = np.mean(per_walker_mean, axis=0)
+        B = nsteps / (nwalkers - 1.) * np.sum((per_walker_mean - mean)**2, axis=0)
+        Vhat = (nsteps - 1) / nsteps * W + (nwalkers + 1) / (nwalkers * nsteps) * B
+        Q_per_dim = np.sqrt(Vhat / W)
+        return np.max(Q_per_dim)
+    else:
+        return np.inf
+
+
 def print_progress(
     iteration,
     sampler,
@@ -678,17 +862,19 @@ def print_progress(
     nsamples_effective,
     samples_per_check,
     tau_int,
-    max_frac,
+    gradient_tau,
+    gradient_mean_log_posterior,
     tau_usable,
     convergence_inputs,
+    Q,
 ):
     # Setup acceptance string
     acceptance = sampler.acceptance_fraction[0, :]
-    acceptance_str = "{:1.2f}->{:1.2f}".format(np.min(acceptance), np.max(acceptance))
+    acceptance_str = "{:1.2f}-{:1.2f}".format(np.min(acceptance), np.max(acceptance))
 
     # Setup tswap acceptance string
     tswap_acceptance_fraction = sampler.tswap_acceptance_fraction
-    tswap_acceptance_str = "{:1.2f}->{:1.2f}".format(
+    tswap_acceptance_str = "{:1.2f}-{:1.2f}".format(
         np.min(tswap_acceptance_fraction), np.max(tswap_acceptance_fraction)
     )
 
@@ -701,37 +887,59 @@ def print_progress(
 
     sampling_time = datetime.timedelta(seconds=np.sum(time_per_check))
 
-    if max_frac >= 0:
-        tau_str = "{}(+{:0.2f})".format(tau_int, max_frac)
-    else:
-        tau_str = "{}({:0.2f})".format(tau_int, max_frac)
+    tau_str = "{}(+{:0.2f},+{:0.2f})".format(
+        tau_int, gradient_tau, gradient_mean_log_posterior
+    )
+
     if tau_usable:
         tau_str = "={}".format(tau_str)
     else:
         tau_str = "!{}".format(tau_str)
 
+    Q_str = "{:0.2f}".format(Q)
+
     evals_per_check = sampler.nwalkers * sampler.ntemps * convergence_inputs.niterations_per_check
 
     ncalls = "{:1.1e}".format(
         convergence_inputs.niterations_per_check * iteration * sampler.nwalkers * sampler.ntemps)
     eval_timing = "{:1.2f}ms/ev".format(1e3 * ave_time_per_check / evals_per_check)
-    samp_timing = "{:1.1f}ms/sm".format(1e3 * ave_time_per_check / samples_per_check)
-
-    print(
-        "{}| {}| nc:{}| a0:{}| swp:{}| n:{}<{}| tau{}| {}| {}".format(
-            iteration,
-            str(sampling_time).split(".")[0],
-            ncalls,
-            acceptance_str,
-            tswap_acceptance_str,
-            nsamples_effective,
-            convergence_inputs.nsamples,
-            tau_str,
-            eval_timing,
-            samp_timing,
-        ),
-        flush=True,
-    )
+
+    try:
+        print(
+            "{}|{}|nc:{}|a0:{}|swp:{}|n:{}<{}|t{}|q:{}|{}".format(
+                iteration,
+                str(sampling_time).split(".")[0],
+                ncalls,
+                acceptance_str,
+                tswap_acceptance_str,
+                nsamples_effective,
+                convergence_inputs.nsamples,
+                tau_str,
+                Q_str,
+                eval_timing,
+            ),
+            flush=True,
+        )
+    except OSError as e:
+        logger.debug("Failed to print iteration due to :{}".format(e))
+
+
+def calculate_tau_array(samples, search_parameter_keys, ci):
+    """ Compute ACT tau for 0-temperature chains """
+    import emcee
+    nwalkers, nsteps, ndim = samples.shape
+    tau_array = np.full((nwalkers, ndim), np.inf)
+    if nsteps > 1:
+        for ii in range(nwalkers):
+            for jj, key in enumerate(search_parameter_keys):
+                if ci.ignore_keys_for_tau and ci.ignore_keys_for_tau in key:
+                    continue
+                try:
+                    tau_array[ii, jj] = emcee.autocorr.integrated_time(
+                        samples[ii, :, jj], c=ci.autocorr_c, tol=0)[0]
+                except emcee.autocorr.AutocorrError:
+                    tau_array[ii, jj] = np.inf
+    return tau_array
 
 
 def checkpoint(
@@ -740,16 +948,19 @@ def checkpoint(
     label,
     nsamples_effective,
     sampler,
+    discard,
     nburn,
     thin,
     search_parameter_keys,
     resume_file,
     log_likelihood_array,
+    log_posterior_array,
     chain_array,
     pos0,
     beta_list,
     tau_list,
     tau_list_n,
+    Q_list,
     time_per_check,
 ):
     logger.info("Writing checkpoint and diagnostics")
@@ -758,7 +969,7 @@ def checkpoint(
     # Store the samples if possible
     if nsamples_effective > 0:
         filename = "{}/{}_samples.txt".format(outdir, label)
-        samples = np.array(chain_array)[:, nburn : iteration : thin, :].reshape(
+        samples = np.array(chain_array)[:, discard + nburn : iteration : thin, :].reshape(
             (-1, ndim)
         )
         df = pd.DataFrame(samples, columns=search_parameter_keys)
@@ -774,8 +985,10 @@ def checkpoint(
         beta_list=beta_list,
         tau_list=tau_list,
         tau_list_n=tau_list_n,
+        Q_list=Q_list,
         time_per_check=time_per_check,
         log_likelihood_array=log_likelihood_array,
+        log_posterior_array=log_posterior_array,
         chain_array=chain_array,
         pos0=pos0,
     )
@@ -786,17 +999,33 @@ def checkpoint(
     logger.info("Finished writing checkpoint")
 
 
-def plot_walkers(walkers, nburn, thin, parameter_labels, outdir, label):
+def plot_walkers(walkers, nburn, thin, parameter_labels, outdir, label,
+                 discard=0):
     """ Method to plot the trace of the walkers in an ensemble MCMC plot """
     nwalkers, nsteps, ndim = walkers.shape
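+    # If burn-in/thinning were never determined, plot all steps as burn-in, unthinned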
+    if np.isnan(nburn):
+        nburn = nsteps
+    if np.isnan(thin):
+        thin = 1
     idxs = np.arange(nsteps)
     fig, axes = plt.subplots(nrows=ndim, ncols=2, figsize=(8, 3 * ndim))
-    scatter_kwargs = dict(lw=0, marker="o", markersize=1, alpha=0.05,)
+    scatter_kwargs = dict(lw=0, marker="o", markersize=1, alpha=0.1,)
+
+    # Plot the discarded samples
+    if discard > 0:
+        for i, (ax, axh) in enumerate(axes):
+            ax.plot(
+                idxs[: discard],
+                walkers[:, : discard, i].T,
+                color="gray",
+                **scatter_kwargs
+            )
+
     # Plot the burn-in
     for i, (ax, axh) in enumerate(axes):
         ax.plot(
-            idxs[: nburn + 1],
-            walkers[:, : nburn + 1, i].T,
+            idxs[discard: discard + nburn + 1],
+            walkers[:, discard: discard + nburn + 1, i].T,
             color="C1",
             **scatter_kwargs
         )
@@ -804,12 +1033,14 @@ def plot_walkers(walkers, nburn, thin, parameter_labels, outdir, label):
     # Plot the thinned posterior samples
     for i, (ax, axh) in enumerate(axes):
         ax.plot(
-            idxs[nburn::thin],
-            walkers[:, nburn::thin, i].T,
+            idxs[discard + nburn::thin],
+            walkers[:, discard + nburn::thin, i].T,
             color="C0",
             **scatter_kwargs
         )
-        axh.hist(walkers[:, nburn::thin, i].reshape((-1)), bins=50, alpha=0.8)
+        axh.hist(walkers[:, discard + nburn::thin, i].reshape((-1)), bins=50, alpha=0.8)
+
+    for i, (ax, axh) in enumerate(axes):
         axh.set_xlabel(parameter_labels[i])
         ax.set_ylabel(parameter_labels[i])
 
@@ -820,25 +1051,43 @@ def plot_walkers(walkers, nburn, thin, parameter_labels, outdir, label):
 
 
 def plot_tau(
-    tau_list_n, tau_list, search_parameter_keys, outdir, label, tau, autocorr_tau
+    tau_list_n, tau_list, search_parameter_keys, outdir, label, tau, autocorr_tau,
 ):
     fig, ax = plt.subplots()
     for i, key in enumerate(search_parameter_keys):
         ax.plot(tau_list_n, np.array(tau_list)[:, i], label=key)
-    ax.axvline(tau_list_n[-1] - tau * autocorr_tau)
     ax.set_xlabel("Iteration")
     ax.set_ylabel(r"$\langle \tau \rangle$")
     ax.legend()
+    fig.tight_layout()
     fig.savefig("{}/{}_checkpoint_tau.png".format(outdir, label))
     plt.close(fig)
 
 
-def compute_evidence(sampler, log_likelihood_array, outdir, label, nburn, thin,
+def plot_mean_log_posterior(mean_log_posterior, outdir, label):
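+    """ Plot the mean log-posterior of each temperature against iteration """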
+
+    ntemps, nsteps = mean_log_posterior.shape
+    ymax = np.max(mean_log_posterior)
+    ymin = np.min(mean_log_posterior[:, -100:])
+    yrange = ymax - ymin
+    ymax += 0.1 * yrange
+    ymin -= 0.1 * yrange
+
+    fig, ax = plt.subplots()
+    idxs = np.arange(nsteps)
+    ax.plot(idxs, mean_log_posterior.T)
+    ax.set(xlabel="Iteration", ylabel=r"$\langle\mathrm{log-posterior}\rangle$",
+           ylim=(ymin, ymax))
+    fig.tight_layout()
+    fig.savefig("{}/{}_checkpoint_meanlogposterior.png".format(outdir, label))
+    plt.close(fig)
+
+
+def compute_evidence(sampler, log_likelihood_array, outdir, label, discard, nburn, thin,
                      iteration, make_plots=True):
     """ Computes the evidence using thermodynamic integration """
     betas = sampler.betas
     # We compute the evidence without the burnin samples, but we do not thin
-    lnlike = log_likelihood_array[:, :, nburn : iteration]
+    lnlike = log_likelihood_array[:, :, discard + nburn : iteration]
     mean_lnlikes = np.mean(np.mean(lnlike, axis=1), axis=1)
 
     mean_lnlikes = mean_lnlikes[::-1]
@@ -911,6 +1160,29 @@ class LikePriorEvaluator(object):
     def __init__(self, search_parameter_keys, use_ratio=False):
         self.search_parameter_keys = search_parameter_keys
         self.use_ratio = use_ratio
+        self.periodic_set = False
+
+    def _setup_periodic(self):
+        self._periodic = [
+            priors[key].boundary == "periodic" for key in self.search_parameter_keys
+        ]
+        priors.sample()
+        self._minima = np.array([
+            priors[key].minimum for key in self.search_parameter_keys
+        ])
+        self._range = np.array([
+            priors[key].maximum for key in self.search_parameter_keys
+        ]) - self._minima
+        self.periodic_set = True
+
+    def _wrap_periodic(self, array):
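+        """ Wrap the periodic parameters in array back into the prior domain """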
+        if not self.periodic_set:
+            self._setup_periodic()
+        array[self._periodic] = np.mod(
+            array[self._periodic] - self._minima[self._periodic],
+            self._range[self._periodic]
+        ) + self._minima[self._periodic]
+        return array
 
     def logl(self, v_array):
         parameters = {key: v for key, v in zip(self.search_parameter_keys, v_array)}
diff --git a/bilby/core/sampler/ptmcmc.py b/bilby/core/sampler/ptmcmc.py
index 3f90711057b4bfa392c6d7d1717929289dda2757..d27698aa7278a947bf5fd46639d93527339ff271 100644
--- a/bilby/core/sampler/ptmcmc.py
+++ b/bilby/core/sampler/ptmcmc.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, print_function
 
 import glob
 import shutil
diff --git a/bilby/core/sampler/pymc3.py b/bilby/core/sampler/pymc3.py
index 73f5f3edb13aa9715a67b11b0a3a945667dbfefe..9c44b3a621a7e88baf1266dcf7898bc9b21037ed 100644
--- a/bilby/core/sampler/pymc3.py
+++ b/bilby/core/sampler/pymc3.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, print_function
 
 from collections import OrderedDict
 from distutils.version import StrictVersion
diff --git a/bilby/core/sampler/ultranest.py b/bilby/core/sampler/ultranest.py
index fdae9a8e9c4393b857a58a27a8f075d2bc704d12..ffeb2e267d92a9ce069a29d080359840be6d91f0 100644
--- a/bilby/core/sampler/ultranest.py
+++ b/bilby/core/sampler/ultranest.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import
 
 import datetime
 import distutils.dir_util
@@ -61,7 +60,7 @@ class Ultranest(NestedSampler):
         log_interval=None,
         dlogz=None,
         max_iters=None,
-        update_interval_iter_fraction=0.2,
+        update_interval_volume_fraction=0.2,
         viz_callback=None,
         dKL=0.5,
         frac_remain=0.01,
@@ -232,7 +231,7 @@ class Ultranest(NestedSampler):
             ]
         else:
             keys = [
-                "update_interval_iter_fraction",
+                "update_interval_volume_fraction",
                 "update_interval_ncall",
                 "log_interval",
                 "show_status",
@@ -366,6 +365,8 @@ class Ultranest(NestedSampler):
         self.result.nested_samples = nested_samples
         self.result.log_evidence = out["logz"]
         self.result.log_evidence_err = out["logzerr"]
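+        # Invert the nested-sampling error estimate, logzerr = sqrt(H / nlive), to recover H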
+        if self.kwargs["num_live_points"] is not None:
+            self.result.information_gain = np.power(out["logzerr"], 2) * self.kwargs["num_live_points"]
 
         self.result.outputfiles_basename = self.outputfiles_basename
         self.result.sampling_time = datetime.timedelta(seconds=self.total_sampling_time)
diff --git a/bilby/core/utils.py b/bilby/core/utils.py
index 03380552db7c64e6c9395021be6dbc73038bd891..3b590215545dbcbe1adade632a05712b8d1f6091 100644
--- a/bilby/core/utils.py
+++ b/bilby/core/utils.py
@@ -1,4 +1,3 @@
-from __future__ import division
 
 from distutils.spawn import find_executable
 import logging
@@ -508,26 +507,6 @@ def get_version_information():
         print("No version information file '.version' found")
 
 
-def get_progress_bar(module='tqdm'):
-    """
-    TODO: Write proper docstring
-    """
-    if module in ['tqdm']:
-        try:
-            from tqdm import tqdm
-        except ImportError:
-            def tqdm(x, *args, **kwargs):
-                return x
-        return tqdm
-    elif module in ['tqdm_notebook']:
-        try:
-            from tqdm import tqdm_notebook as tqdm
-        except ImportError:
-            def tqdm(x, *args, **kwargs):
-                return x
-        return tqdm
-
-
 def spherical_to_cartesian(radius, theta, phi):
     """ Convert from spherical coordinates to cartesian.
 
diff --git a/bilby/gw/conversion.py b/bilby/gw/conversion.py
index ad1981434c0606e56260cfbac3a4bb2f1e103fba..00a6c9a4e24b669e330b60566ddea934ee49a0a8 100644
--- a/bilby/gw/conversion.py
+++ b/bilby/gw/conversion.py
@@ -1,8 +1,7 @@
-from __future__ import division
 import sys
 import multiprocessing
 
-from tqdm import tqdm
+from tqdm.auto import tqdm
 import numpy as np
 from pandas import DataFrame
 
diff --git a/bilby/gw/detector/detectors/A1.interferometer b/bilby/gw/detector/detectors/A1.interferometer
new file mode 100644
index 0000000000000000000000000000000000000000..2cfb564a3b36b4aa3e41d7e6623b8b74e98264cf
--- /dev/null
+++ b/bilby/gw/detector/detectors/A1.interferometer
@@ -0,0 +1,13 @@
+# LIGO India Aundha at Aplus sensitivity.
+# LIGO-T2000158
+# https://dcc.ligo.org/LIGO-T2000012/public
+name = 'A1'
+power_spectral_density = PowerSpectralDensity(asd_file='Aplus_asd.txt')
+minimum_frequency = 20
+maximum_frequency = 2048
+length = 4
+latitude = 19 + 36. / 60 + 47.9017 / 3600
+longitude = 77 + 1. / 60 + 51.0997 / 3600
+elevation = 440.0
+xarm_azimuth = 117.6157
+yarm_azimuth = 207.6165
diff --git a/bilby/gw/detector/strain_data.py b/bilby/gw/detector/strain_data.py
index 6ba6db039ef244e5bb3295ba8796347af868a60f..da0a397827c3892316962513cc1a1da3e9ef2d6d 100644
--- a/bilby/gw/detector/strain_data.py
+++ b/bilby/gw/detector/strain_data.py
@@ -31,7 +31,7 @@ class InterferometerStrainData(object):
     time_array = PropertyAccessor('_times_and_frequencies', 'time_array')
 
     def __init__(self, minimum_frequency=0, maximum_frequency=np.inf,
-                 roll_off=0.2):
+                 roll_off=0.2, notch_list=None):
         """ Initiate an InterferometerStrainData object
 
         The initialised object contains no data, this should be added using one
@@ -46,11 +46,14 @@ class InterferometerStrainData(object):
         roll_off: float
             The roll-off (in seconds) used in the Tukey window, default=0.2s.
             This corresponds to alpha * duration / 2 for scipy tukey window.
+        notch_list: bilby.gw.detector.strain_data.NotchList
+            A list of notches
 
         """
 
         self.minimum_frequency = minimum_frequency
         self.maximum_frequency = maximum_frequency
+        self.notch_list = notch_list
         self.roll_off = roll_off
         self.window_factor = 1
 
@@ -122,18 +125,46 @@ class InterferometerStrainData(object):
         self._maximum_frequency = maximum_frequency
         self._frequency_mask_updated = False
 
+    @property
+    def notch_list(self):
+        return self._notch_list
+
+    @notch_list.setter
+    def notch_list(self, notch_list):
+        """ Set the notch_list
+
+        Parameters
+        ----------
+        notch_list: list, bilby.gw.detector.strain_data.NotchList
+            A list of length-2 tuples of the (min, max) frequency for the
+            notches or a pre-made bilby NotchList.
+
+        """
+        if notch_list is None:
+            self._notch_list = NotchList(None)
+        elif isinstance(notch_list, list):
+            self._notch_list = NotchList(notch_list)
+        elif isinstance(notch_list, NotchList):
+            self._notch_list = notch_list
+        else:
+            raise ValueError("notch_list {} not understood".format(notch_list))
+        self._frequency_mask_updated = False
+
     @property
     def frequency_mask(self):
-        """Masking array for limiting the frequency band.
+        """ Masking array for limiting the frequency band.
 
         Returns
         -------
-        array_like: An array of boolean values
+        mask: np.ndarray
+            An array of boolean values
         """
         if not self._frequency_mask_updated:
             frequency_array = self._times_and_frequencies.frequency_array
             mask = ((frequency_array >= self.minimum_frequency) &
                     (frequency_array <= self.maximum_frequency))
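+            # additionally mask out any bins that fall inside a notch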
+            for notch in self.notch_list:
+                mask[notch.get_idxs(frequency_array)] = False
             self._frequency_mask = mask
             self._frequency_mask_updated = True
         return self._frequency_mask
@@ -683,3 +714,104 @@ class InterferometerStrainData(object):
         strain = strain.resample(sampling_frequency)
 
         self.set_from_gwpy_timeseries(strain)
+
+
+class Notch(object):
+    def __init__(self, minimum_frequency, maximum_frequency):
+        """ A notch object storing the maximum and minimum frequency of the notch
+
+        Parameters
+        ----------
+        minimum_frequency, maximum_frequency: float
+            The minimum and maximum frequency of the notch
+
+        """
+
+        if 0 < minimum_frequency < maximum_frequency < np.inf:
+            self.minimum_frequency = minimum_frequency
+            self.maximum_frequency = maximum_frequency
+        else:
+            msg = ("Your notch minimum_frequency {} and maximum_frequency {} are invalid"
+                   .format(minimum_frequency, maximum_frequency))
+            raise ValueError(msg)
+
+    def get_idxs(self, frequency_array):
+        """ Get a boolean mask for the frequencies in frequency_array in the notch
+
+        Parameters
+        ----------
+        frequency_array: np.ndarray
+            An array of frequencies
+
+        Returns
+        -------
+        idxs: np.ndarray
+            An array of booleans which are True for frequencies in the notch
+
+        """
+        lower = (frequency_array > self.minimum_frequency)
+        upper = (frequency_array < self.maximum_frequency)
+        return lower & upper
+
+    def check_frequency(self, freq):
+        """ Check if freq is inside the notch
+
+        Parameters
+        ----------
+        freq: float
+            The frequency to check
+
+        Returns
+        -------
+        bool
+            True if freq is inside the notch, otherwise False
+        """
+
+        if self.minimum_frequency < freq < self.maximum_frequency:
+            return True
+        else:
+            return False
+
+
+class NotchList(list):
+    def __init__(self, notch_list):
+        """ A list of notches
+
+        Parameters
+        ----------
+        notch_list: list
+            A list of length-2 tuples of the (min, max) frequency for the
+            notches.
+
+        Raises
+        ------
+        ValueError
+            If the list is malformed.
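+
+        Examples
+        --------
+        A minimal sketch (the notch frequencies are illustrative)::
+
+            >>> notch_list = NotchList([(59, 61), (119, 121)])
+            >>> notch_list.check_frequency(60)
+            True
+            >>> notch_list.check_frequency(30)
+            False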
+        """
+
+        if notch_list is not None:
+            for notch in notch_list:
+                if isinstance(notch, tuple) and len(notch) == 2:
+                    self.append(Notch(*notch))
+                else:
+                    msg = "notch_list {} is malformed".format(notch_list)
+                    raise ValueError(msg)
+
+    def check_frequency(self, freq):
+        """ Check if freq is inside the notch list
+
+        Parameters
+        ----------
+        freq: float
+            The frequency to check
+
+        Returns
+        -------
+        bool
+            True if freq is inside any of the notches, otherwise False
+        """
+
+        for notch in self:
+            if notch.check_frequency(freq):
+                return True
+        return False
diff --git a/bilby/gw/likelihood.py b/bilby/gw/likelihood.py
index 8a1086cca4bb37622bebeef6f2454e129b4d227a..76daadee557be754b35702e8d1ec70ade39d182f 100644
--- a/bilby/gw/likelihood.py
+++ b/bilby/gw/likelihood.py
@@ -1,4 +1,3 @@
-from __future__ import division
 
 import gc
 import os
diff --git a/bilby/gw/prior.py b/bilby/gw/prior.py
index 7d1b37da3bf3c0f35000fc699f6c48cd85aa772e..3ffcfb193df6f34ed07deb9689def2adf444bb60 100644
--- a/bilby/gw/prior.py
+++ b/bilby/gw/prior.py
@@ -4,11 +4,13 @@ import copy
 import numpy as np
 from scipy.interpolate import InterpolatedUnivariateSpline, interp1d
 from scipy.integrate import cumtrapz
+from scipy.special import hyp2f1
 from scipy.stats import norm
 
 from ..core.prior import (PriorDict, Uniform, Prior, DeltaFunction, Gaussian,
                           Interped, Constraint, conditional_prior_factory,
-                          BaseJointPriorDist, JointPrior, JointPriorDistError)
+                          BaseJointPriorDist, JointPrior, JointPriorDistError,
+                          PowerLaw)
 from ..core.utils import infer_args_from_method, logger
 from .conversion import (
     convert_to_lal_binary_black_hole_parameters,
@@ -285,6 +287,93 @@ class UniformSourceFrame(Cosmological):
         return zs, p_dz
 
 
+class UniformInComponentsChirpMass(PowerLaw):
+
+    def __init__(self, minimum, maximum, name='chirp_mass',
+                 latex_label=r'$\mathcal{M}$', unit=None, boundary=None):
+        """
+        Prior distribution for chirp mass which is uniform in component masses.
+
+        This is useful when chirp mass and mass ratio are sampled while the
+        prior is uniform in component masses.
+
+        Parameters
+        ----------
+        minimum : float
+            The minimum chirp mass
+        maximum : float
+            The maximum chirp mass
+        name: see superclass
+        latex_label: see superclass
+        unit: see superclass
+        boundary: see superclass
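+
+        Notes
+        -----
+        The Jacobian from (mass_1, mass_2) to (chirp_mass, mass_ratio)
+        factorises, so a prior uniform in the component masses induces
+        p(chirp_mass) proportional to chirp_mass, i.e. the PowerLaw with
+        alpha=1 used here; the mass-ratio factor is implemented separately
+        in UniformInComponentsMassRatio.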
+        """
+        super(UniformInComponentsChirpMass, self).__init__(
+            alpha=1., minimum=minimum, maximum=maximum,
+            name=name, latex_label=latex_label, unit=unit, boundary=boundary)
+
+
+class WrappedInterp1d(interp1d):
+    """ A wrapper around scipy interp1d which sets equality-by-instantiation """
+    def __eq__(self, other):
+
+        for key in self.__dict__:
+            if type(self.__dict__[key]) is np.ndarray:
+                if not np.array_equal(self.__dict__[key], other.__dict__[key]):
+                    return False
+            elif key == "_spline":
+                pass
+            elif getattr(self, key) != getattr(other, key):
+                return False
+        return True
+
+
+class UniformInComponentsMassRatio(Prior):
+
+    def __init__(self, minimum, maximum, name='mass_ratio', latex_label='$q$',
+                 unit=None, boundary=None):
+        """
+        Prior distribution for mass ratio which is uniform in component masses.
+
+        This is useful when chirp mass and mass ratio are sampled while the
+        prior is uniform in component masses.
+
+        Parameters
+        ----------
+        minimum : float
+            The minimum mass ratio
+        maximum : float
+            The maximum mass ratio
+        name: see superclass
+        latex_label: see superclass
+        unit: see superclass
+        boundary: see superclass
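+
+        Examples
+        --------
+        A minimal sketch (the bounds are illustrative)::
+
+            >>> prior = UniformInComponentsMassRatio(minimum=0.125, maximum=1)
+            >>> samples = prior.rescale(np.random.uniform(0, 1, 1000))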
+        """
+        super(UniformInComponentsMassRatio, self).__init__(
+            minimum=minimum, maximum=maximum, name=name,
+            latex_label=latex_label, unit=unit, boundary=boundary)
+        self.norm = self._integral(maximum) - self._integral(minimum)
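+        # invert the CDF numerically: tabulate it on a dense grid and build
+        # a cubic interpolant, clamped to the prior bounds outside [0, 1]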
+        qs = np.linspace(minimum, maximum, 1000)
+        self.icdf = WrappedInterp1d(
+            self.cdf(qs), qs, kind='cubic',
+            bounds_error=False, fill_value=(minimum, maximum))
+
+    @staticmethod
+    def _integral(q):
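+        # antiderivative of (1 + q)**(2 / 5) * q**(-6 / 5), the unnormalised
+        # density induced on the mass ratio by a prior uniform in the
+        # component masses, written using Gauss's hypergeometric function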
+        return -5. * q**(-1. / 5.) * hyp2f1(-2. / 5., -1. / 5., 4. / 5., -q)
+
+    def cdf(self, val):
+        return (self._integral(val) - self._integral(self.minimum)) / self.norm
+
+    def rescale(self, val):
+        self.test_valid_for_rescaling(val)
+        return self.icdf(val)
+
+    def prob(self, val):
+        in_prior = (val >= self.minimum) & (val <= self.maximum)
+        return (1. + val)**(2. / 5.) / (val**(6. / 5.)) / self.norm * in_prior
+
+
 class AlignedSpin(Interped):
 
     def __init__(self, a_prior=Uniform(0, 1), z_prior=Uniform(-1, 1),
diff --git a/bilby/gw/result.py b/bilby/gw/result.py
index 03f5d28d29e61dbb2ba1329ed2c53e9e35395401..80d69f654c364dca37211ac1d16530054f9676e7 100644
--- a/bilby/gw/result.py
+++ b/bilby/gw/result.py
@@ -1,4 +1,3 @@
-from __future__ import division
 
 import json
 import pickle
@@ -337,7 +336,7 @@ class CompactBinaryCoalescenceResult(CoreResult):
             interferometer.name))
 
         if n_samples is None:
-            n_samples = len(self.posterior)
+            samples = self.posterior
         elif n_samples > len(self.posterior):
             logger.debug(
                 "Requested more waveform samples ({}) than we have "
@@ -345,14 +344,16 @@ class CompactBinaryCoalescenceResult(CoreResult):
                     n_samples, len(self.posterior)
                 )
             )
-            n_samples = len(self.posterior)
+            samples = self.posterior
+        else:
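+            # draw a random subset of the posterior without replacement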
+            samples = self.posterior.sample(n_samples, replace=False)
 
         if start_time is None:
             start_time = - 0.4
-        start_time = np.mean(self.posterior.geocent_time) + start_time
+        start_time = np.mean(samples.geocent_time) + start_time
         if end_time is None:
             end_time = 0.2
-        end_time = np.mean(self.posterior.geocent_time) + end_time
+        end_time = np.mean(samples.geocent_time) + end_time
         if format == "html":
             start_time = - np.inf
             end_time = np.inf
@@ -470,8 +471,8 @@ class CompactBinaryCoalescenceResult(CoreResult):
 
         fd_waveforms = list()
         td_waveforms = list()
-        for ii in range(n_samples):
-            params = dict(self.posterior.iloc[ii])
+        for _, params in samples.iterrows():
+            params = dict(params)
             wf_pols = waveform_generator.frequency_domain_strain(params)
             fd_waveform = interferometer.get_detector_response(wf_pols, params)
             fd_waveforms.append(fd_waveform[frequency_idxs])
diff --git a/bilby/gw/sampler/__init__.py b/bilby/gw/sampler/__init__.py
index 25ad2ca38576b83835854b025b36f3a73b6a1cb8..70a65589295895c739f91223e244878e12482aa9 100644
--- a/bilby/gw/sampler/__init__.py
+++ b/bilby/gw/sampler/__init__.py
@@ -1,2 +1 @@
-from __future__ import absolute_import
 from . import proposal
diff --git a/bilby/gw/source.py b/bilby/gw/source.py
index 69109364a149304926318ebd8b73fc1adf96425e..83a819a77846d58e7366a4cffa3d8ef3e81f41de 100644
--- a/bilby/gw/source.py
+++ b/bilby/gw/source.py
@@ -1,5 +1,3 @@
-from __future__ import division, print_function
-
 import numpy as np
 
 from ..core import utils
@@ -343,6 +341,15 @@ def _base_lal_cbc_fd_waveform(
     lalsim_SimInspiralWaveformParamsInsertTidalLambda2(
         waveform_dictionary, lambda_2)
 
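+    # forward any remaining waveform_kwargs whose name matches a LAL setter,
+    # e.g. PhenomXPrecVersion -> SimInspiralWaveformParamsInsertPhenomXPrecVersion;
+    # keys without a matching setter are silently ignored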
+    for key, value in waveform_kwargs.items():
+        func = getattr(lalsim, "SimInspiralWaveformParamsInsert" + key, None)
+        if func is not None:
+            func(waveform_dictionary, value)
+
+    if waveform_kwargs.get('numerical_relativity_file', None) is not None:
+        lalsim.SimInspiralWaveformParamsInsertNumRelData(
+            waveform_dictionary, waveform_kwargs['numerical_relativity_file'])
+
     if ('mode_array' in waveform_kwargs) and waveform_kwargs['mode_array'] is not None:
         mode_array = waveform_kwargs['mode_array']
         mode_array_lal = lalsim.SimInspiralCreateModeArray()
@@ -399,11 +406,10 @@ def _base_lal_cbc_fd_waveform(
     h_cross *= frequency_bounds
 
     if wf_func == lalsim_SimInspiralFD:
-        dt = 1. / delta_frequency + (hplus.epoch.gpsSeconds + hplus.epoch.gpsNanoSeconds * 1e-9)
-        h_plus *= np.exp(
-            -1j * 2 * np.pi * dt * frequency_array)
-        h_cross *= np.exp(
-            -1j * 2 * np.pi * dt * frequency_array)
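+        # compute the time shift from the deltaF of the returned series (it
+        # may differ from the requested spacing) and apply it only to the
+        # in-band bins; bins outside frequency_bounds are already zeroed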
+        dt = 1 / hplus.deltaF + (hplus.epoch.gpsSeconds + hplus.epoch.gpsNanoSeconds * 1e-9)
+        time_shift = np.exp(-1j * 2 * np.pi * dt * frequency_array[frequency_bounds])
+        h_plus[frequency_bounds] *= time_shift
+        h_cross[frequency_bounds] *= time_shift
 
     return dict(plus=h_plus, cross=h_cross)
 
diff --git a/bilby/gw/utils.py b/bilby/gw/utils.py
index b89211066fce5eaf228a27ce30b62b3fcfdf5704..abdcfae40adac05190494934833285d4cbfc8f4f 100644
--- a/bilby/gw/utils.py
+++ b/bilby/gw/utils.py
@@ -1,4 +1,3 @@
-from __future__ import division
 import os
 import json
 from math import fmod
diff --git a/bilby/hyper/likelihood.py b/bilby/hyper/likelihood.py
index bc037be3cb4789c56a3fea8b79fe20d126b82539..25a1ad9abd5a442714c84fb51ef95771f6280918 100644
--- a/bilby/hyper/likelihood.py
+++ b/bilby/hyper/likelihood.py
@@ -1,4 +1,3 @@
-from __future__ import division, print_function
 
 import logging
 
diff --git a/cli_bilby/__init__.py b/cli_bilby/__init__.py
index c3961685ab8defd1a28f2a2a7bb3b74fcafa4d6b..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 100644
--- a/cli_bilby/__init__.py
+++ b/cli_bilby/__init__.py
@@ -1 +0,0 @@
-from __future__ import absolute_import
diff --git a/containers/dockerfile-template b/containers/dockerfile-template
index 46d4f54700cb9bfad245e7d1e3113b85589d73f4..eb2fa024cae7f1f93bdfa0d6ef256d6867538809 100644
--- a/containers/dockerfile-template
+++ b/containers/dockerfile-template
@@ -1,4 +1,4 @@
-FROM continuumio/miniconda3
+FROM containers.ligo.org/docker/base:conda
 LABEL name="bilby Base miniconda3" \
 maintainer="Gregory Ashton <gregory.ashton@ligo.org>"
 
diff --git a/containers/v2-dockerfile-test-suite-python35 b/containers/v2-dockerfile-test-suite-python35
index 38d4c8b2fcaebd5dc78e87cd699cc8419b69c1dc..544b562f37dc870538b71a4207139a90e8101c5d 100644
--- a/containers/v2-dockerfile-test-suite-python35
+++ b/containers/v2-dockerfile-test-suite-python35
@@ -1,6 +1,6 @@
 # This dockerfile is written automatically and should not be modified by hand.
 
-FROM continuumio/miniconda3
+FROM containers.ligo.org/docker/base:conda
 LABEL name="bilby Base miniconda3" \
 maintainer="Gregory Ashton <gregory.ashton@ligo.org>"
 
diff --git a/containers/v2-dockerfile-test-suite-python36 b/containers/v2-dockerfile-test-suite-python36
index ae75fff03aa80026a56fa48bc7682f8acb852881..d9a2782cfb22c020520375665ff1668e7c5f6aa4 100644
--- a/containers/v2-dockerfile-test-suite-python36
+++ b/containers/v2-dockerfile-test-suite-python36
@@ -1,6 +1,6 @@
 # This dockerfile is written automatically and should not be modified by hand.
 
-FROM continuumio/miniconda3
+FROM containers.ligo.org/docker/base:conda
 LABEL name="bilby Base miniconda3" \
 maintainer="Gregory Ashton <gregory.ashton@ligo.org>"
 
diff --git a/containers/v2-dockerfile-test-suite-python37 b/containers/v2-dockerfile-test-suite-python37
index 9e27fcab40ae8a7c0561a32b508449e0943925a6..b77f489227fd58720727a24d1a8989cc7b9dc36a 100644
--- a/containers/v2-dockerfile-test-suite-python37
+++ b/containers/v2-dockerfile-test-suite-python37
@@ -1,6 +1,6 @@
 # This dockerfile is written automatically and should not be modified by hand.
 
-FROM continuumio/miniconda3
+FROM containers.ligo.org/docker/base:conda
 LABEL name="bilby Base miniconda3" \
 maintainer="Gregory Ashton <gregory.ashton@ligo.org>"
 
diff --git a/containers/v2-dockerfile-test-suite-python38 b/containers/v2-dockerfile-test-suite-python38
index 690055494c8798269a17a80386fe76de313a81ad..71cf85d0197a80f01ef5e7765d3a2875e40d8a11 100644
--- a/containers/v2-dockerfile-test-suite-python38
+++ b/containers/v2-dockerfile-test-suite-python38
@@ -1,6 +1,6 @@
 # This dockerfile is written automatically and should not be modified by hand.
 
-FROM continuumio/miniconda3
+FROM containers.ligo.org/docker/base:conda
 LABEL name="bilby Base miniconda3" \
 maintainer="Gregory Ashton <gregory.ashton@ligo.org>"
 
diff --git a/examples/core_examples/gaussian_example.py b/examples/core_examples/gaussian_example.py
index d578961368bee967dc5ee51e7885745167a33d90..4555d825fd913493c6939269f714d01d81a675db 100644
--- a/examples/core_examples/gaussian_example.py
+++ b/examples/core_examples/gaussian_example.py
@@ -3,7 +3,6 @@
 An example of how to use bilby to perform parameter estimation for
 non-gravitational wave data consisting of a Gaussian with a mean and variance
 """
-from __future__ import division
 import bilby
 import numpy as np
 
diff --git a/examples/core_examples/grid_example.py b/examples/core_examples/grid_example.py
index af38eb259aab5feda4e726bbec49397cde018e8f..29df64f96acccf753d9aafd50bbc92d9a7745c1b 100644
--- a/examples/core_examples/grid_example.py
+++ b/examples/core_examples/grid_example.py
@@ -5,7 +5,6 @@ non-gravitational wave data. In this case, fitting a linear function to
 data with background Gaussian noise
 
 """
-from __future__ import division
 import bilby
 import numpy as np
 import matplotlib.pyplot as plt
diff --git a/examples/core_examples/hyper_parameter_example.py b/examples/core_examples/hyper_parameter_example.py
index fb8060e4d1c5f405c13f582cd95600560eaabf7c..0798a9faa07315d1f869d8bf8c6ff09fe0fc0e4a 100644
--- a/examples/core_examples/hyper_parameter_example.py
+++ b/examples/core_examples/hyper_parameter_example.py
@@ -2,7 +2,6 @@
 """
 An example of how to use bilby to perform parameter estimation for hyper params
 """
-from __future__ import division
 import numpy as np
 import matplotlib.pyplot as plt
 from bilby.core.likelihood import GaussianLikelihood
@@ -48,7 +47,7 @@ for i in range(Nevents):
         likelihood=likelihood, priors=priors, sampler='nestle', nlive=1000,
         outdir=outdir, verbose=False, label='individual_{}'.format(i),
         save=False, injection_parameters=injection_parameters)
-    ax2.hist(result.posterior.c0, color=line[0].get_color(), normed=True,
+    ax2.hist(result.posterior.c0, color=line[0].get_color(), density=True,
              alpha=0.5, label=labels[i])
     results.append(result)
 
@@ -62,12 +61,12 @@ ax2.legend()
 fig2.savefig('outdir/hyper_parameter_combined_posteriors.png')
 
 
-def hyper_prior(data, mu, sigma):
-    return np.exp(- (data['c0'] - mu)**2 / (2 * sigma**2)) /\
+def hyper_prior(dataset, mu, sigma):
+    return np.exp(- (dataset['c0'] - mu)**2 / (2 * sigma**2)) /\
         (2 * np.pi * sigma**2)**0.5
 
 
-def run_prior(data):
+def run_prior(dataset):
     return 1 / 20
 
 
diff --git a/examples/core_examples/linear_regression.py b/examples/core_examples/linear_regression.py
index ca0c19af540e8f5b5d895deaff6edea788fa3dc8..7bb143b0b10252014408e40850c753a4bd18cab6 100644
--- a/examples/core_examples/linear_regression.py
+++ b/examples/core_examples/linear_regression.py
@@ -5,7 +5,6 @@ non-gravitational wave data. In this case, fitting a linear function to
 data with background Gaussian noise
 
 """
-from __future__ import division
 import bilby
 import numpy as np
 import matplotlib.pyplot as plt
diff --git a/examples/core_examples/linear_regression_grid.py b/examples/core_examples/linear_regression_grid.py
index 6549c860a3cc9ddb6862a0426c153894347a5682..ec0186015384d7c5055a8c108b164f8048cfb191 100644
--- a/examples/core_examples/linear_regression_grid.py
+++ b/examples/core_examples/linear_regression_grid.py
@@ -5,7 +5,6 @@ fitting a linear function to data with background Gaussian noise.
 This will compare the output of using a stochastic sampling method
 to evaluating the posterior on a grid.
 """
-from __future__ import division
 import numpy as np
 import matplotlib.pyplot as plt
 
diff --git a/examples/core_examples/linear_regression_pymc3.py b/examples/core_examples/linear_regression_pymc3.py
index eb98be1edd7a7092dec74fd4b2eaa4dc32997bbe..0f42da66ac5668ea2862f40cb11c47c89b3c74b7 100644
--- a/examples/core_examples/linear_regression_pymc3.py
+++ b/examples/core_examples/linear_regression_pymc3.py
@@ -5,7 +5,6 @@ non-gravitational wave data. In this case, fitting a linear function to
 data with background Gaussian noise
 
 """
-from __future__ import division
 import bilby
 import numpy as np
 import matplotlib.pyplot as plt
diff --git a/examples/core_examples/linear_regression_pymc3_custom_likelihood.py b/examples/core_examples/linear_regression_pymc3_custom_likelihood.py
index 4270e696891056c9638a6fd2084b0c99fc3daf58..31c951cb98ec73e8598420de153ee252c95a96df 100644
--- a/examples/core_examples/linear_regression_pymc3_custom_likelihood.py
+++ b/examples/core_examples/linear_regression_pymc3_custom_likelihood.py
@@ -8,7 +8,6 @@ would give equivalent results to using the pre-defined 'Gaussian Likelihood'
 
 """
 
-from __future__ import division
 import bilby
 import numpy as np
 import matplotlib.pyplot as plt
diff --git a/examples/core_examples/linear_regression_unknown_noise.py b/examples/core_examples/linear_regression_unknown_noise.py
index e4427c9e0b062a880327527dbfbdf7541a962261..d276dfa96bc5f8493c832e0032ddb5af1b7adc1c 100644
--- a/examples/core_examples/linear_regression_unknown_noise.py
+++ b/examples/core_examples/linear_regression_unknown_noise.py
@@ -5,7 +5,6 @@ non-gravitational wave data. In this case, fitting a linear function to
 data with background Gaussian noise with unknown variance.
 
 """
-from __future__ import division
 import bilby
 import numpy as np
 import matplotlib.pyplot as plt
diff --git a/examples/core_examples/multidimensional_gaussian.py b/examples/core_examples/multidimensional_gaussian.py
index 451afe3e06b8c6904de63b4991a34225d850b14c..327151ef400f264babe2d474d0ab93e70d87dd8c 100644
--- a/examples/core_examples/multidimensional_gaussian.py
+++ b/examples/core_examples/multidimensional_gaussian.py
@@ -3,7 +3,6 @@
 Testing the recovery of a multi-dimensional
 Gaussian with zero mean and unit variance
 """
-from __future__ import division
 import bilby
 import numpy as np
 
@@ -33,6 +32,7 @@ class MultidimGaussianLikelihood(bilby.Likelihood):
         """
 
     def __init__(self, data, dim):
+        super().__init__()
         self.dim = dim
         self.data = np.array(data)
         self.N = len(data)
diff --git a/examples/core_examples/multivariate_gaussian_prior.py b/examples/core_examples/multivariate_gaussian_prior.py
index 53d8f94e47ac41833bd536d2f596f3a78cc76e8d..93cfbd24c867b3132977bd9a21d272d8dcb51783 100644
--- a/examples/core_examples/multivariate_gaussian_prior.py
+++ b/examples/core_examples/multivariate_gaussian_prior.py
@@ -4,7 +4,6 @@ An example of how to use bilby with a (multi-modal) multivariate
 Gaussian prior distribution.
 """
 
-from __future__ import division
 import bilby
 import numpy as np
 from scipy import linalg, stats
diff --git a/examples/core_examples/occam_factor_example.py b/examples/core_examples/occam_factor_example.py
index 319b988c0d2704b9fa9aff3d867bed6ce702ed99..f442c58d640ca392e793cf61b00e2812e3c8e138 100644
--- a/examples/core_examples/occam_factor_example.py
+++ b/examples/core_examples/occam_factor_example.py
@@ -30,7 +30,6 @@ Note - the code uses a coarse 100-point estimation for speed, results can be
 improved by increasing this to say 500 or 1000.
 
 """
-from __future__ import division
 import bilby
 import numpy as np
 import matplotlib.pyplot as plt
diff --git a/examples/core_examples/radioactive_decay.py b/examples/core_examples/radioactive_decay.py
index aceabd156ff1a68bdbb4bf500d0718e6167241ee..3f6b7f052b51f34cb613a37708c3daebc199b90d 100644
--- a/examples/core_examples/radioactive_decay.py
+++ b/examples/core_examples/radioactive_decay.py
@@ -4,7 +4,6 @@ An example of how to use bilby to perform parameter estimation for
 non-gravitational wave data. In this case, fitting the half-life and
 initial radionuclide number for Polonium 214.
 """
-from __future__ import division
 import bilby
 import numpy as np
 import matplotlib.pyplot as plt
diff --git a/examples/core_examples/starting_mcmc_chains_near_to_the_peak.py b/examples/core_examples/starting_mcmc_chains_near_to_the_peak.py
index 3fcbf45d32dc9f381ceb7a8961233946c63dcb1e..478c414068acde720095525b613c53b4b9d32b6f 100644
--- a/examples/core_examples/starting_mcmc_chains_near_to_the_peak.py
+++ b/examples/core_examples/starting_mcmc_chains_near_to_the_peak.py
@@ -4,7 +4,6 @@ An example of using emcee, but starting the walkers off close to the peak (or
 any other arbitrary point). This is based off the
 linear_regression_with_unknown_noise.py example.
 """
-from __future__ import division
 import bilby
 import numpy as np
 import pandas as pd
diff --git a/examples/gw_examples/data_examples/GW150914.py b/examples/gw_examples/data_examples/GW150914.py
index d0fb0631b509889acbe5b10bfc345ff20965ee90..34356e2a3d8db0a09f3d425325840fbb42ab21b1 100755
--- a/examples/gw_examples/data_examples/GW150914.py
+++ b/examples/gw_examples/data_examples/GW150914.py
@@ -9,7 +9,6 @@ the LIGO Data Grid instead.
 
 [1] https://gwpy.github.io/docs/stable/timeseries/remote-access.html
 """
-from __future__ import division, print_function
 import bilby
 from gwpy.timeseries import TimeSeries
 
diff --git a/examples/gw_examples/data_examples/GW150914_advanced.py b/examples/gw_examples/data_examples/GW150914_advanced.py
index e6f27c3aadffe46f510e52cde3e3993b02425bc6..c22568a93b196386194f5cce6151465df3abc2d4 100755
--- a/examples/gw_examples/data_examples/GW150914_advanced.py
+++ b/examples/gw_examples/data_examples/GW150914_advanced.py
@@ -10,7 +10,6 @@ LIST OF AVAILABLE EVENTS:
 
 List of events in BILBY dict: run -> help(bilby.gw.utils.get_event_time(event))
 """
-from __future__ import division, print_function
 import bilby
 from gwpy.timeseries import TimeSeries
 
diff --git a/examples/gw_examples/data_examples/GW170817.py b/examples/gw_examples/data_examples/GW170817.py
index c3e8d6c96f233f7ad575fa941b25fee6b6a02b65..4bcd2e7e5e88e50c729edc02a39b85b9978e2fa0 100644
--- a/examples/gw_examples/data_examples/GW170817.py
+++ b/examples/gw_examples/data_examples/GW170817.py
@@ -4,7 +4,6 @@ This tutorial includes advanced specifications
 for analysing binary neutron star event data.
 Here GW170817 is used as an example.
 """
-from __future__ import division, print_function
 import bilby
 
 outdir = 'outdir'
diff --git a/examples/gw_examples/data_examples/get_LOSC_event_data.py b/examples/gw_examples/data_examples/get_LOSC_event_data.py
index 9c7b314797480aa4f4f6df5b767144490a84ca19..ece59e10ed776a78350a7eb3a3ba3ad06549e816 100644
--- a/examples/gw_examples/data_examples/get_LOSC_event_data.py
+++ b/examples/gw_examples/data_examples/get_LOSC_event_data.py
@@ -7,7 +7,6 @@ $ python get_LOSC_event_data -e GW150914
 
 """
 
-from __future__ import division
 import numpy as np
 import os
 import argparse
diff --git a/examples/gw_examples/injection_examples/australian_detector.py b/examples/gw_examples/injection_examples/australian_detector.py
index 0da60ef59d47a555fea8c266079ddbb12288b215..6b903a9fc0695a279ef777e0d0fa166aa7fae140 100644
--- a/examples/gw_examples/injection_examples/australian_detector.py
+++ b/examples/gw_examples/injection_examples/australian_detector.py
@@ -5,7 +5,6 @@ Tutorial to demonstrate a new interferometer
 We place a new instrument in Gingin, with an A+ sensitivity in a network of A+
 interferometers at Hanford and Livingston
 """
-from __future__ import division, print_function
 
 import numpy as np
 
diff --git a/examples/gw_examples/injection_examples/binary_neutron_star_example.py b/examples/gw_examples/injection_examples/binary_neutron_star_example.py
index bbcce5925d88b7cfc6e227804ee3dd9327e666fc..e6aa18a9a47c9594f05a446d47d6294a31fc6032 100644
--- a/examples/gw_examples/injection_examples/binary_neutron_star_example.py
+++ b/examples/gw_examples/injection_examples/binary_neutron_star_example.py
@@ -8,7 +8,6 @@ and also estimates the tidal deformabilities using a uniform prior in both
 tidal deformabilities
 """
 
-from __future__ import division, print_function
 
 import numpy as np
 
diff --git a/examples/gw_examples/injection_examples/bns_eos_example.py b/examples/gw_examples/injection_examples/bns_eos_example.py
index b1e5e029b0d593616e472f15f98c27c9a400a798..d84a86a59e73b08fd0f6766c43bc6408918c9f52 100644
--- a/examples/gw_examples/injection_examples/bns_eos_example.py
+++ b/examples/gw_examples/injection_examples/bns_eos_example.py
@@ -8,7 +8,6 @@ and also estimates the tidal deformabilities using a uniform prior in both
 tidal deformabilities
 """
 
-from __future__ import division, print_function
 
 import numpy as np
 
diff --git a/examples/gw_examples/injection_examples/calibration_example.py b/examples/gw_examples/injection_examples/calibration_example.py
index ca58994696f4233ceb60d17bf500e144671e6c72..8b9556f48ca43a2419629cc1d4f8fdb1daa25b96 100644
--- a/examples/gw_examples/injection_examples/calibration_example.py
+++ b/examples/gw_examples/injection_examples/calibration_example.py
@@ -3,7 +3,6 @@
 Tutorial to demonstrate running parameter estimation with calibration
 uncertainties included.
 """
-from __future__ import division, print_function
 
 import numpy as np
 import bilby
diff --git a/examples/gw_examples/injection_examples/change_sampled_parameters.py b/examples/gw_examples/injection_examples/change_sampled_parameters.py
index 9f432ef52910fd125b1930ea90420e7eae0b6245..4e0266d08b6290395d2a5e8dc47bdf0dc6ca9432 100644
--- a/examples/gw_examples/injection_examples/change_sampled_parameters.py
+++ b/examples/gw_examples/injection_examples/change_sampled_parameters.py
@@ -7,7 +7,6 @@ This example estimates the masses using a uniform prior in chirp mass,
 mass ratio and redshift.
 The cosmology is according to the Planck 2015 data release.
 """
-from __future__ import division, print_function
 import bilby
 import numpy as np
 
diff --git a/examples/gw_examples/injection_examples/create_your_own_source_model.py b/examples/gw_examples/injection_examples/create_your_own_source_model.py
index ded59e157ee098bd7c765dae01e0b5b55790d408..f755363f091a12f11df425ccaea3d7d7831090c2 100644
--- a/examples/gw_examples/injection_examples/create_your_own_source_model.py
+++ b/examples/gw_examples/injection_examples/create_your_own_source_model.py
@@ -2,7 +2,6 @@
 """
 A script to demonstrate how to use your own source model
 """
-from __future__ import division, print_function
 import bilby
 import numpy as np
 
diff --git a/examples/gw_examples/injection_examples/custom_proposal_example.py b/examples/gw_examples/injection_examples/custom_proposal_example.py
index afd85dace2ba9646388d0d5e1e19800679167010..2a19f4ee6944ff7aceff31591dad3c837ce0a73c 100644
--- a/examples/gw_examples/injection_examples/custom_proposal_example.py
+++ b/examples/gw_examples/injection_examples/custom_proposal_example.py
@@ -2,7 +2,6 @@
 """
 Tutorial for running cpnest with custom jump proposals.
 """
-from __future__ import division, print_function
 
 import numpy as np
 import bilby.gw.sampler.proposal
diff --git a/examples/gw_examples/injection_examples/eccentric_inspiral.py b/examples/gw_examples/injection_examples/eccentric_inspiral.py
index 8d3053f353fb769384f25a17c3b1d406860fd88e..88c3062e49e3c4514bfadf9a3a7d46ad0331dc75 100644
--- a/examples/gw_examples/injection_examples/eccentric_inspiral.py
+++ b/examples/gw_examples/injection_examples/eccentric_inspiral.py
@@ -10,7 +10,6 @@ Lower et al. (2018) -> arXiv:1806.05350.
 For a more comprehensive look at what goes on in each step, refer to the
 "basic_tutorial.py" example.
 """
-from __future__ import division
 
 import numpy as np
 import bilby
diff --git a/examples/gw_examples/injection_examples/fake_sampler_example.py b/examples/gw_examples/injection_examples/fake_sampler_example.py
index 63c2713737b17586a9c2d296837beec21fd83b43..e1e7d600d88be737405d12dcdd21be3d5fbcba90 100755
--- a/examples/gw_examples/injection_examples/fake_sampler_example.py
+++ b/examples/gw_examples/injection_examples/fake_sampler_example.py
@@ -2,7 +2,6 @@
 """
 Read ROQ posterior and calculate full likelihood at same parameter space points.
 """
-from __future__ import division, print_function
 
 import numpy as np
 import deepdish as dd
diff --git a/examples/gw_examples/injection_examples/fast_tutorial.py b/examples/gw_examples/injection_examples/fast_tutorial.py
index 89119eb59163292257bec63dce20e71686f4dd41..0f40818cc86b2fa7d3a647ca9930e596a03ecf4b 100644
--- a/examples/gw_examples/injection_examples/fast_tutorial.py
+++ b/examples/gw_examples/injection_examples/fast_tutorial.py
@@ -7,7 +7,6 @@ This example estimates the masses using a uniform prior in both component masses
 and distance using a uniform in comoving volume prior on luminosity distance
 between luminosity distances of 100Mpc and 5Gpc, the cosmology is Planck15.
 """
-from __future__ import division, print_function
 
 import numpy as np
 import bilby
diff --git a/examples/gw_examples/injection_examples/how_to_specify_the_prior.py b/examples/gw_examples/injection_examples/how_to_specify_the_prior.py
index 68081b4d2eddabec080e48005c6fc5536bb546e1..851a3aab37cfbc1ce915af5e80e104dddf4b3a80 100644
--- a/examples/gw_examples/injection_examples/how_to_specify_the_prior.py
+++ b/examples/gw_examples/injection_examples/how_to_specify_the_prior.py
@@ -3,7 +3,6 @@
 Tutorial to demonstrate how to specify the prior distributions used for
 parameter estimation.
 """
-from __future__ import division, print_function
 
 import numpy as np
 import bilby
diff --git a/examples/gw_examples/injection_examples/marginalized_likelihood.py b/examples/gw_examples/injection_examples/marginalized_likelihood.py
index c6ab3362ca98a57a5a423c62c44f78ab54148493..c28efca13d4d9ddda064d4e6eef14c72b8086407 100644
--- a/examples/gw_examples/injection_examples/marginalized_likelihood.py
+++ b/examples/gw_examples/injection_examples/marginalized_likelihood.py
@@ -6,7 +6,6 @@ estimation on an injected signal using time, phase and distance marginalisation.
 We also demonstrate how the posterior distribution for the marginalised
 parameter can be recovered in post-processing.
 """
-from __future__ import division, print_function
 import bilby
 import numpy as np
 
diff --git a/examples/gw_examples/injection_examples/non_tensor.py b/examples/gw_examples/injection_examples/non_tensor.py
index 4b18f9648d8a98a6b9822325504090888b5d5cc1..6c44216954f92aab978cac1be612d3ebbc17b042 100644
--- a/examples/gw_examples/injection_examples/non_tensor.py
+++ b/examples/gw_examples/injection_examples/non_tensor.py
@@ -6,7 +6,6 @@ allowed in general relativity.
 We adapt the sine-Gaussian burst model to include vector polarizations with an
 unknown contribution from the vector modes.
 """
-from __future__ import division, print_function
 import bilby
 import numpy as np
 
diff --git a/examples/gw_examples/injection_examples/plot_skymap.py b/examples/gw_examples/injection_examples/plot_skymap.py
index 5d2a1ffaf22df70f40bc33b1d81c6127b0fab5a5..4c7ecb9a3e3abbaa4bab6099330b1332d5846909 100644
--- a/examples/gw_examples/injection_examples/plot_skymap.py
+++ b/examples/gw_examples/injection_examples/plot_skymap.py
@@ -3,7 +3,6 @@
 Example script which produces posterior samples of ra and dec and generates a
 skymap
 """
-from __future__ import division, print_function
 import bilby
 
 duration = 4.
diff --git a/examples/gw_examples/injection_examples/plot_time_domain_data.py b/examples/gw_examples/injection_examples/plot_time_domain_data.py
index eee3c8557bd447714bd0fe9025f838928f9cd542..4e572e8a53a330a0be47a9bfc5f2b50b6879cd0a 100644
--- a/examples/gw_examples/injection_examples/plot_time_domain_data.py
+++ b/examples/gw_examples/injection_examples/plot_time_domain_data.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python
 """
 """
-from __future__ import division, print_function
 import numpy as np
 import bilby
 
diff --git a/examples/gw_examples/injection_examples/roq_example.py b/examples/gw_examples/injection_examples/roq_example.py
index 69bff31df6e1fd6413d69de64669d53f9ee91f36..9b8b9d97a6f011f962ff083c9f894e71d6dde2f6 100644
--- a/examples/gw_examples/injection_examples/roq_example.py
+++ b/examples/gw_examples/injection_examples/roq_example.py
@@ -8,7 +8,6 @@ This requires files specifying the appropriate basis weights.
 These aren't shipped with Bilby, but are available on LDG clusters and
 from the public repository https://git.ligo.org/lscsoft/ROQ_data.
 """
-from __future__ import division, print_function
 
 import numpy as np
 
diff --git a/examples/gw_examples/injection_examples/sine_gaussian_example.py b/examples/gw_examples/injection_examples/sine_gaussian_example.py
index 07ac1ff0b2a12f1fe446e3f08e1fc85f70783c1f..d40cb68e89e37a28409c8a292dad4176117a9c20 100644
--- a/examples/gw_examples/injection_examples/sine_gaussian_example.py
+++ b/examples/gw_examples/injection_examples/sine_gaussian_example.py
@@ -3,7 +3,6 @@
 Tutorial to demonstrate running parameter estimation on a sine gaussian
 injected signal.
 """
-from __future__ import division, print_function
 import bilby
 import numpy as np
 
diff --git a/examples/gw_examples/injection_examples/standard_15d_cbc_tutorial.py b/examples/gw_examples/injection_examples/standard_15d_cbc_tutorial.py
index 822c11b1002f1bdb1021e5d0d7daef57dc16a259..7eef1b4291c18da127bdcf0494e7227baf7f763c 100644
--- a/examples/gw_examples/injection_examples/standard_15d_cbc_tutorial.py
+++ b/examples/gw_examples/injection_examples/standard_15d_cbc_tutorial.py
@@ -4,7 +4,6 @@ Tutorial to demonstrate running parameter estimation on a full 15 parameter
 space for an injected cbc signal. This is the standard injection analysis script
 one can modify for the study of injected CBC events.
 """
-from __future__ import division, print_function
 import numpy as np
 import bilby
 
diff --git a/examples/gw_examples/injection_examples/using_gwin.py b/examples/gw_examples/injection_examples/using_gwin.py
index 4d9c583ac6fa35f81b038232c566d2fbada5ad8a..de68a82886ed8f262d0f5bef5ddf98629be13b8d 100644
--- a/examples/gw_examples/injection_examples/using_gwin.py
+++ b/examples/gw_examples/injection_examples/using_gwin.py
@@ -15,7 +15,6 @@ of the model. So, in the following, we only create priors for the parameters
 to be searched over.
 
 """
-from __future__ import division, print_function
 import numpy as np
 import bilby
 
diff --git a/examples/gw_examples/supernova_example/supernova_example.py b/examples/gw_examples/supernova_example/supernova_example.py
index 507c4e4ed1c4c4907b7b9f153649d0634897c953..b5de9f40775a91f27dce75a2a3fe549d6ca4a2c8 100644
--- a/examples/gw_examples/supernova_example/supernova_example.py
+++ b/examples/gw_examples/supernova_example/supernova_example.py
@@ -7,7 +7,6 @@ supernova waveforms. The first few PCs are then linearly combined with a scale
 factor. (See https://arxiv.org/pdf/1202.3256.pdf)
 
 """
-from __future__ import division, print_function
 import numpy as np
 import bilby
 
diff --git a/requirements.txt b/requirements.txt
index bf8b8babcb5c7247c38367537e5b45f7fd479fbd..8512031dd5b0ad28e7712d55475fc557b20d712e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,3 @@
-future
 dynesty
 emcee
 corner
diff --git a/sampler_requirements.txt b/sampler_requirements.txt
index 06a065393d711783bbd43dbb1e3a7ccc8b8b9560..08202cc3890a9fa56ea2b84a0f786f7568d515cb 100644
--- a/sampler_requirements.txt
+++ b/sampler_requirements.txt
@@ -7,5 +7,5 @@ pymc3==3.6; python_version <= '2.7'
 pymc3>=3.6; python_version > '3.4'
 pymultinest
 kombine
-ultranest>=2.2.1
-dnest4
\ No newline at end of file
+ultranest>=3.0.0
+dnest4
diff --git a/setup.py b/setup.py
index daf2db2fc7252f02126b564695a821e93e2ee7c4..79fe30cfee90473c7a0a0bfe73ebddd838fad655 100644
--- a/setup.py
+++ b/setup.py
@@ -64,7 +64,7 @@ def readfile(filename):
     return filecontents
 
 
-VERSION = '1.0.3'
+VERSION = '1.0.4'
 version_file = write_version_file(VERSION)
 long_description = get_long_description()
 
@@ -87,7 +87,6 @@ setup(name='bilby',
                     'bilby': [version_file]},
       python_requires='>=3.5',
       install_requires=[
-          'future',
           'dynesty>=1.0.0',
           'emcee',
           'corner',
diff --git a/test/core/prior/prior_test.py b/test/core/prior/prior_test.py
index dc6ee205e8f283a6684becec44f40c5024e9bc98..2bbbc2bf1e87535ed56a8d47bff35ec57c2c9b4b 100644
--- a/test/core/prior/prior_test.py
+++ b/test/core/prior/prior_test.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division
 import bilby
 import unittest
 import numpy as np
diff --git a/test/core/sampler/ptemcee_test.py b/test/core/sampler/ptemcee_test.py
index 70abcb0af21e3a9a55ca486a1c0c0981a66dc2f7..08439c814878878583d191b2170a5ac36a5e751b 100644
--- a/test/core/sampler/ptemcee_test.py
+++ b/test/core/sampler/ptemcee_test.py
@@ -28,36 +28,36 @@ class TestPTEmcee(unittest.TestCase):
 
     def test_default_kwargs(self):
         expected = dict(
-            ntemps=20,
-            nwalkers=200,
+            ntemps=10,
+            nwalkers=100,
             Tmax=None,
             betas=None,
             a=2.0,
             adaptation_lag=10000,
             adaptation_time=100,
             random=None,
-            adapt=True,
+            adapt=False,
             swap_ratios=False,
         )
         self.assertDictEqual(expected, self.sampler.kwargs)
 
     def test_translate_kwargs(self):
         expected = dict(
-            ntemps=20,
-            nwalkers=200,
+            ntemps=10,
+            nwalkers=100,
             Tmax=None,
             betas=None,
             a=2.0,
             adaptation_lag=10000,
             adaptation_time=100,
             random=None,
-            adapt=True,
+            adapt=False,
             swap_ratios=False,
         )
         for equiv in bilby.core.sampler.base_sampler.MCMCSampler.nwalkers_equiv_kwargs:
             new_kwargs = self.sampler.kwargs.copy()
             del new_kwargs["nwalkers"]
-            new_kwargs[equiv] = 200
+            new_kwargs[equiv] = 100
             self.sampler.kwargs = new_kwargs
             self.assertDictEqual(expected, self.sampler.kwargs)
 
diff --git a/test/core/sampler/ultranest_test.py b/test/core/sampler/ultranest_test.py
index 76354e08281c379d28eda0abd9d18992220c4989..e70a70fdfb7785bd2457c01ee6399d022e88a91e 100644
--- a/test/core/sampler/ultranest_test.py
+++ b/test/core/sampler/ultranest_test.py
@@ -44,7 +44,7 @@ class TestUltranest(unittest.TestCase):
             log_interval=None,
             dlogz=None,
             max_iters=None,
-            update_interval_iter_fraction=0.2,
+            update_interval_volume_fraction=0.2,
             viz_callback=None,
             dKL=0.5,
             frac_remain=0.01,
@@ -79,7 +79,7 @@ class TestUltranest(unittest.TestCase):
             log_interval=None,
             dlogz=None,
             max_iters=None,
-            update_interval_iter_fraction=0.2,
+            update_interval_volume_fraction=0.2,
             viz_callback=None,
             dKL=0.5,
             frac_remain=0.01,
diff --git a/test/gw/detector/strain_data_test.py b/test/gw/detector/strain_data_test.py
index a11e347df563b96afdde5e025abc47974a5fc989..9cba741042a714e9875e2995d55bc628d7b78cfd 100644
--- a/test/gw/detector/strain_data_test.py
+++ b/test/gw/detector/strain_data_test.py
@@ -35,6 +35,49 @@ class TestInterferometerStrainData(unittest.TestCase):
                 np.array_equal(self.ifosd.frequency_mask, [False, True, False])
             )
 
+    def test_frequency_mask_2(self):
+        strain_data = bilby.gw.detector.InterferometerStrainData(
+            minimum_frequency=20, maximum_frequency=512)
+        strain_data.set_from_time_domain_strain(
+            time_domain_strain=np.random.normal(0, 1, 4096),
+            time_array=np.arange(0, 4, 4 / 4096)
+        )
+
+        # Test from init
+        freqs = strain_data.frequency_array[strain_data.frequency_mask]
+        self.assertTrue(all(freqs >= 20))
+        self.assertTrue(all(freqs <= 512))
+
+        # Test from update
+        strain_data.minimum_frequency = 30
+        strain_data.maximum_frequency = 256
+        freqs = strain_data.frequency_array[strain_data.frequency_mask]
+        self.assertTrue(all(freqs >= 30))
+        self.assertTrue(all(freqs <= 256))
+
+    def test_notches_frequency_mask(self):
+        strain_data = bilby.gw.detector.InterferometerStrainData(
+            minimum_frequency=20, maximum_frequency=512, notch_list=[(100, 101)])
+        strain_data.set_from_time_domain_strain(
+            time_domain_strain=np.random.normal(0, 1, 4096),
+            time_array=np.arange(0, 4, 4 / 4096)
+        )
+
+        # Test from init
+        freqs = strain_data.frequency_array[strain_data.frequency_mask]
+        idxs = (freqs > 100) * (freqs < 101)
+        self.assertTrue(len(freqs[idxs]) == 0)
+
+        # Test from setting
+        idxs = (freqs > 200) * (freqs < 201)
+        self.assertTrue(len(freqs[idxs]) > 0)
+        strain_data.notch_list = [(100, 101), (200, 201)]
+        freqs = strain_data.frequency_array[strain_data.frequency_mask]
+        idxs = (freqs > 200) * (freqs < 201)
+        self.assertTrue(len(freqs[idxs]) == 0)
+        idxs = (freqs > 100) * (freqs < 101)
+        self.assertTrue(len(freqs[idxs]) == 0)
+
     def test_set_data_fails(self):
         with mock.patch("bilby.core.utils.create_frequency_series") as m:
             m.return_value = [1, 2, 3]
@@ -316,5 +359,67 @@ class TestInterferometerStrainDataEquals(unittest.TestCase):
         self.assertNotEqual(self.ifosd_1, self.ifosd_2)
 
 
+class TestNotch(unittest.TestCase):
+    def setUp(self):
+        self.minimum_frequency = 20
+        self.maximum_frequency = 1024
+
+    def test_init(self):
+        notch = bilby.gw.detector.strain_data.Notch(self.minimum_frequency, self.maximum_frequency)
+        self.assertEqual(notch.minimum_frequency, self.minimum_frequency)
+        self.assertEqual(notch.maximum_frequency, self.maximum_frequency)
+
+    def test_init_fail(self):
+        # Infinite frequency
+        with self.assertRaises(ValueError):
+            bilby.gw.detector.strain_data.Notch(self.minimum_frequency, np.inf)
+
+        # Negative frequency
+        with self.assertRaises(ValueError):
+            bilby.gw.detector.strain_data.Notch(-10, 1024)
+        with self.assertRaises(ValueError):
+            bilby.gw.detector.strain_data.Notch(10, -1024)
+
+        # Ordering
+        with self.assertRaises(ValueError):
+            bilby.gw.detector.strain_data.Notch(30, 20)
+
+    def test_idxs(self):
+        notch = bilby.gw.detector.strain_data.Notch(self.minimum_frequency, self.maximum_frequency)
+        freqs = np.linspace(0, 2048, 100)
+        idxs = notch.get_idxs(freqs)
+        self.assertEqual(len(idxs), len(freqs))
+        freqs_masked = freqs[idxs]
+        self.assertTrue(all(freqs_masked > notch.minimum_frequency))
+        self.assertTrue(all(freqs_masked < notch.maximum_frequency))
+
+
+class TestNotchList(unittest.TestCase):
+
+    def test_init_single(self):
+        notch_list_of_tuples = [(32, 34)]
+        notch_list = bilby.gw.detector.strain_data.NotchList(notch_list_of_tuples)
+        self.assertEqual(len(notch_list), len(notch_list_of_tuples))
+        for notch, notch_tuple in zip(notch_list, notch_list_of_tuples):
+            self.assertEqual(notch.minimum_frequency, notch_tuple[0])
+            self.assertEqual(notch.maximum_frequency, notch_tuple[1])
+
+    def test_init_multiple(self):
+        notch_list_of_tuples = [(32, 34), (56, 59)]
+        notch_list = bilby.gw.detector.strain_data.NotchList(notch_list_of_tuples)
+        self.assertEqual(len(notch_list), len(notch_list_of_tuples))
+        for notch, notch_tuple in zip(notch_list, notch_list_of_tuples):
+            self.assertEqual(notch.minimum_frequency, notch_tuple[0])
+            self.assertEqual(notch.maximum_frequency, notch_tuple[1])
+
+    def test_init_fail(self):
+        with self.assertRaises(ValueError):
+            bilby.gw.detector.strain_data.NotchList([20, 30])
+        with self.assertRaises(ValueError):
+            bilby.gw.detector.strain_data.NotchList([(30, 20), (20)])
+        with self.assertRaises(ValueError):
+            bilby.gw.detector.strain_data.NotchList([(30, 20, 20)])
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/test/gw/plot_test.py b/test/gw/plot_test.py
index 2a33fdec04df34257dbc76aaf07dc22f5a72d01e..e85aac75a6582834c504e66ab66aa7a90077a484 100644
--- a/test/gw/plot_test.py
+++ b/test/gw/plot_test.py
@@ -140,7 +140,7 @@ class TestCBCResult(unittest.TestCase):
             objid="test",
             instruments="H1L1",
             load_pickle=True,
-            colorbar=True,
+            colorbar=False,
         )
 
 
diff --git a/test/gw/source_test.py b/test/gw/source_test.py
index d14517acf9549d08e7d329532d47eb4475bdb85f..ec4a2929ad96638b541ae7d39ac567d2c9fe7c4d 100644
--- a/test/gw/source_test.py
+++ b/test/gw/source_test.py
@@ -79,6 +79,19 @@ class TestLalBBH(unittest.TestCase):
     #         bilby.gw.source.lal_binary_black_hole(
     #             self.frequency_array, **self.parameters), dict)
 
+    def test_lal_bbh_xpprecession_version(self):
+        self.parameters.update(self.waveform_kwargs)
+        self.parameters["waveform_approximant"] = "IMRPhenomXP"
+
+        # Test that we can modify the XP precession version
+        out_v223 = bilby.gw.source.lal_binary_black_hole(
+            self.frequency_array, PhenomXPrecVersion=223, **self.parameters
+        )
+        out_v102 = bilby.gw.source.lal_binary_black_hole(
+            self.frequency_array, PhenomXPrecVersion=102, **self.parameters
+        )
+        self.assertFalse(np.all(out_v223["plus"] == out_v102["plus"]))
+
 
 class TestLalBNS(unittest.TestCase):
     def setUp(self):