diff --git a/tupak/result.py b/tupak/result.py index 60f6165079755848eedb8d13b209931b2cfe0a13..649727fd8771d5485fbf8ebac937bf92e1d94fb8 100644 --- a/tupak/result.py +++ b/tupak/result.py @@ -12,7 +12,25 @@ except ImportError: "You do not have the optional module chainconsumer installed") +def result_file_name(outdir, label): + """ Returns the standard filename used for a result file """ + return '{}/{}_result.h5'.format(outdir, label) + + +def read_in_result(outdir, label): + """ Read in a saved .h5 data file """ + filename = result_file_name(outdir, label) + if os.path.isfile(filename): + return Result(deepdish.io.load(filename)) + else: + return None + + class Result(dict): + def __init__(self, dictionary=None): + if type(dictionary) is dict: + for key in dictionary: + setattr(self, key, dictionary[key]) def __getattr__(self, name): try: @@ -25,15 +43,19 @@ class Result(dict): def __repr__(self): """Print a summary """ - return ("nsamples: {:d}\n" - "noise_logz: {:6.3f}\n" - "logz: {:6.3f} +/- {:6.3f}\n" - "log_bayes_factor: {:6.3f} +/- {:6.3f}\n" - .format(len(self.samples), self.noise_logz, self.logz, self.logzerr, self.log_bayes_factor, - self.logzerr)) + if hasattr(self, 'samples'): + return ("nsamples: {:d}\n" + "noise_logz: {:6.3f}\n" + "logz: {:6.3f} +/- {:6.3f}\n" + "log_bayes_factor: {:6.3f} +/- {:6.3f}\n" + .format(len(self.samples), self.noise_logz, self.logz, + self.logzerr, self.log_bayes_factor, self.logzerr)) + else: + return '' def save_to_file(self, outdir, label): - file_name = '{}/{}_result.h5'.format(outdir, label) + """ Writes the Result to a deepdish h5 file """ + file_name = result_file_name(outdir, label) if os.path.isdir(outdir) is False: os.makedirs(outdir) if os.path.isfile(file_name): @@ -194,3 +216,19 @@ class Result(dict): self.posterior['chi_p'] = max(self.posterior.a_1 * np.sin(self.posterior.tilt_1), (4 * self.posterior.q + 3) / (3 * self.posterior.q + 4) * self.posterior.q * self.posterior.a_2 * np.sin(self.posterior.tilt_2)) + + 
def check_attribute_match_to_other_object(self, name, other_object): + """ Check attribute name exists in other_object and is the same """ + A = getattr(self, name, False) + B = getattr(other_object, name, False) + logging.debug('Checking {} value: {}=={}'.format(name, A, B)) + if (A is not False) and (B is not False): + typeA = type(A) + typeB = type(B) + if typeA == typeB: + if typeA in [str, float, int, dict, list]: + return A == B + elif typeA in [np.ndarray]: + return bool(np.all(A == B)) + return False + diff --git a/tupak/sampler.py b/tupak/sampler.py index 970aa150fb6b03fb67fe78091b7154afe613c108..c9f028c86c275fcacfb529520f0c5d05b35316b5 100644 --- a/tupak/sampler.py +++ b/tupak/sampler.py @@ -7,7 +7,7 @@ import sys import numpy as np import matplotlib.pyplot as plt -from .result import Result +from .result import Result, read_in_result from .prior import Prior, fill_priors from . import utils from . import prior @@ -54,6 +54,7 @@ class Sampler(object): self.kwargs = kwargs self.result = result + self.check_cached_result() self.log_summary_for_sampler() @@ -188,9 +189,26 @@ class Sampler(object): def run_sampler(self): pass + def check_cached_result(self): + """ Check if the cached data file exists and can be used """ + logging.debug("Checking cached data") + self.cached_result = read_in_result(self.outdir, self.label) + if self.cached_result: + check_keys = ['search_parameter_keys', 'fixed_parameter_keys', + 'kwargs'] + use_cache = True + for key in check_keys: + if self.cached_result.check_attribute_match_to_other_object( + key, self) is False: + logging.debug("Cached value {} is unmatched".format(key)) + use_cache = False + if use_cache is False: + self.cached_result = None + def log_summary_for_sampler(self): - logging.info("Using sampler {} with kwargs {}".format( - self.__class__.__name__, self.kwargs)) + if not self.cached_result: + logging.info("Using sampler {} with kwargs {}".format( + self.__class__.__name__, self.kwargs)) class Nestle(Sampler): 
@@ -367,7 +385,7 @@ class Ptemcee(Sampler): def run_sampler(likelihood, priors=None, label='label', outdir='outdir', sampler='nestle', use_ratio=True, injection_parameters=None, - **sampler_kwargs): + **kwargs): """ The primary interface to easy parameter estimation @@ -392,7 +410,7 @@ def run_sampler(likelihood, priors=None, label='label', outdir='outdir', injection_parameters: dict A dictionary of injection parameters used in creating the data (if using simulated data). Appended to the result object and saved. - **sampler_kwargs: + **kwargs: All kwargs are passed directly to the samplers `run` functino Returns @@ -413,7 +431,11 @@ def run_sampler(likelihood, priors=None, label='label', outdir='outdir', sampler_class = globals()[sampler.title()] sampler = sampler_class(likelihood, priors, sampler, outdir=outdir, label=label, use_ratio=use_ratio, - **sampler_kwargs) + **kwargs) + if sampler.cached_result: + logging.info("Using cached result") + return sampler.cached_result + result = sampler.run_sampler() result.noise_logz = likelihood.noise_log_likelihood() if use_ratio: @@ -423,7 +445,8 @@ def run_sampler(likelihood, priors=None, label='label', outdir='outdir', result.log_bayes_factor = result.logz - result.noise_logz result.injection_parameters = injection_parameters result.fixed_parameter_keys = [key for key in priors if isinstance(key, prior.DeltaFunction)] - # result.prior = prior # Removed as this breaks the saving of the data + result.priors = priors + result.kwargs = sampler.kwargs result.samples_to_data_frame() result.save_to_file(outdir=outdir, label=label) return result