Merge branch 'adding-cached-data-check' into 'master'

Adding cached data check. Closes #64. See merge request Monash/tupak!39

Merge branch 'adding-cached-data-check' into 'master'
532555f5 · Gregory Ashton · 9420ae5d · e411b206 · 532555f5 · 532555f5
Commit 532555f5 authored 6 years ago by Gregory Ashton
--- a/tupak/result.py
+++ b/tupak/result.py
@@ -12,7 +12,25 @@ except ImportError:
            "You do not have the optional module chainconsumer installed")


+def result_file_name(outdir, label):
+    """ Return the standard result file path, `<outdir>/<label>_result.h5` """
+    return '{}/{}_result.h5'.format(outdir, label)
+
+
+def read_in_result(outdir, label):
+    """ Load the saved Result for outdir/label; None if the file is absent """
+    path = result_file_name(outdir, label)
+    if not os.path.isfile(path):
+        # no result file has been written under this outdir/label
+        return None
+    return Result(deepdish.io.load(path))
+
+
 class Result(dict):
+    def __init__(self, dictionary=None):
+        """ Copy each entry of `dictionary` (a dict or subclass) onto self """
+        for key in (dictionary if isinstance(dictionary, dict) else ()):
+            setattr(self, key, dictionary[key])

    def __getattr__(self, name):
        try:
@@ -25,15 +43,19 @@ class Result(dict):

    def __repr__(self):
        """Print a summary """
-        return ("nsamples: {:d}\n"
-                "noise_logz: {:6.3f}\n"
-                "logz: {:6.3f} +/- {:6.3f}\n"
-                "log_bayes_factor: {:6.3f} +/- {:6.3f}\n"
-                .format(len(self.samples), self.noise_logz, self.logz, self.logzerr, self.log_bayes_factor,
-                        self.logzerr))
+        if not hasattr(self, 'samples'):
+            # without a `samples` attribute there is nothing to summarise
+            return ''
+        fmt = ("nsamples: {:d}\n"
+               "noise_logz: {:6.3f}\n"
+               "logz: {:6.3f} +/- {:6.3f}\n"
+               "log_bayes_factor: {:6.3f} +/- {:6.3f}\n")
+        return fmt.format(len(self.samples), self.noise_logz, self.logz,
+                          self.logzerr, self.log_bayes_factor, self.logzerr)

    def save_to_file(self, outdir, label):
-        file_name = '{}/{}_result.h5'.format(outdir, label)
+        """ Writes the Result to a deepdish h5 file """
+        file_name = result_file_name(outdir, label)
        if os.path.isdir(outdir) is False:
            os.makedirs(outdir)
        if os.path.isfile(file_name):
@@ -194,3 +216,19 @@ class Result(dict):
        self.posterior['chi_p'] = max(self.posterior.a_1 * np.sin(self.posterior.tilt_1),
                                      (4 * self.posterior.q + 3) / (3 * self.posterior.q + 4) * self.posterior.q
                                      * self.posterior.a_2 * np.sin(self.posterior.tilt_2))
+
+    def check_attribute_match_to_other_object(self, name, other_object):
+        """ Check attribute name exists in other_object and is the same;
+        always returns a builtin bool so tests such as `is False` work """
+        A = getattr(self, name, False)
+        B = getattr(other_object, name, False)
+        logging.debug('Checking {} value: {}=={}'.format(name, A, B))
+        if A is False or B is False or type(A) != type(B):
+            return False
+        if type(A) is np.ndarray:
+            # np.all() yields np.bool_; array_equal also handles shape mismatch
+            return bool(np.array_equal(A, B))
+        if type(A) in [str, float, int, dict, list]:
+            return A == B
+        return False
+
--- a/tupak/sampler.py
+++ b/tupak/sampler.py
@@ -7,7 +7,7 @@ import sys
 import numpy as np
 import matplotlib.pyplot as plt

-from .result import Result
+from .result import Result, read_in_result
 from .prior import Prior, fill_priors
 from . import utils
 from . import prior
@@ -54,6 +54,7 @@ class Sampler(object):
        self.kwargs = kwargs

        self.result = result
+        self.check_cached_result()

        self.log_summary_for_sampler()

@@ -188,9 +189,26 @@ class Sampler(object):
    def run_sampler(self):
        pass

+    def check_cached_result(self):
+        """ Load the cached result, discarding it if its settings differ """
+        logging.debug("Checking cached data")
+        self.cached_result = read_in_result(self.outdir, self.label)
+        if self.cached_result:
+            matches = self.cached_result.check_attribute_match_to_other_object
+            use_cache = True
+            for key in ['search_parameter_keys', 'fixed_parameter_keys',
+                        'kwargs']:
+                # truthiness, so a NumPy false (not the `False` object) counts
+                if not matches(key, self):
+                    logging.debug("Cached value {} is unmatched".format(key))
+                    use_cache = False
+            if not use_cache:
+                self.cached_result = None
+
    def log_summary_for_sampler(self):
-        logging.info("Using sampler {} with kwargs {}".format(
-            self.__class__.__name__, self.kwargs))
+        if not self.cached_result:  # is None (never False) without a cache
+            logging.info("Using sampler {} with kwargs {}".format(
+                self.__class__.__name__, self.kwargs))


 class Nestle(Sampler):
@@ -367,7 +385,7 @@ class Ptemcee(Sampler):

 def run_sampler(likelihood, priors=None, label='label', outdir='outdir',
                sampler='nestle', use_ratio=True, injection_parameters=None,
-                **sampler_kwargs):
+                **kwargs):
    """
    The primary interface to easy parameter estimation

@@ -392,7 +410,7 @@ def run_sampler(likelihood, priors=None, label='label', outdir='outdir',
    injection_parameters: dict
        A dictionary of injection parameters used in creating the data (if
        using simulated data). Appended to the result object and saved.
-    **sampler_kwargs:
+    **kwargs:
        All kwargs are passed directly to the samplers `run` functino

    Returns
@@ -413,7 +431,11 @@ def run_sampler(likelihood, priors=None, label='label', outdir='outdir',
        sampler_class = globals()[sampler.title()]
        sampler = sampler_class(likelihood, priors, sampler, outdir=outdir,
                                label=label, use_ratio=use_ratio,
-                                **sampler_kwargs)
+                                **kwargs)
+        if sampler.cached_result:
+            logging.info("Using cached result")
+            return sampler.cached_result
+
        result = sampler.run_sampler()
        result.noise_logz = likelihood.noise_log_likelihood()
        if use_ratio:
@@ -423,7 +445,8 @@ def run_sampler(likelihood, priors=None, label='label', outdir='outdir',
            result.log_bayes_factor = result.logz - result.noise_logz
        result.injection_parameters = injection_parameters
        result.fixed_parameter_keys = [key for key in priors if isinstance(key, prior.DeltaFunction)]
-        # result.prior = prior  # Removed as this breaks the saving of the data
+        result.priors = priors
+        result.kwargs = sampler.kwargs
        result.samples_to_data_frame()
        result.save_to_file(outdir=outdir, label=label)
        return result