diff --git a/examples/other_examples/linear_regression_pymc3_custom_likelihood.py b/examples/other_examples/linear_regression_pymc3_custom_likelihood.py
index 3b3ab8af138ead0dc1428e1737eb46ed1ea255ca..4d87b8110bf6c4490378c29b61df2e81656df6b9 100644
--- a/examples/other_examples/linear_regression_pymc3_custom_likelihood.py
+++ b/examples/other_examples/linear_regression_pymc3_custom_likelihood.py
@@ -99,7 +99,7 @@ class GaussianLikelihoodPyMC3(tupak.Likelihood):
         with sampler.pymc3_model:
             mdist = sampler.pymc3_priors['m']
             cdist = sampler.pymc3_priors['c']
-
+
             mu = model(time, mdist, cdist)
 
             # set the likelihood distribution
diff --git a/tupak/core/result.py b/tupak/core/result.py
index dc99e0946ff61aebfe98796fdbf2493475181c1a..687560869b4a2db1479ba75c9f68f899073f3ed9 100644
--- a/tupak/core/result.py
+++ b/tupak/core/result.py
@@ -375,21 +375,33 @@ class Result(dict):
         self.posterior['mass_chirp'] = (
            (self.posterior.mass_1 * self.posterior.mass_2) ** 0.6 / (
                self.posterior.mass_1 + self.posterior.mass_2) ** 0.2)
+        self.search_parameter_keys.append('mass_chirp')
+        self.parameter_labels.append(r'$\mathcal{M}$')
+
         self.posterior['q'] = self.posterior.mass_2 / self.posterior.mass_1
+        self.search_parameter_keys.append('q')
+        self.parameter_labels.append('$q$')
+
         self.posterior['eta'] = (
            (self.posterior.mass_1 * self.posterior.mass_2) / (
                self.posterior.mass_1 + self.posterior.mass_2) ** 2)
+        self.search_parameter_keys.append('eta')
+        self.parameter_labels.append(r'$\eta$')
 
         self.posterior['chi_eff'] = (
-            self.posterior.a_1 * np.cos(self.posterior.tilt_1) +
-            self.posterior.q * self.posterior.a_2 *
-            np.cos(self.posterior.tilt_2)) / (1 + self.posterior.q)
-
-        self.posterior['chi_p'] = np.maximum(
-            self.posterior.a_1 * np.sin(self.posterior.tilt_1),
-            (4 * self.posterior.q + 3) / (3 * self.posterior.q + 4) *
-            self.posterior.q * self.posterior.a_2 *
-            np.sin(self.posterior.tilt_2))
+            (self.posterior.a_1 * np.cos(self.posterior.tilt_1) +
+             self.posterior.q * self.posterior.a_2 *
+             np.cos(self.posterior.tilt_2)) / (1 + self.posterior.q))
+        self.search_parameter_keys.append('chi_eff')
+        self.parameter_labels.append(r'$\chi_{\mathrm{eff}}$')
+
+        self.posterior['chi_p'] = (
+            np.maximum(self.posterior.a_1 * np.sin(self.posterior.tilt_1),
+                       (4 * self.posterior.q + 3) / (3 * self.posterior.q + 4) *
+                       self.posterior.q * self.posterior.a_2 *
+                       np.sin(self.posterior.tilt_2)))
+        self.search_parameter_keys.append('chi_p')
+        self.parameter_labels.append(r'$\chi_{\mathrm{p}}$')
 
     def check_attribute_match_to_other_object(self, name, other_object):
         """ Check attribute name exists in other_object and is the same
diff --git a/tupak/core/sampler.py b/tupak/core/sampler.py
index e9700762e07618dca28051733d04f09175b9ae52..47972d75683ce5b104b9fda41207dbd02a762af1 100644
--- a/tupak/core/sampler.py
+++ b/tupak/core/sampler.py
@@ -597,6 +597,9 @@ class Dynesty(Sampler):
         self.result.log_likelihood_evaluations = out.logl
         self.result.log_evidence = out.logz[-1]
         self.result.log_evidence_err = out.logzerr[-1]
+        self.result.nested_samples = pd.DataFrame(
+            out.samples, columns=self.search_parameter_keys)
+        self.result.nested_samples['weights'] = weights
         if self.plot:
             self.generate_trace_plots(out)
 
@@ -1176,6 +1179,9 @@ class Pymc3(Sampler):
         prior_map['Lorentzian'] = prior_map['Cauchy']
         prior_map['FromFile'] = prior_map['Interped']
 
+        # GW specific priors
+        prior_map['UniformComovingVolume'] = prior_map['Interped']
+
         # internally defined mappings for tupak priors
         prior_map['DeltaFunction'] = {'internal': self._deltafunction_prior}
         prior_map['Sine'] = {'internal': self._sine_prior}
@@ -1353,22 +1359,41 @@ class Pymc3(Sampler):
         step_methods = {m.__name__.lower(): m.__name__ for m in STEP_METHODS}
         if 'step' in self.__kwargs:
-            step_method = self.__kwargs.pop('step').lower()
+            self.step_method = self.__kwargs.pop('step')
 
-            if step_method not in step_methods:
-                raise ValueError("Using invalid step method '{}'".format(step_method))
+            # 'step' could be a dictionary of methods for different parameters, so check for this
+            if isinstance(self.step_method, (dict, OrderedDict)):
+                for key in self.step_method:
+                    if key not in self.__search_parameter_keys:
+                        raise ValueError("Setting a step method for an unknown parameter '{}'".format(key))
+                    else:
+                        if self.step_method[key].lower() not in step_methods:
+                            raise ValueError("Using invalid step method '{}'".format(self.step_method[key]))
+            else:
+                self.step_method = self.step_method.lower()
+
+                if self.step_method not in step_methods:
+                    raise ValueError("Using invalid step method '{}'".format(self.step_method))
         else:
-            step_method = None
+            self.step_method = None
 
         # initialise the PyMC3 model
         self.pymc3_model = pymc3.Model()
 
-        # set the step method
-        sm = None if step_method is None else pymc3.__dict__[step_methods[step_method]]()
-
         # set the prior
         self.set_prior()
 
+        # set the step method
+        if isinstance(self.step_method, (dict, OrderedDict)):
+            # create list of step methods (any not given will default to NUTS)
+            sm = []
+            with self.pymc3_model:
+                for key in self.step_method:
+                    curmethod = self.step_method[key].lower()
+                    sm.append(pymc3.__dict__[step_methods[curmethod]]([self.pymc3_priors[key]]))
+        else:
+            sm = None if self.step_method is None else pymc3.__dict__[step_methods[self.step_method]]()
+
         # if a custom log_likelihood function requires a `sampler` argument
         # then use that log_likelihood function, with the assumption that it
         # takes in a Pymc3 Sampler, with a pymc3_model attribute, and defines
@@ -1407,7 +1432,7 @@ class Pymc3(Sampler):
 
         self.setup_prior_mapping()
 
-        self.pymc3_priors = dict()
+        self.pymc3_priors = OrderedDict()
 
         pymc3 = self.external_sampler
 
@@ -1472,17 +1497,85 @@ class Pymc3(Sampler):
         Convert any tupak likelihoods to PyMC3 distributions.
""" + try: + import theano + import theano.tensor as tt + from theano.compile.ops import as_op + except ImportError: + raise ImportError("Could not import theano") + + from tupak.core.likelihood import GaussianLikelihood, PoissonLikelihood, ExponentialLikelihood, StudentTLikelihood + from tupak.gw.likelihood import BasicGravitationalWaveTransient, GravitationalWaveTransient + + # create theano Op for the log likelihood if not using a predefined model + class LogLike(tt.Op): + + itypes = [tt.dvector] + otypes = [tt.dscalar] + + def __init__(self, parameters, loglike, priors): + self.parameters = parameters + self.likelihood = loglike + self.priors = priors + + # set the fixed parameters + for key in self.priors.keys(): + if isinstance(self.priors[key], float): + self.likelihood.parameters[key] = self.priors[key] + + self.logpgrad = LogLikeGrad(self.parameters, self.likelihood, self.priors) + + def perform(self, node, inputs, outputs): + theta, = inputs + for i, key in enumerate(self.parameters): + self.likelihood.parameters[key] = theta[i] + + outputs[0][0] = np.array(self.likelihood.log_likelihood()) + + def grad(self, inputs, g): + theta, = inputs + return [g[0]*self.logpgrad(theta)] + + # create theano Op for calculating the gradient of the log likelihood + class LogLikeGrad(tt.Op): + + itypes = [tt.dvector] + otypes = [tt.dvector] + + def __init__(self, parameters, loglike, priors): + self.parameters = parameters + self.Nparams = len(parameters) + self.likelihood = loglike + self.priors = priors + + # set the fixed parameters + for key in self.priors.keys(): + if isinstance(self.priors[key], float): + self.likelihood.parameters[key] = self.priors[key] + + def perform(self, node, inputs, outputs): + theta, = inputs + + # define version of likelihood function to pass to derivative function + def lnlike(values): + for i, key in enumerate(self.parameters): + self.likelihood.parameters[key] = values[i] + return self.likelihood.log_likelihood() + + # calculate gradients + grads = utils.derivatives(theta, lnlike, abseps=1e-5, mineps=1e-12, reltol=1e-2) + + outputs[0][0] = grads + pymc3 = self.external_sampler with self.pymc3_model: # check if it is a predefined likelhood function - if self.likelihood.__class__.__name__ == 'GaussianLikelihood': + if isinstance(self.likelihood, GaussianLikelihood): # check required attributes exist if (not hasattr(self.likelihood, 'sigma') or not hasattr(self.likelihood, 'x') or - not hasattr(self.likelihood, 'y') or - not hasattr(self.likelihood, 'function') or - not hasattr(self.likelihood, 'function_keys')): + not hasattr(self.likelihood, 'y')): raise ValueError("Gaussian Likelihood does not have all the correct attributes!") if 'sigma' in self.pymc3_priors: @@ -1496,17 +1589,15 @@ class Pymc3(Sampler): if key not in self.likelihood.function_keys: raise ValueError("Prior key '{}' is not a function key!".format(key)) - model = self.likelihood.function(self.likelihood.x, **self.pymc3_priors) + model = self.likelihood.func(self.likelihood.x, **self.pymc3_priors) # set the distribution pymc3.Normal('likelihood', mu=model, sd=self.likelihood.sigma, observed=self.likelihood.y) - elif self.likelihood.__class__.__name__ == 'PoissonLikelihood': + elif isinstance(self.likelihood, PoissonLikelihood): # check required attributes exist if (not hasattr(self.likelihood, 'x') or - not hasattr(self.likelihood, 'y') or - not hasattr(self.likelihood, 'function') or - not hasattr(self.likelihood, 'function_keys')): + not hasattr(self.likelihood, 'y')): raise ValueError("Poisson 
Likelihood does not have all the correct attributes!") for key in self.pymc3_priors: @@ -1514,16 +1605,14 @@ class Pymc3(Sampler): raise ValueError("Prior key '{}' is not a function key!".format(key)) # get rate function - model = self.likelihood.function(self.likelihood.x, **self.pymc3_priors) + model = self.likelihood.func(self.likelihood.x, **self.pymc3_priors) # set the distribution pymc3.Poisson('likelihood', mu=model, observed=self.likelihood.y) - elif self.likelihood.__class__.__name__ == 'ExponentialLikelihood': + elif isinstance(self.likelihood, ExponentialLikelihood): # check required attributes exist if (not hasattr(self.likelihood, 'x') or - not hasattr(self.likelihood, 'y') or - not hasattr(self.likelihood, 'function') or - not hasattr(self.likelihood, 'function_keys')): + not hasattr(self.likelihood, 'y')): raise ValueError("Exponential Likelihood does not have all the correct attributes!") for key in self.pymc3_priors: @@ -1531,18 +1620,16 @@ class Pymc3(Sampler): raise ValueError("Prior key '{}' is not a function key!".format(key)) # get mean function - model = self.likelihood.function(self.likelihood.x, **self.pymc3_priors) + model = self.likelihood.func(self.likelihood.x, **self.pymc3_priors) # set the distribution pymc3.Exponential('likelihood', lam=1. / model, observed=self.likelihood.y) - elif self.likelihood.__class__.__name__ == 'StudentTLikelihood': + elif isinstance(self.likelihood, StudentTLikelihood): # check required attributes exist if (not hasattr(self.likelihood, 'x') or not hasattr(self.likelihood, 'y') or not hasattr(self.likelihood, 'nu') or - not hasattr(self.likelihood, 'sigma') or - not hasattr(self.likelihood, 'function') or - not hasattr(self.likelihood, 'function_keys')): + not hasattr(self.likelihood, 'sigma')): raise ValueError("StudentT Likelihood does not have all the correct attributes!") if 'nu' in self.pymc3_priors: @@ -1556,10 +1643,25 @@ class Pymc3(Sampler): if key not in self.likelihood.function_keys: raise ValueError("Prior key '{}' is not a function key!".format(key)) - model = self.likelihood.function(self.likelihood.x, **self.pymc3_priors) + model = self.likelihood.func(self.likelihood.x, **self.pymc3_priors) # set the distribution pymc3.StudentT('likelihood', nu=self.likelihood.nu, mu=model, sd=self.likelihood.sigma, observed=self.likelihood.y) + elif isinstance(self.likelihood, (GravitationalWaveTransient, BasicGravitationalWaveTransient)): + # set theano Op - pass __search_parameter_keys, which only contains non-fixed variables + logl = LogLike(self.__search_parameter_keys, self.likelihood, self.pymc3_priors) + + parameters = OrderedDict() + for key in self.__search_parameter_keys: + try: + parameters[key] = self.pymc3_priors[key] + except KeyError: + raise KeyError("Unknown key '{}' when setting GravitationalWaveTransient likelihood".format(key)) + + # convert to theano tensor variable + values = tt.as_tensor_variable(list(parameters.values())) + + pymc3.DensityDist('likelihood', lambda v: logl(v), observed={'v': values}) else: raise ValueError("Unknown likelihood has been provided") diff --git a/tupak/core/utils.py b/tupak/core/utils.py index 3b7fcd611be714155aa51a39824419adcf17c9a0..71ae1c4894233cb17eacd4e08268a1d242b6e157 100644 --- a/tupak/core/utils.py +++ b/tupak/core/utils.py @@ -450,6 +450,141 @@ def set_up_command_line_arguments(): return args +def derivatives(vals, func, releps=1e-3, abseps=None, mineps=1e-9, reltol=1e-3, + epsscale=0.5, nonfixedidx=None): + """ + Calculate the partial derivatives of a function at a set 
of values. The + derivatives are calculated using the central difference, using an iterative + method to check that the values converge as step size decreases. + + Parameters + ---------- + vals: array_like + A set of values, that are passed to a function, at which to calculate + the gradient of that function + func: + A function that takes in an array of values. + releps: float, array_like, 1e-3 + The initial relative step size for calculating the derivative. + abseps: float, array_like, None + The initial absolute step size for calculating the derivative. + This overrides `releps` if set. + `releps` is set then that is used. + mineps: float, 1e-9 + The minimum relative step size at which to stop iterations if no + convergence is achieved. + epsscale: float, 0.5 + The factor by which releps if scaled in each iteration. + nonfixedidx: array_like, None + An array of indices in `vals` that are _not_ fixed values and therefore + can have derivatives taken. If `None` then derivatives of all values + are calculated. + + Returns + ------- + grads: array_like + An array of gradients for each non-fixed value. + """ + + if nonfixedidx is None: + nonfixedidx = range(len(vals)) + + if len(nonfixedidx) > len(vals): + raise ValueError("To many non-fixed values") + + if max(nonfixedidx) >= len(vals) or min(nonfixedidx) < 0: + raise ValueError("Non-fixed indexes contain non-existant indices") + + grads = np.zeros(len(nonfixedidx)) + + # maximum number of times the gradient can change sign + flipflopmax = 10. + + # set steps + if abseps is None: + if isinstance(releps, float): + eps = np.abs(vals)*releps + eps[eps == 0.] = releps # if any values are zero set eps to releps + teps = releps*np.ones(len(vals)) + elif isinstance(releps, (list, np.ndarray)): + if len(releps) != len(vals): + raise ValueError("Problem with input relative step sizes") + eps = np.multiply(np.abs(vals), releps) + eps[eps == 0.] = np.array(releps)[eps == 0.] + teps = releps + else: + raise RuntimeError("Relative step sizes are not a recognised type!") + else: + if isinstance(abseps, float): + eps = abseps*np.ones(len(vals)) + elif isinstance(abseps, (list, np.ndarray)): + if len(abseps) != len(vals): + raise ValueError("Problem with input absolute step sizes") + eps = np.array(abseps) + else: + raise RuntimeError("Absolute step sizes are not a recognised type!") + teps = eps + + # for each value in vals calculate the gradient + count = 0 + for i in nonfixedidx: + # initial parameter diffs + leps = eps[i] + cureps = teps[i] + + flipflop = 0 + + # get central finite difference + fvals = np.copy(vals) + bvals = np.copy(vals) + + # central difference + fvals[i] += 0.5*leps # change forwards distance to half eps + bvals[i] -= 0.5*leps # change backwards distance to half eps + cdiff = (func(fvals)-func(bvals))/leps + + while 1: + fvals[i] -= 0.5*leps # remove old step + bvals[i] += 0.5*leps + + # change the difference by a factor of two + cureps *= epsscale + if cureps < mineps or flipflop > flipflopmax: + # if no convergence set flat derivative (TODO: check if there is a better thing to do instead) + logger.warning("Derivative calculation did not converge: setting flat derivative.") + grads[count] = 0. 
+                break
+            leps *= epsscale
+
+            # central difference
+            fvals[i] += 0.5*leps  # change forwards distance to half eps
+            bvals[i] -= 0.5*leps  # change backwards distance to half eps
+            cdiffnew = (func(fvals)-func(bvals))/leps
+
+            if cdiffnew == cdiff:
+                grads[count] = cdiff
+                break
+
+            # check whether previous diff and current diff are the same within reltol
+            rat = (cdiff/cdiffnew)
+            if np.isfinite(rat) and rat > 0.:
+                # gradient has not changed sign
+                if np.abs(1.-rat) < reltol:
+                    grads[count] = cdiffnew
+                    break
+                else:
+                    cdiff = cdiffnew
+                    continue
+            else:
+                cdiff = cdiffnew
+                flipflop += 1
+                continue
+
+        count += 1
+
+    return grads
+
+
 command_line_args = set_up_command_line_arguments()
 
 setup_logger(print_version=True)
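
Usage sketch for the new `derivatives` helper in tupak/core/utils.py: a quick sanity check is to compare it against a function with an analytically known gradient. The test function and evaluation point below are illustrative, not part of the patch:

    import numpy as np
    from tupak.core import utils

    # f(x, y) = x**2 + 3*y has exact partial derivatives (2*x, 3), so the
    # iterative central-difference routine should return roughly [4., 3.]
    # when evaluated at (2, 1)
    def func(vals):
        return vals[0] ** 2 + 3. * vals[1]

    grads = utils.derivatives(np.array([2., 1.]), func, releps=1e-3)
    print(grads)  # expected: approximately [4. 3.]

For a quadratic the central difference is exact, so the loop converges at its first comparison; a non-polynomial `func` exercises the `reltol` convergence test and the sign-flip (`flipflop`) guard more thoroughly.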
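Usage sketch for the per-parameter step methods now accepted by the Pymc3 sampler: the `step` keyword may be a dictionary mapping parameter names to PyMC3 step-method names, and any parameter without an entry falls back to PyMC3's default choice (NUTS for continuous variables), per the comment in the patch. The example below is modeled loosely on the linear-regression examples; the model, data, and prior choices are illustrative assumptions, not part of the patch:

    import numpy as np
    import tupak

    def model(x, m, c):
        # straight line with gradient 'm' and intercept 'c'
        return m * x + c

    x = np.linspace(0., 10., 100)
    sigma = 1.
    y = model(x, 2.5, 0.5) + np.random.normal(0., sigma, len(x))

    likelihood = tupak.core.likelihood.GaussianLikelihood(x, y, model, sigma=sigma)
    priors = dict(m=tupak.core.prior.Uniform(0., 5., 'm'),
                  c=tupak.core.prior.Uniform(-2., 2., 'c'))

    # keys of 'step' must be search parameter keys; an unknown parameter
    # name or step-method name raises ValueError during sampler setup
    result = tupak.run_sampler(
        likelihood=likelihood, priors=priors, sampler='pymc3',
        step={'m': 'Metropolis', 'c': 'Slice'}, draws=1000)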