From 353f87e87b1a501239a58aab1d5ba774e7361a74 Mon Sep 17 00:00:00 2001
From: Liting Xiao <liting.xiao@ligo.org>
Date: Thu, 24 Oct 2019 18:59:25 -0500
Subject: [PATCH] Speed up core.prior classes

---
 bilby/core/prior.py | 281 ++++++++++++++++++++++++++------------------
 test/prior_test.py  |  80 +++++++++++++
 2 files changed, 245 insertions(+), 116 deletions(-)

diff --git a/bilby/core/prior.py b/bilby/core/prior.py
index 15ece0185..cd1fa31ba 100644
--- a/bilby/core/prior.py
+++ b/bilby/core/prior.py
@@ -12,7 +12,8 @@ import numpy as np
 import scipy.stats
 from scipy.integrate import cumtrapz
 from scipy.interpolate import interp1d
-from scipy.special import erf, erfinv
+from scipy.special import erf, erfinv, xlogy, log1p,\
+    gammaln, gammainc, gammaincinv, stdtr, stdtrit, betaln, btdtr, btdtri
 from matplotlib.cbook import flatten
 
 # Keep import bilby statement, it is necessary for some eval() statements
@@ -1155,10 +1156,7 @@ class Uniform(Prior):
         -------
         float: log probability of val
         """
-        with np.errstate(divide='ignore'):
-            _ln_prob = np.log((val >= self.minimum) & (val <= self.maximum), dtype=np.float64)\
-                - np.log(self.maximum - self.minimum)
-        return _ln_prob
+        return xlogy(1, (val >= self.minimum) & (val <= self.maximum)) - xlogy(1, self.maximum - self.minimum)
 
     def cdf(self, val):
         _cdf = (val - self.minimum) / (self.maximum - self.minimum)
@@ -1600,7 +1598,7 @@ class LogNormal(Prior):
         This maps to the inverse CDF. This has been analytically solved for this case.
         """
         self.test_valid_for_rescaling(val)
-        return scipy.stats.lognorm.ppf(val, self.sigma, scale=np.exp(self.mu))
+        return np.exp(self.mu + np.sqrt(2 * self.sigma ** 2) * erfinv(2 * val - 1))
 
     def prob(self, val):
         """Returns the prior probability of val.
@@ -1613,8 +1611,18 @@ class LogNormal(Prior):
         -------
         Union[float, array_like]: Prior probability of val
         """
-
-        return scipy.stats.lognorm.pdf(val, self.sigma, scale=np.exp(self.mu))
+        if isinstance(val, (float, int)):
+            if val <= self.minimum:
+                _prob = 0.
+            else:
+                _prob = np.exp(-(np.log(val) - self.mu) ** 2 / self.sigma ** 2 / 2)\
+                    / np.sqrt(2 * np.pi) / val / self.sigma
+        else:
+            _prob = np.zeros(len(val))
+            idx = (val > self.minimum)
+            _prob[idx] = np.exp(-(np.log(val[idx]) - self.mu) ** 2 / self.sigma ** 2 / 2)\
+                / np.sqrt(2 * np.pi) / val[idx] / self.sigma
+        return _prob
 
     def ln_prob(self, val):
         """Returns the log prior probability of val.
@@ -1627,11 +1635,30 @@ class LogNormal(Prior):
         -------
         Union[float, array_like]: Prior probability of val
         """
-
-        return scipy.stats.lognorm.logpdf(val, self.sigma, scale=np.exp(self.mu))
+        if isinstance(val, (float, int)):
+            if val <= self.minimum:
+                _ln_prob = -np.inf
+            else:
+                _ln_prob = -(np.log(val) - self.mu) ** 2 / self.sigma ** 2 / 2\
+                    - np.log(np.sqrt(2 * np.pi) * val * self.sigma)
+        else:
+            _ln_prob = -np.inf * np.ones(len(val))
+            idx = (val > self.minimum)
+            _ln_prob[idx] = -(np.log(val[idx]) - self.mu) ** 2\
+                / self.sigma ** 2 / 2 - np.log(np.sqrt(2 * np.pi) * val[idx] * self.sigma)
+        return _ln_prob
 
     def cdf(self, val):
-        return scipy.stats.lognorm.cdf(val, self.sigma, scale=np.exp(self.mu))
+        if isinstance(val, (float, int)):
+            if val <= self.minimum:
+                _cdf = 0.
+            else:
+                _cdf = 0.5 + erf((np.log(val) - self.mu) / self.sigma / np.sqrt(2)) / 2
+        else:
+            _cdf = np.zeros(len(val))
+            _cdf[val > self.minimum] = 0.5 + erf((
+                np.log(val[val > self.minimum]) - self.mu) / self.sigma / np.sqrt(2)) / 2
+        return _cdf
 
 
 class LogGaussian(LogNormal):
@@ -1666,7 +1693,7 @@ class Exponential(Prior):
         This maps to the inverse CDF. This has been analytically solved for this case.
         """
         self.test_valid_for_rescaling(val)
-        return scipy.stats.expon.ppf(val, scale=self.mu)
+        return -self.mu * log1p(-val)
 
     def prob(self, val):
         """Return the prior probability of val.
@@ -1679,8 +1706,15 @@ class Exponential(Prior):
         -------
         Union[float, array_like]: Prior probability of val
         """
-
-        return scipy.stats.expon.pdf(val, scale=self.mu)
+        if isinstance(val, (float, int)):
+            if val < self.minimum:
+                _prob = 0.
+            else:
+                _prob = np.exp(-val / self.mu) / self.mu
+        else:
+            _prob = np.zeros(len(val))
+            _prob[val >= self.minimum] = np.exp(-val[val >= self.minimum] / self.mu) / self.mu
+        return _prob
 
     def ln_prob(self, val):
         """Returns the log prior probability of val.
@@ -1693,11 +1727,26 @@ class Exponential(Prior):
         -------
         Union[float, array_like]: Prior probability of val
         """
-
-        return scipy.stats.expon.logpdf(val, scale=self.mu)
+        if isinstance(val, (float, int)):
+            if val < self.minimum:
+                _ln_prob = -np.inf
+            else:
+                _ln_prob = -val / self.mu - np.log(self.mu)
+        else:
+            _ln_prob = -np.inf * np.ones(len(val))
+            _ln_prob[val >= self.minimum] = -val[val >= self.minimum] / self.mu - np.log(self.mu)
+        return _ln_prob
 
     def cdf(self, val):
-        return scipy.stats.expon.cdf(val, scale=self.mu)
+        if isinstance(val, (float, int)):
+            if val < self.minimum:
+                _cdf = 0.
+            else:
+                _cdf = 1. - np.exp(-val / self.mu)
+        else:
+            _cdf = np.zeros(len(val))
+            _cdf[val >= self.minimum] = 1. - np.exp(-val[val >= self.minimum] / self.mu)
+        return _cdf
 
 
 class StudentT(Prior):
@@ -1741,9 +1790,18 @@ class StudentT(Prior):
         This maps to the inverse CDF. This has been analytically solved for this case.
         """
         self.test_valid_for_rescaling(val)
-
-        # use scipy distribution percentage point function (ppf)
-        return scipy.stats.t.ppf(val, self.df, loc=self.mu, scale=self.scale)
+        if isinstance(val, (float, int)):
+            if val == 0:
+                rescaled = -np.inf
+            elif val == 1:
+                rescaled = np.inf
+            else:
+                rescaled = stdtrit(self.df, val) * self.scale + self.mu
+        else:
+            rescaled = stdtrit(self.df, val) * self.scale + self.mu
+            rescaled[val == 0] = -np.inf
+            rescaled[val == 1] = np.inf
+        return rescaled
 
     def prob(self, val):
         """Return the prior probability of val.
@@ -1756,7 +1814,7 @@ class StudentT(Prior):
         -------
         Union[float, array_like]: Prior probability of val
         """
-        return scipy.stats.t.pdf(val, self.df, loc=self.mu, scale=self.scale)
+        return np.exp(self.ln_prob(val))
 
     def ln_prob(self, val):
         """Returns the log prior probability of val.
@@ -1769,11 +1827,12 @@ class StudentT(Prior):
         -------
         Union[float, array_like]: Prior probability of val
         """
-
-        return scipy.stats.t.logpdf(val, self.df, loc=self.mu, scale=self.scale)
+        return gammaln(0.5 * (self.df + 1)) - gammaln(0.5 * self.df)\
+            - np.log(np.sqrt(np.pi * self.df) * self.scale) - (self.df + 1) / 2 *\
+            np.log(1 + ((val - self.mu) / self.scale) ** 2 / self.df)
 
     def cdf(self, val):
-        return scipy.stats.t.cdf(val, self.df, loc=self.mu, scale=self.scale)
+        return stdtr(self.df, (val - self.mu) / self.scale)
 
 
 class Beta(Prior):
@@ -1805,16 +1864,14 @@ class Beta(Prior):
         boundary: str
             See superclass
         """
+        super(Beta, self).__init__(minimum=minimum, maximum=maximum, name=name,
+                                   latex_label=latex_label, unit=unit, boundary=boundary)
+        if alpha <= 0. or beta <= 0.:
             raise ValueError("alpha and beta must both be positive values")
 
-        self._alpha = alpha
-        self._beta = beta
-        self._minimum = minimum
-        self._maximum = maximum
-        super(Beta, self).__init__(minimum=minimum, maximum=maximum, name=name,
-                                   latex_label=latex_label, unit=unit, boundary=boundary)
-        self._set_dist()
+        self.alpha = alpha
+        self.beta = beta
 
     def rescale(self, val):
         """
@@ -1823,9 +1880,7 @@ class Beta(Prior):
         This maps to the inverse CDF. This has been analytically solved for this case.
         """
         self.test_valid_for_rescaling(val)
-
-        # use scipy distribution percentage point function (ppf)
-        return self._dist.ppf(val)
+        return btdtri(self.alpha, self.beta, val) * (self.maximum - self.minimum) + self.minimum
 
     def prob(self, val):
         """Return the prior probability of val.
@@ -1838,18 +1893,7 @@ class Beta(Prior):
         -------
         Union[float, array_like]: Prior probability of val
         """
-
-        spdf = self._dist.pdf(val)
-        if np.all(np.isfinite(spdf)):
-            return spdf
-
-        # deal with the fact that if alpha or beta are < 1 you get infinities at 0 and 1
-        if isinstance(val, np.ndarray):
-            pdf = np.zeros(len(val))
-            pdf[np.isfinite(spdf)] = spdf[np.isfinite]
-            return spdf
-        else:
-            return 0.
+        return np.exp(self.ln_prob(val))
 
     def ln_prob(self, val):
         """Returns the log prior probability of val.
@@ -1862,61 +1906,35 @@ class Beta(Prior):
         -------
         Union[float, array_like]: Prior probability of val
         """
+        _ln_prob = xlogy(self.alpha - 1, val - self.minimum) + xlogy(self.beta - 1, self.maximum - val)\
+            - betaln(self.alpha, self.beta) - xlogy(self.alpha + self.beta - 1, self.maximum - self.minimum)
 
-        spdf = self._dist.logpdf(val)
-        if np.all(np.isfinite(spdf)):
-            return spdf
-
+        # deal with the fact that if alpha or beta are < 1 you get infinities at 0 and 1
         if isinstance(val, np.ndarray):
-            pdf = -np.inf * np.ones(len(val))
-            pdf[np.isfinite(spdf)] = spdf[np.isfinite]
-            return spdf
+            _ln_prob_sub = -np.inf * np.ones(len(val))
+            idx = np.isfinite(_ln_prob) & (val >= self.minimum) & (val <= self.maximum)
+            _ln_prob_sub[idx] = _ln_prob[idx]
+            return _ln_prob_sub
         else:
+            if np.isfinite(_ln_prob) and val >= self.minimum and val <= self.maximum:
+                return _ln_prob
             return -np.inf
 
     def cdf(self, val):
-        return self._dist.cdf(val)
-
-    def _set_dist(self):
-        self._dist = scipy.stats.beta(
-            a=self.alpha, b=self.beta, loc=self.minimum,
-            scale=(self.maximum - self.minimum))
-
-    @property
-    def maximum(self):
-        return self._maximum
-
-    @maximum.setter
-    def maximum(self, maximum):
-        self._maximum = maximum
-        self._set_dist()
-
-    @property
-    def minimum(self):
-        return self._minimum
-
-    @minimum.setter
-    def minimum(self, minimum):
-        self._minimum = minimum
-        self._set_dist()
-
-    @property
-    def alpha(self):
-        return self._alpha
-
-    @alpha.setter
-    def alpha(self, alpha):
-        self._alpha = alpha
-        self._set_dist()
-
-    @property
-    def beta(self):
-        return self._beta
-
-    @beta.setter
-    def beta(self, beta):
-        self._beta = beta
-        self._set_dist()
+        if isinstance(val, (float, int)):
+            if val > self.maximum:
+                return 1.
+            elif val < self.minimum:
+                return 0.
+            else:
+                return btdtr(self.alpha, self.beta,
+                             (val - self.minimum) / (self.maximum - self.minimum))
+        else:
+            _cdf = np.nan_to_num(btdtr(self.alpha, self.beta,
+                                 (val - self.minimum) / (self.maximum - self.minimum)))
+            _cdf[val < self.minimum] = 0.
+            _cdf[val > self.maximum] = 1.
+            return _cdf
 
 
 class Logistic(Prior):
@@ -1955,9 +1973,19 @@ class Logistic(Prior):
         This maps to the inverse CDF. This has been analytically solved for this case.
""" self.test_valid_for_rescaling(val) - - # use scipy distribution percentage point function (ppf) - return scipy.stats.logistic.ppf(val, loc=self.mu, scale=self.scale) + if isinstance(val, (float, int)): + if val == 0: + rescaled = -np.inf + elif val == 1: + rescaled = np.inf + else: + rescaled = self.mu + self.scale * np.log(val / (1. - val)) + else: + rescaled = np.inf * np.ones(len(val)) + rescaled[val == 0] = -np.inf + rescaled[(val > 0) & (val < 1)] = self.mu + self.scale\ + * np.log(val[(val > 0) & (val < 1)] / (1. - val[(val > 0) & (val < 1)])) + return rescaled def prob(self, val): """Return the prior probability of val. @@ -1970,7 +1998,7 @@ class Logistic(Prior): ------- Union[float, array_like]: Prior probability of val """ - return scipy.stats.logistic.pdf(val, loc=self.mu, scale=self.scale) + return np.exp(self.ln_prob(val)) def ln_prob(self, val): """Returns the log prior probability of val. @@ -1983,11 +2011,11 @@ class Logistic(Prior): ------- Union[float, array_like]: Prior probability of val """ - - return scipy.stats.logistic.logpdf(val, loc=self.mu, scale=self.scale) + return -(val - self.mu) / self.scale -\ + 2. * np.log(1. + np.exp(-(val - self.mu) / self.scale)) - np.log(self.scale) def cdf(self, val): - return scipy.stats.logistic.cdf(val, loc=self.mu, scale=self.scale) + return 1. / (1. + np.exp(-(val - self.mu) / self.scale)) class Cauchy(Prior): @@ -2026,9 +2054,16 @@ class Cauchy(Prior): This maps to the inverse CDF. This has been analytically solved for this case. """ self.test_valid_for_rescaling(val) - - # use scipy distribution percentage point function (ppf) - return scipy.stats.cauchy.ppf(val, loc=self.alpha, scale=self.beta) + rescaled = self.alpha + self.beta * np.tan(np.pi * (val - 0.5)) + if isinstance(val, (float, int)): + if val == 1: + rescaled = np.inf + elif val == 0: + rescaled = -np.inf + else: + rescaled[val == 1] = np.inf + rescaled[val == 0] = -np.inf + return rescaled def prob(self, val): """Return the prior probability of val. @@ -2041,7 +2076,7 @@ class Cauchy(Prior): ------- Union[float, array_like]: Prior probability of val """ - return scipy.stats.cauchy.pdf(val, loc=self.alpha, scale=self.beta) + return 1. / self.beta / np.pi / (1. + ((val - self.alpha) / self.beta) ** 2) def ln_prob(self, val): """Return the log prior probability of val. @@ -2054,10 +2089,10 @@ class Cauchy(Prior): ------- Union[float, array_like]: Log prior probability of val """ - return scipy.stats.cauchy.logpdf(val, loc=self.alpha, scale=self.beta) + return - np.log(self.beta * np.pi) - np.log(1. + ((val - self.alpha) / self.beta) ** 2) def cdf(self, val): - return scipy.stats.cauchy.cdf(val, loc=self.alpha, scale=self.beta) + return 0.5 + np.arctan((val - self.alpha) / self.beta) / np.pi class Lorentzian(Cauchy): @@ -2101,9 +2136,7 @@ class Gamma(Prior): This maps to the inverse CDF. This has been analytically solved for this case. """ self.test_valid_for_rescaling(val) - - # use scipy distribution percentage point function (ppf) - return scipy.stats.gamma.ppf(val, self.k, loc=0., scale=self.theta) + return gammaincinv(self.k, val) * self.theta def prob(self, val): """Return the prior probability of val. @@ -2116,8 +2149,7 @@ class Gamma(Prior): ------- Union[float, array_like]: Prior probability of val """ - - return scipy.stats.gamma.pdf(val, self.k, loc=0., scale=self.theta) + return np.exp(self.ln_prob(val)) def ln_prob(self, val): """Returns the log prior probability of val. 
@@ -2130,11 +2162,28 @@ class Gamma(Prior):
         -------
         Union[float, array_like]: Prior probability of val
         """
-
-        return scipy.stats.gamma.logpdf(val, self.k, loc=0., scale=self.theta)
+        if isinstance(val, (float, int)):
+            if val < self.minimum:
+                _ln_prob = -np.inf
+            else:
+                _ln_prob = xlogy(self.k - 1, val) - val / self.theta - xlogy(self.k, self.theta) - gammaln(self.k)
+        else:
+            _ln_prob = -np.inf * np.ones(len(val))
+            idx = (val >= self.minimum)
+            _ln_prob[idx] = xlogy(self.k - 1, val[idx]) - val[idx] / self.theta\
+                - xlogy(self.k, self.theta) - gammaln(self.k)
+        return _ln_prob
 
     def cdf(self, val):
-        return scipy.stats.gamma.cdf(val, self.k, loc=0., scale=self.theta)
+        if isinstance(val, (float, int)):
+            if val < self.minimum:
+                _cdf = 0.
+            else:
+                _cdf = gammainc(self.k, val / self.theta)
+        else:
+            _cdf = np.zeros(len(val))
+            _cdf[val >= self.minimum] = gammainc(self.k, val[val >= self.minimum] / self.theta)
+        return _cdf
 
 
 class ChiSquared(Gamma):
diff --git a/test/prior_test.py b/test/prior_test.py
index 852031a72..e9a06d653 100644
--- a/test/prior_test.py
+++ b/test/prior_test.py
@@ -5,6 +5,7 @@ from mock import Mock
 import numpy as np
 import os
 from collections import OrderedDict
+import scipy.stats as ss
 
 
 class TestPriorInstantiationWithoutOptionalPriors(unittest.TestCase):
@@ -468,6 +469,85 @@
             domain = np.linspace(prior.minimum, prior.maximum, 1000)
             self.assertAlmostEqual(np.trapz(prior.prob(domain), domain), 1, 3)
 
+    def test_accuracy(self):
+        """Test that each of the priors' functions is calculated accurately, as compared to scipy's calculations"""
+        for prior in self.priors:
+            rescale_domain = np.linspace(0, 1, 1000)
+            if isinstance(prior, bilby.core.prior.Uniform):
+                domain = np.linspace(-5, 5, 100)
+                scipy_prob = ss.uniform.pdf(domain, loc=0, scale=1)
+                scipy_lnprob = ss.uniform.logpdf(domain, loc=0, scale=1)
+                scipy_cdf = ss.uniform.cdf(domain, loc=0, scale=1)
+                scipy_rescale = ss.uniform.ppf(rescale_domain, loc=0, scale=1)
+            elif isinstance(prior, bilby.core.prior.Gaussian):
+                domain = np.linspace(-1e2, 1e2, 1000)
+                scipy_prob = ss.norm.pdf(domain, loc=0, scale=1)
+                scipy_lnprob = ss.norm.logpdf(domain, loc=0, scale=1)
+                scipy_cdf = ss.norm.cdf(domain, loc=0, scale=1)
+                scipy_rescale = ss.norm.ppf(rescale_domain, loc=0, scale=1)
+            elif isinstance(prior, bilby.core.prior.Cauchy):
+                domain = np.linspace(-1e2, 1e2, 1000)
+                scipy_prob = ss.cauchy.pdf(domain, loc=0, scale=1)
+                scipy_lnprob = ss.cauchy.logpdf(domain, loc=0, scale=1)
+                scipy_cdf = ss.cauchy.cdf(domain, loc=0, scale=1)
+                scipy_rescale = ss.cauchy.ppf(rescale_domain, loc=0, scale=1)
+            elif isinstance(prior, bilby.core.prior.StudentT):
+                domain = np.linspace(-1e2, 1e2, 1000)
+                scipy_prob = ss.t.pdf(domain, 3, loc=0, scale=1)
+                scipy_lnprob = ss.t.logpdf(domain, 3, loc=0, scale=1)
+                scipy_cdf = ss.t.cdf(domain, 3, loc=0, scale=1)
+                scipy_rescale = ss.t.ppf(rescale_domain, 3, loc=0, scale=1)
+            elif (isinstance(prior, bilby.core.prior.Gamma) and
+                    not isinstance(prior, bilby.core.prior.ChiSquared)):
+                domain = np.linspace(0., 1e2, 5000)
+                scipy_prob = ss.gamma.pdf(domain, 1, loc=0, scale=1)
+                scipy_lnprob = ss.gamma.logpdf(domain, 1, loc=0, scale=1)
+                scipy_cdf = ss.gamma.cdf(domain, 1, loc=0, scale=1)
+                scipy_rescale = ss.gamma.ppf(rescale_domain, 1, loc=0, scale=1)
+            elif isinstance(prior, bilby.core.prior.LogNormal):
+                domain = np.linspace(0., 1e2, 1000)
+                scipy_prob = ss.lognorm.pdf(domain, 1, scale=1)
+                scipy_lnprob = ss.lognorm.logpdf(domain, 1, scale=1)
+                scipy_cdf = ss.lognorm.cdf(domain, 1, scale=1)
+                scipy_rescale = ss.lognorm.ppf(rescale_domain, 1, scale=1)
+            elif isinstance(prior, bilby.core.prior.Exponential):
+                domain = np.linspace(0., 1e2, 5000)
+                scipy_prob = ss.expon.pdf(domain, scale=1)
+                scipy_lnprob = ss.expon.logpdf(domain, scale=1)
+                scipy_cdf = ss.expon.cdf(domain, scale=1)
+                scipy_rescale = ss.expon.ppf(rescale_domain, scale=1)
+            elif isinstance(prior, bilby.core.prior.Logistic):
+                domain = np.linspace(-1e2, 1e2, 1000)
+                scipy_prob = ss.logistic.pdf(domain, loc=0, scale=1)
+                scipy_lnprob = ss.logistic.logpdf(domain, loc=0, scale=1)
+                scipy_cdf = ss.logistic.cdf(domain, loc=0, scale=1)
+                scipy_rescale = ss.logistic.ppf(rescale_domain, loc=0, scale=1)
+            elif isinstance(prior, bilby.core.prior.ChiSquared):
+                domain = np.linspace(0., 1e2, 5000)
+                scipy_prob = ss.gamma.pdf(domain, 1, loc=0, scale=2)
+                scipy_lnprob = ss.gamma.logpdf(domain, 1, loc=0, scale=2)
+                scipy_cdf = ss.gamma.cdf(domain, 1, loc=0, scale=2)
+                scipy_rescale = ss.gamma.ppf(rescale_domain, 1, loc=0, scale=2)
+            elif isinstance(prior, bilby.core.prior.Beta):
+                domain = np.linspace(-5, 5, 5000)
+                scipy_prob = ss.beta.pdf(domain, 2, 2, loc=0, scale=1)
+                scipy_lnprob = ss.beta.logpdf(domain, 2, 2, loc=0, scale=1)
+                scipy_cdf = ss.beta.cdf(domain, 2, 2, loc=0, scale=1)
+                scipy_rescale = ss.beta.ppf(rescale_domain, 2, 2, loc=0, scale=1)
+            else:
+                continue
+            testTuple = (
+                bilby.core.prior.Uniform, bilby.core.prior.Gaussian,
+                bilby.core.prior.Cauchy, bilby.core.prior.StudentT,
+                bilby.core.prior.Exponential, bilby.core.prior.Logistic,
+                bilby.core.prior.LogNormal, bilby.core.prior.Gamma,
+                bilby.core.prior.Beta)
+            if isinstance(prior, testTuple):
+                np.testing.assert_almost_equal(prior.prob(domain), scipy_prob)
+                np.testing.assert_almost_equal(prior.ln_prob(domain), scipy_lnprob)
+                np.testing.assert_almost_equal(prior.cdf(domain), scipy_cdf)
+                np.testing.assert_almost_equal(prior.rescale(rescale_domain), scipy_rescale)
+
     def test_unit_setting(self):
         for prior in self.priors:
             if isinstance(prior, bilby.gw.prior.Cosmological):
--
GitLab
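The whole patch rests on one idea: a call like scipy.stats.lognorm.ppf re-validates
and broadcasts its shape, loc, and scale arguments on every invocation, while the
underlying scipy.special primitives (erfinv, gammaincinv, stdtrit, btdtri, ...) do
not, so evaluating the closed-form expressions directly avoids that per-call
overhead. Below is a minimal standalone sketch of that idea for one code path,
LogNormal.rescale. It is not part of the patch; the sample size and repeat count
are arbitrary choices, and absolute timings depend on machine and scipy version.

# Minimal sketch (not from the patch): check that the analytic inverse CDF used
# by LogNormal.rescale agrees with scipy.stats, then compare rough timings.
import timeit

import numpy as np
import scipy.stats
from scipy.special import erfinv

mu, sigma = 0.0, 1.0
val = np.random.uniform(0.0, 1.0, 10000)

# Old path: generic frozen-distribution machinery behind scipy.stats.lognorm.ppf.
old = scipy.stats.lognorm.ppf(val, sigma, scale=np.exp(mu))
# New path: exp(mu + sqrt(2 * sigma**2) * erfinv(2 * val - 1)), as in the patch.
new = np.exp(mu + np.sqrt(2 * sigma ** 2) * erfinv(2 * val - 1))
assert np.allclose(old, new)  # same quantiles up to floating-point precision

t_old = timeit.timeit(
    lambda: scipy.stats.lognorm.ppf(val, sigma, scale=np.exp(mu)), number=100)
t_new = timeit.timeit(
    lambda: np.exp(mu + np.sqrt(2 * sigma ** 2) * erfinv(2 * val - 1)), number=100)
print('lognorm.ppf: {:.3f} s  analytic erfinv: {:.3f} s'.format(t_old, t_new))

The test_accuracy test added above does the agreement half of this check across
all of the touched priors; the timing half is what motivates the change.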