Skip to content
Snippets Groups Projects
Commit edc0c4d5 authored by Colm Talbot's avatar Colm Talbot Committed by Moritz Huebner
Browse files

Resolve "Use evidences in hyper pe"

parent 4a6817c6
No related branches found
No related tags found
No related merge requests found
......@@ -18,6 +18,7 @@ Changes currently on master, but not under a tag.
- Adds custom titles to corner plots
- Adds plotting of the prior on 1D marginal distributions of corner plots
- Adds a method to plot time-domain GW data
- Hyperparameter estimation now enables the user to provide the single event evidences
### Changes
- Fix construct_cbc_derived_parameters
......
......@@ -31,7 +31,8 @@ fig1, ax1 = plt.subplots()
fig2, ax2 = plt.subplots()
# Make the sample sets
samples = []
samples = list()
evidences = list()
for i in range(Nevents):
c0 = np.random.normal(true_mu_c0, true_sigma_c0)
c1 = np.random.uniform(-1, 1)
......@@ -50,6 +51,7 @@ for i in range(Nevents):
ax2.hist(result.posterior.c0, color=line[0].get_color(), normed=True,
alpha=0.5, label=labels[i])
samples.append(result.posterior)
evidences.append(result.log_evidence)
ax1.set_xlabel('x')
ax1.set_ylabel('y(x)')
......@@ -72,7 +74,7 @@ def run_prior(data):
hp_likelihood = HyperparameterLikelihood(
posteriors=samples, hyper_prior=hyper_prior,
sampling_prior=run_prior, max_samples=500)
sampling_prior=run_prior, log_evidences=evidences, max_samples=500)
hp_priors = dict(mu=Uniform(-10, 10, 'mu', '$\mu_{c0}$'),
sigma=Uniform(0, 10, 'sigma', '$\sigma_{c0}$'))
......@@ -80,5 +82,6 @@ hp_priors = dict(mu=Uniform(-10, 10, 'mu', '$\mu_{c0}$'),
# And run sampler
result = run_sampler(
likelihood=hp_likelihood, priors=hp_priors, sampler='dynesty', nlive=1000,
outdir=outdir, label='hyper_parameter', verbose=True, clean=True)
use_ratio=False, outdir=outdir, label='hyper_parameter',
verbose=True, clean=True)
result.plot_corner(truth=dict(mu=true_mu_c0, sigma=true_sigma_c0))
import unittest
import numpy as np
import pandas as pd
import tupak.hyper as hyp
class TestHyperLikelihood(unittest.TestCase):
def setUp(self):
self.keys = ['a', 'b', 'c']
self.lengths = [300, 400, 500]
self.posteriors = list()
for ii, length in enumerate(self.lengths):
self.posteriors.append(
pd.DataFrame(
{key: np.random.normal(0, 1, length) for key in self.keys}))
self.log_evidences = [2, 2, 2]
self.model = hyp.model.Model(list())
self.sampling_model = hyp.model.Model(list())
def tearDown(self):
del self.keys
del self.lengths
del self.posteriors
del self.log_evidences
def test_evidence_factor_with_evidences(self):
like = hyp.likelihood.HyperparameterLikelihood(
self.posteriors, self.model, self.sampling_model,
log_evidences=self.log_evidences)
self.assertEqual(like.evidence_factor, 6)
def test_evidence_factor_without_evidences(self):
like = hyp.likelihood.HyperparameterLikelihood(
self.posteriors, self.model, self.sampling_model)
self.assertTrue(np.isnan(like.evidence_factor))
def test_len_samples_with_max_samples(self):
like = hyp.likelihood.HyperparameterLikelihood(
self.posteriors, self.model, self.sampling_model,
log_evidences=self.log_evidences, max_samples=10)
self.assertEqual(like.samples_per_posterior, 10)
def test_len_samples_without_max_samples(self):
like = hyp.likelihood.HyperparameterLikelihood(
self.posteriors, self.model, self.sampling_model,
log_evidences=self.log_evidences)
self.assertEqual(like.samples_per_posterior, min(self.lengths))
def test_resample_with_max_samples(self):
like = hyp.likelihood.HyperparameterLikelihood(
self.posteriors, self.model, self.sampling_model,
log_evidences=self.log_evidences)
resampled = like.resample_posteriors()
self.assertEqual(resampled['a'].shape,
(len(self.lengths), min(self.lengths)))
def test_resample_without_max_samples(self):
like = hyp.likelihood.HyperparameterLikelihood(
self.posteriors, self.model, self.sampling_model,
log_evidences=self.log_evidences)
resampled = like.resample_posteriors(10)
self.assertEqual(resampled['a'].shape, (len(self.lengths), 10))
if __name__ == '__main__':
unittest.main()
......@@ -7,29 +7,39 @@ from .model import Model
class HyperparameterLikelihood(Likelihood):
""" A likelihood for infering hyperparameter posterior distributions
""" A likelihood for inferring hyperparameter posterior distributions
See Eq. (1) of https://arxiv.org/abs/1801.02699 for a definition.
See Eq. (34) of https://arxiv.org/abs/1809.02293 for a definition.
Parameters
----------
posteriors: list
An list of pandas data frames of samples sets of samples. Each set may have
a different size.
An list of pandas data frames of samples sets of samples.
Each set may have a different size.
hyper_prior: `tupak.hyper.model.Model`
The population model, this can alternatively be a function.
sampling_prior: `tupak.hyper.model.Model`
The sampling prior, this can alternatively be a function.
log_evidences: list, optional
Log evidences for single runs to ensure proper normalisation
of the hyperparameter likelihood. If not provided, the original
evidences will be set to 0. This produces a Bayes factor between
the sampling prior and the hyperparameterised model.
max_samples: int, optional
Maximum number of samples to use from each set.
"""
def __init__(self, posteriors, hyper_prior, sampling_prior, max_samples=1e100):
def __init__(self, posteriors, hyper_prior, sampling_prior,
log_evidences=None, max_samples=1e100):
if not isinstance(hyper_prior, Model):
hyper_prior = Model([hyper_prior])
if not isinstance(sampling_prior, Model):
sampling_prior = Model([sampling_prior])
if log_evidences is not None:
self.evidence_factor = np.sum(log_evidences)
else:
self.evidence_factor = np.nan
self.posteriors = posteriors
self.hyper_prior = hyper_prior
self.sampling_prior = sampling_prior
......@@ -39,21 +49,44 @@ class HyperparameterLikelihood(Likelihood):
self.data = self.resample_posteriors()
self.n_posteriors = len(self.posteriors)
self.samples_per_posterior = self.max_samples
self.log_factor = - self.n_posteriors * np.log(self.samples_per_posterior)
self.samples_factor =\
- self.n_posteriors * np.log(self.samples_per_posterior)
def log_likelihood(self):
def log_likelihood_ratio(self):
self.hyper_prior.parameters.update(self.parameters)
log_l = np.sum(np.log(np.sum(self.hyper_prior.prob(self.data) /
self.sampling_prior.prob(self.data), axis=-1))) + self.log_factor
self.sampling_prior.prob(self.data), axis=-1)))
log_l += self.samples_factor
return np.nan_to_num(log_l)
def noise_log_likelihood(self):
return self.evidence_factor
def log_likelihood(self):
return self.noise_log_likelihood() + self.log_likelihood_ratio()
def resample_posteriors(self, max_samples=None):
"""
Convert list of pandas DataFrame object to dict of arrays.
Parameters
----------
max_samples: int, opt
Maximum number of samples to take from each posterior,
default is length of shortest posterior chain.
Returns
-------
data: dict
Dictionary containing arrays of size (n_posteriors, max_samples)
There is a key for each shared key in self.posteriors.
"""
if max_samples is not None:
self.max_samples = max_samples
for posterior in self.posteriors:
self.max_samples = min(len(posterior), self.max_samples)
data = {key: [] for key in self.posteriors[0]}
logging.debug('Downsampling to {} samples per posterior.'.format(self.max_samples))
logging.debug('Downsampling to {} samples per posterior.'.format(
self.max_samples))
for posterior in self.posteriors:
temp = posterior.sample(self.max_samples)
for key in data:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment