Maintenance will be performed on git.ligo.org, chat.ligo.org, containers.ligo.org, and docs.ligo.org on Tuesday 7th July 2020 starting at approximately 10am PDT and lasting for around 15 minutes. There will be a short period of downtime towards the end of the maintenance window. Please direct any comments, questions, or concerns to uwm-help@cgca.uwm.edu.

Commit edc0c4d5 authored by Colm Talbot, committed by Moritz Huebner

Resolve "Use evidences in hyper pe"

parent 4a6817c6
......@@ -18,6 +18,7 @@ Changes currently on master, but not under a tag.
- Adds custom titles to corner plots
- Adds plotting of the prior on 1D marginal distributions of corner plots
- Adds a method to plot time-domain GW data
- Hyperparameter estimation now enables the user to provide the single event evidences
### Changes
- Fix construct_cbc_derived_parameters
......
......@@ -31,7 +31,8 @@ fig1, ax1 = plt.subplots()
fig2, ax2 = plt.subplots()
# Make the sample sets
samples = []
samples = list()
evidences = list()
for i in range(Nevents):
c0 = np.random.normal(true_mu_c0, true_sigma_c0)
c1 = np.random.uniform(-1, 1)
......@@ -50,6 +51,7 @@ for i in range(Nevents):
ax2.hist(result.posterior.c0, color=line[0].get_color(), normed=True,
alpha=0.5, label=labels[i])
samples.append(result.posterior)
evidences.append(result.log_evidence)
ax1.set_xlabel('x')
ax1.set_ylabel('y(x)')
......@@ -72,7 +74,7 @@ def run_prior(data):
hp_likelihood = HyperparameterLikelihood(
posteriors=samples, hyper_prior=hyper_prior,
sampling_prior=run_prior, max_samples=500)
sampling_prior=run_prior, log_evidences=evidences, max_samples=500)
hp_priors = dict(mu=Uniform(-10, 10, 'mu', '$\mu_{c0}$'),
sigma=Uniform(0, 10, 'sigma', '$\sigma_{c0}$'))
......@@ -80,5 +82,6 @@ hp_priors = dict(mu=Uniform(-10, 10, 'mu', '$\mu_{c0}$'),
# And run sampler
result = run_sampler(
likelihood=hp_likelihood, priors=hp_priors, sampler='dynesty', nlive=1000,
outdir=outdir, label='hyper_parameter', verbose=True, clean=True)
use_ratio=False, outdir=outdir, label='hyper_parameter',
verbose=True, clean=True)
result.plot_corner(truth=dict(mu=true_mu_c0, sigma=true_sigma_c0))
import unittest
import numpy as np
import pandas as pd
import tupak.hyper as hyp
class TestHyperLikelihood(unittest.TestCase):
    """Unit tests for hyp.likelihood.HyperparameterLikelihood.

    Checks evidence normalisation and posterior resampling behaviour,
    with and without per-event log evidences / max_samples.
    """

    def setUp(self):
        # Three posteriors with deliberately different lengths so the
        # "shortest chain" downsampling behaviour is exercised.
        self.keys = ['a', 'b', 'c']
        self.lengths = [300, 400, 500]
        self.posteriors = list()
        for length in self.lengths:
            self.posteriors.append(
                pd.DataFrame(
                    {key: np.random.normal(0, 1, length) for key in self.keys}))
        self.log_evidences = [2, 2, 2]
        # Empty models are sufficient: these tests never evaluate prob().
        self.model = hyp.model.Model(list())
        self.sampling_model = hyp.model.Model(list())

    def tearDown(self):
        # Drop every fixture created in setUp (the models were
        # previously leaked between tests).
        del self.keys
        del self.lengths
        del self.posteriors
        del self.log_evidences
        del self.model
        del self.sampling_model

    def test_evidence_factor_with_evidences(self):
        """evidence_factor is the sum of the supplied log evidences."""
        like = hyp.likelihood.HyperparameterLikelihood(
            self.posteriors, self.model, self.sampling_model,
            log_evidences=self.log_evidences)
        self.assertEqual(like.evidence_factor, 6)

    def test_evidence_factor_without_evidences(self):
        """evidence_factor is NaN when no log evidences are provided."""
        like = hyp.likelihood.HyperparameterLikelihood(
            self.posteriors, self.model, self.sampling_model)
        self.assertTrue(np.isnan(like.evidence_factor))

    def test_len_samples_with_max_samples(self):
        """max_samples caps the number of samples per posterior."""
        like = hyp.likelihood.HyperparameterLikelihood(
            self.posteriors, self.model, self.sampling_model,
            log_evidences=self.log_evidences, max_samples=10)
        self.assertEqual(like.samples_per_posterior, 10)

    def test_len_samples_without_max_samples(self):
        """Without max_samples the shortest chain sets the sample count."""
        like = hyp.likelihood.HyperparameterLikelihood(
            self.posteriors, self.model, self.sampling_model,
            log_evidences=self.log_evidences)
        self.assertEqual(like.samples_per_posterior, min(self.lengths))

    def test_resample_without_max_samples(self):
        # NOTE(review): this test and the one below had their names
        # swapped relative to their bodies; fixed so the name matches
        # whether a max_samples argument is passed to resample_posteriors.
        """Default resampling uses the shortest posterior length."""
        like = hyp.likelihood.HyperparameterLikelihood(
            self.posteriors, self.model, self.sampling_model,
            log_evidences=self.log_evidences)
        resampled = like.resample_posteriors()
        self.assertEqual(resampled['a'].shape,
                         (len(self.lengths), min(self.lengths)))

    def test_resample_with_max_samples(self):
        """An explicit max_samples sets the resampled array width."""
        like = hyp.likelihood.HyperparameterLikelihood(
            self.posteriors, self.model, self.sampling_model,
            log_evidences=self.log_evidences)
        resampled = like.resample_posteriors(10)
        self.assertEqual(resampled['a'].shape, (len(self.lengths), 10))
# Run the test suite when this file is executed directly.
if __name__ == '__main__':
    unittest.main()
......@@ -7,29 +7,39 @@ from .model import Model
class HyperparameterLikelihood(Likelihood):
""" A likelihood for infering hyperparameter posterior distributions
""" A likelihood for inferring hyperparameter posterior distributions
See Eq. (1) of https://arxiv.org/abs/1801.02699 for a definition.
See Eq. (34) of https://arxiv.org/abs/1809.02293 for a definition.
Parameters
----------
posteriors: list
An list of pandas data frames of samples sets of samples. Each set may have
a different size.
A list of pandas DataFrames containing sets of samples.
Each set may have a different size.
hyper_prior: `tupak.hyper.model.Model`
The population model, this can alternatively be a function.
sampling_prior: `tupak.hyper.model.Model`
The sampling prior, this can alternatively be a function.
log_evidences: list, optional
Log evidences for single runs to ensure proper normalisation
of the hyperparameter likelihood. If not provided, the original
evidences will be set to 0. This produces a Bayes factor between
the sampling prior and the hyperparameterised model.
max_samples: int, optional
Maximum number of samples to use from each set.
"""
def __init__(self, posteriors, hyper_prior, sampling_prior, max_samples=1e100):
def __init__(self, posteriors, hyper_prior, sampling_prior,
log_evidences=None, max_samples=1e100):
if not isinstance(hyper_prior, Model):
hyper_prior = Model([hyper_prior])
if not isinstance(sampling_prior, Model):
sampling_prior = Model([sampling_prior])
if log_evidences is not None:
self.evidence_factor = np.sum(log_evidences)
else:
self.evidence_factor = np.nan
self.posteriors = posteriors
self.hyper_prior = hyper_prior
self.sampling_prior = sampling_prior
......@@ -39,21 +49,44 @@ class HyperparameterLikelihood(Likelihood):
self.data = self.resample_posteriors()
self.n_posteriors = len(self.posteriors)
self.samples_per_posterior = self.max_samples
self.log_factor = - self.n_posteriors * np.log(self.samples_per_posterior)
self.samples_factor =\
- self.n_posteriors * np.log(self.samples_per_posterior)
def log_likelihood(self):
def log_likelihood_ratio(self):
self.hyper_prior.parameters.update(self.parameters)
log_l = np.sum(np.log(np.sum(self.hyper_prior.prob(self.data) /
self.sampling_prior.prob(self.data), axis=-1))) + self.log_factor
self.sampling_prior.prob(self.data), axis=-1)))
log_l += self.samples_factor
return np.nan_to_num(log_l)
def noise_log_likelihood(self):
return self.evidence_factor
def log_likelihood(self):
return self.noise_log_likelihood() + self.log_likelihood_ratio()
def resample_posteriors(self, max_samples=None):
"""
Convert list of pandas DataFrame object to dict of arrays.
Parameters
----------
max_samples: int, opt
Maximum number of samples to take from each posterior,
default is length of shortest posterior chain.
Returns
-------
data: dict
Dictionary containing arrays of size (n_posteriors, max_samples)
There is a key for each shared key in self.posteriors.
"""
if max_samples is not None:
self.max_samples = max_samples
for posterior in self.posteriors:
self.max_samples = min(len(posterior), self.max_samples)
data = {key: [] for key in self.posteriors[0]}
logging.debug('Downsampling to {} samples per posterior.'.format(self.max_samples))
logging.debug('Downsampling to {} samples per posterior.'.format(
self.max_samples))
for posterior in self.posteriors:
temp = posterior.sample(self.max_samples)
for key in data:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment