Skip to content
Snippets Groups Projects
Commit 64cdcbc9 authored by Gregory Ashton's avatar Gregory Ashton
Browse files

Merge branch 'update_hyperpe' into 'master'

Update hyperpe

See merge request Monash/tupak!93
parents 0b6a3913 82923d9d
No related branches found
No related tags found
1 merge request!93Update hyperpe
Pipeline #
......@@ -5,56 +5,11 @@ An example of how to use tupak to perform parameter estimation for hyper params
from __future__ import division
import tupak
import numpy as np
import inspect
import matplotlib.pyplot as plt
outdir = 'outdir'
class GaussianLikelihood(tupak.Likelihood):
    def __init__(self, x, y, function, sigma=None):
        """
        A general Gaussian likelihood for known or unknown noise - the model
        parameters are inferred from the arguments of function

        Parameters
        ----------
        x, y: array_like
            The data to analyse
        function:
            The python function to fit to the data. Note, this must take the
            dependent variable as its first argument. The other arguments
            will require a prior and will be sampled over (unless a fixed
            value is given).
        sigma: None, float, array_like
            If None, the standard deviation of the noise is unknown and will be
            estimated (note: this requires a prior to be given for sigma). If
            not None, this defines the standard-deviation of the data points.
            This can either be a single float, or an array with length equal
            to that for `x` and `y`.
        """
        self.x = x
        self.y = y
        self.N = len(x)
        self.sigma = sigma
        self.function = function

        # Infer the model parameter names from the function signature,
        # dropping the first argument (the independent variable).
        # `inspect.getargspec` was removed in Python 3.11; prefer
        # `getfullargspec` when it is available.
        argspec = getattr(inspect, 'getfullargspec', inspect.getargspec)
        parameters = argspec(function).args
        parameters.pop(0)
        self.parameters = dict.fromkeys(parameters)
        # Snapshot the keys as a list BEFORE optionally adding 'sigma'.
        # In Python 3, `dict.keys()` is a live view: it would later include
        # 'sigma' and pass it to the model function as an unexpected
        # keyword argument.
        self.function_keys = list(self.parameters.keys())
        if self.sigma is None:
            self.parameters['sigma'] = None

    def log_likelihood(self):
        # Use the sampled sigma when the noise level is being estimated,
        # otherwise the fixed value supplied at construction.
        sigma = self.parameters.get('sigma', self.sigma)
        model_parameters = {k: self.parameters[k] for k in self.function_keys}
        res = self.y - self.function(self.x, **model_parameters)
        # Gaussian log-likelihood with (possibly per-point) standard deviation.
        return -0.5 * (np.sum((res / sigma)**2)
                       + self.N * np.log(2 * np.pi * sigma**2))
# Define a model to fit to each data set
def model(x, c0, c1):
    """Linear model: intercept ``c0`` plus slope ``c1`` times ``x``."""
    return c1 * x + c0
......@@ -82,17 +37,16 @@ for i in range(Nevents):
data = model(x, **injection_parameters) + np.random.normal(0, sigma, N)
line = ax1.plot(x, data, '-x', label=labels[i])
likelihood = GaussianLikelihood(x, data, model, sigma)
likelihood = tupak.core.likelihood.GaussianLikelihood(x, data, model, sigma)
priors = dict(c0=tupak.core.prior.Uniform(-10, 10, 'c0'),
c1=tupak.core.prior.Uniform(-10, 10, 'c1'),
)
c1=tupak.core.prior.Uniform(-10, 10, 'c1'))
result = tupak.core.sampler.run_sampler(
likelihood=likelihood, priors=priors, sampler='dynesty', npoints=1000,
outdir=outdir, verbose=False, label='individual_{}'.format(i))
ax2.hist(result.posterior.c0, color=line[0].get_color(), normed=True,
alpha=0.5, label=labels[i])
samples.append(result.posterior.c0.values)
samples.append(result.posterior)
ax1.set_xlabel('x')
ax1.set_ylabel('y(x)')
......@@ -103,19 +57,23 @@ ax2.set_ylabel('density')
ax2.legend()
fig2.savefig('outdir/hyper_parameter_combined_posteriors.png')
# Now run the hyper parameter inference
run_prior = tupak.core.prior.Uniform(minimum=-10, maximum=10, name='mu_c0')
hyper_prior = tupak.core.prior.Gaussian(mu=0, sigma=1, name='hyper')
hp_likelihood = tupak.core.likelihood.HyperparameterLikelihood(
samples, hyper_prior, run_prior)
def hyper_prior(data, mu, sigma):
    """Gaussian population model evaluated on the 'c0' posterior samples."""
    residual = data['c0'] - mu
    normalisation = (2 * np.pi * sigma**2)**0.5
    return np.exp(-residual**2 / (2 * sigma**2)) / normalisation
def run_prior(data):
    """Flat sampling prior over the Uniform(-10, 10) range, i.e. 1/20."""
    return 0.05
hp_likelihood = tupak.hyper.likelihood.HyperparameterLikelihood(
posteriors=samples, hyper_prior=hyper_prior, sampling_prior=run_prior)
hp_priors = dict(
mu=tupak.core.prior.Uniform(-10, 10, 'mu', '$\mu_{c0}$'),
sigma=tupak.core.prior.Uniform(0, 10, 'sigma', '$\sigma_{c0}$'))
hp_priors = dict(mu=tupak.core.prior.Uniform(-10, 10, 'mu', '$\mu_{c0}$'),
sigma=tupak.core.prior.Uniform(0, 10, 'sigma', '$\sigma_{c0}$'))
# And run sampler
result = tupak.core.sampler.run_sampler(
likelihood=hp_likelihood, priors=hp_priors, sampler='dynesty',
npoints=1000, outdir=outdir, label='hyper_parameter', verbose=True)
npoints=1000, outdir=outdir, label='hyper_parameter', verbose=True, clean=True)
result.plot_corner(truth=dict(mu=true_mu_c0, sigma=true_sigma_c0))
......@@ -38,7 +38,7 @@ setup(name='tupak',
author_email='paul.lasky@monash.edu',
license="MIT",
version=version,
packages=['tupak', 'tupak.core', 'tupak.gw'],
packages=['tupak', 'tupak.core', 'tupak.gw', 'tupak.hyper'],
package_dir={'tupak': 'tupak'},
package_data={'tupak.gw': ['prior_files/*', 'noise_curves/*.txt', 'detectors/*'],
'tupak': [version_file]},
......
from __future__ import division, print_function
import inspect
import logging
import numpy as np
try:
from scipy.special import logsumexp
except ImportError:
from scipy.misc import logsumexp
class Likelihood(object):
......@@ -91,54 +85,3 @@ class GaussianLikelihood(Likelihood):
res = self.y - self.function(self.x, **model_parameters)
return -0.5 * (np.sum((res / sigma)**2)
+ self.N*np.log(2*np.pi*sigma**2))
class HyperparameterLikelihood(Likelihood):
    """ A likelihood for inferring hyperparameter posterior distributions

    See Eq. (1) of https://arxiv.org/abs/1801.02699 for a definition.

    Parameters
    ----------
    samples: list
        An N-dimensional list of individual sets of samples. Each set may have
        a different size.
    hyper_prior: `tupak.prior.Prior`
        A prior distribution with a `parameters` argument pointing to the
        hyperparameters to infer from the samples. These may need to be
        initialized to any arbitrary value, but this will not affect the
        result.
    run_prior: `tupak.prior.Prior`
        The prior distribution used in the individual inferences which
        resulted in the set of samples.
    """

    def __init__(self, samples, hyper_prior, run_prior):
        # NOTE(review): `parameters` aliases hyper_prior.__dict__, so the
        # sampler mutates the hyper_prior instance directly through
        # self.parameters; the `__dict__.update(self.parameters)` calls in
        # the log-likelihood methods are then effectively redundant.
        Likelihood.__init__(self, parameters=hyper_prior.__dict__)
        self.samples = samples
        self.hyper_prior = hyper_prior
        self.run_prior = run_prior
        # Prefer log-probabilities when both priors expose `lnprob`; this is
        # numerically safer than ratios of raw probabilities. The chosen
        # implementation is bound as an *instance* attribute named
        # `log_likelihood`, shadowing any class-level method of that name.
        if hasattr(hyper_prior, 'lnprob') and hasattr(run_prior, 'lnprob'):
            logging.info("Using log-probabilities in likelihood")
            self.log_likelihood = self.log_likelihood_using_lnprob
        else:
            logging.info("Using probabilities in likelihood")
            self.log_likelihood = self.log_likelihood_using_prob

    def log_likelihood_using_lnprob(self):
        # Per event: log sum_j exp(ln p_hyper(s_j) - ln p_run(s_j)), an
        # importance-sampled estimate; the total is the sum over events.
        # NOTE(review): no 1/N_samples normalisation is applied here — this
        # only shifts the result by a constant if all sets are equal-sized.
        L = []
        self.hyper_prior.__dict__.update(self.parameters)
        for samp in self.samples:
            f = self.hyper_prior.lnprob(samp) - self.run_prior.lnprob(samp)
            L.append(logsumexp(f))
        return np.sum(L)

    def log_likelihood_using_prob(self):
        # Same estimate computed in linear space; may underflow/overflow
        # for extreme probability ratios (hence the lnprob variant above).
        L = []
        self.hyper_prior.__dict__.update(self.parameters)
        for samp in self.samples:
            L.append(
                np.sum(self.hyper_prior.prob(samp) /
                       self.run_prior.prob(samp)))
        return np.sum(np.log(L))
from __future__ import absolute_import
import tupak.hyper.likelihood
import tupak.hyper.model
from __future__ import division, print_function
import logging
import numpy as np
from ..core.likelihood import Likelihood
from .model import Model
class HyperparameterLikelihood(Likelihood):
    """ A likelihood for inferring hyperparameter posterior distributions

    See Eq. (1) of https://arxiv.org/abs/1801.02699 for a definition.

    Parameters
    ----------
    posteriors: list
        A list of pandas DataFrames of posterior samples. Each set may have
        a different size.
    hyper_prior: `tupak.hyper.model.Model`
        The population model, this can alternatively be a function.
    sampling_prior: `tupak.hyper.model.Model`
        The sampling prior, this can alternatively be a function.
    max_samples: int, optional
        Maximum number of samples to use from each set.
    """

    def __init__(self, posteriors, hyper_prior, sampling_prior, max_samples=1e100):
        # Wrap plain functions in Model so both priors expose the same
        # `parameters` / `prob` interface.
        if not isinstance(hyper_prior, Model):
            hyper_prior = Model([hyper_prior])
        if not isinstance(sampling_prior, Model):
            sampling_prior = Model([sampling_prior])
        self.posteriors = posteriors
        self.hyper_prior = hyper_prior
        self.sampling_prior = sampling_prior
        self.max_samples = max_samples
        Likelihood.__init__(self, hyper_prior.parameters)
        # resample_posteriors() shrinks self.max_samples to the smallest
        # posterior length, so samples_per_posterior below is the common
        # per-event sample count actually used.
        self.data = self.resample_posteriors()
        self.n_posteriors = len(self.posteriors)
        self.samples_per_posterior = self.max_samples
        # Constant Monte-Carlo normalisation: -n_events * log(N_samples).
        self.log_factor = - self.n_posteriors * np.log(self.samples_per_posterior)

    def log_likelihood(self):
        # Push the sampler's current hyperparameters into the population
        # model, then sum log of the per-event mean importance ratios.
        self.hyper_prior.parameters.update(self.parameters)
        # NOTE(review): nan_to_num silently maps non-finite log-likelihoods
        # (e.g. from a zero sampling prior) to finite values — confirm this
        # masking is intended rather than raising.
        log_l = np.sum(np.log(np.sum(self.hyper_prior.prob(self.data)
                       / self.sampling_prior.prob(self.data), axis=-1))) + self.log_factor
        return np.nan_to_num(log_l)

    def resample_posteriors(self, max_samples=None):
        # Downsample every posterior to a common size so the per-parameter
        # arrays below are rectangular (n_posteriors x max_samples).
        # Assumes each posterior supports len() and .sample(n) — i.e. a
        # pandas DataFrame; .sample draws without replacement by default.
        if max_samples is not None:
            self.max_samples = max_samples
        for posterior in self.posteriors:
            self.max_samples = min(len(posterior), self.max_samples)
        data = {key: [] for key in self.posteriors[0]}
        logging.debug('Downsampling to {} samples per posterior.'.format(self.max_samples))
        for posterior in self.posteriors:
            temp = posterior.sample(self.max_samples)
            for key in data:
                data[key].append(temp[key])
        for key in data:
            data[key] = np.array(data[key])
        return data
import inspect
class Model(object):
    """
    Population model

    Combines a list of component functions; the model probability is the
    product of the component probabilities. Each function must take the
    data as its first argument, followed by named hyperparameters.
    """

    def __init__(self, model_functions=None):
        """
        Parameters
        ----------
        model_functions: list
            List of functions to compute. ``None`` is treated as an empty
            list rather than raising when iterated.
        """
        self.models = model_functions or []
        # Union of hyperparameter names across all component functions,
        # initialised to None until the sampler fills them in.
        self.parameters = dict()
        for function in self.models:
            for key in self._function_parameter_names(function):
                self.parameters[key] = None

    def prob(self, data):
        """Return the product of the component function probabilities.

        Parameters
        ----------
        data: array_like or dict
            Passed as the first argument to each component function.
        """
        # Start from the multiplicative identity so an empty model yields 1
        # instead of an unbound variable; `1 * array` preserves array returns.
        probability = 1
        for function in self.models:
            probability = probability * function(
                data, **self._get_function_parameters(function))
        return probability

    @staticmethod
    def _function_parameter_names(function):
        """Names of *function*'s arguments excluding the leading data arg.

        `inspect.getargspec` was removed in Python 3.11; use
        `getfullargspec` where available and fall back otherwise.
        """
        argspec = getattr(inspect, 'getfullargspec', inspect.getargspec)
        return argspec(function).args[1:]

    def _get_function_parameters(self, function):
        """Map the current parameter values onto *function*'s keywords."""
        return {key: self.parameters[key]
                for key in self._function_parameter_names(function)}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment