Commit 5f6ad821 authored by Colm Talbot's avatar Colm Talbot

add hyperparameter estimation package

parent 0d0b4f5b
......@@ -38,7 +38,7 @@ setup(name='tupak',
author_email='paul.lasky@monash.edu',
license="MIT",
version=version,
packages=['tupak', 'tupak.core', 'tupak.gw'],
packages=['tupak', 'tupak.core', 'tupak.gw', 'tupak.hyper'],
package_dir={'tupak': 'tupak'},
package_data={'tupak.gw': ['prior_files/*', 'noise_curves/*.txt', 'detectors/*'],
'tupak': [version_file]},
......
from __future__ import absolute_import
import tupak.hyper.likelihood
import tupak.hyper.model
from __future__ import division, print_function
import logging
import numpy as np
from ..core.likelihood import Likelihood
from .model import Model
class HyperparameterLikelihood(Likelihood):
""" A likelihood for infering hyperparameter posterior distributions
See Eq. (1) of https://arxiv.org/abs/1801.02699 for a definition.
Parameters
----------
posteriors: list
An list of pandas data frames of samples sets of samples. Each set may have
a different size.
hyper_prior: `tupak.hyper.model.Model`
The population model, this can alternatively be a function.
sampling_prior: `tupak.hyper.model.Model`
The sampling prior, this can alternatively be a function.
max_samples: int, optional
Maximum number of samples to use from each set.
"""
def __init__(self, posteriors, hyper_prior, sampling_prior, max_samples=1e100):
if not isinstance(hyper_prior, Model):
hyper_prior = Model([hyper_prior])
if not isinstance(sampling_prior, Model):
sampling_prior = Model([sampling_prior])
self.posteriors = posteriors
self.hyper_prior = hyper_prior
self.sampling_prior = sampling_prior
self.max_samples = max_samples
Likelihood.__init__(self, hyper_prior.parameters)
self.data = self.resample_posteriors()
self.n_posteriors = len(self.posteriors)
self.samples_per_posterior = self.max_samples
self.log_factor = - self.n_posteriors * np.log(self.samples_per_posterior)
def log_likelihood(self):
self.hyper_prior.parameters.update(self.parameters)
log_l = np.sum(np.log(np.sum(self.hyper_prior.prob(self.data)
/ self.sampling_prior.prob(self.data), axis=-1))) + self.log_factor
return np.nan_to_num(log_l)
def resample_posteriors(self, max_samples=None):
if max_samples is not None:
self.max_samples = max_samples
for posterior in self.posteriors:
self.max_samples = min(len(posterior), self.max_samples)
data = {key: [] for key in self.posteriors[0]}
logging.debug('Downsampling to {} samples per posterior.'.format(self.max_samples))
for posterior in self.posteriors:
temp = posterior.sample(self.max_samples)
for key in data:
data[key].append(temp[key])
for key in data:
data[key] = np.array(data[key])
return data
import inspect
class Model(object):
"""
Population model
This should take population parameters and return the probability.
"""
def __init__(self, model_functions=None):
"""
Parameters
----------
model_functions: list
List of functions to compute.
"""
self.models = model_functions
self.parameters = dict()
for function in self.models:
for key in inspect.getargspec(function).args[1:]:
self.parameters[key] = None
def prob(self, data):
for ii, function in enumerate(self.models):
if ii == 0:
probability = function(data, **self._get_function_parameters(function))
else:
probability *= function(data, **self._get_function_parameters(function))
return probability
def _get_function_parameters(self, function):
parameters = {key: self.parameters[key] for key in inspect.getargspec(function).args[1:]}
return parameters
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment