There will be maintenance performed on git.ligo.org, chat.ligo.org, containers.lig.org, and docs.ligo.org starting at 9am PDT on Tuesday 18th August 2020. There will be an extremely small period of downtime at the start of the maintenance window as various services are restarted. Please address any comments, questions, or concerns to computing-help@igwn.org.

Commit 5f6ad821 authored by Colm Talbot's avatar Colm Talbot

add hyperparameter estimation package

parent 0d0b4f5b
......@@ -38,7 +38,7 @@ setup(name='tupak',
author_email='paul.lasky@monash.edu',
license="MIT",
version=version,
packages=['tupak', 'tupak.core', 'tupak.gw'],
packages=['tupak', 'tupak.core', 'tupak.gw', 'tupak.hyper'],
package_dir={'tupak': 'tupak'},
package_data={'tupak.gw': ['prior_files/*', 'noise_curves/*.txt', 'detectors/*'],
'tupak': [version_file]},
......
from __future__ import absolute_import
import tupak.hyper.likelihood
import tupak.hyper.model
from __future__ import division, print_function
import logging
import numpy as np
from ..core.likelihood import Likelihood
from .model import Model
class HyperparameterLikelihood(Likelihood):
""" A likelihood for infering hyperparameter posterior distributions
See Eq. (1) of https://arxiv.org/abs/1801.02699 for a definition.
Parameters
----------
posteriors: list
An list of pandas data frames of samples sets of samples. Each set may have
a different size.
hyper_prior: `tupak.hyper.model.Model`
The population model, this can alternatively be a function.
sampling_prior: `tupak.hyper.model.Model`
The sampling prior, this can alternatively be a function.
max_samples: int, optional
Maximum number of samples to use from each set.
"""
def __init__(self, posteriors, hyper_prior, sampling_prior, max_samples=1e100):
if not isinstance(hyper_prior, Model):
hyper_prior = Model([hyper_prior])
if not isinstance(sampling_prior, Model):
sampling_prior = Model([sampling_prior])
self.posteriors = posteriors
self.hyper_prior = hyper_prior
self.sampling_prior = sampling_prior
self.max_samples = max_samples
Likelihood.__init__(self, hyper_prior.parameters)
self.data = self.resample_posteriors()
self.n_posteriors = len(self.posteriors)
self.samples_per_posterior = self.max_samples
self.log_factor = - self.n_posteriors * np.log(self.samples_per_posterior)
def log_likelihood(self):
self.hyper_prior.parameters.update(self.parameters)
log_l = np.sum(np.log(np.sum(self.hyper_prior.prob(self.data)
/ self.sampling_prior.prob(self.data), axis=-1))) + self.log_factor
return np.nan_to_num(log_l)
def resample_posteriors(self, max_samples=None):
if max_samples is not None:
self.max_samples = max_samples
for posterior in self.posteriors:
self.max_samples = min(len(posterior), self.max_samples)
data = {key: [] for key in self.posteriors[0]}
logging.debug('Downsampling to {} samples per posterior.'.format(self.max_samples))
for posterior in self.posteriors:
temp = posterior.sample(self.max_samples)
for key in data:
data[key].append(temp[key])
for key in data:
data[key] = np.array(data[key])
return data
import inspect
class Model(object):
"""
Population model
This should take population parameters and return the probability.
"""
def __init__(self, model_functions=None):
"""
Parameters
----------
model_functions: list
List of functions to compute.
"""
self.models = model_functions
self.parameters = dict()
for function in self.models:
for key in inspect.getargspec(function).args[1:]:
self.parameters[key] = None
def prob(self, data):
for ii, function in enumerate(self.models):
if ii == 0:
probability = function(data, **self._get_function_parameters(function))
else:
probability *= function(data, **self._get_function_parameters(function))
return probability
def _get_function_parameters(self, function):
parameters = {key: self.parameters[key] for key in inspect.getargspec(function).args[1:]}
return parameters
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment