likelihood.py 4.38 KB
Newer Older
1 2 3
from __future__ import division, print_function

import logging
MoritzThomasHuebner's avatar
MoritzThomasHuebner committed
4

5
import numpy as np
MoritzThomasHuebner's avatar
MoritzThomasHuebner committed
6

7 8
from ..core.likelihood import Likelihood
from .model import Model
9
from ..core.prior import PriorDict
10 11 12


class HyperparameterLikelihood(Likelihood):
13
    """ A likelihood for inferring hyperparameter posterior distributions
14

15
    See Eq. (34) of https://arxiv.org/abs/1809.02293 for a definition.
16 17 18 19

    Parameters
    ----------
    posteriors: list
20 21
        An list of pandas data frames of samples sets of samples.
        Each set may have a different size.
22
    hyper_prior: `bilby.hyper.model.Model`
23
        The population model, this can alternatively be a function.
24
    sampling_prior: `bilby.hyper.model.Model`
25
        The sampling prior, this can alternatively be a function.
26 27 28 29 30
    log_evidences: list, optional
        Log evidences for single runs to ensure proper normalisation
        of the hyperparameter likelihood. If not provided, the original
        evidences will be set to 0. This produces a Bayes factor between
        the sampling prior and the hyperparameterised model.
31 32 33 34 35
    max_samples: int, optional
        Maximum number of samples to use from each set.

    """

36
    def __init__(self, posteriors, hyper_prior, sampling_prior=None,
37
                 log_evidences=None, max_samples=1e100):
38 39
        if not isinstance(hyper_prior, Model):
            hyper_prior = Model([hyper_prior])
40 41 42 43 44 45 46
        if sampling_prior is None:
            if ('log_prior' not in posteriors[0].keys()) and ('prior' not in posteriors[0].keys()):
                raise ValueError('Missing both sampling prior function and prior or log_prior '
                                 'column in posterior dictionary. Must pass one or the other.')
        else:
            if not (isinstance(sampling_prior, Model) or isinstance(sampling_prior, PriorDict)):
                sampling_prior = Model([sampling_prior])
47 48 49 50
        if log_evidences is not None:
            self.evidence_factor = np.sum(log_evidences)
        else:
            self.evidence_factor = np.nan
51 52 53 54 55 56 57 58 59
        self.posteriors = posteriors
        self.hyper_prior = hyper_prior
        self.sampling_prior = sampling_prior
        self.max_samples = max_samples
        Likelihood.__init__(self, hyper_prior.parameters)

        self.data = self.resample_posteriors()
        self.n_posteriors = len(self.posteriors)
        self.samples_per_posterior = self.max_samples
60 61
        self.samples_factor =\
            - self.n_posteriors * np.log(self.samples_per_posterior)
62

63
    def log_likelihood_ratio(self):
64
        self.hyper_prior.parameters.update(self.parameters)
Gregory Ashton's avatar
Gregory Ashton committed
65
        log_l = np.sum(np.log(np.sum(self.hyper_prior.prob(self.data) /
66
                       self.data['prior'], axis=-1)))
67
        log_l += self.samples_factor
68 69
        return np.nan_to_num(log_l)

70 71 72 73 74 75
    def noise_log_likelihood(self):
        return self.evidence_factor

    def log_likelihood(self):
        return self.noise_log_likelihood() + self.log_likelihood_ratio()

76
    def resample_posteriors(self, max_samples=None):
77 78 79 80 81 82 83 84 85 86 87 88 89 90
        """
        Convert list of pandas DataFrame object to dict of arrays.

        Parameters
        ----------
        max_samples: int, opt
            Maximum number of samples to take from each posterior,
            default is length of shortest posterior chain.
        Returns
        -------
        data: dict
            Dictionary containing arrays of size (n_posteriors, max_samples)
            There is a key for each shared key in self.posteriors.
        """
91 92 93 94 95
        if max_samples is not None:
            self.max_samples = max_samples
        for posterior in self.posteriors:
            self.max_samples = min(len(posterior), self.max_samples)
        data = {key: [] for key in self.posteriors[0]}
96 97 98 99
        if 'log_prior' in data.keys():
            data.pop('log_prior')
        if 'prior' not in data.keys():
            data['prior'] = []
100 101
        logging.debug('Downsampling to {} samples per posterior.'.format(
            self.max_samples))
102 103
        for posterior in self.posteriors:
            temp = posterior.sample(self.max_samples)
104 105 106 107
            if self.sampling_prior is not None:
                temp['prior'] = self.sampling_prior.prob(temp, axis=0)
            elif 'log_prior' in temp.keys():
                temp['prior'] = np.exp(temp['log_prior'])
108 109 110 111 112
            for key in data:
                data[key].append(temp[key])
        for key in data:
            data[key] = np.array(data[key])
        return data