Skip to content
Snippets Groups Projects
Commit 9208f392 authored by Kipp Cannon's avatar Kipp Cannon
Browse files

rate_estimation: move posterior evaluation to C

parent 1724cb16
No related branches found
No related tags found
No related merge requests found
......@@ -22,3 +22,10 @@ pkgpython_PYTHON = \
streamthinca.py \
svd_bank.py \
templates.py
# build the _rate_estimation Python extension module as a libtool module:
# -module -avoid-version produces a plugin-style shared object with no
# version suffix, suitable for loading by the interpreter.  MODULE_NAME is
# injected via the preprocessor so the C source can name itself
# consistently in Py_InitModule3() and tp_name.
pkgpyexec_LTLIBRARIES = _rate_estimation.la
_rate_estimation_la_SOURCES = rate_estimation.c
_rate_estimation_la_CPPFLAGS = $(AM_CPPFLAGS) $(PYTHON_CPPFLAGS) -DMODULE_NAME="\"gstlal._rate_estimation\""
_rate_estimation_la_CFLAGS = $(AM_CFLAGS) -DMODULE_NAME="\"gstlal._rate_estimation\""
_rate_estimation_la_LDFLAGS = $(AM_LDFLAGS) $(PYTHON_LDFLAGS) -module -avoid-version
/*
* Copyright (C) 2014 Kipp C. Cannon
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
/*
* ============================================================================
*
* Preamble
*
* ============================================================================
*/
#include <math.h>
#include <stdlib.h>
#include <Python.h>
#include <numpy/arrayobject.h>
/*
* ============================================================================
*
* Internal Code
*
* ============================================================================
*/
/*
* input conditioning. sort f_over_b array in descending order to allow
* for early bail-out
*/
/*
 * qsort() comparison callback:  orders a pair of doubles from largest
 * to smallest.
 */
static int conditioning_compare(const void *a, const void *b)
{
	double lhs = *(const double *) a;
	double rhs = *(const double *) b;

	if(lhs > rhs)
		return -1;
	if(lhs < rhs)
		return +1;
	return 0;
}


/*
 * input conditioning.  sort the f_over_b array into descending order so
 * that loops over it can bail out early.
 */
static void condition(double *f_over_b, int n)
{
	qsort(f_over_b, n, sizeof(*f_over_b), conditioning_compare);
}
/*
* compute the log probability density of the foreground and background
* rates given by equation (21) in Farr et al., "Counting and Confusion:
* Bayesian Rate Estimation With Multiple Populations", arXiv:1302.5341.
* The default prior is that specified in the paper but it can be
* overridden with the lnpriorfunc keyword argument (giving a function that
* returns the natural log of the prior given Rf, Rb).
*/
static double compute_log_prior(double Rf, double Rb)
{
return -0.5 * log(Rf * Rb);
}
static double compute_log_posterior(const double *f_over_b, int n, double Rf, double Rb)
{
double Rf_over_Rb = Rf / Rb;
int i;
double ln_P;
if(Rf < 0. || Rb < 0.)
return atof("-inf");
ln_P = 0.;
for(i = 0; i < n; i++)
ln_P += log1p(Rf_over_Rb * f_over_b[i]);
ln_P += n * log(Rb) - (Rf + Rb);
return ln_P + compute_log_prior(Rf, Rb);
}
/*
* compute_log_posterior() / 2. to improve the measurement of the tails of
* the PDF using the MCMC sampler, we draw from the square root of the PDF
* and then correct the histogram of the samples.
*/
static double compute_log_sqrt_posterior(const double *f_over_b, int n, double Rf, double Rb)
{
return compute_log_posterior(f_over_b, n, Rf, Rb) / 2.;
}
/*
* ============================================================================
*
* Rate Estimation --- Posterior Class
*
* ============================================================================
*/
/*
* Structure
*/
/* instance structure for the posterior type:  caches the event data so
 * __call__() only needs the two rate parameters */
struct posterior {
PyObject_HEAD
/*
 * array of P(L | signal) / P(L | noise) for the L's of all events
 * in the experiment's results.  heap-allocated and owned by this
 * object (freed in __del__());  sorted into descending order by
 * __init__() via condition().
 */
double *f_over_b;
int f_over_b_len;
};
/*
* __del__() method
*/
/*
 * __del__() method:  release the internal f_over_b array, reset the
 * fields defensively, then free the object itself through the type's
 * tp_free slot.
 */
static void __del__(PyObject *self)
{
	struct posterior *obj = (struct posterior *) self;

	free(obj->f_over_b);
	obj->f_over_b = NULL;
	obj->f_over_b_len = 0;

	self->ob_type->tp_free(self);
}
/*
* __init__() method
*/
/*
 * __init__() method.  expects a single 1-D NumPy array of native
 * Python floats giving P(L | signal) / P(L | noise) for each event;
 * copies it into a private buffer and sorts it into descending order.
 * returns 0 on success, -1 with a Python exception set on failure.
 */
static int __init__(PyObject *self, PyObject *args, PyObject *kwds)
{
	struct posterior *posterior = (struct posterior *) self;
	PyArrayObject *arr;
	int i;

	if(!PyArg_ParseTuple(args, "O!", &PyArray_Type, &arr))
		return -1;

	if(PyArray_NDIM(arr) != 1) {
		PyErr_SetString(PyExc_ValueError, "wrong number of dimensions");
		return -1;
	}
	if(!PyArray_ISFLOAT(arr) || !PyArray_ISPYTHON(arr)) {
		PyErr_SetObject(PyExc_TypeError, (PyObject *) arr);
		return -1;
	}

	/* don't leak the previous buffer if __init__() is called twice */
	free(posterior->f_over_b);
	posterior->f_over_b_len = *PyArray_DIMS(arr);
	posterior->f_over_b = malloc(posterior->f_over_b_len * sizeof(*posterior->f_over_b));
	if(!posterior->f_over_b) {
		/* leave the object in a consistent (empty) state */
		posterior->f_over_b_len = 0;
		PyErr_NoMemory();
		return -1;
	}
	for(i = 0; i < posterior->f_over_b_len; i++) {
		posterior->f_over_b[i] = *(double *) PyArray_GETPTR1(arr, i);
		if(posterior->f_over_b[i] < 0.) {
			/* buffer is freed by __del__() when the
			 * half-constructed object is destroyed */
			PyErr_SetString(PyExc_ValueError, "negative probability density encountered");
			return -1;
		}
	}

	condition(posterior->f_over_b, posterior->f_over_b_len);

	return 0;
}
/*
* __call__() method
*/
/*
 * __call__() method.  takes the rates as a single 2-element sequence
 * (Rf, Rb) and returns the log of the square root of the posterior
 * evaluated there, as a Python float.  keyword arguments are rejected.
 */
static PyObject *__call__(PyObject *self, PyObject *args, PyObject *kw)
{
	const struct posterior *obj = (const struct posterior *) self;
	double rates[2];

	if(kw) {
		PyErr_SetString(PyExc_ValueError, "unexpected keyword arguments");
		return NULL;
	}
	if(!PyArg_ParseTuple(args, "(dd)", &rates[0], &rates[1]))
		return NULL;

	return PyFloat_FromDouble(compute_log_sqrt_posterior(obj->f_over_b, obj->f_over_b_len, rates[0], rates[1]));
}
/*
* Type information
*/
/* type object for the posterior class.  instances are created with
 * PyType_GenericNew and set up by __init__() above. */
static PyTypeObject posterior_Type = {
PyObject_HEAD_INIT(NULL)
.tp_basicsize = sizeof(struct posterior),
.tp_call = __call__,
.tp_dealloc = __del__,
.tp_doc = "",
/* NOTE(review):  Py_TPFLAGS_CHECKTYPES exists only in the Python 2 C
 * API, so this module is Python-2 only as written */
.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES,
.tp_name = MODULE_NAME ".posterior",
.tp_new = PyType_GenericNew,
.tp_init = __init__,
};
/*
* ============================================================================
*
* Entry Point
*
* ============================================================================
*/
/*
 * module entry point (Python 2 style).  creates the module, initializes
 * the NumPy C API, and publishes the posterior type.  on failure an
 * exception is left set and the function returns without registering
 * the type.
 */
void init_rate_estimation(void)
{
	PyObject *module = Py_InitModule3(MODULE_NAME, NULL, "");
	if(!module)
		/* was previously dereferenced unconditionally below */
		return;

	import_array();

	if(PyType_Ready(&posterior_Type) < 0)
		return;
	Py_INCREF(&posterior_Type);
	/* PyModule_AddObject() steals the reference only on success */
	if(PyModule_AddObject(module, "posterior", (PyObject *) &posterior_Type) < 0)
		Py_DECREF(&posterior_Type);
}
......@@ -40,10 +40,13 @@ import sys
from glue.text_progress_bar import ProgressBar
from gstlal import emcee
from pylal import rate
from gstlal import emcee
from gstlal._rate_estimation import *
#
# =============================================================================
#
......@@ -53,30 +56,6 @@ from pylal import rate
#
# NOTE(review):  leading indentation was lost when this diff hunk was
# scraped from the web page;  the lines below the def belong inside the
# function body.  Python 2 only (tuple parameter unpacking).
def RatesLnPDF((Rf, Rb), f_over_b, lnpriorfunc = lambda Rf, Rb: -0.5 * math.log(Rf * Rb)):
"""
Compute the log probability density of the foreground and
background rates given by equation (21) in Farr et al., "Counting
and Confusion: Bayesian Rate Estimation With Multiple
Populations", arXiv:1302.5341. The default prior is that specified
in the paper but it can be overridden with the lnpriorfunc keyword
argument (giving a function that returns the natural log of the
prior given Rf, Rb).
"""
# the density's support is strictly positive rates
if Rf <= 0. or Rb <= 0.:
return NegInf
# equation (21) in log form:  sum of log(1 + (Rf/Rb) f/b) over events,
# plus n log(Rb) - (Rf + Rb), plus the log prior
return numpy.log1p((Rf / Rb) * f_over_b).sum() + len(f_over_b) * math.log(Rb) - (Rf + Rb) + lnpriorfunc(Rf, Rb)
# NOTE(review):  indentation lost in the web scrape;  the body belongs
# inside the function.
def RatesLnSqrtPDF(*args, **kwargs):
"""
Compute RatesLnPDF() / 2. To improve the measurement of the tails
of the PDF using the MCMC sampler, we draw from the square root of
the PDF and then correct the histogram of the samples.
"""
return RatesLnPDF(*args, **kwargs) / 2.
# NOTE(review):  indentation lost in the web scrape;  the body belongs
# inside the function.  maximizes RatesLnPDF by minimizing its negative
# with scipy's fmin, starting from (1, N + sqrt(N)).
def maximum_likelihood_rates(f_over_b):
# the upper bound is chosen to include N + \sqrt{N}
return optimize.fmin((lambda x: -RatesLnPDF(x, f_over_b)), (1.0, len(f_over_b) + len(f_over_b)**.5), disp = True)
......@@ -184,18 +163,10 @@ def calculate_rate_posteriors(ranking_data, ln_likelihood_ratios, restrict_to_in
#
# for each sample of the ranking statistic, evaluate the ratio of
# the signal ranking statistic PDF to background ranking statistic
# PDF. since order is irrelevant in what follows, construct the
# array in ascending order for better numerical behaviour in the
# cost function. the sort order is stored in a look-up table in
# case we want to do something with it later (the Farr et al.
# paper provides a technique for assessing the probability that
# each event individually is a signal and if we ever implement that
# here then we need to have not lost the original event order).
# PDF.
#
order = range(len(ln_likelihood_ratios))
order.sort(key = ln_likelihood_ratios.__getitem__)
f_over_b = numpy.array([ranking_data.signal_likelihood_pdfs[restrict_to_instruments][ln_likelihood_ratios[index],] / ranking_data.background_likelihood_pdfs[restrict_to_instruments][ln_likelihood_ratios[index],] for index in order])
f_over_b = numpy.array([ranking_data.signal_likelihood_pdfs[restrict_to_instruments][ln_lr,] / ranking_data.background_likelihood_pdfs[restrict_to_instruments][ln_lr,] for ln_lr in ln_likelihood_ratios])
# remove NaNs. these occur because the ranking statistic PDFs have
# been zeroed at the cut-off and some events get pulled out of the
......@@ -256,7 +227,7 @@ def calculate_rate_posteriors(ranking_data, ln_likelihood_ratios, restrict_to_in
# samples.
#
for i, coordslist in enumerate(run_mcmc(nwalkers, ndim, max(0, nsample - i), RatesLnSqrtPDF, n_burn = nburn, args = (f_over_b,), pos0 = pos0, progressbar = progressbar), i):
for i, coordslist in enumerate(run_mcmc(nwalkers, ndim, max(0, nsample - i), posterior(f_over_b), n_burn = nburn, pos0 = pos0, progressbar = progressbar), i):
# coordslist is nwalkers x ndim
samples[i,:,:] = coordslist
if chain_file is not None:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment