Skip to content
Snippets Groups Projects
Commit 9208f392 authored by Kipp Cannon's avatar Kipp Cannon
Browse files

rate_estimation: move posterior evaluation to C

parent 1724cb16
No related branches found
No related tags found
No related merge requests found
......@@ -22,3 +22,10 @@ pkgpython_PYTHON = \
streamthinca.py \
svd_bank.py \
templates.py
# build the _rate_estimation Python extension module as a libtool module:
# -module -avoid-version produces a plugin-style shared object with no
# version suffix, suitable for loading by the interpreter.  MODULE_NAME is
# injected via the preprocessor so the C source can name itself
# consistently in Py_InitModule3() and tp_name.
pkgpyexec_LTLIBRARIES = _rate_estimation.la
_rate_estimation_la_SOURCES = rate_estimation.c
_rate_estimation_la_CPPFLAGS = $(AM_CPPFLAGS) $(PYTHON_CPPFLAGS) -DMODULE_NAME="\"gstlal._rate_estimation\""
_rate_estimation_la_CFLAGS = $(AM_CFLAGS) -DMODULE_NAME="\"gstlal._rate_estimation\""
_rate_estimation_la_LDFLAGS = $(AM_LDFLAGS) $(PYTHON_LDFLAGS) -module -avoid-version
/*
* Copyright (C) 2014 Kipp C. Cannon
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
/*
* ============================================================================
*
* Preamble
*
* ============================================================================
*/
#include <math.h>
#include <stdlib.h>
#include <Python.h>
#include <numpy/arrayobject.h>
/*
* ============================================================================
*
* Internal Code
*
* ============================================================================
*/
/*
* input conditioning. sort f_over_b array in descending order to allow
* for early bail-out
*/
/*
 * qsort() comparison callback:  orders a pair of doubles from largest
 * to smallest.
 */
static int conditioning_compare(const void *a, const void *b)
{
	double lhs = *(const double *) a;
	double rhs = *(const double *) b;

	if(lhs > rhs)
		return -1;
	if(lhs < rhs)
		return +1;
	return 0;
}


/*
 * input conditioning.  sort the f_over_b array into descending order so
 * that loops over it can bail out early.
 */
static void condition(double *f_over_b, int n)
{
	qsort(f_over_b, n, sizeof(*f_over_b), conditioning_compare);
}
/*
* compute the log probability density of the foreground and background
* rates given by equation (21) in Farr et al., "Counting and Confusion:
* Bayesian Rate Estimation With Multiple Populations", arXiv:1302.5341.
* The default prior is that specified in the paper but it can be
* overridden with the lnpriorfunc keyword argument (giving a function that
* returns the natural log of the prior given Rf, Rb).
*/
static double compute_log_prior(double Rf, double Rb)
{
return -0.5 * log(Rf * Rb);
}
static double compute_log_posterior(const double *f_over_b, int n, double Rf, double Rb)
{
double Rf_over_Rb = Rf / Rb;
int i;
double ln_P;
if(Rf < 0. || Rb < 0.)
return atof("-inf");
ln_P = 0.;
for(i = 0; i < n; i++)
ln_P += log1p(Rf_over_Rb * f_over_b[i]);
ln_P += n * log(Rb) - (Rf + Rb);
return ln_P + compute_log_prior(Rf, Rb);
}
/*
* compute_log_posterior() / 2. to improve the measurement of the tails of
* the PDF using the MCMC sampler, we draw from the square root of the PDF
* and then correct the histogram of the samples.
*/
static double compute_log_sqrt_posterior(const double *f_over_b, int n, double Rf, double Rb)
{
return compute_log_posterior(f_over_b, n, Rf, Rb) / 2.;
}
/*
* ============================================================================
*
* Rate Estimation --- Posterior Class
*
* ============================================================================
*/
/*
* Structure
*/
/* instance structure for the posterior type:  caches the event data so
 * __call__() only needs the two rate parameters */
struct posterior {
PyObject_HEAD
/*
 * array of P(L | signal) / P(L | noise) for the L's of all events
 * in the experiment's results.  heap-allocated and owned by this
 * object (freed in __del__());  sorted into descending order by
 * __init__() via condition().
 */
double *f_over_b;
int f_over_b_len;
};
/*
* __del__() method
*/
/*
 * __del__() method:  release the internal f_over_b array, reset the
 * fields defensively, then free the object itself through the type's
 * tp_free slot.
 */
static void __del__(PyObject *self)
{
	struct posterior *obj = (struct posterior *) self;

	free(obj->f_over_b);
	obj->f_over_b = NULL;
	obj->f_over_b_len = 0;

	self->ob_type->tp_free(self);
}
/*
* __init__() method
*/
/*
 * __init__() method.  expects a single 1-D NumPy array of native
 * Python floats giving P(L | signal) / P(L | noise) for each event;
 * copies it into a private buffer and sorts it into descending order.
 * returns 0 on success, -1 with a Python exception set on failure.
 */
static int __init__(PyObject *self, PyObject *args, PyObject *kwds)
{
	struct posterior *posterior = (struct posterior *) self;
	PyArrayObject *arr;
	int i;

	if(!PyArg_ParseTuple(args, "O!", &PyArray_Type, &arr))
		return -1;

	if(PyArray_NDIM(arr) != 1) {
		PyErr_SetString(PyExc_ValueError, "wrong number of dimensions");
		return -1;
	}
	if(!PyArray_ISFLOAT(arr) || !PyArray_ISPYTHON(arr)) {
		PyErr_SetObject(PyExc_TypeError, (PyObject *) arr);
		return -1;
	}

	/* don't leak the previous buffer if __init__() is called twice */
	free(posterior->f_over_b);
	posterior->f_over_b_len = *PyArray_DIMS(arr);
	posterior->f_over_b = malloc(posterior->f_over_b_len * sizeof(*posterior->f_over_b));
	if(!posterior->f_over_b) {
		/* leave the object in a consistent (empty) state */
		posterior->f_over_b_len = 0;
		PyErr_NoMemory();
		return -1;
	}
	for(i = 0; i < posterior->f_over_b_len; i++) {
		posterior->f_over_b[i] = *(double *) PyArray_GETPTR1(arr, i);
		if(posterior->f_over_b[i] < 0.) {
			/* buffer is freed by __del__() when the
			 * half-constructed object is destroyed */
			PyErr_SetString(PyExc_ValueError, "negative probability density encountered");
			return -1;
		}
	}

	condition(posterior->f_over_b, posterior->f_over_b_len);

	return 0;
}
/*
* __call__() method
*/
/*
 * __call__() method.  takes the rates as a single 2-element sequence
 * (Rf, Rb) and returns the log of the square root of the posterior
 * evaluated there, as a Python float.  keyword arguments are rejected.
 */
static PyObject *__call__(PyObject *self, PyObject *args, PyObject *kw)
{
	const struct posterior *obj = (const struct posterior *) self;
	double rates[2];

	if(kw) {
		PyErr_SetString(PyExc_ValueError, "unexpected keyword arguments");
		return NULL;
	}
	if(!PyArg_ParseTuple(args, "(dd)", &rates[0], &rates[1]))
		return NULL;

	return PyFloat_FromDouble(compute_log_sqrt_posterior(obj->f_over_b, obj->f_over_b_len, rates[0], rates[1]));
}
/*
* Type information
*/
/* type object for the posterior class.  instances are created with
 * PyType_GenericNew and set up by __init__() above. */
static PyTypeObject posterior_Type = {
PyObject_HEAD_INIT(NULL)
.tp_basicsize = sizeof(struct posterior),
.tp_call = __call__,
.tp_dealloc = __del__,
.tp_doc = "",
/* NOTE(review):  Py_TPFLAGS_CHECKTYPES exists only in the Python 2 C
 * API, so this module is Python-2 only as written */
.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES,
.tp_name = MODULE_NAME ".posterior",
.tp_new = PyType_GenericNew,
.tp_init = __init__,
};
/*
* ============================================================================
*
* Entry Point
*
* ============================================================================
*/
/*
 * module entry point (Python 2 style).  creates the module, initializes
 * the NumPy C API, and publishes the posterior type.  on failure an
 * exception is left set and the function returns without registering
 * the type.
 */
void init_rate_estimation(void)
{
	PyObject *module = Py_InitModule3(MODULE_NAME, NULL, "");
	if(!module)
		/* was previously dereferenced unconditionally below */
		return;

	import_array();

	if(PyType_Ready(&posterior_Type) < 0)
		return;
	Py_INCREF(&posterior_Type);
	/* PyModule_AddObject() steals the reference only on success */
	if(PyModule_AddObject(module, "posterior", (PyObject *) &posterior_Type) < 0)
		Py_DECREF(&posterior_Type);
}
......@@ -40,10 +40,13 @@ import sys
from glue.text_progress_bar import ProgressBar
from gstlal import emcee
from pylal import rate
from gstlal import emcee
from gstlal._rate_estimation import *
#
# =============================================================================
#
......@@ -53,30 +56,6 @@ from pylal import rate
#
# NOTE(review):  leading indentation was lost when this diff hunk was
# scraped from the web page;  the lines below the def belong inside the
# function body.  Python 2 only (tuple parameter unpacking).
def RatesLnPDF((Rf, Rb), f_over_b, lnpriorfunc = lambda Rf, Rb: -0.5 * math.log(Rf * Rb)):
"""
Compute the log probability density of the foreground and
background rates given by equation (21) in Farr et al., "Counting
and Confusion: Bayesian Rate Estimation With Multiple
Populations", arXiv:1302.5341. The default prior is that specified
in the paper but it can be overridden with the lnpriorfunc keyword
argument (giving a function that returns the natural log of the
prior given Rf, Rb).
"""
# the density's support is strictly positive rates
if Rf <= 0. or Rb <= 0.:
return NegInf
# equation (21) in log form:  sum of log(1 + (Rf/Rb) f/b) over events,
# plus n log(Rb) - (Rf + Rb), plus the log prior
return numpy.log1p((Rf / Rb) * f_over_b).sum() + len(f_over_b) * math.log(Rb) - (Rf + Rb) + lnpriorfunc(Rf, Rb)
# NOTE(review):  indentation lost in the web scrape;  the body belongs
# inside the function.
def RatesLnSqrtPDF(*args, **kwargs):
"""
Compute RatesLnPDF() / 2. To improve the measurement of the tails
of the PDF using the MCMC sampler, we draw from the square root of
the PDF and then correct the histogram of the samples.
"""
return RatesLnPDF(*args, **kwargs) / 2.
# NOTE(review):  indentation lost in the web scrape;  the body belongs
# inside the function.  maximizes RatesLnPDF by minimizing its negative
# with scipy's fmin, starting from (1, N + sqrt(N)).
def maximum_likelihood_rates(f_over_b):
# the upper bound is chosen to include N + \sqrt{N}
return optimize.fmin((lambda x: -RatesLnPDF(x, f_over_b)), (1.0, len(f_over_b) + len(f_over_b)**.5), disp = True)
......@@ -184,18 +163,10 @@ def calculate_rate_posteriors(ranking_data, ln_likelihood_ratios, restrict_to_in
#
# for each sample of the ranking statistic, evaluate the ratio of
# the signal ranking statistic PDF to background ranking statistic
# PDF. since order is irrelevant in what follows, construct the
# array in ascending order for better numerical behaviour in the
# cost function. the sort order is stored in a look-up table in
# case we want to do something with it later (the Farr et al.
# paper provides a technique for assessing the probability that
# each event individually is a signal and if we ever implement that
# here then we need to have not lost the original event order).
# PDF.
#
order = range(len(ln_likelihood_ratios))
order.sort(key = ln_likelihood_ratios.__getitem__)
f_over_b = numpy.array([ranking_data.signal_likelihood_pdfs[restrict_to_instruments][ln_likelihood_ratios[index],] / ranking_data.background_likelihood_pdfs[restrict_to_instruments][ln_likelihood_ratios[index],] for index in order])
f_over_b = numpy.array([ranking_data.signal_likelihood_pdfs[restrict_to_instruments][ln_lr,] / ranking_data.background_likelihood_pdfs[restrict_to_instruments][ln_lr,] for ln_lr in ln_likelihood_ratios])
# remove NaNs. these occur because the ranking statistic PDFs have
# been zeroed at the cut-off and some events get pulled out of the
......@@ -256,7 +227,7 @@ def calculate_rate_posteriors(ranking_data, ln_likelihood_ratios, restrict_to_in
# samples.
#
for i, coordslist in enumerate(run_mcmc(nwalkers, ndim, max(0, nsample - i), RatesLnSqrtPDF, n_burn = nburn, args = (f_over_b,), pos0 = pos0, progressbar = progressbar), i):
for i, coordslist in enumerate(run_mcmc(nwalkers, ndim, max(0, nsample - i), posterior(f_over_b), n_burn = nburn, pos0 = pos0, progressbar = progressbar), i):
# coordslist is nwalkers x ndim
samples[i,:,:] = coordslist
if chain_file is not None:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment