rate_estimation.c 7.88 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/*
 * Copyright (C) 2014  Kipp C. Cannon
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2 of the License, or (at your
 * option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */

19 20 21 22 23 24 25 26 27 28 29 30
/**
 * SECTION:rate_estimation.c
 * @short_description: C code for event rate intervals
 * 
 * Reviewed: 2fb185eda0edb9d49d79b8185f7b35457cafa06b 2015-05-14 
 * K. Cannon, J. Creighton, C. Hanna, F. Robinett
 *
 * Actions:
 *  - Improve comments (some out of date)
 *
 */

31 32 33 34 35 36 37 38 39 40 41 42 43 44

/*
 * ============================================================================
 *
 *                                  Preamble
 *
 * ============================================================================
 */


#include <math.h>
#include <stdlib.h>


45 46 47
#include <gsl/gsl_integration.h>


48
#include <Python.h>
49
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
50 51 52
#include <numpy/arrayobject.h>


53 54 55
#define GSL_WORKSPACE_SIZE  8


56 57 58 59 60 61 62 63 64 65
/*
 * ============================================================================
 *
 *                               Internal Code
 *
 * ============================================================================
 */


/*
Kipp Cannon's avatar
Kipp Cannon committed
66
 * input conditioning.  sort ln_f_over_b array in ascending order.
67 68 69 70 71 72 73
 */


static int conditioning_compare(const void *a, const void *b)
{
	double A = *(double *) a, B = *(double *) b;

Kipp Cannon's avatar
Kipp Cannon committed
74
	return A > B ? +1 : A < B ? -1 : 0;
75 76 77
}


78
static void condition(double *ln_f_over_b, int n)
79
{
80
	qsort(ln_f_over_b, n, sizeof(*ln_f_over_b), conditioning_compare);
81 82 83 84
}


/*
85 86 87
 * the natural logarithm (up to an unknown additive constant) of the prior.
 * see equation (17) of Farr et al., "Counting and Confusion: Bayesian Rate
 * Estimation With Multiple Populations", arXiv:1302.5341.
88 89 90
 */


Kipp Cannon's avatar
Kipp Cannon committed
91
static double log_prior(double Rf, double Rb)
92 93 94 95 96
{
	return -0.5 * log(Rf * Rb);
}


97 98 99 100 101 102 103 104
/*
 * compute the logarithm (up to an unknown additive constant) of the joint
 * probability density of the foreground and background rates given by
 * equation (21) in Farr et al., "Counting and Confusion: Bayesian Rate
 * Estimation With Multiple Populations", arXiv:1302.5341.
 */


105 106 107 108 109 110 111 112 113 114
struct integrand_data_t {
	const double *ln_f_over_b;
	double ln_Rf_over_Rb;
};


static double integrand(double i, void *data)
{
	const struct integrand_data_t *integrand_data = data;
	double ln_x = integrand_data->ln_Rf_over_Rb + integrand_data->ln_f_over_b[(int) floor(i)];
115
	if(ln_x > 33.)	/* x ~= 10^14 */
116 117 118 119 120 121
		return ln_x;
	return log1p(exp(ln_x));
}


static double log_posterior(const double *ln_f_over_b, int n, double Rf, double Rb, gsl_integration_cquad_workspace *workspace)
122
{
123
	double ln_Rf_over_Rb = log(Rf / Rb);
124
	int i;
Kipp Cannon's avatar
Kipp Cannon committed
125
	double ln_P = 0.;
126 127 128 129

	if(Rf < 0. || Rb < 0.)
		return atof("-inf");

130 131
	/*
	 * need to compute sum of log(Rf f / (Rb b) + 1).  if x = Rf f /
132 133 134
	 * (Rb b) is larger than about 10^14 we consider the +1 to be
	 * irrelevant and approximate ln(x + 1) with ln(x).  for smaller x
	 * we use log1p(x) to evaluate the exprsesion.
135 136 137 138 139
	 *
	 * experience shows that the array of f/b values contains many very
	 * similar entries at the lower end of its range, so we sort the
	 * array and by treating the array as a function of its index use a
	 * numerical integration scheme to obtain the sum.  we do this for
140
	 * the bottom 99% of the array, and the top 1% is treated
141 142 143 144 145 146 147 148 149
	 * explicitly to capture the more rapid sample-to-sample variation.
	 */

	/*
	 * first do the numerical interal for the bottom part of the array
	 */

	i = 0.99 * n;
	if(i) {
150
		/*
151
		 * for these entries, compute the sum of log(Rf f / (Rb b)
152 153 154
		 * + 1) by approximating it with a numerical integration of
		 * the addend (evaluated using the approximations described
		 * above)
155
		 */
156 157 158 159 160 161 162 163 164
		gsl_function _integrand = {
			.function = integrand,
			.params = &(struct integrand_data_t) {
				.ln_f_over_b = ln_f_over_b,
				.ln_Rf_over_Rb = ln_Rf_over_Rb
			}
		};
		gsl_integration_cquad(&_integrand, 0, i * (1. - 1e-16), 0., 1e-8, workspace, &ln_P, NULL, NULL);
	}
165

166
	/*
167 168
	 * now explicitly compute the sum of log(Rf f / (Rb b) + 1) for the
	 * remaining entries
169 170 171
	 */

	for(; i < n; i++) {
172
		double ln_x = ln_Rf_over_Rb + ln_f_over_b[i];
173
		if(ln_x > 33.)	/* x ~= log(10^14) */
174
			ln_P += ln_x;
175
		else
176 177
			ln_P += log1p(exp(ln_x));
	}
178 179 180 181 182

	/*
	 * multiply by the remaining factors
	 */

183 184
	ln_P += n * log(Rb) - (Rf + Rb);

185 186 187 188
	/*
	 * finally multiply by the prior
	 */

Kipp Cannon's avatar
Kipp Cannon committed
189
	return ln_P + log_prior(Rf, Rb);
190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206
}


/*
 * ============================================================================
 *
 *                    Rate Estimation --- Posterior Class
 *
 * ============================================================================
 */


/*
 * Structure
 */


207
struct LogPosterior {
208 209 210 211 212 213 214
	PyObject_HEAD

	/*
	 * array of P(L | signal) / P(L | noise) for the L's of all events
	 * in the experiment's results
	 */

215 216
	double *ln_f_over_b;
	int ln_f_over_b_len;
217
	gsl_integration_cquad_workspace *workspace;
218 219 220 221 222 223 224 225 226 227
};


/*
 * __del__() method
 */


static void __del__(PyObject *self)
{
228
	struct LogPosterior *posterior = (struct LogPosterior *) self;
229

230 231 232
	free(posterior->ln_f_over_b);
	posterior->ln_f_over_b = NULL;
	posterior->ln_f_over_b_len = 0;
233 234
	gsl_integration_cquad_workspace_free(posterior->workspace);
	posterior->workspace = NULL;
235 236 237 238 239 240 241 242 243 244 245 246

	self->ob_type->tp_free(self);
}


/*
 * __init__() method
 */


static int __init__(PyObject *self, PyObject *args, PyObject *kwds)
{
247
	struct LogPosterior *posterior = (struct LogPosterior *) self;
248 249 250 251 252 253 254 255 256 257 258 259 260 261 262
	PyArrayObject *arr;
	int i;

	if(!PyArg_ParseTuple(args, "O!", &PyArray_Type, &arr))
		return -1;

	if(PyArray_NDIM(arr) != 1) {
		PyErr_SetString(PyExc_ValueError, "wrong number of dimensions");
		return -1;
	}
	if(!PyArray_ISFLOAT(arr) || !PyArray_ISPYTHON(arr)) {
		PyErr_SetObject(PyExc_TypeError, (PyObject *) arr);
		return -1;
	}

263 264
	posterior->ln_f_over_b_len = *PyArray_DIMS(arr);
	posterior->ln_f_over_b = malloc(posterior->ln_f_over_b_len * sizeof(*posterior->ln_f_over_b));
265

266 267
	for(i = 0; i < posterior->ln_f_over_b_len; i++)
		posterior->ln_f_over_b[i] = *(double *) PyArray_GETPTR1(arr, i);
268

269
	condition(posterior->ln_f_over_b, posterior->ln_f_over_b_len);
270

271 272
	posterior->workspace = gsl_integration_cquad_workspace_alloc(GSL_WORKSPACE_SIZE);

273 274 275 276 277 278 279 280 281 282 283
	return 0;
}


/*
 * __call__() method
 */


static PyObject *__call__(PyObject *self, PyObject *args, PyObject *kw)
{
284
	struct LogPosterior *posterior = (struct LogPosterior *) self;
285 286 287 288 289 290 291 292 293
	double Rf, Rb;

	if(kw) {
		PyErr_SetString(PyExc_ValueError, "unexpected keyword arguments");
		return NULL;
	}
	if(!PyArg_ParseTuple(args, "(dd)", &Rf, &Rb))
		return NULL;

Kipp Cannon's avatar
Kipp Cannon committed
294
	/*
295
	 * return log_posterior()
Kipp Cannon's avatar
Kipp Cannon committed
296 297
	 */

298
	return PyFloat_FromDouble(log_posterior(posterior->ln_f_over_b, posterior->ln_f_over_b_len, Rf, Rb, posterior->workspace));
299 300 301 302 303 304 305 306
}


/*
 * Type information
 */


307
static PyTypeObject LogPosterior_Type = {
308
	PyObject_HEAD_INIT(NULL)
309
	.tp_basicsize = sizeof(struct LogPosterior),
310 311 312 313 314
	.tp_call = __call__,
	.tp_dealloc = __del__,
	.tp_doc = "",
	.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES,
	.tp_init = __init__,
315
	.tp_name = MODULE_NAME ".LogPosterior",
316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334
	.tp_new = PyType_GenericNew,
};


/*
 * ============================================================================
 *
 *                                Entry Point
 *
 * ============================================================================
 */


void init_rate_estimation(void)
{
	PyObject *module = Py_InitModule3(MODULE_NAME, NULL, "");

	import_array();

335
	if(PyType_Ready(&LogPosterior_Type) < 0)
336
		return;
337 338
	Py_INCREF((PyObject *) &LogPosterior_Type);
	PyModule_AddObject(module, "LogPosterior", (PyObject *) &LogPosterior_Type);
339
}