inspiral_extrinsics.py

# Copyright (C) 2016,2017  Kipp Cannon
# Copyright (C) 2011--2014  Kipp Cannon, Chad Hanna, Drew Keppel
# Copyright (C) 2013  Jacob Peoples
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.


#
# =============================================================================
#
#                                   Preamble
#
# =============================================================================
#


try:
	from fpconst import NaN, NegInf, PosInf
except ImportError:
	# fpconst is not part of the standard library and might not be
	# available
	NaN = float("nan")
	NegInf = float("-inf")
	PosInf = float("+inf")
import itertools
import math
import numpy
import os
import random
from scipy import stats
from scipy import spatial
import sys
import h5py

from glue.ligolw import ligolw
from glue.ligolw import lsctables
from glue.ligolw import array as ligolw_array
from glue.ligolw import param as ligolw_param
from glue.ligolw import utils as ligolw_utils
from glue.text_progress_bar import ProgressBar
from gstlal import stats as gstlalstats
import lal
from lal import rate
from lalburst import snglcoinc
import lalsimulation


# FIXME:  caution, this information might get organized differently later.
# for now we just need to figure out where the gstlal-inspiral directory in
# share/ is.  don't write anything that assumes that this module will
# continue to define any of these symbols
from gstlal import paths as gstlal_config_paths


__all__ = [
	"instruments_rate_given_noise",
	"P_instruments_given_signal",
	"SNRPDF",
	"NumeratorSNRCHIPDF"
]


#
# =============================================================================
#
#                              Instrument Combos
#
# =============================================================================
#


def P_instruments_given_signal(horizon_distances, n_samples = 500000, min_instruments = 2, min_distance = 0.):
	"""
	Example:

	>>> P_instruments_given_signal({"H1": 120., "L1": 120.})
	{frozenset(['H1', 'L1']): 1.0}
	>>> P_instruments_given_signal({"H1": 120., "L1": 120.}, min_instruments = 1)
	{frozenset(['L1']): 0.25423904879460091, frozenset(['H1', 'L1']): 0.49116512120682387, frozenset(['H1']): 0.25459582999857527}
	"""
	if n_samples < 1:
		raise ValueError("n_samples=%d must be >= 1" % n_samples)
	if min_distance < 0.:
		raise ValueError("min_distance=%g must be >= 0" % min_distance)
	if min_instruments < 1:
		raise ValueError("min_instruments=%d must be >= 1" % min_instruments)

	# get instrument names
	names = tuple(horizon_distances.keys())
	# get the horizon distances in that same order
	DH = numpy.array(tuple(horizon_distances.values()))
	# get detecor responses in that same order
	resps = [lalsimulation.DetectorPrefixToLALDetector(str(inst)).response for inst in names]

	# initialize output.  dictionary mapping instrument combination to
	# probability (initially all 0).
	result = dict.fromkeys((frozenset(instruments) for n in range(min_instruments, len(names) + 1) for instruments in itertools.combinations(names, n)), 0.0)
	if not result:
		raise ValueError("not enough instruments in horizon_distances to satisfy min_instruments")

	# check for no-op
	if (DH != 0.).sum() < min_instruments:
		# not enough instruments are on to form a coinc with the
		# minimum required instruments.  this is not considered an
		# error condition, return all probabilities = 0.  NOTE:
		# result is not normalizable.
		return result

	# we select random uniformly-distributed right ascensions so
	# there's no point in also choosing random GMSTs and any value is
	# as good as any other
	gmst = 0.0

	# in the loop, we'll need a sequence of integers to enumerate
	# instruments.  construct it here to avoid doing it repeatedly in
	# the loop
	indexes = tuple(range(len(names)))

	# avoid attribute look-ups and arithmetic in the loop
	acos = math.acos
	numpy_array = numpy.array
	numpy_dot = numpy.dot
	numpy_sqrt = numpy.sqrt
	random_uniform = random.uniform
	twopi = 2. * math.pi
	pi_2 = math.pi / 2.
	xlal_am_resp = lal.ComputeDetAMResponse

	# loop as many times as requested
	successes = fails = 0
	while successes < n_samples:
		# select random sky location and source orbital plane
		# inclination and choice of polarization
		ra = random_uniform(0., twopi)
		dec = pi_2 - acos(random_uniform(-1., 1.))
		psi = random_uniform(0., twopi)
		cosi2 = random_uniform(-1., 1.)**2.

		# compute F+^2 and Fx^2 for each antenna from the sky
		# location and antenna responses
		fpfc2 = numpy_array(tuple(xlal_am_resp(resp, ra, dec, psi, gmst) for resp in resps))**2.

		# 1/8 ratio of inverse SNR to distance for each instrument
		# (1/8 because horizon distance is defined for an SNR of 8,
		# and we've omitted that factor for performance)
		snr_times_D_over_8 = DH * numpy_sqrt(numpy_dot(fpfc2, ((1. + cosi2)**2. / 4., cosi2)))

		# the volume visible to each instrument given the
		# requirement that a source be above the SNR threshold is
		#
		# V = [constant] * (8 * snr_times_D_over_8 / snr_threshold)**3.
		#
		# but in the end we'll only need ratios of these volumes so
		# we can omit the proportionality constant and we can also
		# omit the factor of (8 / snr_threshold)**3
		#
		# NOTE:  noise-induced SNR fluctuations have the effect of
		# allowing sources slightly farther away than would
		# nominally allow them to be detectable to be seen above
		# the detection threshold with some non-zero probability,
		# and sources close enough to be detectable to be masked by
		# noise and missed with some non-zero probability.
		# accounting for this effect correctly shows it to provide
		# an additional multiplicative factor to the volume that
		# depends only on the SNR threshold.  therefore, like all
		# the other factors common to all instruments, it too can
		# be ignored.
		V_at_snr_threshold = snr_times_D_over_8**3.

		# order[0] is index of instrument that can see sources the
		# farthest, order[1] is index of instrument that can see
		# sources the next farthest, etc.
		order = sorted(indexes, key = V_at_snr_threshold.__getitem__, reverse = True)
		ordered_names = tuple(names[i] for i in order)

		# instrument combination and volume of space (up to
		# irrelevant proportionality constant) visible to that
		# combination given the requirement that a source be above
		# the SNR threshold in that combination.  sequence of
		# instrument combinations is left as a generator expression
		# for lazy evaluation
		instruments = (frozenset(ordered_names[:n]) for n in xrange(min_instruments, len(order) + 1))
		V = tuple(V_at_snr_threshold[i] for i in order[min_instruments - 1:])
		if V[0] <= min_distance:
			# fewer than the required minimum number of
			# instruments can see sources in this configuration
			# far enough way in this direction to form coincs.
			# if this happens too often we report a convergence
			# rate failure
			# FIXME:  min_distance is a misnomer, it's compared
			# to a volume-like thing with a mystery
			# proportionality constant factored out, so who
			# knows what to call it.  who cares.
			fails += 1
			if successes + fails >= n_samples and fails / float(successes + fails) > .90:
				raise ValueError("convergence too slow:  success/fail ratio = %d/%d" % (successes, fails))
			continue

		# for each instrument combination, probability that a
		# source visible to at least the minimum required number of
		# instruments is visible to that combination (here is where
		# the proportionality constant and factor of
		# (8/snr_threshold)**3 drop out of the calculation)
		P = tuple(x / V[0] for x in V)

		# accumulate result.  p - pnext is the probability that a
		# source (that is visible to at least the minimum required
		# number of instruments) is visible to that combination of
		# instruments and not any other combination of instruments.
		for key, p, pnext in zip(instruments, P, P[1:] + (0.,)):
			result[key] += p - pnext
		successes += 1
	for key in result:
		result[key] /= n_samples

	#
	# make sure it's normalized.  allow an all-0 result in the event
	# that too few instruments are available to ever form coincs
	#

	total = sum(sorted(result.values()))
	assert abs(total - 1.) < 1e-13 or total == 0., "result insufficiently well normalized: %s, sum = %g" % (result, total)
	if total != 0.:
		for key in result:
			result[key] /= total

	#
	# done
	#

	return result


#
# =============================================================================
#
#                                   SNR PDF
#
# =============================================================================
#


class SNRPDF(object):
	#
	# cache of pre-computed P(instruments | horizon distances, signal)
	# probabilities and P(SNRs | instruments, horizon distances,
	# signal) PDFs.
	#

	DEFAULT_FILENAME = os.path.join(gstlal_config_paths["pkgdatadir"], "inspiral_snr_pdf.xml.gz")
	snr_joint_pdf_cache = {}

	@ligolw_array.use_in
	@ligolw_param.use_in
	class LIGOLWContentHandler(ligolw.LIGOLWContentHandler):
		pass


	class cacheentry(object):
		"""
		One entry in the SNR PDF cache.  For internal use only.
		"""
		def __init__(self, lnP_instruments, pdf, binnedarray):
			self.lnP_instruments = lnP_instruments
			self.pdf = pdf
			self.binnedarray = binnedarray


	# FIXME:  is the default choice of distance quantization appropriate?
	def __init__(self, snr_cutoff, log_distance_tolerance = math.log(1.05), min_ratio = 0.1):
		"""
		snr_cutoff sets the minimum SNR below which it is
		impossible to obtain a candidate (the trigger SNR
		threshold).

		min_ratio sets the minimum sensitivity an instrument must
		achieve to be considered to be on at all.  An instrument
		whose horizon distance is less than min_ratio * the horizon
		distance of the most sensitive instrument is considered to
		be off (it's horizon distance is set to 0).
		"""
		if log_distance_tolerance <= 0.:
			raise ValueError("require log_distance_tolerance > 0")
		if not 0. <= min_ratio < 1.:
			raise ValueError("require 0 <= min_ratio < 1")
		self.snr_cutoff = snr_cutoff
		self.log_distance_tolerance = log_distance_tolerance
		self.min_ratio = min_ratio


	def quantize_horizon_distances(self, horizon_distances):
		"""
		if two horizon distances, D1 and D2, differ by less than

			| ln(D1 / D2) | <= log_distance_tolerance

		then they are considered to be equal for the purpose of
		recording horizon distance history, generating joint SNR
		PDFs, and so on.  if the smaller of the two is < min_ratio
		* the larger then the smaller is treated as though it were
		0.
		"""
		if any(horizon_distance < 0. for horizon_distance in horizon_distances.values()):
			raise ValueError("%s: negative horizon distance" % repr(horizon_distances))
		horizon_distance_norm = max(horizon_distances.values())
		# check for no-op:  all distances are 0.
		if horizon_distance_norm == 0.:
			return dict.fromkeys(horizon_distances, NegInf)
		# check for no-op:  map all (non-zero) values to 1
		if math.isinf(self.log_distance_tolerance):
			return dict((instrument, 1 if horizon_distance > 0. else NegInf) for instrument, horizon_distance in horizon_distances.items())
		min_distance = self.min_ratio * horizon_distance_norm
		return dict((instrument, (NegInf if horizon_distance < min_distance else int(round(math.log(horizon_distance / horizon_distance_norm) / self.log_distance_tolerance)))) for instrument, horizon_distance in horizon_distances.items())


	@property
	def quants(self):
		return [NegInf] + range(int(math.ceil(math.log(self.min_ratio) / self.log_distance_tolerance)), 1)


	def quantized_horizon_distances(self, quants):
		if math.isinf(self.log_distance_tolerance):
			return dict((instrument, 0. if math.isinf(quant) else 1.) for instrument, quant in quants)
		return dict((instrument, math.exp(quant * self.log_distance_tolerance)) for instrument, quant in quants)


	def snr_joint_pdf_keyfunc(self, instruments, horizon_distances, min_instruments):
		"""
		Internal function defining the key for cache:  two element
		tuple, first element is frozen set of instruments for which
		this is the PDF, second element is frozen set of
		(instrument, horizon distance) pairs for all instruments in
		the network.  horizon distances are normalized to fractions
		of the largest among them and then the fractions aquantized
		to integer powers of a common factor
		"""
		return frozenset(instruments), frozenset(self.quantize_horizon_distances(horizon_distances).items()), min_instruments


	def get_snr_joint_pdf_binnedarray(self, instruments, horizon_distances, min_instruments):
		return self.snr_joint_pdf_cache[self.snr_joint_pdf_keyfunc(instruments, horizon_distances, min_instruments)].binnedarray


	def get_snr_joint_pdf(self, instruments, horizon_distances, min_instruments):
		return self.snr_joint_pdf_cache[self.snr_joint_pdf_keyfunc(instruments, horizon_distances, min_instruments)].pdf


	def lnP_instruments(self, instruments, horizon_distances, min_instruments):
		return self.snr_joint_pdf_cache[self.snr_joint_pdf_keyfunc(instruments, horizon_distances, min_instruments)].lnP_instruments


	def lnP_snrs(self, snrs, horizon_distances, min_instruments):
		"""
		snrs is an instrument-->SNR mapping containing one entry
		for each instrument that sees a signal.  snrs cannot
		contain instruments that are not listed in
		horizon_distances.

		horizon_distances is an instrument-->horizon distance mapping
		containing one entry for each instrument in the network.
		For instruments that are off set horizon distance to 0.
		"""
		# retrieve the PDF using the keys of the snrs mapping for
		# the participating instruments
		lnpdf = self.get_snr_joint_pdf(snrs, horizon_distances, min_instruments)
		# PDF axes are in alphabetical order
		return lnpdf(*(snr for instrument, snr in sorted(snrs.items())))


	def add_to_cache(self, horizon_distances, min_instruments, verbose = False):
		#
		# input check
		#

		if len(horizon_distances) < min_instruments:
			raise ValueError("require at least %d instruments, got %s" % (min_instruments, ", ".join(sorted(horizon_distances))))

		#
		# compute P(instruments | horizon distances, signal)
		#

		if verbose:
			print >>sys.stderr, "For horizon distances %s, requiring %d instrument(s):" % (", ".join("%s = %.4g Mpc" % item for item in sorted(horizon_distances.items())), min_instruments)

		P_instruments = P_instruments_given_signal(
			self.quantized_horizon_distances(self.quantize_horizon_distances(horizon_distances).items()),
			min_instruments = min_instruments,
			n_samples = 1000000
		)

		if verbose:
			for key_value in sorted((",".join(sorted(key)), value) for key, value in P_instruments.items()):
				print >>sys.stderr, "\tP(%s | signal) = %g" % key_value
			print >>sys.stderr, "generating P(snrs | signal) ..."

		#
		# compute P(snrs | instruments, horizon distances, signal)
		#

		for n in range(min_instruments, len(horizon_distances) + 1):
			for instruments in itertools.combinations(sorted(horizon_distances), n):
				instruments = frozenset(instruments)
				key = self.snr_joint_pdf_keyfunc(instruments, horizon_distances, min_instruments)
				# already in cache?
				if key in self.snr_joint_pdf_cache:
					continue
				# need to build
				with ProgressBar(text = "%s candidates" % ", ".join(sorted(instruments))) if verbose else None as progressbar:
					binnedarray = self.joint_pdf_of_snrs(instruments, self.quantized_horizon_distances(key[1]), self.snr_cutoff, progressbar = progressbar)
				lnbinnedarray = binnedarray.copy()
				with numpy.errstate(divide = "ignore"):
					lnbinnedarray.array = numpy.log(lnbinnedarray.array)
				pdf = rate.InterpBinnedArray(lnbinnedarray, fill_value = NegInf)
				self.snr_joint_pdf_cache[key] = self.cacheentry(math.log(P_instruments[instruments]) if P_instruments[instruments] else NegInf, pdf, binnedarray)


	@staticmethod
	def joint_pdf_of_snrs(instruments, inst_horiz_mapping, snr_cutoff, n_samples = 160000, bins = rate.ATanLogarithmicBins(3.6, 1200., 170), progressbar = None):
		"""
		Return a BinnedArray containing

		P(snr_{inst1}, snr_{inst2}, ... | signal seen in exactly
			{inst1, inst2, ...} in a network of instruments
			with a given set of horizon distances)

		i.e., the joint probability density of observing a set of
		SNRs conditional on them being the result of signal that
		has been recovered in a given subset of the instruments in
		a network of instruments with a given set of horizon
		distances.

		The snr_cutoff parameter sets the minimum SNR required for
		a trigger (it is assumed SNRs below this value are
		impossible to observe).

		The instruments parameter specifies the instruments upon
		whose triggers the SNR distribution is conditional --- the
		SNR distribution is conditional on the signal being
		recovered by exactly these instruments and no others.

		The inst_horiz_mapping parameter is a dictionary mapping
		instrument name (e.g., "H1") to horizon distance (arbitrary
		units).  For consistency, all instruments in the network
		must be listed in inst_horiz_mapping regardless of which
		instruments are operating and regardless of which
		instruments the probability density is conditional on the
		signal being recovered in; instruments that are included in
		the network but not operating at the time should be listed
		with a horizon distance of 0.

		The axes of the PDF correspond to the instruments in
		alphabetical order.  The binning used for all axes is set
		with the bins parameter.

		The n_samples parameter sets the number of iterations for
		the internal Monte Carlo sampling loop, and progressbar can
		be a glue.text_progress_bar.ProgressBar instance for
		verbosity.
		"""
		if progressbar is not None:
			progressbar.max = n_samples

		# get instrument names in alphabetical order
		instruments = sorted(instruments)
		if len(instruments) < 1:
			raise ValueError(instruments)
		# get horizon distances and responses in that same order
		DH_times_8 = 8. * numpy.array([inst_horiz_mapping[inst] for inst in instruments])
		resps = tuple(lalsimulation.DetectorPrefixToLALDetector(str(inst)).response for inst in instruments)

		# get horizon distances and responses of remaining
		# instruments (order doesn't matter as long as they're in
		# the same order)
		DH_times_8_other = 8. * numpy.array([dist for inst, dist in inst_horiz_mapping.items() if inst not in instruments])
		resps_other = tuple(lalsimulation.DetectorPrefixToLALDetector(str(inst)).response for inst in inst_horiz_mapping if inst not in instruments)

		# initialize the PDF array, and pre-construct the sequence
		# of snr,d(snr) tuples.  since the last SNR bin probably
		# has infinite size, we remove it from the sequence
		# (meaning the PDF will be left 0 in that bin).
		pdf = rate.BinnedArray(rate.NDBins([bins] * len(instruments)))
		snr_sequence = rate.ATanLogarithmicBins(3.6, 1200., 500)
		snr_snrlo_snrhi_sequence = numpy.array(zip(snr_sequence.centres(), snr_sequence.lower(), snr_sequence.upper())[:-1])

		# compute the SNR at which to begin iterations over bins
		assert type(snr_cutoff) is float
		snr_min = snr_cutoff - 3.0
		assert snr_min > 0.0

		# no-op if one of the instruments that must be able to
		# participate in a coinc is not on.  the PDF that gets
		# returned is not properly normalized (it's all 0) but
		# that's the correct value everywhere except at SNR-->+inf
		# where the product of SNR * no sensitivity might be said
		# to give a non-zero contribution, who knows.  anyway, for
		# finite SNR, 0 is the correct value.
		if DH_times_8.min() == 0.:
			if progressbar is not None:
				progressbar.update(progressbar.max)
			return pdf

		# we select random uniformly-distributed right ascensions
		# so there's no point in also choosing random GMSTs and any
		# value is as good as any other
		gmst = 0.0

		# run the sampler the requested # of iterations.  save some
		# symbols to avoid doing module attribute look-ups in the
		# loop
		acos = math.acos
		random_uniform = random.uniform
		twopi = 2. * math.pi
		pi_2 = math.pi / 2.
		xlal_am_resp = lal.ComputeDetAMResponse
		# FIXME:  scipy.stats.rice.rvs broken on reference OS.
		# switch to it when we can rely on a new-enough scipy
		#rice_rvs = stats.rice.rvs	# broken on reference OS
		# the .reshape is needed in the event that x is a 1x1
		# array:  numpy returns a scalar from sqrt(), but we must
		# have something that we can iterate over
		rice_rvs = lambda x: numpy.sqrt(stats.ncx2.rvs(2., x**2.)).reshape(x.shape)
		for i in xrange(n_samples):
			if progressbar is not None:
				progressbar.increment()
			# select random sky location and source orbital
			# plane inclination and choice of polarization
			ra = random_uniform(0., twopi)
			dec = pi_2 - acos(random_uniform(-1., 1.))
			psi = random_uniform(0., twopi)
			cosi2 = random_uniform(-1., 1.)**2.

			# F+^2 and Fx^2 for each instrument
			fpfc2 = numpy.array(tuple(xlal_am_resp(resp, ra, dec, psi, gmst) for resp in resps))**2.
			fpfc2_other = numpy.array(tuple(xlal_am_resp(resp, ra, dec, psi, gmst) for resp in resps_other))**2.

			# ratio of distance to inverse SNR for each instrument
			fpfc_factors = ((1. + cosi2)**2. / 4., cosi2)
			snr_times_D = DH_times_8 * numpy.dot(fpfc2, fpfc_factors)**0.5

			# snr * D in instrument whose SNR grows fastest
			# with decreasing D
			max_snr_times_D = snr_times_D.max()

			# snr_times_D.min() / snr_min = the furthest a
			# source can be and still be above snr_min in all
			# instruments involved.  max_snr_times_D / that
			# distance = the SNR that distance corresponds to
			# in the instrument whose SNR grows fastest with
			# decreasing distance --- the SNR the source has in
			# the most sensitive instrument when visible to all
			# instruments in the combo
			try:
				start_index = snr_sequence[max_snr_times_D / (snr_times_D.min() / snr_min)]
			except ZeroDivisionError:
				# one of the instruments that must be able
				# to see the event is blind to it
				continue

			# min_D_other is minimum distance at which source
			# becomes visible in an instrument that isn't
			# involved.  max_snr_times_D / min_D_other gives
			# the SNR in the most sensitive instrument at which
			# the source becomes visible to one of the
			# instruments not allowed to participate
			if len(DH_times_8_other):
				min_D_other = (DH_times_8_other * numpy.dot(fpfc2_other, fpfc_factors)**0.5).min() / snr_cutoff
				try:
					end_index = snr_sequence[max_snr_times_D / min_D_other] + 1
				except ZeroDivisionError:
					# all instruments that must not see
					# it are blind to it
					end_index = None
			else:
				# there are no other instruments
				end_index = None

			# if start_index >= end_index then in order for the
			# source to be close enough to be visible in all
			# the instruments that must see it it is already
			# visible to one or more instruments that must not.
			# don't need to check for this, the for loop that
			# comes next will simply not have any iterations.

			# iterate over the nominal SNRs (= noise-free SNR
			# in the most sensitive instrument) at which we
			# will add weight to the PDF.  from the SNR in
			# most sensitive instrument, the distance to the
			# source is:
			#
			#	D = max_snr_times_D / snr
			#
			# and the (noise-free) SNRs in all instruments are:
			#
			#	snr_times_D / D
			#
			# scipy's Rice-distributed RV code is used to
			# add the effect of background noise, converting
			# the noise-free SNRs into simulated observed SNRs
			#
			# number of sources b/w Dlo and Dhi:
			#
			#	d count \propto D^2 |dD|
			#	  count \propto Dhi^3 - Dlo**3
			D_Dhi_Dlo_sequence = max_snr_times_D / snr_snrlo_snrhi_sequence[start_index:end_index]
			for snr, weight in zip(rice_rvs(snr_times_D / numpy.reshape(D_Dhi_Dlo_sequence[:,0], (len(D_Dhi_Dlo_sequence), 1))), D_Dhi_Dlo_sequence[:,1]**3. - D_Dhi_Dlo_sequence[:,2]**3.):
				pdf[tuple(snr)] += weight

		# check for divide-by-zeros that weren't caught.  also
		# finds NaNs if they're there
		assert numpy.isfinite(pdf.array).all()

		# convolve the samples with a Gaussian density estimation
		# kernel
		rate.filter_array(pdf.array, rate.gaussian_window(*(1.875,) * len(pdf.array.shape)))
		# protect against round-off in FFT convolution leading to
		# negative values in the PDF
		numpy.clip(pdf.array, 0., PosInf, pdf.array)
		# zero counts in bins that are below the trigger threshold.
		# have to convert SNRs to indexes ourselves and adjust so
		# that we don't zero the bin in which the SNR threshold
		# falls
		range_all = slice(None, None)
		range_low = slice(None, pdf.bins[0][snr_cutoff])
		for i in xrange(len(instruments)):
			slices = [range_all] * len(instruments)
			slices[i] = range_low
			pdf.array[slices] = 0.
		# convert bin counts to normalized PDF
		pdf.to_pdf()
		# one last sanity check
		assert numpy.isfinite(pdf.array).all()
		# done
		return pdf


	@classmethod
	def from_xml(cls, xml, name = u"generic"):
		name = u"%s:%s" % (name, u"inspiral_snr_pdf")
		xml = [elem for elem in xml.getElementsByTagName(ligolw.LIGO_LW.tagName) if elem.hasAttribute(u"Name") and elem.Name == name]
		if len(xml) != 1:
			raise ValueError("XML tree must contain exactly one %s element named %s" % (ligolw.LIGO_LW.tagName, name))
		xml, = xml
		self = cls(
			snr_cutoff = ligolw_param.get_pyvalue(xml, u"snr_cutoff"),
			log_distance_tolerance = ligolw_param.get_pyvalue(xml, u"log_distance_tolerance"),
			min_ratio = ligolw_param.get_pyvalue(xml, u"min_ratio")
		)
		for elem in xml.childNodes:
			if elem.tagName != ligolw.LIGO_LW.tagName:
				continue
			binnedarray = rate.BinnedArray.from_xml(elem, elem.Name.rsplit(u":", 1)[0])

			key = (
				frozenset(lsctables.instrumentsproperty.get(ligolw_param.get_pyvalue(elem, u"instruments:key"))),
				frozenset((inst.strip(), float(quant) if math.isinf(float(quant)) else int(quant)) for inst, quant in (inst_quant.split(u"=") for inst_quant in ligolw_param.get_pyvalue(elem, u"quantizedhorizons:key").split(u","))),
				ligolw_param.get_pyvalue(elem, u"min_instruments:key")
			)

			lnbinnedarray = binnedarray.copy()
			with numpy.errstate(divide = "ignore"):
				lnbinnedarray.array = numpy.log(lnbinnedarray.array)
			self.snr_joint_pdf_cache[key] = self.cacheentry(ligolw_param.get_pyvalue(elem, u"lnp_instruments"), rate.InterpBinnedArray(lnbinnedarray, fill_value = NegInf), binnedarray)
		return self


	def to_xml(self, name = u"generic"):
		xml = ligolw.LIGO_LW()
		xml.Name = u"%s:%s" % (name, u"inspiral_snr_pdf")
		xml.appendChild(ligolw_param.Param.from_pyvalue(u"snr_cutoff", self.snr_cutoff))
		xml.appendChild(ligolw_param.Param.from_pyvalue(u"log_distance_tolerance", self.log_distance_tolerance))
		xml.appendChild(ligolw_param.Param.from_pyvalue(u"min_ratio", self.min_ratio))
		for i, (key, entry) in enumerate(self.snr_joint_pdf_cache.items()):
			elem = xml.appendChild(entry.binnedarray.to_xml(u"%d:pdf" % i))
			elem.appendChild(ligolw_param.Param.from_pyvalue(u"lnp_instruments", entry.lnP_instruments))
			elem.appendChild(ligolw_param.Param.from_pyvalue(u"instruments:key", lsctables.instrumentsproperty.set(key[0])))
			elem.appendChild(ligolw_param.Param.from_pyvalue(u"quantizedhorizons:key", u",".join(u"%s=%.17g" % inst_quant for inst_quant in sorted(key[1]))))
			elem.appendChild(ligolw_param.Param.from_pyvalue(u"min_instruments:key", key[2]))
		return xml


	@classmethod
	def load(cls, fileobj = None, verbose = False):
		if fileobj is None:
			fileobj = open(cls.DEFAULT_FILENAME)
		return cls.from_xml(ligolw_utils.load_fileobj(fileobj, gz = True, contenthandler = cls.LIGOLWContentHandler)[0])


#
# =============================================================================
#
#                                 dt dphi PDF
#
# =============================================================================
#


dt_chebyshev_coeffs_polynomials = []
dt_chebyshev_coeffs_polynomials.append(numpy.poly1d([-597.94227757949329, 2132.773853473127, -2944.126306979932, 1945.3033368083029, -603.91576991927593, 70.322754873993347]))
dt_chebyshev_coeffs_polynomials.append(numpy.poly1d([-187.50681052710425, 695.84172327044325, -1021.0688423797938, 744.3266490236075, -272.12853221391498, 35.542404632554508]))
dt_chebyshev_coeffs_polynomials.append(numpy.poly1d([52.128579054466599, -198.32054234352267, 301.34865541080791, -230.8522943433488, 90.780611645135437, -16.310130528927655]))
dt_chebyshev_coeffs_polynomials.append(numpy.poly1d([48.216566165393878, -171.70632176976451, 238.48370471322843, -159.65005032451785, 50.122296925755677, -5.5466740894321367]))
dt_chebyshev_coeffs_polynomials.append(numpy.poly1d([-34.030336093450863, 121.44714070928059, -169.91439486329773, 115.86873916702386, -38.08411813067778, 4.7396784315927816]))
dt_chebyshev_coeffs_polynomials.append(numpy.poly1d([3.4576823675810178, -12.362609260376738, 17.3300203922424, -11.830868787176165, 3.900284020272442, -0.47157315012399248]))
dt_chebyshev_coeffs_polynomials.append(numpy.poly1d([4.0423239651315113, -14.492611904657275, 20.847419746265583, -15.033846689362553, 5.3953159232942216, -0.78132676885883601]))
norm_polynomial = numpy.poly1d([-348550.84040194791, 2288151.9147818103, -6623881.5646601757, 11116243.157047395, -11958335.1384027, 8606013.1361163966, -4193136.6690072878, 1365634.0450674745, -284615.52077054407, 34296.855844416605, -1815.7135263788341])

dt_chebyshev_coeffs = [0]*13


def __dphi_calc_A(combined_snr, delta_t):
        B = -10.840765 * numpy.abs(delta_t) + 1.072866
        M = 46.403738 * numpy.abs(delta_t) - 0.160205
        nu = 0.009032 * numpy.abs(delta_t) + 0.001150
        return (1.0 / (1.0 + math.exp(-B*(combined_snr - M)))**(1.0/nu))


def __dphi_calc_mu(combined_snr, delta_t):
        if delta_t >= 0.0:
                A = 76.234617*delta_t + 0.001639
                B = 0.290863
                C = 4.775688
                return (3.145953 - A* math.exp(-B*(combined_snr-C)))
        elif delta_t < 0.0:
                A = -130.877663*delta_t - 0.002814
                B = 0.31023
                C = 3.184671
                return (3.145953 + A* math.exp(-B*(combined_snr-C)))


def __dphi_calc_kappa(combined_snr, delta_t):
        K = -176.540199*numpy.abs(delta_t) + 7.4387
        B = -7.599585*numpy.abs(delta_t) + 0.215074
        M = -1331.386835*numpy.abs(delta_t) - 35.309173
        nu = 0.012225*numpy.abs(delta_t) + 0.000066
        return (K / (1.0 + math.exp(-B*(combined_snr - M)))**(1.0/nu))


def lnP_dphi_signal(delta_phi, delta_t, combined_snr):
	# Compute von mises parameters
	A_param = __dphi_calc_A(combined_snr, delta_t)
	mu_param = __dphi_calc_mu(combined_snr, delta_t)
	kappa_param = __dphi_calc_kappa(combined_snr, delta_t)
	C_param = 1.0 - A_param

	return math.log(A_param*stats.vonmises.pdf(delta_phi, kappa_param, loc=mu_param) + C_param/(2*math.pi))


def lnP_dt_signal(dt, snr_ratio):
	# FIXME Work out correct model

	# Fits below an snr ratio of 0.33 are not reliable
	if snr_ratio < 0.33:
		snr_ratio = 0.33

	dt_chebyshev_coeffs[0] = dt_chebyshev_coeffs_polynomials[0](snr_ratio)
	dt_chebyshev_coeffs[2] = dt_chebyshev_coeffs_polynomials[1](snr_ratio)
	dt_chebyshev_coeffs[4] = dt_chebyshev_coeffs_polynomials[2](snr_ratio)
	dt_chebyshev_coeffs[6] = dt_chebyshev_coeffs_polynomials[3](snr_ratio)
	dt_chebyshev_coeffs[8] = dt_chebyshev_coeffs_polynomials[4](snr_ratio)
	dt_chebyshev_coeffs[10] = dt_chebyshev_coeffs_polynomials[5](snr_ratio)
	dt_chebyshev_coeffs[12] = dt_chebyshev_coeffs_polynomials[6](snr_ratio)

	return numpy.polynomial.chebyshev.chebval(dt/0.015013, dt_chebyshev_coeffs) - numpy.log(norm_polynomial(snr_ratio))

def lnP_dt_dphi_uniform_H1L1(coincidence_window_extension):
	# FIXME Dont hardcode
	# NOTE This assumes the standard delta t
	return -math.log((snglcoinc.light_travel_time("H1", "L1") + coincidence_window_extension) * (2. * math.pi))


def lnP_dt_dphi_uniform(coincidence_window_extension):
	# NOTE Currently hardcoded for H1L1
	# NOTE this is future proofed so that in > 2 ifo scenarios, the
	# appropriate length can be chosen for the uniform dt distribution
	return lnP_dt_dphi_uniform_H1L1(coincidence_window_extension)


def lnP_dt_dphi_signal(snrs, phase, dt, horizons, coincidence_window_extension):
	# Return P(dt, dphi|{rho_{IFO}}, signal)
	# FIXME Insert actual signal models
	if sorted(dt.keys()) == ("H1", "L1"):
		delta_t = float(dt["H1"] - dt["L1"])
		delta_phi = (phase["H1"] - phase["L1"]) % (2*math.pi)
		combined_snr = math.sqrt(snrs["H1"]**2. + snrs["L1"]**2.)
		if horizons["H1"] != 0 and horizons["L1"] != 0:
			# neither are zero, proceed as normal
			H1_snr_over_horizon = snrs["H1"] / horizons["H1"]
			L1_snr_over_horizon = snrs["L1"] / horizons["L1"]

		elif horizons["H1"] == horizons["L1"]:
			# both are zero, treat as equal
			H1_snr_over_horizon = snrs["H1"]
			L1_snr_over_horizon = snrs["L1"]

		else:
			# one of them is zero, treat snr_ratio as 0, which will get changed to 0.33 in lnP_dt_signal
			# FIXME is this the right thing to do?
			return lnP_dt_signal(abs(delta_t), 0.33) + lnP_dphi_signal(delta_phi, delta_t, combined_snr)

		if H1_snr_over_horizon > L1_snr_over_horizon:
			snr_ratio = L1_snr_over_horizon / H1_snr_over_horizon

		else:
			snr_ratio = H1_snr_over_horizon / L1_snr_over_horizon

		return lnP_dt_signal(abs(delta_t), snr_ratio) + lnP_dphi_signal(delta_phi, delta_t, combined_snr)

	else:
		# IFOs != {H1,L1} case, thus just return uniform
		# distribution so that dt/dphi terms dont affect
		# likelihood ratio
		# FIXME Work out general N detector case
		return lnP_dt_dphi_uniform(coincidence_window_extension)


#
# =============================================================================
#
#                               SNR, \chi^2 PDF
#
# =============================================================================
#


class NumeratorSNRCHIPDF(rate.BinnedLnPDF):
	"""
	Reports ln P(chi^2/rho^2 | rho, signal)
	"""
	# NOTE:  by happy coincidence numpy's broadcasting rules allow the
	# inherited .at_centres() method to be used as-is
	def __init__(self, *args, **kwargs):
		super(rate.BinnedLnPDF, self).__init__(*args, **kwargs)
		self.volume = self.bins[1].upper() - self.bins[1].lower()
		# FIXME: instead of .shape and repeat() use .broadcast_to()
		# when we can rely on numpy >= 1.8
		#self.volume = numpy.broadcast_to(self.volume, self.bins.shape)
		self.volume.shape = (1, len(self.volume))
		self.volume = numpy.repeat(self.volume, len(self.bins[0]), 0)
		self.norm = numpy.zeros((len(self.bins[0]), 1))

	def __getitem__(self, coords):
		return numpy.log(super(rate.BinnedLnPDF, self).__getitem__(coords)) - self.norm[self.bins(*coords)[0]]

	def marginalize(self, *args, **kwargs):
		raise NotImplementedError

	def __iadd__(self, other):
		# the total count is meaningless, it serves only to set the
		# scale by which the density estimation kernel chooses its
		# size, so we preserve the count across this operation.  if
		# the two arguments have different counts, use the
		# geometric mean unless one of the two is 0 in which case
		# don't screw with the total count
		self_count, other_count = self.array.sum(), other.array.sum()
		super(rate.BinnedLnPDF, self).__iadd__(other)
		if self_count and other_count:
			self.array *= numpy.exp((numpy.log(self_count) + numpy.log(other_count)) / 2.) / self.array.sum()
		self.norm = numpy.log(numpy.exp(self.norm) + numpy.exp(other.norm))
		return self

	def normalize(self):
		# replace the vector with a new one so that we don't
		# interfere with any copies that might have been made
		with numpy.errstate(divide = "ignore"):
			self.norm = numpy.log(self.array.sum(axis = 1))
		self.norm.shape = (len(self.norm), 1)

	@staticmethod
	def add_signal_model(lnpdf, n, prefactors_range, df, inv_snr_pow = 4., snr_min = 4., progressbar = None):
		if df <= 0.:
			raise ValueError("require df >= 0: %s" % repr(df))
		pfs = numpy.linspace(prefactors_range[0], prefactors_range[1], 100)
		if progressbar is not None:
			progressbar.max = len(pfs)

		# FIXME:  except for the low-SNR cut, the slicing is done
		# to work around various overflow and loss-of-precision
		# issues in the extreme parts of the domain of definition.
		# it would be nice to identify the causes of these and
		# either fix them or ignore them one-by-one with a comment
		# explaining why it's OK to ignore the ones being ignored.
		# for example, computing snrchi2 by exponentiating the sum
		# of the logs of the terms might permit its evaluation
		# everywhere on the domain.  can ncx2pdf() be made to work
		# everywhere?
		snrindices, rcossindices = lnpdf.bins[snr_min:1e10, 1e-10:1e10]
		snr, dsnr = lnpdf.bins[0].centres()[snrindices], lnpdf.bins[0].upper()[snrindices] - lnpdf.bins[0].lower()[snrindices]
		rcoss, drcoss = lnpdf.bins[1].centres()[rcossindices], lnpdf.bins[1].upper()[rcossindices] - lnpdf.bins[1].lower()[rcossindices]

		snr2 = snr**2.
		snrchi2 = numpy.outer(snr2, rcoss) * df

		arr = numpy.zeros_like(lnpdf.array)
		for pf in pfs:
			if progressbar is not None:
				progressbar.increment()
			arr[snrindices, rcossindices] += gstlalstats.ncx2pdf(snrchi2, df, numpy.array([pf * snr2]).T)

		# convert to counts by multiplying by bin volume, and also
		# multiply by an SNR powr law
		arr[snrindices, rcossindices] *= numpy.outer(dsnr / snr**inv_snr_pow, drcoss)

		# normalize to a total count of n
		arr *= n / arr.sum()

		# add to lnpdf
		lnpdf.array += arr

	def to_xml(self, *args, **kwargs):
		elem = super(rate.BinnedLnPDF, self).to_xml(*args, **kwargs)
		elem.appendChild(ligolw_array.Array.build("norm", self.norm))
		return elem

	@classmethod
	def from_xml(cls, xml, name):
		xml = cls.get_xml_root(xml, name)
		self = super(rate.BinnedLnPDF, cls).from_xml(xml, name)
		self.norm = ligolw_array.get_array(xml, "norm").array
		return self


#
# =============================================================================
#
#                               dt, dphi, deff ratio PDF
#
# =============================================================================
#


class InspiralExtrinsics(object):
	def __init__(self):
		DEFAULT_FILENAME = os.path.join(gstlal_config_paths["pkgdatadir"], "inspiral_dtdphi_pdf.h5")
		self.TimePhaseSNR = TimePhaseSNR.from_hdf5(DEFAULT_FILENAME)
		self.p_of_ifos = {}
		# FIXME these need to be renormalized based on a
		# min_instruments which this class will need to take as a
		# parameter
		self.p_of_ifos[("H1", "L1", "V1",)] = p_of_instruments_given_horizons.from_hdf5(os.path.join(gstlal_config_paths["pkgdatadir"], "H1L1V1_p_of_instruments_given_H_d.h5"))
		self.p_of_ifos[("H1", "L1",)] = p_of_instruments_given_horizons.from_hdf5(os.path.join(gstlal_config_paths["pkgdatadir"], "H1L1_p_of_instruments_given_H_d.h5"))
		self.p_of_ifos[("H1", "V1",)] = p_of_instruments_given_horizons.from_hdf5(os.path.join(gstlal_config_paths["pkgdatadir"], "H1V1_p_of_instruments_given_H_d.h5"))
		self.p_of_ifos[("L1", "V1",)] = p_of_instruments_given_horizons.from_hdf5(os.path.join(gstlal_config_paths["pkgdatadir"], "L1V1_p_of_instruments_given_H_d.h5"))

	def p_of_instruments_given_horizons(self, instruments, horizons):
		horizons = dict((k,v) for k,v in horizons.items() if v != 0)
		# if only one instrument is on the probability is 1.0
		if set(horizons) & set(instruments) != set(instruments):
			raise ValueError("A trigger exists for a detector with zero horizon distance")
		if len(horizons) == 1:
			return 1.0
		on_ifos = tuple(sorted(horizons.keys()))
		return self.p_of_ifos[on_ifos](instruments, horizons)

def chunker(seq, size):
	return (seq[pos:pos + size] for pos in xrange(0, len(seq), size))


def normsq_along_one(x):
	return numpy.add.reduce(x * x, axis=(1,))


def margprob(Dmat):
	out = []
	for D in Dmat:
		D = D[numpy.isfinite(D)]
		step = max(int(len(D) / 32.), 1)
		D = D[::step]
		if len(D) == 33:
			out.append(step * scipy.integrate.romb(numpy.exp(-D**2/2.)))
		else:
			out.append(step * scipy.integrate.simps(numpy.exp(-D**2/2.)))
	return out


class TimePhaseSNR(object):

	# NOTE to save a couple hundred megs of ram we do not
	# include kagra for now...
	responses = {"H1": lal.CachedDetectors[lal.LHO_4K_DETECTOR].response, "L1":lal.CachedDetectors[lal.LLO_4K_DETECTOR].response, "V1":lal.CachedDetectors[lal.VIRGO_DETECTOR].response}#, "K1":lal.CachedDetectors[lal.KAGRA_DETECTOR].response}
	locations = {"H1":lal.CachedDetectors[lal.LHO_4K_DETECTOR].location, "L1":lal.CachedDetectors[lal.LLO_4K_DETECTOR].location, "V1":lal.CachedDetectors[lal.VIRGO_DETECTOR].location}#, "K1":lal.CachedDetectors[lal.KAGRA_DETECTOR].location}

	def __init__(self, tree_data = None, margsky = None):

		# FIXME compute this more reliably or expose it as a property
		# or something
		self.sigma = {"time": 0.001, "phase": numpy.pi / 8, "deff": 0.2}
		self.norm = (4 * numpy.pi**2)**2
		self.tree_data = tree_data
		self.margsky = margsky

		if self.tree_data is None:
			time, phase, deff = TimePhaseSNR.tile()
			self.tree_data = self.dtdphideffpoints(time, phase, deff, self.slices)

		# produce KD trees for all the combinations.  NOTE we slice
		# into the same array for memory considerations.  the KDTree
		# does *not* make copies of the data so be careful to not
		# modify it afterward
		self.KDTree = {}
		for combo in self.combos:
			print "initializing tree for: ", combo
			slcs = sorted(sum(self.instrument_pair_slices(self.instrument_pairs(combo)).values(),[]))
			self.KDTree[combo] = spatial.cKDTree(self.tree_data[:,slcs])

		# NOTE: This is super slow we have a premarginalized h5 file in
		# the tree, see the helper class InspiralExtrinsics
		if self.margsky is None:
			self.margsky = {}
			for combo in self.combos:
				print "marginalizing tree for: ", combo
				slcs = sorted(sum(self.instrument_pair_slices(self.instrument_pairs(combo)).values(),[]))
				#
				# NOTE we approximate the marginalization
				# integral with 10% of the sky points
				#
				num_points = int(self.tree_data.shape[0] / 10.)

				marg = []
				# FIXME the n_jobs parameter is not available
				# in the reference platforms, but this doesn't
				# get used in practice during an actual
				# analysis.  This will use 8GB of RAM and keep
				# a box pretty busy.
				for points in chunker(self.tree_data[:,slcs], 1000):
					Dmat = self.KDTree[combo].query(points, k=num_points, distance_upper_bound = 3.5, n_jobs=-1)[0]
					marg.extend(margprob(Dmat))
				self.margsky[combo] = numpy.array(marg, dtype="float32")

	def to_hdf5(self, fname):
		f = h5py.File(fname, "w")
		dgrp = f.create_group("gstlal_extparams")
		dgrp.create_dataset("treedata", data = self.tree_data, compression="gzip")
		mgrp = dgrp.create_group("marg")
		for combo in self.combos:
			mgrp.create_dataset(",".join(combo), data = self.margsky[combo], compression="gzip")
		f.close()

	@staticmethod
	def from_hdf5(fname):
		f = h5py.File(fname, "r")
		dgrp = f["gstlal_extparams"]
		tree_data = numpy.array(dgrp["treedata"])
		margsky = {}
		for combo in dgrp["marg"]:
			key = tuple(combo.split(","))
			margsky[key] = numpy.array(dgrp["marg"][combo])
		return TimePhaseSNR(tree_data = tree_data, margsky = margsky)

	@property
	def combos(self):
		return self.instrument_combos(self.responses)

	@property
	def pairs(self):
		out = []
		for combo in self.combos:
			out.extend(self.instrument_pairs(combo))
		return tuple(sorted(list(set(out))))

	@property
	def slices(self):
		# we will define indexes for slicing into a subset of instrument data
		return dict((pair, [3*n,3*n+1,3*n+2]) for n,pair in enumerate(self.pairs))

	def instrument_pair_slices(self, pairs):
		s = self.slices
		return dict((pair, s[pair]) for pair in pairs)

	@classmethod
	def instrument_combos(cls, instruments, min_instruments = 2):
		combos = set()
		# FIXME this probably should be exposed, but 1 doesn't really make sense anyway
		for i in range(min_instruments, len(instruments,) + 1):
			for choice in itertools.combinations(instruments, i):
				# NOTE the reference IFO is always the first in
				# alphabetical order for any given combination,
				# hence the sort here
				combos.add(tuple(sorted(choice)))
		return tuple(sorted(list(combos)))

	def instrument_pairs(self, instruments):
		# They should be sorted, but it doesn't hurt
		out = []
		instruments = tuple(sorted(instruments))
		for i in instruments[1:]:
			out.append((instruments[0], i))
		return tuple(out)

	def dtdphideffpoints(self, time, phase, deff, slices):
		# order is dt, dphi and effective distance ratio for each combination
		# NOTE the instruments argument here really needs to come from calling instrument_pairs()
		if hasattr(time.values()[0], "__iter__"):
			outlen = len(time.values()[0])
		else:
			outlen =1
		out = numpy.zeros((outlen, 1 + max(sum(slices.values(),[]))), dtype="float32")

		for ifos, slc in slices.items():
			ifo1, ifo2 = ifos
			out[:,slc[0]] = (time[ifo1] - time[ifo2]) / self.sigma["time"]
			out[:,slc[1]] = (phase[ifo1] - phase[ifo2]) / self.sigma["phase"]
			# FIXME should this be the ratio - 1 or without the -1 ???
			out[:,slc[2]] = (deff[ifo1] / deff[ifo2] - 1) / self.sigma["deff"]

		return out

	@classmethod
	def tile(cls, NSIDE = 16, NANGLE = 33):
		# FIXME should be put at top, but do we require healpy?  It
		# isn't necessary for running at the moment since cached
		# versions of this will be used.
		import healpy
		healpix_idxs = numpy.arange(healpy.nside2npix(NSIDE))
		# We are concerned with a shell on the sky at some fiducial
		# time (which simply fixes Earth as a natural coordinate
		# system)
		T = lal.LIGOTimeGPS(0)
		GMST = lal.GreenwichMeanSiderealTime(T)
		D = 1.
		phase = dict((ifo, numpy.zeros(len(healpix_idxs) * NANGLE**2, dtype="float32")) for ifo in cls.responses)
		deff = dict((ifo, numpy.zeros(len(healpix_idxs) * NANGLE**2, dtype="float32")) for ifo in cls.responses)
		time = dict((ifo, numpy.zeros(len(healpix_idxs) * NANGLE**2, dtype="float32")) for ifo in cls.responses)

		print "tiling sky: \n"
		cnt = 0
		for i in healpix_idxs:
			print "sky %04d of %04d\r" % (i, len(healpix_idxs)),
			DEC, RA = healpy.pix2ang(NSIDE, i)
			DEC -= numpy.pi / 2
			for COSIOTA in numpy.linspace(-1, 1, NANGLE):
				hplus = 0.5 * (1.0 + COSIOTA**2)
				hcross = COSIOTA
				for PSI in numpy.linspace(0, numpy.pi * 2, NANGLE):
					for ifo in cls.responses:
						Fplus, Fcross = lal.ComputeDetAMResponse(cls.responses[ifo], RA, DEC, PSI, GMST)
						phase[ifo][cnt] = -numpy.arctan2(Fcross * hcross, Fplus * hplus)
						deff[ifo][cnt] = D / ((Fplus * hplus)**2 + (Fcross * hcross)**2)**0.5
						time[ifo][cnt] = lal.TimeDelayFromEarthCenter(cls.locations[ifo], RA, DEC, T)
					cnt += 1

		print "\n...done"
		return time, phase, deff

	def __call__(self, time, phase, snr, horizon):
		deff = dict((k, horizon[k] / snr[k] * 8.0) for k in snr)
		slices = dict((pair, [3*n,3*n+1,3*n+2]) for n,pair in enumerate(self.instrument_pairs(time)))
		point = self.dtdphideffpoints(time, phase, deff, slices)
		combo = tuple(sorted(time))
		treedataslices = sorted(sum(self.instrument_pair_slices(self.instrument_pairs(combo)).values(),[]))
		nearestix = self.KDTree[combo].query(point)[1]
		nearestpoint = self.tree_data[nearestix, treedataslices]
		D = (point - nearestpoint)[0]
		D2 = numpy.dot(D,D)
		# FIXME 4. / (sum(s**2 for s in S.values())**.5)**4 is the term
		# that goes like rho^-4 with a somewhat arbitrary
		# normilization.  Could use the network snr minimum to
		# normalize it properly
		return numpy.exp(-D2 / 2.) * self.margsky[combo][nearestix] / self.norm * 4. / (sum(s**2 for s in snr.values())**.5)**4


class p_of_instruments_given_horizons(object):
	def __init__(self, instruments = ("H1", "L1", "V1"), snr_thresh = 4., nbins = 41, hmin = 0.05, hmax = 20.0, histograms = None):
		self.instruments = tuple(sorted(list(instruments)))
		self.snr_thresh = snr_thresh
		self.nbins = nbins
		self.hmin = hmin
		self.hmax = hmax
		# NOTE should be sorted

		if histograms is not None:
			self.histograms = histograms
			self.first_center = histograms.values()[0].centres()[0][0]
			self.last_center = histograms.values()[0].centres()[0][-1]
			for combo in histograms:
				histograms[combo] = rate.InterpBinnedArray(histograms[combo])
		else:
			combos = TimePhaseSNR.instrument_combos(self.instruments, min_instruments = 1)
			self.histograms = {}
			bins = []
			for i in range(len(self.instruments) - 1):
				bins.append(rate.LogarithmicBins(self.hmin, self.hmax, self.nbins))
			for combo in combos:
				print combo
				self.histograms[combo] = rate.BinnedArray(rate.NDBins(bins))

			self.first_center = histograms.values()[0].centres()[0][0]
			self.last_center = histograms.values()[0].centres()[0][-1]
			_, _, deff = TimePhaseSNR.tile(NSIDE = 8, NANGLE = 17)
			alldeff = []
			for v in deff.values():
				alldeff.extend(v)
			mindeff = min(alldeff)
			maxdeff = max(alldeff)
			print mindeff, maxdeff

			for horizontuple in itertools.product(*[b.centres() for b in bins]):
				horizondict = {}
				# NOTE by defn the first instrument in alpha order will always have a horizon of 1
				horizondict[self.instruments[0]] = 1.0
				for i, ifo in enumerate(self.instruments[1:]):
					horizondict[ifo] = horizontuple[i]
				snrs = {}
				snrs_above_thresh = {}
				snrs_below_thresh = {}
				prob = []
				for cnt, ifo in enumerate(horizondict):
					# FIXME remember to carefully comment this logic
					LOW = self.hmin * 8. / self.snr_thresh / maxdeff
					HIGH = max(horizontuple + (1,)) * 8. / self.snr_thresh / mindeff
					for D in numpy.linspace(LOW, HIGH, 200):
						#blah = 8 * horizondict[ifo] / (D * deff[ifo])
						#print ifo, len(blah), D, 8 * horizondict[ifo], len(blah[blah > 4])
						snrs.setdefault(ifo,[]).extend(8 * horizondict[ifo] / (D * deff[ifo]))
						if cnt == 0:
							prob.extend([D**2] * len(deff[ifo]))
					snrs[ifo] = stats.ncx2.rvs(2, numpy.array(snrs[ifo])**2)**.5
					snrs_above_thresh[ifo] = snrs[ifo] >= self.snr_thresh
					snrs_below_thresh[ifo] = snrs[ifo] < self.snr_thresh
					print horizontuple, ifo, len(snrs_above_thresh[ifo][snrs_above_thresh[ifo]])
					prob = numpy.array(prob)
				total = 0.
				for combo in combos:
					for cnt, ifo in enumerate(combo):
						if cnt == 0:
							must_be_above = snrs_above_thresh[ifo].copy()
						else:
							must_be_above &= snrs_above_thresh[ifo]
					# the ones above thresh must be accompanied with the compliment to this combo being below thresh
					for ifo in set(self.instruments) - set(combo):
						must_be_above &= snrs_below_thresh[ifo]
					# sum up the prob
					count = sum(prob[must_be_above])
					self.histograms[combo][horizontuple] += count
					total += count
				for I in self.histograms:
					self.histograms[I][horizontuple] /= total
					print horizontuple, I, self.histograms[I][horizontuple]
					self.histograms[I] = rate.InterpBinnedArray(histograms[I])

	def __call__(self, instruments, horizon_distances):
		H = [horizon_distances[k] for k in sorted(horizon_distances)]
		return self.histograms[tuple(sorted(instruments))](*[min(max(h / H[0], self.first_center), self.last_center) for h in H[1:]])

	def to_hdf5(self, fname):
		f = h5py.File(fname, "w")
		grp = f.create_group("gstlal_p_of_instruments")
		grp.attrs["snr_thresh"] = self.snr_thresh
		grp.attrs["hmin"] = self.hmin
		grp.attrs["hmax"] = self.hmax
		grp.attrs["nbins"] = self.nbins
		grp.attrs["instruments"] = ",".join(self.instruments)
		for combo in self.histograms:
			grp.create_dataset(",".join(combo), data = self.histograms[combo].array, compression="gzip")
		f.close()

	@staticmethod
	def from_hdf5(fname):
		f = h5py.File(fname, "r")
		grp = f["gstlal_p_of_instruments"]
		snr_thresh = grp.attrs["snr_thresh"]
		hmin = grp.attrs["hmin"]
		hmax = grp.attrs["hmax"]
		nbins = grp.attrs["nbins"]
		instruments = tuple(sorted(grp.attrs["instruments"].split(",")))
		histograms = {}
		bins = []
		for i in range(len(instruments) - 1):
			bins.append(rate.LogarithmicBins(hmin, hmax, nbins))
		for combo in TimePhaseSNR.instrument_combos(instruments, min_instruments = 1):
			histograms[combo] = rate.BinnedArray(rate.NDBins(bins))
			histograms[combo].array[:] = numpy.array(grp[",".join(combo)])[:]
		f.close()
		return p_of_instruments_given_horizons(instruments = instruments, snr_thresh = snr_thresh, nbins = nbins, hmin = hmin, hmax = hmax, histograms = histograms)