From 65c31a40749f1f905546e63c0195107750904a06 Mon Sep 17 00:00:00 2001
From: Jacob Peoples <jacob.peoples@ligo.org>
Date: Wed, 21 Aug 2013 17:18:54 -0400
Subject: [PATCH] gstlal-inspiral/far.py:  port to MCMC-based ranking statistic
 PDFs

This patch replaces the guts of LocalRankingData:  the ranking
statistic PDFs are now estimated by histogramming the likelihood ratio
evaluated at MCMC (emcee) samples drawn from the signal and noise
models, instead of being built from outer products of
single-instrument likelihood histograms.
---
 .../bin/gstlal_inspiral_calc_likelihood       |   2 +-
 .../gstlal_inspiral_marginalize_likelihood    |   2 +-
 .../bin/gstlal_inspiral_plot_background       |   2 +-
 gstlal-inspiral/python/far.py                 | 354 ++++++++++--------
 4 files changed, 204 insertions(+), 156 deletions(-)

diff --git a/gstlal-inspiral/bin/gstlal_inspiral_calc_likelihood b/gstlal-inspiral/bin/gstlal_inspiral_calc_likelihood
index 0f854b696f..a55b44e2b3 100755
--- a/gstlal-inspiral/bin/gstlal_inspiral_calc_likelihood
+++ b/gstlal-inspiral/bin/gstlal_inspiral_calc_likelihood
@@ -156,7 +156,7 @@ if options.verbose:
 coincparamsdistributions.finish(verbose = options.verbose)
 
 if options.write_likelihood is not None:
-	Far.compute_joint_instrument_background(remap = {frozenset(["H1", "H2", "L1"]) : frozenset(["H1", "L1"]), frozenset(["H1", "H2", "V1"]) : frozenset(["H1", "V1"]), frozenset(["H1", "H2", "L1", "V1"]) : frozenset(["H1", "L1", "V1"])}, instruments = Far.trials_table.get_sngl_ifos(), verbose = options.verbose)	
+	Far.compute_likelihood_pdfs(remap = {frozenset(["H1", "H2", "L1"]) : frozenset(["H1", "L1"]), frozenset(["H1", "H2", "V1"]) : frozenset(["H1", "V1"]), frozenset(["H1", "H2", "L1", "V1"]) : frozenset(["H1", "L1", "V1"])}, instruments = Far.trials_table.get_sngl_ifos(), verbose = options.verbose)	
 	xmldoc = inspiral.gen_likelihood_control_doc(Far, ("H1", "H2", "L1", "V1"))
 	utils.write_filename(xmldoc, options.write_likelihood, gz = (options.write_likelihood or "stdout").endswith(".gz"), verbose = options.verbose)
 
diff --git a/gstlal-inspiral/bin/gstlal_inspiral_marginalize_likelihood b/gstlal-inspiral/bin/gstlal_inspiral_marginalize_likelihood
index 4c1a8753dc..c75931df2f 100644
--- a/gstlal-inspiral/bin/gstlal_inspiral_marginalize_likelihood
+++ b/gstlal-inspiral/bin/gstlal_inspiral_marginalize_likelihood
@@ -145,7 +145,7 @@ for url in urls:
 
 	# FIXME This assumes that a trials table was initialized for all jobs with the same set of instruments.  It will only compute the likelihood background for those instruments to save time.
 	likelihood_data.smooth_distribution_stats(verbose = options.verbose)
-	likelihood_data.compute_joint_instrument_background(remap = {frozenset(["H1", "H2", "L1"]) : frozenset(["H1", "L1"]), frozenset(["H1", "H2", "V1"]) : frozenset(["H1", "V1"]), frozenset(["H1", "H2", "L1", "V1"]) : frozenset(["H1", "L1", "V1"])}, instruments = likelihood_data.trials_table.get_sngl_ifos(), verbose = options.verbose)
+	likelihood_data.compute_likelihood_pdfs(remap = {frozenset(["H1", "H2", "L1"]) : frozenset(["H1", "L1"]), frozenset(["H1", "H2", "V1"]) : frozenset(["H1", "V1"]), frozenset(["H1", "H2", "L1", "V1"]) : frozenset(["H1", "L1", "V1"])}, instruments = likelihood_data.trials_table.get_sngl_ifos(), verbose = options.verbose)
 
 	#
 	# merge into marginalized data
diff --git a/gstlal-inspiral/bin/gstlal_inspiral_plot_background b/gstlal-inspiral/bin/gstlal_inspiral_plot_background
index 4272ce40c0..b04138f280 100644
--- a/gstlal-inspiral/bin/gstlal_inspiral_plot_background
+++ b/gstlal-inspiral/bin/gstlal_inspiral_plot_background
@@ -83,7 +83,7 @@ if options.marginalized_file:
 	marg, procid = far.RankingData.from_xml(utils.load_filename(options.marginalized_file, contenthandler = ligolw.LIGOLWContentHandler, verbose = options.verbose))
 	marg.compute_joint_cdfs()
 
-	for k in marg.joint_likelihood_pdfs:
+	for k in marg.likelihood_pdfs:
 		N = sum(total_count[ifo] for ifo in k)
 		#FIXME hardcoded num_slides to 2, don't do that!
 		m = marg.trials_table[k].count * marg.trials_table[k].count_below_thresh / marg.trials_table[k].thresh / float(abs(marg.livetime_seg)) * marg.trials_table.num_nonzero_count() / 2
diff --git a/gstlal-inspiral/python/far.py b/gstlal-inspiral/python/far.py
index 523a756a5e..aedecebb61 100644
--- a/gstlal-inspiral/python/far.py
+++ b/gstlal-inspiral/python/far.py
@@ -61,6 +61,7 @@ from glue.ligolw.utils import search_summary as ligolw_search_summary
 from glue.ligolw.utils import segments as ligolw_segments
 from glue import segments
 from glue.segmentsUtils import vote
+from gstlal import emcee
 from pylal import inject
 from pylal import progress
 from pylal import rate
@@ -374,18 +375,6 @@ lsctables.TableByName[lsctables.table.StripTableName(TrialsTable.TrialsTableTabl
 #
 
 
-class FilterThread(snglcoinc.CoincParamsFilterThread):
-	# populate the bins with probabilities (not probability densities
-	# as in the default implementation)
-	def run(self):
-		with self.cpu:
-			if self.verbose:
-				with self.stderr:
-					print >>sys.stderr, "%s," % self.getName(),
-			rate.filter_array(self.binnedarray.array, self.filter)
-			self.binnedarray.array /= self.binnedarray.array.sum()
-
-
 class ThincaCoincParamsDistributions(snglcoinc.CoincParamsDistributions):
 	# FIXME:  switch to new default when possible
 	ligo_lw_name_suffix = u"pylal_ligolw_burca_tailor_coincparamsdistributions"
@@ -512,6 +501,13 @@ class ThincaCoincParamsDistributions(snglcoinc.CoincParamsDistributions):
 				P *= self.injection_pdf_interp[name](*value)
 		return P
 
+	@staticmethod
+	def create_emcee_lnprob_wrapper(lnprobfunc, keys):
+		keys = tuple(sorted(keys))
+		# coords[0::2] = rho
+		# coords[1::2] = chi^2/rho^2
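+		# for example (illustrative only), with
+		# keys = ("H1_snr_chi", "L1_snr_chi") the returned callable maps
+		# coords = [rho_H1, x_H1, rho_L1, x_L1], where x = chi^2/rho^2, to
+		# lnprobfunc({"H1_snr_chi": (rho_H1, x_H1), "L1_snr_chi": (rho_L1, x_L1)})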
+		return lambda coords: lnprobfunc(dict(zip(keys, zip(coords[0::2], coords[1::2]))))
+
 	def add_background_prior(self, n = 1., transition = 10., instruments = None, prefactors_range = (1.0, 10.0), df = 40, verbose = False):
 		for param, binarr in self.background_rates.items():
 			new_binarr = rate.BinnedArray(binarr.bins)
@@ -820,54 +816,83 @@ def P_instruments_given_signal(inst_horiz_mapping, snr_threshold, n_samples = 50
 #
 
 
-def likelihood_bin_boundaries(likelihoods, probabilities, minint = 1e-2, maxint = (1 - 1e-14)):
+def binned_likelihood_rates_from_samples(samples, bins_per_decade = 250.0, min_bins = 1000, limits = None):
+	"""
+	Construct and return a BinnedArray containing a histogram of a
+	sequence of samples.  If limits is None (default) then the limits
+	of the binning will be determined automatically from the sequence,
+	otherwise limits is expected to be a tuple or other two-element
+	sequence providing the (low, high) limits, and in that case the
+	sequence can be a generator.
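+
+	Example (illustrative only;  explicit limits allow a generator to be
+	histogrammed in a single pass):
+
+	>>> rates = binned_likelihood_rates_from_samples(iter([1.5, 2.0, 40.0]), limits = (1e-2, 1e+2))
+	>>> rates.array.sum()
+	3.0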
+	"""
+	if limits is None:
+		# add a factor of 10 of padding for the smoothing that will
+		# be done later
+		lo, hi = min(samples) / 10.0, max(samples) * 10.0
+	else:
+		lo, hi = limits
+		if lo >= hi:
+			raise ValueError("limits out of order (%g, %g)" % limits)
+	nbins = int(round(bins_per_decade * math.log10(hi / lo)))
+	if nbins < 1:
+		raise ValueError("bins_per_decade (%g) too small for limits (%g, %g)" % (bins_per_decade, lo, hi))
+	nbins = max(nbins, min_bins)
+	rates = rate.BinnedArray(rate.NDBins((rate.LogarithmicPlusOverflowBins(lo, hi, nbins),)))
+	# histogram the likelihood ratio samples
+	for lamb in samples:
+		rates[lamb,] += 1.0
+	return rates
+
+
+def run_mcmc(n_walkers, n_dim, n_samples_per_walker, lnprobfunc, args = (), n_burn = 100, mean = 1.0, stdev = 1.0):
 	"""
-	A function to choose the likelihood bin boundaries based on a certain
-	interval in the likelihood pdfs set by minint and maxint. This should typically
-	be combined with Overflow binning to catch the edges
+	A generator function that yields samples distributed according to a
+	user-supplied probability density function that need not be
+	normalized.  lnprobfunc computes and returns the natural logarithm
+	of the probability density, up to a constant offset.  n_dim sets
+	the number of dimensions of the parameter space over which the PDF
+	is defined and args gives any additional arguments to be passed to
+	lnprobfunc, whose signature must be
+
+		ln(P) = lnprobfunc(X, *args)
+
+	where X is a numpy array of length n_dim.
+
+	The generator yields a total of n_walkers * n_samples_per_walker
+	samples drawn from the n_dim-dimensional parameter space.  Each
+	sample is returned as a numpy array.
+
+	mean and stdev adjust the Gaussian random number generator used to
+	set the initial co-ordinates of the walkers.  n_burn iterations of
+	the MCMC sampler will be executed and discarded to allow the system
+	to stabilize before samples are yielded to the calling code.
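+
+	Example (a minimal sketch, for illustration only;  the Gaussian log
+	density is not part of the pipeline):
+
+	>>> def lnprob(x):
+	...     return -0.5 * numpy.dot(x, x)
+	>>> samples = list(run_mcmc(16, 2, 100, lnprob, n_burn = 10))
+	>>> len(samples)	# n_walkers * n_samples_per_walker
+	1600
+	>>> len(samples[0])	# n_dim
+	2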
 	"""
-	finite_likelihoods = numpy.isfinite(likelihoods)
-	likelihoods = likelihoods[finite_likelihoods]
-	background_pdf = probabilities[finite_likelihoods]
-	
-	sortindex = likelihoods.argsort()
-	likelihoods = likelihoods[sortindex]
-	background_pdf = background_pdf[sortindex]
-
-	s = background_pdf.cumsum() / background_pdf.sum()
-	# restrict range to a reasonable confidence interval to make the best use of our bin resolution
-	minlikelihood = likelihoods[s.searchsorted(minint, side = 'right')]
-	maxlikelihood = likelihoods[s.searchsorted(maxint)]
-	if minlikelihood == 0:
-		minlikelihood = max(likelihoods[likelihoods != 0].min(), 1e-100) # to prevent numerical issues
-	return minlikelihood, maxlikelihood
-
-
-def possible_ranks_array(likelihood_pdfs, ifo_set, targetlen):
-	# start with an identity array to seed the outerproduct chain
-	ranks = numpy.array([1.0])
-	vals = numpy.array([1.0])
-	# FIXME:  probably only works because the pdfs aren't pdfs but probabilities
-	for ifo in ifo_set:
-		likelihood_pdf = likelihood_pdfs[ifo]
-		# FIXME lower instead of centres() to avoid inf in the last bin
-		ranks = numpy.outer(ranks, likelihood_pdf.bins.lower()[0])
-		vals = numpy.outer(vals, likelihood_pdf.array)
-		ranks = ranks.reshape((ranks.shape[0] * ranks.shape[1],))
-		vals = vals.reshape((vals.shape[0] * vals.shape[1],))
-		# rebin the outer-product
-		minlikelihood, maxlikelihood = likelihood_bin_boundaries(ranks, vals)
-		new_likelihood_pdf = rate.BinnedArray(rate.NDBins((rate.LogarithmicPlusOverflowBins(minlikelihood, maxlikelihood, targetlen),)))
-		for rank,val in zip(ranks,vals):
-			new_likelihood_pdf[rank,] += val
-		ranks = new_likelihood_pdf.bins.lower()[0]
-		vals = new_likelihood_pdf.array
-
-	# FIXME the size of these is targetlen which has to be small
-	# since it is squared in the outer product.  We can afford to
-	# store a 1e6 array.  Maybe we should try to make the resulting
-	# pdf bigger somehow?
-	return new_likelihood_pdf
+	#
+	# construct a sampler
+	#
+
+	sampler = emcee.EnsembleSampler(n_walkers, n_dim, lnprobfunc, args = args, threads = 1)
+
+	#
+	# start the walkers at Gaussian-distributed initial positions, and
+	# run for a while to get better initial positions
+	#
+	# FIXME:  these starting points work OK, but don't know how to tune
+	# for sure
+	#
+
+	pos0 = numpy.random.normal(loc = mean, scale = stdev, size = (n_walkers, n_dim))
+	pos0, ignored, ignored = sampler.run_mcmc(pos0, n_burn, storechain = False)
+
+	#
+	# reset and yield positions distributed according to the supplied
+	# PDF
+	#
+
+	sampler.reset()
+	for coordslist, ignored, ignored in sampler.sample(pos0, iterations = n_samples_per_walker, storechain = False):
+		for coords in coordslist:
+			yield coords
 
 
 #
@@ -876,23 +901,19 @@ def possible_ranks_array(likelihood_pdfs, ifo_set, targetlen):
 
 
 class LocalRankingData(object):
-	def __init__(self, livetime_seg, coinc_params_distributions, trials_table = None, target_length = 1000):
+	def __init__(self, livetime_seg, coinc_params_distributions, trials_table = None):
 		self.distributions = coinc_params_distributions
+		self.likelihoodratio = snglcoinc.LikelihoodRatio(coinc_params_distributions)
 		if trials_table is None:
 			self.trials_table = TrialsTable()
 		else:
 			self.trials_table = trials_table
-		self.likelihood_pdfs = {}
-		self.joint_likelihood_pdfs = {}
+		self.background_likelihood_rates = {}
+		self.background_likelihood_pdfs = {}
+		self.signal_likelihood_rates = {}
+		self.signal_likelihood_pdfs = {}
 		self.livetime_seg = livetime_seg
 
-		#
-		# the target FAP resolution is 1000 bins by default. This is purely
-		# for memory/CPU requirements
-		#
-
-		self.target_length = target_length
-
 	def __iadd__(self, other):
 		self.distributions += other.distributions
 		self.trials_table += other.trials_table
@@ -907,6 +928,9 @@ class LocalRankingData(object):
 		maxend = max(self.livetime_seg[1], other.livetime_seg[1])
 		self.livetime_seg = segments.segment(minstart, maxend)
 
+		self.distributions.addbinnedarrays(self.background_likelihood_rates, other.background_likelihood_rates, self.background_likelihood_pdfs, other.background_likelihood_pdfs)
+		self.distributions.addbinnedarrays(self.signal_likelihood_rates, other.signal_likelihood_rates, self.signal_likelihood_pdfs, other.signal_likelihood_pdfs)
+
 		return self
 
 	@classmethod
@@ -930,10 +954,15 @@ class LocalRankingData(object):
 		self = cls(livetime_seg, coinc_params_distributions = distributions, trials_table = TrialsTable.from_xml(llw_elem))
 
 		# pull out the joint likelihood arrays if they are present
-		for ba_elem in [elem for elem in xml.getElementsByTagName(ligolw.LIGO_LW.tagName) if elem.hasAttribute(u"Name") and "_joint_likelihood" in elem.getAttribute(u"Name")]:
-			ifo_set = frozenset(lsctables.instrument_set_from_ifos(ba_elem.getAttribute(u"Name").split("_")[0]))
-			self.joint_likelihood_pdfs[ifo_set] = rate.binned_array_from_xml(ba_elem, ba_elem.getAttribute(u"Name").split(":")[0])
-		
+		def reconstruct(xml, prefix, target_dict):
+			for ba_elem in [elem for elem in xml.getElementsByTagName(ligolw.LIGO_LW.tagName) if elem.hasAttribute(u"Name") and ("_%s" % prefix) in elem.getAttribute(u"Name")]:
+				ifo_set = frozenset(lsctables.instrument_set_from_ifos(ba_elem.getAttribute(u"Name").split("_")[0]))
+				target_dict[ifo_set] = rate.binned_array_from_xml(ba_elem, ba_elem.getAttribute(u"Name").split(":")[0])
+		reconstruct(xml, u"background_likelihood_rate", self.background_likelihood_rates)
+		reconstruct(xml, u"background_likelihood_pdf", self.background_likelihood_pdfs)
+		reconstruct(xml, u"signal_likelihood_rate", self.signal_likelihood_rates)
+		reconstruct(xml, u"signal_likelihood_pdf", self.signal_likelihood_pdfs)
+
 		return self
 
 	@classmethod
@@ -942,99 +971,118 @@ class LocalRankingData(object):
 		for f in filenames[1:]:
 			self += LocalRankingData.from_xml(ligolw_utils.load_filename(f, contenthandler = contenthandler, verbose = verbose), name = name)
 		return self
-		
+
 	def to_xml(self, name = u"gstlal_inspiral_likelihood"):
 		xml = ligolw.LIGO_LW({u"Name": u"%s:gstlal_inspiral_FAR" % name})
 		xml.appendChild(self.trials_table.to_xml())
 		xml.appendChild(self.distributions.to_xml(name))
-		for key in self.joint_likelihood_pdfs:
-			ifostr = lsctables.ifos_from_instrument_set(key).replace(",","")
-			xml.appendChild(rate.binned_array_to_xml(self.joint_likelihood_pdfs[key], "%s_joint_likelihood" % (ifostr,)))
-		return xml
-
-	def smooth_distribution_stats(self, verbose = False):
-		if self.distributions is not None:
-			# FIXME:  this results in the .distributions
-			# object's PDF arrays containing *probabilities*
-			# not probability densities. this might be changed
-			# in the future.
-			if verbose:
-				print >>sys.stderr, "smoothing parameter distributions ...",
-			self.distributions.finish(verbose = verbose, filterthread = FilterThread)
-			if verbose:
-				print >>sys.stderr, "done"
+		def store(xml, prefix, source_dict):
+			for key, binnedarray in source_dict.items():
+				ifostr = lsctables.ifos_from_instrument_set(key).replace(",","")
+				xml.appendChild(rate.binned_array_to_xml(binnedarray, u"%s_%s" % (ifostr, prefix)))
+		store(xml, u"background_likelihood_rate", self.background_likelihood_rates)
+		store(xml, u"background_likelihood_pdf", self.background_likelihood_pdfs)
+		store(xml, u"signal_likelihood_rate", self.signal_likelihood_rates)
+		store(xml, u"signal_likelihood_pdf", self.signal_likelihood_pdfs)
 
-	def compute_single_instrument_background(self, instruments = None, verbose = False):
-		# initialize a likelihood ratio evaluator
-		likelihood_ratio_evaluator = snglcoinc.LikelihoodRatio(self.distributions)
-
-		# reduce typing
-		background = self.distributions.background_pdf
-		injections = self.distributions.injection_pdf
-
-		self.likelihood_pdfs.clear()
-		for param in background:
-			# FIXME only works if there is a 1-1 relationship between params and instruments
-			instrument = param.split("_")[0]
+		return xml
 
-			# save some computation if we only requested certain instruments
-			if instruments is not None and instrument not in instruments:
-				continue
+	def finish(self):
+		self.background_likelihood_pdfs.clear()
+		self.signal_likelihood_pdfs.clear()
+		for key, binnedarray in self.background_likelihood_rates.items():
+			assert not numpy.isnan(binnedarray.array).any(), "%s noise model likelihood ratio counts contain NaNs" % key
+			# copy counts
+			self.background_likelihood_pdfs[key] = binnedarray.copy()
+			# smooth on a scale of ~1/4 of an e-fold in likelihood ratio
+			bins_per_efold = binnedarray.bins[0].n / math.log(binnedarray.bins[0].max / binnedarray.bins[0].min)
+			rate.filter_array(self.background_likelihood_pdfs[key].array, rate.gaussian_window(.25 * bins_per_efold))
+			# guard against round-off in FFT convolution
+			# yielding negative probability densities
+			numpy.clip(self.background_likelihood_pdfs[key].array, 0.0, PosInf, self.background_likelihood_pdfs[key].array)
+			# convert to normalized PDF
+			self.background_likelihood_pdfs[key].to_pdf()
+		for key, binnedarray in self.signal_likelihood_rates.items():
+			assert not numpy.isnan(binnedarray.array).any(), "%s signal model likelihood ratio counts contain NaNs" % key
+			# copy counts
+			self.signal_likelihood_pdfs[key] = binnedarray.copy()
+			# smooth on a scale of ~1/4 of an e-fold in likelihood ratio
+			bins_per_efold = binnedarray.bins[0].n / math.log(binnedarray.bins[0].max / binnedarray.bins[0].min)
+			rate.filter_array(self.signal_likelihood_pdfs[key].array, rate.gaussian_window(.25 * bins_per_efold))
+			# guard against round-off in FFT convolution
+			# yielding negative probability densities
+			numpy.clip(self.signal_likelihood_pdfs[key].array, 0.0, PosInf, self.signal_likelihood_pdfs[key].array)
+			# convert to normalized PDF
+			self.signal_likelihood_pdfs[key].to_pdf()
 
-			if verbose:
-				print >>sys.stderr, "updating likelihood background for %s" % instrument
-
-			likelihoods = injections[param].array / background[param].array
-			# ignore infs and nans because background is never
-			# found in those bins.  the boolean array indexing
-			# flattens the array
-			minlikelihood, maxlikelihood = likelihood_bin_boundaries(likelihoods, background[param].array)
-			# construct PDF
-			# FIXME:  because the background array contains
-			# probabilities and not probability densities, the
-			# likelihood_pdfs contain probabilities and not
-			# densities, as well, when this is done
-			self.likelihood_pdfs[instrument] = rate.BinnedArray(rate.NDBins((rate.LogarithmicPlusOverflowBins(minlikelihood, maxlikelihood, self.target_length),)))
-			for coords in iterutils.MultiIter(*background[param].bins.centres()):
-				likelihood = likelihood_ratio_evaluator({param: coords})
-				if numpy.isfinite(likelihood):
-					self.likelihood_pdfs[instrument][likelihood,] += background[param][coords]
-
-	def compute_joint_instrument_background(self, remap, instruments = None, verbose = False):
-		# first get the single detector distributions
-		self.compute_single_instrument_background(instruments = instruments, verbose = verbose)
-
-		self.joint_likelihood_pdfs.clear()
+	def smooth_distribution_stats(self, verbose = False):
+		if verbose:
+			print >>sys.stderr, "smoothing parameter distributions ...",
+		self.distributions.finish()
+		if verbose:
+			print >>sys.stderr, "done"
+
+	def likelihoodratio_samples(self, ln_prob, keys, ndim, nwalkers = None, nburn = 5000, nsamples = 100000):
+		keys = tuple(sorted(keys))
+		# FIXME:  don't know how to tune nwalkers.  must be even, and >
+		# 2 * ndim
+		if nwalkers is None:
+			nwalkers = 24 * ndim
+		for coords in run_mcmc(nwalkers, ndim, nsamples, ln_prob, n_burn = nburn):
+			# coords[0::2] = rho
+			# coords[1::2] = chi^2/rho^2
+			lamb = self.likelihoodratio(dict(zip(keys, zip(coords[0::2], coords[1::2]))))
+			if not math.isinf(lamb) and not math.isnan(lamb):	# FIXME:  is this needed?
+				yield lamb
+
+	def compute_likelihood_pdfs(self, remap, instruments = None, verbose = False):
+		self.background_likelihood_rates.clear()
+		self.signal_likelihood_rates.clear()
+
+		# get default instruments from whatever we have SNR PDFs for
+		if instruments is None:
+			instruments = set()
+			for key in self.distributions.snr_joint_pdf_cache:
+				instruments |= set(instrument for instrument, distance in key)
+		instruments = tuple(instruments)
 
 		# calculate all of the possible ifo combinations with at least
 		# 2 detectors in order to get the joint likelihood pdfs
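+		# for example (illustrative), with remap = {frozenset(["H1", "H2", "L1"]):
+		# frozenset(["H1", "L1"])} the H1,H2,L1 combination re-uses the H1,L1
+		# PDFs instead of being sampled separately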
-		if instruments is None:
-			instruments = self.likelihood_pdfs.keys()
 		for n in range(2, len(instruments) + 1):
 			for ifos in iterutils.choices(instruments, n):
 				ifo_set = frozenset(ifos)
 				remap_set = remap.setdefault(ifo_set, ifo_set)
 
 				if verbose:
-					print >>sys.stderr, "computing joint likelihood background for %s remapped to %s" % (lsctables.ifos_from_instrument_set(ifo_set), lsctables.ifos_from_instrument_set(remap_set))
+					print >>sys.stderr, "computing likelihood PDFs for %s remapped to %s" % (lsctables.ifos_from_instrument_set(ifo_set), lsctables.ifos_from_instrument_set(remap_set))
 
 				# only recompute if necessary, some choices
-				# may not be if a certain remap set is
+				# might not need recomputing if a remap set is
 				# provided
-				if remap_set not in self.joint_likelihood_pdfs:
-					self.joint_likelihood_pdfs[remap_set] = possible_ranks_array(self.likelihood_pdfs, remap_set, self.target_length)
+				if remap_set not in self.background_likelihood_rates:
+					ndim = 2 * len(ifo_set)
+					keys = frozenset("%s_snr_chi" % inst for inst in ifo_set)
+
+					ln_prob = self.distributions.create_emcee_lnprob_wrapper(self.distributions.lnP_signal, keys)
+					self.signal_likelihood_rates[remap_set] = binned_likelihood_rates_from_samples(self.likelihoodratio_samples(ln_prob, keys, ndim), limits = (1e-2, 1e+100))
+
+					ln_prob = self.distributions.create_emcee_lnprob_wrapper(self.distributions.lnP_noise, keys)
+					self.background_likelihood_rates[remap_set] = binned_likelihood_rates_from_samples(self.likelihoodratio_samples(ln_prob, keys, ndim), limits = (1e-2, 1e+100))
+
+				self.background_likelihood_rates[ifo_set] = self.background_likelihood_rates[remap_set]
+				self.signal_likelihood_rates[ifo_set] = self.signal_likelihood_rates[remap_set]
 
-				self.joint_likelihood_pdfs[ifo_set] = self.joint_likelihood_pdfs[remap_set]
+		self.finish()
 
 
 class RankingData(object):
 	def __init__(self, local_ranking_data):
 		# ensure the trials tables' keys match the likelihood
 		# histograms' keys
-		assert set(local_ranking_data.joint_likelihood_pdfs) == set(local_ranking_data.trials_table)
+		assert set(local_ranking_data.background_likelihood_pdfs) == set(local_ranking_data.trials_table)
 
 		# copy likelihood ratio PDFs
-		self.joint_likelihood_pdfs = dict((key, copy.deepcopy(value)) for key, value in local_ranking_data.joint_likelihood_pdfs.items())
+		self.likelihood_pdfs = dict((key, copy.deepcopy(value)) for key, value in local_ranking_data.background_likelihood_pdfs.items())
 
 		# copy trials table counts
 		self.trials_table = TrialsTable()
@@ -1058,10 +1106,10 @@ class RankingData(object):
 			pass
 		fake_local_ranking_data = fake_local_ranking_data()
 		fake_local_ranking_data.trials_table = TrialsTable.from_xml(llw_elem)
-		fake_local_ranking_data.joint_likelihood_pdfs = {}
+		fake_local_ranking_data.background_likelihood_pdfs = {}
 		for key in fake_local_ranking_data.trials_table:
 			ifostr = lsctables.ifos_from_instrument_set(key).replace(",","")
-			fake_local_ranking_data.joint_likelihood_pdfs[key] = rate.binned_array_from_xml(llw_elem, ifostr)
+			fake_local_ranking_data.background_likelihood_pdfs[key] = rate.binned_array_from_xml(llw_elem, ifostr)
 
 		# the code that writes these things has put the
 		# livetime_seg into the out segment in the search_summary
@@ -1079,9 +1127,9 @@ class RankingData(object):
 		xml = ligolw.LIGO_LW({u"Name": u"%s:gstlal_inspiral_ranking_data" % name})
 		xml.appendChild(ligolw_param.new_param(u"process_id", u"ilwd:char", process.process_id))
 		xml.appendChild(self.trials_table.to_xml())
-		for key in self.joint_likelihood_pdfs:
+		for key in self.likelihood_pdfs:
 			ifostr = lsctables.ifos_from_instrument_set(key).replace(",","")
-			xml.appendChild(rate.binned_array_to_xml(self.joint_likelihood_pdfs[key], ifostr))
+			xml.appendChild(rate.binned_array_to_xml(self.likelihood_pdfs[key], ifostr))
 		assert search_summary.process_id == process.process_id
 		search_summary.set_out(self.livetime_seg)
 		return xml
@@ -1091,29 +1139,29 @@ class RankingData(object):
 		our_trials = self.trials_table
 		other_trials = other.trials_table
 
-		our_keys = set(self.joint_likelihood_pdfs)
-		other_keys  = set(other.joint_likelihood_pdfs)
+		our_keys = set(self.likelihood_pdfs)
+		other_keys  = set(other.likelihood_pdfs)
 
 		# PDFs that only we have are unmodified
 		pass
 
 		# PDFs that only the new data has get copied verbatim
 		for k in other_keys - our_keys:
-			self.joint_likelihood_pdfs[k] = copy.deepcopy(other.joint_likelihood_pdfs[k])
+			self.likelihood_pdfs[k] = copy.deepcopy(other.likelihood_pdfs[k])
 
 		# PDFs that we have and are in the new data get replaced
 		# with the weighted sum, re-binned
 		for k in our_keys & other_keys:
-			minself, maxself, nself = self.joint_likelihood_pdfs[k].bins[0].min, self.joint_likelihood_pdfs[k].bins[0].max, self.joint_likelihood_pdfs[k].bins[0].n
-			minother, maxother, nother = other.joint_likelihood_pdfs[k].bins[0].min, other.joint_likelihood_pdfs[k].bins[0].max, other.joint_likelihood_pdfs[k].bins[0].n
-			new_joint_likelihood_pdf =  rate.BinnedArray(rate.NDBins((rate.LogarithmicPlusOverflowBins(min(minself, minother), max(maxself, maxother), max(nself, nother)),)))
+			minself, maxself, nself = self.likelihood_pdfs[k].bins[0].min, self.likelihood_pdfs[k].bins[0].max, self.likelihood_pdfs[k].bins[0].n
+			minother, maxother, nother = other.likelihood_pdfs[k].bins[0].min, other.likelihood_pdfs[k].bins[0].max, other.likelihood_pdfs[k].bins[0].n
+			new_likelihood_pdf =  rate.BinnedArray(rate.NDBins((rate.LogarithmicPlusOverflowBins(min(minself, minother), max(maxself, maxother), max(nself, nother)),)))
 
-			for x in self.joint_likelihood_pdfs[k].centres()[0]:
-				new_joint_likelihood_pdf[x,] += self.joint_likelihood_pdfs[k][x,] * float(our_trials[k].count or 1) / ((our_trials[k].count + other_trials[k].count) or 1)
-			for x in other.joint_likelihood_pdfs[k].centres()[0]:
-				new_joint_likelihood_pdf[x,] += other.joint_likelihood_pdfs[k][x,] * float(other_trials[k].count or 1) / ((our_trials[k].count + other_trials[k].count) or 1)
+			for x in self.likelihood_pdfs[k].centres()[0]:
+				new_likelihood_pdf[x,] += self.likelihood_pdfs[k][x,] * float(our_trials[k].count or 1) / ((our_trials[k].count + other_trials[k].count) or 1)
+			for x in other.likelihood_pdfs[k].centres()[0]:
+				new_likelihood_pdf[x,] += other.likelihood_pdfs[k][x,] * float(other_trials[k].count or 1) / ((our_trials[k].count + other_trials[k].count) or 1)
 
-			self.joint_likelihood_pdfs[k] = new_joint_likelihood_pdf
+			self.likelihood_pdfs[k] = new_likelihood_pdf
 
 		# combined trials counts
 		self.trials_table += other.trials_table
@@ -1128,7 +1176,7 @@ class RankingData(object):
 		self.maxrank.clear()
 		self.cdf_interpolator.clear()
 		self.ccdf_interpolator.clear()
-		for key, lpdf in self.joint_likelihood_pdfs.items():
+		for key, lpdf in self.likelihood_pdfs.items():
 			ranks = lpdf.bins.lower()[0]
 			weights = lpdf.array
 			# cumulative distribution function and its
-- 
GitLab