gstlal-burst: Add new program gstlal_cherenkov_calc_rank_pdfs

7ed823de · Soichiro Kuwahara · 2fbd927c · 7ed823de · 7ed823de
Commit 7ed823de authored 3 years ago by Soichiro Kuwahara
--- a/gstlal-burst/bin/Makefile.am
+++ b/gstlal-burst/bin/Makefile.am
 dist_bin_SCRIPTS = \
 	gstlal_cherenkov_burst \
+	gstlal_cherenkov_calc_rank_pdfs \
 	gstlal_cherenkov_inj \
 	gstlal_cherenkov_plot_rankingstat \
 	gstlal_cs_triggergen \

--- a/gstlal-burst/bin/gstlal_cherenkov_calc_rank_pdfs
+++ b/gstlal-burst/bin/gstlal_cherenkov_calc_rank_pdfs
+#!/usr/bin/env python3
+#
+# Copyright (C) 2010--2015  Kipp Cannon, Chad Hanna
+# Copyright (C) 2021  Soichiro Kuwawhara
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+### A program to compute the noise probability distributions of likehood ratios for inspiral triggers
+
+#
+# =============================================================================
+#
+#                                   Preamble
+#
+# =============================================================================
+#
+
+
+from optparse import OptionParser
+
+
+from ligo.lw import ligolw
+from ligo.lw import utils as ligolw_utils
+from ligo.lw.utils import process as ligolw_process
+from lal.utils import CacheEntry
+
+from gstlal.cherenkov import rankingstat as cherenkov_rankingstat
+
+__author__ = "Soichiro Kuwara <soichiro.kuwahara@ligo.org>"
+
+
+#
+# =============================================================================
+#
+#                                 Command Line
+#
+# =============================================================================
+#
+
+
+def parse_command_line():
+	parser = OptionParser(
+		description = "Rankngstat calculation program for Cherenkov burst search."
+	)
+	parser.add_option("--likelihood-cache", metavar = "filename", help = "Also load the likelihood ratio data files listsed in this LAL cache.  See lalapps_path2cache for information on how to produce a LAL cache file.")
+	parser.add_option("-v", "--verbose", action = "store_true", help = "Be verbose.")
+	parser.add_option("--ranking-stat-samples", metavar = "N", default = 2**24, type = "int", help = "Construct ranking statistic histograms by drawing this many samples from the ranking statistic generator (default = 2^24).")
+	parser.add_option("--output", metavar = "filename", help = "Write merged raw likelihood data and likelihood ratio histograms to this LIGO Light-Weight XML file.")
+	options, urls = parser.parse_args()
+
+	paramdict = options.__dict__.copy()
+
+	if options.likelihood_cache is not None:
+		urls += [CacheEntry(line).url for line in open(options.likelihood_cache)]
+	if not urls:
+		raise ValueError("must provide some likelihood files")
+
+	if options.output is None:
+		raise ValueError("must set --output")
+
+	return options, urls, paramdict
+
+
+#
+# =============================================================================
+#
+#                                     Main
+#
+# =============================================================================
+#
+
+
+#
+# command line
+#
+
+
+options, urls, paramdict = parse_command_line()
+
+
+#
+# load parameter distribution data
+#
+
+
+rankingstat = cherenkov_rankingstat.marginalize_pdf_urls(urls, verbose = options.verbose)
+
+
+#
+# invoke .finish() to apply density estimation kernels and correct the
+# normalization.
+#
+
+
+rankingstat.finish()
+
+
+#
+# generate likelihood ratio histograms
+#
+
+
+def binned_log_likelihood_ratio_rates_from_samples(noise_lr_lnpdf, random_params, nsamples):
+	exp = math.exp
+	isnan = math.isnan
+	noise_lr_lnpdf_count = noise_lr_lnpdf.count
+	for ln_lamb, lnP_signal, lnP_noise in itertools.islice(random_params, nsamples):
+		if isnan(ln_lamb):
+			raise ValueError("encountered NaN likelihood ratio")
+		if isnan(lnP_signal) or isnan(lnP_noise):
+			raise ValueError("encountered NaN signal or noise model probability densities")
+		noise_lr_lnpdf_count[ln_lamb,] += exp(lnP_noise)
+
+class RankingStatPDF(object):
+	ligo_lw_name_suffix = "gstlal_cherenkov_rankingstatpdf"
+
+	@staticmethod
+	def density_estimate(lnpdf, name, kernel = rate.gaussian_window(4.)):
+		"""
+		For internal use only.
+		"""
+		assert not numpy.isnan(lnpdf.array).any(), "%s log likelihood ratio PDF contain NaNs" % name
+		rate.filter_array(lnpdf.array, kernel)
+
+	@staticmethod
+	def binned_log_likelihood_ratio_rates_from_samples_wrapper(queue, noise_lr_lnpdf, random_params, nsamples):
+		"""
+		For internal use only.
+		"""
+		try:
+			# want the forked processes to use different random
+			# number sequences, so we re-seed Python and
+			# numpy's random number generators here in the
+			# wrapper in the hope that that takes care of it
+			random.seed()
+			numpy.random.seed()
+			binned_log_likelihood_ratio_rates_from_samples(noise_lr_lnpdf, random_params, nsamples)
+			queue.put((noise_lr_lnpdf.array,))
+		except:
+			queue.put((None,))
+			raise
+
+	def __init__(self, rankingstat, nsamples = 2**24, nthreads = 8, verbose = False):
+		#
+		# bailout out used by .from_xml() class method to get an
+		# uninitialized instance
+		#
+
+		if rankingstat is None:
+			return
+
+		#
+		# initialize binnings
+		#
+
+		self.noise_lr_lnpdf = rate.BinnedLnPDF(rate.NDBins((rate.ATanBins(0., 110., 6000),)))
+
+		#
+		# bailout used by codes that want all-zeros histograms
+		#
+
+		if not nsamples:
+			return
+
+		#
+		# run importance-weighted random sampling to populate
+		# binnings.
+		#
+
+		nthreads = int(nthreads)
+		assert nthreads >= 1
+		threads = []
+		for i in range(nthreads):
+			assert nsamples // nthreads >= 1
+			q = multiprocessing.SimpleQueue()
+			p = multiprocessing.Process(target = lambda: self.binned_log_likelihood_ratio_rates_from_samples_wrapper(
+				q,
+				self.noise_lr_lnpdf,
+				rankingstat.ln_lr_samples(rankingstat.denominator.random_params()),
+				nsamples = nsamples // nthreads
+			))
+			p.start()
+			threads.append((p, q))
+			nsamples -= nsamples // nthreads
+			nthreads -= 1
+			# sleep a bit to help random number seeds change
+			time.sleep(1.5)
+		while threads:
+			p, q = threads.pop(0)
+			noise_counts, = q.get()
+			self.noise_lr_lnpdf.array += noise_counts
+			p.join()
+			if p.exitcode:
+				raise Exception("sampling thread failed")
+		if verbose:
+			print("done computing ranking statistic PDFs", file=sys.stderr)
+
+		#
+		# apply density estimation kernels to counts
+		#
+
+		self.density_estimate(self.noise_lr_lnpdf, "noise model")
+		self.noise_lr_lnpdf.normalize()
+
+
+	def copy(self):
+		new = self.__class__(None)
+		new.noise_lr_lnpdf = self.noise_lr_lnpdf.copy()
+		return new
+
+
+	def __iadd__(self, other):
+		self.noise_lr_lnpdf += other.noise_lr_lnpdf
+		self.noise_lr_lnpdf.normalize()
+		return self
+
+
+	@classmethod
+	def get_xml_root(cls, xml, name):
+		"""
+		Sub-classes can use this in their overrides of the
+		.from_xml() method to find the root element of the XML
+		serialization.
+		"""
+		name = "%s:%s" % (name, cls.ligo_lw_name_suffix)
+		xml = [elem for elem in xml.getElementsByTagName(ligolw.LIGO_LW.tagName) if elem.hasAttribute("Name") and elem.Name == name]
+		if len(xml) != 1:
+			raise ValueError("XML tree must contain exactly one %s element named %s" % (ligolw.LIGO_LW.tagName, name))
+		return xml[0]
+
+
+	@classmethod
+	def from_xml(cls, xml, name):
+		# find the root of the XML tree containing the
+		# serialization of this object
+		xml = cls.get_xml_root(xml, name)
+		# create a mostly uninitialized instance
+		self = cls(None)
+		# populate from XML
+		self.noise_lr_lnpdf = rate.BinnedLnPDF.from_xml(xml, "noise_lr_lnpdf")
+		return self
+
+	def to_xml(self, name):
+		# do not allow ourselves to be written to disk without our
+		# PDFs' internal normalization metadata being up to date
+		self.noise_lr_lnpdf.normalize()
+		xml = ligolw.LIGO_LW({"Name": "%s:%s" % (name, self.ligo_lw_name_suffix)})
+		xml.appendChild(self.noise_lr_lnpdf.to_xml("noise_lr_lnpdf"))
+		return xml
+
+rankingstatpdf = far.RankingStatPDF(lr_rankingstat, nsamples = options.ranking_stat_samples, verbose = options.verbose)
+
+
+#
+# Write the parameter and ranking statistic distribution data to a file
+#
+
+
+xmldoc = ligolw.Document()
+xmldoc.appendChild(ligolw.LIGO_LW())
+process = ligolw_process.register_to_xmldoc(xmldoc, "gstlal_cherenkov_calc_rank_pdfs", paramdict = paramdict, instruments = rankingstat.instruments)
+far.gen_likelihood_control_doc(xmldoc, rankingstat, rankingstatpdf)
+process.set_end_time_now()
+ligolw_utils.write_filename(xmldoc, options.output, verbose = options.verbose)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+