updated string rankingstat modules

5ab02ce3 · Daichi Tsuna · e7af6f76 · 5ab02ce3 · 5ab02ce3 · 5ab02ce3
Commit 5ab02ce3 authored 5 years ago by Daichi Tsuna
--- a/gstlal-burst/python/string/lalapps_string_calc_likelihood.py
+++ b/gstlal-burst/python/string/lalapps_string_calc_likelihood.py
+#
+# Copyright (C) 2010  Kipp Cannon
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+
+#
+# =============================================================================
+#
+#                                   Preamble
+#
+# =============================================================================
+#
+
+
+from __future__ import print_function
+
+
+from optparse import OptionParser
+import sqlite3
+import sys
+
+
+from ligo.lw import dbtables
+
+from lal.utils import CacheEntry
+from lalburst import git_version
+from lalburst import calc_likelihood
+from lalburst import SnglBurstUtils
+from lalburst import stringutils
+
+
+__author__ = "Kipp Cannon <kipp.cannon@ligo.org>"
+__version__ = "git id %s" % git_version.id
+__date__ = git_version.date
+
+
+#
+# =============================================================================
+#
+#                                 Command Line
+#
+# =============================================================================
+#
+
+
+def parse_command_line():
+	parser = OptionParser(
+		version = "Name: %%prog\n%s" % git_version.verbose_msg
+	)
+	parser.add_option("-c", "--input-cache", metavar = "filename", help = "Also process the files named in this LAL cache.  See lalapps_path2cache for information on how to produce a LAL cache file.")
+	parser.add_option("-l", "--likelihood-file", metavar = "filename", action = "append", help = "Set the name of the likelihood ratio data file to use.  Can be given more than once.")
+	parser.add_option("--likelihood-cache", metavar = "filename", help = "Also load the likelihood ratio data files listsed in this LAL cache.  See lalapps_path2cache for information on how to produce a LAL cache file.")
+	parser.add_option("-t", "--tmp-space", metavar = "path", help = "Path to a directory suitable for use as a work area while manipulating the database file.  The database file will be worked on in this directory, and then moved to the final location when complete.  This option is intended to improve performance when running in a networked environment, where there might be a local disk with higher bandwidth than is available to the filesystem on which the final output will reside.")
+	parser.add_option("--vetoes-name", metavar = "name", help = "Set the name of the segment lists to use as vetoes (default = do not apply vetoes).")
+	parser.add_option("-v", "--verbose", action = "store_true", help = "Be verbose.")
+	options, filenames = parser.parse_args()
+
+	options.likelihood_filenames = []
+	if options.likelihood_file is not None:
+		options.likelihood_filenames += options.likelihood_file
+	if options.likelihood_cache is not None:
+		options.likelihood_filenames += [CacheEntry(line).path for line in open(options.likelihood_cache)]
+	if not options.likelihood_filenames:
+		raise ValueError("no ranking statistic likelihood data files specified")
+
+	if options.input_cache:
+		filenames += [CacheEntry(line).path for line in open(options.input_cache)]
+	if not filenames:
+		raise ValueError("no candidate databases specified")
+
+	return options, filenames
+
+
+#
+# =============================================================================
+#
+#                                     Main
+#
+# =============================================================================
+#
+
+
+#
+# command line
+#
+
+
+options, filenames = parse_command_line()
+
+
+#
+# load likelihood data
+#
+
+
+rankingstat = stringutils.marginalize_rankingstat(options.likelihood_filenames, verbose = options.verbose)
+if options.verbose:
+	print("computing event densities ...", file=sys.stderr)
+rankingstat.finish()
+
+
+#
+# iterate over files
+#
+
+
+for n, filename in enumerate(filenames):
+	#
+	# Open the database file.
+	#
+
+	if options.verbose:
+		print("%d/%d: %s" % (n + 1, len(filenames), filename), file=sys.stderr)
+
+	working_filename = dbtables.get_connection_filename(filename, tmp_path = options.tmp_space, verbose = options.verbose)
+	connection = sqlite3.connect(working_filename)
+	if options.tmp_space is not None:
+		dbtables.set_temp_store_directory(connection, options.tmp_space, verbose = options.verbose)
+
+	#
+	# Summarize the database.
+	#
+
+	contents = SnglBurstUtils.CoincDatabase(connection, live_time_program = "StringSearch", search = "StringCusp", veto_segments_name = options.vetoes_name)
+	if options.verbose:
+		SnglBurstUtils.summarize_coinc_database(contents)
+
+
+	#
+	# Run likelihood ratio calculation.
+	#
+
+	calc_likelihood.assign_likelihood_ratios(
+		connection = contents.connection,
+		coinc_def_id = contents.bb_definer_id,
+		offset_vectors = contents.time_slide_table.as_dict(),
+		vetoseglists = contents.vetoseglists,
+		events_func = lambda cursor, coinc_event_id: calc_likelihood.sngl_burst_events_func(cursor, coinc_event_id, contents.sngl_burst_table.row_from_cols),
+		veto_func = calc_likelihood.sngl_burst_veto_func,
+		ln_likelihood_ratio_func = rankingstat.ln_lr_from_triggers,
+		verbose = options.verbose
+	)
+
+	#
+	# Clean up.
+	#
+
+	contents.xmldoc.unlink()
+	connection.close()
+	dbtables.put_connection_filename(filename, working_filename, verbose = options.verbose)
--- a/gstlal-burst/python/string/lalapps_string_calc_rank_pdfs.py
+++ b/gstlal-burst/python/string/lalapps_string_calc_rank_pdfs.py
+#
+# Copyright (C) 2019 Daichi Tsuna
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+# a module to construct ranking statistic PDFs from the calc_likelihood outputs
+
+#
+# =============================================================================
+#
+#                                   Preamble
+#
+# =============================================================================
+#
+
+
+from optparse import OptionParser
+
+from ligo.lw import ligolw
+from ligo.lw import utils as ligolw_utils
+from ligo.lw.utils import process as ligolw_process
+
+from lal.utils import CacheEntry
+from lalburst import git_version
+from lalburst import stringutils
+from lalburst import string_lr_far
+
+
+__author__ = "Daichi Tsuna <daichi.tsuna@ligo.org>"
+__version__ = "git id %s" % git_version.id
+__date__ = git_version.date
+
+
+#
+# =============================================================================
+#
+#                                 Command Line
+#
+# =============================================================================
+#
+
+
+def parse_command_line():
+	parser = OptionParser(
+		version = "Name: %%prog\n%s" % git_version.verbose_msg
+	)
+	parser.add_option("-c", "--input-cache", metavar = "filename", help = "Also process the files named in this LAL cache.  See lalapps_path2cache for information on how to produce a LAL cache file.")
+	parser.add_option("-n", "--ranking-stat-samples", metavar = "N", default = 2**24, type = "int", help = "Construct ranking statistic histograms by drawing this many samples from the ranking statistic generator (default = 2^24).")
+	parser.add_option("-o", "--output", metavar = "filename", help = "Write merged likelihood ratio histograms to this LIGO Light-Weight XML file.")
+	parser.add_option("-v", "--verbose", action = "store_true", help = "Be verbose.")
+	options, filenames = parser.parse_args()
+
+	paramdict = options.__dict__.copy()
+
+	if options.input_cache is not None:
+		filenames = [CacheEntry(line).path for line in open(options.input_cache)]
+	if not filenames:
+		raise ValueError("no ranking statistic likelihood data files specified")
+
+	if options.output is None:
+		raise ValueError("must set --output")
+		
+	return options, filenames, paramdict
+
+
+#
+# =============================================================================
+#
+#                                     Main
+#
+# =============================================================================
+#
+
+
+#
+# command line
+#
+
+
+options, filenames, paramdict = parse_command_line()
+
+
+#
+# load rankingstat data
+#
+
+
+rankingstat = stringutils.marginalize_rankingstat(filenames, verbose = options.verbose) 
+
+
+#
+# generate rankingstatpdfs
+#
+
+
+rankingstatpdf = stringutils.RankingStatPDF(rankingstat, nsamples = options.ranking_stat_samples, verbose = options.verbose)
+
+
+#
+# write to output
+#
+
+
+xmldoc = ligolw.Document()
+xmldoc.appendChild(ligolw.LIGO_LW())
+xmldoc.childNodes[-1].appendChild(rankingstatpdf.to_xml())
+process = ligolw_process.register_to_xmldoc(xmldoc, program = u"lalapps_string_meas_likelihood", paramdict = paramdict, version = __version__, cvs_repository = "lscsoft", cvs_entry_time = __date__, comment = u"")
+ligolw_process.set_process_end_time(process)
+ligolw_utils.write_filename(xmldoc, options.output, gz = (options.output or "stdout").endswith(".gz"), verbose = options.verbose)
--- a/gstlal-burst/python/string/lalapps_string_meas_likelihood.py
+++ b/gstlal-burst/python/string/lalapps_string_meas_likelihood.py
+# Copyright (C) 2010--2018  Kipp Cannon
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+
+#
+# =============================================================================
+#
+#                                   Preamble
+#
+# =============================================================================
+#
+
+
+from __future__ import print_function
+
+
+import math
+from optparse import OptionParser
+import sqlite3
+import string
+import sys
+
+from ligo.lw import ligolw
+from ligo.lw import utils as ligolw_utils
+from ligo.lw.utils import process as ligolw_process
+
+from lal.utils import CacheEntry
+
+from lalburst import SnglBurstUtils
+from lalburst import git_version
+from lalburst import stringutils
+from lalburst import string_lr_far
+
+
+__author__ = "Kipp Cannon <kipp.cannon@ligo.org>"
+__version__ = "git id %s" % git_version.id
+__date__ = git_version.date
+
+
+# characters allowed to appear in the description string
+T010150_letters = set(string.ascii_lowercase + string.ascii_uppercase + string.digits + "_+#")
+
+
+#
+# =============================================================================
+#
+#                                 Command Line
+#
+# =============================================================================
+#
+
+
+def parse_command_line():
+	parser = OptionParser(
+		version = "Name: %%prog\n%s" % git_version.verbose_msg,
+		usage = "%prog [options] [filename ...]",
+		description = "%prog analyzes a collection of sqlite3 database files containing lalapps_burca outputs of string-cusp coincidence events, and measures probability distributions for a variety of parameters computed from those coincidences.  The distributions are written to a likelihood data file in XML format, which can later be used by to assign likelihoods to the coincidences.  The files to be processed can be named on the command line and/or provided by a LAL cache file."
+	)
+	parser.add_option("-o", "--output", metavar = "filename", default = None, help = "Set the name of the likelihood data file to write (default = stdout).")
+	parser.add_option("-c", "--input-cache", metavar = "filename", help = "Also process the files named in this LAL cache.  See lalapps_path2cache for information on how to produce a LAL cache file.")
+	parser.add_option("-t", "--tmp-space", metavar = "path", help = "Path to a directory suitable for use as a work area while manipulating the database file.  The database file will be worked on in this directory, and then moved to the final location when complete.  This option is intended to improve performance when running in a networked environment, where there might be a local disk with higher bandwidth than is available to the filesystem on which the final output will reside.")
+	parser.add_option("--T010150", metavar = "description", default = None, help = "Write the output to a file whose name is compatible with the file name format described in LIGO-T010150-00-E, \"Naming Convention for Frame Files which are to be Processed by LDAS\".  The description string will be used to form the second field in the file name.")
+	parser.add_option("--injection-reweight", metavar = "off|astrophysical", default = "off", help = "Set the weight function to be applied to the injections (default = \"off\").  When \"off\", the injections are all given equal weight and so the injection population is whatever was injected.  When set to \"astrophysical\", the injections are reweighted to simulate an amplitude^{-4} distribution.")
+	parser.add_option("--injection-reweight-cutoff", metavar = "amplitude", default = 1e-20, type = "float", help = "When using the astrophysical injection reweighting, do not allow the weight assigned to arbitrarily low-amplitude injections to grow without bound, instead clip the weight assigned to injections to the weight given to injections with this amplitude (default = 1e-20, 0 = disabled).  This option is ignored when astrophysical reweighting is not being performed.")
+	parser.add_option("--vetoes-name", metavar = "name", help = "Set the name of the segment lists to use as vetoes (default = do not apply vetoes).")
+	parser.add_option("-v", "--verbose", action = "store_true", help = "Be verbose.")
+	options, filenames = parser.parse_args()
+
+	paramdict = options.__dict__.copy()
+
+	if options.T010150 is not None:
+		if options.output is not None:
+			raise ValueError("cannot set both --T010150 and --output")
+		if options.T010150 == "":
+			options.T010150 = "STRING_LIKELIHOOD"
+		elif set(options.T010150) - T010150_letters:
+			raise ValueError("invalid characters in description \"%s\"" % options.T010150)
+
+	if options.input_cache:
+		filenames += [CacheEntry(line).path for line in file(options.input_cache)]
+
+	if not filenames:
+		raise ValueError("no input files!")
+
+	if options.injection_reweight not in ("off", "astrophysical"):
+		raise ValueError("--injection-reweight \"%s\" not recognized" % options.injections_reweight)
+
+	return options, filenames, paramdict
+
+
+#
+# =============================================================================
+#
+#                         Injection ReWeight Functions
+#
+# =============================================================================
+#
+
+
+def get_injection_weight_func(contents, reweight_type, amplitude_cutoff):
+	if reweight_type == "off":
+		def weight_func(sim, amplitude_cutoff = amplitude_cutoff):
+			# amplitude cut-off is not used
+			return 1.0
+	elif reweight_type == "astrophysical":
+		population, = ligolw_process.get_process_params(contents.xmldoc, "lalapps_string_injections", "--population")
+		if population != "string_cusp":
+			raise ValueError("lalapps_string_injections was not run with --population=\"string_cusp\"")
+		def weight_func(sim, amplitude_cutoff = amplitude_cutoff):
+			# the "string_cusp" injection population is uniform
+			# in log A, meaning the number of injections
+			# between log A and log A + d log A is independent
+			# of A.  that corresponds to a distribution density
+			# in A of P(A) dA \propto A^{-1} dA.  we want P(A)
+			# dA \propto A^{-4} dA, so each physical injection
+			# needs to be treated as if it was A^{-3} virtual
+			# injections, then the density of virtual
+			# injections will be P(A) dA \propto A^{-4} dA.
+			# the factor of 10^{21} simply renders the
+			# amplitudes closer to 1;  since double-precision
+			# arithmetic is used this should have no affect on
+			# the results, but it might help make the numbers a
+			# little easier for humans to look at should anyone
+			# have occasion to do so.
+			return (max(sim.amplitude, amplitude_cutoff) * 1e21)**-3
+	else:
+		raise ValueError(reweight_type)
+	return weight_func
+
+
+#
+# =============================================================================
+#
+#                                     Main
+#
+# =============================================================================
+#
+
+
+#
+# Command line.
+#
+
+
+options, filenames, paramdict = parse_command_line()
+
+
+#
+# Start output document
+#
+
+
+xmldoc = ligolw.Document()
+xmldoc.appendChild(ligolw.LIGO_LW())
+process = ligolw_process.register_to_xmldoc(xmldoc, program = u"lalapps_string_meas_likelihood", paramdict = paramdict, version = __version__, cvs_repository = "lscsoft", cvs_entry_time = __date__, comment = u"")
+
+
+#
+# Iterate over files to obtain the denominator and zero-lag
+#
+
+rankingstat = stringutils.marginalize_rankingstat(filenames, verbose = options.verbose)
+
+
+#
+# obtain the numerator
+# FIXME for now we simply copy over the numerator from an existing table
+#
+
+
+adhoc_rankingstat = stringutils.marginalize_rankingstat(["/home/daichi.tsuna/playground/rankingstat_numerator/G1+H1+H2+L1+T1+V1-STRING_LIKELIHOOD_O2CUSP_INJ_0_0-1164600798-154406.xml.gz"], verbose = options.verbose)
+adhoc_rankingstat.finish()
+rankingstat.numerator = adhoc_rankingstat.numerator 
+rankingstat.finish()
+
+
+#
+# Output.
+#
+
+
+xmldoc.childNodes[-1].appendChild(rankingstat.to_xml())
+ligolw_process.set_process_end_time(process)
+
+
+def T010150_basename(instruments, description, seg):
+	start = int(math.floor(seg[0]))
+	duration = int(math.ceil(seg[1] - start))
+	return "%s-%s-%d-%d" % ("+".join(sorted(instruments)), description, start, duration)
+if options.T010150:
+	filename = "%s.xml.gz" % T010150_basename(segs.keys(), options.T010150, segs.extent_all())
+else:
+	filename = options.output
+ligolw_utils.write_filename(xmldoc, filename, verbose = options.verbose, gz = (filename or "stdout").endswith(".gz"))
--- a/gstlal-burst/python/string/string_extrinsics.py
+++ b/gstlal-burst/python/string/string_extrinsics.py
@@ -28,13 +28,23 @@

 import numpy

-from ligo.lw import ligolw
 from ligo.lw import array as ligolw_array
-from glue.text_progress_bar import ProgressBar
 from gstlal import stats as gstlalstats
+import lal
 from lal import rate


+__doc__ = """
+
+The goal of this module is to implement the probability of getting a given set
+of extrinsic parameters for a set of detectors parameterized by n-tuples of
+trigger parameters: (snr, chi2) assuming that the event is a gravitational wave
+signal, *s*, coming from an isotropic distribution in location, orientation and
+the volume of space.  The implementation of this in the calling code can be 
+found in :py:mod:`string_lr_far`.
+"""
+
+
 #
 # =============================================================================
 #
@@ -88,7 +98,7 @@ class NumeratorSNRCHIPDF(rate.BinnedLnPDF):
 		self.norm.shape = (len(self.norm), 1)

 	@staticmethod
-	def add_signal_model(lnpdf, n, prefactors_range, df = 100, inv_snr_pow = 4., snr_min = 3.5, progressbar = None):
+	def add_signal_model(lnpdf, n, prefactors_range, df, inv_snr_pow = 4., snr_min = 3.5, progressbar = None):
 		if df <= 0.:
 			raise ValueError("require df >= 0: %s" % repr(df))
 		pfs = numpy.linspace(prefactors_range[0], prefactors_range[1], 100)

--- a/gstlal-burst/python/string/string_lr_far.py
+++ b/gstlal-burst/python/string/string_lr_far.py
--- a/gstlal-burst/python/string/stringutils.py
+++ b/gstlal-burst/python/string/stringutils.py