From b8ec090c10ba753e059c34a262f2f893a97a830c Mon Sep 17 00:00:00 2001 From: Kipp Cannon <kipp.cannon@ligo.org> Date: Wed, 21 Jun 2017 15:41:31 +0900 Subject: [PATCH] gstlal_inspiral_make_snr_pdf: new version - add --full feature to enable population of all possible PDFs - add --full-fragment feature to enable parallelization - add --seed / --seed-cache feature to enable merging of PDFs generated in parallel - add process metadata to the output file to record how it was constructed - remove make_snr_pdf_cache.py from share/ as it is now redundant --- .../bin/gstlal_inspiral_make_snr_pdf | 160 +++++++++++++++--- gstlal-inspiral/share/make_snr_pdf_cache.py | 35 ---- 2 files changed, 136 insertions(+), 59 deletions(-) delete mode 100644 gstlal-inspiral/share/make_snr_pdf_cache.py diff --git a/gstlal-inspiral/bin/gstlal_inspiral_make_snr_pdf b/gstlal-inspiral/bin/gstlal_inspiral_make_snr_pdf index 7c4c59b68f..322c62c01f 100755 --- a/gstlal-inspiral/bin/gstlal_inspiral_make_snr_pdf +++ b/gstlal-inspiral/bin/gstlal_inspiral_make_snr_pdf @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Copyright (C) 2016 Kipp Cannon, Chad Hanna +# Copyright (C) 2016,2017 Kipp Cannon, Chad Hanna # # This program is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the @@ -33,16 +33,34 @@ import itertools +import logging from optparse import OptionParser -import sys +import time from glue.ligolw import ligolw from glue.ligolw import utils as ligolw_utils +from glue.ligolw.utils import process as ligolw_process from glue.text_progress_bar import ProgressBar from gstlal import reference_psd from gstlal.stats import inspiral_extrinsics import lal.series +from lal.utils import CacheEntry + + +# +# ============================================================================= +# +# Utils +# +# ============================================================================= +# + + +def all_instrument_combos(instruments, min_number): + for n in range(min_number, len(instruments) + 1): + for combo in itertools.combinations(instruments, n): + yield combo # @@ -59,16 +77,54 @@ def parse_command_line(): description = "" ) + parser.add_option("-f", "--full", action = "store_true", help = "Generate SNR PDFs for all possible horizon distance ratios as defined by the SNR PDF mechanism's internal quantization system. Use --instruments to select the detector network to model (required). A PDF will be constructed for every combination of --min-instruments or more instruments from the set. When --full enabled, any --horizon-distances options and/or PSD files are ignored. See also --full-fragment for information on parallelization.") + parser.add_option("--full-fragment", metavar = "n/m", default = "1/1", help = "Enable parallelization of --full by selecting a fragment of the full SNR PDFs sequence to generate. When --full is enabled, the iteration over SNR PDFs employs a reproducible, deterministic, sequence. That sequence is divided into m (m >= 1) approximately equal-sized intervals, and only the SNR PDFs from the n-th interval (1 <= n <= m) of the sequence will be constructed. Afterwards, the files generated by several jobs can be combined by re-invoking the program using the --seed or --seed-cache options to obtain the complete SNR PDF collection. The format of this option is \"n/m\", and the default is \"1/1\", which causes all SNR PDFs to be constructed.") parser.add_option("--horizon-distances", metavar = "instrument=distance[,instrument=distance,...]", action = "append", help = "Cache SNR PDFs for these instruments and horizon distances. Format is, e.g., H1=120,L1=120,V1=48. Units for distance are irrelevant (PDFs depend only on their ratios). A PDF will be constructed for every combination of --min-instruments or more instruments from the set. All --horizon-distances must list the same instruments (if an instrument is off set its horizon distance to 0).") - parser.add_option("--horizon-distance-masses", metavar = "m1,m2", action = "append", default = ["1.4,1.4"], help = "When computing pre-cached SNR PDFs from a collection of PSD files, compute horizon distances for these masses in solar mass units (default = 1.4,1.4). Can be given multiple times.") - parser.add_option("--horizon-distance-flow", metavar = "Hz", default = 10., type = "float", help = "When computing pre-cached SNR PDFs from a collection PSD files, start horizon distance integral at this frequency in Hertz (default = 10 Hz).") - parser.add_option("--instruments", metavar = "name[,name,...]", help = "Name the instruments explicitly. If only PSD files are to be used to get horizon distances (no --horizon-distances options are given) then this options is required, otherwise it is optional. If both --instruments and --horizon-distances are given, they must list the same instruments.") + parser.add_option("--horizon-distance-flow", metavar = "Hz", default = 10., type = "float", help = "When obtaining horizon distances from a collection PSD files, start horizon distance integral at this frequency in Hertz (default = 10 Hz).") + parser.add_option("--horizon-distance-masses", metavar = "m1,m2", action = "append", default = ["1.4,1.4"], help = "When obtaining horizon distances from a collection of PSD files, compute them for these masses in solar mass units (default = 1.4,1.4). Can be given multiple times.") + parser.add_option("--instruments", metavar = "name[,name,...]", help = "Set the instruments to include in the network. If PSD files alone are to be used to provide horizon distances, or --full is selected, then this option is required, otherwise it is optional. If both --instruments and --horizon-distances are given, they must list the same instruments.") parser.add_option("--min-instruments", metavar = "count", default = 2, type = "int", help = "Set the minimum number of instruments required to form a candidate (default = 2).") parser.add_option("--output", "-o", metavar = "filename", help = "Write SNR PDF cache to this file (required).") + parser.add_option("--seed", metavar = "filename", action = "append", default = [], help = "Seed the SNR PDF cache by loading pre-computed SNR PDFs from this file. Can be given multiple times.") + parser.add_option("--seed-cache", metavar = "filename", help = "Seed the SNR PDF cache by loading pre-computed SNR PDFs from the files named in this LAL cache.") parser.add_option("--verbose", "-v", action = "store_true", help = "Be verbose.") options, filenames = parser.parse_args() + processparams = options.__dict__.copy() + + if options.verbose: + logging.basicConfig(format = "%(message)s", level = logging.INFO) + else: + logging.basicConfig(format = "%(message)s", level = logging.WARNING) + + if options.seed_cache: + options.seed += [CacheEntry(line).path for line in open(options.seed_cache)] + + if not options.full and not options.horizon_distances and not options.seed and not filenames: + raise ValueError("must either enable --full, or provide one or more --horizon-distances, or one or more --seed and/or a --seed-cache, or provide the names of one or more PSD files") + + if options.full: + if not options.instruments: + raise ValueError("must set --instruments with --full enabled") + + # + # disable PSD files and --horizon-distances options. waste + # of time if we're already going to generate all possible + # PDFs + # + + if filenames: + logging.warning("--full enabled, ignoring PSD files") + del filenames[:] + if options.horizon_distances is not None: + logging.warning("--full enabled, ignoring --horizon-distances") + options.horizon_distances = None + + options.full_fragment = map(int, options.full_fragment.split("/")) + if len(options.full_fragment) != 2 or options.full_fragment[1] < 1 or not 1 <= options.full_fragment[0] <= options.full_fragment[1]: + raise ValueError("invalid --full-fragment") + if options.instruments: options.instruments = set(options.instruments.split(",")) if len(options.instruments) < options.min_instruments: @@ -89,14 +145,16 @@ def parse_command_line(): raise ValueError("--instruments does not match --horizon-distances") if any(min(horizon_distances.values()) < 0. for horizon_distances in options.horizon_distances): raise ValueError("all --horizon-distances must be >= 0.") - elif not filenames: - raise ValueError("must provide either one or more PSD files or one or more --horizon-distances, or one or more of both") - elif not options.instruments: - raise ValueError("must set --instruments when --horizon-distances not given") else: - # we'll populate this later from the PSD files + # we'll populate this later options.horizon_distances = [] + # if --full was given it has unset filenames; if any + # --horizon-distances were given then .instruments has been + # populated from those if it wasn't already set + if filenames and not options.instruments: + raise ValueError("must set --instruments if PSD files alone are used for horizon distances") + options.horizon_distance_masses = [map(float, s.split(",")) for s in options.horizon_distance_masses] if any(len(masses) != 2 for masses in options.horizon_distance_masses): raise ValueError("must provide exactly 2 masses for each --horizon-distance-masses") @@ -109,7 +167,7 @@ def parse_command_line(): if not options.output: raise ValueError("must set --output") - return options, filenames + return options, processparams, filenames # @@ -126,36 +184,88 @@ def parse_command_line(): # -options, filenames = parse_command_line() +options, processparams, filenames = parse_command_line() # -# extract additional horizon distances from PSD files if given +# initialize empty SNR PDF cache +# + + +snrpdf = inspiral_extrinsics.SNRPDF(snr_cutoff = 4.) +snrpdf.snr_joint_pdf_cache.clear() # just in case + + +# +# now load seed files if requested. all SNRPDF instances share a single +# global cache of PDFs, so loading one from disk has the effect of +# inserting its PDFs into our existing cache +# + + +for n, filename in enumerate(options.seed, 1): + logging.info("loading seed PDFs %d/%d:" % (n, len(options.seed))) + seed_snrpdf = inspiral_extrinsics.SNRPDF.load(open(filename), verbose = options.verbose) + if seed_snrpdf.snr_cutoff != snrpdf.snr_cutoff: + raise ValueError("incompatible SNR cut-offs") + if seed_snrpdf.log_distance_tolerance != snrpdf.log_distance_tolerance: + raise ValueError("incompatible distance ratio quantizations") + if seed_snrpdf.min_ratio != snrpdf.min_ratio: + raise ValueError("incompatible minimum distance ratios") + + +# +# obtain horizon distances from PSD files if requested # for n, filename in enumerate(filenames, 1): - if options.verbose: - print >>sys.stderr, "%d/%d:" % (n, len(filenames)), + logging.info("loading PSD %d/%d:" % (n, len(filenames))) psd = lal.series.read_psd_xmldoc(ligolw_utils.load_filename(filename, contenthandler = lal.series.PSDContentHandler, verbose = options.verbose)) for m1, m2 in options.horizon_distance_masses: horizon_distances = dict((instrument, (0. if instrument not in psd else reference_psd.horizon_distance(psd[instrument], m1, m2, 8., options.horizon_distance_flow))) for instrument in options.instruments) - if options.verbose: - print >>sys.stderr, "\t%s" % ", ".join("%s = %.4g Mpc" % x for x in sorted(horizon_distances.items())) + logging.info("\t%s" % ", ".join("%s = %.4g Mpc" % x for x in sorted(horizon_distances.items()))) options.horizon_distances.append(horizon_distances) # -# build and populate SNR PDF cache +# construct (fragment of) full spectrum of horizon distances if requested # -snrpdf = inspiral_extrinsics.SNRPDF(snr_cutoff = 4.) +if options.full: + # the use of sorted() in both loops ensures the sequence is + # reproducible from one job to the next. this is required by the + # --full-fragment feature. + for loudest in sorted(options.instruments): + other_instruments = sorted(options.instruments - set((loudest,))) + for other_distances in itertools.product(*([snrpdf.quants] * len(other_instruments))): + options.horizon_distances.append(snrpdf.quantized_horizon_distances([(loudest, 0.0)] + zip(other_instruments, other_distances))) + options.horizon_distances.append(dict.fromkeys(options.instruments, 0.0)) + # clip total fragment count to total number of ratios + n = len(options.horizon_distances) + options.full_fragment[1] = min(options.full_fragment[1], n) + # clip fragment index to clipped fragment count + options.full_fragment[0] = min(*options.full_fragment) + # mean horizon distance ratios per fragment + ratios_per_fragment = n / float(options.full_fragment[1]) + # find start and end of interval to compute + start = int(round(ratios_per_fragment * (options.full_fragment[0] - 1))) + end = int(round(ratios_per_fragment * options.full_fragment[0])) + # clip sequence + options.horizon_distances = options.horizon_distances[start : end] + + +# +# populate SNR PDF cache +# + -for horizon_distances in options.horizon_distances: - for n in range(options.min_instruments, len(horizon_distances) + 1): - for instruments in itertools.combinations(horizon_distances, n): - snrpdf.add_to_cache(instruments, horizon_distances, verbose = options.verbose) +start_time = time.time() +for n, horizon_distances in enumerate(options.horizon_distances, 1): + logging.info("horizon distance ratio %d/%d, %g minutes elapsed" % (n, len(options.horizon_distances), (time.time() - start_time) / 60.)) + for trigger_instruments in all_instrument_combos(options.instruments, options.min_instruments): + snrpdf.add_to_cache(trigger_instruments, horizon_distances, verbose = options.verbose) # @@ -164,5 +274,7 @@ for horizon_distances in options.horizon_distances: xmldoc = ligolw.Document() -xmldoc.appendChild(snrpdf.to_xml()) +xmldoc.appendChild(ligolw.LIGO_LW()) +process = ligolw_process.register_to_xmldoc(xmldoc, "gstlal_inspiral_make_snr_pdf", processparams, ifos = options.instruments) +xmldoc.childNodes[0].appendChild(snrpdf.to_xml()) ligolw_utils.write_filename(xmldoc, options.output, gz = (options.output or "stdout").endswith(".gz"), verbose = options.verbose) diff --git a/gstlal-inspiral/share/make_snr_pdf_cache.py b/gstlal-inspiral/share/make_snr_pdf_cache.py deleted file mode 100644 index 75d23a35af..0000000000 --- a/gstlal-inspiral/share/make_snr_pdf_cache.py +++ /dev/null @@ -1,35 +0,0 @@ -from glue import iterutils -from glue.ligolw import ligolw -from glue.ligolw import utils as ligolw_utils -from glue.text_progress_bar import ProgressBar -from gstlal import far -from gstlal.stats import inspiral_extrinsics - -def all_instrument_combos(instruments, min_number): - for n in range(min_number, len(instruments) + 1): - for combo in iterutils.choices(instruments, n): - yield combo - -diststats, _, segs = far.parse_likelihood_control_doc(ligolw_utils.load_filename("all_o1_likelihood.xml.gz", contenthandler = far.ThincaCoincParamsDistributions.LIGOLWContentHandler, verbose = True)) -#diststats.finish(segs, verbose = True) -if set(diststats.horizon_history) != set(diststats.instruments): - raise ValueError("horizon histories are for %s, need for %s" % (", ".join(sorted(diststats.horizon_history)), ", ".join(sorted(diststats.instruments))) - -snrpdf = inspiral_extrinsics.SNRPDF(diststats.snr_min) -snrpdf.snr_joint_pdf_cache.clear() - -all_horizon_distances = diststats.horizon_history.all() -with ProgressBar(max = len(all_horizon_distances)) as progressbar: - for horizon_distances in all_horizon_distances: - progressbar.increment(text = ",".join(sorted("%s=%.5g" % item for item in horizon_distances.items()))) - progressbar.show() - for off_instruments in all_instrument_combos(horizon_distances, 0): - dists = horizon_distances.copy() - for instrument in off_instruments: - dists[instrument] = 0. - for instruments in all_instrument_combos(diststats.instruments, diststats.min_instruments): - snrpdf.add_to_cache(instruments, dists, verbose = True) - -xmldoc = ligolw.Document() -xmldoc.appendChild(ligolw.LIGO_LW()).appendChild(snrpdf.to_xml()) -ligolw_utils.write_filename(xmldoc, "inspiral_snr_pdf.xml.gz", gz = True, verbose = True) -- GitLab