Skip to content
Snippets Groups Projects
Commit b8ec090c authored by Kipp Cannon's avatar Kipp Cannon
Browse files

gstlal_inspiral_make_snr_pdf: new version

- add --full feature to enable population of all possible PDFs
- add --full-fragment feature to enable parallelization
- add --seed / --seed-cache feature to enable merging of PDFs generated in parallel
- add process metadata to the output file to record how it was constructed
- remove make_snr_pdf_cache.py from share/ as it is now redundant
parent 276c866e
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python
#
# Copyright (C) 2016 Kipp Cannon, Chad Hanna
# Copyright (C) 2016,2017 Kipp Cannon, Chad Hanna
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
......@@ -33,16 +33,34 @@
import itertools
import logging
from optparse import OptionParser
import sys
import time
from glue.ligolw import ligolw
from glue.ligolw import utils as ligolw_utils
from glue.ligolw.utils import process as ligolw_process
from glue.text_progress_bar import ProgressBar
from gstlal import reference_psd
from gstlal.stats import inspiral_extrinsics
import lal.series
from lal.utils import CacheEntry
#
# =============================================================================
#
# Utils
#
# =============================================================================
#
def all_instrument_combos(instruments, min_number):
for n in range(min_number, len(instruments) + 1):
for combo in itertools.combinations(instruments, n):
yield combo
#
......@@ -59,16 +77,54 @@ def parse_command_line():
description = ""
)
parser.add_option("-f", "--full", action = "store_true", help = "Generate SNR PDFs for all possible horizon distance ratios as defined by the SNR PDF mechanism's internal quantization system. Use --instruments to select the detector network to model (required). A PDF will be constructed for every combination of --min-instruments or more instruments from the set. When --full enabled, any --horizon-distances options and/or PSD files are ignored. See also --full-fragment for information on parallelization.")
parser.add_option("--full-fragment", metavar = "n/m", default = "1/1", help = "Enable parallelization of --full by selecting a fragment of the full SNR PDFs sequence to generate. When --full is enabled, the iteration over SNR PDFs employs a reproducible, deterministic, sequence. That sequence is divided into m (m >= 1) approximately equal-sized intervals, and only the SNR PDFs from the n-th interval (1 <= n <= m) of the sequence will be constructed. Afterwards, the files generated by several jobs can be combined by re-invoking the program using the --seed or --seed-cache options to obtain the complete SNR PDF collection. The format of this option is \"n/m\", and the default is \"1/1\", which causes all SNR PDFs to be constructed.")
parser.add_option("--horizon-distances", metavar = "instrument=distance[,instrument=distance,...]", action = "append", help = "Cache SNR PDFs for these instruments and horizon distances. Format is, e.g., H1=120,L1=120,V1=48. Units for distance are irrelevant (PDFs depend only on their ratios). A PDF will be constructed for every combination of --min-instruments or more instruments from the set. All --horizon-distances must list the same instruments (if an instrument is off set its horizon distance to 0).")
parser.add_option("--horizon-distance-masses", metavar = "m1,m2", action = "append", default = ["1.4,1.4"], help = "When computing pre-cached SNR PDFs from a collection of PSD files, compute horizon distances for these masses in solar mass units (default = 1.4,1.4). Can be given multiple times.")
parser.add_option("--horizon-distance-flow", metavar = "Hz", default = 10., type = "float", help = "When computing pre-cached SNR PDFs from a collection PSD files, start horizon distance integral at this frequency in Hertz (default = 10 Hz).")
parser.add_option("--instruments", metavar = "name[,name,...]", help = "Name the instruments explicitly. If only PSD files are to be used to get horizon distances (no --horizon-distances options are given) then this options is required, otherwise it is optional. If both --instruments and --horizon-distances are given, they must list the same instruments.")
parser.add_option("--horizon-distance-flow", metavar = "Hz", default = 10., type = "float", help = "When obtaining horizon distances from a collection PSD files, start horizon distance integral at this frequency in Hertz (default = 10 Hz).")
parser.add_option("--horizon-distance-masses", metavar = "m1,m2", action = "append", default = ["1.4,1.4"], help = "When obtaining horizon distances from a collection of PSD files, compute them for these masses in solar mass units (default = 1.4,1.4). Can be given multiple times.")
parser.add_option("--instruments", metavar = "name[,name,...]", help = "Set the instruments to include in the network. If PSD files alone are to be used to provide horizon distances, or --full is selected, then this option is required, otherwise it is optional. If both --instruments and --horizon-distances are given, they must list the same instruments.")
parser.add_option("--min-instruments", metavar = "count", default = 2, type = "int", help = "Set the minimum number of instruments required to form a candidate (default = 2).")
parser.add_option("--output", "-o", metavar = "filename", help = "Write SNR PDF cache to this file (required).")
parser.add_option("--seed", metavar = "filename", action = "append", default = [], help = "Seed the SNR PDF cache by loading pre-computed SNR PDFs from this file. Can be given multiple times.")
parser.add_option("--seed-cache", metavar = "filename", help = "Seed the SNR PDF cache by loading pre-computed SNR PDFs from the files named in this LAL cache.")
parser.add_option("--verbose", "-v", action = "store_true", help = "Be verbose.")
options, filenames = parser.parse_args()
processparams = options.__dict__.copy()
if options.verbose:
logging.basicConfig(format = "%(message)s", level = logging.INFO)
else:
logging.basicConfig(format = "%(message)s", level = logging.WARNING)
if options.seed_cache:
options.seed += [CacheEntry(line).path for line in open(options.seed_cache)]
if not options.full and not options.horizon_distances and not options.seed and not filenames:
raise ValueError("must either enable --full, or provide one or more --horizon-distances, or one or more --seed and/or a --seed-cache, or provide the names of one or more PSD files")
if options.full:
if not options.instruments:
raise ValueError("must set --instruments with --full enabled")
#
# disable PSD files and --horizon-distances options. waste
# of time if we're already going to generate all possible
# PDFs
#
if filenames:
logging.warning("--full enabled, ignoring PSD files")
del filenames[:]
if options.horizon_distances is not None:
logging.warning("--full enabled, ignoring --horizon-distances")
options.horizon_distances = None
options.full_fragment = map(int, options.full_fragment.split("/"))
if len(options.full_fragment) != 2 or options.full_fragment[1] < 1 or not 1 <= options.full_fragment[0] <= options.full_fragment[1]:
raise ValueError("invalid --full-fragment")
if options.instruments:
options.instruments = set(options.instruments.split(","))
if len(options.instruments) < options.min_instruments:
......@@ -89,14 +145,16 @@ def parse_command_line():
raise ValueError("--instruments does not match --horizon-distances")
if any(min(horizon_distances.values()) < 0. for horizon_distances in options.horizon_distances):
raise ValueError("all --horizon-distances must be >= 0.")
elif not filenames:
raise ValueError("must provide either one or more PSD files or one or more --horizon-distances, or one or more of both")
elif not options.instruments:
raise ValueError("must set --instruments when --horizon-distances not given")
else:
# we'll populate this later from the PSD files
# we'll populate this later
options.horizon_distances = []
# if --full was given it has unset filenames; if any
# --horizon-distances were given then .instruments has been
# populated from those if it wasn't already set
if filenames and not options.instruments:
raise ValueError("must set --instruments if PSD files alone are used for horizon distances")
options.horizon_distance_masses = [map(float, s.split(",")) for s in options.horizon_distance_masses]
if any(len(masses) != 2 for masses in options.horizon_distance_masses):
raise ValueError("must provide exactly 2 masses for each --horizon-distance-masses")
......@@ -109,7 +167,7 @@ def parse_command_line():
if not options.output:
raise ValueError("must set --output")
return options, filenames
return options, processparams, filenames
#
......@@ -126,36 +184,88 @@ def parse_command_line():
#
options, filenames = parse_command_line()
options, processparams, filenames = parse_command_line()
#
# extract additional horizon distances from PSD files if given
# initialize empty SNR PDF cache
#
snrpdf = inspiral_extrinsics.SNRPDF(snr_cutoff = 4.)
snrpdf.snr_joint_pdf_cache.clear() # just in case
#
# now load seed files if requested. all SNRPDF instances share a single
# global cache of PDFs, so loading one from disk has the effect of
# inserting its PDFs into our existing cache
#
for n, filename in enumerate(options.seed, 1):
logging.info("loading seed PDFs %d/%d:" % (n, len(options.seed)))
seed_snrpdf = inspiral_extrinsics.SNRPDF.load(open(filename), verbose = options.verbose)
if seed_snrpdf.snr_cutoff != snrpdf.snr_cutoff:
raise ValueError("incompatible SNR cut-offs")
if seed_snrpdf.log_distance_tolerance != snrpdf.log_distance_tolerance:
raise ValueError("incompatible distance ratio quantizations")
if seed_snrpdf.min_ratio != snrpdf.min_ratio:
raise ValueError("incompatible minimum distance ratios")
#
# obtain horizon distances from PSD files if requested
#
for n, filename in enumerate(filenames, 1):
if options.verbose:
print >>sys.stderr, "%d/%d:" % (n, len(filenames)),
logging.info("loading PSD %d/%d:" % (n, len(filenames)))
psd = lal.series.read_psd_xmldoc(ligolw_utils.load_filename(filename, contenthandler = lal.series.PSDContentHandler, verbose = options.verbose))
for m1, m2 in options.horizon_distance_masses:
horizon_distances = dict((instrument, (0. if instrument not in psd else reference_psd.horizon_distance(psd[instrument], m1, m2, 8., options.horizon_distance_flow))) for instrument in options.instruments)
if options.verbose:
print >>sys.stderr, "\t%s" % ", ".join("%s = %.4g Mpc" % x for x in sorted(horizon_distances.items()))
logging.info("\t%s" % ", ".join("%s = %.4g Mpc" % x for x in sorted(horizon_distances.items())))
options.horizon_distances.append(horizon_distances)
#
# build and populate SNR PDF cache
# construct (fragment of) full spectrum of horizon distances if requested
#
snrpdf = inspiral_extrinsics.SNRPDF(snr_cutoff = 4.)
if options.full:
# the use of sorted() in both loops ensures the sequence is
# reproducible from one job to the next. this is required by the
# --full-fragment feature.
for loudest in sorted(options.instruments):
other_instruments = sorted(options.instruments - set((loudest,)))
for other_distances in itertools.product(*([snrpdf.quants] * len(other_instruments))):
options.horizon_distances.append(snrpdf.quantized_horizon_distances([(loudest, 0.0)] + zip(other_instruments, other_distances)))
options.horizon_distances.append(dict.fromkeys(options.instruments, 0.0))
# clip total fragment count to total number of ratios
n = len(options.horizon_distances)
options.full_fragment[1] = min(options.full_fragment[1], n)
# clip fragment index to clipped fragment count
options.full_fragment[0] = min(*options.full_fragment)
# mean horizon distance ratios per fragment
ratios_per_fragment = n / float(options.full_fragment[1])
# find start and end of interval to compute
start = int(round(ratios_per_fragment * (options.full_fragment[0] - 1)))
end = int(round(ratios_per_fragment * options.full_fragment[0]))
# clip sequence
options.horizon_distances = options.horizon_distances[start : end]
#
# populate SNR PDF cache
#
for horizon_distances in options.horizon_distances:
for n in range(options.min_instruments, len(horizon_distances) + 1):
for instruments in itertools.combinations(horizon_distances, n):
snrpdf.add_to_cache(instruments, horizon_distances, verbose = options.verbose)
start_time = time.time()
for n, horizon_distances in enumerate(options.horizon_distances, 1):
logging.info("horizon distance ratio %d/%d, %g minutes elapsed" % (n, len(options.horizon_distances), (time.time() - start_time) / 60.))
for trigger_instruments in all_instrument_combos(options.instruments, options.min_instruments):
snrpdf.add_to_cache(trigger_instruments, horizon_distances, verbose = options.verbose)
#
......@@ -164,5 +274,7 @@ for horizon_distances in options.horizon_distances:
xmldoc = ligolw.Document()
xmldoc.appendChild(snrpdf.to_xml())
xmldoc.appendChild(ligolw.LIGO_LW())
process = ligolw_process.register_to_xmldoc(xmldoc, "gstlal_inspiral_make_snr_pdf", processparams, ifos = options.instruments)
xmldoc.childNodes[0].appendChild(snrpdf.to_xml())
ligolw_utils.write_filename(xmldoc, options.output, gz = (options.output or "stdout").endswith(".gz"), verbose = options.verbose)
from glue import iterutils
from glue.ligolw import ligolw
from glue.ligolw import utils as ligolw_utils
from glue.text_progress_bar import ProgressBar
from gstlal import far
from gstlal.stats import inspiral_extrinsics
def all_instrument_combos(instruments, min_number):
for n in range(min_number, len(instruments) + 1):
for combo in iterutils.choices(instruments, n):
yield combo
diststats, _, segs = far.parse_likelihood_control_doc(ligolw_utils.load_filename("all_o1_likelihood.xml.gz", contenthandler = far.ThincaCoincParamsDistributions.LIGOLWContentHandler, verbose = True))
#diststats.finish(segs, verbose = True)
if set(diststats.horizon_history) != set(diststats.instruments):
raise ValueError("horizon histories are for %s, need for %s" % (", ".join(sorted(diststats.horizon_history)), ", ".join(sorted(diststats.instruments)))
snrpdf = inspiral_extrinsics.SNRPDF(diststats.snr_min)
snrpdf.snr_joint_pdf_cache.clear()
all_horizon_distances = diststats.horizon_history.all()
with ProgressBar(max = len(all_horizon_distances)) as progressbar:
for horizon_distances in all_horizon_distances:
progressbar.increment(text = ",".join(sorted("%s=%.5g" % item for item in horizon_distances.items())))
progressbar.show()
for off_instruments in all_instrument_combos(horizon_distances, 0):
dists = horizon_distances.copy()
for instrument in off_instruments:
dists[instrument] = 0.
for instruments in all_instrument_combos(diststats.instruments, diststats.min_instruments):
snrpdf.add_to_cache(instruments, dists, verbose = True)
xmldoc = ligolw.Document()
xmldoc.appendChild(ligolw.LIGO_LW()).appendChild(snrpdf.to_xml())
ligolw_utils.write_filename(xmldoc, "inspiral_snr_pdf.xml.gz", gz = True, verbose = True)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment