gstlal_inspiral_create_prior_diststats

#!/usr/bin/env python3
#
# Copyright (C) 2010--2014  Kipp Cannon, Chad Hanna
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
#

### A program to create some prior likelihood data to seed an offline analysis

#
# =============================================================================
#
#                                   Preamble
#
# =============================================================================
#


from optparse import OptionParser
import numpy
import scipy

from lal import series

from ligo.lw import ligolw
from ligo.lw import lsctables
from ligo.lw import array as ligolw_array
from ligo.lw import param as ligolw_param
from ligo.lw import utils as ligolw_utils
from ligo.lw.utils import process as ligolw_process


from gstlal import far
from gstlal import svd_bank
from gstlal import templates

@ligolw_array.use_in
@ligolw_param.use_in
@lsctables.use_in
class LIGOLWContentHandler(ligolw.LIGOLWContentHandler):
	pass

__author__ = "Chad Hanna <chad.hanna@ligo.org>"
__version__ = "git id %s" % ""	# FIXME
__date__ = ""	# FIXME


#
# =============================================================================
#
#                                 Command Line
#
# =============================================================================
#


def parse_command_line():
	parser = OptionParser(
		version = "Name: %%prog\n%s" % "" # FIXME
	)
	parser.add_option("-v", "--verbose", action = "store_true", help = "Be verbose.")
	# FIXME:  default must be identical to gstlal_inspiral's default
	parser.add_option("--coincidence-threshold", metavar = "value", type = "float", default = 0.005, help = "Set the coincidence window in seconds (default = 0.005).  The light-travel time between instruments will be added automatically in the coincidence test.")
	# FIXME:  default must be identical to gstlal_inspiral's default
	parser.add_option("--min-instruments", metavar = "count", type = "int", default = 2, help = "Set the minimum number of instruments that must contribute triggers to form a candidate (default = 2).")
	parser.add_option("--write-likelihood", metavar = "filename", help = "Write merged raw likelihood data to this file.")
	parser.add_option("--instrument", action = "append", help = "Append to a list of instruments to create dist stats for.  List must be whatever instruments you intend to analyze.")
	parser.add_option("-p", "--background-prior", metavar = "N", default = 1, type = "float", help = "Include an exponential background prior with the maximum bin count = N, default 1")
	parser.add_option("--df", metavar = "N", default = 40, help = "set the degrees of freedom for the background chisq prior: default 40. You can also use template bandwidth to set this by setting it to 'bandwidth'")
	parser.add_option("--svd-file", metavar = "filename", help = "The SVD file to read the template ids from")
	parser.add_option("--svd-bin", metavar = "%04d", help = "The bin that this is, required if giving json manifest for --svd-file")
	parser.add_option("--mass-model-file", metavar = "filename", help = "The mass model file to read from (hdf5 format)")
	parser.add_option("--dtdphi-file", metavar = "filename", help = "dtdphi snr ratio pdfs to read from (hdf5 format). Default passed by gstlal_inspiral_pipe, but not when run as a standalone program.")
	parser.add_option("--idq-file", metavar = "filename", help = "idq glitch file (hdf5 format)")
	parser.add_option("--psd-xml", type = "string", help = "Specify a PSD to use for computing template bandwidth. Required if df is bandwidth")
	options, filenames = parser.parse_args()

	process_params = dict(options.__dict__)

	if not options.instrument:
		raise ValueError("must specify at least one --instrument")
	options.instrument = set(options.instrument)

	if options.min_instruments < 1:
		raise ValueError("--min-instruments must be >= 1")
	if options.min_instruments > len(options.instrument):
		raise ValueError("--min-instruments is greater than the number of unique --instrument's")

	if filenames:
		raise ValueError("unrecognized arguments after options: %s" % " ".join(filenames))

	if options.df == "bandwidth" and options.psd_xml is None:
		raise ValueError("Must specify psd xml file if using bandwidth to set degrees of freedom")
	psd = {}
	if options.psd_xml:
		for ifo, p in series.read_psd_xmldoc(ligolw_utils.load_filename(options.psd_xml, verbose = options.verbose, contenthandler = series.PSDContentHandler)).items():
			f = numpy.arange(len(p.data.data)) * p.deltaF
			# remove the last 10% of the PSD to remove anti-aliasing artifacts and replace with +inf
			psd[ifo] = scipy.interpolate.interp1d(f[:int(0.9*len(f))], p.data.data[:int(0.9*len(f))], fill_value = numpy.inf, bounds_error = False)

	template_ids = []
	horizon_factors = {}
	bandwidths = []
	if options.svd_file.endswith(".xml") or options.svd_file.endswith(".xml.gz"):
		for n, bank in enumerate(svd_bank.read_banks(options.svd_file, contenthandler = LIGOLWContentHandler, verbose = options.verbose)):
			if bank.bank_type != "signal_model":
				continue
			template_ids += [row.template_id for row in bank.sngl_inspiral_table]
			# FIXME don't hard code
			if options.df == "bandwidth":
				for ifo in psd:
					bandwidths += [templates.bandwidth(row.mass1, row.mass2, row.spin1z, row.spin2z, f_min = 10.0, f_max = row.f_final, delta_f = 0.25, psd = psd[ifo]) for row in bank.sngl_inspiral_table]
			horizon_factors.update(bank.horizon_factors)
	elif options.svd_file.endswith(".json"):
		assert options.svd_bin is not None
		import json
		with open(options.svd_file) as f:
			manifest = json.loads(f.read())
		bandwidths = [manifest[options.svd_bin]["min_bw"], manifest[options.svd_bin]["max_bw"]]
		horizon_factors.update(manifest[options.svd_bin]["horizon_factors"])
		template_ids = [int(template_id) for template_id in list(manifest[options.svd_bin]["horizon_factors"].keys())]
	else:
		raise ValueError("svd file cannot be read")

	if options.df == "bandwidth":
		# don't let the bandwidth get too small
		options.df = max(int(min(bandwidths)) + 1, 10) * 3

	return options, process_params, filenames, template_ids, horizon_factors


#
# =============================================================================
#
#                                     Main
#
# =============================================================================
#


#
# command line
#


options, process_params, filenames, template_ids, horizon_factors = parse_command_line()


#
# initialize output document (records process start time)
#


xmldoc = ligolw.Document()
xmldoc.appendChild(ligolw.LIGO_LW())
process = ligolw_process.register_to_xmldoc(xmldoc, u"gstlal_inspiral_create_prior_diststats", instruments = options.instrument, paramdict = process_params)


#
# create parameter distribution priors
#


rankingstat = far.RankingStat(template_ids = template_ids, instruments = options.instrument, delta_t = options.coincidence_threshold, min_instruments = options.min_instruments, population_model_file = options.mass_model_file, dtdphi_file = options.dtdphi_file, horizon_factors = horizon_factors, idq_file = options.idq_file)

if options.background_prior > 0:
	rankingstat.denominator.add_noise_model(number_of_events = options.background_prior)

# Add the numerator
rankingstat.numerator.add_signal_model(df = int(options.df), verbose = options.verbose)


#
# record results in output file
#


far.gen_likelihood_control_doc(xmldoc, rankingstat, None)


#
# done
#


process.set_end_time_now()
ligolw_utils.write_filename(xmldoc, options.write_likelihood, verbose = options.verbose)