gstlal_inspiral_create_prior_diststats

#!/usr/bin/env python
#
# Copyright (C) 2010--2014  Kipp Cannon, Chad Hanna
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
#

### A program to create some prior likelihood data to seed an offline analysis

#
# =============================================================================
#
#                                   Preamble
#
# =============================================================================
#


from optparse import OptionParser
import numpy
import scipy

from lal import series

from ligo.lw import ligolw
from ligo.lw import lsctables
from ligo.lw import array as ligolw_array
from ligo.lw import param as ligolw_param
from ligo.lw import utils as ligolw_utils
from ligo.lw.utils import process as ligolw_process


from gstlal import far
from gstlal import svd_bank
from gstlal import templates

@ligolw_array.use_in
@ligolw_param.use_in
@lsctables.use_in
class LIGOLWContentHandler(ligolw.LIGOLWContentHandler):
	pass

__author__ = "Chad Hanna <chad.hanna@ligo.org>"
__version__ = "git id %s" % ""	# FIXME
__date__ = ""	# FIXME


#
# =============================================================================
#
#                                 Command Line
#
# =============================================================================
#


def parse_command_line():
	parser = OptionParser(
		version = "Name: %%prog\n%s" % "" # FIXME
	)
	parser.add_option("-v", "--verbose", action = "store_true", help = "Be verbose.")
	# FIXME:  default must be identical to gstlal_inspiral's default
	parser.add_option("--coincidence-threshold", metavar = "value", type = "float", default = 0.005, help = "Set the coincidence window in seconds (default = 0.005).  The light-travel time between instruments will be added automatically in the coincidence test.")
	# FIXME:  default must be identical to gstlal_inspiral's default
	parser.add_option("--min-instruments", metavar = "count", type = "int", default = 2, help = "Set the minimum number of instruments that must contribute triggers to form a candidate (default = 2).")
	parser.add_option("--write-likelihood", metavar = "filename", help = "Write merged raw likelihood data to this file.")
	parser.add_option("--instrument", action = "append", help = "Append to a list of instruments to create dist stats for.  List must be whatever instruments you intend to analyze.")
	parser.add_option("-p", "--background-prior", metavar = "N", default = 1, type = "float", help = "Include an exponential background prior with the maximum bin count = N, default 1")
	parser.add_option("--df", metavar = "N", default = 40, help = "set the degrees of freedom for the background chisq prior: default 40. You can also use template bandwidth to set this by setting it to 'bandwidth'")
	parser.add_option("--svd-file", metavar = "filename", help = "The SVD file to read the template ids from")
	parser.add_option("--mass-model-file", metavar = "filename", help = "The mass model file to read from (hdf5 format)")
	parser.add_option("--dtdphi-file", metavar = "filename", help = "dtdphi snr ratio pdfs to read from (hdf5 format). Default passed by gstlal_inspiral_pipe, but not when run as a standalone program.")
	parser.add_option("--psd-xml", type = "string", help = "Specify a PSD to use for computing template bandwidth. Required if df is bandwidth")
	options, filenames = parser.parse_args()

	process_params = dict(options.__dict__)

	if not options.instrument:
		raise ValueError("must specify at least one --instrument")
	options.instrument = set(options.instrument)

	if options.min_instruments < 1:
		raise ValueError("--min-instruments must be >= 1")
	if options.min_instruments > len(options.instrument):
		raise ValueError("--min-instruments is greater than the number of unique --instrument's")

	if filenames:
		raise ValueError("unrecognized arguments after options: %s" % " ".join(filenames))

	if options.df == "bandwidth" and options.psd_xml is None:
		raise ValueError("Must specify psd xml file if using bandwidth to set degrees of freedom")
	psd = {}
	if options.psd_xml:
		for ifo, p in series.read_psd_xmldoc(ligolw_utils.load_filename(options.psd_xml, verbose = options.verbose, contenthandler = series.PSDContentHandler)).items():
			f = numpy.arange(len(p.data.data)) * p.deltaF
			psd[ifo] = scipy.interpolate.interp1d(f, p.data.data)


	template_ids = []
	horizon_factors = {}
	bandwidths = []
	for n, bank in enumerate(svd_bank.read_banks(options.svd_file, contenthandler = LIGOLWContentHandler, verbose = options.verbose)):
		template_ids += [row.template_id for row in bank.sngl_inspiral_table]
		# FIXME don't hard code
		if options.df == "bandwidth":
			for ifo in psd:
				bandwidths += [templates.bandwidth(row.mass1, row.mass2, row.spin1z, row.spin2z, f_min = 10.0, f_max = row.f_final, delta_f = 0.25, psd = psd[ifo])]
		horizon_factors.update(bank.horizon_factors)
	if options.df == "bandwidth":
		# NOTE the 4000 is tuned by looking at real data distributions
		options.df = int(4000. / min(bandwidths))

	return options, process_params, filenames, template_ids, horizon_factors


#
# =============================================================================
#
#                                     Main
#
# =============================================================================
#


#
# command line
#


options, process_params, filenames, template_ids, horizon_factors = parse_command_line()


#
# initialize output document (records process start time)
#


xmldoc = ligolw.Document()
xmldoc.appendChild(ligolw.LIGO_LW())
process = ligolw_process.register_to_xmldoc(xmldoc, u"gstlal_inspiral_create_prior_diststats", ifos = options.instrument, paramdict = process_params)


#
# create parameter distribution priors
#


rankingstat = far.RankingStat(template_ids = template_ids, instruments = options.instrument, delta_t = options.coincidence_threshold, min_instruments = options.min_instruments, population_model_file = options.mass_model_file, dtdphi_file = options.dtdphi_file, horizon_factors = horizon_factors)

if options.background_prior > 0:
	rankingstat.denominator.add_noise_model(number_of_events = options.background_prior, df = int(options.df))

# Add the numerator
rankingstat.numerator.add_signal_model()


#
# record results in output file
#


far.gen_likelihood_control_doc(xmldoc, rankingstat, None)


#
# done
#


ligolw_process.set_process_end_time(process)
ligolw_utils.write_filename(xmldoc, options.write_likelihood, gz = options.write_likelihood.endswith(".gz"), verbose = options.verbose)