gstlal_inspiral_plot_background 17.2 KB
Newer Older
1 2
#!/usr/bin/env python
#
3
# Copyright (C) 2013 Chad Hanna, Kipp Cannon
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

19 20

### A program to plot the likelihood distributions in noise of a gstlal inspiral analysis
21

Kipp Cannon's avatar
Kipp Cannon committed
22 23 24 25 26 27 28 29 30 31

#
# =============================================================================
#
#                                   Preamble
#
# =============================================================================
#


32
import itertools
33
import math
34
import matplotlib
35
matplotlib.rcParams.update({
36
	"font.size": 10.0,
37 38 39 40 41 42 43 44 45 46 47
	"axes.titlesize": 10.0,
	"axes.labelsize": 10.0,
	"xtick.labelsize": 8.0,
	"ytick.labelsize": 8.0,
	"legend.fontsize": 8.0,
	"figure.dpi": 600,
	"savefig.dpi": 600,
	"text.usetex": True
})
from matplotlib import figure
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
48 49
import numpy
from optparse import OptionParser
50 51 52 53
import sqlite3
import sys


54 55
from ligo.lw import dbtables
from ligo.lw import lsctables
Kipp Cannon's avatar
Kipp Cannon committed
56
from glue.text_progress_bar import ProgressBar
57
from lal.utils import CacheEntry
58
from lalinspiral import thinca
59
from ligo.segments import utils as segmentsUtils
60 61 62


from gstlal import far
63
from gstlal import plotfar
64
from gstlal import dagparts
65

66

Kipp Cannon's avatar
Kipp Cannon committed
67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
class SnrChiColourNorm(matplotlib.colors.Normalize):
	def __call__(self, value, clip = None):
		value, is_scalar = self.process_value(value)
		numpy.clip(value, 1e-200, 1e+200, value)

		self.autoscale_None(value)

		vmin = math.log(self.vmin)
		vmax = math.log(self.vmax)
		xbar = (vmax + vmin) / 2.
		delta = (vmax - vmin) / 2.
		pi_2 = math.pi / 2.

		value = (numpy.arctan((numpy.log(value) - xbar) * pi_2 / delta) + pi_2) / math.pi
		return value[0] if is_scalar else value

	def inverse(self, value):
		vmin = math.log(self.vmin)
		vmax = math.log(self.vmax)
		xbar = (vmax + vmin) / 2.
		delta = (vmax - vmin) / 2.
		pi_2 = math.pi / 2.

		value = numpy.exp(numpy.tan(value * math.pi - pi_2) * delta / pi_2 + xbar)
		numpy.clip(value, 1e-200, 1e+200, value)
		return value

	def _autoscale(self, vmin, vmax):
		vmin = math.log(vmin)
		vmax = math.log(vmax)
		xbar = (vmax + vmin) / 2.
		delta = (vmax - vmin) / 2.
		xbar += 2. * delta / 3.
		delta /= 4.
		vmin = math.exp(xbar - delta)
		vmax = math.exp(xbar + delta)
		return vmin, vmax

	def autoscale(self, A):
		self.vmin, self.vmax = self._autoscale(numpy.ma.min(A), numpy.ma.max(A))

	def autoscale_None(self, A):
		vmin, vmax = self._autoscale(numpy.ma.min(A), numpy.ma.max(A))
		if self.vmin is None:
			self.vmin = vmin
		if self.vmax is None:
			self.vmax = vmax


#
# =============================================================================
#
#                                 Command Line
#
# =============================================================================
#


125 126
def parse_command_line():
	parser = OptionParser()
127 128
	parser.add_option("-d", "--database", metavar = "filename", action = "append", help = "Retrieve search results from this database (optional).  Can be given multiple times.")
	parser.add_option("-c", "--database-cache", metavar = "filename", help = "Retrieve search results from all databases in this LAL cache (optional).  See lalapps_path2cache.")
129
	parser.add_option("--max-snr", metavar = "SNR", default = 200., type = "float", help = "Plot SNR PDFs up to this value of SNR (default = 200).")
130
	parser.add_option("--max-log-lambda", metavar = "value", default = 40., type = "float", help = "Plot ranking statistic CCDFs, etc., up to this value of the natural logarithm of the likelihood ratio (default = 40).")
131
	parser.add_option("--min-log-lambda", metavar = "value", default = -5., type = "float", help = "Plot ranking statistic CCDFs, etc., down to this value of the natural logarithm of the likelihood ratio (default = -5).")
132
	parser.add_option("--scatter-log-lambdas", metavar = "[low]:[high][,[low]:[high]...]", help = "Overlay scatter plots of candidate parameter co-ordinates on various PDF plots, limiting the candidates to those whose log likelihood ratios are in the given range (default = don't overlay scatter plot).  Use a range of \":\" to plot all candidates.")
133
	parser.add_option("--output-dir", metavar = "output-dir", default = ".", help = "Write output to this directory (default = \".\").")
Kipp Cannon's avatar
Kipp Cannon committed
134
	parser.add_option("--output-format", metavar = "extension", default = ".png", help = "Select output format by setting the filename extension (default = \".png\").")
135
	parser.add_option("-t", "--tmp-space", metavar = "path", help = "Path to a directory suitable for use as a work area while manipulating the database file.  The database file will be worked on in this directory, and then moved to the final location when complete.  This option is intended to improve performance when running in a networked environment, where there might be a local disk with higher bandwidth than is available to the filesystem on which the final output will reside.")
Kipp Cannon's avatar
Kipp Cannon committed
136
	parser.add_option("--add-zerolag-to-background", action = "store_true", help = "Add zerolag events to background before populating coincident parameter PDF histograms.")
137
	parser.add_option("--user-tag", metavar = "user-tag", default = "ALL", help = "Set the adjustable component of the description fields in the output filenames (default = \"ALL\").")
Kipp Cannon's avatar
Kipp Cannon committed
138
	parser.add_option("--plot-snr-snr-pdfs", action = "store_true", help = "Plot the full cache of snr-snr-pdfs.")
139 140
	parser.add_option("--event-snr", metavar = "ifo:snr", action = "append", help = "SNR to plot on snr chisq plots. Pass as ifo:snr, e.g. H1:3.2. Can be passed multiple times, though only once for each ifo. Must also pass --event-chisq for ifo.")
	parser.add_option("--event-chisq", metavar = "ifo:chisq", action = "append", help = "chisq to plot on snr chisq plots. Pass as ifo:chisq, e.g. H1:1.1. Can be passed multiple times, though only once for each ifo. Must also pass --event-snr for ifo.")
141
	parser.add_option("--bank-chisq", action = "store_true", help = "If provided, plots bank chisq background instead of chisq.")
142 143 144
	parser.add_option("-v", "--verbose", action = "store_true", help = "Be verbose.")
	options, filenames = parser.parse_args()

145 146 147 148 149
	if options.database_cache is not None:
		if options.database is None:
			options.database = []
		options.database += [CacheEntry(line).path for line in open(options.database_cache)]

Kipp Cannon's avatar
Kipp Cannon committed
150 151 152
	valid_formats = (".png", ".pdf", ".svg")
	if options.output_format not in valid_formats:
		raise ValueError("invalid --output-format \"%s\", allowed are %s" % (options.output_format, ", ".join("\"%s\"" % fmt for fmt in valid_formats)))
153

154 155 156
	if options.scatter_log_lambdas is not None:
		options.scatter_log_lambdas = segmentsUtils.from_range_strings(options.scatter_log_lambdas.split(","), boundtype = float).coalesce()

157 158
	options.user_tag = options.user_tag.upper()

159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174
	options.event_snr_dict = {}
	options.event_chisq_dict = {}
	if options.event_snr or options.event_chisq:
		for ifo_snr_str in options.event_snr:
			ifo, snr = ifo_snr_str.split(':')
			options.event_snr_dict[ifo] = float(snr)
			for ifo_chisq_str in options.event_chisq:
				if ifo_chisq_str.split(':')[0] != ifo:
					continue
				options.event_chisq_dict[ifo] = float(ifo_chisq_str.split(':')[1])

		for ifo in options.event_snr_dict.keys():
			if ifo not in options.event_chisq_dict.keys():
				print ifo
				raise ValueError("Each ifo provided to --event-snr or --event-chisq must be provided to both options")

175 176
	return options, filenames

177

Kipp Cannon's avatar
Kipp Cannon committed
178 179 180 181 182 183 184 185 186
#
# =============================================================================
#
#                                    Input
#
# =============================================================================
#


187
def load_distributions(filenames, verbose = False):
Kipp Cannon's avatar
Kipp Cannon committed
188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214
	# RankingStat objects go into a dictionary indexed by the template
	# bank ID set they are for, with RankinStat objects for the same
	# templates being summed.  RankingStatPDF objects are summed (and
	# therefore are required to be fore the same templates).  these
	# organizational choices reflect the current use cases for this
	# program
	rankingstats = {}
	rankingstatpdf = None
	for n, filename in enumerate(filenames, 1):
		if verbose:
			print >>sys.stderr, "%d/%d:" % (n, len(filenames)),
		this_rankingstat, this_rankingstatpdf = far.parse_likelihood_control_doc(far.ligolw_utils.load_filename(filename, verbose = verbose, contenthandler = far.RankingStat.LIGOLWContentHandler))
		if this_rankingstat is not None:
			template_ids = this_rankingstat.template_ids
			if template_ids in rankingstats:
				rankingstats[template_ids] += this_rankingstat
			else:
				rankingstats[template_ids] = this_rankingstat
		if this_rankingstatpdf is not None:
			if rankingstatpdf is None:
				rankingstatpdf = this_rankingstatpdf
			else:
				rankingstatpdf += this_rankingstatpdf
	progress = ProgressBar(text = "Density estimation", max = len(rankingstats)) if verbose and rankingstats else None
	for rankingstat in rankingstats.values():
		if progress is not None:
			progress.increment()
215
		if options.add_zerolag_to_background:
216 217
			rankingstat.denominator.lnzerolagdensity = rankingstat.zerolag
		rankingstat.finish()
Kipp Cannon's avatar
Kipp Cannon committed
218
	del progress
219 220
	# the segment is only used to construct T050017-style filenames, so
	# just fake one if there's no livetime information
Kipp Cannon's avatar
Kipp Cannon committed
221 222 223 224 225 226 227 228 229 230
	seg = (0, 0)
	if rankingstatpdf is not None:
		seg = rankingstatpdf.segments.extent()
	elif rankingstats:
		rankingstat = rankingstats.values()[0]
		try:
			seg = rankingstat.segmentlists.extent_all()
		except ValueError:
			pass
	return rankingstats, rankingstatpdf, seg
231 232


233
def load_search_results(filenames, tmp_path = None, verbose = False):
Kipp Cannon's avatar
Kipp Cannon committed
234 235 236
	timeslide_ln_lr = []
	zerolag_ln_lr = []
	timeslide_sngls = []
237
	zerolag_sngls = []
238 239 240 241 242 243 244 245

	for n, filename in enumerate(filenames, 1):
		if verbose:
			print >>sys.stderr, "%d/%d: %s" % (n, len(filenames), filename)
		working_filename = dbtables.get_connection_filename(filename, tmp_path = tmp_path, verbose = verbose)
		connection = sqlite3.connect(working_filename)

		xmldoc = dbtables.get_xml(connection)
246
		definer_id = lsctables.CoincDefTable.get_table(xmldoc).get_coinc_def_id(thinca.InspiralCoincDef.search, thinca.InspiralCoincDef.search_coinc_type, create_new = False)
247

248 249
		cursor = connection.cursor()

250 251 252
		# subclass to allow it to carry extra attributes
		class snglsdict(dict):
			__slots__ = ("ln_likelihood_ratio", "fap")
253

254 255 256
		for coinc_event_id, rows in itertools.groupby(cursor.execute("""
SELECT
	coinc_event_map.coinc_event_id,
257 258
	coinc_event.likelihood,
	coinc_inspiral.false_alarm_rate,
259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279
	EXISTS (
		SELECT
			*
		FROM
			time_slide
		WHERE
			time_slide.time_slide_id == coinc_event.time_slide_id
			AND time_slide.offset != 0
	),
	sngl_inspiral.ifo,
	sngl_inspiral.snr,
	sngl_inspiral.chisq
FROM
	sngl_inspiral
	JOIN coinc_event_map ON (
		coinc_event_map.table_name == 'sngl_inspiral'
		AND coinc_event_map.event_id == sngl_inspiral.event_id
	)
	JOIN coinc_event ON (
		coinc_event.coinc_event_id == coinc_event_map.coinc_event_id
	)
280 281 282
	JOIN coinc_inspiral ON (
		coinc_inspiral.coinc_event_id == coinc_event.coinc_event_id
	)
283 284 285 286
WHERE
	coinc_event.coinc_def_id == ?
ORDER BY
	coinc_event_map.coinc_event_id
287
		""", (definer_id,)), lambda row: row[0]):
288
			rows = list(rows)
Kipp Cannon's avatar
Kipp Cannon committed
289
			is_timeslide = rows[0][3]
290
			# {instrument: (snr, chisq)} dictionary
291 292 293
			sngls = snglsdict((row[4], (row[5], row[6])) for row in rows)
			sngls.ln_likelihood_ratio = rows[0][1]
			sngls.fap = rows[0][2]
Kipp Cannon's avatar
Kipp Cannon committed
294 295
			if is_timeslide:
				timeslide_sngls.append(sngls)
296 297 298
			else:
				zerolag_sngls.append(sngls)

299
		cursor.close()
300 301 302
		connection.close()
		dbtables.discard_connection_filename(filename, working_filename, verbose = verbose)

Kipp Cannon's avatar
Kipp Cannon committed
303 304
	timeslide_ln_lr = sorted((sngls.ln_likelihood_ratio, sngls.fap) for sngls in timeslide_sngls)
	zerolag_ln_lr = sorted((sngls.ln_likelihood_ratio, sngls.fap) for sngls in zerolag_sngls)
305

Kipp Cannon's avatar
Kipp Cannon committed
306
	return timeslide_ln_lr, zerolag_ln_lr, timeslide_sngls, zerolag_sngls
307 308


Kipp Cannon's avatar
Kipp Cannon committed
309 310 311 312 313 314 315
#
# =============================================================================
#
#                                     Main
#
# =============================================================================
#
316 317


318 319 320 321 322 323 324 325 326 327 328 329 330
#
# command line
#


options, filenames = parse_command_line()


#
# load input
#


Kipp Cannon's avatar
Kipp Cannon committed
331
rankingstats, rankingstatpdf, seg = load_distributions(filenames, verbose = options.verbose)
332 333


334
if options.database:
335
	timeslide_ln_lr, zerolag_ln_lr, timeslide_sngls, zerolag_sngls = load_search_results(options.database, tmp_path = options.tmp_space, verbose = options.verbose)
336
	if options.scatter_log_lambdas is not None:
337
		sngls = [sngl for sngl in timeslide_sngls + zerolag_sngls if sngl.ln_likelihood_ratio in options.scatter_log_lambdas]
338 339
	else:
		sngls = None
340
else:
341
	timeslide_ln_lr, zerolag_ln_lr, sngls = None, None, None
342 343 344 345 346 347


#
# plots
#

Kipp Cannon's avatar
Kipp Cannon committed
348

Kipp Cannon's avatar
Kipp Cannon committed
349 350 351 352
for bin_index, rankingstat in enumerate(sorted(rankingstats.values(), key = lambda rankingstat: sorted(rankingstat.template_ids))):
	# SNR and \chi^2
	for instrument in rankingstat.instruments:
		for snr_chi_type in ("background_pdf", "injection_pdf", "zero_lag_pdf", "LR"):
353
			if instrument in options.event_snr_dict.keys():
354
				fig = plotfar.plot_snr_chi_pdf(rankingstat, instrument, snr_chi_type, options.max_snr, bankchisq = options.bank_chisq, sngls = sngls, event_snr = options.event_snr_dict[instrument], event_chisq = options.event_chisq_dict[instrument])
355
			else:
356
				fig = plotfar.plot_snr_chi_pdf(rankingstat, instrument, snr_chi_type, options.max_snr, bankchisq = options.bank_chisq, sngls = sngls)
Kipp Cannon's avatar
Kipp Cannon committed
357 358
			if fig is None:
				continue
359
			plotname = dagparts.T050017_filename(instrument, "GSTLAL_INSPIRAL_PLOT_BACKGROUND_%s_%04d_%s_SNRCHI2" % (options.user_tag, bin_index, snr_chi_type.upper()), seg, options.output_format, path = options.output_dir)
Kipp Cannon's avatar
Kipp Cannon committed
360 361 362
			if options.verbose:
				print >>sys.stderr, "writing %s" % plotname
			fig.savefig(plotname)
363

Kipp Cannon's avatar
Kipp Cannon committed
364 365
	# candidate rates
	fig = plotfar.plot_rates(rankingstat)
366
	plotname = dagparts.T050017_filename("H1L1V1", "GSTLAL_INSPIRAL_PLOT_BACKGROUND_%s_%04d_RATES" % (options.user_tag, bin_index), seg, options.output_format, path = options.output_dir)
Kipp Cannon's avatar
Kipp Cannon committed
367 368 369
	if options.verbose:
		print >>sys.stderr, "writing %s" % plotname
	fig.savefig(plotname)
370 371


372
# SNR PDFs
373
if options.plot_snr_snr_pdfs:
Kipp Cannon's avatar
Kipp Cannon committed
374 375 376
	# assume PDFs are identical for all template bank bins, and pick
	# one set to plot
	rankingstat = rankingstats.values()[0]
377 378 379 380 381
	for (instruments, horizon_distances) in sorted(rankingstat.numerator.SNRPDF.snr_joint_pdf_cache.keys(), key = lambda (a, horizon_distances): sorted(horizon_distances)):
		# they're stored as a frozen set of quantized key/value
		# pairs, need to unquantize them and get a dictionary back
		horizon_distances = rankingstat.numerator.SNRPDF.quantized_horizon_distances(horizon_distances)
		fig = plotfar.plot_snr_joint_pdf(rankingstat.numerator.SNRPDF, instruments, horizon_distances, rankingstat.min_instruments, options.max_snr, sngls = sngls)
382
		if fig is not None:
383
			plotname = dagparts.T050017_filename(instruments, "GSTLAL_INSPIRAL_PLOT_BACKGROUND_%s_SNR_PDF_%s" % (options.user_tag, "_".join(["%s_%s" % (k, horizon_distances[k]) for k in sorted(horizon_distances)]) ), seg, options.output_format, path = options.output_dir)
384 385 386
			if options.verbose:
				print >>sys.stderr, "writing %s" % plotname
			fig.savefig(plotname)
387 388


389
# ranking statistic PDFs and CCDFs
390 391 392
if rankingstatpdf is not None:
	for Title, which, NAME in (("Noise", "noise", "NOISE"), ("Signal", "signal", "SIGNAL")):
		fig = plotfar.plot_likelihood_ratio_pdf(rankingstatpdf, (options.min_log_lambda, options.max_log_lambda), Title, which = which)
393
		plotname = dagparts.T050017_filename("COMBINED", "GSTLAL_INSPIRAL_PLOT_BACKGROUND_%s_%s_LIKELIHOOD_RATIO_PDF" % (options.user_tag, NAME), seg, options.output_format, path = options.output_dir)
394 395 396 397
		if options.verbose:
			print >>sys.stderr, "writing %s" % plotname
		fig.savefig(plotname)

Kipp Cannon's avatar
Kipp Cannon committed
398 399
if rankingstatpdf is not None and rankingstatpdf.zero_lag_lr_lnpdf.array.any():
	fapfar = far.FAPFAR(rankingstatpdf.new_with_extinction())
400 401
	if zerolag_ln_lr is not None:
		xhi = max(zerolag_ln_lr + timeslide_ln_lr)[0]
402 403 404 405
		xhi = 5. * math.ceil(xhi / 5.)
		xhi = max(xhi, options.max_log_lambda)
	else:
		xhi = options.max_log_lambda
406
	fig = plotfar.plot_likelihood_ratio_ccdf(fapfar, (options.min_log_lambda, xhi), observed_ln_likelihood_ratios = zerolag_ln_lr, is_open_box = True)
407
	plotname = dagparts.T050017_filename("COMBINED", "GSTLAL_INSPIRAL_PLOT_BACKGROUND_%s_NOISE_LIKELIHOOD_RATIO_CCDF_openbox" % options.user_tag, seg, options.output_format, path = options.output_dir)
408 409 410 411
	if options.verbose:
		print >>sys.stderr, "writing %s" % plotname
	fig.savefig(plotname)

412
	fig = plotfar.plot_likelihood_ratio_ccdf(fapfar, (options.min_log_lambda, xhi), observed_ln_likelihood_ratios = timeslide_ln_lr, is_open_box = False)
413
	plotname = dagparts.T050017_filename("COMBINED", "GSTLAL_INSPIRAL_PLOT_BACKGROUND_%s_NOISE_LIKELIHOOD_RATIO_CCDF_closedbox" % options.user_tag, seg, options.output_format, path = options.output_dir)
414 415 416
	if options.verbose:
		print >>sys.stderr, "writing %s" % plotname
	fig.savefig(plotname)