gstlal_inspiral_compress_ranking_stat 5.11 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
#!/usr/bin/env python
#
# Copyright (C) 2019  Kipp Cannon
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
#


#
# =============================================================================
#
#                                   Preamble
#
# =============================================================================
#


import math
from optparse import OptionParser
import sys
33
import numpy
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56

from ligo.lw import utils as ligolw_utils
from gstlal import far


__author__ = "Kipp Cannon <kipp.cannon@ligo.org>"
__version__ = "git id %s" % ""	# FIXME
__date__ = ""	# FIXME


#
# =============================================================================
#
#                                 Command Line
#
# =============================================================================
#


def parse_command_line():
	parser = OptionParser(
		version = "Name: %%prog\n%s" % "" # FIXME
	)
57
	parser.add_option("-t", "--threshold", type = "float", default = 0.03, help = "Only keep horizon distance values that differ by this much, fractionally, from their neighbours (default = 0.03).")
58 59
	parser.add_option("--remove-horizon-deviations", action = "store_true", help = "Remove horizon entries that display an uncharacteristic deviation in sensitivity from the non-zero mean.")
	parser.add_option("--deviation-percent", type = "float", default = 0.50, help = "Remove horizon entries that deviate by this fraction from the non-zero mean.")
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
	parser.add_option("-v", "--verbose", action = "store_true", help = "Be verbose.")
	options, filenames = parser.parse_args()

	process_params = dict(options.__dict__)

	return options, process_params, filenames


#
# =============================================================================
#
#                                     Main
#
# =============================================================================
#


#
# command line
#


options, process_params, filenames = parse_command_line()


#
# loop over ranking statistic files
#


for filename in filenames:
	#
	# load file
	#

	xmldoc = ligolw_utils.load_filename(filename, verbose = options.verbose, contenthandler = far.RankingStat.LIGOLWContentHandler)

	#
	# extract ranking statistic object, and erase from XML tree
	#

	rankingstat, rankingstatpdf = far.parse_likelihood_control_doc(xmldoc)
	if rankingstatpdf is not None and options.verbose:
		print >>sys.stderr, "WARNING: \"%s\" contains a RankingStatPDF object, it is not a pure ranking statistic file, you might be using this program on the wrong files." % filename
	# FIXME:  don't hard-code object name
	name = u"gstlal_inspiral_likelihood"
	elem = rankingstat.get_xml_root(xmldoc, name)
	elem.parentNode.removeChild(elem)
	elem.unlink()
	elem = rankingstatpdf.get_xml_root(xmldoc, name)
	elem.parentNode.removeChild(elem)
	elem.unlink()

	#
	# compress horizon distance history.  the outer loop makes a list
	# to ensure no problems modifying the object being iterated over
	#

	abs_ln_thresh = math.log1p(options.threshold)
	for instrument, horizon_history in list(rankingstat.numerator.horizon_history.items()):
120 121
		# GPS time / distance pairs
		items = horizon_history.items()
122 123 124 125
		if options.remove_horizon_deviations:
			values = numpy.array(items)[:,1]
			mean_horizon = values[values!=0].mean()
			items = [item for item in items if item[1] < (mean_horizon * (1. + options.deviation_percent))]
126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144

		# compress array
		j = 1
		for i in range(1, len(items) - 1):
			values = items[j - 1][1], items[i][1], items[i + 1][1]
			# remove distances that are non-zero and differ
			# fractionally from both neighbours by less than
			# the selected threshold.  always keep the first
			# and last values
			if values[0] > 0. and values[1] > 0. and values[2] > 0. and abs(math.log(values[1] / values[0])) < abs_ln_thresh and abs(math.log(values[1] / values[2])) < abs_ln_thresh:
				continue
			# remove distances that are 0 and surrounded by 0
			# on both sides (basically the same as the last
			# test, but we can't take log(0)).
			if values == (0., 0., 0.):
				continue
			items[j] = items[i]
			j += 1
		del items[j:]
145 146 147 148
		if options.verbose:
			print >>sys.stderr, "\"%s\":  %s horizon history reduced to %.3g%% of original size" % (filename, instrument, 100. * j / (i + 1.))

		# replace
149
		rankingstat.numerator.horizon_history[instrument] = type(horizon_history)(items)
150 151 152 153 154 155 156 157 158 159 160 161

	#
	# re-insert into XML tree
	#

	far.gen_likelihood_control_doc(xmldoc, rankingstat, rankingstatpdf)

	#
	# write to disk
	#

	ligolw_utils.write_filename(xmldoc, filename, gz = filename.endswith(".gz"), verbose = options.verbose)