gstlal_inspiral_split_injections 7.55 KB
Newer Older
1
#!/usr/bin/env python
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
#
# Copyright (C) 2016  Cody Messick
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

19 20

### A program to split a set of injections up into a smaller set of files grouped together by chirp mass
21

22 23 24

from optparse import OptionParser

25 26 27 28
from ligo.lw import ilwd, ligolw, lsctables
from ligo.lw import utils as ligolw_utils
from ligo.lw.utils import process as ligolw_process
from ligo.lw.utils import ligolw_add
29 30 31

import numpy

32
@lsctables.use_in
33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
class ligolwcontenthandler(ligolw.LIGOLWContentHandler):
	pass


def mchirp_bounds(mchirp, which):
	if mchirp < 10:
		return {'lower': 0.65*mchirp, 'upper': 1.35*mchirp}[which]
	elif mchirp < 20:
		return {'lower': 0.5*mchirp, 'upper': 1.5*mchirp}[which]
	else:
		return {'lower': 0.5*mchirp, 'upper': 2*mchirp}[which]


def mchirp_injection_str(keys, name):
	# FIXME this could be done better by just looking for values that cross
	# the boundaries defined in mchirp_bounds
49 50
	lower_mchirp_bound = min([mchirp_bounds(mchirp, 'lower') for [mchirp, simulation_id] in keys])
	upper_mchirp_bound = max([mchirp_bounds(mchirp, 'upper') for [mchirp, simulation_id] in keys])
51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91
	return '%.2f:%.2f:%s ' % ( lower_mchirp_bound, upper_mchirp_bound, name )


def create_split_injection(processmap, process_params_dict, endtimesim_tuples, min_mchirp, max_mchirp, name, options):

	xmldoc = ligolw.Document()
	xmldoc.appendChild(ligolw.LIGO_LW())

	processtable = lsctables.New(lsctables.ProcessTable)
	processparamstable = lsctables.New(lsctables.ProcessParamsTable)
	siminspiraltable = lsctables.New(lsctables.SimInspiralTable)

	xmldoc.childNodes[-1].appendChild(processtable)
	xmldoc.childNodes[-1].appendChild(processparamstable)
	xmldoc.childNodes[-1].appendChild(siminspiraltable)

	# FIXME add this to process params table
	#process = ligolw_process.register_to_xmldoc(xmldoc, "gstlal_inspiral_split_injections", param_dict)
	
	process_ids = {}
	siminspiraltimemap = {}
	end_times = numpy.array([])

	for end_time, sim in sorted(endtimesim_tuples, key = lambda (k,v): k):
		siminspiraltable.append(sim)
		end_times = numpy.append(end_times, end_time)
		process_ids.setdefault(sim.process_id, []).append(sim.simulation_id)

	for procid in sorted(process_ids.keys()):
		processtable.append(processmap[procid])
		for param in process_params_dict[procid]:
			processparamstable.append(param)

	dt = end_times[1:] - end_times[:-1]
	print '%s: avg injection rate = %f, min mchirp = %f, max mchirp = %f, min dt = %f' % (name, 1./dt.mean(), min_mchirp, max_mchirp, dt.min())
	ligolw_utils.write_filename(xmldoc, name)

def parse_command_line():
	parser = OptionParser()

	parser.add_option("--target-injection-rate", "-r", metavar = "s^-1", type = "float", default = 0.1, help = "Desired injection rate of output files (default: 0.01)")
92
	parser.add_option("--output-tag", "-o", metavar = "filename", default = "split_injections", help = "Base of output file name (default: split_injections)")
93
	parser.add_option("--injection-rate-tolerance", type = "float", default = 0.1, help = "Acceptable tolerance for target injection rate, files will be written to disk once a splitting has been found such that the injection rate is target-injection-rate +- (injection-rate-tolerance*target-injection-rate) (default: 0.1)")
94
	parser.add_option("--generate-inj-string-only", action = "store_true", help = "Do not combine files, generate an injection string specifying which chirp mass bounds to use to search for the provided injection files")
95
	parser.add_option("--single-output", action = "store_true", help = "Produce a single output file containing all injections")
96
	parser.add_option("--injection-str-output", default = "injection_str.txt", help = "Write injection string used in analysis Makefile to this file. [Default: injection_str.txt]")
97 98 99 100 101 102 103 104 105 106 107

	# FIXME Add option checking so num-output and target-injection-rate cannot both be provided

	options, filenames = parser.parse_args()

	target_inj_rate_interval = (options.target_injection_rate - options.injection_rate_tolerance * options.target_injection_rate, options.target_injection_rate + options.injection_rate_tolerance * options.target_injection_rate)

	return options, filenames, target_inj_rate_interval

options, filenames, target_inj_rate_interval = parse_command_line()

108 109 110 111 112 113 114 115 116
mchirp_str = ''
if options.generate_inj_string_only:
	for f in filenames:
		xmldoc = ligolw_utils.load_filename(f, contenthandler = ligolwcontenthandler)
		mchirp_str += mchirp_injection_str([(sim.mchirp, None) for sim in lsctables.SimInspiralTable.get_table(xmldoc)], f)
		xmldoc.unlink()
else:
	# set up xmldoc structure to contain all of the injections
	xmldoc = ligolw.Document()
117

118 119 120 121 122 123
	# add all files together and create mappings needed to split the files up while
	# conserving process ids
	xmldoc = ligolw_add.ligolw_add(xmldoc, filenames, verbose = True, contenthandler = ligolwcontenthandler)
	processmap = dict((proc.process_id, proc) for proc in lsctables.ProcessTable.get_table(xmldoc))
	siminspiralmap = dict(((sim.mchirp, sim.simulation_id), (sim.geocent_end_time + 1e-9*sim.geocent_end_time_ns, sim)) for sim in lsctables.SimInspiralTable.get_table(xmldoc))
	siminspiralmchirps = sorted(siminspiralmap.keys(), key = lambda k: k[0])
124

125 126 127
	process_params_dict = {}
	for param in lsctables.ProcessParamsTable.get_table(xmldoc):
		process_params_dict.setdefault(param.process_id, []).append(param)
128

129 130
	# close original xmldoc
	xmldoc.unlink()
131

132 133 134 135
	# split injections up into X files with roughly equal injections each
	num_output = 1
	avg_inj_rate = numpy.inf
	i=0
136

137 138
	if options.single_output:
		split_keys = [siminspiralmchirps]
139

140 141 142 143 144 145 146 147 148 149 150 151
	else:
		while avg_inj_rate > target_inj_rate_interval[1]:

			num_output += 1
			split_keys = numpy.array_split(siminspiralmchirps, num_output)
			avg_dts = []
			for i, keys in enumerate(split_keys):
				sorted_times = numpy.sort([siminspiralmap[tuple(k for k in key)][0] for key in keys])
				if len(sorted_times) < 2:
					raise ValueError("files need to be split too finely to hit target injection rate, try increasing target rate or tolerance (last splitting attempt: %d files)" % num_output)
				avg_dts.append((sorted_times[1:] - sorted_times[:-1]).mean())
			avg_inj_rate = 1/numpy.mean(avg_dts)
152

153 154 155 156
			if avg_inj_rate < target_inj_rate_interval[0]:
				print avg_inj_rate, num_output
				# the average injection rate cannot increase with more splittings, thus we can't hit this injection rate interval
				raise ValueError("target injection rate not attainable, either increase the tolerance or decrease the target rate")
157 158 159


	for i, keys in enumerate(split_keys):
160 161
		create_split_injection(processmap, process_params_dict, [siminspiralmap[tuple(k for k in key)] for key in keys], keys[0][0], keys[-1][0], "%s_%04d.xml" %( options.output_tag, i) if not options.single_output else "%s.xml" % options.output_tag, options)
		mchirp_str += mchirp_injection_str(keys, "%s_%04d.xml" %( options.output_tag, i) if not options.single_output else "%s.xml" % options.output_tag)
162

163 164
with open(options.injection_str_output, 'w') as f:
	f.write('MCHIRP_INJECTIONS := %s' % mchirp_str[:-1])