Skip to content
Snippets Groups Projects
Commit 5cdc0e0e authored by Cody Messick's avatar Cody Messick
Browse files

gstlal_inspiral_pipe: Change --injections so that it takes a chirp mass

interval in addition to the injection filename, so that said injections are
only searched for by jobs which have templates within the specified chirp mass
interval
parent 35832090
No related branches found
No related tags found
No related merge requests found
......@@ -106,15 +106,18 @@ def sim_tag_from_inj_file(injections):
return None
return injections.replace('.xml', '').replace('.gz', '').replace('-','_')
def get_bank_params(bank_cache, verbose = False):
	"""Scan the template bank files listed in bank_cache and return
	(max_time, template_mchirp_dict), where max_time is the longest
	template duration found across all banks and template_mchirp_dict
	maps each bank file name to its [min mchirp, max mchirp] interval.

	NOTE(review): only the first entry of bank_cache.values() is used;
	presumably every instrument shares the same bank files -- confirm
	against the caller.
	"""
	max_time = 0
	template_mchirp_dict = {}
	cache = Cache.fromfilenames([bank_cache.values()[0]])
	for f in cache.pfnlist():
		xmldoc = ligolw_utils.load_filename(f, verbose = verbose, contenthandler = LIGOLWContentHandler)
		snglinspiraltable = lsctables.SnglInspiralTable.get_table(xmldoc)
		# track the longest template duration seen so far over all banks
		max_time = max(max_time, max(snglinspiraltable.getColumnByName('template_duration')))
		# record the chirp-mass interval covered by this bank file
		template_mchirp_dict[f] = [min(snglinspiraltable.getColumnByName('mchirp')), max(snglinspiraltable.getColumnByName('mchirp'))]
		# break reference cycles so the document can be garbage collected
		xmldoc.unlink()
	return max_time, template_mchirp_dict
def chunks(l, n):
for i in xrange(0, len(l), n):
......@@ -223,22 +226,28 @@ def psd_node_gen(refPSDJob, dag, parent_nodes, segsdict, channel_dict, options):
)
return psd_nodes
def svd_node_gen(svdJob, dag, parent_nodes, psd, bank_groups, options, seg):
def svd_node_gen(svdJob, dag, parent_nodes, psd, bank_groups, options, seg, template_mchirp_dict):
svd_nodes = {}
output_path = subdir_path([svdJob.output_path, str(int(seg[0]))[:5]])
new_template_mchirp_dict = {}
for i, bank_group in enumerate(bank_groups):
for ifo, files in bank_group.items():
# First sort out the clipleft, clipright options
clipleft = []
clipright = []
ids = []
mchirp_interval = (numpy.inf, 0)
for n, f in enumerate(files):
# handle template bank clipping
clipleft.append(options.overlap / 2)
clipright.append(options.overlap / 2)
ids.append("%d_%d" % (i, n))
if f in template_mchirp_dict:
mchirp_interval = (min(mchirp_interval[0], template_mchirp_dict[f][0]), max(mchirp_interval[1], template_mchirp_dict[f][1]))
svd_bank_name = inspiral_pipe.T050017_filename(ifo, '%04d_SVD' % (i,), int(seg[0]), int(seg[1]), '.xml.gz', path = output_path)
if '%04d' %i not in new_template_mchirp_dict and mchirp_interval != (numpy.inf, 0):
new_template_mchirp_dict['%04d' % i] = mchirp_interval
svd_nodes.setdefault(ifo, []).append(
inspiral_pipe.generic_node(svdJob, dag,
......@@ -260,7 +269,7 @@ def svd_node_gen(svdJob, dag, parent_nodes, psd, bank_groups, options, seg):
output_files = {"write-svd":svd_bank_name}
)
)
return svd_nodes
return svd_nodes, new_template_mchirp_dict
def create_svd_bank_strings(svd_nodes, instruments = None):
# FIXME assume that the number of svd nodes is the same per ifo, a good assumption though
......@@ -288,7 +297,7 @@ def svd_bank_cache_maker(svd_bank_strings, counter, injection = False):
return [svd_cache_entry.url for svd_cache_entry in svd_cache_entries]
def inspiral_node_gen(gstlalInspiralJob, gstlalInspiralInjJob, dag, svd_nodes, segsdict, options, channel_dict):
def inspiral_node_gen(gstlalInspiralJob, gstlalInspiralInjJob, dag, svd_nodes, segsdict, options, channel_dict, template_mchirp_dict):
inspiral_nodes = {}
for ifos in segsdict:
......@@ -296,6 +305,8 @@ def inspiral_node_gen(gstlalInspiralJob, gstlalInspiralInjJob, dag, svd_nodes, s
# setup dictionaries to hold the inspiral nodes
inspiral_nodes[(ifos, None)] = {}
for injections in options.injections:
if injections is not None:
injections = injections.split(':')[-1]
inspiral_nodes[(ifos, sim_tag_from_inj_file(injections))] = {}
# FIXME choose better splitting?
......@@ -361,12 +372,38 @@ def inspiral_node_gen(gstlalInspiralJob, gstlalInspiralInjJob, dag, svd_nodes, s
inspiral_nodes[(ifos, None)].setdefault(seg, []).append(noninjnode)
for chunk_counter, svd_bank_strings in enumerate(chunks(svd_bank_strings_full, numchunks)):
mchirp_boundaries = [template_mchirp_dict['%04d' % (i + numchunks * chunk_counter)] for i, s in enumerate(svd_bank_strings)]
# process injections
for injections in options.injections:
min_chirp_mass, max_chirp_mass, injections = injections.split(':')
min_chirp_mass, max_chirp_mass = float(min_chirp_mass), float(max_chirp_mass)
#start = 0
ignore = []
#end = len(svd_bank_strings)
for i, bounds in enumerate(mchirp_boundaries):
if max_chirp_mass <= bounds[0]:
ignore.extend(range(i,len(svd_bank_strings)))
#end = min(end, i)
# NOTE putting a break here assumes that the min chirp mass
# in a subbank increases with bin number, i.e. XXXX+1 has a
# greater minimum chirpmass than XXXX, for all XXXX. Note
# that the reverse is not true, bin XXXX+1 may have a lower
# max chirpmass than bin XXXX.
break
elif min_chirp_mass > bounds[1]:
ignore.append(i)
#start = max(start, i+1)
# setup output names
# setup output names
sim_name = sim_tag_from_inj_file(injections)
if ignore == range(len(svd_bank_strings)):
# All of the subbanks represented by svd_bank_strings contain
# chirp masses below or above the interval we care about
inspiral_nodes[(ifos, sim_name)].setdefault(seg, []).append(None)
continue
output_paths = [subdir_path([output_seg_inj_path, '%04d' % (i + numchunks * chunk_counter,)]) for i, s in enumerate(svd_bank_strings)]
output_names = [inspiral_pipe.T050017_filename(ifos, '%04d_LLOID_%s' % (i + numchunks * chunk_counter, sim_name), int(seg[0]), int(seg[1]), '.xml.gz', path = output_paths[i]) for i, s in enumerate(svd_bank_strings)]
dist_stat_names = [inspiral_pipe.T050017_filename(ifos, '%04d_DIST_STATS_%s' % (i + numchunks * chunk_counter, sim_name), int(seg[0]), int(seg[1]), '.xml.gz', path = output_paths[i]) for i, s in enumerate(svd_bank_strings)]
......@@ -398,7 +435,8 @@ def inspiral_node_gen(gstlalInspiralJob, gstlalInspiralInjJob, dag, svd_nodes, s
"veto-segments-file":options.vetoes,
"injections": injections
},
input_cache_files = {"svd-bank-cache":svd_bank_cache_maker(svd_bank_strings, chunk_counter, injection = True)},
input_cache_files = {"svd-bank-cache":svd_names},
input_cache_file_name = inspiral_pipe.group_T050017_filename_from_T050017_files([CacheEntry.from_T050017(filename) for filename in svd_names], '.cache', path = gstlalInspiralInjJob.tag_base).replace('SVD', 'SVD_%s' % sim_name),
output_cache_files = {
"output-cache":output_names,
"likelihood-file-cache":dist_stat_names
......@@ -419,6 +457,8 @@ def adapt_gstlal_inspiral_output(inspiral_nodes, options, segsdict):
# first get the previous output in a usable form
lloid_output = {}
for inj in options.injections + [None]:
if inj is not None:
inj = inj.split(':')[-1]
lloid_output[sim_tag_from_inj_file(inj)] = {}
lloid_diststats = {}
for ifos in segsdict:
......@@ -431,15 +471,22 @@ def adapt_gstlal_inspiral_output(inspiral_nodes, options, segsdict):
lloid_output[None].setdefault((j,i), []).append((f, [node]))
for i,f in enumerate(node.output_files["likelihood-file-cache"]):
lloid_diststats.setdefault((j,i) ,[]).append(f)
for inj in options.injections:
# NOTE This assumes that injection jobs
# and non injection jobs are 1:1 in
# terms of the mass space they cover,
# e.g., that the chunks ar the same!
injnode = inspiral_nodes[(ifos, sim_tag_from_inj_file(inj))][seg][j]
for inj in options.injections:
inj = inj.split(':')[-1]
#print inspiral_nodes.items()
for j, injnode in enumerate(inspiral_nodes[(ifos, sim_tag_from_inj_file(inj))][seg]):
if injnode is None:
continue
#injnode = inspiral_nodes[(ifos, sim_tag_from_inj_file(inj))][seg][j]
for i,f in enumerate(injnode.output_files["output-cache"]):
# Store the output files and the node and injnode for use as a parent dependencies
lloid_output[sim_tag_from_inj_file(inj)].setdefault((j,i), []).append((f, [node, injnode]))
lloid_output[sim_tag_from_inj_file(inj)].setdefault((j,i), []).append((f, lloid_output[None][(j,i)][-1][1]+[injnode]))
# Pad injection output with Nones to match shape of non-injection output
for key in lloid_output[None].keys():
for inj in options.injections:
inj = inj.split(':')[-1]
lloid_output[sim_tag_from_inj_file(inj)].setdefault(key, None)
return lloid_output, lloid_diststats
......@@ -497,7 +544,10 @@ def rank_and_merge(dag, createPriorDistStatsJob, calcRankPDFsJob, calcLikelihood
# then injections
for inj in options.injections:
inj = inj.split(':')[-1]
for n, (outputs, diststats) in enumerate((lloid_output[sim_tag_from_inj_file(inj)][key], lloid_diststats[key]) for key in sorted(lloid_output[None].keys())):
if outputs is None:
continue
inputs = [o[0] for o in outputs]
parents = []
[parents.extend(o[1]) for o in outputs]
......@@ -626,6 +676,7 @@ def finalize_runs(dag, lalappsRunSqliteJob, toXMLJob, ligolwInspinjFindJob, toSq
outnodes = [noninjsqlitenode]
for injections, far_injections in zip(options.injections, options.far_injections):
injections = injections.split(':')[-1]
# extract only the nodes that were used for injections
thisinjnodes = innodes[sim_tag_from_inj_file(injections)]
......@@ -778,7 +829,7 @@ def parse_command_line():
# Override the datasource injection option
parser.remove_option("--injections")
parser.add_option("--injections", action = "append", help = "append injection files to analyze")
parser.add_option("--injections", action = "append", help = "append injection files to analyze. Must prepend filename with X:Y:, where X and Y are floats, e.g. 1.2:3.1:filename, so that the injections are only searched for in regions of the template bank with X <= chirp mass < Y.")
# Condor commands
parser.add_option("--request-cpu", default = "8", metavar = "integer", help = "set the inspiral CPU count, default = 4")
......@@ -895,11 +946,16 @@ rmJob = inspiral_pipe.generic_job("rm", tag_base = "rm_intermediate_merger_produ
create_cache_directories(svdJob, gstlalInspiralJob, gstlalInspiralInjJob, calcRankPDFsJob, calcLikelihoodJob, calcLikelihoodJobInj, toSqliteJob, marginalizeJob, options)
#
# Get mchirp boundaries of banks and maximum duration of templates
#
max_time, template_mchirp_dict = get_bank_params(bank_cache)
#
# Get the analysis segments
#
segsdict = analysis_segments(set(bank_cache.keys()), detectors.frame_segments, boundary_seg, get_max_length(bank_cache), options.min_instruments)
segsdict = analysis_segments(set(bank_cache.keys()), detectors.frame_segments, boundary_seg, max_time, options.min_instruments)
if options.reference_psd is None:
......@@ -959,6 +1015,7 @@ else:
injsnrnodes = []
if not options.disable_calc_inj_snr:
for inj in options.injections:
inj = inj.split(':')
injsnrnodes.append(inspiral_pipe.generic_node(gstlalInjSnrJob, dag, parent_nodes=ref_psd_parent_nodes,
opts = {"flow":options.flow},
input_files = {"injection-file":inj, "reference-psd-cache":"reference_psd.cache"}
......@@ -968,14 +1025,14 @@ if not options.disable_calc_inj_snr:
# Compute SVD banks
#
svd_nodes = svd_node_gen(svdJob, dag, ref_psd_parent_nodes, ref_psd, inspiral_pipe.build_bank_groups(bank_cache, options.num_banks), options, boundary_seg)
svd_nodes, template_mchirp_dict = svd_node_gen(svdJob, dag, ref_psd_parent_nodes, ref_psd, inspiral_pipe.build_bank_groups(bank_cache, options.num_banks), options, boundary_seg, template_mchirp_dict)
#
# Inspiral jobs by segment
#
inspiral_nodes = inspiral_node_gen(gstlalInspiralJob, gstlalInspiralInjJob, dag, svd_nodes, segsdict, options, channel_dict)
inspiral_nodes = inspiral_node_gen(gstlalInspiralJob, gstlalInspiralInjJob, dag, svd_nodes, segsdict, options, channel_dict, template_mchirp_dict)
#
# Adapt the output of the gstlal_inspiral jobs to be suitable for the remainder of this analysis
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment