Skip to content
Snippets Groups Projects
Commit 5cdc0e0e authored by Cody Messick's avatar Cody Messick
Browse files

gstlal_inspiral_pipe: Change --injections so that it takes a chirp mass

interval in addition to the injection filename, so that said injections are
only searched for by jobs which have templates within the specified chirp mass
interval
parent 35832090
No related branches found
No related tags found
No related merge requests found
......@@ -106,15 +106,18 @@ def sim_tag_from_inj_file(injections):
return None
return injections.replace('.xml', '').replace('.gz', '').replace('-','_')
def get_bank_params(bank_cache, verbose = False):
	"""Scan the template bank files listed in bank_cache and return
	(max_time, template_mchirp_dict), where max_time is the longest
	template duration found across all banks and template_mchirp_dict
	maps each bank file name to its [min mchirp, max mchirp] interval.

	NOTE(review): only the first entry of bank_cache.values() is used;
	presumably every instrument shares the same bank files -- confirm
	against the caller.
	"""
	max_time = 0
	template_mchirp_dict = {}
	cache = Cache.fromfilenames([bank_cache.values()[0]])
	for f in cache.pfnlist():
		xmldoc = ligolw_utils.load_filename(f, verbose = verbose, contenthandler = LIGOLWContentHandler)
		snglinspiraltable = lsctables.SnglInspiralTable.get_table(xmldoc)
		# track the longest template duration seen so far over all banks
		max_time = max(max_time, max(snglinspiraltable.getColumnByName('template_duration')))
		# record the chirp-mass interval covered by this bank file
		template_mchirp_dict[f] = [min(snglinspiraltable.getColumnByName('mchirp')), max(snglinspiraltable.getColumnByName('mchirp'))]
		# break reference cycles so the document can be garbage collected
		xmldoc.unlink()
	return max_time, template_mchirp_dict
def chunks(l, n):
for i in xrange(0, len(l), n):
......@@ -223,22 +226,28 @@ def psd_node_gen(refPSDJob, dag, parent_nodes, segsdict, channel_dict, options):
)
return psd_nodes
def svd_node_gen(svdJob, dag, parent_nodes, psd, bank_groups, options, seg):
def svd_node_gen(svdJob, dag, parent_nodes, psd, bank_groups, options, seg, template_mchirp_dict):
svd_nodes = {}
output_path = subdir_path([svdJob.output_path, str(int(seg[0]))[:5]])
new_template_mchirp_dict = {}
for i, bank_group in enumerate(bank_groups):
for ifo, files in bank_group.items():
# First sort out the clipleft, clipright options
clipleft = []
clipright = []
ids = []
mchirp_interval = (numpy.inf, 0)
for n, f in enumerate(files):
# handle template bank clipping
clipleft.append(options.overlap / 2)
clipright.append(options.overlap / 2)
ids.append("%d_%d" % (i, n))
if f in template_mchirp_dict:
mchirp_interval = (min(mchirp_interval[0], template_mchirp_dict[f][0]), max(mchirp_interval[1], template_mchirp_dict[f][1]))
svd_bank_name = inspiral_pipe.T050017_filename(ifo, '%04d_SVD' % (i,), int(seg[0]), int(seg[1]), '.xml.gz', path = output_path)
if '%04d' %i not in new_template_mchirp_dict and mchirp_interval != (numpy.inf, 0):
new_template_mchirp_dict['%04d' % i] = mchirp_interval
svd_nodes.setdefault(ifo, []).append(
inspiral_pipe.generic_node(svdJob, dag,
......@@ -260,7 +269,7 @@ def svd_node_gen(svdJob, dag, parent_nodes, psd, bank_groups, options, seg):
output_files = {"write-svd":svd_bank_name}
)
)
return svd_nodes
return svd_nodes, new_template_mchirp_dict
def create_svd_bank_strings(svd_nodes, instruments = None):
# FIXME assume that the number of svd nodes is the same per ifo, a good assumption though
......@@ -288,7 +297,7 @@ def svd_bank_cache_maker(svd_bank_strings, counter, injection = False):
return [svd_cache_entry.url for svd_cache_entry in svd_cache_entries]
def inspiral_node_gen(gstlalInspiralJob, gstlalInspiralInjJob, dag, svd_nodes, segsdict, options, channel_dict):
def inspiral_node_gen(gstlalInspiralJob, gstlalInspiralInjJob, dag, svd_nodes, segsdict, options, channel_dict, template_mchirp_dict):
inspiral_nodes = {}
for ifos in segsdict:
......@@ -296,6 +305,8 @@ def inspiral_node_gen(gstlalInspiralJob, gstlalInspiralInjJob, dag, svd_nodes, s
# setup dictionaries to hold the inspiral nodes
inspiral_nodes[(ifos, None)] = {}
for injections in options.injections:
if injections is not None:
injections = injections.split(':')[-1]
inspiral_nodes[(ifos, sim_tag_from_inj_file(injections))] = {}
# FIXME choose better splitting?
......@@ -361,12 +372,38 @@ def inspiral_node_gen(gstlalInspiralJob, gstlalInspiralInjJob, dag, svd_nodes, s
inspiral_nodes[(ifos, None)].setdefault(seg, []).append(noninjnode)
for chunk_counter, svd_bank_strings in enumerate(chunks(svd_bank_strings_full, numchunks)):
mchirp_boundaries = [template_mchirp_dict['%04d' % (i + numchunks * chunk_counter)] for i, s in enumerate(svd_bank_strings)]
# process injections
for injections in options.injections:
min_chirp_mass, max_chirp_mass, injections = injections.split(':')
min_chirp_mass, max_chirp_mass = float(min_chirp_mass), float(max_chirp_mass)
#start = 0
ignore = []
#end = len(svd_bank_strings)
for i, bounds in enumerate(mchirp_boundaries):
if max_chirp_mass <= bounds[0]:
ignore.extend(range(i,len(svd_bank_strings)))
#end = min(end, i)
# NOTE putting a break here assumes that the min chirp mass
# in a subbank increases with bin number, i.e. XXXX+1 has a
# greater minimum chirpmass than XXXX, for all XXXX. Note
# that the reverse is not true, bin XXXX+1 may have a lower
# max chirpmass than bin XXXX.
break
elif min_chirp_mass > bounds[1]:
ignore.append(i)
#start = max(start, i+1)
# setup output names
# setup output names
sim_name = sim_tag_from_inj_file(injections)
if ignore == range(len(svd_bank_strings)):
# All of the subbanks represented by svd_bank_strings contain
# chirp masses below or above the interval we care about
inspiral_nodes[(ifos, sim_name)].setdefault(seg, []).append(None)
continue
output_paths = [subdir_path([output_seg_inj_path, '%04d' % (i + numchunks * chunk_counter,)]) for i, s in enumerate(svd_bank_strings)]
output_names = [inspiral_pipe.T050017_filename(ifos, '%04d_LLOID_%s' % (i + numchunks * chunk_counter, sim_name), int(seg[0]), int(seg[1]), '.xml.gz', path = output_paths[i]) for i, s in enumerate(svd_bank_strings)]
dist_stat_names = [inspiral_pipe.T050017_filename(ifos, '%04d_DIST_STATS_%s' % (i + numchunks * chunk_counter, sim_name), int(seg[0]), int(seg[1]), '.xml.gz', path = output_paths[i]) for i, s in enumerate(svd_bank_strings)]
......@@ -398,7 +435,8 @@ def inspiral_node_gen(gstlalInspiralJob, gstlalInspiralInjJob, dag, svd_nodes, s
"veto-segments-file":options.vetoes,
"injections": injections
},
input_cache_files = {"svd-bank-cache":svd_bank_cache_maker(svd_bank_strings, chunk_counter, injection = True)},
input_cache_files = {"svd-bank-cache":svd_names},
input_cache_file_name = inspiral_pipe.group_T050017_filename_from_T050017_files([CacheEntry.from_T050017(filename) for filename in svd_names], '.cache', path = gstlalInspiralInjJob.tag_base).replace('SVD', 'SVD_%s' % sim_name),
output_cache_files = {
"output-cache":output_names,
"likelihood-file-cache":dist_stat_names
......@@ -419,6 +457,8 @@ def adapt_gstlal_inspiral_output(inspiral_nodes, options, segsdict):
# first get the previous output in a usable form
lloid_output = {}
for inj in options.injections + [None]:
if inj is not None:
inj = inj.split(':')[-1]
lloid_output[sim_tag_from_inj_file(inj)] = {}
lloid_diststats = {}
for ifos in segsdict:
......@@ -431,15 +471,22 @@ def adapt_gstlal_inspiral_output(inspiral_nodes, options, segsdict):
lloid_output[None].setdefault((j,i), []).append((f, [node]))
for i,f in enumerate(node.output_files["likelihood-file-cache"]):
lloid_diststats.setdefault((j,i) ,[]).append(f)
for inj in options.injections:
# NOTE This assumes that injection jobs
# and non injection jobs are 1:1 in
# terms of the mass space they cover,
# e.g., that the chunks ar the same!
injnode = inspiral_nodes[(ifos, sim_tag_from_inj_file(inj))][seg][j]
for inj in options.injections:
inj = inj.split(':')[-1]
#print inspiral_nodes.items()
for j, injnode in enumerate(inspiral_nodes[(ifos, sim_tag_from_inj_file(inj))][seg]):
if injnode is None:
continue
#injnode = inspiral_nodes[(ifos, sim_tag_from_inj_file(inj))][seg][j]
for i,f in enumerate(injnode.output_files["output-cache"]):
# Store the output files and the node and injnode for use as a parent dependencies
lloid_output[sim_tag_from_inj_file(inj)].setdefault((j,i), []).append((f, [node, injnode]))
lloid_output[sim_tag_from_inj_file(inj)].setdefault((j,i), []).append((f, lloid_output[None][(j,i)][-1][1]+[injnode]))
# Pad injection output with Nones to match shape of non-injection output
for key in lloid_output[None].keys():
for inj in options.injections:
inj = inj.split(':')[-1]
lloid_output[sim_tag_from_inj_file(inj)].setdefault(key, None)
return lloid_output, lloid_diststats
......@@ -497,7 +544,10 @@ def rank_and_merge(dag, createPriorDistStatsJob, calcRankPDFsJob, calcLikelihood
# then injections
for inj in options.injections:
inj = inj.split(':')[-1]
for n, (outputs, diststats) in enumerate((lloid_output[sim_tag_from_inj_file(inj)][key], lloid_diststats[key]) for key in sorted(lloid_output[None].keys())):
if outputs is None:
continue
inputs = [o[0] for o in outputs]
parents = []
[parents.extend(o[1]) for o in outputs]
......@@ -626,6 +676,7 @@ def finalize_runs(dag, lalappsRunSqliteJob, toXMLJob, ligolwInspinjFindJob, toSq
outnodes = [noninjsqlitenode]
for injections, far_injections in zip(options.injections, options.far_injections):
injections = injections.split(':')[-1]
# extract only the nodes that were used for injections
thisinjnodes = innodes[sim_tag_from_inj_file(injections)]
......@@ -778,7 +829,7 @@ def parse_command_line():
# Override the datasource injection option
parser.remove_option("--injections")
parser.add_option("--injections", action = "append", help = "append injection files to analyze")
parser.add_option("--injections", action = "append", help = "append injection files to analyze. Must prepend filename with X:Y:, where X and Y are floats, e.g. 1.2:3.1:filename, so that the injections are only searched for in regions of the template bank with X <= chirp mass < Y.")
# Condor commands
parser.add_option("--request-cpu", default = "8", metavar = "integer", help = "set the inspiral CPU count, default = 4")
......@@ -895,11 +946,16 @@ rmJob = inspiral_pipe.generic_job("rm", tag_base = "rm_intermediate_merger_produ
create_cache_directories(svdJob, gstlalInspiralJob, gstlalInspiralInjJob, calcRankPDFsJob, calcLikelihoodJob, calcLikelihoodJobInj, toSqliteJob, marginalizeJob, options)
#
# Get mchirp boundaries of banks and maximum duration of templates
#
max_time, template_mchirp_dict = get_bank_params(bank_cache)
#
# Get the analysis segments
#
segsdict = analysis_segments(set(bank_cache.keys()), detectors.frame_segments, boundary_seg, get_max_length(bank_cache), options.min_instruments)
segsdict = analysis_segments(set(bank_cache.keys()), detectors.frame_segments, boundary_seg, max_time, options.min_instruments)
if options.reference_psd is None:
......@@ -959,6 +1015,7 @@ else:
injsnrnodes = []
if not options.disable_calc_inj_snr:
for inj in options.injections:
inj = inj.split(':')
injsnrnodes.append(inspiral_pipe.generic_node(gstlalInjSnrJob, dag, parent_nodes=ref_psd_parent_nodes,
opts = {"flow":options.flow},
input_files = {"injection-file":inj, "reference-psd-cache":"reference_psd.cache"}
......@@ -968,14 +1025,14 @@ if not options.disable_calc_inj_snr:
# Compute SVD banks
#
svd_nodes = svd_node_gen(svdJob, dag, ref_psd_parent_nodes, ref_psd, inspiral_pipe.build_bank_groups(bank_cache, options.num_banks), options, boundary_seg)
svd_nodes, template_mchirp_dict = svd_node_gen(svdJob, dag, ref_psd_parent_nodes, ref_psd, inspiral_pipe.build_bank_groups(bank_cache, options.num_banks), options, boundary_seg, template_mchirp_dict)
#
# Inspiral jobs by segment
#
inspiral_nodes = inspiral_node_gen(gstlalInspiralJob, gstlalInspiralInjJob, dag, svd_nodes, segsdict, options, channel_dict)
inspiral_nodes = inspiral_node_gen(gstlalInspiralJob, gstlalInspiralInjJob, dag, svd_nodes, segsdict, options, channel_dict, template_mchirp_dict)
#
# Adapt the output of the gstlal_inspiral jobs to be suitable for the remainder of this analysis
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment