Commit 15d1532e authored by Ka-Lok Lo's avatar Ka-Lok Lo 💬

gstlal_inspiral, gstlal_inspiral_pipe, gstlal_ll_inspiral_pipe,

inspiral_pipe.py: allow for linearly varying ht-gate-threshold
based on max mchirp for a given bin
parent 3271ffa6
......@@ -132,7 +132,7 @@
# + `--time-slide-file` [filename]: Set the name of the xml file to get time slide offsets (required).
# + `--control-peak-time` [time] (int): Set a time window in seconds to find peaks in the control signal.
# + `--fir-stride` [time] (int): Set the length of the fir filter stride in seconds, default = 8.
# + `--ht-gate-threshold` [threshold] (float): Set the threshold on whitened h(t) to mark samples as gaps (glitch removal), default = infinity.
# + `--ht-gate-threshold` [threshold] (float): Set the threshold on whitened h(t) to mark samples as gaps (glitch removal), default = infinity. Should be given per bank file.
# + `--chisq-type" [type]: Choose the type of chisq computation to perform. Must be one of (autochisq|timeslicechisq). The default is autochisq.
# + `--coincidence-threshold` [value] (float): Set the coincidence window in seconds (default = 0.005). The light-travel time between instruments will be added automatically in the coincidence test.
# + `--min-instruments` [count] (int): Set the minimum number of instruments that must contribute triggers to form a candidate (default = 2).
......@@ -280,6 +280,7 @@ def parse_command_line():
parser.add_option("--control-peak-time", metavar = "time", type = "int", help = "Set a time window in seconds to find peaks in the control signal")
parser.add_option("--fir-stride", metavar = "time", type = "int", default = 8, help = "Set the length of the fir filter stride in seconds. default = 8")
parser.add_option("--ht-gate-threshold", metavar = "threshold", type = "float", default = float("inf"), help = "Set the threshold on whitened h(t) to mark samples as gaps (glitch removal)")
parser.add_option("--ht-gate-threshold", metavar = "threshold", type = "float", action = "append", default = [], help = "Set the threshold on whitened h(t) to mark samples as gaps (glitch removal)")
parser.add_option("--chisq-type", metavar = "type", default = "autochisq", help = "Choose the type of chisq computation to perform. Must be one of (autochisq|timeslicechisq). The default is autochisq.")
parser.add_option("--coincidence-threshold", metavar = "value", type = "float", default = 0.005, help = "Set the coincidence window in seconds (default = 0.005). The light-travel time between instruments will be added automatically in the coincidence test.")
parser.add_option("--min-instruments", metavar = "count", type = "int", default = 2, help = "Set the minimum number of instruments that must contribute triggers to form a candidate (default = 2).")
......@@ -432,6 +433,13 @@ def parse_command_line():
else:
likelihood_url_namedtuples_list = [namedtuple('likelihood_url_namedtuple',('likelihood_url','reference_likelihood_url'))(likelihood_file, None) for likelihood_file in options.likelihood_file]
# Checking options.ht_gate_threshold
# If no gate threshold is given, use threshold of infinity
if options.ht_gate_threshold == []:
options.ht_gate_threshold = [float("inf")]*len(svd_banks)
if not (len(options.ht_gate_threshold) == len(svd_banks)):
raise ValueError("must have equal numbers of svd banks (%d) and ht-gate-threshold values (%d)" % (len(svd_banks), len(options.ht_gate_threshold)))
if not (len(svd_banks) == len(options.output) == len(likelihood_url_namedtuples_list)):
raise ValueError("must have equal numbers of svd banks (%d), output files (%d) and likelihood files (%d)" % (len(svd_banks), len(options.output), len(likelihood_url_namedtuples_list)))
......@@ -587,7 +595,7 @@ else:
#
for output_file_number, (svd_bank_url_dict, output_url, likelihood_url_namedtuple, zero_lag_ranking_stats_file) in enumerate(zip(svd_banks, options.output, likelihood_url_namedtuples_list, options.zero_lag_ranking_stat_file)):
for output_file_number, (svd_bank_url_dict, output_url, likelihood_url_namedtuple, zero_lag_ranking_stats_file, ht_gate_threshold) in enumerate(zip(svd_banks, options.output, likelihood_url_namedtuples_list, options.zero_lag_ranking_stat_file, options.ht_gate_threshold)):
#
# Checkpointing only supported for gzip files in offline analysis
# FIXME Is there a better way to check if a file is corrupted? Implement a means by which to check for sqlite file validity
......@@ -687,7 +695,7 @@ for output_file_number, (svd_bank_url_dict, output_url, likelihood_url_namedtupl
banks = banks,
psd = psd,
psd_fft_length = options.psd_fft_length,
ht_gate_threshold = options.ht_gate_threshold,
ht_gate_threshold = ht_gate_threshold,
veto_segments = veto_segments,
verbose = options.verbose,
nxydump_segment = options.nxydump_segment,
......
......@@ -51,6 +51,7 @@
# + '--num-banks' [str] : The number of parallel subbanks per gstlal_inspiral job. can be given as a list like 1,2,3,4 then it will split up the bank cache into N groups with M banks each.
# + '--max-inspiral-jobs' [int] : Set the maximum number of gstlal_inspiral jobs to run simultaneously, default no constraint.
# + '--ht-gate-threshold' [float] : Set a threshold on whitened h(t) to veto glitches
# + '--ht-gate-threshold-linear' [string] : Set threshold on whitened h(t) on a linear scale based on the maximum mchirp of the svd bin. set as mchirp_min:ht_gate_threshold_min-mchirp_max:ht_gate_threshold_max (example: --ht-gate-threshold-linear 0.8:12.0-45.0:100.0)
# + '--inspiral-executable' [str] : Options gstlal_inspiral | gstlal_iir_inspiral, default gstlal_inspiral
# + '--blind-injections' [filename] : Set the name of an injection file that will be added to the data without saving the sim_inspiral table or otherwise processing the data differently. Has the effect of having hidden signals in the input data. Separate injection runs using the --injections option will still occur.
# + '--singles-threshold' [float] : Record all (including non-coincident) single detector triggers above the specified SNR threshold (by default = 8).
......@@ -336,11 +337,23 @@ def inspiral_node_gen(gstlalInspiralJob, gstlalInspiralInjJob, dag, svd_nodes, s
for bgbin in bgbin_indices:
bgbin_chunk_map.setdefault(bgbin, chunk_counter)
# Calculate the appropriate ht-gate-threshold values according to the scale given
threshold_values = None
if options.ht_gate_threshold_linear is not None:
# A scale is given
mchirp_min, ht_gate_threshold_min, mchirp_max, ht_gate_threshold_max = [float(y) for x in options.ht_gate_threshold_linear.split("-") for y in x.split(":")]
# use max mchirp in a given svd bank to decide gate threshold
bank_mchirps = [template_mchirp_dict['%04d' % (i + numchunks * chunk_counter,)][1] for i, s in enumerate(svd_bank_strings)]
threshold_values = [(ht_gate_threshold_max - ht_gate_threshold_min)/(mchirp_max - mchirp_min)*(bank_mchirp - mchirp_min) + ht_gate_threshold_min for bank_mchirp in bank_mchirps]
else:
if options.ht_gate_threshold is not None:
threshold_values = [options.ht_gate_threshold]*len(svd_bank_strings) # Use the ht-gate-threshold value given
# FIXME do better with svd node parents?
# non injection node
noninjnode = inspiral_pipe.generic_node(gstlalInspiralJob, dag, parent_nodes = sum(svd_nodes.values(),[]),
opts = {"psd-fft-length":options.psd_fft_length,
"ht-gate-threshold":options.ht_gate_threshold,
"ht-gate-threshold":threshold_values,
"frame-segments-name":options.frame_segments_name,
"gps-start-time":int(seg[0]),
"gps-end-time":int(seg[1]),
......@@ -399,10 +412,22 @@ def inspiral_node_gen(gstlalInspiralJob, gstlalInspiralInjJob, dag, svd_nodes, s
reference_psd = psd_nodes[(ifos, seg)]
parents = []
# Calculate the appropriate ht-gate-threshold values according to the scale given
threshold_values = None
if options.ht_gate_threshold_linear is not None:
# A scale is given
mchirp_min, ht_gate_threshold_min, mchirp_max, ht_gate_threshold_max = [float(y) for x in options.ht_gate_threshold_linear.split("-") for y in x.split(":")]
# use max mchirp in a given svd bank to decide gate threshold
bank_mchirps = [template_mchirp_dict['%04d' % (i + numchunks * chunk_counter,)][1] for i, s in enumerate(svd_bank_strings)]
threshold_values = [(ht_gate_threshold_max - ht_gate_threshold_min)/(mchirp_max - mchirp_min)*(bank_mchirp - mchirp_min) + ht_gate_threshold_min for bank_mchirp in bank_mchirps]
else:
if options.ht_gate_threshold is not None:
threshold_values = [options.ht_gate_threshold]*len(svd_bank_strings) # Use the ht-gate-threshold value given
# setup injection node
injnode = inspiral_pipe.generic_node(gstlalInspiralInjJob, dag, parent_nodes = parents,
opts = {"psd-fft-length":options.psd_fft_length,
"ht-gate-threshold":options.ht_gate_threshold,
"ht-gate-threshold":threshold_values,
"frame-segments-name":options.frame_segments_name,
"gps-start-time":int(seg[0]),
"gps-end-time":int(seg[1]),
......@@ -838,6 +863,7 @@ def parse_command_line():
parser.add_option("--num-banks", metavar = "str", help = "The number of parallel subbanks per gstlal_inspiral job. can be given as a list like 1,2,3,4 then it will split up the bank cache into N groups with M banks each.")
parser.add_option("--max-inspiral-jobs", type="int", metavar = "jobs", help = "Set the maximum number of gstlal_inspiral jobs to run simultaneously, default no constraint.")
parser.add_option("--ht-gate-threshold", type="float", help="set a threshold on whitened h(t) to veto glitches")
parser.add_option("--ht-gate-threshold-linear", metavar = "mchirp_min:ht_gate_threshold_min-mchirp_max:ht_gate_threshold_max", type="string", help = "Set the threshold on whitened h(t) to mark samples as gaps (glitch removal) with a linear scale of mchirp")
parser.add_option("--inspiral-executable", default = "gstlal_inspiral", help = "Options gstlal_inspiral | gstlal_iir_inspiral, default gstlal_inspiral")
parser.add_option("--blind-injections", metavar = "filename", help = "Set the name of an injection file that will be added to the data without saving the sim_inspiral table or otherwise processing the data differently. Has the effect of having hidden signals in the input data. Separate injection runs using the --injections option will still occur.")
# FIXME far-injections currently doesnt work, either fix it or delete it
......
......@@ -97,6 +97,7 @@ lsctables.use_in(ligolwcontenthandler)
# "--inj-framexmit-addr", metavar = "name", default=[], action = "append", help = "Set the framexmit address to process for the injection stream (required if --inj-channel-name set). IFO=ADDR:port can be given multiple times.")
# "--inj-framexmit-iface", metavar = "name", default "10.14.0.1", action = "append", help = "Set the interface address to process for injections (required if --inj-channel-name set). default 10.14.0.1")
# "--ht-gate-threshold", metavar = "float", help = "Set the h(t) gate threshold to reject glitches", type="float")
# "--ht-gate-threshold-linear", metavar = "string", help = "Set the scale for h(t) gate threshold to reject glitches", type="string". set as mchirp_min:ht_gate_threshold_min-mchirp_max:ht_gate_threshold_max (example: --ht-gate-threshold-linear 0.8:12.0-45.0:100.0)
# "--do-iir-pipeline", action = "store_true", help = "run the iir pipeline instead of lloid")
# "--max-jobs", metavar = "num", type = "int", help = "stop parsing the cache after reaching a certain number of jobs to limit what is submitted to the HTCondor pool")
# "--likelihood-cache", help = "set the cache containin likelihood files")
......@@ -203,6 +204,7 @@ def parse_command_line():
parser.add_option("--inj-framexmit-addr", metavar = "name", default=[], action = "append", help = "Set the framexmit address to process for the injection stream (required if --inj-channel-name set). IFO=ADDR:port can be given multiple times.")
parser.add_option("--inj-framexmit-iface", metavar = "name", action = "append", help = "Set the interface address to process for injections (required if --inj-channel-name set).")
parser.add_option("--ht-gate-threshold", metavar = "float", help = "Set the h(t) gate threshold to reject glitches", type="float")
parser.add_option("--ht-gate-threshold-linear", metavar = "mchirp_min:ht_gate_threshold_min-mchirp_max:ht_gate_threshold_max", type="string", help = "Set the threshold on whitened h(t) to mark samples as gaps (glitch removal) with a linear scale of mchirp")
parser.add_option("--do-iir-pipeline", action = "store_true", help = "run the iir pipeline instead of lloid")
parser.add_option("--max-jobs", metavar = "num", type = "int", help = "stop parsing the cache after reaching a certain number of jobs to limit what is submitted to the HTCondor pool")
parser.add_option("--likelihood-cache", help = "set the cache containin likelihood files")
......@@ -364,10 +366,23 @@ for num_insp_nodes, (svd_banks, likefile, zerolikefile) in enumerate(zip(bank_gr
svd_bank_string = ",".join([":".join([k, v[0]]) for k,v in svd_banks.items()])
jobTags.append("%04d" % num_insp_nodes)
# Calculate the appropriate ht-gate-threshold value
threshold_values = None
if options.ht_gate_threshold_linear is not None:
template_mchirp_dict = inspiral_pipe.get_svd_bank_params_online(bank_cache.values()[0])
# Linear scale specified
mchirp_min, ht_gate_threshold_min, mchirp_max, ht_gate_threshold_max = [float(y) for x in options.ht_gate_threshold_linear.split("-") for y in x.split(":")]
# use max mchirp in a given svd bank to decide gate threshold
bank_mchirps = [template_mchirp_dict["%04d" % int(os.path.basename(svd_file).split("-")[1].split("_")[3])][1] for svd_file in svd_banks.items()[0][1]]
threshold_values = [(ht_gate_threshold_max - ht_gate_threshold_min)/(mchirp_max - mchirp_min)*(bank_mchirp - mchirp_min) + ht_gate_threshold_min for bank_mchirp in bank_mchirps]
else:
if options.ht_gate_threshold is not None:
threshold_values = [options.ht_gate_threshold] * len(svd_banks.items()[0][1]) # Use the ht-gate-threshold value given
inspNode = inspiral_pipe.generic_node(gstlalInspiralJob, dag, [],
opts = {"psd-fft-length":options.psd_fft_length,
"reference-psd":options.reference_psd,
"ht-gate-threshold":options.ht_gate_threshold,
"ht-gate-threshold":threshold_values,
"channel-name":datasource.pipeline_channel_list_from_channel_dict(channel_dict),
"dq-channel-name":datasource.pipeline_channel_list_from_channel_dict(dq_channel_dict, opt = "dq-channel-name"),
"state-vector-on-bits":options.state_vector_on_bits,
......@@ -410,7 +425,7 @@ for num_insp_nodes, (svd_banks, likefile, zerolikefile) in enumerate(zip(bank_gr
inspInjNode = inspiral_pipe.generic_node(gstlalInspiralInjJob, dag, [],
opts = {"psd-fft-length":options.psd_fft_length,
"reference-psd":options.reference_psd,
"ht-gate-threshold":options.ht_gate_threshold,
"ht-gate-threshold":threshold_values,
"channel-name":datasource.pipeline_channel_list_from_channel_dict_with_node_range(inj_channel_dict, node = jobTags[-1]),
"dq-channel-name":datasource.pipeline_channel_list_from_channel_dict(inj_dq_channel_dict, opt = "dq-channel-name"),
"state-vector-on-bits":options.inj_state_vector_on_bits,
......
......@@ -446,6 +446,20 @@ def group_T050017_filename_from_T050017_files(cache_entries, extension, path = N
print >>sys.stderr, "ERROR: first and last file of cache file do not match known pattern, cannot name group file under T050017 convention. \nFile 1: %s\nFile 2: %s" % (cache_entries[0].path, cache_entries[-1].path)
raise ValueError
def get_svd_bank_params_online(svd_bank_cache):
template_mchirp_dict = {}
for ce in [lal.CacheEntry(f) for f in open(svd_bank_cache)]:
if not template_mchirp_dict.setdefault("%04d" % int(ce.description.split("_")[3]), []):
min_mchirp, max_mchirp = float("inf"), 0
xmldoc = utils.load_url(ce.path, contenthandler = svd_bank.DefaultContentHandler)
for root in (elem for elem in xmldoc.getElementsByTagName(ligolw.LIGO_LW.tagName) if elem.hasAttribute(u"Name") and elem.Name == "gstlal_svd_bank_Bank"):
snglinspiraltable = lsctables.SnglInspiralTable.get_table(root)
mchirp_column = snglinspiraltable.getColumnByName("mchirp")
min_mchirp, max_mchirp = min(min_mchirp, min(mchirp_column)), max(max_mchirp, max(mchirp_column))
template_mchirp_dict["%04d" % int(ce.description.split("_")[3])] = (min_mchirp, max_mchirp)
xmldoc.unlink()
return template_mchirp_dict
def get_svd_bank_params(svd_bank_cache, online = False):
if not online:
bgbin_file_map = {}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment