Commit 6ab15793 authored by Kipp Cannon

lalapps_burca: house keeping

- add --min-instruments
- simplify --thresholds option:  now only used by string search for a single parameter, so make it be just that
- don't assign to id() built-in
- make more use of lsctables row properties
- don't forget to set coinc_event instruments column in excesspower search
- use super() consistently to chain to parent classes
- convert make_multi_burst() to staticmethod of ExcessPowerCoincTables
Original: aaba77535481ad2bb9146ac0cfbcc49cdb5e03cd
parent b6a8af4f
@@ -63,98 +63,18 @@ __date__ = git_version.date
#
def parse_thresholdstrings(thresholdstrings):
	"""
	Turn a list of strings of the form
	inst1,inst2=threshold1[,threshold2,...] into a dictionary with
	(inst1, inst2) 2-tuples as keys and the values being the thresholds
	parsed into lists of strings split on the "," character.

	For each pair of instruments present among the input strings, the
	two possible orders are considered independent: the input strings
	are allowed to contain one set of thresholds for (inst1, inst2),
	and a different set of thresholds for (inst2, inst1). Be aware
	that no input checking is done to ensure the user has not provided
	duplicate, incompatible, thresholds. This is considered the
	responsibility of the application program to verify.

	The output dictionary contains threshold sets for both instrument
	orders. If, for some pair of instruments, the input strings
	specified thresholds for only one of the two possible orders, the
	thresholds for the other order are copied from the one that was
	provided.

	Whitespace is removed from the start and end of all strings.

	A typical use for this function is in parsing command line
	arguments or configuration file entries.

	Example:

	>>> from pylal.snglcoinc import parse_thresholds
	>>> parse_thresholds(["H1,H2=X=0.1,Y=100", "H1,L1=X=.2,Y=100"])
	{('H1', 'H2'): ['X=0.1', 'Y=100'], ('H1', 'L1'): ['X=.2', 'Y=100'], ('H2', 'H1'): ['X=0.1', 'Y=100'], ('L1', 'H1'): ['X=.2', 'Y=100']}
	"""
	thresholds = {}
	for pair, delta in [s.split("=", 1) for s in thresholdstrings]:
		try:
			A, B = [s.strip() for s in pair.split(",")]
		except Exception:
			raise ValueError("cannot parse instruments '%s'" % pair)
		thresholds[(A, B)] = [s.strip() for s in delta.split(",")]
	for (A, B), value in thresholds.items():
		if (B, A) not in thresholds:
			thresholds[(B, A)] = value
	return thresholds


def parse_thresholds(options):
	#
	# parse --thresholds options into a dictionary of instrument pairs
	# and components
	#
	try:
		thresholds = parse_thresholdstrings(options.thresholds)
	except Exception as e:
		raise ValueError("error parsing --thresholds: %s" % str(e))

	#
	# parse the components from --thresholds options
	#
	if options.coincidence_algorithm == "excesspower":
		#
		# excess power does not use adjustable thresholds, but for
		# speed it helps to pre-compute the light travel time
		# between the instruments involved in the analysis
		#
		for pair in thresholds.keys():
			thresholds[pair] = snglcoinc.light_travel_time(*pair)
	elif options.coincidence_algorithm == "stringcusp":
		#
		# parse thresholds into dt values
		#
		try:
			thresholds = dict((instrumentpair, (float(dt),)) for instrumentpair, (dt,) in thresholds.iteritems())
		except Exception as e:
			raise ValueError("error parsing --thresholds: %s" % str(e))
	else:
		#
		# unrecognized coincidence algorithm
		#
		raise ValueError(options.coincidence_algorithm)

	#
	# Done
	#
	return thresholds


class Thresholds(object):
	"""
	Fake dictionary that returns the same thing for all keys.
	"""
	def __init__(self, val):
		self.val = val

	def __setitem__(self, key, val):
		self.val = val

	def __getitem__(self, key):
		return self.val
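The Thresholds helper above is the point of the --thresholds simplification: it quacks like a dictionary, but every key maps to the one stored value, so code that still looks thresholds up by instrument pair keeps working. A minimal sketch of its behaviour, in the same doctest style as the removed example (illustrative values only):

>>> dt = Thresholds(0.008)
>>> dt[("H1", "L1")]
0.008
>>> dt[("H2", "V1")]	# any key returns the same value
0.008
>>> dt[("H1", "V1")] = 0.016	# assigning through any key changes it for all keys
>>> dt[("L1", "H1")]
0.016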
def parse_command_line():
@@ -167,7 +87,8 @@ def parse_command_line():
parser.add_option("-f", "--force", action = "store_true", help = "Process document even if it has already been processed.")
parser.add_option("-a", "--coincidence-algorithm", metavar = "[excesspower|stringcusp]", default = None, help = "Select the coincidence test algorithm to use (required).")
parser.add_option("-s", "--coincidence-segments", metavar = "start:end[,start:end,...]", help = "Set the GPS segments in which to retain coincidences. Multiple segments can be specified by separating them with commas. If either start or end is absent from a segment then the interval is unbounded on that side, for example \"874000000:\" causes all coincidences starting at 874000000 to be retained. The \"time\" of a coincidence is ambiguous, and is different for different search algorithms, but a deterministic algorithm is used in all cases so the same coincidence of events will always be assigned the same time. This feature is intended to allow large input files to be analyzed; the procedure is to make n copies of the file and run n instances of burca specifying disjoint --coincidence-segments for each.")
parser.add_option("-t", "--thresholds", metavar = "inst1,inst2=[threshold,...]", action = "append", default = [], help = "Set the coincidence algorithm's thresholds for an instrument pair. For excesspower there are no thresholds. For stringcusp, each instrument pair has a single threshold setting dt. One set of thresholds must be provided for each instrument combination that will be compared, even if there are no thresholds to set.")
parser.add_option("-m", "--min-instruments", metavar = "N", type = "int", default = 2, help = "Set the minimum number of instruments required to form a coincidence (default = 2).")
parser.add_option("-t", "--threshold", metavar = "threshold", default = None, help = "Set the coincidence algorithm's threshold. For excesspower this parameter is not used. For stringcusp, this parameter sets the maximum peak time difference not including light travel time (which will be added internally).")
parser.add_option("-v", "--verbose", action = "store_true", help = "Be verbose.")
options, filenames = parser.parse_args()
@@ -187,12 +108,17 @@ def parse_command_line():
raise ValueError("refusing to allow use of --coincidence-segments with more than one input file")
else:
options.coinc_segs = None
if options.min_instruments < 1:
raise ValueError("--min-instruments must be >= 1")
#
# parse the --thresholds arguments
#
options.thresholds = parse_thresholds(options)
if options.coincidence_algorithm == "stringcusp":
if options.threshold is None:
raise ValueError("--threshold is required for --coincidence-algorithm stringcusp")
options.threshold = Thresholds(float(options.threshold))
#
# done
@@ -225,31 +151,28 @@ options, filenames, paramdict = parse_command_line()
if options.coinc_segs is not None:
def coinc_segs_ntuple_comparefunc(events, offset_vector, coinc_segs = options.coinc_segs):
def coinc_segs_ntuple_comparefunc(events, offset_vector, min_instruments = options.min_instruments, coinc_segs = options.coinc_segs):
# sort so we always do arithmetic in the same order
events = sorted(events, key = lambda event: event.peak)
# coinc time is SNR-weighted mean of event peak times
epoch = events[0].peak + offset_vector[events[0].ifo]
coinc_time = epoch + sum(float(event.peak + offset_vector[event.ifo] - epoch) * event.snr for event in events) / sum(event.snr for event in events)
return coinc_time not in coinc_segs
return len(events) < min_instruments or coinc_time not in coinc_segs
else:
def coinc_segs_ntuple_comparefunc(events, offset_vector, min_instruments = options.min_instruments):
return len(events) < min_instruments
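The veto function above discards a candidate for either of two reasons: fewer participating instruments than --min-instruments, or an SNR-weighted mean peak time that falls outside the retained --coincidence-segments. A self-contained sketch of the same arithmetic with plain floats and invented numbers (the real code works with LIGOTimeGPS objects and a segment list, and subtracts a reference epoch before the weighted sum because LIGOTimeGPS objects cannot be multiplied):

# hypothetical events as (ifo, offset-corrected peak time, snr) tuples
events = [("H1", 874000000.10, 8.0), ("L1", 874000000.13, 6.0)]
min_instruments = 2
coinc_segs = [(874000000.0, 874000100.0)]	# stand-in for a real segmentlist

# coinc time is the SNR-weighted mean of the peak times
coinc_time = sum(peak * snr for ifo, peak, snr in events) / sum(snr for ifo, peak, snr in events)
# the comparefunc returns True to *discard* the candidate
discard = len(events) < min_instruments or not any(lo <= coinc_time < hi for lo, hi in coinc_segs)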
if options.coincidence_algorithm == "excesspower":
EventListType = burca.ExcessPowerEventList
comparefunc = burca.ExcessPowerCoincCompare
if options.coinc_segs is not None:
ntuple_comparefunc = coinc_segs_ntuple_comparefunc
else:
ntuple_comparefunc = lambda *args: False # keep everything
ntuple_comparefunc = coinc_segs_ntuple_comparefunc
CoincTables = burca.ExcessPowerCoincTables
CoincDef = burca.ExcessPowerBBCoincDef
elif options.coincidence_algorithm == "stringcusp":
EventListType = burca.StringEventList
comparefunc = burca.StringCoincCompare
if options.coinc_segs is not None:
ntuple_comparefunc = lambda *args: coinc_segs_ntuple_comparefunc(*args) or burca.StringNTupleCoincCompare(*args)
else:
ntuple_comparefunc = burca.StringNTupleCoincCompare
ntuple_comparefunc = lambda *args: coinc_segs_ntuple_comparefunc(*args) or burca.StringNTupleCoincCompare(*args)
CoincTables = burca.StringCuspCoincTables
CoincDef = burca.StringCuspBBCoincDef
else:
@@ -306,7 +229,7 @@ for n, filename in enumerate(filenames):
CoincTables = CoincTables,
coinc_definer_row = CoincDef,
event_comparefunc = comparefunc,
thresholds = options.thresholds,
thresholds = options.threshold,
ntuple_comparefunc = ntuple_comparefunc,
verbose = options.verbose
)
@@ -110,8 +110,6 @@ match-algorithm = excesspower
comment = $(macrocomment)
program = lalapps_power
coincidence-algorithm = excesspower
; ha hah hah! I win!
thresholds = H1,H2= --thresholds H1,L1= --thresholds H2,L1=
[ligolw_sqlite]
replace =
@@ -106,7 +106,7 @@ chi2par2 = -2.3
[lalapps_burca]
comment = $(macrocomment)
coincidence-algorithm = stringcusp
thresholds = H1,H2=0.008 --thresholds H1,L1=0.008 --thresholds H1,V1=0.008 --thresholds H2,L1=0.008 --thresholds H2,V1=0.008 --thresholds L1,V1=0.008
threshold = 0.008
[lalapps_binjfind]
comment = $(macrocomment)
@@ -106,7 +106,7 @@ chi2par2 = -2.3
[lalapps_burca]
comment = $(macrocomment)
coincidence-algorithm = stringcusp
thresholds = H1,H2=0.008 --thresholds H1,L1=0.008 --thresholds H1,V1=0.008 --thresholds H2,L1=0.008 --thresholds H2,V1=0.008 --thresholds L1,V1=0.008
threshold = 0.008
[lalapps_binjfind]
comment = $(macrocomment)
@@ -107,7 +107,7 @@ chi2par2 = -2.3
[lalapps_burca]
comment = $(macrocomment)
coincidence-algorithm = stringcusp
thresholds = H1,H2=0.008 --thresholds H1,L1=0.008 --thresholds H1,V1=0.008 --thresholds H2,L1=0.008 --thresholds H2,V1=0.008 --thresholds L1,V1=0.008
threshold = 0.008
[lalapps_binjfind]
comment = $(macrocomment)
@@ -98,7 +98,7 @@ chi2par2 = -2.3
[lalapps_burca]
comment = $(macrocomment)
coincidence-algorithm = stringcusp
thresholds = H1,L1=0.008 --thresholds H1,V1=0.008 --thresholds L1,V1=0.008
threshold = 0.008
[lalapps_binjfind]
comment = $(macrocomment)
@@ -76,66 +76,67 @@ class SnglBurst(lsctables.SnglBurst):
ExcessPowerBBCoincDef = lsctables.CoincDef(search = u"excesspower", search_coinc_type = 0, description = u"sngl_burst<-->sngl_burst coincidences")
def make_multi_burst(process_id, coinc_event_id, events, offset_vector):
	multiburst = lsctables.MultiBurst()
	multiburst.process_id = process_id
	multiburst.coinc_event_id = coinc_event_id
	# snr = root sum of ms_snr squares
	multiburst.snr = math.sqrt(sum(event.ms_snr**2.0 for event in events))
	# peak time = ms_snr squared weighted average of peak times (no
	# attempt to account for inter-site delays). LIGOTimeGPS objects
	# don't like being multiplied by things, so the first event's peak
	# time is used as a reference epoch.
	t = events[0].peak + offset_vector[events[0].ifo]
	multiburst.peak = t + sum(event.ms_snr**2.0 * float(event.peak + offset_vector[event.ifo] - t) for event in events) / multiburst.snr**2.0
	# duration = ms_snr squared weighted average of durations
	multiburst.duration = sum(event.ms_snr**2.0 * event.duration for event in events) / multiburst.snr**2.0
	# central_freq = ms_snr squared weighted average of peak frequencies
	multiburst.central_freq = sum(event.ms_snr**2.0 * event.peak_frequency for event in events) / multiburst.snr**2.0
	# bandwidth = ms_snr squared weighted average of bandwidths
	multiburst.bandwidth = sum(event.ms_snr**2.0 * event.bandwidth for event in events) / multiburst.snr**2.0
	# confidence = minimum of confidences
	multiburst.confidence = min(event.confidence for event in events)
	# "amplitude" = h_rss of event with highest confidence
	multiburst.amplitude = max((event.ms_confidence, event.ms_hrss) for event in events)[1]
	# populate the false alarm rate with none.
	multiburst.false_alarm_rate = None
	# done
	return multiburst


class ExcessPowerCoincTables(snglcoinc.CoincTables):
	def __init__(self, xmldoc):
		snglcoinc.CoincTables.__init__(self, xmldoc)
		# find the multi_burst table or create one if not found
		try:
			self.multibursttable = lsctables.MultiBurstTable.get_table(xmldoc)
		except ValueError:
			self.multibursttable = lsctables.New(lsctables.MultiBurstTable, ("process_id", "duration", "central_freq", "bandwidth", "snr", "confidence", "amplitude", "coinc_event_id"))
			xmldoc.childNodes[0].appendChild(self.multibursttable)


class ExcessPowerCoincTables(snglcoinc.CoincTables):
	def __init__(self, xmldoc):
		super(ExcessPowerCoincTables, self).__init__(xmldoc)
		# find the multi_burst table or create one if not found
		try:
			self.multibursttable = lsctables.MultiBurstTable.get_table(xmldoc)
		except ValueError:
			self.multibursttable = lsctables.New(lsctables.MultiBurstTable, ("process_id", "duration", "central_freq", "bandwidth", "snr", "confidence", "amplitude", "coinc_event_id"))
			xmldoc.childNodes[0].appendChild(self.multibursttable)

	def make_multi_burst(self, process_id, coinc_event_id, events, offset_vector):
		multiburst = self.multibursttable.RowType(
			process_id = process_id,
			coinc_event_id = coinc_event_id,
			# populate the false alarm rate with none.
			false_alarm_rate = None
		)
		# snr = root sum of ms_snr squares
		multiburst.snr = math.sqrt(sum(event.ms_snr**2.0 for event in events))
		# peak time = ms_snr squared weighted average of peak times
		# (no attempt to account for inter-site delays).
		# LIGOTimeGPS objects don't like being multiplied by
		# things, so the first event's peak time is used as a
		# reference epoch.
		t = events[0].peak + offset_vector[events[0].ifo]
		multiburst.peak = t + sum(event.ms_snr**2.0 * float(event.peak + offset_vector[event.ifo] - t) for event in events) / multiburst.snr**2.0
		# duration = ms_snr squared weighted average of durations
		multiburst.duration = sum(event.ms_snr**2.0 * event.duration for event in events) / multiburst.snr**2.0
		# central_freq = ms_snr squared weighted average of peak
		# frequencies
		multiburst.central_freq = sum(event.ms_snr**2.0 * event.peak_frequency for event in events) / multiburst.snr**2.0
		# bandwidth = ms_snr squared weighted average of bandwidths
		multiburst.bandwidth = sum(event.ms_snr**2.0 * event.bandwidth for event in events) / multiburst.snr**2.0
		# confidence = minimum of confidences
		multiburst.confidence = min(event.confidence for event in events)
		# "amplitude" = h_rss of event with highest confidence
		multiburst.amplitude = max(events, key = lambda event: event.ms_confidence).ms_hrss
		# done
		return multiburst
def coinc_rows(self, process_id, time_slide_id, coinc_def_id, events):
coinc, coincmaps = super(ExcessPowerCoincTables, self).coinc_rows(process_id, time_slide_id, coinc_def_id, events)
return coinc, coincmaps, make_multi_burst(process_id, coinc.coinc_event_id, events, self.time_slide_index[time_slide_id])
coinc.insts = (event.ifo for event in events)
return coinc, coincmaps, self.make_multi_burst(process_id, coinc.coinc_event_id, events, self.time_slide_index[time_slide_id])
def append_coinc(self, coinc, coincmaps, multiburst):
coinc = super(ExcessPowerCoincTables, self).append_coinc(coinc, coincmaps)
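make_multi_burst() above collapses the coincident events into one multi_burst row: snr is the root sum of squares of the per-event ms_snr values, most other columns are ms_snr-squared-weighted averages, confidence is the minimum, and amplitude is the ms_hrss of the most confident event. A minimal numeric sketch of that combination rule, using a throwaway namedtuple in place of real sngl_burst rows (values are invented):

import math
from collections import namedtuple

Event = namedtuple("Event", "ms_snr duration peak_frequency bandwidth confidence ms_confidence ms_hrss")
events = [Event(10.0, 0.25, 120.0, 16.0, 30.0, 30.0, 1e-21), Event(6.0, 0.50, 130.0, 32.0, 12.0, 12.0, 4e-22)]

snr = math.sqrt(sum(e.ms_snr**2.0 for e in events))	# root sum of squares
weight = snr**2.0	# = sum of ms_snr**2, the normalization for the weighted averages
duration = sum(e.ms_snr**2.0 * e.duration for e in events) / weight
central_freq = sum(e.ms_snr**2.0 * e.peak_frequency for e in events) / weight
bandwidth = sum(e.ms_snr**2.0 * e.bandwidth for e in events) / weight
confidence = min(e.confidence for e in events)	# minimum of confidences
amplitude = max(events, key = lambda e: e.ms_confidence).ms_hrss	# h_rss of most confident event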
@@ -155,7 +156,7 @@ StringCuspBBCoincDef = lsctables.CoincDef(search = u"StringCusp", search_coinc_t
class StringCuspCoincTables(snglcoinc.CoincTables):
def coinc_rows(self, process_id, time_slide_id, coinc_def_id, events):
coinc, coincmaps = super(StringCuspCoincTables, self).coinc_rows(process_id, time_slide_id, coinc_def_id, events)
coinc.set_instruments(event.ifo for event in events)
coinc.insts = (event.ifo for event in events)
return coinc, coincmaps
@@ -187,13 +188,13 @@ class ExcessPowerEventList(snglcoinc.EventList):
LIGOTimeGPS.
"""
# sort by peak time
self.sort(lambda a, b: cmp(a.peak_time, b.peak_time) or cmp(a.peak_time_ns, b.peak_time_ns))
self.sort(key = lambda event: event.peak)
# for the events in this list, record the largest
# difference between an event's peak time and either its
# start or stop times
if self:
self.max_edge_peak_delta = max([max(float(event.peak - event.start), float(event.start + event.duration - event.peak)) for event in self])
self.max_edge_peak_delta = max(max(float(event.peak - event.start), float(event.start + event.duration - event.peak)) for event in self)
else:
# max() doesn't like empty lists
self.max_edge_peak_delta = 0
@@ -244,12 +245,12 @@ class StringEventList(snglcoinc.EventList):
previously been set to compare the event's peak time to a
LIGOTimeGPS.
"""
self.sort(lambda a, b: cmp(a.peak_time, b.peak_time) or cmp(a.peak_time_ns, b.peak_time_ns))
self.sort(key = lambda event: event.peak)
def get_coincs(self, event_a, offset_a, light_travel_time, threshold, comparefunc):
min_peak = max_peak = event_a.peak + offset_a - self.offset
min_peak -= threshold[0] + light_travel_time
max_peak += threshold[0] + light_travel_time
min_peak -= threshold + light_travel_time
max_peak += threshold + light_travel_time
return [event_b for event_b in self[bisect.bisect_left(self, min_peak) : bisect.bisect_right(self, max_peak)] if not comparefunc(event_a, offset_a, event_b, self.offset, light_travel_time, threshold)]
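get_coincs() above depends on the list having been sorted by peak time in make_index(), so the candidates inside the plus-or-minus (threshold + light travel time) window can be found with a binary search instead of a linear scan. A self-contained sketch of that windowing with plain floats and invented numbers (the real list bisects against LIGOTimeGPS through a comparison configured elsewhere, and each candidate is still passed through comparefunc):

import bisect

peaks = [100.000, 100.004, 100.012, 100.030]	# already sorted by peak time
peak_a = 100.010	# offset-corrected peak time of the event being matched
threshold = 0.008	# the --threshold dt window
light_travel_time = 0.010	# roughly the H1-L1 light travel time

lo = peak_a - (threshold + light_travel_time)
hi = peak_a + (threshold + light_travel_time)
candidates = peaks[bisect.bisect_left(peaks, lo) : bisect.bisect_right(peaks, hi)]
# candidates == [100.000, 100.004, 100.012]; 100.030 lies outside the window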
@@ -262,7 +263,7 @@ class StringEventList(snglcoinc.EventList):
#
def ExcessPowerCoincCompare(a, offseta, b, offsetb, light_travel_time, thresholds):
def ExcessPowerCoincCompare(a, offseta, b, offsetb, light_travel_time, ignored):
if abs(a.central_freq - b.central_freq) > (a.bandwidth + b.bandwidth) / 2:
return True
@@ -280,20 +281,13 @@ def ExcessPowerCoincCompare(a, offseta, b, offsetb, light_travel_time, threshold
return False
def StringCoincCompare(a, offseta, b, offsetb, light_travel_time, thresholds):
def StringCoincCompare(a, offseta, b, offsetb, light_travel_time, threshold):
"""
Returns False (a & b are coincident) if the events' peak times
differ from each other by no more than dt plus the light travel
time from one instrument to the next.
"""
# unpack thresholds (it's just the \Delta t window)
dt, = thresholds
# test for time coincidence
coincident = abs(float(a.peak + offseta - b.peak - offsetb)) <= (dt + light_travel_time)
# return result
return not coincident
return abs(float(a.peak + offseta - b.peak - offsetb)) > threshold + light_travel_time
def StringNTupleCoincCompare(events, offset_vector):
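With the simplified --threshold option, the StringCoincCompare test above reduces to a single comparison: two events are coincident when their offset-corrected peak times differ by no more than the threshold plus the light travel time between the two sites. A quick numeric check with invented values:

threshold = 0.008	# seconds, from --threshold
light_travel_time = 0.010	# seconds; site-dependent, value here is illustrative
peak_a, offset_a = 874000000.000, 0.0
peak_b, offset_b = 874000000.015, 0.0

# mirrors StringCoincCompare: True means "not coincident"
not_coincident = abs((peak_a + offset_a) - (peak_b + offset_b)) > threshold + light_travel_time
# |dt| = 0.015 s <= 0.018 s, so not_coincident is False and the pair survives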
@@ -366,7 +360,7 @@ def burca(
for node, coinc in time_slide_graph.get_coincs(eventlists, event_comparefunc, thresholds, verbose = verbose):
if len(coinc) < min_instruments:
continue
ntuple = tuple(sngl_index[id] for id in coinc)
ntuple = tuple(sngl_index[event_id] for event_id in coinc)
if not ntuple_comparefunc(ntuple, node.offset_vector):
coinc_tables.append_coinc(*coinc_tables.coinc_rows(process_id, node.time_slide_id, coinc_def_id, ntuple))