Maintenance will be performed on git.ligo.org, chat.ligo.org, containers.ligo.org, and docs.ligo.org on Tuesday 7th July 2020 starting at approximately 10am PDT and lasting for around 15 minutes. There will be a short period of downtime towards the end of the maintenance window. Please direct any comments, questions, or concerns to uwm-help@cgca.uwm.edu.

Commit 4ad66ac8 authored by Kipp Cannon

pylal, lalburst: move snglcoinc from pylal to lalburst

- refs #5493
Original: ce562cadbb749ca89e9a85c770c1f01c6eb32cad
parent 60ebc086
......@@ -37,7 +37,6 @@ from glue.ligolw import utils as ligolw_utils
from glue.ligolw.utils import process as ligolw_process
from lalburst import git_version
from lalburst import burca
from pylal import snglcoinc
@lsctables.use_in
......
......@@ -51,7 +51,7 @@ import sys
from lalburst import git_version
from pylal import snglcoinc
from lalburst import snglcoinc
from lalburst import stringutils
......
......@@ -24,6 +24,7 @@ pymodule_PYTHON = \
SimBurstUtils.py \
SnglBurstUtils.py \
snglcluster.py \
snglcoinc.py \
stringutils.py \
timeslides.py \
$(END_OF_LIST)
......
......@@ -32,7 +32,7 @@ import sys
from glue.ligolw import lsctables
from glue.ligolw.utils import coincs as ligolw_coincs
from glue.ligolw.utils import process as ligolw_process
from pylal import snglcoinc
from . import snglcoinc
__author__ = "Kipp Cannon <kipp.cannon@ligo.org>"
......
......@@ -41,7 +41,7 @@ from glue.ligolw import param as ligolw_param
from glue.ligolw import utils as ligolw_utils
from glue.ligolw.utils import process as ligolw_process
from glue.ligolw.utils import search_summary as ligolw_search_summary
from pylal import snglcoinc
from . import snglcoinc
__author__ = "Kipp Cannon <kipp.cannon@ligo.org>"
......
# Copyright (C) 2006--2017 Kipp Cannon, Drew G. Keppel, Jolien Creighton
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# =============================================================================
#
# Preamble
#
# =============================================================================
#
"""
Generic coincidence engine for use with time-based event lists in LIGO
Light Weight XML documents.
"""
from bisect import bisect_left
try:
from fpconst import NaN, NegInf, PosInf
except ImportError:
# fpconst is not part of the standard library and might not be
# available
NaN = float("nan")
NegInf = float("-inf")
PosInf = float("+inf")
import itertools
import math
import numpy
import random
from scipy.constants import c as speed_of_light
import scipy.optimize
import sys
import warnings
import lal
from lal import rate
from glue import offsetvector
from glue import segmentsUtils
from glue.ligolw import ligolw
from glue.ligolw import array as ligolw_array
from glue.ligolw import param as ligolw_param
from glue.ligolw import lsctables
from glue.text_progress_bar import ProgressBar
__author__ = "Kipp Cannon <kipp.cannon@ligo.org>"
from git_version import date as __date__
from git_version import version as __version__
#
# =============================================================================
#
# Event List Interface
#
# =============================================================================
#
class EventList(list):
	"""
	Base class for a collection of events from one instrument.  It
	records the time offset to be applied to its events when testing
	for coincidence against events from other instruments, and it
	defines the interface through which coincident events are
	retrieved.  To be usable, a subclass must override the
	get_coincs() method;  make_index() may also be overridden if an
	acceleration index is helpful.  The methods inherited from the
	list parent class should normally be left alone.
	"""
	def __init__(self):
		# time shift applied to this list's events when they are
		# compared against events from other lists.  implements
		# time-slide (background) coincidence tests.
		self.offset = lal.LIGOTimeGPS(0)

	def make_index(self):
		"""
		Hook invoked after all events have been added to the
		list (and again following any modification of the list),
		and before get_coincs() is used.  Subclasses may build
		search-specific look-up tables here to speed up
		get_coincs().  The default implementation does nothing.
		"""
		pass

	def set_offset(self, offset):
		"""
		Record the time offset to be applied to all events in
		this list.
		"""
		# store as a LIGOTimeGPS once, so the conversion is not
		# repeated when the offset is applied to each event
		self.offset = lal.LIGOTimeGPS(offset)

	def get_coincs(self, event_a, offset_a, light_travel_time, threshold):
		"""
		Return the events from this list that are coincident
		with event_a.  The object returned must support being
		passed to bool() so the caller can test for an empty
		result.

		offset_a is the time shift belonging to event_a, while
		this object's .offset attribute carries the shift
		belonging to this list's events:  the arrival times of
		the events in this list should have (self.offset -
		offset_a) added to them before comparison with event_a,
		or, equivalently, event_a's arrival time should have
		(offset_a - self.offset) added to it before comparison
		with this list's events.  This convention supports the
		construction of time-shifted coincidences.

		light_travel_time is the separation of the two
		instruments in light-seconds, provided because most
		implementations need it.  threshold carries the
		coincidence threshold data appropriate for the ordered
		pair (instrument of event_a, instrument of this list).

		Subclasses must override this method.
		"""
		raise NotImplementedError
class EventListDict(dict):
	"""
	A mapping from instrument name to EventList, initialized from an
	iterable of events and an iterable of instrument names.
	"""
	def __new__(cls, *args, **kwargs):
		# shield dict.__new__() from our constructor arguments
		return dict.__new__(cls)

	def __init__(self, EventListType, events, instruments):
		"""
		Initialize a newly-created instance.  EventListType is a
		subclass of EventList (the subclass itself, not an
		instance of it).  events is an iterable of events (e.g.,
		an instance of a glue.ligolw.table.Table subclass), and
		instruments is an iterable of instrument names.

		One instance of EventListType is created per instrument,
		and each event is .append()'ed to the list matching its
		.ifo attribute.  KeyError is raised if an event carries a
		.ifo value not listed in instruments;  an instrument with
		no events is not an error.

		NOTE:  every event object must have a .ifo attribute.
		NOTE:  events and instruments are each iterated over
		exactly once, so generator expressions are acceptable.
		"""
		self.update((instrument, EventListType()) for instrument in instruments)
		# map from Python ID back to the event object itself
		self.idx = {}
		for event in events:
			self[event.ifo].append(event)
			self.idx[id(event)] = event
		for eventlist in self.values():
			eventlist.make_index()

	@property
	def offsetvector(self):
		"""
		offsetvector of the offsets carried by the event lists.
		When assigning to this property, event lists whose
		instruments are absent from the offset dictionary are
		left untouched, while KeyError is raised if the offset
		dictionary names an instrument not present here.  As a
		short-cut to zeroing all offsets, delete the attribute.
		"""
		return offsetvector.offsetvector((name, eventlist.offset) for name, eventlist in self.items())

	@offsetvector.setter
	def offsetvector(self, offset_vector):
		for name, offset in offset_vector.items():
			self[name].set_offset(offset)

	@offsetvector.deleter
	def offsetvector(self):
		for eventlist in self.values():
			eventlist.set_offset(0)
#
# =============================================================================
#
# Double Coincidence Iterator
#
# =============================================================================
#
def light_travel_time(instrument1, instrument2):
	"""
	Return the time, in seconds, required for light to travel in free
	space over the distance separating the two instruments.  The
	inputs are two instrument prefixes (e.g., "H1").  Note that this
	differs from LAL's XLALLightTravelTime() function, which takes
	two detector objects as input and truncates the result to integer
	nanoseconds.
	"""
	separation = lal.cached_detector_by_prefix[instrument1].location - lal.cached_detector_by_prefix[instrument2].location
	return math.sqrt((separation**2).sum()) / lal.C_SI
def get_doubles(eventlists, instruments, thresholds, unused):
	"""
	Generate tuples of the Python IDs of mutually coincident pairs of
	events drawn from an EventListDict (eventlists), and populate the
	set "unused" with 1-element tuples of the IDs of events that took
	part in no coincidence.  instruments is an iterable naming
	exactly two instruments;  thresholds maps ordered instrument
	pairs to arbitrary threshold data passed through to the event
	lists' .get_coincs() methods, e.g.

	{("H1", "L1"): 10.0, ("L1", "H1"): -10.0}

	Because the threshold object may be order-sensitive, an entry
	must be provided for the pair in both orders.

	Each yielded tuple holds exactly two IDs, ordered to match the
	order of the instruments sequence.

	NOTE:  "unused" must be a set or set-like object;  it is cleared
	with .clear(), then manipulated via .update(), .add(), and
	.remove()/.discard().
	"""
	# retrieve the event lists for the requested instrument pair
	instruments = tuple(instruments)
	assert len(instruments) == 2, "instruments must be an iterable of exactly two names, not %d" % len(instruments)
	list_a, list_b = eventlists[instruments[0]], eventlists[instruments[1]]

	# run the outer loop over the shorter list;  remember whether a
	# swap happened so the output tuples can be restored to the
	# caller's ordering
	swapped = len(list_b) < len(list_a)
	if swapped:
		list_a, list_b = list_b, list_a
		instruments = instruments[1], instruments[0]

	# extract the threshold data and pre-compute the light travel
	# time, using the (possibly swapped) instrument order
	try:
		threshold_data = thresholds[instruments]
	except KeyError as e:
		raise KeyError("no coincidence thresholds provided for instrument pair %s, %s" % e.args[0])
	dt = light_travel_time(*instruments)

	# initially mark every event from list B as unused
	unused.clear()
	unused.update((id(event),) for event in list_b)

	# pair each event from list A with its coincident partners from
	# list B.  events from A with no partner are recorded as unused;
	# partners from B are struck from the unused set as they appear.
	offset_a = list_a.offset
	for event_a in list_a:
		id_a = id(event_a)
		matches = list_b.get_coincs(event_a, offset_a, dt, threshold_data)
		if matches:
			for event_b in matches:
				id_b = id(event_b)
				unused.discard((id_b,))
				yield (id_b, id_a) if swapped else (id_a, id_b)
		else:
			unused.add((id_a,))
	# done
#
# =============================================================================
#
# Time Slide Graph
#
# =============================================================================
#
class TimeSlideGraphNode(object):
	"""
	One node of the graph used by TimeSlideGraph to synthesize
	n-instrument coincidences from (n-1)-instrument coincidences.
	Leaf nodes (2-instrument offset vectors) construct their coincs
	directly with get_doubles();  interior nodes assemble theirs from
	their component nodes' coincs.
	"""
	def __init__(self, offset_vector, time_slide_id = None, keep_unused = True):
		self.time_slide_id = time_slide_id
		self.offset_vector = offset_vector
		# relative offsets;  two offset vectors with equal deltas
		# yield the same coincs, so deltas identify equivalent
		# nodes
		self.deltas = frozenset(offset_vector.deltas.items())
		# component nodes from which this node's coincs are
		# assembled;  None marks both "leaf node" and "already
		# assembled and unlinked" states
		self.components = None
		# cached tuple of coincs, populated on first call to
		# .get_coincs()
		self.coincs = None
		# coincs from component nodes that did not participate in
		# this node's coincs;  populated as a side effect of
		# .get_coincs() when keep_unused is True
		self.unused_coincs = set()
		self.keep_unused = keep_unused

	@property
	def name(self):
		return self.offset_vector.__str__(compact = True)

	def get_coincs(self, eventlists, thresholds, verbose = False):
		"""
		Return (computing and caching on first use) the tuple of
		coincs for this node's offset vector.  eventlists is an
		EventListDict, and thresholds is the threshold dictionary
		passed through to get_doubles().  As a side effect, when
		.keep_unused is True the .unused_coincs set is populated
		with the smaller coincs that did not participate in this
		node's coincs.
		"""
		#
		# has this node already been visited?  if so, return the
		# answer we already know
		#

		if self.coincs is not None:
			if verbose:
				print >>sys.stderr, "\treusing %s" % str(self.offset_vector)
			return self.coincs

		#
		# is this a leaf node?  construct the coincs explicitly
		#

		if self.components is None:
			if verbose:
				print >>sys.stderr, "\tconstructing %s ..." % str(self.offset_vector)

			#
			# sanity check input
			#

			assert len(self.offset_vector) == 2, "broken graph: node with no components has %d-component offset vector, must be 2" % len(self.offset_vector)
			assert set(self.offset_vector) <= set(eventlists), "no event list for instrument(s) %s" % ", ".join(sorted(set(self.offset_vector) - set(eventlists)))

			#
			# apply offsets to events
			#

			eventlists.offsetvector = self.offset_vector

			#
			# search for and record coincidences.  coincs is a
			# sorted tuple of event ID pairs, where each pair
			# of IDs is, itself, ordered alphabetically by
			# instrument name.  note that the event order in
			# each tuple returned by get_doubles() is set by
			# the order of the instrument names passed to it,
			# which we make be alphabetical
			#

			self.coincs = tuple(sorted(get_doubles(eventlists, sorted(self.offset_vector), thresholds, self.unused_coincs)))
			if not self.keep_unused:
				self.unused_coincs.clear()
			return self.coincs

		#
		# is this a head node, or some other node that magically
		# has only one component?  copy coincs from component
		#

		if len(self.components) == 1:
			if verbose:
				print >>sys.stderr, "\tcopying from %s ..." % str(self.components[0].offset_vector)
			self.coincs = self.components[0].get_coincs(eventlists, thresholds, verbose = verbose)
			if self.keep_unused:
				# don't copy reference, always do in-place
				# manipulation of our own .unused_coincs so
				# that calling codes that might hold a
				# reference to it get the correct result
				self.unused_coincs.update(self.components[0].unused_coincs)

			#
			# done.  unlink the graph as we go to release
			# memory
			#

			self.components = None
			return self.coincs

		#
		# len(self.components) == 2 is impossible
		#

		assert len(self.components) > 2

		#
		# this is a regular node in the graph.  use coincidence
		# synthesis algorithm to populate its coincs
		#

		self.coincs = []

		# all coincs with n-1 instruments from the component time
		# slides are potentially unused.  they all go in, we'll
		# remove things from this set as we use them
		# NOTE:  this function call is the recursion into the
		# components to ensure they are initialized, it must be
		# executed before any of what follows, and in particular it
		# must be done whether or not we'll be keeping the
		# collection of return values
		for component in self.components:
			self.unused_coincs.update(component.get_coincs(eventlists, thresholds, verbose = verbose))
		# of the (< n-1)-instrument coincs that were not used in
		# forming the (n-1)-instrument coincs, any that remained
		# unused after forming two compontents cannot have been
		# used by any other components, they definitely won't be
		# used to construct our n-instrument coincs, and so they go
		# into our unused pile
		if self.keep_unused:
			for componenta, componentb in itertools.combinations(self.components, 2):
				self.unused_coincs |= componenta.unused_coincs & componentb.unused_coincs
		else:
			self.unused_coincs.clear()

		if verbose:
			print >>sys.stderr, "\tassembling %s ..." % str(self.offset_vector)
		# magic:  we can form all n-instrument coincs by knowing
		# just three sets of the (n-1)-instrument coincs no matter
		# what n is (n > 2).  note that we pass verbose=False
		# because we've already called the .get_coincs() methods
		# above, these are no-ops to retrieve the answers again
		allcoincs0 = self.components[0].get_coincs(eventlists, thresholds, verbose = False)
		allcoincs1 = self.components[1].get_coincs(eventlists, thresholds, verbose = False)
		allcoincs2 = self.components[-1].get_coincs(eventlists, thresholds, verbose = False)
		# for each coinc in list 0
		for coinc0 in allcoincs0:
			# find all the coincs in list 1 whose first (n-2)
			# event IDs are the same as the first (n-2) event
			# IDs in coinc0.  note that they are guaranteed to
			# be arranged together in the list of coincs and
			# can be identified with two bisection searches.
			# note:  cannot use bisect_right() because we're
			# only comparing against the first (n-2) of (n-1)
			# things in each tuple, we need to use bisect_left
			# after incrementing the last of the (n-2) things
			# by one to obtain the correct range of indexes
			coincs1 = allcoincs1[bisect_left(allcoincs1, coinc0[:-1]):bisect_left(allcoincs1, coinc0[:-2] + (coinc0[-2] + 1,))]
			# find all the coincs in list 2 whose first (n-2)
			# event IDs are the same as the last (n-2) event
			# IDs in coinc0.  note that they are guaranteed to
			# be arranged together in the list and can be
			# identified with two bisection searches
			coincs2 = allcoincs2[bisect_left(allcoincs2, coinc0[1:]):bisect_left(allcoincs2, coinc0[1:-1] + (coinc0[-1] + 1,))]
			# for each coinc extracted from list 1 above search
			# for a coinc extracted from list 2 above whose
			# first (n-2) event IDs are the last (n-2) event
			# IDs in coinc 0 and whose last event ID is the
			# last event ID in coinc 1.  when found, the first
			# ID from coinc 0 prepended to the (n-1) coinc IDs
			# from coinc 2 forms an n-instrument coinc.  how
			# this works is as follows:  coinc 0 and coinc 1,
			# both (n-1)-instrument coincs, together identify a
			# unique potential n-instrument coinc.  coinc 2's
			# role is to confirm the coincidence by showing
			# that the event from the instrument in coinc 1
			# that isn't found in coinc 0 is coincident with
			# all the other events that are in coinc 1.  if the
			# coincidence holds then that combination of event
			# IDs must be found in the coincs2 list, because we
			# assume the coincs2 list is complete.  the
			# bisection search above to extract the coincs2
			# list could be skipped, but by starting with a
			# shorter list the bisection searches inside the
			# following loop are faster.
			for coinc1 in coincs1:
				confirmation = coinc0[1:] + coinc1[-1:]
				i = bisect_left(coincs2, confirmation)
				if i < len(coincs2) and coincs2[i] == confirmation:
					new_coinc = coinc0[:1] + confirmation
					# break the new coinc into
					# (n-1)-instrument components and
					# remove them from the unused list
					# because we just used them, then
					# record the coinc and move on
					self.unused_coincs.difference_update(itertools.combinations(new_coinc, len(new_coinc) - 1))
					self.coincs.append(new_coinc)
		# sort the coincs we just constructed by the component
		# event IDs and convert to a tuple for speed
		self.coincs.sort()
		self.coincs = tuple(self.coincs)

		#
		# done.  we won't be back here again so unlink the graph as
		# we go to release memory
		#

		self.components = None
		return self.coincs
class TimeSlideGraph(object):
def __init__(self, offset_vector_dict, min_instruments = 2, verbose = False):
#
# safety check input
#
if min_instruments < 1:
raise ValueError("min_instruments must be >= 1: %d" % min_instruments)
offset_vector = min(offset_vector_dict.values(), key = lambda x: len(x))
if len(offset_vector) < 2:
raise ValueError("encountered offset vector with fewer than 2 instruments: %s", str(offset_vector))
if len(offset_vector) < min_instruments:
# this test is part of the logic that ensures we
# will only extract coincs that meet the
# min_instruments criterion
raise ValueError("encountered offset vector smaller than min_instruments (%d): %s", (min_instruments, str(offset_vector)))
#
# plays no role in the coincidence engine. used for early
# memory clean-up, to remove coincs from intermediate data
# products that the calling code will not retain. also as
# a convenience for the calling code, implementing the
# ubiquitous "minimum instruments" cut here in the
# .get_coincs() method
#
self.min_instruments = min_instruments
#
# populate the graph head nodes. these represent the
# target offset vectors requested by the calling code.
#
if verbose:
print >>sys.stderr, "constructing coincidence assembly graph for %d target offset vectors ..." % len(offset_vector_dict)
self.head = tuple(TimeSlideGraphNode(offset_vector, time_slide_id = time_slide_id, keep_unused = len(offset_vector) > min_instruments) for time_slide_id, offset_vector in sorted(offset_vector_dict.items()))
#
# populate the graph generations. generations[n] is a
# tuple of the nodes in the graph representing all unique
# n-instrument offset vectors to be constructed as part of
# the analysis (including normalized forms of any
# n-instrument target offset vectors).
#
self.generations = {}
n = max(len(offset_vector) for offset_vector in offset_vector_dict.values())
self.generations[n] = tuple(TimeSlideGraphNode(offset_vector, keep_unused = len(offset_vector) > min_instruments) for offset_vector in offsetvector.component_offsetvectors((node.offset_vector for node in self.head if len(node.offset_vector) == n), n))
for n in range(n, 2, -1): # [n, n-1, ..., 3]
#
# collect all offset vectors of length n that we
# need to be able to construct
#
offset_vectors = [node.offset_vector for node in self.head if len(node.offset_vector) == n] + [node.offset_vector for node in self.generations[n]]
#
# determine the smallest set of offset vectors of
# length n-1 required to construct the length-n
# offset vectors, build a graph node for each of
# the vectors of length n-1, and record the nodes
# as the n-1'st generation
#
self.generations[n - 1] = tuple(TimeSlideGraphNode(offset_vector, keep_unused = len(offset_vector) > min_instruments) for offset_vector in offsetvector.component_offsetvectors(offset_vectors, n - 1))
#
# link each n-instrument node to the n-1 instrument nodes
# from which it will be constructed. NOTE: the components
# are sorted according to the alphabetically-sorted tuples
# of instrument names involved in each component; this is
# a critical part of the coincidence synthesis algorithm
#
for node in self.head:
#
# the offset vector of a head node should be found
# directly in its generation, and it must be unique
# or there's a bug above. despite this, we still
# go to the trouble of sorting to make it easy to
# keep this code in sync with the code for other
# graph nodes below, but the assert makes sure the
# result contains just one entry
#
node.components = tuple(sorted((component for component in self.generations[len(node.offset_vector)] if node.deltas == component.deltas), key = lambda x: sorted(x.offset_vector)))
assert len(node.components) == 1
for n, nodes in self.generations.items():
assert n >= 2 # failure indicates bug in code that constructed generations
if n == 2:
# leaf nodes have no components
continue
for node in nodes:
component_deltas = set(frozenset(offset_vector.deltas.items()) for offset_vector in offsetvector.component_offsetvectors([node.offset_vector], n - 1))
node.components = tuple(sorted((component for component in self.generations[n - 1] if component.deltas in component_deltas), key = lambda x: sorted(x.offset_vector)))
#
# done
#
if verbose:
print >>sys.stderr, "graph contains:"
for n in sorted(self.generations):
print >>sys.stderr,"\t%d %d-insrument offset vectors (%s)" % (len(self.generations[n]), n, ("to be constructed directly" if n == 2 else "to be constructed indirectly"))
print >>sys.stderr, "\t%d offset vectors total" % sum(len(self.generations[n]) for n in self.generations)
def get_coincs(self, eventlists, thresholds, verbose = False):
if verbose:
print >>sys.stderr, "constructing coincs for target offset vectors ..."
# don't do attribute look-ups in the loop
sngl_index = eventlists.idx
min_instruments = self.min_instruments
for n, node in enumerate(self.head, start = 1):
if verbose:
print >>sys.stderr, "%d/%d: %s" % (n, len(self.head), str(node.offset_vector))
# the contents of .unused_coincs must be iterated
# over after the call to .get_coincs() because the
# former is populated as a side effect of the
# latter, but .unused_coincs is populated in-place
# so the following is not sensitive to the order of
# evaluation of arguments (the .unused_coincs
# attribute look-up can occur before or after
# .get_coincs() is called as long as its contents
# are not iterated over until after).
for coinc in itertools.chain(node.get_coincs(eventlists, thresholds, verbose), node.unused_coincs):
# we don't need to check that coinc
# contains at least min_instruments events
# because those that don't meet the