= two_epsilon * n:
+ dt = tuple(random_uniform(*window) for window in windows)
+ if all(abs(dt[i] - dt[j]) <= maxdt for i, j, maxdt in ijseq):
+ n += 1
+ d += 1
+ self._coincidence_rate_factors[key] *= float(n) / float(d)
+
+ # done
+ return self._coincidence_rate_factors
+
+
+ @property
+ def rates(self):
+ """
+ Dictionary mapping instrument combination (as a frozenset)
+ to mean rate in Hz at which that combination of instruments
+ can be found in a coincidence under the assumption that all
+ instruments are on and able to participate in coincidences.
+ Corrections (e.g., based on the contents of the .P_live
+ attribute) are required if that assumption does not hold.
+ Note the difference between the contents of this dictionary
+ and the rates of various kinds of coincidences. For
+ example, the rate for frozenset(("H1", "L1")) is the rate,
+ in Hz, at which that combination of instruments
+ participates in coincidences, not the rate of H1,L1
+ doubles. The return value is not cached.
+ """
+ # compute \mu_{1} * \mu_{2} ... \mu_{N} * FACTOR where
+ # FACTOR is the previously-computed proportionality
+ # constant from coincidence_rate_factors.
+
+ rates = dict(self.coincidence_rate_factors)
+ for instruments in rates:
+ for instrument in instruments:
+ rates[instruments] *= self.mu[instrument]
+
+ # rates now contains the mean rate at which each
+ # combination of instruments can be found in a coincidence
+ # during the times when at least those instruments are
+ # available to form coincidences. Note: the rate, e.g.,
+ # for the combo "H1,L1" is the sum of the rate of "H1,L1"
+ # doubles as well as the rate of "H1,L1,V1" triples and all
+ # other higher-order coincidences in which H1 and L1
+ # participate.
+
+ return rates
+
+
+ @property
+ def mean_coinc_rate(self):
+ """
+ Dictionary mapping instrument combo (as a frozenset) to the
+ mean rate at which coincidences involving precisely that
+ combination of instruments occur, averaged over times when
+ at least the minimum required number of instruments are
+ operating --- the mean rate during times when coincidences
+ are possible, not the mean rate over all time. The result
+ is not cached.
+ """
+ rates = self.rates # don't re-evalute in loop
+ coinc_rate = dict.fromkeys(rates, 0.0)
+ # iterate over probabilities in order for better numerical
+ # accuracy
+ for on_instruments, P_on_instruments in sorted(self.P_live.items(), key = lambda (ignored, P): P):
+ # short cut
+ if not P_on_instruments:
+ continue
+
+ # rates for instrument combinations that are
+ # possible given the instruments that are on
+ allowed_rates = dict((participating_instruments, rate) for participating_instruments, rate in rates.items() if participating_instruments <= on_instruments)
+
+ # subtract from each rate the rate at which that
+ # combination of instruments is found in (allowed)
+ # higher-order coincs. after this, allowed_rates
+ # maps instrument combo to rate of coincs involving
+ # exactly that combo given the instruments that are
+ # on
+ for key in sorted(allowed_rates, key = lambda x: len(x), reverse = True):
+ allowed_rates[key] -= sum(sorted(rate for otherkey, rate in allowed_rates.items() if key < otherkey))
+
+ for combo, rate in allowed_rates.items():
+ assert rate >= 0.
+ coinc_rate[combo] += P_on_instruments * rate
+ return coinc_rate
+
+
+ @property
+ def P_instrument_combo(self):
+ """
+ A dictionary mapping instrument combination (as a
+ frozenset) to the probability that a background coincidence
+ involves precisely that combination of instruments. This
+ is derived from the live times and the mean rates at which
+ the different instrument combinations participate in
+ coincidences. The result is not cached.
+ """
+ # convert rates to relative abundances
+ mean_rates = self.mean_coinc_rate # calculate once
+ total_rate = sum(sorted(mean_rates.values()))
+ P = dict((key, rate / total_rate) for key, rate in mean_rates.items())
+ # make sure normalization is good
+ total = sum(sorted(P.values()))
+ assert abs(1.0 - total) < 1e-14
+ for key in P:
+ P[key] /= total
+ return P
+
+
+ @property
+ def mean_coinc_count(self):
+ """
+ A dictionary mapping instrument combination (as a
+ frozenset) to the total number of coincidences involving
+ precisely that combination of instruments expected from the
+ background. The result is not cached.
+ """
+ T = float(abs(segmentsUtils.vote(self.segmentlists.values(), self.min_instruments)))
+ return dict((instruments, rate * T) for instruments, rate in self.mean_coinc_rate.items())
+
+
+ def instrument_combos(self):
+ """
+ Generator that yields random instrument combinations (as
+ frozensets) in relative abundances that match the expected
+ relative abundances of background coincidences given the
+ live times, mean single-instrument event rates, and
+ coincidence windows.
+
+ Example:
+
+ >>> from glue.segments import *
+ >>> eventlists = {"H1": [0, 1, 2, 3], "L1": [10, 11, 12, 13], "V1": [20, 21, 22, 23]}
+ >>> seglists = segmentlistdict({"H1": segmentlist([segment(0, 30)]), "L1": segmentlist([segment(10, 50)]), "V1": segmentlist([segment(20, 70)])})
+ >>> coinc_synth = CoincSynthesizer(eventlists, seglists, 0.001)
+ >>> combos = coinc_synth.instrument_combos()
+ >>> combos.next() # doctest: +SKIP
+ frozenset(['V1', 'L1'])
+ """
+ #
+ # retrieve sorted tuple of (probability mass, instrument
+ # combo) pairs. remove instrument combos whose probability
+ # mass is 0. if no combos remain then we can't form
+ # coincidences
+ #
+
+ P = tuple(sorted([mass, instruments] for instruments, mass in self.P_instrument_combo.items() if mass != 0))
+ if not P:
+ return
+
+ #
+ # replace the probability masses with cummulative probabilities
+ #
+
+ for i in range(1, len(P)):
+ P[i][0] += P[i - 1][0]
+
+ #
+ # normalize (should be already, just be certain)
+ #
+
+ assert abs(P[-1][0] - 1.0) < 1e-14
+ for i in range(len(P)):
+ P[i][0] /= P[-1][0]
+ assert P[-1][0] == 1.0
+
+ #
+ # generate random instrument combos
+ #
+
+ while 1: # 1 is immutable, so faster than True
+ yield P[bisect_left(P, [random.uniform(0.0, 1.0)])][1]
+
+
+ def coincs(self, timefunc, allow_zero_lag = False):
+ """
+ Generator to yield time shifted coincident event tuples
+ without the use of explicit time shift vectors. This
+ generator can only be used if the eventlists dictionary
+ with which this object was initialized contained lists of
+ event objects and not merely counts of events.
+
+ timefunc is a function for computing the "time" of an
+ event, its signature should be
+
+ t = timefunc(event)
+
+ This function will be applied to the event objects
+ contained in self.eventlists.
+
+ If allow_zero_lag is False (the default), then only event tuples
+ with no genuine zero-lag coincidences are returned, that is
+ only tuples in which no event pairs would be considered to
+ be coincident without time shifts applied. Note that
+ single-instrument "coincidences", if allowed, are *not*
+ considered to be zero-lag coincidences.
+
+ Example:
+
+ >>> from glue.segments import *
+ >>> eventlists = {"H1": [0, 1, 2, 3], "L1": [10, 11, 12, 13], "V1": [20, 21, 22, 23]}
+ >>> seglists = segmentlistdict({"H1": segmentlist([segment(0, 30)]), "L1": segmentlist([segment(10, 50)]), "V1": segmentlist([segment(20, 70)])})
+ >>> coinc_synth = CoincSynthesizer(eventlists, seglists, 0.001)
+ >>> coincs = coinc_synth.coincs((lambda x: 0), allow_zero_lag = True)
+ >>> # returns a tuple of events
+ >>> coincs.next() # doctest: +SKIP
+ """
+ for instruments in self.instrument_combos():
+ # randomly selected events from those instruments
+ instruments = tuple(instruments)
+ events = tuple(random.choice(self.eventlists[instrument]) for instrument in instruments)
+
+ # test for a genuine zero-lag coincidence among them
+ if not allow_zero_lag and any(abs(ta - tb) < self.tau[frozenset((instrumenta, instrumentb))] for (instrumenta, ta), (instrumentb, tb) in itertools.combinations(zip(instruments, (timefunc(event) for event in events)), 2)):
+ continue
+
+ # return acceptable event tuples
+ yield events
+
+
+ def plausible_toas(self, instruments):
+ """
+ Generator that yields dictionaries of random noise event
+ time-of-arrival offsets for the given instruments such that
+ the time-of-arrivals are mutually coincident given the
+ maximum allowed inter-instrument \\Delta t's. The values
+ returned are offsets, and would need to be added to some
+ common time to yield absolute arrival times.
+
+ Example:
+
+ >>> # minimal initialization for this method
+ >>> coinc_synth = CoincSynthesizer(segmentlists = {"H1": None, "L1": None, "V1": None}, delta_t = 0.005)
+ >>> toas = coinc_synth.plausible_toas(("H1", "L1", "V1"))
+ >>> toas.next() # doctest: +SKIP
+ {'V1': 0.004209420456924601, 'H1': 0.0, 'L1': -0.006071537909950742}
+ """
+ # this algorithm is documented in slideless_coinc_generator_rates()
+ instruments = tuple(instruments)
+ anchor, instruments = instruments[0], instruments[1:]
+ anchor_offset = ((anchor, 0.0),) # don't build inside loop
+ uniform = random.uniform
+ windows = tuple((instrument, -self.tau[frozenset((anchor, instrument))], +self.tau[frozenset((anchor, instrument))]) for instrument in instruments)
+ ijseq = tuple((i, j, self.tau[frozenset((instruments[i], instruments[j]))]) for (i, j) in itertools.combinations(range(len(instruments)), 2))
+ while 1:
+ dt = tuple((instrument, uniform(lo, hi)) for instrument, lo, hi in windows)
+ if all(abs(dt[i][1] - dt[j][1]) <= maxdt for i, j, maxdt in ijseq):
+ yield dict(anchor_offset + dt)
+
+
+#
+# =============================================================================
+#
+# Triangulation
+#
+# =============================================================================
+#
+
+
class TOATriangulator(object):
	"""
	Time-of-arrival triangulator.  See section 6.6.4 of
	"Gravitational-Wave Physics and Astronomy" by Creighton and
	Anderson.

	An instance of this class is a function-like object that accepts a
	tuple of event arrival times and returns a tuple providing
	information derived by solving for the maximum-likelihood source
	location assuming Gaussian-distributed timing errors.
	"""
	def __init__(self, rs, sigmas, v = speed_of_light):
		"""
		Create and initialize a triangulator object.

		rs is a sequence of location 3-vectors, sigmas is a
		sequence of the timing uncertainties for those locations.
		Both sequences must be in the same order --- the first
		sigma in the sequence is interpreted as belonging to the
		first location 3-vector --- and, of course, they must be
		the same length.

		v is the speed at which the wave carrying the signals
		travels.  The rs 3-vectors carry units of distance, the
		sigmas carry units of time, v carries units of
		distance/time.  What units are used for the three is
		arbitrary, but they must be mutually consistent.  The
		default value for v is c, the speed of light, in
		metres/second, therefore the location 3-vectors should be
		given in metres and the sigmas should be given in seconds
		unless a value for v is provided with different units.

		Example:

		>>> from numpy import array
		>>> triangulator = TOATriangulator([
		...	array([-2161414.92636, -3834695.17889, 4600350.22664]),
		...	array([  -74276.0447238, -5496283.71971 ,  3224257.01744 ]),
		...	array([ 4546374.099   ,   842989.697626,  4378576.96241 ])
		... ], [
		...	0.005,
		...	0.005,
		...	0.005
		... ])
		...
		>>>

		This creates a TOATriangulator instance configured for the
		LIGO Hanford, LIGO Livingston and Virgo antennas with 5 ms
		time-of-arrival uncertainties at each location.

		Note:  rs and sigmas may be iterated over multiple times.
		"""
		assert len(rs) == len(sigmas)
		assert len(rs) >= 2

		self.rs = numpy.vstack(rs)
		self.sigmas = numpy.array(sigmas)
		self.v = v

		# sigma^-2 -weighted mean of locations
		rbar = sum(self.rs / self.sigmas[:,numpy.newaxis]**2) / sum(1 / self.sigmas**2)

		# the ith row is r - \bar{r} for the ith location
		self.R = self.rs - rbar

		# ith row is \sigma_i^-2 (r_i - \bar{r}) / c
		M = self.R / (self.v * self.sigmas[:,numpy.newaxis]**2)

		if len(rs) >= 3:
			# three or more locations:  solve via the
			# singular value decomposition of M
			self.U, self.S, self.VT = numpy.linalg.svd(M)

			# if the smallest singular value is less than 10^-8 * the
			# largest singular value, assume the network is degenerate
			self.singular = abs(self.S.min() / self.S.max()) < 1e-8
		else:
			# len(rs) == 2.  separation divided by the wave
			# speed is the largest possible arrival time
			# difference between the two locations
			self.max_dt = numpy.dot(self.rs[1] - self.rs[0], self.rs[1] - self.rs[0])**.5 / self.v

	def __call__(self, ts):
		"""
		Triangulate the direction to the source of a signal based
		on a tuple of times when the signal was observed.  ts is a
		sequence of signal arrival times.  One arrival time must be
		provided for each of the observation locations provided
		when the instance was created, and the units of the arrival
		times must be the same as the units used for the sequence
		of sigmas.

		The return value is a tuple of information derived by
		solving for the maximum-likelihood source location assuming
		Gaussian-distributed timing errors.  The return value is

			(n, toa, chi2 / DOF, dt)

		where n is a unit 3-vector pointing from the co-ordinate
		origin towards the source of the signal, toa is the
		time-of-arrival of the signal at the co-ordinate origin,
		chi2 / DOF is the \\chi^{2} per degree-of-freedom from
		the arrival time residuals, and dt is the root-sum-square
		of the arrival time residuals.

		Example:

		>>> from numpy import array
		>>> triangulator = TOATriangulator([
		...	array([-2161414.92636, -3834695.17889, 4600350.22664]),
		...	array([  -74276.0447238, -5496283.71971 ,  3224257.01744 ]),
		...	array([ 4546374.099   ,   842989.697626,  4378576.96241 ])
		... ], [
		...	0.005,
		...	0.005,
		...	0.005
		... ])
		...
		>>> n, toa, chi2_per_dof, dt = triangulator([
		...	794546669.429688,
		...	794546669.41333,
		...	794546669.431885
		... ])
		...
		>>> n
		array([ 0.28747132, -0.37035214,  0.88328904])
		>>> toa
		794546669.40874898
		>>> chi2_per_dof
		2.7407579727907181
		>>> dt
		0.01433725384999875
		"""
		assert len(ts) == len(self.sigmas)

		# change of t co-ordinate to avoid LIGOTimeGPS overflow
		t0 = min(ts)
		ts = numpy.array([float(t - t0) for t in ts])

		# sigma^-2 -weighted mean of arrival times
		tbar = sum(ts / self.sigmas**2) / sum(1 / self.sigmas**2)
		# the i-th element is ts - tbar for the i-th location
		tau = ts - tbar

		if len(self.rs) >= 3:
			# project the time residuals onto the network's
			# singular basis
			tau_prime = numpy.dot(self.U.T, tau)[:3]

			if self.singular:
				# degenerate network.  l is not used in
				# this branch (kept for symmetry with the
				# non-singular branch below)
				l = 0.0
				np = tau_prime / self.S
				try:
					# complete the unit vector;  a
					# ValueError from math.sqrt means
					# the in-plane components already
					# exceed unit length
					np[2] = math.sqrt(1.0 - np[0]**2 - np[1]**2)
				except ValueError:
					np[2] = 0.0
				np /= math.sqrt(numpy.dot(np, np))
			else:
				def n_prime(l, Stauprime = self.S * tau_prime, S2 = self.S * self.S):
					return Stauprime / (S2 + l)
				def secular_equation(l):
					np = n_prime(l)
					return numpy.dot(np, np) - 1

				# values of l that make the denominator of
				# n'(l) 0
				lsing = -self.S * self.S
				# least negative of them is used as lower
				# bound for bisection search root finder
				# (elements of S are ordered from greatest
				# to least, so the last element of lsing is
				# the least negative)
				l_lo = lsing[-1]

				# find a suitable upper bound for the root
				# finder FIXME:  in Jolien's original code
				# l_hi was hard-coded to 1 but we can't
				# figure out why the root must be <= 1, so
				# I put this loop to be safe but at some
				# point it would be good to figure out if
				# 1.0 can be used because it would allow
				# this loop to be skipped
				l_hi = 1.0
				while secular_equation(l_lo) / secular_equation(l_hi) > 0:
					l_lo, l_hi = l_hi, l_hi * 2

				# solve for l
				l = scipy.optimize.brentq(secular_equation, l_lo, l_hi)

				# compute n'
				np = n_prime(l)

			# compute n from n'
			n = numpy.dot(self.VT.T, np)

			# safety check the normalization of the result
			assert abs(numpy.dot(n, n) - 1.0) < 1e-8

			# arrival time at origin
			toa = sum((ts - numpy.dot(self.rs, n) / self.v) / self.sigmas**2) / sum(1 / self.sigmas**2)

			# chi^{2}
			chi2 = sum(((numpy.dot(self.R, n) / self.v - tau) / self.sigmas)**2)

			# root-sum-square timing residual
			dt = ts - toa - numpy.dot(self.rs, n) / self.v
			dt = math.sqrt(numpy.dot(dt, dt))
		else:
			# len(rs) == 2
			# FIXME:  fill in n and toa (is chi2 right?)
			n = numpy.zeros((3,), dtype = "double")
			toa = 0.0
			# residual is the amount by which the arrival time
			# difference exceeds the light travel time, if any
			dt = max(abs(ts[1] - ts[0]) - self.max_dt, 0)
			chi2 = dt**2 / sum(self.sigmas**2)

		# done.  undo the t0 change of co-ordinate for the TOA
		return n, t0 + toa, chi2 / len(self.sigmas), dt
+
+
+#
+# =============================================================================
+#
+# Coincidence Parameter Distributions
+#
+# =============================================================================
+#
+
+
+#
+# A binning for instrument combinations
+#
+# FIXME: we decided that the coherent and null stream naming convention
+# would look like
+#
+# H1H2:LSC-STRAIN_HPLUS, H1H2:LSC-STRAIN_HNULL
+#
+# and so on. i.e., the +, x and null streams from a coherent network would
+# be different channels from a single instrument whose name would be the
+# mash-up of the names of the instruments in the network. that is
# inconsistent with the "H1H2+", "H1H2-" shown here, so this needs to be
+# fixed but I don't know how. maybe it'll go away before it needs to be
+# fixed.
+#
+
+
def InstrumentBins(names = ("E0", "E1", "E2", "E3", "G1", "H1", "H2", "H1H2+", "H1H2-", "L1", "V1")):
	"""
	Construct a rate.HashableBins instance whose bins are all possible
	combinations of the given instrument names, each combination
	represented as a frozenset (the empty combination is included).

	Example:

	>>> x = InstrumentBins()
	>>> x[frozenset(("H1", "L1"))]
	55
	>>> x.centres()[55]
	frozenset(['H1', 'L1'])
	"""
	# enumerate combinations of every size from 0 through len(names)
	combos = (frozenset(combo) for size in range(len(names) + 1) for combo in itertools.combinations(names, size))
	return rate.HashableBins(combos)
+
+
+#
+# Base class for parameter distribution densities for use in log likelihood
+# ratio ranking statistics
+#
+
+
class LnLRDensity(object):
	"""
	Base class for parameter distribution densities used in log
	likelihood ratio ranking statistics.  A practical ranking
	statistic for a trigger-based search is usually assembled from
	several instances of (subclasses of) this class:  at minimum one
	for the numerator and one for the denominator, and often others,
	for example one storing a histogram of the candidates observed in
	a search.

	Typically the ranking statistic implementation provides a function
	that transforms a candidate into a "params" object compatible with
	the .__call__() implementation, so an LnLRDensity object is
	generally only meaningful in the context of the ranking statistic
	class for which it has been constructed.
	"""
	def __call__(self, params):
		"""
		Evaluate.  Return the natural logarithm of the density
		evaluated at the given parameters.
		"""
		raise NotImplementedError

	def __iadd__(self, other):
		"""
		Marginalize the two densities.
		"""
		raise NotImplementedError

	def increment(self, params, weight = 1.0):
		"""
		Increment the counts defining this density at the given
		parameters by weight (default = 1).
		"""
		raise NotImplementedError

	def copy(self):
		"""
		Return a duplicate copy of this object.
		"""
		raise NotImplementedError

	def finish(self):
		"""
		Normalize all internal densities and initialize whatever
		interpolator objects are needed for smooth evaluation.
		Must be invoked before .__call__() will yield sensible
		results.

		NOTE:  for some implementations this operation will
		irreversibly alter the contents of the counts array, for
		example often this operation will involve the convolution
		of the counts with a density estimation kernel.  If it is
		necessary to preserve a pristine copy of the counts data,
		use the .copy() method to obtain a copy of the data,
		first, and then .finish() the copy.
		"""
		raise NotImplementedError

	def samples(self):
		"""
		Generator returning a sequence of parameter values drawn
		from the distribution density.  Some subclasses might
		choose not to implement this, and those that do might
		choose to use an MCMC-style sample generator and so the
		samples should not be assumed to be statistically
		independent.
		"""
		raise NotImplementedError

	def to_xml(self, name):
		"""
		Serialize to an XML fragment and return the root element of
		the resulting XML tree.

		Subclasses must chain to this method, then customize the
		return value as needed.
		"""
		return ligolw.LIGO_LW({u"Name": u"%s:lnlrdensity" % name})

	@classmethod
	def get_xml_root(cls, xml, name):
		"""
		Sub-classes can use this in their overrides of the
		.from_xml() method to find the root element of the XML
		serialization.
		"""
		name = u"%s:lnlrdensity" % name
		matches = [elem for elem in xml.getElementsByTagName(ligolw.LIGO_LW.tagName) if elem.hasAttribute(u"Name") and elem.Name == name]
		if len(matches) != 1:
			raise ValueError("XML tree must contain exactly one %s element named %s" % (ligolw.LIGO_LW.tagName, name))
		return matches[0]

	@classmethod
	def from_xml(cls, xml, name):
		"""
		In the XML document tree rooted at xml, search for the
		serialized LnLRDensity object named name, and deserialize
		it.  The return value is the deserialized LnLRDensity
		object.
		"""
		# Generally implementations should start with something
		# like this:
		#xml = cls.get_xml_root(xml, name)
		#self = cls()
		#return self
		raise NotImplementedError
+
+
+#
+# A class for measuring parameter distributions
+#
+
+
+class CoincParamsDistributions(object):
+ """
	A class for histogramming the parameters of coincidences (or of
+ single events). It is assumed there is a fixed, pre-determined,
+ set of parameters that one wishes to histogram, and that each
+ parameter has a name. To use this, it must be sub-classed and the
+ derived class must provide dictionaries of binnings and functions
+ for performing the kernel density estimation transform to obtain
+ PDFs from histograms of counts. The binnings is a dictionary
+ mapping parameter names to rate.NDBins instances describing the
	binning to be used for each parameter.  The pdf_from_rates_func
+ dictionary maps parameter names to functions to smooth and
+ normalize bin count data into PDFs. As a special case, a default
+ function is provided and will be used for any parameters whose
+ names do not appear in the pdf_from_rates_func dictionary. The
+ default function looks for a smoothing filter in the filters
+ dictionary, applies it if found, then invokes the .to_pdf() method
+ of the binned array object. Subclasses must also provide a
+ .coinc_params() static method that will transform a list of
	single-instrument events into a dictionary mapping parameter name to
+ parameter value.
+
+ This class maintains three sets of histograms, one set for noise
+ (or "background") events, one set for signal (or "injection")
+ events and one set for observed (or "zero lag") events. The bin
+ counts are floating point values (not integers).
+ """
	#
	# sub-classes may override the following
	#

	# suffix appended to the name when serializing instances to an
	# XML "LIGO_LW" element (see .get_xml_root())
	ligo_lw_name_suffix = u"pylal_snglcoinc_coincparamsdistributions"

	#
	# Default content handler for loading CoincParamsDistributions
	# objects from XML documents
	#

	class LIGOLWContentHandler(ligolw.LIGOLWContentHandler):
		pass
	ligolw_array.use_in(LIGOLWContentHandler)
	lsctables.use_in(LIGOLWContentHandler)
	ligolw_param.use_in(LIGOLWContentHandler)

	#
	# sub-classes must override the following
	#

	# dictionary mapping parameter name to the binning to histogram
	# it with
	binnings = {}

	# dictionary mapping parameter name to the function used to
	# convert its binned counts into a PDF;  parameters not named
	# here are processed with .default_pdf_from_rates()
	pdf_from_rates_func = {}
	# dictionary mapping parameter name to the smoothing filter
	# applied by .default_pdf_from_rates()
	filters = {}

	@staticmethod
	def coinc_params(*args, **kwargs):
		"""
		Given a sequence of single-instrument events (rows from an
		event table) that form a coincidence, compute and return a
		dictionary mapping parameter name to parameter values,
		suitable for being passed to one of the .add_*() methods.
		This function may return None.
		"""
		raise NotImplementedError("subclass must implement .coinc_params() method")
+
	def __init__(self, process_id = None):
		"""
		Initialize the count histograms from the subclass-provided
		binnings dictionary.  process_id, if given, records the ID
		of the process that created this object.
		"""
		if not self.binnings:
			raise NotImplementedError("subclass must provide dictionary of binnings")
		# raw bin counts, one BinnedArray per parameter, for each
		# of the three data categories
		self.zero_lag_rates = dict((param, rate.BinnedArray(binning)) for param, binning in self.binnings.items())
		self.background_rates = dict((param, rate.BinnedArray(binning)) for param, binning in self.binnings.items())
		self.injection_rates = dict((param, rate.BinnedArray(binning)) for param, binning in self.binnings.items())
		# normalized PDFs, populated from the counts by .finish()
		self.zero_lag_pdf = {}
		self.background_pdf = {}
		self.injection_pdf = {}
		# ln PDF interpolators, populated by
		# ._rebuild_interpolators()
		self.zero_lag_lnpdf_interp = {}
		self.background_lnpdf_interp = {}
		self.injection_lnpdf_interp = {}
		self.process_id = process_id
+
+ def _rebuild_interpolators(self, keys = None):
+ """
+ Initialize the interp dictionaries from the discretely
+ sampled PDF data. For internal use only.
+ """
+ self.zero_lag_lnpdf_interp.clear()
+ self.background_lnpdf_interp.clear()
+ self.injection_lnpdf_interp.clear()
+ # if a specific set of keys wasn't given, do them all
+ if keys is None:
+ keys = set(self.zero_lag_pdf)
+ # build interpolators for the requested keys
+ def mkinterp(binnedarray):
+ with numpy.errstate(invalid = "ignore"):
+ assert not (binnedarray.array < 0.).any()
+ binnedarray = binnedarray.copy()
+ with numpy.errstate(divide = "ignore"):
+ binnedarray.array = numpy.log(binnedarray.array)
+ return rate.InterpBinnedArray(binnedarray, fill_value = NegInf)
+ for key, binnedarray in self.zero_lag_pdf.items():
+ if key in keys:
+ self.zero_lag_lnpdf_interp[key] = mkinterp(binnedarray)
+ for key, binnedarray in self.background_pdf.items():
+ if key in keys:
+ self.background_lnpdf_interp[key] = mkinterp(binnedarray)
+ for key, binnedarray in self.injection_pdf.items():
+ if key in keys:
+ self.injection_lnpdf_interp[key] = mkinterp(binnedarray)
+
	@staticmethod
	def addbinnedarrays(rate_target_dict, rate_source_dict, pdf_target_dict, pdf_source_dict):
		"""
		Add the BinnedArrays in rate_source_dict into
		rate_target_dict, and merge the corresponding PDFs as
		count-weighted averages.  For internal use.
		"""
		# record the total counts in each histogram before adding;
		# these become the weights for averaging the PDFs below
		weight_target = {}
		weight_source = {}
		for name, binnedarray in rate_source_dict.items():
			if name in rate_target_dict:
				weight_target[name] = rate_target_dict[name].array.sum()
				weight_source[name] = rate_source_dict[name].array.sum()
				rate_target_dict[name] += binnedarray
			else:
				# copy so later in-place arithmetic on the
				# target cannot corrupt the source
				rate_target_dict[name] = binnedarray.copy()
		for name, binnedarray in pdf_source_dict.items():
			if name in pdf_target_dict:
				# count-weighted average of the two PDFs
				binnedarray = binnedarray.copy()
				binnedarray.array *= weight_source[name]
				pdf_target_dict[name].array *= weight_target[name]
				pdf_target_dict[name] += binnedarray
				pdf_target_dict[name].array /= weight_source[name] + weight_target[name]
			else:
				pdf_target_dict[name] = binnedarray.copy()
+
+ def __iadd__(self, other):
+ if type(other) != type(self):
+ raise TypeError(other)
+
+ self.addbinnedarrays(self.zero_lag_rates, other.zero_lag_rates, self.zero_lag_pdf, other.zero_lag_pdf)
+ self.addbinnedarrays(self.background_rates, other.background_rates, self.background_pdf, other.background_pdf)
+ self.addbinnedarrays(self.injection_rates, other.injection_rates, self.injection_pdf, other.injection_pdf)
+
+ #
+ # rebuild interpolators
+ #
+
+ self._rebuild_interpolators()
+
+ #
+ # done
+ #
+
+ return self
+
+ def copy(self):
+ new = type(self)(process_id = self.process_id)
+ new += self
+ return new
+
+ def add_zero_lag(self, param_dict, weight = 1.0):
+ """
+ Increment a bin in one or more of the observed data (or
+ "zero lag") histograms by weight (default 1). The names of
+ the histograms to increment, and the parameters identifying
+ the bin in each histogram, are given by the param_dict
+ dictionary.
+ """
+ for param, value in param_dict.items():
+ try:
+ self.zero_lag_rates[param][value] += weight
+ except IndexError:
+ # param value out of range
+ pass
+
+ def add_background(self, param_dict, weight = 1.0):
+ """
+ Increment a bin in one or more of the noise (or
+ "background") histograms by weight (default 1). The names
+ of the histograms to increment, and the parameters
+ identifying the bin in each histogram, are given by the
+ param_dict dictionary.
+ """
+ for param, value in param_dict.items():
+ try:
+ self.background_rates[param][value] += weight
+ except IndexError:
+ # param value out of range
+ pass
+
+ def add_injection(self, param_dict, weight = 1.0):
+ """
+ Increment a bin in one or more of the signal (or
+ "injection") histograms by weight (default 1). The names
+ of the histograms to increment, and the parameters
+ identifying the bin in each histogram, are given by the
+ param_dict dictionary.
+ """
+ for param, value in param_dict.items():
+ try:
+ self.injection_rates[param][value] += weight
+ except IndexError:
+ # param value out of range
+ pass
+
+ def default_pdf_from_rates(self, key, pdf_dict):
+ """
+ For internal use by the CoincParamsDistributions class.
+ """
+ binnedarray = pdf_dict[key]
+ if key in self.filters:
+ rate.filter_array(binnedarray.array, self.filters[key])
+ binnedarray.to_pdf()
+
	def finish(self, verbose = False):
		"""
		Populate the discrete PDF dictionaries from the contents of
		the rates dictionaries, and then the PDF interpolator
		dictionaries from the discrete PDFs.  The raw bin counts
		from the rates dictionaries are copied verbatim, smoothed
		using the dictionary of filters carried by this class
		instance, and converted to normalized PDFs using the bin
		volumes.  Finally the dictionary of PDF interpolators is
		populated from the discretely sampled PDF data.
		"""
		#
		# convert raw bin counts into normalized PDFs
		#

		self.zero_lag_pdf.clear()
		self.background_pdf.clear()
		self.injection_pdf.clear()
		progressbar = ProgressBar(text = "Computing Parameter PDFs", max = len(self.zero_lag_rates) + len(self.background_rates) + len(self.injection_rates)) if verbose else None
		# iterate over every (parameter name, data category) pair;
		# each item pairs a parameter name with the label, counts
		# dictionary and PDF dictionary of its category
		for key, (msg, rates_dict, pdf_dict) in itertools.chain(
			zip(self.zero_lag_rates, itertools.repeat(("zero lag", self.zero_lag_rates, self.zero_lag_pdf))),
			zip(self.background_rates, itertools.repeat(("background", self.background_rates, self.background_pdf))),
			zip(self.injection_rates, itertools.repeat(("injections", self.injection_rates, self.injection_pdf)))
		):
			# counts must be finite and non-negative to yield
			# a meaningful PDF
			assert numpy.isfinite(rates_dict[key].array).all() and (rates_dict[key].array >= 0).all(), "%s %s counts are not valid" % (key, msg)
			# work on a copy so the raw counts are preserved
			pdf_dict[key] = rates_dict[key].copy()
			# use the parameter-specific transform if one was
			# registered, otherwise the default;  a transform
			# of None disables the conversion for that key
			try:
				pdf_from_rates_func = self.pdf_from_rates_func[key]
			except KeyError:
				pdf_from_rates_func = self.default_pdf_from_rates
			if pdf_from_rates_func is not None:
				pdf_from_rates_func(key, pdf_dict)
			if progressbar is not None:
				progressbar.increment()

		#
		# rebuild interpolators
		#

		self._rebuild_interpolators()
+
+ def lnP_noise(self, params):
+ """
+ From a parameter value dictionary as returned by
+ self.coinc_params(), compute and return the natural
+ logarithm of the noise probability density at that point in
+ parameter space.
+
+ The .finish() method must have been invoked before this
+ method does meaningful things. No attempt is made to
+ ensure the .finish() method has been invoked nor, if it has
+ been invoked, that no manipulations have occured that might
+ require it to be re-invoked (e.g., the contents of the
+ parameter distributions have been modified and require
+ re-normalization).
+
+ This default implementation assumes the individual PDFs
+ containined in the noise dictionary are for
+ statistically-independent random variables, and computes
+ and returns the logarithm of their product. Sub-classes
+ that require more sophisticated calculations can override
+ this method.
+ """
+ __getitem__ = self.background_lnpdf_interp.__getitem__
+ return sum(__getitem__(name)(*value) for name, value in params.items())
+
+ def lnP_signal(self, params):
+ """
+ From a parameter value dictionary as returned by
+ self.coinc_params(), compute and return the natural
+ logarithm of the signal probability density at that point
+ in parameter space.
+
+ The .finish() method must have been invoked before this
+ method does meaningful things. No attempt is made to
+ ensure the .finish() method has been invoked nor, if it has
+ been invoked, that no manipulations have occurred that might
+ require it to be re-invoked (e.g., the contents of the
+ parameter distributions have been modified and require
+ re-normalization).
+
+ This default implementation assumes the individual PDFs
+ contained in the signal dictionary are for
+ statistically-independent random variables, and computes
+ and returns the logarithm of their product. Sub-classes
+ that require more sophisticated calculations can override
+ this method.
+ """
+ __getitem__ = self.injection_lnpdf_interp.__getitem__
+ return sum(__getitem__(name)(*value) for name, value in params.items())
+
+ @classmethod
+ def get_xml_root(cls, xml, name):
+ """
+ Sub-classes can use this in their overrides of the
+ .from_xml() method to find the root element of the XML
+ serialization.
+ """
+ name = u"%s:%s" % (name, cls.ligo_lw_name_suffix)
+ xml = [elem for elem in xml.getElementsByTagName(ligolw.LIGO_LW.tagName) if elem.hasAttribute(u"Name") and elem.Name == name]
+ if len(xml) != 1:
+ raise ValueError("XML tree must contain exactly one %s element named %s" % (ligolw.LIGO_LW.tagName, name))
+ return xml[0]
+
+ @classmethod
+ def from_xml(cls, xml, name, **kwargs):
+ """
+ In the XML document tree rooted at xml, search for the
+ serialized CoincParamsDistributions object named name, and
+ deserialize it. The return value is a two-element tuple.
+ The first element is the deserialized
+ CoincParamsDistributions object, the second is the process
+ ID recorded when it was written to XML.
+ """
+ # find the root element of the XML serialization
+ xml = cls.get_xml_root(xml, name)
+
+ # retrieve the process ID
+ process_id = ligolw_param.get_pyvalue(xml, u"process_id")
+
+ # create an instance
+ self = cls(process_id = process_id, **kwargs)
+
+ # reconstruct the BinnedArray objects
+ def reconstruct(xml, prefix, target_dict):
+ for name in [elem.Name.split(u":")[1] for elem in xml.childNodes if elem.Name.startswith(u"%s:" % prefix)]:
+ target_dict[str(name)] = rate.BinnedArray.from_xml(xml, u"%s:%s" % (prefix, name))
+ reconstruct(xml, u"zero_lag", self.zero_lag_rates)
+ reconstruct(xml, u"zero_lag_pdf", self.zero_lag_pdf)
+ reconstruct(xml, u"background", self.background_rates)
+ reconstruct(xml, u"background_pdf", self.background_pdf)
+ reconstruct(xml, u"injection", self.injection_rates)
+ reconstruct(xml, u"injection_pdf", self.injection_pdf)
+
+ #
+ # rebuild interpolators
+ #
+
+ self._rebuild_interpolators()
+
+ #
+ # done
+ #
+
+ return self
+
+ def to_xml(self, name):
+ """
+ Serialize this CoincParamsDistributions object to an XML
+ fragment and return the root element of the resulting XML
+ tree. The .process_id attribute of process will be
+ recorded in the serialized XML, and the object will be
+ given the name name.
+ """
+ xml = ligolw.LIGO_LW({u"Name": u"%s:%s" % (name, self.ligo_lw_name_suffix)})
+ xml.appendChild(ligolw_param.Param.from_pyvalue(u"process_id", self.process_id))
+ def store(xml, prefix, source_dict):
+ for name, binnedarray in sorted(source_dict.items()):
+ xml.appendChild(binnedarray.to_xml(u"%s:%s" % (prefix, name)))
+ store(xml, u"zero_lag", self.zero_lag_rates)
+ store(xml, u"zero_lag_pdf", self.zero_lag_pdf)
+ store(xml, u"background", self.background_rates)
+ store(xml, u"background_pdf", self.background_pdf)
+ store(xml, u"injection", self.injection_rates)
+ store(xml, u"injection_pdf", self.injection_pdf)
+
+ return xml
+
+
+#
+# Likelihood Ratio
+#
+
+
+# starting from Bayes' theorem:
+#
+# P(coinc is a g.w. | its parameters)
+# P(those parameters | coinc is g.w.) * P(coinc is g.w.)
+# = ------------------------------------------------------
+# P(parameters)
+#
+# P(those parameters | coinc is g.w.) * P(coinc is g.w.)
+# = -------------------------------------------------------------------------
+# P(noise params) * P(coinc is not g.w.) + P(inj params) * P(coinc is g.w.)
+#
+# P(inj params) * P(coinc is g.w.)
+# = ---------------------------------------------------------------------------
+# P(noise params) * [1 - P(coinc is g.w.)] + P(inj params) * P(coinc is g.w.)
+#
+# P(inj params) * P(coinc is g.w.)
+# = ----------------------------------------------------------------------
+# P(noise params) + [P(inj params) - P(noise params)] * P(coinc is g.w.)
+#
+# this last form above is used below to compute the LHS
+#
+# [P(inj params) / P(noise params)] * P(coinc is g.w.)
+# = --------------------------------------------------------------
+# 1 + [[P(inj params) / P(noise params)] - 1] * P(coinc is g.w.)
+#
+# Lambda * P(coinc is g.w.) P(inj params)
+# = ----------------------------------- where Lambda = ---------------
+# 1 + (Lambda - 1) * P(coinc is g.w.) P(noise params)
+#
+# Differentiating w.r.t. Lambda shows the derivative is always positive, so
+# thresholding on Lambda is equivalent to thresholding on P(coinc is a g.w.
+# | its parameters). The limits: Lambda=0 --> P(coinc is a g.w. | its
+# parameters)=0, Lambda=+inf --> P(coinc is a g.w. | its parameters)=1. We
+# interpret Lambda=0/0 to mean P(coinc is a g.w. | its parameters)=0 since
+# although it can't be noise it's definitely not a g.w.. We do not protect
+# against NaNs in the Lambda = +inf/+inf case.
+
+
+class LnLikelihoodRatioMixin(object):
+ """
+ Mixin class to provide the standard log likelihood ratio methods.
+ Intended to be added to the parent classes of a ranking statistic
+ class defining .numerator and .denominator attributes that are both
+ instances of (subclasses of) the LnLRDensity class. The ranking
+ statistic class will then acquire a .__call__() method allowing it
+ to be used as a log likelihood ratio function, and also a
+ .ln_lr_samples() method providing importance-weighted sampling of
+ the log likelihood ratio distribution in the signal and noise
+ (numerator and denominator) populations.
+ """
+ def __call__(self, *args, **kwargs):
+ """
+ Return the natural logarithm of the likelihood ratio for
+ the given parameters. The likelihood ratio is P(params |
+ signal) / P(params | noise). The probability that the
+ events are the result of a gravitational wave is a
+ monotonically increasing function of the likelihood ratio,
+ so ranking events from "most like a gravitational wave" to
+ "least like a gravitational wave" can be performed by
+ calculating the (logarithm of the) likelihood ratios.
+
+ The arguments are passed verbatim to the .__call__()
+ methods of the .numerator and .denominator attributes of
+ self.
+ """
+ lnP_signal = self.numerator(*args, **kwargs)
+ lnP_noise = self.denominator(*args, **kwargs)
+ if math.isinf(lnP_noise) and math.isinf(lnP_signal):
+ # need to handle a special case
+ if lnP_noise < 0. and lnP_signal < 0.:
+ # both probabilities are 0. "correct"
+ # answer is -inf, because if a candidate is
+ # in a region of parameter space where the
+ probability of a signal occurring is 0
+ # then it is not a signal. is it also,
+ apparently, not noise, which is curious
+ # but irrelevant because we are seeking a
+ # result that is a monotonically increasing
+ # function of the probability that a
+ # candidate is a signal, which is
+ # impossible in this part of the parameter
+ # space.
+ return NegInf
+ # all remaining cases are handled correctly by the
+ # expression that follows, but one still deserves a
+ # warning
+ if lnP_noise > 0. and lnP_signal > 0.:
+ # both probabilities are +inf. no correct
+ answer. NaN will be returned in this
+ # case, and it helps to have a record in
+ # the log of why that happened.
+ warnings.warn("inf/inf encountered")
+ return lnP_signal - lnP_noise
+
+ def ln_lr_samples(self, random_params_seq, sampler_coinc_params = None, **kwargs):
+ """
+ Generator that yields an unending sequence of 3-element
+ tuples. Each tuple's elements are a value of the natural
+ logarithm of the likelihood rato, the natural logarithm of
+ the relative frequency of occurance of that likelihood
+ ratio in the signal population corrected for the relative
+ frequency at which the sampler is yielding that value, and
+ the natural logarithm of the relative frequency of
+ occurrence of that likelihood ratio in the noise population
+ similarly corrected for the relative frequency at which the
+ sampler is yielding that value. The intention is for the
+ return values to be added to histograms using the given
+ probability densities as weights, i.e., the two relative
+ frequencies give the number of times one should consider
+ this one draw of log likelihood ratio to have occurred in
+ the two populations.
+
+ random_params_seq is a sequence (generator is OK) yielding
+ 2-element tuples whose first element is a choice of
+ parameter values and whose second element is the natural
+ logarithm of the probability density from which the
+ parameters have been drawn evaluated at the parameters.
+
+ On each iteration, the sample of parameter values yielded
+ by random_params_seq is passed to our own .__call__()
+ method to evaluate the log likelihood ratio at that choice
+ of parameter values. If sampler_coinc_params is None the
+ parameters are also passed to the .__call__() methods of the
+ .numerator and .denominator attributes of self to obtain
+ the signal and noise population densities at those
+ parameters. If sampler_coinc_params is not None then,
+ instead, the parameters are passed to the .__call__()
+ methods of its .numerator and .denominator attributes.
+
+ If histograming the results as described above, the effect
+ is to draw parameter values from the signal and noise
+ populations defined by sampler_coinc_params' PDFs but with
+ log likelihood ratios evaluated using our own PDFs.
+ """
+ if sampler_coinc_params is None:
+ lnP_signal_func = self.numerator
+ lnP_noise_func = self.denominator
+ else:
+ lnP_signal_func = sampler_coinc_params.numerator
+ lnP_noise_func = sampler_coinc_params.denominator
+ isinf = math.isinf
+ for params, lnP_params in random_params_seq:
+ lnP_signal = lnP_signal_func(params, **kwargs)
+ lnP_noise = lnP_noise_func(params, **kwargs)
+ yield self(params, **kwargs), lnP_signal - lnP_params, lnP_noise - lnP_params
diff --git a/lalburst/python/lalburst/stringutils.py b/lalburst/python/lalburst/stringutils.py
index 473617bbb9c817262a3a00548f9857805953103d..b4f8f93a4da8104d4d1b4da220b5ac813047f073 100644
--- a/lalburst/python/lalburst/stringutils.py
+++ b/lalburst/python/lalburst/stringutils.py
@@ -42,7 +42,7 @@ from glue.ligolw import lsctables
from glue.ligolw import utils as ligolw_utils
from glue.ligolw.utils import process as ligolw_process
from glue.offsetvector import offsetvector
-from pylal import snglcoinc
+from . import snglcoinc
__author__ = "Kipp Cannon