diff --git a/gstlal-inspiral/bin/gstlal_inspiral_create_prior_diststats b/gstlal-inspiral/bin/gstlal_inspiral_create_prior_diststats index 8dd19eddc68dbe577a8757ee6ecbfece73498150..9077a7cdd524bd8bd9b6a1afa03d697e368b7cd3 100755 --- a/gstlal-inspiral/bin/gstlal_inspiral_create_prior_diststats +++ b/gstlal-inspiral/bin/gstlal_inspiral_create_prior_diststats @@ -60,6 +60,7 @@ from pylal import rate from gstlal import far from gstlal import reference_psd +from gstlal.stats import horizonhistory __author__ = "Chad Hanna <chad.hanna@ligo.org>" @@ -179,7 +180,7 @@ for horizon_distances in options.horizon_distances: # coincparamsdistributions.get_snr_joint_pdf(instruments, horizon_distances, verbose = options.verbose) # Force the SNR pdf to be generated to be at the actual horizon distance values not the quantized ones - key = frozenset(instruments), frozenset(coincparamsdistributions.quantize_horizon_distances(horizon_distances).items()) + key = frozenset(instruments), frozenset(horizonhistory.quantize_horizon_distances(horizon_distances).items()) if options.verbose: print >>sys.stderr, "For horizon distances %s" % ", ".join("%s = %.4g Mpc" % item for item in sorted(horizon_distances.items())) progressbar = ProgressBar(text = "%s SNR PDF" % ", ".join(sorted(key[0]))) diff --git a/gstlal-inspiral/bin/gstlal_ll_inspiral_create_prior_diststats b/gstlal-inspiral/bin/gstlal_ll_inspiral_create_prior_diststats index b56ae82530e7351f8d776795a4625cb3383f8a2d..0e46c266177936b1e4edd3c6dc16c31c66e8e1ab 100755 --- a/gstlal-inspiral/bin/gstlal_ll_inspiral_create_prior_diststats +++ b/gstlal-inspiral/bin/gstlal_ll_inspiral_create_prior_diststats @@ -44,6 +44,7 @@ import sys import shutil from gstlal import far +from gstlal.stats import horizonhistory from gstlal import inspiral_pipe from glue.ligolw import utils as ligolw_utils from glue.ligolw.utils import process as ligolw_process @@ -82,8 +83,8 @@ def parse_command_line(): for x in options.segment_and_horizon: ifo, start, stop, horizon = x.split(":") seglistdict.setdefault(ifo, segments.segmentlist()).append(segments.segment([LIGOTimeGPS(float(start)), LIGOTimeGPS(float(stop))])) - horizon_history.setdefault(ifo, far.NearestLeafTree())[LIGOTimeGPS(float(start))] = float(horizon) - horizon_history.setdefault(ifo, far.NearestLeafTree())[LIGOTimeGPS(float(stop))] = float(horizon) + horizon_history.setdefault(ifo, horizonhistory.NearestLeafTree())[LIGOTimeGPS(float(start))] = float(horizon) + horizon_history.setdefault(ifo, horizonhistory.NearestLeafTree())[LIGOTimeGPS(float(stop))] = float(horizon) return seglistdict, horizon_history if options.segment_and_horizon is None: @@ -170,7 +171,7 @@ horizon_distances = dict(((ifo, numpy.mean(h.values())) for ifo, h in horizon_hi for n in range(2, len(horizon_distances) + 1): for instruments in iterutils.choices(horizon_distances.keys(), n): # Force the SNR pdf to be generated to be at the actual horizon distance values not the quantized ones - key = frozenset(instruments), frozenset(diststats.quantize_horizon_distances(horizon_distances).items()) + key = frozenset(instruments), frozenset(horizonhistory.quantize_horizon_distances(horizon_distances).items()) if options.verbose: print >>sys.stderr, "For horizon distances %s" % ", ".join("%s = %.4g Mpc" % item for item in sorted(horizon_distances.items())) progressbar = ProgressBar(text = "%s SNR PDF" % ", ".join(sorted(key[0]))) diff --git a/gstlal-inspiral/configure.ac b/gstlal-inspiral/configure.ac index f4a17c4c0ccb0de9afd3be38eae6ee84d33a5ddc..d9c0a17ebf598a945efbcc6231f00d91c6b60edb 100644 --- a/gstlal-inspiral/configure.ac +++ b/gstlal-inspiral/configure.ac @@ -22,6 +22,7 @@ AC_CONFIG_FILES([ \ lib/skymap/Makefile \ python/Makefile \ python/emcee/Makefile \ + python/stats/Makefile \ gst/Makefile \ gst/lal/Makefile \ gst/python/Makefile \ diff --git a/gstlal-inspiral/python/Makefile.am b/gstlal-inspiral/python/Makefile.am index a54cf01e97049e72ddfba9ed19f0dadfb59d2bb2..0255b200b74ad66a196b1c2eef37dd12f5adfc21 100644 --- a/gstlal-inspiral/python/Makefile.am +++ b/gstlal-inspiral/python/Makefile.am @@ -1,6 +1,6 @@ AM_CPPFLAGS = -I$(top_srcdir)/lib -SUBDIRS = emcee +SUBDIRS = emcee stats # This is a trick taken from the gst-python automake setup. # All of the Python scripts will be installed under the exec dir, diff --git a/gstlal-inspiral/python/far.py b/gstlal-inspiral/python/far.py index dbf05476b591a311d109ecff9c06d0355d6f74a6..07acbbe5a357fb51de4c3d1bd731717e63ecf512 100644 --- a/gstlal-inspiral/python/far.py +++ b/gstlal-inspiral/python/far.py @@ -46,8 +46,6 @@ # -import bisect -import copy try: from fpconst import NaN, NegInf, PosInf except ImportError: @@ -72,7 +70,6 @@ import sys from glue import iterutils from glue import segments from glue.ligolw import ligolw -from glue.ligolw import array as ligolw_array from glue.ligolw import param as ligolw_param from glue.ligolw import lsctables from glue.ligolw import utils as ligolw_utils @@ -86,6 +83,7 @@ from pylal import snglcoinc from gstlal import stats as gstlalstats +from gstlal.stats import horizonhistory # @@ -97,308 +95,6 @@ from gstlal import stats as gstlalstats # -# -# Horizon distance record keeping -# - - -class NearestLeafTree(object): - """ - A simple binary tree in which look-ups return the value of the - closest leaf. Only float objects are supported for the keys and - values. Look-ups raise KeyError if the tree is empty. - - Example: - - >>> x = NearestLeafTree() - >>> x[100.0] = 120. - >>> x[104.0] = 100. - >>> x[102.0] = 110. - >>> x[90.] - 120.0 - >>> x[100.999] - 120.0 - >>> x[101.001] - 110.0 - >>> x[200.] - 100.0 - >>> del x[104] - >>> x[200.] - 110.0 - >>> x.keys() - [100.0, 102.0] - >>> 102 in x - True - >>> 103 in x - False - >>> x.to_xml(u"H1").write() - <Array Type="real_8" Name="H1:nearestleaftree:array"> - <Dim>2</Dim> - <Dim>2</Dim> - <Stream Delimiter=" " Type="Local"> - 100 102 - 120 110 - </Stream> - </Array> - """ - def __init__(self, items = ()): - """ - Initialize a NearestLeafTree. - - Example: - - >>> x = NearestLeafTree() - >>> x = NearestLeafTree([(100., 120.), (104., 100.), (102., 110.)]) - >>> y = {100.: 120., 104.: 100., 102.: 100.} - >>> x = NearestLeafTree(y.items()) - """ - self.tree = list(items) - self.tree.sort() - - def __setitem__(self, x, val): - """ - Example: - - >>> x = NearestLeafTree() - >>> x[100.:200.] = 0. - >>> x[150.] = 1. - >>> x - NearestLeaftree([(100, 0), (150, 1), (200, 0)]) - """ - if type(x) is slice: - # replace all entries in the requested range of - # co-ordiantes with two entries, each with the - # given value, one at the start of the range and - # one at the end of the range. thus, after this - # all queries within that range will return this - # value. - if x.step is not None: - raise ValueError("%s: step not supported" % repr(x)) - if x.start is None: - if not self.tree: - raise IndexError("open-ended slice not supported with empty tree") - x = slice(self.minkey(), x.stop) - if x.stop is None: - if not self.tree: - raise IndexError("open-ended slice not supported with empty tree") - x = slice(x.start, self.maxkey()) - if x.stop < x.start: - raise ValueError("%s: bounds out of order" % repr(x)) - lo = bisect.bisect_left(self.tree, (x.start, NegInf)) - hi = bisect.bisect_right(self.tree, (x.stop, PosInf)) - self.tree[lo:hi] = ((x.start, val), (x.stop, val)) - else: - # replace all entries having the same co-ordinate - # with this one - lo = bisect.bisect_left(self.tree, (x, NegInf)) - hi = bisect.bisect_right(self.tree, (x, PosInf)) - self.tree[lo:hi] = ((x, val),) - - def __getitem__(self, x): - if not self.tree: - raise KeyError(x) - if type(x) is slice: - raise ValueError("slices not supported") - hi = bisect.bisect_right(self.tree, (x, PosInf)) - try: - x_hi, val_hi = self.tree[hi] - except IndexError: - x_hi, val_hi = self.tree[-1] - if hi == 0: - x_lo, val_lo = x_hi, val_hi - else: - x_lo, val_lo = self.tree[hi - 1] - return val_lo if abs(x - x_lo) < abs(x_hi - x) else val_hi - - def __delitem__(self, x): - """ - Example: - - >>> x = NearestLeafTree([(100., 0.), (150., 1.), (200., 0.)]) - >>> del x[150.] - >>> x - NearestLeafTree([(100., 0.), (200., 0.)]) - >>> del x[:] - NearestLeafTree([]) - """ - if type(x) is slice: - if x.step is not None: - raise ValueError("%s: step not supported" % repr(x)) - if x.start is None: - if not self.tree: - # no-op - return - x = slice(self.minkey(), x.stop) - if x.stop is None: - if not self.tree: - # no-op - return - x = slice(x.start, self.maxkey()) - if x.stop < x.start: - # no-op - return - lo = bisect.bisect_left(self.tree, (x.start, NegInf)) - hi = bisect.bisect_right(self.tree, (x.stop, PosInf)) - del self.tree[lo:hi] - elif not self.tree: - raise IndexError(x) - else: - lo = bisect.bisect_left(self.tree, (x, NegInf)) - if self.tree[lo][0] != x: - raise IndexError(x) - del self.tree[lo] - - def __nonzero__(self): - return bool(self.tree) - - def __iadd__(self, other): - for x, val in other.tree: - self[x] = val - return self - - def keys(self): - return [x for x, val in self.tree] - - def values(self): - return [val for x, val in self.tree] - - def items(self): - return list(self.tree) - - def min(self): - """ - Return the minimum value stored in the tree. This is O(n). - """ - if not self.tree: - raise ValueError("empty tree") - return min(val for x, val in self.tree) - - def minkey(self): - """ - Return the minimum key stored in the tree. This is O(1). - """ - if not self.tree: - raise ValueError("empty tree") - return self.tree[0][0] - - def max(self): - """ - Return the maximum value stored in the tree. This is O(n). - """ - if not self.tree: - raise ValueError("empty tree") - return max(val for x, val in self.tree) - - def maxkey(self): - """ - Return the maximum key stored in the tree. This is O(1). - """ - if not self.tree: - raise ValueError("empty tree") - return self.tree[-1][0] - - def __contains__(self, x): - try: - return bool(self.tree) and self.tree[bisect.bisect_left(self.tree, (x, NegInf))][0] == x - except IndexError: - return False - - def __len__(self): - return len(self.tree) - - def __repr__(self): - return "NearestLeaftree([%s])" % ", ".join("(%g, %g)" % item for item in self.tree) - - @classmethod - def from_xml(cls, xml, name): - return cls(map(tuple, ligolw_array.get_array(xml, u"%s:nearestleaftree" % name).array[:])) - - def to_xml(self, name): - return ligolw_array.from_array(u"%s:nearestleaftree" % name, numpy.array(self.tree, dtype = "double")) - - -class HorizonHistories(dict): - def __iadd__(self, other): - for key, history in other.iteritems(): - try: - self[key] += history - except KeyError: - self[key] = copy.deepcopy(history) - return self - - def minkey(self): - """ - Return the minimum key stored in the trees. - """ - minkeys = tuple(history.minkey() for history in self.values() if history) - if not minkeys: - raise ValueError("empty trees") - return min(minkeys) - - def maxkey(self): - """ - Return the maximum key stored in the trees. - """ - maxkeys = tuple(history.maxkey() for history in self.values() if history) - if not maxkeys: - raise ValueError("empty trees") - return max(maxkeys) - - def getdict(self, x): - return dict((key, value[x]) for key, value in self.iteritems()) - - def randhorizons(self): - """ - Generator yielding a sequence of random horizon distance - dictionaries chosen by drawing random times uniformly - distributed between the lowest and highest times recorded - in the history and returning the dictionary of horizon - distances for each of those times. - """ - x_min = self.minkey() - x_max = self.maxkey() - getdict = self.getdict - rnd = random.uniform - while 1: - yield getdict(rnd(x_min, x_max)) - - def all(self): - """ - Returns a list of the unique sets of horizon distances - recorded in the histories. - """ - # unique times for which a horizon distance measurement is - # available - all_x = set(x for value in self.values() for x in value.keys()) - - # the unique horizon distances from those times, expressed - # as frozensets of instrument/distance pairs - result = set(frozenset(self.getdict(x).items()) for x in all_x) - - # return a list of the results converted back to - # dictionaries - return map(dict, result) - - @classmethod - def from_xml(cls, xml, name): - xml = [elem for elem in xml.getElementsByTagName(ligolw.LIGO_LW.tagName) if elem.hasAttribute(u"Name") and elem.Name == u"%s:horizonhistories" % name] - try: - xml, = xml - except ValueError: - raise ValueError("document must contain exactly 1 HorizonHistories named '%s'" % name) - keys = [elem.Name.replace(u":nearestleaftree", u"") for elem in xml.getElementsByTagName(ligolw.Array.tagName) if elem.hasAttribute(u"Name") and elem.Name.endswith(u":nearestleaftree")] - self = cls() - for key in keys: - self[key] = NearestLeafTree.from_xml(xml, key) - return self - - def to_xml(self, name): - xml = ligolw.LIGO_LW({u"Name": u"%s:horizonhistories" % name}) - for key, value in self.items(): - xml.appendChild(value.to_xml(key)) - return xml - - # # Inspiral-specific CoincParamsDistributions sub-class # @@ -425,28 +121,6 @@ class ThincaCoincParamsDistributions(snglcoinc.CoincParamsDistributions): # accidental_weight * (measured denominator) numerator_accidental_weight = 0. - # if two horizon distances, D1 and D2, differ by less than - # - # | ln(D1 / D2) | <= log_distance_tolerance - # - # then they are considered to be equal for the purpose of recording - # horizon distance history, generating joint SNR PDFs, and so on. - # if the smaller of the two is < min_ratio * the larger then the - # smaller is treated as though it were 0. - # FIXME: is this choice of distance quantization appropriate? - @staticmethod - def quantize_horizon_distances(horizon_distances, log_distance_tolerance = PosInf, min_ratio = 0.1): - horizon_distance_norm = max(horizon_distances.values()) - assert horizon_distance_norm >= 0. - # check for no-op: all distances are 0. - if horizon_distance_norm == 0.: - return dict.fromkeys(horizon_distances, 0.) - # check for no-op: map all (non-zero) values to 1 - if math.isinf(log_distance_tolerance): - return dict((instrument, 1. if horizon_distance > 0. else 0.) for instrument, horizon_distance in horizon_distances.items()) - min_distance = min_ratio * horizon_distance_norm - return dict((instrument, (0. if horizon_distance < min_distance else math.exp(round(math.log(horizon_distance / horizon_distance_norm) / log_distance_tolerance) * log_distance_tolerance))) for instrument, horizon_distance in horizon_distances.items()) - # binnings (filter funcs look-up initialized in .__init__() snr_chi_binning = rate.NDBins((rate.ATanLogarithmicBins(3.6, 70., 600), rate.ATanLogarithmicBins(.001, 0.5, 300))) binnings = { @@ -465,7 +139,7 @@ class ThincaCoincParamsDistributions(snglcoinc.CoincParamsDistributions): def __init__(self, *args, **kwargs): super(ThincaCoincParamsDistributions, self).__init__(*args, **kwargs) - self.horizon_history = HorizonHistories() + self.horizon_history = horizonhistory.HorizonHistories() self.pdf_from_rates_func = { "instruments": self.pdf_from_rates_instruments, "H1_snr_chi": self.pdf_from_rates_snrchi2, @@ -523,7 +197,7 @@ class ThincaCoincParamsDistributions(snglcoinc.CoincParamsDistributions): # largest among them and then the fractions aquantized to # integer powers of a common factor # - return frozenset(instruments), frozenset(self.quantize_horizon_distances(horizon_distances).items()) + return frozenset(instruments), frozenset(horizonhistory.quantize_horizon_distances(horizon_distances).items()) def get_snr_joint_pdf(self, instruments, horizon_distances, verbose = False): # @@ -948,7 +622,7 @@ class ThincaCoincParamsDistributions(snglcoinc.CoincParamsDistributions): def from_xml(cls, xml, name): self = super(ThincaCoincParamsDistributions, cls).from_xml(xml, name) xml = self.get_xml_root(xml, name) - self.horizon_history = HorizonHistories.from_xml(xml, name) + self.horizon_history = horizonhistory.HorizonHistories.from_xml(xml, name) prefix = u"cached_snr_joint_pdf" for elem in [elem for elem in xml.childNodes if elem.Name.startswith(u"%s:" % prefix)]: key = ligolw_param.get_pyvalue(elem, u"key").strip().split(u";") diff --git a/gstlal-inspiral/python/inspiral.py b/gstlal-inspiral/python/inspiral.py index d56c43422dfce2094abf3aefb6238cd8680e8c51..d24b2ba2449f6b20a97402b7343cdf93ea2afd53 100644 --- a/gstlal-inspiral/python/inspiral.py +++ b/gstlal-inspiral/python/inspiral.py @@ -107,6 +107,7 @@ from gstlal import streamthinca from gstlal import svd_bank from gstlal import cbc_template_iir from gstlal import far +from gstlal.stats import horizonhistory lsctables.LIGOTimeGPS = LIGOTimeGPS @@ -751,7 +752,7 @@ class Data(object): try: horizon_history = self.coinc_params_distributions.horizon_history[instrument] except KeyError: - horizon_history = self.coinc_params_distributions.horizon_history[instrument] = far.NearestLeafTree() + horizon_history = self.coinc_params_distributions.horizon_history[instrument] = horizonhistory.NearestLeafTree() horizon_history[float(timestamp)] = horizon_distance def __get_likelihood_file(self): diff --git a/gstlal-inspiral/python/stats/Makefile.am b/gstlal-inspiral/python/stats/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..ce61e4edff0f7bf08422a5198348b7ce4da3d2ab --- /dev/null +++ b/gstlal-inspiral/python/stats/Makefile.am @@ -0,0 +1,5 @@ +pkgpythondir = $(pkgpyexecdir) +statsdir = $(pkgpythondir)/stats + +stats_PYTHON = \ + horizonhistory.py diff --git a/gstlal-inspiral/python/stats/horizonhistory.py b/gstlal-inspiral/python/stats/horizonhistory.py new file mode 100644 index 0000000000000000000000000000000000000000..aa55aa0a360aabb2632fb601892047f00d38e25a --- /dev/null +++ b/gstlal-inspiral/python/stats/horizonhistory.py @@ -0,0 +1,459 @@ +# Copyright (C) 2011--2014 Kipp Cannon, Chad Hanna, Drew Keppel +# Copyright (C) 2013 Jacob Peoples +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +## @file +# The python module to implement false alarm probability and false alarm rate +# +# ### Review Status +# +# STATUS: reviewed with actions +# +# | Names | Hash | Date | Diff to Head of Master | +# | ------------------------------------------- | ------------------------------------------- | ---------- | --------------------------- | +# | Hanna, Cannon, Meacher, Creighton J, Robinet, Sathyaprakash, Messick, Dent, Blackburn | 7fb5f008afa337a33a72e182d455fdd74aa7aa7a | 2014-11-05 |<a href="@gstlal_inspiral_cgit_diff/python/far.py?id=HEAD&id2=7fb5f008afa337a33a72e182d455fdd74aa7aa7a">far.py</a> | +# | Hanna, Cannon, Meacher, Creighton J, Sathyaprakash, | 72875f5cb241e8d297cd9b3f9fe309a6cfe3f716 | 2015-11-06 |<a href="@gstlal_inspiral_cgit_diff/python/far.py?id=HEAD&id2=72875f5cb241e8d297cd9b3f9fe309a6cfe3f716">far.py</a> | +# +# #### Action items +# + +# - Address the fixed SNR PDF using median PSD which could be pre-computed and stored on disk. (Store database of SNR pdfs for a variety of horizon) +# - The binning parameters are hard-coded too; Could it be a problem? +# - Chisquare binning hasn't been tuned to be a good representation of the PDFs; could be improved in future + +## @package far + + +# +# ============================================================================= +# +# Preamble +# +# ============================================================================= +# + + +import bisect +import copy +try: + from fpconst import NaN, NegInf, PosInf +except ImportError: + # fpconst is not part of the standard library and might not be + # available + NaN = float("nan") + NegInf = float("-inf") + PosInf = float("+inf") +import math +import numpy +import random + + +from glue.ligolw import ligolw +from glue.ligolw import array as ligolw_array + + +__all__ = ["NearestLeafTree", "HorizonHistories", "quantize_horizon_distances"] + + +# +# ============================================================================= +# +# Horizon Distance Record Keeping +# +# ============================================================================= +# + + +class NearestLeafTree(object): + """ + A simple binary tree in which look-ups return the value of the + closest leaf. Only float objects are supported for the keys and + values. Look-ups raise KeyError if the tree is empty. + + Example: + + >>> x = NearestLeafTree() + >>> x[100.0] = 120. + >>> x[104.0] = 100. + >>> x[102.0] = 110. + >>> x[90.] + 120.0 + >>> x[100.999] + 120.0 + >>> x[101.001] + 110.0 + >>> x[200.] + 100.0 + >>> del x[104] + >>> x[200.] + 110.0 + >>> x.keys() + [100.0, 102.0] + >>> 102 in x + True + >>> 103 in x + False + >>> x.to_xml(u"H1").write() + <Array Type="real_8" Name="H1:nearestleaftree:array"> + <Dim>2</Dim> + <Dim>2</Dim> + <Stream Delimiter=" " Type="Local"> + 100 102 + 120 110 + </Stream> + </Array> + """ + def __init__(self, items = ()): + """ + Initialize a NearestLeafTree. + + Example: + + >>> x = NearestLeafTree() + >>> x = NearestLeafTree([(100., 120.), (104., 100.), (102., 110.)]) + >>> y = {100.: 120., 104.: 100., 102.: 100.} + >>> x = NearestLeafTree(y.items()) + """ + # make a copy to ensure we have a stable object + self.tree = map(tuple, items) + self.tree.sort() + + def __setitem__(self, x, val): + """ + Example: + + >>> x = NearestLeafTree() + >>> x[100.:200.] = 0. + >>> x[150.] = 1. + >>> x + NearestLeaftree([(100, 0), (150, 1), (200, 0)]) + >>> x[210:] = 5. + >>> x[:90] = 5. + >>> x + NearestLeaftree([(90, 5), (100, 0), (150, 1), (200, 0), (210, 5)]) + """ + if isinstance(x, slice): + # replace all entries in the requested range of + # co-ordiantes with two entries, each with the + # given value, one at the start of the range and + # one at the end of the range. thus, after this + # all queries within that range will return this + # value. + if x.step is not None: + raise ValueError("%s: step not supported" % repr(x)) + if x.start is None: + if not self.tree: + raise IndexError("open-ended slice not supported with empty tree") + x = slice(min(self.minkey(), x.stop), x.stop) + if x.stop is None: + if not self.tree: + raise IndexError("open-ended slice not supported with empty tree") + x = slice(x.start, max(self.maxkey(), x.start)) + if x.start > x.stop: + raise ValueError("%s: bounds out of order" % repr(x)) + lo = bisect.bisect_left(self.tree, (x.start, NegInf)) + hi = bisect.bisect_right(self.tree, (x.stop, PosInf)) + self.tree[lo:hi] = ((x.start, val), (x.stop, val)) if x.start != x.stop else ((x.start, val),) + else: + # replace all entries having the same co-ordinate + # with this one + lo = bisect.bisect_left(self.tree, (x, NegInf)) + hi = bisect.bisect_right(self.tree, (x, PosInf)) + self.tree[lo:hi] = ((x, val),) + + def __getitem__(self, x): + if not self.tree: + raise KeyError(x) + if isinstance(x, slice): + raise ValueError("slices not supported") + hi = bisect.bisect_right(self.tree, (x, PosInf)) + try: + x_hi, val_hi = self.tree[hi] + except IndexError: + x_hi, val_hi = self.tree[-1] + if hi: + x_lo, val_lo = self.tree[hi - 1] + else: + x_lo, val_lo = x_hi, val_hi + return val_lo if abs(x - x_lo) < abs(x_hi - x) else val_hi + + def __delitem__(self, x): + """ + Example: + + >>> x = NearestLeafTree([(100., 0.), (150., 1.), (200., 0.)]) + >>> del x[150.] + >>> x + NearestLeafTree([(100., 0.), (200., 0.)]) + >>> del x[:] + NearestLeafTree([]) + """ + if isinstance(x, slice): + if x.step is not None: + raise ValueError("%s: step not supported" % repr(x)) + if x.start is None: + if not self.tree: + # no-op + return + x = slice(self.minkey(), x.stop) + if x.stop is None: + if not self.tree: + # no-op + return + x = slice(x.start, self.maxkey()) + if x.stop < x.start: + # no-op + return + lo = bisect.bisect_left(self.tree, (x.start, NegInf)) + hi = bisect.bisect_right(self.tree, (x.stop, PosInf)) + del self.tree[lo:hi] + elif not self.tree: + raise IndexError(x) + else: + lo = bisect.bisect_left(self.tree, (x, NegInf)) + if self.tree[lo][0] != x: + raise IndexError(x) + del self.tree[lo] + + def __nonzero__(self): + return bool(self.tree) + + def __iadd__(self, other): + for x, val in other.tree: + self[x] = val + return self + + def keys(self): + return [x for x, val in self.tree] + + def values(self): + return [val for x, val in self.tree] + + def items(self): + return list(self.tree) + + def min(self): + """ + Return the minimum value stored in the tree. This is O(n). + """ + if not self.tree: + raise ValueError("empty tree") + return min(val for x, val in self.tree) + + def minkey(self): + """ + Return the minimum key stored in the tree. This is O(1). + """ + if not self.tree: + raise ValueError("empty tree") + return self.tree[0][0] + + def max(self): + """ + Return the maximum value stored in the tree. This is O(n). + """ + if not self.tree: + raise ValueError("empty tree") + return max(val for x, val in self.tree) + + def maxkey(self): + """ + Return the maximum key stored in the tree. This is O(1). + """ + if not self.tree: + raise ValueError("empty tree") + return self.tree[-1][0] + + def __contains__(self, x): + try: + return bool(self.tree) and self.tree[bisect.bisect_left(self.tree, (x, NegInf))][0] == x + except IndexError: + return False + + def __len__(self): + return len(self.tree) + + def __repr__(self): + return "NearestLeaftree([%s])" % ", ".join("(%g, %g)" % item for item in self.tree) + + def weighted_mean(self, (lo, hi), weight = lambda y: 1.): + """ + Given the function f(x) = self[x], compute + + \int_{lo}^{hi} w(f(x)) f(x) dx + ------------------------------ + \int_{lo}^{hi} w(f(x)) dx + + where w(y) is a weighting function. The default weight + function is w(y) = 1. + """ + if not self.tree: + raise ValueError("empty tree") + if lo > hi: + # remove a common factor of -1 from the numerator + # and denominator + lo, hi = hi, lo + elif lo == hi: + return self[lo] + # now we are certain that lo < hi and that there is at + # least one entry in the tree + + # construct an array of (x,y) pairs such that f(x) = y and + # continues to equal y until the next x. ensure the 0th + # and last entries in the array are the left and right + # edges of the integration domain. + i = bisect.bisect_right(self.tree, (lo, NegInf)) + j = bisect.bisect_right(self.tree, (hi, PosInf)) + if i > 0: + i -= 1 + samples = self.tree[i:j+1] + samples = [((a_key + b_key) / 2., b_val) for (a_key, a_val), (b_key, b_val) in zip(samples[:-1], samples[1:])] + if not samples: + samples = [(lo, self[lo])] + elif samples[0][0] > lo: + samples.insert(0, (lo, self[lo])) + else: + samples[0] = lo, self[lo] + if samples[-1][0] < hi: + samples.append((hi, self[hi])) + else: + samples[-1] = hi, self[hi] + # return the ratio of the two integrals + samples = [((b_key - a_key) * weight(a_val), a_val) for (a_key, a_val), (b_key, b_val) in zip(samples[:-1], samples[1:])] + return sum(w * val for w, val in samples) / sum(w for w, val in samples) + + @classmethod + def from_xml(cls, xml, name): + return cls(map(tuple, ligolw_array.get_array(xml, u"%s:nearestleaftree" % name).array[:])) + + def to_xml(self, name): + return ligolw_array.from_array(u"%s:nearestleaftree" % name, numpy.array(self.tree, dtype = "double")) + + +class HorizonHistories(dict): + def __iadd__(self, other): + for key, history in other.iteritems(): + try: + self[key] += history + except KeyError: + self[key] = copy.deepcopy(history) + return self + + def minkey(self): + """ + Return the minimum key stored in the trees. + """ + minkeys = tuple(history.minkey() for history in self.values() if history) + if not minkeys: + raise ValueError("empty trees") + return min(minkeys) + + def maxkey(self): + """ + Return the maximum key stored in the trees. + """ + maxkeys = tuple(history.maxkey() for history in self.values() if history) + if not maxkeys: + raise ValueError("empty trees") + return max(maxkeys) + + def getdict(self, x): + return dict((key, value[x]) for key, value in self.iteritems()) + + def randhorizons(self): + """ + Generator yielding a sequence of random horizon distance + dictionaries chosen by drawing random times uniformly + distributed between the lowest and highest times recorded + in the history and returning the dictionary of horizon + distances for each of those times. + """ + x_min = self.minkey() + x_max = self.maxkey() + getdict = self.getdict + rnd = random.uniform + while 1: + yield getdict(rnd(x_min, x_max)) + + def all(self): + """ + Returns a list of the unique sets of horizon distances + recorded in the histories. + """ + # unique times for which a horizon distance measurement is + # available + all_x = set(x for value in self.values() for x in value.keys()) + + # the unique horizon distances from those times, expressed + # as frozensets of instrument/distance pairs + result = set(frozenset(self.getdict(x).items()) for x in all_x) + + # return a list of the results converted back to + # dictionaries + return map(dict, result) + + def weighted_mean(self, *args, **kwargs): + """ + Return a dictionary of the result of invoking + .weighted_mean() on each of the histories. args and kwargs + are passed to the .weighted_mean() method of the histories + objects, see their documentation for more information. + """ + return dict((key, value.weighted_mean(*args, **kwargs)) for key, value in self.iteritems()) + + @classmethod + def from_xml(cls, xml, name): + xml = [elem for elem in xml.getElementsByTagName(ligolw.LIGO_LW.tagName) if elem.hasAttribute(u"Name") and elem.Name == u"%s:horizonhistories" % name] + try: + xml, = xml + except ValueError: + raise ValueError("document must contain exactly 1 HorizonHistories named '%s'" % name) + keys = [elem.Name.replace(u":nearestleaftree", u"") for elem in xml.getElementsByTagName(ligolw.Array.tagName) if elem.hasAttribute(u"Name") and elem.Name.endswith(u":nearestleaftree")] + self = cls() + for key in keys: + self[key] = NearestLeafTree.from_xml(xml, key) + return self + + def to_xml(self, name): + xml = ligolw.LIGO_LW({u"Name": u"%s:horizonhistories" % name}) + for key, value in self.items(): + xml.appendChild(value.to_xml(key)) + return xml + + +# FIXME: is the choice of distance quantization appropriate? +def quantize_horizon_distances(horizon_distances, log_distance_tolerance = PosInf, min_ratio = 0.1): + """ + if two horizon distances, D1 and D2, differ by less than + + | ln(D1 / D2) | <= log_distance_tolerance + + then they are considered to be equal for the purpose of recording + horizon distance history, generating joint SNR PDFs, and so on. if + the smaller of the two is < min_ratio * the larger then the smaller + is treated as though it were 0. + """ + if any(horizon_distance < 0. for horizon_distance in horizon_distances.values()): + raise ValueError("%s: negative horizon distance" % repr(horizon_distances)) + horizon_distance_norm = max(horizon_distances.values()) + # check for no-op: all distances are 0. + if horizon_distance_norm == 0.: + return dict.fromkeys(horizon_distances, 0.) + # check for no-op: map all (non-zero) values to 1 + if math.isinf(log_distance_tolerance): + return dict((instrument, 1. if horizon_distance > 0. else 0.) for instrument, horizon_distance in horizon_distances.items()) + min_distance = min_ratio * horizon_distance_norm + return dict((instrument, (0. if horizon_distance < min_distance else math.exp(round(math.log(horizon_distance / horizon_distance_norm) / log_distance_tolerance) * log_distance_tolerance))) for instrument, horizon_distance in horizon_distances.items())