Online new extinction

Merged: Prathamesh Joshi requested to merge o4b-online-new-extinction into o4b-online
@@ -59,6 +59,7 @@ import math
 import multiprocessing
 import numpy
 import random
+import scipy
 from scipy import interpolate
 import sys
 import time
@@ -519,6 +520,9 @@ def binned_log_likelihood_ratio_rates_from_samples(signal_lr_lnpdf, noise_lr_lnpdf,
 class RankingStatPDF(object):
     ligo_lw_name_suffix = u"gstlal_inspiral_rankingstatpdf"
+    extinction_fitting_limits = (1/2., 1/100.)
+    # the extinction curve fit is performed over the range of LRs
+    # corresponding to the 1/2 and 1/100 points of the zero-lag CCDF

     @staticmethod
     def density_estimate(lnpdf, name, kernel = rate.gaussian_window(4.)):
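
Note that the fitting limits are applied to the zero-lag CCDF, not to raw LR values: the fit range is wherever the observed zero-lag survival function falls to 1/2 and 1/100 of its total count. Below is a minimal sketch of that index selection, using a toy zero-lag histogram (`fg` stands in for `zero_lag_lr_lnpdf.array`; the exponential shape and bin count are made up for illustration). It mirrors the index computation that `ready_for_extinction()` and `new_with_extinction()` both perform in the next hunk.

```python
import numpy

extinction_fitting_limits = (1/2., 1/100.)

# toy zero-lag histogram over LR bins; any decreasing tail will do
fg = numpy.exp(-numpy.linspace(0., 10., 100))

# CCDF of the zero-lag counts: fg_ccdf[i] = total counts in bins >= i
fg_ccdf = numpy.cumsum(fg[::-1])[::-1]

# first bin where the CCDF drops below 1/2 (resp. 1/100) of its total
ix_min = (fg_ccdf < fg_ccdf[0] * extinction_fitting_limits[0]).argmax()
ix_max = (fg_ccdf < fg_ccdf[0] * extinction_fitting_limits[1]).argmax()

print(ix_min, ix_max)  # the curve fit uses bins ix_min .. ix_max inclusive
```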
@@ -712,89 +716,73 @@ WHERE
         return self

-    def new_with_extinction(self):
-        self = self.copy()
+    def ready_for_extinction(self):
+        # ensure we have sufficient zero-lag and noise samples before
+        # attempting the extinction curve fit.  none of the checks
+        # below should evaluate to True once even a small number of
+        # zero-lag and noise samples has accumulated; they should
+        # only evaluate to True at the start of an online analysis
+        bg = self.noise_lr_lnpdf.copy().array
+        fg = self.zero_lag_lr_lnpdf.copy().array
+        bg[:10] = 0.
+        fg[:10] = 0.
+        if fg.sum() == 0 or bg.sum() == 0:
+            return False
+        fg_ccdf = numpy.cumsum(fg[::-1])[::-1]
+        ix_min = (fg_ccdf < fg_ccdf[0] * self.extinction_fitting_limits[0]).argmax()
+        ix_max = (fg_ccdf < fg_ccdf[0] * self.extinction_fitting_limits[1]).argmax()
+        if ix_min == ix_max:
+            return False
+        if fg[ix_min: ix_max + 1].sum() == 0 or bg[ix_min: ix_max + 1].sum() == 0:
+            return False
+        if (fg_ccdf[ix_min: ix_max + 1] == 0).any():
+            # the log would evaluate to -inf and the curve fit would crash
+            return False
+        return True

-        # the probability that an event survives clustering is the
-        # probability that no event with a higher value of the
-        # ranking statistic is within +/- dt of the event in
-        # question.  .noise_lr_lnpdf.count contains an accurate
-        # model of the counts of pre-clustered events in each
-        # ranking statistic bin, but we know the events are not
-        # independent and so the total number of them cannot be
-        # used to form a Poisson rate for the purpose of computing
-        # the probability we desire.  it has been found,
-        # empirically, that if the CCDF of pre-clustered background
-        # event counts is raised to some power and the clustering
-        # survival probability is computed from that as though it
-        # were the CCDF of a Poisson process with some effective
-        # mean event rate, the result is a very good model for the
-        # post-clustering distribution of ranking statistics.  we
-        # have two unknown parameters:  the power to which to raise
-        # the pre-clustered ranking statistic's CCDF, and the mean
-        # event rate to assume.  we solve for these parameters by
-        # fitting to the observed zero-lag ranking statistic
-        # histogram
+    def new_with_extinction(self, verbose = False):
+        self = self.copy()

-        x = self.noise_lr_lnpdf.bins[0].centres()
-        assert (x == self.zero_lag_lr_lnpdf.bins[0].centres()).all()
-
-        # some candidates are rejected by the ranking statistic,
-        # causing a spike in the zero-lag density at ln L = -inf.
-        # if enough candidates get rejected, this spike becomes the
-        # mode of the PDF, which messes up the mask constructed
-        # below for the fit.  we zero the first 40 bins here to
-        # prevent that from happening (assuming a density
-        # estimation kernel 4 bins wide, with a 10 sigma impulse
-        # length)
-        zl_counts = self.zero_lag_lr_lnpdf.array.copy()
-        zl_counts[:40] = 0.
-        if not zl_counts.any():
-            raise ValueError("zero-lag counts are all zero")
-
-        # get the noise counts
-        noise_counts = self.noise_lr_lnpdf.array.copy()
-
-        # get the zero-lag counts.  we model the tail of the
-        # distribution - the top 0.1% - 1% - where clustering
-        # affects the result at below the 1% level.
-        if zl_counts.sum() < 100 * 100:
-            tail_zl_counts = zl_counts.sum() * 0.99
-        else:
-            tail_zl_counts = zl_counts.sum() - 100
-        onepercent = zl_counts.cumsum().searchsorted(tail_zl_counts)
-
-        # normalize the counts
-        noise_counts /= noise_counts.sum()
-        zl_counts /= zl_counts.sum()
-
-        # compute the survival probability
-        norm = zl_counts[onepercent] / noise_counts[onepercent]
-        zl_counts[onepercent:] = 0
-        noise_counts[onepercent:] = 0
-        survival_probability = zl_counts / noise_counts
-        survival_probability[onepercent:] = norm
-        survival_probability[numpy.isnan(survival_probability)] = 0.0
-
-        # apply to the background counts and the signal counts
-        self.noise_lr_lnpdf.array *= survival_probability
-        self.noise_lr_lnpdf.normalize()
-        self.signal_lr_lnpdf.array *= survival_probability
-        self.signal_lr_lnpdf.normalize()

         #FIXME: Add comprehensive explanation about the extinction model
         #
         # never allow PDFs that have had the extinction model
         # applied to be written to disk: on-disk files must only
         # ever provide the original data.  forbid PDFs that have
         # been extincted from being re-extincted.
         #
         def new_with_extinction(*args, **kwargs):
             raise NotImplementedError("re-extincting an extincted RankingStatPDF object is forbidden")
         self.new_with_extinction = new_with_extinction
         def to_xml(*args, **kwargs):
             raise NotImplementedError("writing extincted RankingStatPDF object to disk is forbidden")
         self.to_xml = to_xml

+        lrs = self.noise_lr_lnpdf.centres()[0]
+        bg = self.noise_lr_lnpdf.array
+        fg = self.zero_lag_lr_lnpdf.array
+
+        # zero out the beginning bins of each because they are
+        # notoriously bad and should just be ignored
+        bg[:10] = 0.
+        fg[:10] = 0.
+
+        # fitting is done between ix_min and ix_max
+        fg_ccdf = numpy.cumsum(fg[::-1])[::-1]
+        ix_min = (fg_ccdf < fg_ccdf[0] * self.extinction_fitting_limits[0]).argmax()
+        ix_max = (fg_ccdf < fg_ccdf[0] * self.extinction_fitting_limits[1]).argmax()
+
+        bgtotal = bg[ix_min: ix_max + 1].sum()
+        bg_ccdf = numpy.cumsum(bg[::-1])[::-1] / bgtotal
+
+        # define the extincted background CCDF model for scipy.optimize.curve_fit to call
+        def bg_ccdf_extinct_func(idx, c, A):
+            return numpy.log(A * bgtotal / c) + numpy.log1p(-numpy.exp(-1 * bg_ccdf[idx] * c))
+
+        # find the best-fit c for extinction
+        c = scipy.optimize.curve_fit(bg_ccdf_extinct_func, range(ix_min, ix_max + 1), numpy.log(fg_ccdf[ix_min: ix_max + 1]), bounds = [0, numpy.inf], sigma = numpy.sqrt(1 / (bg_ccdf[ix_min: ix_max + 1] * bgtotal)))#, maxfev = 5000)
+        if verbose:
+            print(f"Best value of c is {c[0][0]}, A is {c[0][1]} with covariance {c[1]}", file = sys.stderr)
+
+        # calculate the extincted PDF
+        bg_pdf_extinct = c[0][1] * bg * numpy.exp(-1 * bg_ccdf * c[0][0])
+        self.noise_lr_lnpdf.array = bg_pdf_extinct
+        self.noise_lr_lnpdf.normalize()

         return self
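
Read as a model, `bg_ccdf_extinct_func` says the logarithm of the clustered (zero-lag) CCDF should follow log(A * bgtotal / c) + log1p(-exp(-c * bg_ccdf)): where the background CCDF is small the zero-lag CCDF tracks A * bgtotal * bg_ccdf, and where it is large the zero-lag CCDF saturates at A * bgtotal / c, which is the extinction of low-LR candidates by clustering. The sketch below is a self-contained toy under that model (synthetic histogram, made-up c, A, and fit range; only the model function and the `curve_fit` call mirror the MR) showing the fit recovering the parameters from a CCDF generated by the model itself.

```python
import numpy
import scipy.optimize

# toy background histogram over LR bins (made up for illustration)
bg = numpy.exp(-numpy.linspace(0., 10., 100))

ix_min, ix_max = 10, 60  # stand-ins for the CCDF-derived fit limits
bgtotal = bg[ix_min: ix_max + 1].sum()
bg_ccdf = numpy.cumsum(bg[::-1])[::-1] / bgtotal

def bg_ccdf_extinct_func(idx, c, A):
    # log of the extincted background CCDF, as in the MR
    idx = numpy.asarray(idx, dtype = int)  # curve_fit may hand the x values back as floats
    return numpy.log(A * bgtotal / c) + numpy.log1p(-numpy.exp(-bg_ccdf[idx] * c))

# synthesize a zero-lag CCDF that exactly obeys the model with known c, A
c_true, A_true = 3., 0.5
fg_ccdf = numpy.exp(bg_ccdf_extinct_func(numpy.arange(len(bg)), c_true, A_true))

popt, pcov = scipy.optimize.curve_fit(
    bg_ccdf_extinct_func,
    numpy.arange(ix_min, ix_max + 1),
    numpy.log(fg_ccdf[ix_min: ix_max + 1]),
    bounds = [0, numpy.inf],
    sigma = numpy.sqrt(1 / (bg_ccdf[ix_min: ix_max + 1] * bgtotal)),
)
print(popt)  # ~ [3.0, 0.5]: the fit recovers (c, A)

# the extincted background PDF then follows as in new_with_extinction()
bg_pdf_extinct = popt[1] * bg * numpy.exp(-bg_ccdf * popt[0])
```

One design point worth noting: the fit is performed in log space, and sigma = 1/sqrt(N) with N = bg_ccdf * bgtotal is the Poisson uncertainty of log N, so each CCDF point is weighted by the expected background counts above that bin.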