add a script to make ROC curves for the HLV code

d864df96 · Chad Hanna · c16dea7c · d864df96
Commit d864df96 authored 6 years ago by Chad Hanna
--- a/gstlal-inspiral/tests/lv_stat_test
+++ b/gstlal-inspiral/tests/lv_stat_test
+#!/usr/bin/python
+
+from gstlal.stats import inspiral_extrinsics
+import numpy
+import math
+from scipy.interpolate import interp1d
+import lal
+from lalburst import snglcoinc
+from scipy import stats
+
+import matplotlib
+matplotlib.use('Agg')
+from matplotlib import pyplot
+
+#
+# =============================================================================
+#
+#          NOTE OLD CODE!                       dt dphi PDF
+#
+# =============================================================================
+#
+
+
+dt_chebyshev_coeffs_polynomials = []
+dt_chebyshev_coeffs_polynomials.append(numpy.poly1d([-597.94227757949329, 2132.773853473127, -2944.126306979932, 1945.3033368083029, -603.91576991927593, 70.322754873993347]))
+dt_chebyshev_coeffs_polynomials.append(numpy.poly1d([-187.50681052710425, 695.84172327044325, -1021.0688423797938, 744.3266490236075, -272.12853221391498, 35.542404632554508]))
+dt_chebyshev_coeffs_polynomials.append(numpy.poly1d([52.128579054466599, -198.32054234352267, 301.34865541080791, -230.8522943433488, 90.780611645135437, -16.310130528927655]))
+dt_chebyshev_coeffs_polynomials.append(numpy.poly1d([48.216566165393878, -171.70632176976451, 238.48370471322843, -159.65005032451785, 50.122296925755677, -5.5466740894321367]))
+dt_chebyshev_coeffs_polynomials.append(numpy.poly1d([-34.030336093450863, 121.44714070928059, -169.91439486329773, 115.86873916702386, -38.08411813067778, 4.7396784315927816]))
+dt_chebyshev_coeffs_polynomials.append(numpy.poly1d([3.4576823675810178, -12.362609260376738, 17.3300203922424, -11.830868787176165, 3.900284020272442, -0.47157315012399248]))
+dt_chebyshev_coeffs_polynomials.append(numpy.poly1d([4.0423239651315113, -14.492611904657275, 20.847419746265583, -15.033846689362553, 5.3953159232942216, -0.78132676885883601]))
+norm_polynomial = numpy.poly1d([-348550.84040194791, 2288151.9147818103, -6623881.5646601757, 11116243.157047395, -11958335.1384027, 8606013.1361163966, -4193136.6690072878, 1365634.0450674745, -284615.52077054407, 34296.855844416605, -1815.7135263788341])
+
+dt_chebyshev_coeffs = [0]*13
+
+
+def __dphi_calc_A(combined_snr, delta_t):
+        B = -10.840765 * numpy.abs(delta_t) + 1.072866
+        M = 46.403738 * numpy.abs(delta_t) - 0.160205
+        nu = 0.009032 * numpy.abs(delta_t) + 0.001150
+        return (1.0 / (1.0 + math.exp(-B*(combined_snr - M)))**(1.0/nu))
+
+
+def __dphi_calc_mu(combined_snr, delta_t):
+        if delta_t >= 0.0:
+                A = 76.234617*delta_t + 0.001639
+                B = 0.290863
+                C = 4.775688
+                return (3.145953 - A* math.exp(-B*(combined_snr-C)))
+        elif delta_t < 0.0:
+                A = -130.877663*delta_t - 0.002814
+                B = 0.31023
+                C = 3.184671
+                return (3.145953 + A* math.exp(-B*(combined_snr-C)))
+
+
+def __dphi_calc_kappa(combined_snr, delta_t):
+        K = -176.540199*numpy.abs(delta_t) + 7.4387
+        B = -7.599585*numpy.abs(delta_t) + 0.215074
+        M = -1331.386835*numpy.abs(delta_t) - 35.309173
+        nu = 0.012225*numpy.abs(delta_t) + 0.000066
+        return (K / (1.0 + math.exp(-B*(combined_snr - M)))**(1.0/nu))
+
+
+def lnP_dphi_signal(delta_phi, delta_t, combined_snr):
+	# Compute von mises parameters
+	A_param = __dphi_calc_A(combined_snr, delta_t)
+	mu_param = __dphi_calc_mu(combined_snr, delta_t)
+	kappa_param = __dphi_calc_kappa(combined_snr, delta_t)
+	C_param = 1.0 - A_param
+
+	return math.log(A_param*stats.vonmises.pdf(delta_phi, kappa_param, loc=mu_param) + C_param/(2*math.pi))
+
+
+def lnP_dt_signal(dt, snr_ratio):
+	# FIXME Work out correct model
+
+	# Fits below an snr ratio of 0.33 are not reliable
+	if snr_ratio < 0.33:
+		snr_ratio = 0.33
+
+	dt_chebyshev_coeffs[0] = dt_chebyshev_coeffs_polynomials[0](snr_ratio)
+	dt_chebyshev_coeffs[2] = dt_chebyshev_coeffs_polynomials[1](snr_ratio)
+	dt_chebyshev_coeffs[4] = dt_chebyshev_coeffs_polynomials[2](snr_ratio)
+	dt_chebyshev_coeffs[6] = dt_chebyshev_coeffs_polynomials[3](snr_ratio)
+	dt_chebyshev_coeffs[8] = dt_chebyshev_coeffs_polynomials[4](snr_ratio)
+	dt_chebyshev_coeffs[10] = dt_chebyshev_coeffs_polynomials[5](snr_ratio)
+	dt_chebyshev_coeffs[12] = dt_chebyshev_coeffs_polynomials[6](snr_ratio)
+
+	return numpy.polynomial.chebyshev.chebval(dt/0.015013, dt_chebyshev_coeffs) - numpy.log(norm_polynomial(snr_ratio))
+
+def lnP_dt_dphi_uniform_H1L1(coincidence_window_extension):
+	# FIXME Dont hardcode
+	# NOTE This assumes the standard delta t
+	return -math.log((snglcoinc.light_travel_time("H1", "L1") + coincidence_window_extension) * (2. * math.pi))
+
+
+def lnP_dt_dphi_uniform(coincidence_window_extension):
+	# NOTE Currently hardcoded for H1L1
+	# NOTE this is future proofed so that in > 2 ifo scenarios, the
+	# appropriate length can be chosen for the uniform dt distribution
+	return lnP_dt_dphi_uniform_H1L1(coincidence_window_extension)
+
+
+def lnP_dt_dphi_signal(snrs, phase, dt, horizons, coincidence_window_extension):
+	# Return P(dt, dphi|{rho_{IFO}}, signal)
+	# FIXME Insert actual signal models
+	#print dt.keys()
+	#print sorted(dt.keys())
+	if sorted(dt.keys()) == ["H1", "L1"]:
+		#print "I'm in the loop!"
+		delta_t = float(dt["H1"] - dt["L1"])
+		delta_phi = (phase["H1"] - phase["L1"]) % (2*math.pi)
+		combined_snr = math.sqrt(snrs["H1"]**2. + snrs["L1"]**2.)
+		if horizons["H1"] != 0 and horizons["L1"] != 0:
+			# neither are zero, proceed as normal
+			H1_snr_over_horizon = snrs["H1"] / horizons["H1"]
+			L1_snr_over_horizon = snrs["L1"] / horizons["L1"]
+
+		elif horizons["H1"] == horizons["L1"]:
+			# both are zero, treat as equal
+			H1_snr_over_horizon = snrs["H1"]
+			L1_snr_over_horizon = snrs["L1"]
+
+		else:
+			# one of them is zero, treat snr_ratio as 0, which will get changed to 0.33 in lnP_dt_signal
+			# FIXME is this the right thing to do?
+			return lnP_dt_signal(abs(delta_t), 0.33) + lnP_dphi_signal(delta_phi, delta_t, combined_snr)
+
+		if H1_snr_over_horizon > L1_snr_over_horizon:
+			snr_ratio = L1_snr_over_horizon / H1_snr_over_horizon
+
+		else:
+			snr_ratio = H1_snr_over_horizon / L1_snr_over_horizon
+
+		return lnP_dt_signal(abs(delta_t), snr_ratio) + lnP_dphi_signal(delta_phi, delta_t, combined_snr)
+
+	else:
+		# IFOs != {H1,L1} case, thus just return uniform
+		# distribution so that dt/dphi terms dont affect
+		# likelihood ratio
+		# FIXME Work out general N detector case
+		return lnP_dt_dphi_uniform(coincidence_window_extension)
+
+
+
+min_instruments = 2
+# FIXME check
+DELTA_T = 0.015
+
+HORIZONS = {"H1":100., "L1":100.} # FIXME dont hardcode
+# The scale in the scipy exponential distribution described here:
+# https://docs.scipy.org/doc/numpy-1.14.0/reference/generated/numpy.random.exponential.html
+EXPSCALE = 6.
+SNR_THRESH = 4.0
+
+InspiralExtrinsics = inspiral_extrinsics.InspiralExtrinsics(min_instruments)
+SNRPDF = inspiral_extrinsics.SNRPDF.load()
+
+lnPSignalNone = []
+lnPSignalNew = []
+lnPSignalHLV = []
+lnPNoiseNone = []
+lnPNoiseNew = []
+lnPNoiseHLV = []
+
+#
+# Samples from an exponential distribution in SNR
+#
+def sample_noise(n = 10000):
+
+	snrs = {}
+	horizons = {}
+	phases = {}
+	dt = {}
+	cnt = 0
+	while cnt < n:
+		snrs["H1"] = numpy.random.exponential(EXPSCALE)
+		snrs["L1"] = numpy.random.exponential(EXPSCALE)
+		phases["H1"] = numpy.random.rand() * numpy.pi * 2
+		phases["L1"] = numpy.random.rand() * numpy.pi * 2
+		dt["H1"] = 0. # FIXME hardcoded
+		dt["L1"] = numpy.random.rand() * 2 * DELTA_T - DELTA_T # FIXME hardcoded
+		if snrs["H1"] >= SNR_THRESH and snrs["L1"] >= SNR_THRESH:
+			cnt += 1
+			yield snrs, HORIZONS, phases, dt
+
+def sample_signal(n = 10000, DH = lal.CachedDetectors[lal.LHO_4K_DETECTOR].response, DL = lal.CachedDetectors[lal.LLO_4K_DETECTOR].response, DV = lal.CachedDetectors[lal.VIRGO_DETECTOR].response, XH = lal.CachedDetectors[lal.LHO_4K_DETECTOR].location, XL = lal.CachedDetectors[lal.LLO_4K_DETECTOR].location, XV = lal.CachedDetectors[lal.VIRGO_DETECTOR].location, epoch = lal.LIGOTimeGPS(0), gmst = lal.GreenwichMeanSiderealTime(lal.LIGOTimeGPS(0))):
+
+	i = 0
+	snrs = {}
+	horizons = {}
+	phases = {}
+	dt = {}
+	while i != n:
+
+		D = numpy.random.power(3) * max(HORIZONS.values())
+		cosiota = numpy.random.uniform(-1.0,1.0)
+		ra = numpy.random.uniform(0.0,2.0*numpy.pi)
+		dec = numpy.arcsin(numpy.random.uniform(-1.0,1.0))
+		psi = numpy.random.uniform(0.0,2.0*numpy.pi)
+		hplus = 0.5 * (1.0 + cosiota**2)
+		hcross = cosiota
+		FplusH, FcrossH = lal.ComputeDetAMResponse(DH, ra, dec, psi, gmst)
+		FplusL, FcrossL = lal.ComputeDetAMResponse(DL, ra, dec, psi, gmst) 
+
+		dt["H1"] = lal.TimeDelayFromEarthCenter(XH, ra, dec, epoch)
+		dt["L1"] = lal.TimeDelayFromEarthCenter(XL, ra, dec, epoch)
+		# normalize with respect to H
+		dt["L1"] -= dt["H1"]
+		dt["H1"] = 0.0
+
+		# FINDCHIRP Eq. (3.3b)
+		phases["H1"] = - numpy.arctan2(FcrossH * hcross, FplusH * hplus)
+		phases["L1"] = - numpy.arctan2(FcrossL * hcross, FplusL * hplus)
+
+		# FINDCHIRP Eq. (3.3c)
+		DeffH = D / ((FplusH * hplus)**2 + (FcrossH * hcross)**2)**0.5
+		DeffL = D / ((FplusL * hplus)**2 + (FcrossL * hcross)**2)**0.5
+		snrs["H1"] = HORIZONS["H1"] / DeffH * 8
+		snrs["L1"] = HORIZONS["L1"] / DeffL * 8
+
+		if snrs["H1"] >= SNR_THRESH and snrs["L1"] >= SNR_THRESH:
+			i += 1
+			yield snrs, HORIZONS, phases, dt
+
+# NOTE enable this as a sanity check to sample signal and noise from same distribution
+#def sample_signal(n = 10000):
+#	return sample_noise(n)
+
+for snrs, horizons, phase, dt in sample_signal():
+
+	# See https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.random.exponential.html#numpy.random.exponential
+	# This serves as the noise SNR model that is subtracted from the signal model
+	backgroundlnP = math.log(1. / EXPSCALE * numpy.exp(-snrs["H1"] / EXPSCALE)) + math.log(1. / EXPSCALE * numpy.exp(-snrs["L1"] / EXPSCALE))
+
+	lnPSignalNone.append(SNRPDF.lnP_instruments(snrs.keys(), horizons, min_instruments) + SNRPDF.lnP_snrs(snrs, horizons, min_instruments) + lnP_dt_dphi_uniform(DELTA_T) - backgroundlnP)
+
+	lnPSignalNew.append(SNRPDF.lnP_instruments(snrs.keys(), horizons, min_instruments) + SNRPDF.lnP_snrs(snrs, horizons, min_instruments) + lnP_dt_dphi_signal(snrs, phase, dt, horizons, DELTA_T) - backgroundlnP)
+
+	lnPSignalHLV.append(math.log(InspiralExtrinsics.p_of_instruments_given_horizons(snrs.keys(), horizons)) + math.log(InspiralExtrinsics.time_phase_snr(dt, phase, snrs, horizons)) - backgroundlnP)
+
+for snrs, horizons, phase, dt in sample_noise():
+
+	# See https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.random.exponential.html#numpy.random.exponential
+	# This serves as the noise SNR model that is subtracted from the signal model
+	backgroundlnP = math.log(1. / EXPSCALE * numpy.exp(-snrs["H1"] / EXPSCALE)) + math.log(1. / EXPSCALE * numpy.exp(-snrs["L1"] / EXPSCALE))
+
+	lnPNoiseNone.append(SNRPDF.lnP_instruments(snrs.keys(), horizons, min_instruments) + SNRPDF.lnP_snrs(snrs, horizons, min_instruments) + lnP_dt_dphi_uniform(DELTA_T) - backgroundlnP)
+
+	lnPNoiseNew.append(SNRPDF.lnP_instruments(snrs.keys(), horizons, min_instruments) + SNRPDF.lnP_snrs(snrs, horizons, min_instruments) + lnP_dt_dphi_signal(snrs, phase, dt, horizons, DELTA_T) - backgroundlnP)
+
+	lnPNoiseHLV.append(math.log(InspiralExtrinsics.p_of_instruments_given_horizons(snrs.keys(), horizons)) + math.log(InspiralExtrinsics.time_phase_snr(dt, phase, snrs, horizons)) - backgroundlnP)
+
+#
+# Histogram the values
+#
+
+statvec = numpy.arange(-15, 50)
+pyplot.figure()
+ax = pyplot.subplot(221)
+pyplot.hist(lnPNoiseNew, statvec)
+ax.set_yscale("log")
+pyplot.xlabel("HL Stat Value Noise")
+pyplot.ylabel("Count")
+ax = pyplot.subplot(222)
+pyplot.hist(lnPSignalNew, statvec)
+ax.set_yscale("log")
+pyplot.xlabel("HL Stat Value Signal")
+pyplot.ylabel("Count")
+ax = pyplot.subplot(223)
+pyplot.hist(lnPNoiseNone, statvec)
+ax.set_yscale("log")
+pyplot.xlabel("Nodtdphi Stat Value Noise")
+pyplot.ylabel("Count")
+ax = pyplot.subplot(224)
+pyplot.hist(lnPSignalNone, statvec)
+ax.set_yscale("log")
+pyplot.xlabel("Nodtdphi Stat Value Signal")
+pyplot.ylabel("Count")
+pyplot.savefig('None_vs_HL_hist.png')
+
+statvec = numpy.arange(-15, 50)
+pyplot.figure()
+ax = pyplot.subplot(221)
+pyplot.hist(lnPNoiseHLV, statvec)
+ax.set_yscale("log")
+pyplot.xlabel("HLV Stat Value Noise")
+pyplot.ylabel("Count")
+ax = pyplot.subplot(222)
+pyplot.hist(lnPSignalHLV, statvec)
+ax.set_yscale("log")
+pyplot.xlabel("HLV Stat Value Signal")
+pyplot.ylabel("Count")
+ax = pyplot.subplot(223)
+pyplot.hist(lnPNoiseNew, statvec)
+ax.set_yscale("log")
+pyplot.xlabel("HL Stat Value Noise")
+pyplot.ylabel("Count")
+ax = pyplot.subplot(224)
+pyplot.hist(lnPSignalNew, statvec)
+ax.set_yscale("log")
+pyplot.xlabel("HL Stat Value Signal")
+pyplot.ylabel("Count")
+pyplot.savefig('HL_vs_HLV_hist.png')
+
+
+#
+# Make a ROC plot
+#
+
+minlnP = min(lnPSignalNone + lnPSignalNew + lnPSignalHLV + lnPNoiseNone + lnPNoiseNew + lnPNoiseHLV)
+maxlnP = max(lnPSignalNone + lnPSignalNew + lnPSignalHLV + lnPNoiseNone + lnPNoiseNew + lnPNoiseHLV)
+
+lnPs = numpy.linspace(minlnP, maxlnP, 1000)
+
+def ROC(s, data):
+	data.sort()
+	# Reverse sorted because we are counting number above a threshold
+	y = (numpy.arange(len(data)) / float(len(data)))[::-1]
+	f = interp1d(data, y, bounds_error = False)
+	return f(s)
+
+xnew = ROC(lnPs, lnPNoiseNew)
+ynew = ROC(lnPs, lnPSignalNew)
+xnone = ROC(lnPs, lnPNoiseNone)
+ynone = ROC(lnPs, lnPSignalNone)
+xhlv = ROC(lnPs, lnPNoiseHLV)
+yhlv = ROC(lnPs, lnPSignalHLV)
+
+pyplot.figure()
+pyplot.plot(xhlv, yhlv, label = "O3 Code")
+pyplot.plot(xnew, ynew, label = "O2 Code")
+pyplot.plot(xnone, ynone, label = "No dtdphi")
+pyplot.xlabel("Fraction above threshold in noise")
+pyplot.ylabel("Fraction above threshold in signal")
+pyplot.plot(numpy.arange(100)/100., numpy.arange(100)/100., "k")
+pyplot.legend(loc = "lower right")
+pyplot.grid()
+pyplot.savefig('O2_O3_LR_ROC.png')