From d864df9667aac85ca7d8acd03eb588eae1615eb5 Mon Sep 17 00:00:00 2001
From: Chad Hanna <chad.hanna@ligo.org>
Date: Wed, 15 Aug 2018 20:51:42 -0400
Subject: [PATCH] add a script to make ROC curves for the HLV code

---
 gstlal-inspiral/tests/lv_stat_test | 340 +++++++++++++++++++++++++++++
 1 file changed, 340 insertions(+)
 create mode 100755 gstlal-inspiral/tests/lv_stat_test

diff --git a/gstlal-inspiral/tests/lv_stat_test b/gstlal-inspiral/tests/lv_stat_test
new file mode 100755
index 0000000000..305d090e89
--- /dev/null
+++ b/gstlal-inspiral/tests/lv_stat_test
@@ -0,0 +1,340 @@
+#!/usr/bin/python
+
+from gstlal.stats import inspiral_extrinsics
+import numpy
+import math
+from scipy.interpolate import interp1d
+import lal
+from lalburst import snglcoinc
+from scipy import stats
+
+import matplotlib
+matplotlib.use('Agg')
+from matplotlib import pyplot
+
+#
+# =============================================================================
+#
+# NOTE OLD CODE! dt dphi PDF
+#
+# =============================================================================
+#
+
+
+# Chebyshev coefficients of the dt PDF, each modeled as a polynomial in the
+# SNR ratio; only the even-order coefficients are non-zero (see lnP_dt_signal)
+dt_chebyshev_coeffs_polynomials = []
+dt_chebyshev_coeffs_polynomials.append(numpy.poly1d([-597.94227757949329, 2132.773853473127, -2944.126306979932, 1945.3033368083029, -603.91576991927593, 70.322754873993347]))
+dt_chebyshev_coeffs_polynomials.append(numpy.poly1d([-187.50681052710425, 695.84172327044325, -1021.0688423797938, 744.3266490236075, -272.12853221391498, 35.542404632554508]))
+dt_chebyshev_coeffs_polynomials.append(numpy.poly1d([52.128579054466599, -198.32054234352267, 301.34865541080791, -230.8522943433488, 90.780611645135437, -16.310130528927655]))
+dt_chebyshev_coeffs_polynomials.append(numpy.poly1d([48.216566165393878, -171.70632176976451, 238.48370471322843, -159.65005032451785, 50.122296925755677, -5.5466740894321367]))
+dt_chebyshev_coeffs_polynomials.append(numpy.poly1d([-34.030336093450863, 121.44714070928059, -169.91439486329773, 115.86873916702386, -38.08411813067778, 4.7396784315927816]))
+dt_chebyshev_coeffs_polynomials.append(numpy.poly1d([3.4576823675810178, -12.362609260376738, 17.3300203922424, -11.830868787176165, 3.900284020272442, -0.47157315012399248]))
+dt_chebyshev_coeffs_polynomials.append(numpy.poly1d([4.0423239651315113, -14.492611904657275, 20.847419746265583, -15.033846689362553, 5.3953159232942216, -0.78132676885883601]))
+norm_polynomial = numpy.poly1d([-348550.84040194791, 2288151.9147818103, -6623881.5646601757, 11116243.157047395, -11958335.1384027, 8606013.1361163966, -4193136.6690072878, 1365634.0450674745, -284615.52077054407, 34296.855844416605, -1815.7135263788341])
+
+dt_chebyshev_coeffs = [0]*13
+
+
+# Empirical fit for the von Mises mixture weight A as a function of the
+# combined SNR and |delta_t|
+def __dphi_calc_A(combined_snr, delta_t):
+	B = -10.840765 * numpy.abs(delta_t) + 1.072866
+	M = 46.403738 * numpy.abs(delta_t) - 0.160205
+	nu = 0.009032 * numpy.abs(delta_t) + 0.001150
+	return (1.0 / (1.0 + math.exp(-B*(combined_snr - M)))**(1.0/nu))
+
+
+# Empirical fit for the von Mises location parameter mu
+def __dphi_calc_mu(combined_snr, delta_t):
+	if delta_t >= 0.0:
+		A = 76.234617*delta_t + 0.001639
+		B = 0.290863
+		C = 4.775688
+		return (3.145953 - A * math.exp(-B*(combined_snr-C)))
+	else:
+		A = -130.877663*delta_t - 0.002814
+		B = 0.31023
+		C = 3.184671
+		return (3.145953 + A * math.exp(-B*(combined_snr-C)))
+
+
+# Empirical fit for the von Mises concentration parameter kappa
+def __dphi_calc_kappa(combined_snr, delta_t):
+	K = -176.540199*numpy.abs(delta_t) + 7.4387
+	B = -7.599585*numpy.abs(delta_t) + 0.215074
+	M = -1331.386835*numpy.abs(delta_t) - 35.309173
+	nu = 0.012225*numpy.abs(delta_t) + 0.000066
+	return (K / (1.0 + math.exp(-B*(combined_snr - M)))**(1.0/nu))
+
+
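+#
+# The phase-difference PDF below is a two-component mixture: a von Mises
+# distribution (weight A, location mu, concentration kappa, all given by the
+# empirical fits above) plus a uniform component with weight 1 - A, i.e.
+#
+#     P(dphi) = A * vonMises(dphi; kappa, mu) + (1 - A) / (2 pi)
+#
+# A minimal usage sketch (the numbers here are made up, purely illustrative):
+# the log PDF of observing a phase difference of 3.0 rad at delta_t = 5 ms and
+# combined SNR 10 would be
+#
+#     lnP = lnP_dphi_signal(3.0, 0.005, 10.0)
+#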
+def lnP_dphi_signal(delta_phi, delta_t, combined_snr):
+	# Compute von Mises mixture parameters
+	A_param = __dphi_calc_A(combined_snr, delta_t)
+	mu_param = __dphi_calc_mu(combined_snr, delta_t)
+	kappa_param = __dphi_calc_kappa(combined_snr, delta_t)
+	C_param = 1.0 - A_param
+
+	return math.log(A_param*stats.vonmises.pdf(delta_phi, kappa_param, loc=mu_param) + C_param/(2*math.pi))
+
+
+def lnP_dt_signal(dt, snr_ratio):
+	# FIXME Work out correct model
+
+	# Fits below an SNR ratio of 0.33 are not reliable, so clip
+	if snr_ratio < 0.33:
+		snr_ratio = 0.33
+
+	dt_chebyshev_coeffs[0] = dt_chebyshev_coeffs_polynomials[0](snr_ratio)
+	dt_chebyshev_coeffs[2] = dt_chebyshev_coeffs_polynomials[1](snr_ratio)
+	dt_chebyshev_coeffs[4] = dt_chebyshev_coeffs_polynomials[2](snr_ratio)
+	dt_chebyshev_coeffs[6] = dt_chebyshev_coeffs_polynomials[3](snr_ratio)
+	dt_chebyshev_coeffs[8] = dt_chebyshev_coeffs_polynomials[4](snr_ratio)
+	dt_chebyshev_coeffs[10] = dt_chebyshev_coeffs_polynomials[5](snr_ratio)
+	dt_chebyshev_coeffs[12] = dt_chebyshev_coeffs_polynomials[6](snr_ratio)
+
+	return numpy.polynomial.chebyshev.chebval(dt/0.015013, dt_chebyshev_coeffs) - numpy.log(norm_polynomial(snr_ratio))
+
+
+def lnP_dt_dphi_uniform_H1L1(coincidence_window_extension):
+	# FIXME Don't hardcode
+	# NOTE This assumes the standard delta t
+	return -math.log((snglcoinc.light_travel_time("H1", "L1") + coincidence_window_extension) * (2. * math.pi))
+
+
+def lnP_dt_dphi_uniform(coincidence_window_extension):
+	# NOTE Currently hardcoded for H1L1
+	# NOTE This is future-proofed so that in >2 ifo scenarios the
+	# appropriate length can be chosen for the uniform dt distribution
+	return lnP_dt_dphi_uniform_H1L1(coincidence_window_extension)
+
+
+def lnP_dt_dphi_signal(snrs, phase, dt, horizons, coincidence_window_extension):
+	# Return P(dt, dphi|{rho_{IFO}}, signal)
+	# FIXME Insert actual signal models
+	if sorted(dt.keys()) == ["H1", "L1"]:
+		delta_t = float(dt["H1"] - dt["L1"])
+		delta_phi = (phase["H1"] - phase["L1"]) % (2*math.pi)
+		combined_snr = math.sqrt(snrs["H1"]**2. + snrs["L1"]**2.)
+		if horizons["H1"] != 0 and horizons["L1"] != 0:
+			# neither horizon is zero, proceed as normal
+			H1_snr_over_horizon = snrs["H1"] / horizons["H1"]
+			L1_snr_over_horizon = snrs["L1"] / horizons["L1"]
+
+		elif horizons["H1"] == horizons["L1"]:
+			# both horizons are zero, treat the raw SNRs as equally weighted
+			H1_snr_over_horizon = snrs["H1"]
+			L1_snr_over_horizon = snrs["L1"]
+
+		else:
+			# one of the horizons is zero; fall back to the minimum
+			# reliable snr_ratio of 0.33 (the value lnP_dt_signal clips to)
+			# FIXME is this the right thing to do?
+			return lnP_dt_signal(abs(delta_t), 0.33) + lnP_dphi_signal(delta_phi, delta_t, combined_snr)
+
+		if H1_snr_over_horizon > L1_snr_over_horizon:
+			snr_ratio = L1_snr_over_horizon / H1_snr_over_horizon
+		else:
+			snr_ratio = H1_snr_over_horizon / L1_snr_over_horizon
+
+		return lnP_dt_signal(abs(delta_t), snr_ratio) + lnP_dphi_signal(delta_phi, delta_t, combined_snr)
+
+	else:
+		# IFOs != {H1, L1}: return the uniform distribution so that
+		# the dt/dphi terms don't affect the likelihood ratio
+		# FIXME Work out general N detector case
+		return lnP_dt_dphi_uniform(coincidence_window_extension)
+
+
+min_instruments = 2
+# FIXME check
+DELTA_T = 0.015
+
+HORIZONS = {"H1": 100., "L1": 100.}	# FIXME don't hardcode
+# The scale parameter of the numpy exponential distribution described here:
+# https://docs.scipy.org/doc/numpy-1.14.0/reference/generated/numpy.random.exponential.html
+EXPSCALE = 6.
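+# Single-detector SNR threshold; both samplers below use rejection sampling
+# to keep only coincidences with each detector above this value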
+SNR_THRESH = 4.0
+
+InspiralExtrinsics = inspiral_extrinsics.InspiralExtrinsics(min_instruments)
+SNRPDF = inspiral_extrinsics.SNRPDF.load()
+
+lnPSignalNone = []
+lnPSignalNew = []
+lnPSignalHLV = []
+lnPNoiseNone = []
+lnPNoiseNew = []
+lnPNoiseHLV = []
+
+#
+# Sample noise coincidences: H1 and L1 SNRs are drawn independently from an
+# exponential distribution; phases and time delays are drawn uniformly
+#
+def sample_noise(n = 10000):
+	snrs = {}
+	phases = {}
+	dt = {}
+	cnt = 0
+	while cnt < n:
+		snrs["H1"] = numpy.random.exponential(EXPSCALE)
+		snrs["L1"] = numpy.random.exponential(EXPSCALE)
+		phases["H1"] = numpy.random.rand() * numpy.pi * 2
+		phases["L1"] = numpy.random.rand() * numpy.pi * 2
+		dt["H1"] = 0.	# FIXME hardcoded
+		dt["L1"] = numpy.random.rand() * 2 * DELTA_T - DELTA_T	# FIXME hardcoded
+		if snrs["H1"] >= SNR_THRESH and snrs["L1"] >= SNR_THRESH:
+			cnt += 1
+			yield snrs, HORIZONS, phases, dt
+
+#
+# Sample signal coincidences: sources are distributed uniformly in volume and
+# orientation, and the H1/L1 SNRs, phases and arrival-time delays follow from
+# the antenna responses.  NOTE the Virgo response DV and location XV are
+# accepted but not yet used
+#
+def sample_signal(n = 10000,
+		DH = lal.CachedDetectors[lal.LHO_4K_DETECTOR].response,
+		DL = lal.CachedDetectors[lal.LLO_4K_DETECTOR].response,
+		DV = lal.CachedDetectors[lal.VIRGO_DETECTOR].response,
+		XH = lal.CachedDetectors[lal.LHO_4K_DETECTOR].location,
+		XL = lal.CachedDetectors[lal.LLO_4K_DETECTOR].location,
+		XV = lal.CachedDetectors[lal.VIRGO_DETECTOR].location,
+		epoch = lal.LIGOTimeGPS(0),
+		gmst = lal.GreenwichMeanSiderealTime(lal.LIGOTimeGPS(0))):
+	i = 0
+	snrs = {}
+	phases = {}
+	dt = {}
+	while i < n:
+		# p(D) ~ D^2, i.e. uniform in volume, out to the largest horizon
+		D = numpy.random.power(3) * max(HORIZONS.values())
+		cosiota = numpy.random.uniform(-1.0, 1.0)
+		ra = numpy.random.uniform(0.0, 2.0*numpy.pi)
+		dec = numpy.arcsin(numpy.random.uniform(-1.0, 1.0))
+		psi = numpy.random.uniform(0.0, 2.0*numpy.pi)
+		hplus = 0.5 * (1.0 + cosiota**2)
+		hcross = cosiota
+		FplusH, FcrossH = lal.ComputeDetAMResponse(DH, ra, dec, psi, gmst)
+		FplusL, FcrossL = lal.ComputeDetAMResponse(DL, ra, dec, psi, gmst)
+
+		dt["H1"] = lal.TimeDelayFromEarthCenter(XH, ra, dec, epoch)
+		dt["L1"] = lal.TimeDelayFromEarthCenter(XL, ra, dec, epoch)
+		# normalize with respect to H1
+		dt["L1"] -= dt["H1"]
+		dt["H1"] = 0.0
+
+		# FINDCHIRP Eq. (3.3b)
+		phases["H1"] = - numpy.arctan2(FcrossH * hcross, FplusH * hplus)
+		phases["L1"] = - numpy.arctan2(FcrossL * hcross, FplusL * hplus)
+
+		# FINDCHIRP Eq. (3.3c)
+		DeffH = D / ((FplusH * hplus)**2 + (FcrossH * hcross)**2)**0.5
+		DeffL = D / ((FplusL * hplus)**2 + (FcrossL * hcross)**2)**0.5
+		# the horizon distance corresponds to SNR 8
+		snrs["H1"] = HORIZONS["H1"] / DeffH * 8
+		snrs["L1"] = HORIZONS["L1"] / DeffL * 8
+
+		if snrs["H1"] >= SNR_THRESH and snrs["L1"] >= SNR_THRESH:
+			i += 1
+			yield snrs, HORIZONS, phases, dt
+
+# NOTE enable this as a sanity check to sample signal and noise from the same distribution
+#def sample_signal(n = 10000):
+#	return sample_noise(n)
+
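+#
+# For every sample, accumulate three approximate log likelihood ratios:
+#
+#   * "None": the SNR / instrument-combination terms with a uniform dt/dphi prior
+#   * "New":  the same SNR terms plus the analytic H1L1 dt/dphi model above
+#             (the O2-era HL statistic)
+#   * "HLV":  the inspiral_extrinsics time/phase/SNR model (the O3 code)
+#
+# The same exponential noise SNR model is subtracted from each so that the
+# three statistics can be compared on a common footing.
+#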
+for snrs, horizons, phase, dt in sample_signal():
+
+	# See https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.random.exponential.html#numpy.random.exponential
+	# This is the noise SNR model that is subtracted from each statistic
+	backgroundlnP = math.log(1. / EXPSCALE * numpy.exp(-snrs["H1"] / EXPSCALE)) + math.log(1. / EXPSCALE * numpy.exp(-snrs["L1"] / EXPSCALE))
+
+	lnPSignalNone.append(SNRPDF.lnP_instruments(snrs.keys(), horizons, min_instruments) + SNRPDF.lnP_snrs(snrs, horizons, min_instruments) + lnP_dt_dphi_uniform(DELTA_T) - backgroundlnP)
+
+	lnPSignalNew.append(SNRPDF.lnP_instruments(snrs.keys(), horizons, min_instruments) + SNRPDF.lnP_snrs(snrs, horizons, min_instruments) + lnP_dt_dphi_signal(snrs, phase, dt, horizons, DELTA_T) - backgroundlnP)
+
+	lnPSignalHLV.append(math.log(InspiralExtrinsics.p_of_instruments_given_horizons(snrs.keys(), horizons)) + math.log(InspiralExtrinsics.time_phase_snr(dt, phase, snrs, horizons)) - backgroundlnP)
+
+for snrs, horizons, phase, dt in sample_noise():
+
+	# Same noise SNR model as above
+	backgroundlnP = math.log(1. / EXPSCALE * numpy.exp(-snrs["H1"] / EXPSCALE)) + math.log(1. / EXPSCALE * numpy.exp(-snrs["L1"] / EXPSCALE))
+
+	lnPNoiseNone.append(SNRPDF.lnP_instruments(snrs.keys(), horizons, min_instruments) + SNRPDF.lnP_snrs(snrs, horizons, min_instruments) + lnP_dt_dphi_uniform(DELTA_T) - backgroundlnP)
+
+	lnPNoiseNew.append(SNRPDF.lnP_instruments(snrs.keys(), horizons, min_instruments) + SNRPDF.lnP_snrs(snrs, horizons, min_instruments) + lnP_dt_dphi_signal(snrs, phase, dt, horizons, DELTA_T) - backgroundlnP)
+
+	lnPNoiseHLV.append(math.log(InspiralExtrinsics.p_of_instruments_given_horizons(snrs.keys(), horizons)) + math.log(InspiralExtrinsics.time_phase_snr(dt, phase, snrs, horizons)) - backgroundlnP)
+
+#
+# Histogram the statistic values
+#
+
+statvec = numpy.arange(-15, 50)
+pyplot.figure()
+ax = pyplot.subplot(221)
+pyplot.hist(lnPNoiseNew, statvec)
+ax.set_yscale("log")
+pyplot.xlabel("HL Stat Value Noise")
+pyplot.ylabel("Count")
+ax = pyplot.subplot(222)
+pyplot.hist(lnPSignalNew, statvec)
+ax.set_yscale("log")
+pyplot.xlabel("HL Stat Value Signal")
+pyplot.ylabel("Count")
+ax = pyplot.subplot(223)
+pyplot.hist(lnPNoiseNone, statvec)
+ax.set_yscale("log")
+pyplot.xlabel("Nodtdphi Stat Value Noise")
+pyplot.ylabel("Count")
+ax = pyplot.subplot(224)
+pyplot.hist(lnPSignalNone, statvec)
+ax.set_yscale("log")
+pyplot.xlabel("Nodtdphi Stat Value Signal")
+pyplot.ylabel("Count")
+pyplot.savefig('None_vs_HL_hist.png')
+
+pyplot.figure()
+ax = pyplot.subplot(221)
+pyplot.hist(lnPNoiseHLV, statvec)
+ax.set_yscale("log")
+pyplot.xlabel("HLV Stat Value Noise")
+pyplot.ylabel("Count")
+ax = pyplot.subplot(222)
+pyplot.hist(lnPSignalHLV, statvec)
+ax.set_yscale("log")
+pyplot.xlabel("HLV Stat Value Signal")
+pyplot.ylabel("Count")
+ax = pyplot.subplot(223)
+pyplot.hist(lnPNoiseNew, statvec)
+ax.set_yscale("log")
+pyplot.xlabel("HL Stat Value Noise")
+pyplot.ylabel("Count")
+ax = pyplot.subplot(224)
+pyplot.hist(lnPSignalNew, statvec)
+ax.set_yscale("log")
+pyplot.xlabel("HL Stat Value Signal")
+pyplot.ylabel("Count")
+pyplot.savefig('HL_vs_HLV_hist.png')
+
+
+#
+# Make a ROC plot
+#
+
+minlnP = min(lnPSignalNone + lnPSignalNew + lnPSignalHLV + lnPNoiseNone + lnPNoiseNew + lnPNoiseHLV)
+maxlnP = max(lnPSignalNone + lnPSignalNew + lnPSignalHLV + lnPNoiseNone + lnPNoiseNew + lnPNoiseHLV)
+
+lnPs = numpy.linspace(minlnP, maxlnP, 1000)
+
+def ROC(s, data):
+	# Return the fraction of samples in data above each threshold in s
+	data.sort()
+	# Reverse sorted because we are counting the number above a threshold
+	y = (numpy.arange(len(data)) / float(len(data)))[::-1]
+	f = interp1d(data, y, bounds_error = False)
+	return f(s)
+
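+#
+# Evaluate each statistic's survival function on the common threshold grid.
+# For a given threshold, x is the fraction of noise samples above it and y is
+# the fraction of signal samples above it, so curves toward the upper left
+# separate signal from noise better; the diagonal is no discrimination.
+#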
+xnew = ROC(lnPs, lnPNoiseNew)
+ynew = ROC(lnPs, lnPSignalNew)
+xnone = ROC(lnPs, lnPNoiseNone)
+ynone = ROC(lnPs, lnPSignalNone)
+xhlv = ROC(lnPs, lnPNoiseHLV)
+yhlv = ROC(lnPs, lnPSignalHLV)
+
+pyplot.figure()
+pyplot.plot(xhlv, yhlv, label = "O3 Code")
+pyplot.plot(xnew, ynew, label = "O2 Code")
+pyplot.plot(xnone, ynone, label = "No dtdphi")
+pyplot.xlabel("Fraction above threshold in noise")
+pyplot.ylabel("Fraction above threshold in signal")
+# diagonal reference line: no discrimination between signal and noise
+pyplot.plot(numpy.arange(100)/100., numpy.arange(100)/100., "k")
+pyplot.legend(loc = "lower right")
+pyplot.grid()
+pyplot.savefig('O2_O3_LR_ROC.png')
-- 
GitLab