From 63acb3f2611905c7ae966af0fc5e885b6a9f85b3 Mon Sep 17 00:00:00 2001 From: Shasvath Jagat Kapadia <shasvath.kapadia@submit.nemo.uwm.edu> Date: Tue, 28 May 2019 14:36:08 -0500 Subject: [PATCH] Added p_astro module, modified Makefile.am --- gstlal-inspiral/python/Makefile.am | 3 +- gstlal-inspiral/python/p_astro_gstlal.py | 414 +++++++++++++++++++++++ 2 files changed, 416 insertions(+), 1 deletion(-) create mode 100644 gstlal-inspiral/python/p_astro_gstlal.py diff --git a/gstlal-inspiral/python/Makefile.am b/gstlal-inspiral/python/Makefile.am index 932f339434..8786069886 100644 --- a/gstlal-inspiral/python/Makefile.am +++ b/gstlal-inspiral/python/Makefile.am @@ -35,7 +35,8 @@ pkgpython_PYTHON = \ svd_bank.py \ svd_bank_snr.py \ templates.py \ - webpage.py + webpage.py \ + p_astro_gstlal.py pkgpyexec_LTLIBRARIES = _rate_estimation.la _snglinspiraltable.la _spawaveform.la diff --git a/gstlal-inspiral/python/p_astro_gstlal.py b/gstlal-inspiral/python/p_astro_gstlal.py new file mode 100644 index 0000000000..036ca4ee48 --- /dev/null +++ b/gstlal-inspiral/python/p_astro_gstlal.py @@ -0,0 +1,414 @@ +import numpy as np +from ligo.lw import ligolw +from ligo.lw.ligolw import LIGOLWContentHandler +from ligo.lw import array as ligolw_array +from ligo.lw import param as ligolw_param +from ligo.lw import utils as ligolw_utils +from ligo.lw import lsctables +from ligo.segments import utils as segments_utils +from lal import rate + +def p_astro_update(category, event_bayesfac_dict, mean_values_dict): + """ + Compute `p_astro` for a new event using mean values of Poisson expected + counts constructed from all the previous events. Invoked with every new + GraceDB entry. + + Parameters + ---------- + category : string + source category + event_bayesfac_dict : dictionary + event Bayes factors + mean_values_dict : dictionary + mean values of Poisson counts + + Returns + ------- + p_astro : float + p_astro by source category + """ + if category == "counts_Terrestrial": + numerator = mean_values_dict["counts_Terrestrial"] + else: + numerator = \ + event_bayesfac_dict[category] * mean_values_dict[category] + + denominator = mean_values_dict["counts_Terrestrial"] + \ + np.sum([mean_values_dict[key] * event_bayesfac_dict[key] + for key in event_bayesfac_dict.keys()]) + + return numerator / denominator + + +def evaluate_p_astro_from_bayesfac(astro_bayesfac, + mean_values_dict, + mass1, + mass2, + spin1z=None, + spin2z=None, + num_bins=None, + activation_counts=None): + """ + Evaluates `p_astro` for a new event using Bayes factor, masses, and number + of astrophysical categories. Invoked with every new GraceDB entry. + + Parameters + ---------- + astro_bayesfac : float + astrophysical Bayes factor + mean_values_dict: dictionary + mean values of Poisson counts + mass1 : float + event mass1 + mass2 : float + event mass2 + spin1z : float + event spin1z + spin2z : float + event spin2z + url_weights_key: str + url config key pointing to weights file + + Returns + ------- + p_astro : dictionary + p_astro for all source categories + """ + + a_hat_bns, a_hat_bbh, a_hat_nsbh, a_hat_mg, num_bins = \ + make_weights_from_histograms(mass1, + mass2, + spin1z, + spin2z, + num_bins, + activation_counts) + + # Compute category-wise Bayes factors + # from astrophysical Bayes factor + rescaled_fb = num_bins * astro_bayesfac + bns_bayesfac = a_hat_bns * rescaled_fb + nsbh_bayesfac = a_hat_nsbh * rescaled_fb + bbh_bayesfac = a_hat_bbh * rescaled_fb + mg_bayesfac = a_hat_mg * rescaled_fb + + # Construct category-wise Bayes factor dictionary + event_bayesfac_dict = {"counts_BNS": bns_bayesfac, + "counts_NSBH": nsbh_bayesfac, + "counts_BBH": bbh_bayesfac, + "counts_MassGap": mg_bayesfac} + + # Compute the p-astro values for each source category + # using the mean values + p_astro_values = {} + for category in mean_values_dict: + p_astro_values[category.split("_")[1]] = \ + p_astro_update(category=category, + event_bayesfac_dict=event_bayesfac_dict, + mean_values_dict=mean_values_dict) + + return p_astro_values + + +def make_weights_from_hardcuts(mass1, mass2): + """ + Construct binary weights from component masses based on cuts in component + mass space that define astrophysical source categories. To be used for + MBTA, PyCBC and SPIIR. + + Parameters + ---------- + mass1 : float + heavier component mass of the event + mass2 : float + lighter component mass of the event + + Returns + ------- + a_bns, a_bbh, a_nshb, a_mg : floats + binary weights (i.e, 1 or 0) + """ + + a_hat_bns = int(mass1 <= 3 and mass2 <= 3) + a_hat_bbh = int(mass1 > 5 and mass2 > 5) + a_hat_nsbh = int(min(mass1, mass2) <= 3 and + max(mass1, mass2) > 5) + a_hat_mg = int(3 < mass1 <= 5 or 3 < mass2 <= 5) + num_bins = 4 + + return a_hat_bns, a_hat_bbh, a_hat_nsbh, a_hat_mg, num_bins + + +def closest_template(params, params_array): + """ + Associate event's template to a template in the template bank. The assumed + bank is the one used by Gstlal. Hence, for Gstlal events, the association + should be exact, up to rounding errors. + + Parameters + ---------- + params : tuple of floats + intrinsic params of event template + params_array: array of arrays + array of template bank's template params + + Returns + ------- + idx : int + index pertaining to params_array + for matching template + """ + idx = np.argmin(np.sum((params_array-params)**2,axis=1)) + + return idx + + +def make_weights_from_histograms(mass1, + mass2, + spin1z, + spin2z, + num_bins=None, + activation_counts=None): + """ + Construct binary weights from bin number provided by GstLAL, and a weights + matrix pre-constructed and stored in a file, to be read from a url. The + weights are keyed on template parameters of Gstlal's template bank. If that + doesn't work, construct binary weights. + + Parameters + ---------- + mass1 : float + heavier component mass of the event + mass2 : float + lighter component mass of the event + spin1z : float + z component spin of heavier mass + spin2z : float + z component spin of lighter mass + num_bins : int + number of bins for template weighting + activation_counts : pandas dataframe + data frame for template weights + + Returns + ------- + a_hat_bns, a_hat_bbh, a_hat_nsbh, a_hat_mg : floats + mass-based template weights + """ + + if activation_counts is None or num_bins is None: + a_hat_bns, a_hat_bbh, a_hat_nsbh, a_hat_mg, num_bins = \ + make_weights_from_hardcuts(mass1, mass2) + else: + params = (mass1, mass2, spin1z, spin2z) + params_names = ['mass1', 'mass2', 'spin1z', 'spin2z'] + params_array = \ + np.array([activation_counts[key][:] for key in params_names]).T + idx = closest_template(params, params_array) + a_hat_bns = activation_counts['bns'][:][idx] + a_hat_mg = activation_counts['mg'][:][idx] + a_hat_nsbh = activation_counts['nsbh'][:][idx] + a_hat_bbh = activation_counts['bbh'][:][idx] + + return a_hat_bns, a_hat_bbh, a_hat_nsbh, a_hat_mg, num_bins + + +def _get_event_ln_likelihood_ratio_svd_endtime_mass(xmldoc): + """ + Task to acquire event parameters + + Parameters + ---------- + xmldoc : lsctables object + Object from which event parameters can be extracted + + Returns + ------- + event parameters : tuple + Tuple of event parameters: lnl, component masses, snr, far + """ + + coinc_event, = lsctables.CoincTable.get_table(xmldoc) + sngl_inspiral = lsctables.SnglInspiralTable.get_table(xmldoc) + coinc_inspiral = lsctables.CoincInspiralTable.get_table(xmldoc) + + return (coinc_event.likelihood, + sngl_inspiral[0].mass1, + sngl_inspiral[0].mass2, + coinc_inspiral.snr, + coinc_inspiral.combined_far) + + +def _get_ln_f_over_b(rankingstatpdf, + ln_likelihood_ratios, + livetime): + + # affect the zeroing of the PDFs below threshold by hacking the + # histograms. Do the indexing ourselves to not 0 the bin @ threshold + noise_lr_lnpdf = rankingstatpdf.noise_lr_lnpdf + signal_lr_lnpdf = rankingstatpdf.signal_lr_lnpdf + zero_lag_lr_lnpdf = rankingstatpdf.zero_lag_lr_lnpdf + ssorted = zero_lag_lr_lnpdf.array.cumsum()[-1] - 10000 + idx = zero_lag_lr_lnpdf.array.cumsum().searchsorted(ssorted) + + ln_likelihood_ratio_threshold = \ + zero_lag_lr_lnpdf.bins[0].lower()[idx] + + # Compute FAR for threshold, and estimate + # terrestrial count consistent with threshold + fapfar = \ + FAPFAR(rankingstatpdf.new_with_extinction()) + far_threshold = fapfar.far_from_rank(ln_likelihood_ratio_threshold) + lam_0 = far_threshold*livetime + rankingstatpdf.noise_lr_lnpdf.array[ + :noise_lr_lnpdf.bins[0][ln_likelihood_ratio_threshold]] \ + = 0. + rankingstatpdf.noise_lr_lnpdf.normalize() + + rankingstatpdf.signal_lr_lnpdf.array[ + :signal_lr_lnpdf.bins[0][ln_likelihood_ratio_threshold]] \ + = 0. + rankingstatpdf.signal_lr_lnpdf.normalize() + + rankingstatpdf.zero_lag_lr_lnpdf.array[ + :zero_lag_lr_lnpdf.bins[0][ln_likelihood_ratio_threshold]] \ + = 0. + rankingstatpdf.zero_lag_lr_lnpdf.normalize() + + f = rankingstatpdf.signal_lr_lnpdf + b = rankingstatpdf.noise_lr_lnpdf + ln_f_over_b = \ + np.array([f[ln_lr, ] - b[ln_lr, ] for ln_lr in ln_likelihood_ratios]) + if np.isnan(ln_f_over_b).any(): + raise ValueError("NaN encountered in ranking statistic PDF ratios") + if np.isinf(np.exp(ln_f_over_b)).any(): + raise ValueError( + "infinity encountered in ranking statistic PDF ratios") + return ln_f_over_b, lam_0 + + +def compute_p_astro(event_ln_likelihood_ratio, + event_mass1, + event_mass2, + snr, + far, + livetime, + mean_values_dict): + """ + Task to compute `p_astro` by source category. + + Parameters + ---------- + event_ln_likelihood_ratio : float + Event's log likelihood ratio value + event_mass1 : float + Event's heavier component mass value + event_mass2 : float + Event's lighter component mass value + snr: float + Event's snr value + far: float + Event's far value + livetime: float + Livetime consistent with estimate of Poisson counts. + Eg. If counts are from O1 and O2, livetime is O1-O2 + livetime in seconds. + mean_values_dict : dictionary + dictionary of source specific FGMC Poisson counts + + Returns + ------- + p_astros : str + JSON dump of the p_astro by source category + + Example + ------- + >>> p_astros = json.loads(compute_p_astro(files)) + >>> p_astros + {'BNS': 0.999, 'BBH': 0.0, 'NSBH': 0.0, 'Terrestrial': 0.001} + """ + + # Using the zerolag log likelihood ratio value event, + # and the foreground/background model information provided + # in ranking_data.xml.gz, compute the ln(f/b) value for this event + zerolag_ln_likelihood_ratios = np.array([event_ln_likelihood_ratio]) + try: + ln_f_over_b, lam_0 = \ + _get_ln_f_over_b(rankingstatpdf, + zerolag_ln_likelihood_ratios, + livetime) + except ValueError: + return compute_p_astro_approx(snr, + far, + event_mass1, + event_mass2, + mean_values_dict) + + # Read mean values from url file + mean_values_dict["counts_Terrestrial"] = lam_0 + + # Compute categorical p_astro values + p_astro_values = \ + evaluate_p_astro_from_bayesfac(np.exp(ln_f_over_b[0]), + mean_values_dict, + event_mass1, + event_mass2) + + # Dump values in json file + return json.dumps(p_astro_values) + +def compute_p_astro_approx(snr, far, mass1, mass2, livetime, mean_values_dict): + """ + Task to compute `p_astro` by source category. + + Parameters + ---------- + snr : float + event's SNR + far : float + event's cfar + mass1 : float + event's mass1 + mass2 : float + event's mass2 + livetime: float + Livetime consistent with estimate of Poisson counts. + Eg. If counts are from O1 and O2, livetime is O1-O2 + livetime in seconds. + mean_values_dict : dictionary + dictionary of source specific FGMC Poisson counts + + Returns + ------- + p_astros : str + JSON dump of the p_astro by source category + + Example + ------- + >>> p_astros = json.loads(compute_p_astro(files)) + >>> p_astros + {'BNS': 0.999, 'BBH': 0.0, 'NSBH': 0.0, 'Terrestrial': 0.001} + """ + + # Define constants to compute bayesfactors + snr_star = 8.5 + far_star = 1 / (30 * 86400) + + # Compute astrophysical bayesfactor for + # GraceDB event + fground = 3 * snr_star**3 / (snr_choice**4) + bground = far / far_star + astro_bayesfac = fground / bground + + # Update terrestrial count based on far threshold + lam_0 = far_star * livetime + mean_values_dict["counts_Terrestrial"] = lam_0 + + # Compute categorical p_astro values + p_astro_values = \ + evaluate_p_astro_from_bayesfac(astro_bayesfac, + mean_values_dict, + mass1, + mass2) + # Dump mean values in json file + return json.dumps(p_astro_values) -- GitLab