diff --git a/gstlal-inspiral/bin/gstlal_inspiral b/gstlal-inspiral/bin/gstlal_inspiral index 2b6456fa148b280849d6d9d3e50a7e7143f741fa..b0f5e37dae504e90b71d42162c240d28816dc7c4 100755 --- a/gstlal-inspiral/bin/gstlal_inspiral +++ b/gstlal-inspiral/bin/gstlal_inspiral @@ -271,6 +271,7 @@ def parse_command_line(): group.add_option("--ranking-stat-pdf", metavar = "url", help = "Set the URL from which to load the ranking statistic PDF. This is used to compute false-alarm probabilities and false-alarm rates and is required for online operation (when --data-source is framexmit or lvshm). It is forbidden for offline operation (all other data sources)") group.add_option("--time-slide-file", metavar = "filename", help = "Set the name of the xml file to get time slide offsets (required).") group.add_option("--zerolag-rankingstat-pdf", metavar = "url", action = "append", help = "Record a histogram of the likelihood ratio ranking statistic values assigned to zero-lag candidates in this XML file. This is used to construct the extinction model and set the overall false-alarm rate normalization during online running. If it does not exist at start-up, a new file will be initialized, otherwise the counts will be added to the file's contents. Required when --data-source is lvshm or framexmit; optional otherwise. If given, exactly as many must be provided as there are --svd-bank options and they will be used in order.") + group.add_option("--activation-counts-file", metavar = "filename", help = "Set the name of the h5 file containing activation counts for multicomponent p-astro.") parser.add_option_group(group) group = OptionGroup(parser, "GracedB Options", "Adjust GracedB interaction") @@ -423,6 +424,22 @@ def parse_command_line(): import subprocess for partition in detectors.shm_part_dict.values(): subprocess.call(["smrepair", partition]) + + # FIXME REMOVE THIS. We need to get a better way of passing + # configuration information like this around. Putting file + # links into the prior dist stats is an options but it is also + # becoming cumbersome since it is easy to not then have the + # files it refers to... + if options.activation_counts_file is not None: + import h5py + import numpy + ac_file = h5py.File(options.activation_counts_file) + options.activation_counts = dict((k, float(numpy.array(v))) for k, v in ac_file[options.job_tag].items()) + # record the total number of bins + options.activation_counts['num_bins'] = len(ac_file) + else: + options.activation_counts = None + else: bad_options = [] for option in ["job_tag"]: @@ -430,6 +447,7 @@ def parse_command_line(): bad_options.append("--%s" % option.replace("_","-")) if bad_options: raise ValueError("cannot set %s when --data-source is not lvshm or framexmit " % ", ".join(bad_options)) + options.activation_counts = None if options.reference_psd is None: options.track_psd = True @@ -859,6 +877,7 @@ for output_file_number, (svd_bank_url_dict, output_url, ranking_stat_output_url, cluster = True,#options.data_source in ("lvshm", "framexmit"),# If uncommented, we only cluster when running online cap_singles = options.cap_singles, FAR_trialsfactor = options.far_trials_factor, + activation_counts = options.activation_counts, verbose = options.verbose ) if options.verbose: diff --git a/gstlal-inspiral/python/lloidhandler.py b/gstlal-inspiral/python/lloidhandler.py index f43ab2e8fc5880cc4da4e9132d33a40f252c15a8..2ed873e50556794baf26f5eb8bd6280b3d42ae62 100644 --- a/gstlal-inspiral/python/lloidhandler.py +++ b/gstlal-inspiral/python/lloidhandler.py @@ -647,7 +647,7 @@ class Handler(simplehandler.Handler): dumps of segment information, trigger files and background distribution statistics. """ - def __init__(self, mainloop, pipeline, coincs_document, rankingstat, horizon_distance_func, gracedbwrapper, zerolag_rankingstatpdf_url = None, rankingstatpdf_url = None, ranking_stat_output_url = None, ranking_stat_input_url = None, likelihood_snapshot_interval = None, sngls_snr_threshold = None, tag = "", kafka_server = "10.14.0.112:9092", cluster = False, cap_singles = False, FAR_trialsfactor = 1.0, verbose = False): + def __init__(self, mainloop, pipeline, coincs_document, rankingstat, horizon_distance_func, gracedbwrapper, zerolag_rankingstatpdf_url = None, rankingstatpdf_url = None, ranking_stat_output_url = None, ranking_stat_input_url = None, likelihood_snapshot_interval = None, sngls_snr_threshold = None, tag = "", kafka_server = "10.14.0.112:9092", cluster = False, cap_singles = False, FAR_trialsfactor = 1.0, activation_counts = None, verbose = False): """! @param mainloop The main application's event loop @param pipeline The gstreamer pipeline that is being @@ -674,6 +674,7 @@ class Handler(simplehandler.Handler): self.cluster = cluster self.cap_singles = cap_singles self.FAR_trialsfactor = FAR_trialsfactor + self.activation_counts = activation_counts self.gracedbwrapper = gracedbwrapper # FIXME: detangle this @@ -1314,7 +1315,7 @@ class Handler(simplehandler.Handler): return xmldoc def __get_p_astro_json(self, lr, m1, m2, snr, far): - return p_astro_gstlal.compute_p_astro(lr, m1, m2, snr, far, self.rankingstatpdf.copy()) + return p_astro_gstlal.compute_p_astro(lr, m1, m2, snr, far, self.rankingstatpdf.copy(), activation_counts = self.activation_counts) def __get_rankingstat_xmldoc_for_gracedb(self): # FIXME: remove this wrapper when the horizon history diff --git a/gstlal-inspiral/python/p_astro_gstlal.py b/gstlal-inspiral/python/p_astro_gstlal.py index f5b06fdd9632c6b6738c3fa143c2869f9e1d9a21..d2c533f2e655461d4961d5a7af53d584c119626d 100644 --- a/gstlal-inspiral/python/p_astro_gstlal.py +++ b/gstlal-inspiral/python/p_astro_gstlal.py @@ -47,9 +47,6 @@ def evaluate_p_astro_from_bayesfac(astro_bayesfac, mean_values_dict, mass1, mass2, - spin1z=None, - spin2z=None, - num_bins=None, activation_counts=None): """ Evaluates `p_astro` for a new event using Bayes factor, masses, and number @@ -65,12 +62,8 @@ def evaluate_p_astro_from_bayesfac(astro_bayesfac, event mass1 mass2 : float event mass2 - spin1z : float - event spin1z - spin2z : float - event spin2z - url_weights_key: str - url config key pointing to weights file + activation_counts: dictionary + array of activation counts keyed on source type and bin number Returns ------- @@ -81,9 +74,6 @@ def evaluate_p_astro_from_bayesfac(astro_bayesfac, a_hat_bns, a_hat_bbh, a_hat_nsbh, a_hat_mg, num_bins = \ make_weights_from_histograms(mass1, mass2, - spin1z, - spin2z, - num_bins, activation_counts) # Compute category-wise Bayes factors @@ -141,39 +131,12 @@ def make_weights_from_hardcuts(mass1, mass2): return a_hat_bns, a_hat_bbh, a_hat_nsbh, a_hat_mg, num_bins -def closest_template(params, params_array): - """ - Associate event's template to a template in the template bank. The assumed - bank is the one used by Gstlal. Hence, for Gstlal events, the association - should be exact, up to rounding errors. - - Parameters - ---------- - params : tuple of floats - intrinsic params of event template - params_array: array of arrays - array of template bank's template params - - Returns - ------- - idx : int - index pertaining to params_array - for matching template - """ - idx = np.argmin(np.sum((params_array-params)**2,axis=1)) - - return idx - - def make_weights_from_histograms(mass1, mass2, - spin1z, - spin2z, - num_bins=None, activation_counts=None): """ Construct binary weights from bin number provided by GstLAL, and a weights - matrix pre-constructed and stored in a file, to be read from a url. The + matrix pre-constructed and stored in a file, to be read from ???. The weights are keyed on template parameters of Gstlal's template bank. If that doesn't work, construct binary weights. @@ -187,10 +150,8 @@ def make_weights_from_histograms(mass1, z component spin of heavier mass spin2z : float z component spin of lighter mass - num_bins : int - number of bins for template weighting - activation_counts : pandas dataframe - data frame for template weights + activation_counts : hdf5 object/ dictionary + dictionary of activation counts keyed on bin number and source category Returns ------- @@ -198,19 +159,15 @@ def make_weights_from_histograms(mass1, mass-based template weights """ - if activation_counts is None or num_bins is None: + if activation_counts is None: a_hat_bns, a_hat_bbh, a_hat_nsbh, a_hat_mg, num_bins = \ make_weights_from_hardcuts(mass1, mass2) else: - params = (mass1, mass2, spin1z, spin2z) - params_names = ['mass1', 'mass2', 'spin1z', 'spin2z'] - params_array = \ - np.array([activation_counts[key][:] for key in params_names]).T - idx = closest_template(params, params_array) - a_hat_bns = activation_counts['bns'][:][idx] - a_hat_mg = activation_counts['mg'][:][idx] - a_hat_nsbh = activation_counts['nsbh'][:][idx] - a_hat_bbh = activation_counts['bbh'][:][idx] + a_hat_bns = activation_counts['BNS'] + a_hat_mg = activation_counts['MassGap'] + a_hat_nsbh = activation_counts['NSBH'] + a_hat_bbh = activation_counts['BBH'] + num_bins = activation_counts['num_bins'] return a_hat_bns, a_hat_bbh, a_hat_nsbh, a_hat_mg, num_bins @@ -282,6 +239,7 @@ def _get_ln_f_over_b(rankingstatpdf, ln_f_over_b = \ np.array([f[ln_lr, ] - b[ln_lr, ] for ln_lr in ln_likelihood_ratios]) if np.isnan(ln_f_over_b).any(): + lnlr = ln_likelihood_ratios[0] raise ValueError("NaN encountered in ranking statistic PDF ratios") if np.isinf(np.exp(ln_f_over_b)).any(): raise ValueError( @@ -300,7 +258,8 @@ def compute_p_astro(event_ln_likelihood_ratio, "counts_NSBH": 1.56679410666, "counts_BBH": 9.26042350393, "counts_MassGap": 2.40800240248, - "counts_Terrestrial": 3923} + "counts_Terrestrial": 3923}, + activation_counts = None ): """ Task to compute `p_astro` by source category. @@ -323,6 +282,9 @@ def compute_p_astro(event_ln_likelihood_ratio, livetime in seconds. mean_values_dict : dictionary dictionary of source specific FGMC Poisson counts + activation_counts : dictionary + (optional) dictionary of activation counts keyed on + bin number and source category Returns ------- @@ -346,7 +308,7 @@ def compute_p_astro(event_ln_likelihood_ratio, _get_ln_f_over_b(rankingstatpdf, zerolag_ln_likelihood_ratios, livetime) - except ValueError: + except ValueError as e: return compute_p_astro_approx(snr, far, event_mass1, @@ -362,11 +324,13 @@ def compute_p_astro(event_ln_likelihood_ratio, evaluate_p_astro_from_bayesfac(np.exp(ln_f_over_b[0]), mean_values_dict, event_mass1, - event_mass2) + event_mass2, + activation_counts=activation_counts) # Dump values in json file return json.dumps(p_astro_values) + def compute_p_astro_approx(snr, far, mass1, mass2, livetime, mean_values_dict): """ Task to compute `p_astro` by source category.