From 1fd1a5f8d2572760cf2b2595df3085251241808a Mon Sep 17 00:00:00 2001 From: Gregory Ashton <gregory.ashton@ligo.org> Date: Tue, 21 Mar 2023 20:51:53 +0000 Subject: [PATCH] Merge branch 'time-calibration' into 'master' Time calibration See merge request lscsoft/bilby!1234 (cherry picked from commit c76fcdf25dca1bcf9c0a9b08b77323efddbdc11d) 46f1403d Allow de-marginalisation of calibration along with time c30494b1 Parallelise the reweighting step 5396136e Remove unused import 9f665cb0 Remove print statement 8b066f78 Add handling of numpy wide unicode when writing h5 1d92131c Ad the progress bar back in when computing LogLs d0aa5eed Fix flake errors and array error fe1b075d flake changes 9ffae96d Fix casting to np.array e1430e4c flake8 2061de43 Cleanup code to use sample in loop 279ddcf2 DEV: neaten calibration marginalization to enable reconstruction ca4b03f1 Merge branch 'time-calibration' into 'time-calibration-reconstruction' 88c52d2a BUGFIX: add case of just time and calibration marginalzation ec10acda Merge branch 'time-calibration-reconstruction' into 'time-calibration' 411cf645 Add john-veitch as special case author name 438e705d Fix authorlist check --- bilby/core/result.py | 59 ++++++++++++++++++++---------- bilby/core/utils/io.py | 5 +++ bilby/gw/likelihood/base.py | 72 +++++++++++++++---------------------- test/check_author_list.py | 2 +- 4 files changed, 74 insertions(+), 64 deletions(-) diff --git a/bilby/core/result.py b/bilby/core/result.py index 7c8715594..4f2b610c6 100644 --- a/bilby/core/result.py +++ b/bilby/core/result.py @@ -6,7 +6,8 @@ from collections import namedtuple from copy import copy from importlib import import_module from itertools import product - +import multiprocessing +from functools import partial import numpy as np import pandas as pd import scipy.stats @@ -30,6 +31,11 @@ from .prior import Prior, PriorDict, DeltaFunction, ConditionalDeltaFunction EXTENSIONS = ["json", "hdf5", "h5", "pickle", "pkl"] +def __eval_l(likelihood, params): + likelihood.parameters.update(params) + return likelihood.log_likelihood() + + def result_file_name(outdir, label, extension='json', gzip=False): """ Returns the standard filename used for a result file @@ -104,7 +110,7 @@ def read_in_result(filename=None, outdir=None, label=None, extension='json', gzi def get_weights_for_reweighting( result, new_likelihood=None, new_prior=None, old_likelihood=None, - old_prior=None, resume_file=None, n_checkpoint=5000): + old_prior=None, resume_file=None, n_checkpoint=5000, npool=1): """ Calculate the weights for reweight() See bilby.core.result.reweight() for help with the inputs @@ -145,30 +151,45 @@ def get_weights_for_reweighting( starting_index = 0 - for ii, sample in tqdm(result.posterior.iloc[starting_index:].iterrows()): - # Convert sample to dictionary - par_sample = {key: sample[key] for key in result.posterior} + dict_samples = [{key: sample[key] for key in result.posterior} + for _, sample in result.posterior.iterrows()] + n = len(dict_samples) - starting_index + + # Helper function to compute likelihoods in parallel + def eval_pool(this_logl): + with multiprocessing.Pool(processes=npool) as pool: + chunksize = max(100, n // (2 * npool)) + return list(tqdm( + pool.imap(partial(__eval_l, this_logl), + dict_samples[starting_index:], chunksize=chunksize), + desc='Computing likelihoods', + total=n) + ) - if old_likelihood is not None: - old_likelihood.parameters.update(par_sample) - old_log_likelihood_array[ii] = old_likelihood.log_likelihood() - else: - old_log_likelihood_array[ii] = sample["log_likelihood"] + if old_likelihood is None: + old_log_likelihood_array[starting_index:] = \ + result.posterior["log_likelihood"][starting_index:].to_numpy() + else: + old_log_likelihood_array[starting_index:] = eval_pool(old_likelihood) - if new_likelihood is not None: - new_likelihood.parameters.update(par_sample) - new_log_likelihood_array[ii] = new_likelihood.log_likelihood() - else: - # Don't perform likelihood reweighting (i.e. likelihood isn't updated) - new_log_likelihood_array[ii] = old_log_likelihood_array[ii] + if new_likelihood is None: + # Don't perform likelihood reweighting (i.e. likelihood isn't updated) + new_log_likelihood_array[starting_index:] = old_log_likelihood_array[starting_index:] + else: + new_log_likelihood_array[starting_index:] = eval_pool(new_likelihood) + # Compute priors + for ii, sample in enumerate(tqdm(dict_samples[starting_index:], + desc='Computing priors', + total=n), + start=starting_index): if old_prior is not None: - old_log_prior_array[ii] = old_prior.ln_prob(par_sample) + old_log_prior_array[ii] = old_prior.ln_prob(sample) else: old_log_prior_array[ii] = sample["log_prior"] if new_prior is not None: - new_log_prior_array[ii] = new_prior.ln_prob(par_sample) + new_log_prior_array[ii] = new_prior.ln_prob(sample) else: # Don't perform prior reweighting (i.e. prior isn't updated) new_log_prior_array[ii] = old_log_prior_array[ii] @@ -272,7 +293,7 @@ def reweight(result, label=None, new_likelihood=None, new_prior=None, get_weights_for_reweighting( result, new_likelihood=new_likelihood, new_prior=new_prior, old_likelihood=old_likelihood, old_prior=old_prior, - resume_file=resume_file, n_checkpoint=n_checkpoint) + resume_file=resume_file, n_checkpoint=n_checkpoint, npool=npool) if use_nested_samples: ln_weights += np.log(result.posterior["weights"]) diff --git a/bilby/core/utils/io.py b/bilby/core/utils/io.py index d6b66750d..c55837bfd 100644 --- a/bilby/core/utils/io.py +++ b/bilby/core/utils/io.py @@ -268,6 +268,11 @@ def encode_for_hdf5(key, item): item = float(item) elif isinstance(item, np.complex_): item = complex(item) + if isinstance(item, np.ndarray): + # Numpy's wide unicode strings are not supported by hdf5 + if item.dtype.kind == 'U': + logger.debug(f'converting dtype {item.dtype} for hdf5') + item = np.array(item, dtype='S') if isinstance(item, (np.ndarray, int, float, complex, str, bytes)): output = item elif item is None: diff --git a/bilby/gw/likelihood/base.py b/bilby/gw/likelihood/base.py index 1daa9a1e5..ab8eadc1d 100644 --- a/bilby/gw/likelihood/base.py +++ b/bilby/gw/likelihood/base.py @@ -418,12 +418,7 @@ class GravitationalWaveTransient(Likelihood): def compute_log_likelihood_from_snrs(self, total_snrs): - if self.calibration_marginalization and self.time_marginalization: - log_l = self.time_and_calibration_marginalized_likelihood( - d_inner_h_array=total_snrs.d_inner_h_array, - h_inner_h=total_snrs.optimal_snr_squared_array) - - elif self.calibration_marginalization: + if self.calibration_marginalization: log_l = self.calibration_marginalized_likelihood( d_inner_h_calibration_array=total_snrs.d_inner_h_array, h_inner_h=total_snrs.optimal_snr_squared_array) @@ -492,11 +487,6 @@ class GravitationalWaveTransient(Likelihood): else: return self.parameters - if self.calibration_marginalization and self.time_marginalization: - raise AttributeError( - "Cannot use time and calibration marginalization simultaneously for regeneration at the moment!" - "The matrix manipulation has not been tested.") - if self.calibration_marginalization: new_calibration = self.generate_calibration_sample_from_marginalized_likelihood( signal_polarizations=signal_polarizations) @@ -752,52 +742,36 @@ class GravitationalWaveTransient(Likelihood): d_inner_h = ln_i0(abs(d_inner_h)) if self.calibration_marginalization and self.time_marginalization: - return d_inner_h - np.outer(h_inner_h, np.ones(np.shape(d_inner_h)[1])) / 2 + return d_inner_h - h_inner_h[:, np.newaxis] / 2 else: return d_inner_h - h_inner_h / 2 def time_marginalized_likelihood(self, d_inner_h_tc_array, h_inner_h): - if self.distance_marginalization: - log_l_tc_array = self.distance_marginalized_likelihood( - d_inner_h=d_inner_h_tc_array, h_inner_h=h_inner_h) - elif self.phase_marginalization: - log_l_tc_array = self.phase_marginalized_likelihood( - d_inner_h=d_inner_h_tc_array, - h_inner_h=h_inner_h) - else: - log_l_tc_array = np.real(d_inner_h_tc_array) - h_inner_h / 2 - times = self._times - if self.jitter_time: - times = self._times + self.parameters['time_jitter'] - time_prior_array = self.priors['geocent_time'].prob(times) * self._delta_tc - return logsumexp(log_l_tc_array, b=time_prior_array) - - def time_and_calibration_marginalized_likelihood(self, d_inner_h_array, h_inner_h): times = self._times if self.jitter_time: times = self._times + self.parameters['time_jitter'] _time_prior = self.priors['geocent_time'] - time_mask = np.logical_and((times >= _time_prior.minimum), (times <= _time_prior.maximum)) + time_mask = (times >= _time_prior.minimum) & (times <= _time_prior.maximum) times = times[time_mask] - time_probs = self.priors['geocent_time'].prob(times) * self._delta_tc - - d_inner_h_array = d_inner_h_array[:, time_mask] - h_inner_h = h_inner_h + time_prior_array = self.priors['geocent_time'].prob(times) * self._delta_tc + if self.calibration_marginalization: + d_inner_h_tc_array = d_inner_h_tc_array[:, time_mask] + else: + d_inner_h_tc_array = d_inner_h_tc_array[time_mask] if self.distance_marginalization: - log_l_array = self.distance_marginalized_likelihood( - d_inner_h=d_inner_h_array, h_inner_h=h_inner_h) + log_l_tc_array = self.distance_marginalized_likelihood( + d_inner_h=d_inner_h_tc_array, h_inner_h=h_inner_h) elif self.phase_marginalization: - log_l_array = self.phase_marginalized_likelihood( - d_inner_h=d_inner_h_array, + log_l_tc_array = self.phase_marginalized_likelihood( + d_inner_h=d_inner_h_tc_array, h_inner_h=h_inner_h) + elif self.calibration_marginalization: + log_l_tc_array = np.real(d_inner_h_tc_array) - h_inner_h[:, np.newaxis] / 2 else: - log_l_array = np.real(d_inner_h_array) - np.outer(h_inner_h, np.ones(np.shape(d_inner_h_array)[1])) / 2 - - prior_array = np.outer(time_probs, 1. / self.number_of_response_curves * np.ones(len(h_inner_h))).T - - return logsumexp(log_l_array, b=prior_array) + log_l_tc_array = np.real(d_inner_h_tc_array) - h_inner_h / 2 + return logsumexp(log_l_tc_array, b=time_prior_array, axis=-1) def get_calibration_log_likelihoods(self, signal_polarizations=None): self.parameters.update(self.get_sky_frame_parameters()) @@ -814,7 +788,12 @@ class GravitationalWaveTransient(Likelihood): total_snrs += per_detector_snr - if self.distance_marginalization: + if self.time_marginalization: + log_l_cal_array = self.time_marginalized_likelihood( + d_inner_h_tc_array=total_snrs.d_inner_h_array, + h_inner_h=total_snrs.optimal_snr_squared_array, + ) + elif self.distance_marginalization: log_l_cal_array = self.distance_marginalized_likelihood( d_inner_h=total_snrs.d_inner_h_array, h_inner_h=total_snrs.optimal_snr_squared_array) @@ -829,7 +808,12 @@ class GravitationalWaveTransient(Likelihood): return log_l_cal_array def calibration_marginalized_likelihood(self, d_inner_h_calibration_array, h_inner_h): - if self.distance_marginalization: + if self.time_marginalization: + log_l_cal_array = self.time_marginalized_likelihood( + d_inner_h_tc_array=d_inner_h_calibration_array, + h_inner_h=h_inner_h, + ) + elif self.distance_marginalization: log_l_cal_array = self.distance_marginalized_likelihood( d_inner_h=d_inner_h_calibration_array, h_inner_h=h_inner_h) elif self.phase_marginalization: diff --git a/test/check_author_list.py b/test/check_author_list.py index eac8a4a6c..c04932d5e 100644 --- a/test/check_author_list.py +++ b/test/check_author_list.py @@ -3,7 +3,7 @@ import re import subprocess -special_cases = ["plasky", "thomas", "mj-will", "richard"] +special_cases = ["plasky", "thomas", "mj-will", "richard", "douglas"] AUTHORS_list = [] with open("AUTHORS.md", "r") as f: AUTHORS_list = " ".join([line for line in f]).lower() -- GitLab