diff --git a/bilby/core/result.py b/bilby/core/result.py
index 7c8715594608a9d1e155f1fe9d209c5f398418b1..4f2b610c663dc4f11b635f10e50ba89be99aafbc 100644
--- a/bilby/core/result.py
+++ b/bilby/core/result.py
@@ -6,7 +6,8 @@ from collections import namedtuple
 from copy import copy
 from importlib import import_module
 from itertools import product
-
+import multiprocessing
+from functools import partial
 import numpy as np
 import pandas as pd
 import scipy.stats
@@ -30,6 +31,11 @@ from .prior import Prior, PriorDict, DeltaFunction, ConditionalDeltaFunction
 EXTENSIONS = ["json", "hdf5", "h5", "pickle", "pkl"]
 
 
+def __eval_l(likelihood, params):
+    likelihood.parameters.update(params)
+    return likelihood.log_likelihood()
+
+
 def result_file_name(outdir, label, extension='json', gzip=False):
     """ Returns the standard filename used for a result file
 
@@ -104,7 +110,7 @@ def read_in_result(filename=None, outdir=None, label=None, extension='json', gzi
 
 def get_weights_for_reweighting(
         result, new_likelihood=None, new_prior=None, old_likelihood=None,
-        old_prior=None, resume_file=None, n_checkpoint=5000):
+        old_prior=None, resume_file=None, n_checkpoint=5000, npool=1):
     """ Calculate the weights for reweight()
 
     See bilby.core.result.reweight() for help with the inputs
@@ -145,30 +151,45 @@ def get_weights_for_reweighting(
 
         starting_index = 0
 
-    for ii, sample in tqdm(result.posterior.iloc[starting_index:].iterrows()):
-        # Convert sample to dictionary
-        par_sample = {key: sample[key] for key in result.posterior}
+    dict_samples = [{key: sample[key] for key in result.posterior}
+                    for _, sample in result.posterior.iterrows()]
+    n = len(dict_samples) - starting_index
+
+    # Helper function to compute likelihoods in parallel
+    def eval_pool(this_logl):
+        with multiprocessing.Pool(processes=npool) as pool:
+            chunksize = max(100, n // (2 * npool))
+            return list(tqdm(
+                pool.imap(partial(__eval_l, this_logl),
+                          dict_samples[starting_index:], chunksize=chunksize),
+                desc='Computing likelihoods',
+                total=n)
+            )
 
-        if old_likelihood is not None:
-            old_likelihood.parameters.update(par_sample)
-            old_log_likelihood_array[ii] = old_likelihood.log_likelihood()
-        else:
-            old_log_likelihood_array[ii] = sample["log_likelihood"]
+    if old_likelihood is None:
+        old_log_likelihood_array[starting_index:] = \
+            result.posterior["log_likelihood"][starting_index:].to_numpy()
+    else:
+        old_log_likelihood_array[starting_index:] = eval_pool(old_likelihood)
 
-        if new_likelihood is not None:
-            new_likelihood.parameters.update(par_sample)
-            new_log_likelihood_array[ii] = new_likelihood.log_likelihood()
-        else:
-            # Don't perform likelihood reweighting (i.e. likelihood isn't updated)
-            new_log_likelihood_array[ii] = old_log_likelihood_array[ii]
+    if new_likelihood is None:
+        # Don't perform likelihood reweighting (i.e. likelihood isn't updated)
+        new_log_likelihood_array[starting_index:] = old_log_likelihood_array[starting_index:]
+    else:
+        new_log_likelihood_array[starting_index:] = eval_pool(new_likelihood)
 
+    # Compute priors
+    for ii, sample in enumerate(tqdm(dict_samples[starting_index:],
+                                     desc='Computing priors',
+                                     total=n),
+                                start=starting_index):
         if old_prior is not None:
-            old_log_prior_array[ii] = old_prior.ln_prob(par_sample)
+            old_log_prior_array[ii] = old_prior.ln_prob(sample)
         else:
             old_log_prior_array[ii] = sample["log_prior"]
 
         if new_prior is not None:
-            new_log_prior_array[ii] = new_prior.ln_prob(par_sample)
+            new_log_prior_array[ii] = new_prior.ln_prob(sample)
         else:
             # Don't perform prior reweighting (i.e. prior isn't updated)
             new_log_prior_array[ii] = old_log_prior_array[ii]
@@ -272,7 +293,7 @@
         get_weights_for_reweighting(
             result, new_likelihood=new_likelihood, new_prior=new_prior,
             old_likelihood=old_likelihood, old_prior=old_prior,
-            resume_file=resume_file, n_checkpoint=n_checkpoint)
+            resume_file=resume_file, n_checkpoint=n_checkpoint, npool=npool)
 
     if use_nested_samples:
         ln_weights += np.log(result.posterior["weights"])
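Note on the `result.py` changes above: the per-sample likelihood evaluation moves from a serial loop into a `multiprocessing.Pool`, which is why the helper `__eval_l` lives at module level (closures are not picklable, module-level functions are). Below is a minimal, self-contained sketch of the same pattern with a hypothetical `_eval` helper and toy sample dictionaries — not bilby's actual likelihood — just to show how `partial` + `imap` + `chunksize` fit together:

```python
# Sketch of the parallel-evaluation pattern used in get_weights_for_reweighting.
# `_eval` and the toy samples are placeholders, not bilby code.
import multiprocessing
from functools import partial

def _eval(offset, params):
    # stand-in for likelihood.parameters.update(params) + log_likelihood()
    return offset + sum(params.values())

if __name__ == "__main__":
    samples = [{"a": i, "b": 2 * i} for i in range(1000)]
    npool = 4
    # large chunks amortize inter-process overhead, as in the patch
    chunksize = max(100, len(samples) // (2 * npool))
    with multiprocessing.Pool(processes=npool) as pool:
        out = list(pool.imap(partial(_eval, 10.0), samples, chunksize=chunksize))
    print(out[:5])
```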
diff --git a/bilby/core/utils/io.py b/bilby/core/utils/io.py
index d6b66750dfa86a6fb01fd1f4f43c8dd3d4359ff4..c55837bfded43ec3673e1114000e61b07b9821fa 100644
--- a/bilby/core/utils/io.py
+++ b/bilby/core/utils/io.py
@@ -268,6 +268,11 @@ def encode_for_hdf5(key, item):
         item = float(item)
     elif isinstance(item, np.complex_):
         item = complex(item)
+    if isinstance(item, np.ndarray):
+        # Numpy's wide unicode strings are not supported by hdf5
+        if item.dtype.kind == 'U':
+            logger.debug(f'converting dtype {item.dtype} for hdf5')
+            item = np.array(item, dtype='S')
     if isinstance(item, (np.ndarray, int, float, complex, str, bytes)):
         output = item
     elif item is None:
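The `encode_for_hdf5` tweak works around h5py's lack of support for NumPy's fixed-width unicode (`'U'`) dtype: such arrays are re-encoded as byte strings (`'S'`) before writing. A quick illustration of the dtype conversion itself (plain NumPy, no HDF5 file needed):

```python
# Why the conversion is needed: h5py rejects arrays of dtype kind 'U',
# but accepts the bytes equivalent (kind 'S').
import numpy as np

labels = np.array(["H1", "L1", "V1"])    # dtype '<U2'; not storable via h5py
as_bytes = np.array(labels, dtype="S")   # dtype 'S2'; storable
print(labels.dtype, as_bytes.dtype)      # <U2 |S2
```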
- "The matrix manipulation has not been tested.") - if self.calibration_marginalization: new_calibration = self.generate_calibration_sample_from_marginalized_likelihood( signal_polarizations=signal_polarizations) @@ -752,52 +742,36 @@ class GravitationalWaveTransient(Likelihood): d_inner_h = ln_i0(abs(d_inner_h)) if self.calibration_marginalization and self.time_marginalization: - return d_inner_h - np.outer(h_inner_h, np.ones(np.shape(d_inner_h)[1])) / 2 + return d_inner_h - h_inner_h[:, np.newaxis] / 2 else: return d_inner_h - h_inner_h / 2 def time_marginalized_likelihood(self, d_inner_h_tc_array, h_inner_h): - if self.distance_marginalization: - log_l_tc_array = self.distance_marginalized_likelihood( - d_inner_h=d_inner_h_tc_array, h_inner_h=h_inner_h) - elif self.phase_marginalization: - log_l_tc_array = self.phase_marginalized_likelihood( - d_inner_h=d_inner_h_tc_array, - h_inner_h=h_inner_h) - else: - log_l_tc_array = np.real(d_inner_h_tc_array) - h_inner_h / 2 - times = self._times - if self.jitter_time: - times = self._times + self.parameters['time_jitter'] - time_prior_array = self.priors['geocent_time'].prob(times) * self._delta_tc - return logsumexp(log_l_tc_array, b=time_prior_array) - - def time_and_calibration_marginalized_likelihood(self, d_inner_h_array, h_inner_h): times = self._times if self.jitter_time: times = self._times + self.parameters['time_jitter'] _time_prior = self.priors['geocent_time'] - time_mask = np.logical_and((times >= _time_prior.minimum), (times <= _time_prior.maximum)) + time_mask = (times >= _time_prior.minimum) & (times <= _time_prior.maximum) times = times[time_mask] - time_probs = self.priors['geocent_time'].prob(times) * self._delta_tc - - d_inner_h_array = d_inner_h_array[:, time_mask] - h_inner_h = h_inner_h + time_prior_array = self.priors['geocent_time'].prob(times) * self._delta_tc + if self.calibration_marginalization: + d_inner_h_tc_array = d_inner_h_tc_array[:, time_mask] + else: + d_inner_h_tc_array = d_inner_h_tc_array[time_mask] if self.distance_marginalization: - log_l_array = self.distance_marginalized_likelihood( - d_inner_h=d_inner_h_array, h_inner_h=h_inner_h) + log_l_tc_array = self.distance_marginalized_likelihood( + d_inner_h=d_inner_h_tc_array, h_inner_h=h_inner_h) elif self.phase_marginalization: - log_l_array = self.phase_marginalized_likelihood( - d_inner_h=d_inner_h_array, + log_l_tc_array = self.phase_marginalized_likelihood( + d_inner_h=d_inner_h_tc_array, h_inner_h=h_inner_h) + elif self.calibration_marginalization: + log_l_tc_array = np.real(d_inner_h_tc_array) - h_inner_h[:, np.newaxis] / 2 else: - log_l_array = np.real(d_inner_h_array) - np.outer(h_inner_h, np.ones(np.shape(d_inner_h_array)[1])) / 2 - - prior_array = np.outer(time_probs, 1. 
diff --git a/test/check_author_list.py b/test/check_author_list.py
index eac8a4a6c8c7470b4586e4a68ec9e3ea8059150e..c04932d5eaa945867eb4be4bf2e5f328d40d5946 100644
--- a/test/check_author_list.py
+++ b/test/check_author_list.py
@@ -3,7 +3,7 @@
 import re
 import subprocess
 
-special_cases = ["plasky", "thomas", "mj-will", "richard"]
+special_cases = ["plasky", "thomas", "mj-will", "richard", "douglas"]
 AUTHORS_list = []
 with open("AUTHORS.md", "r") as f:
     AUTHORS_list = " ".join([line for line in f]).lower()