Commit 5a895675 authored by Leo Pound Singer's avatar Leo Pound Singer
Browse files

Implement improved HDF5 posterior sample chain format

See:
 * https://bugs.ligo.org/redmine/issues/4509
 * https://www.lsc-group.phys.uwm.edu/ligovirgo/cbcnote/BayesCall20160721/HDFLayout
Original: 3c490168656a80cf482a80b14d9167b9e44dc964
parent fd2fcb4a
......@@ -1047,12 +1047,15 @@ void LALInferenceCheckpointMCMC(LALInferenceRunState *runState) {
for (t = 0; t < n_local_threads; t++) {
thread = runState->threads[t];
LALH5File *chain_group = XLALH5GroupOpen(group, thread->name);
char *chain_group_name = NULL;
asprintf(&chain_group_name, "%s-checkpoint", thread->name);
LALH5File *chain_group = XLALH5GroupOpen(group, chain_group_name);
free(chain_group_name);
/* Create run identifier group */
LALInferenceH5VariablesArray2Group(chain_group, thread->differentialPoints, thread->differentialPointsLength, "differential_points");
LALInferenceH5VariablesArray2Group(chain_group, &(thread->proposalArgs), 1, "proposal_arguments");
LALInferenceH5VariablesArray2Group(chain_group, &(thread->currentParams), 1, "current_parameters");
LALInferenceH5VariablesArrayToDataset(chain_group, thread->differentialPoints, thread->differentialPointsLength, "differential_points");
LALInferenceH5VariablesArrayToDataset(chain_group, &(thread->proposalArgs), 1, "proposal_arguments");
LALInferenceH5VariablesArrayToDataset(chain_group, &(thread->currentParams), 1, "current_parameters");
XLALH5FileAddScalarAttribute(chain_group, "last_step", &(thread->step), LAL_D_TYPE_CODE);
XLALH5FileAddScalarAttribute(chain_group, "effective_sample_size", &(thread->effective_sample_size), LAL_D_TYPE_CODE);
XLALH5FileAddScalarAttribute(chain_group, "differential_point_skip", &(thread->differentialPointsSkip), LAL_D_TYPE_CODE);
......@@ -1091,17 +1094,20 @@ void LALInferenceReadMCMCCheckpoint(LALInferenceRunState *runState) {
for (t = 0; t < n_local_threads; t++) {
thread = runState->threads[t];
LALH5File *chain_group = XLALH5GroupOpen(group, thread->name);
char *chain_group_name = NULL;
asprintf(&chain_group_name, "%s-checkpoint", thread->name);
LALH5File *chain_group = XLALH5GroupOpen(group, chain_group_name);
free(chain_group_name);
/* Restore differential evolution buffer */
LALH5File *de_group = XLALH5GroupOpen(chain_group, "differential_points");
LALInferenceH5GroupToVariablesArray(de_group, &(thread->differentialPoints), (UINT4 *)&(thread->differentialPointsLength));
LALH5Dataset *de_group = XLALH5DatasetRead(chain_group, "differential_points");
LALInferenceH5DatasetToVariablesArray(de_group, &(thread->differentialPoints), (UINT4 *)&(thread->differentialPointsLength));
thread->differentialPointsSize = thread->differentialPointsLength;
/* Restore proposal arguments, most importantly adaptation settings */
LALInferenceVariables **propArgs;
LALH5File *prop_arg_group = XLALH5GroupOpen(chain_group, "proposal_arguments");
LALInferenceH5GroupToVariablesArray(prop_arg_group, &propArgs, &n);
LALH5Dataset *prop_arg_group = XLALH5DatasetRead(chain_group, "proposal_arguments");
LALInferenceH5DatasetToVariablesArray(prop_arg_group, &propArgs, &n);
LALInferenceCopyVariables(propArgs[0], thread->proposalArgs);
/* We don't save strings, so we can't tell which parameter was last updated with adaptation.
......@@ -1112,8 +1118,8 @@ void LALInferenceReadMCMCCheckpoint(LALInferenceRunState *runState) {
/* Restore the parameters of the last sample */
LALInferenceVariables **currentParams;
LALH5File *current_param_group = XLALH5GroupOpen(chain_group, "current_parameters");
LALInferenceH5GroupToVariablesArray(current_param_group, &currentParams, &n);
LALH5Dataset *current_param_group = XLALH5DatasetRead(chain_group, "current_parameters");
LALInferenceH5DatasetToVariablesArray(current_param_group, &currentParams, &n);
LALInferenceCopyVariables(currentParams[0], thread->currentParams);
/* Recalculate the likelihood and prior for the restored parameters */
......@@ -1131,9 +1137,9 @@ void LALInferenceReadMCMCCheckpoint(LALInferenceRunState *runState) {
XLALH5FileQueryScalarAttributeValue(&(thread->differentialPointsSkip), chain_group, "differential_point_skip");
/* TODO: Write metadata */
XLALH5FileClose(current_param_group);
XLALH5FileClose(prop_arg_group);
XLALH5FileClose(de_group);
XLALH5DatasetFree(current_param_group);
XLALH5DatasetFree(prop_arg_group);
XLALH5DatasetFree(de_group);
XLALH5FileClose(chain_group);
}
XLALH5FileClose(group);
......@@ -1155,16 +1161,16 @@ void LALInferenceReadMCMCCheckpoint(LALInferenceRunState *runState) {
for (t = 0; t < n_local_threads; t++) {
thread = runState->threads[t];
LALH5File *chain_group = XLALH5GroupOpen(group, thread->name);
LALH5Dataset *chain_dataset = XLALH5DatasetRead(group, thread->name);
LALInferenceVariables **input_array;
UINT4 i,N;
LALInferenceH5GroupToVariablesArray(chain_group, &input_array, &N);
LALInferenceH5DatasetToVariablesArray(chain_dataset, &input_array, &N);
for (i=0; i<N; i++)
LALInferenceLogSampleToArray(thread->algorithmParams, input_array[i]);
/* TODO: Write metadata */
XLALH5FileClose(chain_group);
XLALH5DatasetFree(chain_dataset);
}
XLALH5FileClose(group);
XLALH5FileClose(li_group);
......@@ -1205,7 +1211,7 @@ void LALInferenceWriteMCMCSamples(LALInferenceRunState *runState) {
}
/* Create run identifier group */
LALInferenceH5VariablesArray2Group(group, output_array, N_output_array, thread->name);
LALInferenceH5VariablesArrayToDataset(group, output_array, N_output_array, thread->name);
/* TODO: Write metadata */
}
......
......@@ -6,8 +6,8 @@ import os
import sys
import lalinference
from lalinference import LALInferenceHDF5PosteriorSamplesGroupName as posterior_grp_name
from lalinference import LALInferenceHDF5NestedSamplesGroupName as nested_grp_name
from lalinference import LALInferenceHDF5PosteriorSamplesDatasetName as posterior_grp_name
from lalinference import LALInferenceHDF5NestedSamplesDatasetName as nested_grp_name
from lalinference.nest2pos import draw_posterior_many, draw_N_posterior_many, compute_weights
usage = '''%prog [-N Nlive] [-p posterior.hdf5] [-H header.txt] [--npos Npos] datafile1.hdf5 [datafile2.hdf5 ...]
......
......@@ -82,6 +82,7 @@ test/LALInferencePriorTest
test/LALInferenceProposalTest
test/LALInferenceTest
test/LALInferenceXMLTest
test/test.hdf5
test/test_bayestar_lattice_tmpltbank.xml
test/test_bayestar_littlehope.xml
test/test_bayestar_prune_neighborhood_tmpltbank.xml
......
This diff is collapsed.
/*
* Copyright (C) 2016 John Veitch
* Copyright (C) 2016 John Veitch and Leo Singer
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
......@@ -24,11 +24,9 @@
#include <lal/LALInference.h>
#include <lal/H5FileIO.h>
int LALInferenceH5VariablesArray2Group(LALH5File *h5file, LALInferenceVariables *const *const varsArray, UINT4 N, const char *GroupName);
int LALInferenceH5VariablesArrayToDataset(LALH5File *h5file, LALInferenceVariables *const *const varsArray, UINT4 N, const char *TableName);
int LALInferenceH5GroupToVariablesArray(LALH5File *group , LALInferenceVariables ***varsArray, UINT4 *N);
int LALInferenceH5VariableToAttribute(LALH5File *group, LALInferenceVariables *vars, char *name);
int LALInferenceH5DatasetToVariablesArray(LALH5Dataset *dataset, LALInferenceVariables ***varsArray, UINT4 *N);
/**
......@@ -38,8 +36,8 @@ int LALInferenceH5VariableToAttribute(LALH5File *group, LALInferenceVariables *v
*/
LALH5File *LALInferenceH5CreateGroupStructure(LALH5File *h5file, const char *codename, const char *runID);
extern const char LALInferenceHDF5PosteriorSamplesGroupName[];
extern const char LALInferenceHDF5NestedSamplesGroupName[];
extern const char LALInferenceHDF5PosteriorSamplesDatasetName[];
extern const char LALInferenceHDF5NestedSamplesDatasetName[];
#endif /* LALInferenceHDF5_h */
......@@ -106,14 +106,14 @@ static int WriteNSCheckPointH5(char *filename, LALInferenceRunState *runState, N
if(!group) XLAL_ERROR(XLAL_EFAILED,"Unable to read group lalinferencenest_checkpoint\n");
int retcode = _saveNSintegralStateH5(group,s);
if(retcode) XLAL_ERROR(XLAL_EFAILED,"Unable to save integral state\n");
LALInferenceH5VariablesArray2Group(group, runState->livePoints, Nlive, "live_points");
LALInferenceH5VariablesArrayToDataset(group, runState->livePoints, Nlive, "live_points");
INT4 N_output_array=0;
if(LALInferenceCheckVariable(runState->algorithmParams,"N_outputarray")) N_output_array=LALInferenceGetINT4Variable(runState->algorithmParams,"N_outputarray");
if(N_output_array>0)
{
LALInferenceVariables **output_array=NULL;
output_array=*(LALInferenceVariables ***)LALInferenceGetVariable(runState->algorithmParams,"outputarray");
LALInferenceH5VariablesArray2Group(group, output_array, N_output_array, "past_chain");
LALInferenceH5VariablesArrayToDataset(group, output_array, N_output_array, "past_chain");
XLALH5FileAddScalarAttribute(group, "N_outputarray", &N_output_array, LAL_I4_TYPE_CODE);
}
......@@ -148,18 +148,18 @@ static int ReadNSCheckPointH5(char *filename, LALInferenceRunState *runState, NS
XLALH5FileClose(h5file);
return 1;
}
LALH5File *liveGroup = XLALH5GroupOpen(group,"live_points");
retcode = LALInferenceH5GroupToVariablesArray(liveGroup , &(runState->livePoints), &Nlive );
LALH5Dataset *liveGroup = XLALH5DatasetRead(group,"live_points");
retcode = LALInferenceH5DatasetToVariablesArray(liveGroup , &(runState->livePoints), &Nlive );
printf("restored %i live points\n",s->size);
XLALH5FileClose(liveGroup);
XLALH5DatasetFree(liveGroup);
if(N_outputarray>0)
{
printf("restoring %i past iterations\n",N_outputarray);
LALH5File *outputGroup = XLALH5GroupOpen(group, "past_chain");
retcode |= LALInferenceH5GroupToVariablesArray(outputGroup, &outputarray, &N_outputarray);
LALH5Dataset *outputGroup = XLALH5DatasetRead(group, "past_chain");
retcode |= LALInferenceH5DatasetToVariablesArray(outputGroup, &outputarray, &N_outputarray);
LALInferenceAddVariable(runState->algorithmParams,"N_outputarray",&N_outputarray,LALINFERENCE_INT4_t,LALINFERENCE_PARAM_OUTPUT);
LALInferenceAddVariable(runState->algorithmParams,"outputarray",&outputarray,LALINFERENCE_void_ptr_t,LALINFERENCE_PARAM_OUTPUT);
XLALH5FileClose(outputGroup);
XLALH5DatasetFree(outputGroup);
}
XLALH5FileClose(group);
XLALH5FileClose(h5file);
......@@ -1136,7 +1136,7 @@ void LALInferenceNestedSamplingAlgorithm(LALInferenceRunState *runState)
LALH5File *groupPtr = LALInferenceH5CreateGroupStructure(h5file, "lalinference", runID);
/* Create run identifier group */
LALInferenceH5VariablesArray2Group(groupPtr, output_array, N_output_array, LALInferenceHDF5NestedSamplesGroupName);
LALInferenceH5VariablesArrayToDataset(groupPtr, output_array, N_output_array, LALInferenceHDF5NestedSamplesDatasetName);
/* TODO: Write metadata */
XLALH5FileAddScalarAttribute(groupPtr, "log_evidence", &logZ, LAL_D_TYPE_CODE);
XLALH5FileAddScalarAttribute(groupPtr, "log_bayes_factor", &logB, LAL_D_TYPE_CODE);
......
#include <lal/XLALError.h>
#include <lal/LALInferenceHDF5.h>
#include <gsl/gsl_test.h>
int main(int argc, char **argv)
{
/* Not used */
(void)argc;
(void)argv;
XLALSetErrorHandler(XLALExitErrorHandler);
/* Populate example variables array. */
UINT4 N = 64;
LALInferenceVariables **vars_array = XLALCalloc(
N, sizeof(LALInferenceVariables *));
for (UINT4 i = 0; i < N; i ++)
{
LALInferenceVariables *vars = XLALCalloc(1, sizeof(LALInferenceVariables));
vars_array[i] = vars;
LALInferenceAddREAL8Variable(vars, "abc", i, LALINFERENCE_PARAM_LINEAR);
LALInferenceAddREAL8Variable(vars, "def", i, LALINFERENCE_PARAM_CIRCULAR);
LALInferenceAddREAL8Variable(vars, "ghi", 5, LALINFERENCE_PARAM_FIXED);
LALInferenceAddREAL8Variable(vars, "ijk", i, LALINFERENCE_PARAM_OUTPUT);
LALInferenceAddINT4Variable (vars, "lmn", i, LALINFERENCE_PARAM_LINEAR);
LALInferenceAddINT4Variable (vars, "opq", i, LALINFERENCE_PARAM_CIRCULAR);
LALInferenceAddINT4Variable (vars, "rst", 5, LALINFERENCE_PARAM_FIXED);
LALInferenceAddINT4Variable (vars, "uvw", i, LALINFERENCE_PARAM_OUTPUT);
}
/* Open HDF5 file for writing and create group structure. */
LALH5File *file = XLALH5FileOpen("test.hdf5", "w");
LALH5File *group = LALInferenceH5CreateGroupStructure(
file, "lalinference", "lalinference_mcmc");
/* Write variables array to dataset. */
LALInferenceH5VariablesArrayToDataset(
group, vars_array, N, LALInferenceHDF5PosteriorSamplesDatasetName);
/* Free variables array. */
for (UINT4 i = 0; i < N; i ++)
{
LALInferenceClearVariables(vars_array[i]);
XLALFree(vars_array[i]);
}
XLALFree(vars_array);
/* Close file. */
XLALH5FileClose(group);
XLALH5FileClose(file);
/* Open file for reading and find dataset. */
file = XLALH5FileOpen("test.hdf5", "r");
LALH5Dataset *dataset = XLALH5DatasetRead(
file, "lalinference/lalinference_mcmc/posterior_samples");
/* Read dataset back to variables array. */
N = 0;
vars_array = NULL;
LALInferenceH5DatasetToVariablesArray(dataset, &vars_array, &N);
gsl_test_int(N, 64, "number of rows read back");
for (UINT4 i = 0; i < N; i ++)
{
LALInferenceVariables *vars = vars_array[i];
gsl_test_int(LALInferenceGetVariableDimension(vars), 8,
"number of columns read back");
gsl_test_abs(LALInferenceGetREAL8Variable(vars, "abc"), i, 0,
"value of column abc");
gsl_test_abs(LALInferenceGetREAL8Variable(vars, "def"), i, 0,
"value of column def");
gsl_test_abs(LALInferenceGetREAL8Variable(vars, "ghi"), 5, 0,
"value of column ghi");
gsl_test_abs(LALInferenceGetREAL8Variable(vars, "ijk"), i, 0,
"value of column ijk");
gsl_test_int(LALInferenceGetINT4Variable (vars, "lmn"), i,
"value of column lmn");
gsl_test_int(LALInferenceGetINT4Variable (vars, "opq"), i,
"value of column opq");
gsl_test_int(LALInferenceGetINT4Variable (vars, "rst"), 5,
"value of column rst");
gsl_test_int(LALInferenceGetINT4Variable (vars, "uvw"), i,
"value of column uvw");
gsl_test_int(LALInferenceGetVariableVaryType(vars, "abc"),
LALINFERENCE_PARAM_LINEAR, "vary type of column abc");
gsl_test_int(LALInferenceGetVariableVaryType(vars, "def"),
LALINFERENCE_PARAM_CIRCULAR, "vary type of column def");
gsl_test_int(LALInferenceGetVariableVaryType(vars, "ghi"),
LALINFERENCE_PARAM_FIXED, "vary type of column ghi");
gsl_test_int(LALInferenceGetVariableVaryType(vars, "ijk"),
LALINFERENCE_PARAM_OUTPUT, "vary type of column ijk");
gsl_test_int(LALInferenceGetVariableVaryType(vars, "lmn"),
LALINFERENCE_PARAM_LINEAR, "vary type of column lmn");
gsl_test_int(LALInferenceGetVariableVaryType(vars, "opq"),
LALINFERENCE_PARAM_CIRCULAR, "vary type of column opq");
gsl_test_int(LALInferenceGetVariableVaryType(vars, "rst"),
LALINFERENCE_PARAM_FIXED, "vary type of column rst");
gsl_test_int(LALInferenceGetVariableVaryType(vars, "uvw"),
LALINFERENCE_PARAM_OUTPUT, "vary type of column uvw");
}
/* Free variables array. */
for (UINT4 i = 0; i < N; i ++)
{
LALInferenceClearVariables(vars_array[i]);
XLALFree(vars_array[i]);
}
XLALFree(vars_array);
/* Close dataset. */
XLALH5DatasetFree(dataset);
/* Close file. */
XLALH5FileClose(file);
/* Check for memory leaks. */
LALCheckMemoryLeaks();
/* Done! */
return gsl_test_summary();
}
......@@ -9,6 +9,7 @@ test_programs += LALInferenceGenerateROQTest
#test_programs += LALInferenceInjectionTest
#test_programs += LALInferenceLikelihoodTest
#test_programs += LALInferenceProposalTest
test_programs += LALInferenceHDF5Test
# Add shell, Python, etc. test scripts to this variable
# Disable test_multiband.sh for now
......@@ -47,6 +48,7 @@ HL-INJECTIONS_1_TEST-1000000000-10.xml:
MOSTLYCLEANFILES = \
*.dat \
*.out \
test.hdf5 \
test_bayestar_sample_model_psd.xml \
test_bayestar_realize_coincs.xml \
test_bayestar_sim_to_tmpltbank.xml \
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment