From 85b963fe50e9cec802aa7e89c9e56b9dfe37eb0b Mon Sep 17 00:00:00 2001 From: Sophie Hourihane <sophie.hourihane@ligo.org> Date: Wed, 19 Jun 2024 11:10:06 -0700 Subject: [PATCH] Adding --thin-by as an argument to BayesWaveCleanFrame --- src/BayesWaveCleanFrame.c | 171 +++++++++++++++++++++++--------------- 1 file changed, 104 insertions(+), 67 deletions(-) diff --git a/src/BayesWaveCleanFrame.c b/src/BayesWaveCleanFrame.c index d8d2b97..f8060b5 100644 --- a/src/BayesWaveCleanFrame.c +++ b/src/BayesWaveCleanFrame.c @@ -81,6 +81,7 @@ struct CleanFrameData int fairdraw_index; //index of fair draw (note, this is 0-indexed. // The point must be from the second half of the chain file) + int thin_by; // factor by which to thin chains (default 1) char bw_model[MAXSTRINGSIZE]; //bayeswave glitch model double bw_seglen; //bayeswave segment length @@ -107,6 +108,49 @@ void printProgress (double percentage) } +int getNumberOfLinesInFile(FILE *paramsfile) { + // First, count the number of lines in the file. + int line_count = 0; + char ch; + + // Save the current position of the file pointer. + long initial_pos = ftell(paramsfile); + + // Count lines by reading character by character + while ((ch = fgetc(paramsfile)) != EOF) { + if (ch == '\n') { + line_count++; + } + } + // Reset the file pointer to the beginning of the file. + fseek(paramsfile, initial_pos, SEEK_SET); + return line_count; +} + +void jumpNLinesInFile(FILE *paramsfile, int line_number) { + if (line_number < 0) { + fprintf(stderr, "ERROR: Line number cannot be negative.\n"); + exit(1); + } + + char *line = NULL; + size_t len = 0; + ssize_t read; + int current_line = 0; + //fprintf(stdout, "Jumping %i lines in file.\n", line_number); + while (current_line < line_number) { + read = getline(&line, &len, paramsfile); + if (read == -1) { + fprintf(stderr, "ERROR: Could not jump to line %d in file. Reached EOF.\n", line_number); + free(line); // Free the allocated buffer before exiting + exit(1); + } + current_line++; + } + free(line); // Free the allocated buffer after use +} + + /* ============================ MAIN PROGRAM ============================ */ @@ -135,7 +179,7 @@ int main(int argc, char *argv[]) { fprintf(stdout, "\n"); /* Variable declaration */ - int i, j, k, NGWF; + int j, k, NGWF; double x; double dT; @@ -222,72 +266,58 @@ int main(int argc, char *argv[]) { FILE *paramsfile = fopen(data->bw_model, "r"); + int N_chain_samples = getNumberOfLinesInFile(paramsfile); + fprintf(stdout, "Number of lines in file: %i\n", N_chain_samples); + int glitch_model_size; double *glitch_model_params = malloc(NP * sizeof(double)); - //count lines in file - int N_chain_samples = 0; - while(!feof(paramsfile)) - { - fscanf(paramsfile, "%i", &glitch_model_size); - for(int d=1; d<=glitch_model_size; d++) - { - for(i=0; i<NP; i++) fscanf(paramsfile,"%lg", &glitch_model_params[i]); - } - N_chain_samples++; - } - rewind(paramsfile); - N_chain_samples--; - - - //get rid of burnin samples + //get rid of burnin samples by placing file pointer halfway down int N_burnin = N_chain_samples/2; - for(int n=0; n<N_burnin; n++) - { - fscanf(paramsfile, "%i", &glitch_model_size); - if(glitch_model_size>0) - { - for(int d=1; d<=glitch_model_size; d++) - { - for(i=0; i<5; i++) fscanf(paramsfile,"%lg", &glitch_model_params[i]); - } - } + int N_after_burnin = N_chain_samples - N_burnin; + int NWaveforms = 1; + if (data->medianFlag){ + NWaveforms = N_after_burnin / data->thin_by; } //setup array to store post-burnin wavelets - for(i=0; i<NBW; i++) + for(int i=0; i<NBW; i++) { - glitch_model_posterior[i] = malloc(sizeof(double)*N_burnin); - for(int n=0; n<N_burnin; n++) glitch_model_posterior[i][n] = 0.0; + glitch_model_posterior[i] = malloc(sizeof(double) * NWaveforms); + for(int n=0; n<NWaveforms; n++) glitch_model_posterior[i][n] = 0.0; } char filename[128]; FILE *tempfile; - // when running with fairdraw, we need to pick a sample at random - //pick a sample at "random" e.g. final line - int fair_draw = N_burnin - 1; - if (data->fairdraw_index >= 0) + // set fairdraw index to be last sample if not specified + int fairdraw_index = data->fairdraw_index < 0 ? N_chain_samples - 1 : data->fairdraw_index; + + int index = 0; + + if (data->medianFlag) { - if ((data->fairdraw_index - N_burnin) < 0) - { - fprintf(stderr, "ERROR: fairdraw index %i must be in second half of samples (there are %i samples)\n", data->fairdraw_index, N_chain_samples); - exit(1); - } - fair_draw = data->fairdraw_index - N_burnin; + index = N_burnin; + jumpNLinesInFile(paramsfile, N_burnin); + } + else { + index = fairdraw_index; + jumpNLinesInFile(paramsfile, fairdraw_index); } //parse posterior samples and generate waveforms printf("Generating glitch model posterior:\n"); - for(int n=0; n<N_burnin; n++) + for(int n=0; n<NWaveforms; n++) { - for(i=0; i<NBW; i++) glitch_model[i] = 0.0; + for(int i=0; i<NBW; i++) glitch_model[i] = 0.0; fscanf(paramsfile, "%i", &glitch_model_size); - if (n == fair_draw && !(data->medianFlag)){ + //fprintf(stdout, "Index is %i glitch model size: %i params:", index + n * data->thin_by, glitch_model_size); + + if (n == 0 && !(data->medianFlag)){ if (glitch_model_size == 0){ - fprintf(stderr, "WARNING! Fair draw requested at index %i, but the glitch is 0 dimensional there\n", fair_draw + N_burnin); + fprintf(stderr, "WARNING! Fair draw requested at index %i, but the glitch is 0 dimensional there\n", fairdraw_index); } } @@ -295,18 +325,14 @@ int main(int argc, char *argv[]) { { for(int d=1; d<=glitch_model_size; d++) { - for(i=0; i<NP; i++) fscanf(paramsfile,"%lg", &glitch_model_params[i]); - if ((data->medianFlag) || (n == fair_draw)) - { + for(int i=0; i<NP; i++) fscanf(paramsfile,"%lg", &glitch_model_params[i]); + //fprintf(stdout, "t: %f f: %f Q: %f A: %e phi: %f ", glitch_model_params[0], glitch_model_params[1], glitch_model_params[2], glitch_model_params[3], glitch_model_params[4]); SineGaussianTime(glitch_model, glitch_model_params, NBW, 1, data->bw_seglen); - } } - - //copy glitch_model to master glitch array for safe keeping - for(i=0; i<NBW; i++) glitch_model_posterior[i][n] = glitch_model[i]; + for(int i=0; i<NBW; i++) glitch_model_posterior[i][n] = glitch_model[i]; } - - printProgress((double)(n+1)/(double)N_burnin); + printProgress((double)(n+1)/(double)NWaveforms); + jumpNLinesInFile(paramsfile, data->thin_by); } printf("\n"); @@ -314,16 +340,16 @@ int main(int argc, char *argv[]) { //now, reuse glitch_model to store the reconstruction for subtraction if(data->medianFlag) { - for(i=0; i<NBW; i++) + for(int i=0; i<NBW; i++) { - gsl_sort(glitch_model_posterior[i], 1, N_burnin); - glitch_model[i] = gsl_stats_median_from_sorted_data(glitch_model_posterior[i], 1, N_burnin); + gsl_sort(glitch_model_posterior[i], 1, NWaveforms); + glitch_model[i] = gsl_stats_median_from_sorted_data(glitch_model_posterior[i], 1, NWaveforms); } } else { - for(i=0; i<NBW; i++) - glitch_model[i] = glitch_model_posterior[i][fair_draw]; + for(int i=0; i<NBW; i++) + glitch_model[i] = glitch_model_posterior[i][0]; } /* @@ -337,10 +363,10 @@ int main(int argc, char *argv[]) { k = j+NBW; //zero pad array - for(i=0; i<NGWF; i++) glitch_model_padded[i] = 0.0; + for(int i=0; i<NGWF; i++) glitch_model_padded[i] = 0.0; //place glitch model in right part of full array - for(i=j; i< k; i++) + for(int i=j; i< k; i++) { if(i>=0 && i<NGWF && (i-j)>=0 && (i-j)<NBW) glitch_model_padded[i] = glitch_model[i-j]; @@ -354,7 +380,7 @@ int main(int argc, char *argv[]) { // Initialise and populate clean data - for(i=0; i<NGWF; i++) + for(int i=0; i<NGWF; i++) { //fill LAL data structure with cleaned data for frame-making timeRes->data->data[i] = timeData->data->data[i] - glitch_model_padded[i]; @@ -367,7 +393,7 @@ int main(int argc, char *argv[]) { if(data->verboseFlag) { vFile = fopen("ingredients.dat","w"); - for(i=j; i<k; i++) + for(int i=j; i<k; i++) { fprintf(vFile,"%lg %lg %lg %lg\n",(double)(i-j)*dT,timeData->data->data[i],timeGlitch->data->data[i],timeRes->data->data[i]); } @@ -454,8 +480,6 @@ static void output_frame(REAL8TimeSeries *timeData, void parse_command_line_args(int argc, char **argv, struct CleanFrameData *data) { - int i; - if(argc==1) { print_usage(); @@ -463,13 +487,14 @@ void parse_command_line_args(int argc, char **argv, struct CleanFrameData *data) } FILE *outfile = fopen("bayeswave_cleanframe.run","w"); - for(i=0; i<argc; i++) fprintf(outfile,"%s ",argv[i]); + for(int i=0; i<argc; i++) fprintf(outfile,"%s ",argv[i]); fprintf(outfile,"\n"); fclose(outfile); data->medianFlag = 0; data->verboseFlag = 0; data->fairdraw_index = -1; // default to -1 meaning we take the last sample + data->thin_by = 1; static struct option long_options[] = { @@ -486,6 +511,7 @@ void parse_command_line_args(int argc, char **argv, struct CleanFrameData *data) {"clean-suffix", required_argument, 0, 0}, {"outdir", required_argument, 0, 0}, {"fairdraw-index",required_argument, 0, 0}, + {"thin-by", required_argument, 0, 0}, {"median", no_argument, 0, 0}, {"verbose", no_argument, 0, 0}, {"help", no_argument, 0,'h'}, @@ -497,7 +523,7 @@ void parse_command_line_args(int argc, char **argv, struct CleanFrameData *data) int argCount = 0; int argcheck[REQARG]; - for(i=0; i<REQARG; i++) argcheck[i] = 0; + for(int i=0; i<REQARG; i++) argcheck[i] = 0; //Loop through argv string and pluck out arguments while ((opt = getopt_long_only(argc, argv,"apl:b:", long_options, &long_index )) != -1) @@ -596,6 +622,16 @@ void parse_command_line_args(int argc, char **argv, struct CleanFrameData *data) exit(1); } } + if(strcmp("thin-by",long_options[long_index].name) == 0) + { + data->thin_by = atoi(optarg); + fprintf(stdout, "setting thin-by to %i\n", data->thin_by); + if (data->thin_by < 1) + { + fprintf(stderr, "ERROR: When supplied, thin_by must be a positive integer <= 1, given instead %i\n", data->thin_by); + exit(1); + } + } if(strcmp("help", long_options[long_index].name) == 0) { fprintf(stdout,"BayesWaveCleanFrame:\n"); @@ -618,7 +654,7 @@ void parse_command_line_args(int argc, char **argv, struct CleanFrameData *data) } //make sure there all the right arguments were used - for(i=0; i<REQARG; i++) + for(int i=0; i<REQARG; i++) { if(argcheck[i]!=1) { @@ -669,6 +705,7 @@ void print_usage() fprintf(stdout,"--verbose print intermediate data products to disk\n"); fprintf(stdout,"--median use median glitch reconstruction\n"); fprintf(stdout,"--fairdraw-index Index of fairdraw in glitch-model file. Note that the index must be in the second half of the file\n"); + fprintf(stdout,"--thin-by When calculating median, only takes median from every thin-by samples (default 1)\n"); fprintf(stdout," can only be used on a completed run\n"); fprintf(stdout,"--help print help output\n"); fprintf(stdout,"\n"); -- GitLab