Skip to content
Snippets Groups Projects
Forked from lscsoft / bayeswave
20 commits behind the upstream repository.
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
BayesWaveCleanFrame.c 22.08 KiB
/*
 *  Copyright (C) 2018 Neil J. Cornish, Tyson B. Littenberg, James A. Clark, Jonah B. Kanner,
 *  Sudarshan Ghonge
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with with program; see the file COPYING. If not, write to the
 *  Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 *  MA  02111-1307  USA
 */

/***************************  REQUIRED LIBRARIES  ***************************/

#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <fftw3.h>
#include <sys/stat.h>
#include <gsl/gsl_math.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_sort_double.h>
#include <gsl/gsl_statistics.h>
#include <gsl/gsl_cdf.h>

#include <lal/LALCache.h>
#include <lal/LALFrStream.h>
#include <lal/TimeSeries.h>
#include <lal/XLALError.h>
#include <lal/Date.h>

#include "BayesCBC.h"
#include "BayesLine.h"
#include "BayesWave.h"
#include "BayesWaveIO.h"
#include "BayesWaveMCMC.h"
#include "BayesWaveMath.h"
#include "BayesWavePrior.h"
#include "BayesWaveModel.h"
#include "BayesWaveWavelet.h"
#include "BayesWaveProposal.h"
#include "BayesWaveEvidence.h"
#include "BayesWaveLikelihood.h"


/*************  PROTOTYPE DECLARATIONS FOR INTERNAL FUNCTIONS  **************/

#define REQARG 11
#define MAXSTRINGSIZE 1024

#define PBSTR "||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||"
#define PBWIDTH 60

#define NP 5 //number of wavelet parameters (only support for SineGaussian model)

struct CleanFrameData
{
  int medianFlag;
  int verboseFlag;

  //Set data products from command line
  char ifo[4];        //interferometer (e.g V1);

  char fr_cache[MAXSTRINGSIZE];//cache file (e.g. V1.cache)
  char fr_chanl[MAXSTRINGSIZE];//channel type (e.g. V1:Hrec_hoft_16384Hz)
  char fr_type[MAXSTRINGSIZE]; //frame type (e.g. V1_llhoft)

  double fr_seglen;   //frame segment length
  double fr_start;    //frame start time

  int fairdraw_index;    //index of fair draw (note, this is 0-indexed.
                         // The point must be from the second half of the chain file)
  char bw_model[MAXSTRINGSIZE]; //bayeswave glitch model

  double bw_seglen;   //bayeswave segment length
  double bw_start;    //bayeswave start time
  double bw_trigtime; //bayeswave trigger time

  char clean_suffix[MAXSTRINGSIZE]; //suffix to add to frame and channel type
  char outdir[MAXSTRINGSIZE]; //Output directory

};

void print_usage();
void parse_command_line_args(int argc, char **argv, struct CleanFrameData *data);

static void output_frame(REAL8TimeSeries *timeData, REAL8TimeSeries *timeRes, REAL8TimeSeries *timeGlitch, CHAR *frameType, CHAR *ifo, CHAR *cleandir);

void printProgress (double percentage)
{
  int val = (int) (percentage * 100);
  int lpad = (int) (percentage * PBWIDTH);
  int rpad = PBWIDTH - lpad;
  printf ("\r%3d%% [%.*s%*s]", val, lpad, PBSTR, rpad, "");
  fflush (stdout);
}


/* ============================  MAIN PROGRAM  ============================ */


int main(int argc, char *argv[]) {
  fprintf(stdout, "\n");
  fprintf(stdout, "#############################################################\n");
  fprintf(stdout, "                BayesWave Glitch Subtraction                 \n");
  fprintf(stdout, "                              *                              \n");
  fprintf(stdout, "                              *                              \n");
  fprintf(stdout, "                              *                              \n");
  fprintf(stdout, "                              *                              \n");
  fprintf(stdout, "                             **                              \n");
  fprintf(stdout, "                            * *                              \n");
  fprintf(stdout, "****************************  *   ***************************\n");
  fprintf(stdout, "                              * **                           \n");
  fprintf(stdout, "                              **                             \n");
  fprintf(stdout, "                              *                              \n");
  fprintf(stdout, "                              *                              \n");
  fprintf(stdout, "                              *                              \n");
  fprintf(stdout, "                              *                              \n");
  fprintf(stdout, "                                                             \n");
  fprintf(stdout, "#############################################################\n");
  fprintf(stdout, "\n");
  print_version(stdout);
  fprintf(stdout, "\n");
  fprintf(stdout, "\n");

  /*   Variable declaration   */
  int i, j, k, NGWF;

  double x;
  double dT;

  double alpha;

  int NBW;
  double norm;
  double s1, s2;
  double t_rise;

  struct CleanFrameData *data = malloc(sizeof(struct CleanFrameData));

  FILE *infile;
  FILE *vFile; //file pointer for verbose products

  // Put in a default clean_suffix
  // FIXME: We want to make this a required option
  // to be supplied in the config file in the future
  // The DCC number here refers to the cleaning recipe
  // here: https://dcc.ligo.org/LIGO-T1700406
  char *version = "T1700406_v4";
  sprintf(data->clean_suffix, "%s", version);
  
  // Put in the default directory name
  char *outdir = ".";
  sprintf(data->outdir, "%s", outdir);
  // ------------------------------------------------- //
  parse_command_line_args(argc, argv, data);

  REAL8TimeSeries* timeRes=NULL;
  REAL8TimeSeries* timeData=NULL;
  REAL8TimeSeries* timeGlitch=NULL;
  LIGOTimeGPS epoch;

  // Set time & retrieve data by reading frame cache
  XLALGPSSetREAL8(&epoch, data->fr_start);
  fprintf(stdout,"Reading original data:\n");
  fprintf(stdout,"     Type:    %s\n",data->fr_type);
  fprintf(stdout,"     Channel: %s\n",data->fr_chanl);
  fprintf(stdout,"\n");
  timeRes    = readTseries(data->fr_cache,data->fr_chanl,epoch,data->fr_seglen);
  timeData   = readTseries(data->fr_cache,data->fr_chanl,epoch,data->fr_seglen);
  timeGlitch = readTseries(data->fr_cache,data->fr_chanl,epoch,data->fr_seglen);

  double frame_sample_rate = 1.0 / timeData->deltaT;

  version = data->clean_suffix;
  outdir = data->outdir;

  char cleandir[MAXSTRINGSIZE];
  sprintf(cleandir, "%s/clean_frame", data->outdir);
  mkdir(cleandir, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);

  char outframeType[MAXSTRINGSIZE];
  char outframeChannel[MAXSTRINGSIZE];
  char outframeGlitchChannel[MAXSTRINGSIZE];

  snprintf(outframeType, MAXSTRINGSIZE, "%s_%s", data->fr_type, version);
  snprintf(outframeChannel, MAXSTRINGSIZE, "%s_%s", data->fr_chanl, version);
  snprintf(outframeGlitchChannel, MAXSTRINGSIZE, "%s_glitch", data->fr_chanl);

  /* set the channel name */
  strncpy(timeData->name, data->fr_chanl, LALNameLength);
  strncpy(timeRes->name, outframeChannel, LALNameLength);
  strncpy(timeGlitch->name, outframeGlitchChannel, LALNameLength);

  fprintf(stdout, "Creating glitch-subtracted frame:\n");
  fprintf(stdout, "     Type:    %s\n", outframeType);
  fprintf(stdout, "     Channel: %s\n", outframeChannel);
  fprintf(stdout, "\n");
  // shortcuts to original frame size/cadence
  dT = timeData->deltaT;
  NGWF = timeData->data->length;

  // Length of the up-sampled glitch data
  NBW = (int) (data->bw_seglen * frame_sample_rate);

  // stores time series of glitch model
  double *glitch_model = malloc(sizeof(double) * NBW);
  double *glitch_model_padded = malloc(sizeof(double) * NGWF);
  double **glitch_model_posterior = malloc(sizeof(double *) * NBW);

  FILE *paramsfile = fopen(data->bw_model, "r");

  int glitch_model_size;
  double *glitch_model_params = malloc(NP * sizeof(double));

  //count lines in file
  int N_chain_samples = 0;
  while(!feof(paramsfile))
  {
    fscanf(paramsfile, "%i", &glitch_model_size);
    for(int d=1; d<=glitch_model_size; d++)
    {
      for(i=0; i<NP; i++) fscanf(paramsfile,"%lg", &glitch_model_params[i]);
    }
    N_chain_samples++;
  }
  rewind(paramsfile);
  N_chain_samples--;


  //get rid of burnin samples
  int N_burnin = N_chain_samples/2;
  for(int n=0; n<N_burnin; n++)
  {
    fscanf(paramsfile, "%i", &glitch_model_size);
    if(glitch_model_size>0)
    {
      for(int d=1; d<=glitch_model_size; d++)
      {
        for(i=0; i<5; i++) fscanf(paramsfile,"%lg", &glitch_model_params[i]);
      }
    }
  }

  //setup array to store post-burnin wavelets
  for(i=0; i<NBW; i++)
  {
    glitch_model_posterior[i] = malloc(sizeof(double)*N_burnin);
    for(int n=0; n<N_burnin; n++) glitch_model_posterior[i][n] = 0.0;
  }

  char filename[128];
  FILE *tempfile;

  // when running with fairdraw, we need to pick a sample at random
  //pick a sample at "random" e.g. final line
  int fair_draw = N_burnin - 1;
  if (data->fairdraw_index >= 0)
  {
    if ((data->fairdraw_index - N_burnin) < 0)
    {
      fprintf(stderr, "ERROR: fairdraw index %i must be in second half of samples (there are %i samples)\n", data->fairdraw_index, N_chain_samples);
      exit(1);
    }
    fair_draw = data->fairdraw_index - N_burnin;
  }

  //parse posterior samples and generate waveforms
  printf("Generating glitch model posterior:\n");
  for(int n=0; n<N_burnin; n++)
  {
    for(i=0; i<NBW; i++) glitch_model[i] = 0.0;
    
    fscanf(paramsfile, "%i", &glitch_model_size);

    if (n == fair_draw && !(data->medianFlag)){
      if (glitch_model_size == 0){
        fprintf(stderr, "WARNING! Fair draw requested at index %i, but the glitch is 0 dimensional there\n", fair_draw + N_burnin);
      }
    }
    
    if(glitch_model_size>0)
    {
      for(int d=1; d<=glitch_model_size; d++)
      {
        for(i=0; i<NP; i++) fscanf(paramsfile,"%lg", &glitch_model_params[i]);
        if ((data->medianFlag) || (n == fair_draw))
        {
          SineGaussianTime(glitch_model, glitch_model_params, NBW, 1, data->bw_seglen);
        }
      }

      //copy glitch_model to master glitch array for safe keeping
      for(i=0; i<NBW; i++) glitch_model_posterior[i][n] = glitch_model[i];
    }
    
    printProgress((double)(n+1)/(double)N_burnin);
  }
  printf("\n");
  
  
  //now, reuse glitch_model to store the reconstruction for subtraction
  if(data->medianFlag)
  {
    for(i=0; i<NBW; i++)
    {
      gsl_sort(glitch_model_posterior[i], 1, N_burnin);
      glitch_model[i] = gsl_stats_median_from_sorted_data(glitch_model_posterior[i], 1, N_burnin);
    }
  }
  else
  {
    for(i=0; i<NBW; i++)
      glitch_model[i] = glitch_model_posterior[i][fair_draw];
  }
  
  /*
   glitch_model_resampled now contains the glitch model,
   upsampled to the original cadence
   */
  printf("Aligning glitch model in full data array:\n");
  
  // figure out where to place the model in the frame
  j = (int)((data->bw_start-data->fr_start)/dT);
  k = j+NBW;

  //zero pad array
  for(i=0; i<NGWF; i++) glitch_model_padded[i] = 0.0;

  //place glitch model in right part of full array
  for(i=j; i< k; i++)
  {
    if(i>=0 && i<NGWF && (i-j)>=0 && (i-j)<NBW)
      glitch_model_padded[i] = glitch_model[i-j];
  }

  /*
   glitch_model_padded now contains the glitch model,
   over the same interval & cadence as the original frame
  */
  printf("Creating residual:\n");


  // Initialise and populate clean data
  for(i=0; i<NGWF; i++)
  {
    //fill LAL data structure with cleaned data for frame-making
    timeRes->data->data[i] = timeData->data->data[i] - glitch_model_padded[i];
    
    //fill another structure with glitch model
    timeGlitch->data->data[i] = glitch_model_padded[i];
  }


  if(data->verboseFlag)
  {
    vFile = fopen("ingredients.dat","w");
    for(i=j; i<k; i++)
    {
      fprintf(vFile,"%lg %lg %lg %lg\n",(double)(i-j)*dT,timeData->data->data[i],timeGlitch->data->data[i],timeRes->data->data[i]);
    }
    fclose(vFile);
  }
  

  // Output cleaned data!
  output_frame(timeData, timeRes, timeGlitch, outframeType, data->ifo, cleandir);
  XLALDestroyREAL8TimeSeries(timeData);

  fprintf(stdout,"\n");
  return 0;
}


/* ********************************************************************************** */
/*							                                        		          */
/*                                 Frame I/O                                          */
/*																			          */
/* ********************************************************************************** */

static void output_frame(REAL8TimeSeries *timeData,
                         REAL8TimeSeries *timeRes,
                         REAL8TimeSeries *timeGlitch,
                         CHAR *frameType,
                         CHAR *ifo,
                         CHAR *cleandir)
{
  CHAR fname[2048];
  INT4 duration;
  INT8 detectorFlags;
  LALFrameH *frame;

  int gpsStart = timeData->epoch.gpsSeconds;
  int gpsEnd = gpsStart + (int)timeData->data->length*timeData->deltaT;


  /* set detector flags */
  if ( strncmp( ifo, "H2", 2 ) == 0 )
    detectorFlags = LAL_LHO_2K_DETECTOR_BIT;
  else if ( strncmp( ifo, "H1", 2 ) == 0 )
    detectorFlags = LAL_LHO_4K_DETECTOR_BIT;
  else if ( strncmp( ifo, "L1", 2 ) == 0 )
    detectorFlags = LAL_LLO_4K_DETECTOR_BIT;
  else if ( strncmp( ifo, "G1", 2 ) == 0 )
    detectorFlags = LAL_GEO_600_DETECTOR_BIT;
  else if ( strncmp( ifo, "V1", 2 ) == 0 )
    detectorFlags = LAL_VIRGO_DETECTOR_BIT;
  else if ( strncmp( ifo, "T1", 2 ) == 0 )
    detectorFlags = LAL_TAMA_300_DETECTOR_BIT;
  else
  {
    fprintf( stderr, "ERROR: Unrecognised IFO: '%s'\n", ifo );
    exit( 1 );
  }

  /* get frame filename */
  duration = gpsEnd - gpsStart;
  snprintf( fname, FILENAME_MAX, "%s/%c-%s-%d-%d.gwf", cleandir, ifo[0], frameType, gpsStart, duration );

  /* define frame */
  frame = XLALFrameNew( &timeData->epoch, duration, "LIGO", 0, 1, detectorFlags );

  /* add channel to frame */
  XLALFrameAddREAL8TimeSeriesProcData( frame, timeRes );
  XLALFrameAddREAL8TimeSeriesProcData( frame, timeGlitch );
  XLALFrameAddREAL8TimeSeriesProcData( frame, timeData );

  fprintf( stdout, "Writing data to frame: '%s'\n", fname );

  /* write frame */
  if (XLALFrameWrite( frame, fname) != 0)
  {
    fprintf( stderr, "ERROR: Cannot save frame file: '%s'\n", fname );
    exit( 1 );
  }

  /* clear frame */
  XLALFrameFree( frame );

  return;
}

void parse_command_line_args(int argc, char **argv, struct CleanFrameData *data)
{
  int i;

  if(argc==1)
  {
    print_usage();
    exit(0);
  }

  FILE *outfile = fopen("bayeswave_cleanframe.run","w");
  for(i=0; i<argc; i++) fprintf(outfile,"%s ",argv[i]);
  fprintf(outfile,"\n");
  fclose(outfile);

  data->medianFlag  = 0;
  data->verboseFlag = 0;
  data->fairdraw_index = -1; // default to -1 meaning we take the last sample

  static struct option long_options[] =
  {
    {"ifo",           required_argument, 0, 0},
    {"glitch-model",  required_argument, 0, 0},
    {"cachefile",     required_argument, 0, 0},
    {"channel",       required_argument, 0, 0},
    {"frame-type",    required_argument, 0, 0},
    {"frame-length",  required_argument, 0, 0},
    {"frame-start",   required_argument, 0, 0},
    {"seglen",        required_argument, 0, 0},
    {"segment-start", required_argument, 0, 0},
    {"trigtime",      required_argument, 0, 0},
    {"clean-suffix",  required_argument, 0, 0},
    {"outdir",        required_argument, 0, 0},
    {"fairdraw-index",required_argument, 0, 0},
    {"median",        no_argument, 0, 0},
    {"verbose",       no_argument, 0, 0},
    {"help",          no_argument, 0,'h'},
    {0, 0, 0, 0}
  };

  int opt=0;
  int long_index=0;

  int argCount = 0;
  int argcheck[REQARG];
  for(i=0; i<REQARG; i++) argcheck[i] = 0;

  //Loop through argv string and pluck out arguments
  while ((opt = getopt_long_only(argc, argv,"apl:b:", long_options, &long_index )) != -1)
  {
    switch (opt)
    {

      case 0:
        if(strcmp("ifo",           long_options[long_index].name) == 0)
        {
          argcheck[argCount]++;
          argCount++;
          sprintf(data->ifo, "%s", optarg);
        }
        if(strcmp("glitch-model",  long_options[long_index].name) == 0)
        {
          argcheck[argCount]++;
          argCount++;
          sprintf(data->bw_model, "%s", optarg);
        }
        if(strcmp("cachefile", long_options[long_index].name) == 0)
        {
          argcheck[argCount]++;
          argCount++;
          sprintf(data->fr_cache, "%s", optarg);
        }
        if(strcmp("channel", long_options[long_index].name) == 0)
        {
          argcheck[argCount]++;
          argCount++;
          sprintf(data->fr_chanl, "%s", optarg);
        }
        if(strcmp("frame-type", long_options[long_index].name) == 0)
        {
          argcheck[argCount]++;
          argCount++;
          sprintf(data->fr_type, "%s", optarg);
        }
        if(strcmp("frame-length", long_options[long_index].name) == 0)
        {
          argcheck[argCount]++;
          argCount++;
          data->fr_seglen = (double)atof(optarg);
        }
        if(strcmp("frame-start", long_options[long_index].name) == 0)
        {
          argcheck[argCount]++;
          argCount++;
          data->fr_start = (double)atof(optarg);
        }
        if(strcmp("seglen", long_options[long_index].name) == 0)
        {
          argcheck[argCount]++;
          argCount++;
          data->bw_seglen = (double)atof(optarg);
        }
        if(strcmp("segment-start", long_options[long_index].name) == 0)
        {
          argcheck[argCount]++;
          argCount++;
          data->bw_start = (double)atof(optarg);
        }
        if(strcmp("trigtime", long_options[long_index].name) == 0)
        {
          argcheck[argCount]++;
          argCount++;
          data->bw_trigtime = (double)atof(optarg);
        }
        
        if(strcmp("clean-suffix", long_options[long_index].name) == 0)
        {
          argcheck[argCount]++;
          argCount++;
          sprintf(data->clean_suffix, "%s", optarg);       
        }
        if(strcmp("outdir", long_options[long_index].name) == 0)
        {
          argcheck[argCount]++;
          argCount++;
          sprintf(data->outdir, "%s", optarg);
        }
        if(strcmp("verbose",long_options[long_index].name) == 0)
        {
          data->verboseFlag = 1;
        }
        if(strcmp("median",long_options[long_index].name) == 0)
        {
          data->medianFlag = 1;
        }
        if(strcmp("fairdraw-index",long_options[long_index].name) == 0)
        {
          data->fairdraw_index = atoi(optarg);
          if (data->fairdraw_index < 0)
          {
            fprintf(stderr, "ERROR: When supplied, fairdraw-index must be a positive integer, given instead %i\n", data->fairdraw_index);
            exit(1);
          }
        }
        if(strcmp("help", long_options[long_index].name) == 0)
        {
          fprintf(stdout,"BayesWaveCleanFrame:\n");
          fprintf(stdout,"  Create glitch-subtracted frames from BayesWave residual\n");
          fprintf(stdout,"\n");
          print_usage();
          exit(0);
        }
        break;
      case 'h':
        fprintf(stdout,"BayesWaveCleanFrame:\n");
        fprintf(stdout,"  Create glitch-subtracted frames from BayesWave residual\n");
        fprintf(stdout,"\n");
        print_usage();
        exit(0);
        break;
      default: print_usage();
        exit(0);
    }
  }

  //make sure there all the right arguments were used
  for(i=0; i<REQARG; i++)
  {
    if(argcheck[i]!=1)
    {
      fprintf(stdout,"\nBayesWaveClean: missing requird argument\n\n");
      print_usage();
      exit(0);
    }
  }
  //See how we did with th command line
  fprintf(stdout,"interferometer.............%s\n",data->ifo);
  fprintf(stdout,"\n");
  fprintf(stdout,"cache file.................%s\n",data->fr_cache);
  fprintf(stdout,"channel type...............%s\n",data->fr_chanl);
  fprintf(stdout,"frame type.................%s\n",data->fr_type);
  fprintf(stdout,"\n");
  fprintf(stdout,"frame segment length.......%li\n",(long)data->fr_seglen);
  fprintf(stdout,"frame start time...........%li\n",(long)data->fr_start);
  fprintf(stdout,"\n");
  fprintf(stdout,"bayeswave glitch model.....%s\n",data->bw_model);
  fprintf(stdout,"bayeswave segment length...%li\n",(long)data->bw_seglen);
  fprintf(stdout,"bayeswave start time.......%li\n",(long)data->bw_start);
  fprintf(stdout,"bayeswave trigger time.....%li\n",(long)data->bw_trigtime);
  fprintf(stdout,"clean suffix...............%s\n",data->clean_suffix);
  fprintf(stdout,"output directory...........%s\n",data->outdir);
  fprintf(stdout,"\n");
}

void print_usage()
{
  fprintf(stdout,"Requird Arguments:\n");
  fprintf(stdout,"--ifo           interferometers (H1,L1,..)\n");
  fprintf(stdout,"--glitch-model  glitch reconstruction\n");
  fprintf(stdout,"--cachefile     cache files pointing to original frames\n");
  fprintf(stdout,"--channel       data channel name in original frames\n");
  fprintf(stdout,"--frame-type    original frame type\n");
  fprintf(stdout,"--frame-length  frame length\n");
  fprintf(stdout,"--frame-start   frame start time\n");
  fprintf(stdout,"--seglen        bayeswave segment length\n");
  fprintf(stdout,"--segment-start bayeswave segment start time\n");
  fprintf(stdout,"--trigtime      bayeswave trigger time\n");
  fprintf(stdout,"\n");
  fprintf(stdout,"Optional Arguments:\n");
  fprintf(stdout,"--clean-suffix  Suffix to be placed after frame and channel\n");
  fprintf(stdout,"                names to indicate the cleaned frames\n");
  fprintf(stdout,"                defaults to T1700406_v4 (FIXME)\n");
  fprintf(stdout,"--outdir        Output directory. Default is current directory ('.')\n");
  fprintf(stdout,"--verbose       print intermediate data products to disk\n");
  fprintf(stdout,"--median        use median glitch reconstruction\n");
  fprintf(stdout,"--fairdraw-index Index of fairdraw in glitch-model file. Note that the index must be in the second half of the file\n");
  fprintf(stdout,"                can only be used on a completed run\n");
  fprintf(stdout,"--help          print help output\n");
  fprintf(stdout,"\n");
}