Commit 9e57016b authored by Reinhard Prix's avatar Reinhard Prix
Browse files

ComputeFstat: more consistent and convenient collection of timing-data

- don't require compile-time switch, controlled via 'optionalArgs'
  argument to CFS setup function, timing data stored in (opaque)
  FstatInput structure
- API functions to extract and dump timing data to file
- Adapted ComputeFstatBenchmark code to output timing info in both Demod and Resamp cases
- refs #3133
Original: f3a6c0bea5c03301a4d648f3bbe5b2bca1bc815d
parent ea444d76
...@@ -70,7 +70,8 @@ const FstatOptionalArgs FstatOptionalArgsDefaults = { ...@@ -70,7 +70,8 @@ const FstatOptionalArgs FstatOptionalArgsDefaults = {
.injectSources = NULL, .injectSources = NULL,
.injectSqrtSX = NULL, .injectSqrtSX = NULL,
.assumeSqrtSX = NULL, .assumeSqrtSX = NULL,
.prevInput = NULL .prevInput = NULL,
.collectTiming = 0
}; };
// hidden global variables used to pass timings to test/benchmark programs // hidden global variables used to pass timings to test/benchmark programs
...@@ -985,3 +986,39 @@ XLALParseFstatMethodString ( FstatMethodType *Fmethod, //!< [out] Parse ...@@ -985,3 +986,39 @@ XLALParseFstatMethodString ( FstatMethodType *Fmethod, //!< [out] Parse
XLAL_ERROR ( XLAL_EINVAL, "Unknown FstatMethod '%s'\n", s ); XLAL_ERROR ( XLAL_EINVAL, "Unknown FstatMethod '%s'\n", s );
} // XLALParseFstatMethodString() } // XLALParseFstatMethodString()
int
XLALGetFstatTiming ( const FstatInput* input, REAL8 *tauF1Buf, REAL8 *tauF1NoBuf )
{
XLAL_CHECK ( input != NULL, XLAL_EINVAL );
XLAL_CHECK ( (tauF1Buf != NULL) && (tauF1NoBuf != NULL), XLAL_EINVAL );
if ( input->method < FMETHOD_RESAMP_GENERIC )
{
XLAL_CHECK ( XLALGetFstatTiming_Demod ( input->method_data, tauF1Buf, tauF1NoBuf ) == XLAL_SUCCESS, XLAL_EFUNC );
}
else
{
XLAL_CHECK ( XLALGetFstatTiming_Resamp ( input->method_data, tauF1Buf, tauF1NoBuf ) == XLAL_SUCCESS, XLAL_EFUNC );
}
return XLAL_SUCCESS;
} // XLALGetFstatTiming()
int
AppendFstatTimingInfo2File ( const FstatInput* input, FILE *fp )
{
XLAL_CHECK ( input != NULL, XLAL_EINVAL );
XLAL_CHECK ( fp != NULL, XLAL_EINVAL );
if ( input->method < FMETHOD_RESAMP_GENERIC )
{
XLAL_CHECK ( AppendFstatTimingInfo2File_Demod ( input->method_data, fp ) == XLAL_SUCCESS, XLAL_EFUNC );
}
else
{
XLAL_CHECK ( AppendFstatTimingInfo2File_Resamp ( input->method_data, fp ) == XLAL_SUCCESS, XLAL_EFUNC );
}
return XLAL_SUCCESS;
} // AppendFstatTimingInfo2File()
...@@ -132,7 +132,7 @@ typedef struct tagFstatOptionalArgs { ...@@ -132,7 +132,7 @@ typedef struct tagFstatOptionalArgs {
MultiNoiseFloor *injectSqrtSX; ///< Single-sided PSD values for fake Gaussian noise to be added to SFT data. MultiNoiseFloor *injectSqrtSX; ///< Single-sided PSD values for fake Gaussian noise to be added to SFT data.
MultiNoiseFloor *assumeSqrtSX; ///< Single-sided PSD values to be used for computing SFT noise weights instead of from a running median of the SFTs themselves. MultiNoiseFloor *assumeSqrtSX; ///< Single-sided PSD values to be used for computing SFT noise weights instead of from a running median of the SFTs themselves.
FstatInput *prevInput; ///< An \c FstatInput structure from a previous call to XLALCreateFstatInput(); may contain common workspace data than can be re-used to save memory. FstatInput *prevInput; ///< An \c FstatInput structure from a previous call to XLALCreateFstatInput(); may contain common workspace data than can be re-used to save memory.
FILE *timingLogFile; ///< Pointer to a file which might be used to log timing information by some \f$\mathcal{F}\f$-statistic methods. BOOLEAN collectTiming; ///< a flag to turn on/off the collection of F-stat-method-specific timing-data
} FstatOptionalArgs; } FstatOptionalArgs;
#ifdef SWIG // SWIG interface directives #ifdef SWIG // SWIG interface directives
SWIGLAL(COPY_CONSTRUCTOR(tagFstatOptionalArgs)); SWIGLAL(COPY_CONSTRUCTOR(tagFstatOptionalArgs));
...@@ -293,6 +293,8 @@ const MultiLALDetector* XLALGetFstatInputDetectors ( const FstatInput* input ); ...@@ -293,6 +293,8 @@ const MultiLALDetector* XLALGetFstatInputDetectors ( const FstatInput* input );
const MultiLIGOTimeGPSVector* XLALGetFstatInputTimestamps ( const FstatInput* input ); const MultiLIGOTimeGPSVector* XLALGetFstatInputTimestamps ( const FstatInput* input );
const MultiNoiseWeights* XLALGetFstatInputNoiseWeights ( const FstatInput* input ); const MultiNoiseWeights* XLALGetFstatInputNoiseWeights ( const FstatInput* input );
const MultiDetectorStateSeries* XLALGetFstatInputDetectorStates ( const FstatInput* input ); const MultiDetectorStateSeries* XLALGetFstatInputDetectorStates ( const FstatInput* input );
int XLALGetFstatTiming ( const FstatInput* input, REAL8 *tauF1Buf, REAL8 *tauF1NoBuf );
int AppendFstatTimingInfo2File ( const FstatInput* input, FILE *fp );
#ifdef SWIG // SWIG interface directives #ifdef SWIG // SWIG interface directives
SWIGLAL(INOUT_STRUCTS(FstatResults**, Fstats)); SWIGLAL(INOUT_STRUCTS(FstatResults**, Fstats));
......
...@@ -34,6 +34,20 @@ ...@@ -34,6 +34,20 @@
// ========== Demod internals ========== // ========== Demod internals ==========
// ----- local types ---------- // ----- local types ----------
typedef struct tagDemodTimingInfo
{ // NOTE: all times refer to a single-detector timing case
BOOLEAN collectTiming; // turn on/off the collection of F-stat-method-specific timing-data (stored in method-data 'demod')
UINT4 numFreqBins; // number of frequency bins to compute F-stat for
UINT4 numSFTs; // total number of SFTs used
UINT4 numDetectors; // number of detectors
REAL8 tauTotal; // total time spent in XLALComputeFstatDemod()
REAL8 tauBary; // time spent in barycentring
REAL8 tauF1Buf; // Demod timing 'constant': Fstat time per template per detector for a 'buffered' case (same skypos, same numFreqBins)
REAL8 tauF1NoBuf; // Demod timing 'constant': Fstat time per template per detector for an 'unbuffered' usage (different skypos and numFreqBins)
} DemodTimingInfo;
typedef struct { typedef struct {
int (*computefafb_func) ( // XLALComputeFaFb_...() function for the selected Demod hotloop int (*computefafb_func) ( // XLALComputeFaFb_...() function for the selected Demod hotloop
COMPLEX8 *, COMPLEX8 *, FstatAtomVector **, const SFTVector *, const PulsarSpins, const SSBtimes *, const AMCoeffs *, const UINT4 Dterms COMPLEX8 *, COMPLEX8 *, FstatAtomVector **, const SFTVector *, const PulsarSpins, const SSBtimes *, const AMCoeffs *, const UINT4 Dterms
...@@ -44,6 +58,8 @@ typedef struct { ...@@ -44,6 +58,8 @@ typedef struct {
LIGOTimeGPS prevRefTime; // buffering: keep track of previous refTime for SSBtimes buffering LIGOTimeGPS prevRefTime; // buffering: keep track of previous refTime for SSBtimes buffering
MultiSSBtimes *prevMultiSSBtimes; // buffering: previous multiSSB times, unique to skypos + SFTs MultiSSBtimes *prevMultiSSBtimes; // buffering: previous multiSSB times, unique to skypos + SFTs
MultiAMCoeffs *prevMultiAMcoef; // buffering: previous AM-coeffs, unique to skypos + SFTs MultiAMCoeffs *prevMultiAMcoef; // buffering: previous AM-coeffs, unique to skypos + SFTs
DemodTimingInfo timingInfo; // temporary storage for collecting timing data
} DemodMethodData; } DemodMethodData;
// ----- local prototypes ---------- // ----- local prototypes ----------
...@@ -78,12 +94,10 @@ XLALComputeFstatDemod ( FstatResults* Fstats, ...@@ -78,12 +94,10 @@ XLALComputeFstatDemod ( FstatResults* Fstats,
XLAL_CHECK(common != NULL, XLAL_EFAULT); XLAL_CHECK(common != NULL, XLAL_EFAULT);
XLAL_CHECK(method_data != NULL, XLAL_EFAULT); XLAL_CHECK(method_data != NULL, XLAL_EFAULT);
#if COLLECT_TIMING
// get internal timing info
REAL8 tic, toc, tauBary, tauTotal;
#endif
DemodMethodData *demod = (DemodMethodData*) method_data; DemodMethodData *demod = (DemodMethodData*) method_data;
// get internal timing info
DemodTimingInfo *ti = &(demod->timingInfo);
REAL8 tic = 0, toc = 0;
// Get which F-statistic quantities to compute // Get which F-statistic quantities to compute
const FstatQuantities whatToCompute = Fstats->whatWasComputed; const FstatQuantities whatToCompute = Fstats->whatWasComputed;
...@@ -99,10 +113,24 @@ XLALComputeFstatDemod ( FstatResults* Fstats, ...@@ -99,10 +113,24 @@ XLALComputeFstatDemod ( FstatResults* Fstats,
UINT4 numDetectors = multiSFTs->length; UINT4 numDetectors = multiSFTs->length;
XLAL_CHECK ( multiDetStates->length == numDetectors, XLAL_EINVAL ); XLAL_CHECK ( multiDetStates->length == numDetectors, XLAL_EINVAL );
XLAL_CHECK ( multiWeights==NULL || (multiWeights->length == numDetectors), XLAL_EINVAL ); XLAL_CHECK ( multiWeights==NULL || (multiWeights->length == numDetectors), XLAL_EINVAL );
UINT4 numSFTs = 0;
for ( UINT4 X = 0; X < numDetectors; X ++ ) {
numSFTs += multiDetStates->data[X]->length;
}
// initialize timing info struct
if ( ti->collectTiming )
{
XLAL_INIT_MEM ( (*ti) );
ti->collectTiming = 1;
ti->numDetectors = numDetectors;
ti->numFreqBins = Fstats->numFreqBins;
ti->numSFTs = numSFTs;
tic = XLALGetCPUTime();
}
#if COLLECT_TIMING
tic = XLALGetCPUTime();
#endif
MultiSSBtimes *multiSSB = NULL; MultiSSBtimes *multiSSB = NULL;
MultiAMCoeffs *multiAMcoef = NULL; MultiAMCoeffs *multiAMcoef = NULL;
// ----- check if we have buffered SSB+AMcoef for current sky-position // ----- check if we have buffered SSB+AMcoef for current sky-position
...@@ -146,10 +174,11 @@ XLALComputeFstatDemod ( FstatResults* Fstats, ...@@ -146,10 +174,11 @@ XLALComputeFstatDemod ( FstatResults* Fstats,
{ {
multiSSBTotal = multiSSB; multiSSBTotal = multiSSB;
} }
#if COLLECT_TIMING
toc = XLALGetCPUTime(); if ( ti->collectTiming ) {
tauBary = (toc - tic); toc = XLALGetCPUTime();
#endif ti->tauBary = (toc - tic);
}
// ----- compute final Fstatistic-value ----- // ----- compute final Fstatistic-value -----
REAL4 Ad = multiAMcoef->Mmunu.Ad; REAL4 Ad = multiAMcoef->Mmunu.Ad;
...@@ -248,12 +277,12 @@ XLALComputeFstatDemod ( FstatResults* Fstats, ...@@ -248,12 +277,12 @@ XLALComputeFstatDemod ( FstatResults* Fstats,
// Return amplitude modulation coefficients // Return amplitude modulation coefficients
Fstats->Mmunu = demod->prevMultiAMcoef->Mmunu; Fstats->Mmunu = demod->prevMultiAMcoef->Mmunu;
#if COLLECT_TIMING if ( ti->collectTiming ) {
toc = XLALGetCPUTime(); toc = XLALGetCPUTime();
tauTotal = (toc - tic); ti->tauTotal = (toc - tic);
Fstat_tauF1NoBuf = tauTotal / ( Fstats->numFreqBins * numDetectors ); ti->tauF1NoBuf = ti->tauTotal / ( Fstats->numFreqBins * numDetectors );
Fstat_tauF1Buf = (tauTotal - tauBary) / ( Fstats->numFreqBins * numDetectors ); ti->tauF1Buf = (ti->tauTotal - ti->tauBary) / ( Fstats->numFreqBins * numDetectors );
#endif }
return XLAL_SUCCESS; return XLAL_SUCCESS;
...@@ -303,6 +332,9 @@ XLALSetupFstatDemod ( void **method_data, ...@@ -303,6 +332,9 @@ XLALSetupFstatDemod ( void **method_data,
// Save Dterms // Save Dterms
demod->Dterms = optArgs->Dterms; demod->Dterms = optArgs->Dterms;
// Save flag about collecting timing data
demod->timingInfo.collectTiming = optArgs->collectTiming;
// Select XLALComputeFaFb_...() function for the user-requested hotloop variant // Select XLALComputeFaFb_...() function for the user-requested hotloop variant
switch ( optArgs->FstatMethod ) { switch ( optArgs->FstatMethod ) {
case FMETHOD_DEMOD_GENERIC: case FMETHOD_DEMOD_GENERIC:
...@@ -329,3 +361,46 @@ XLALSetupFstatDemod ( void **method_data, ...@@ -329,3 +361,46 @@ XLALSetupFstatDemod ( void **method_data,
return XLAL_SUCCESS; return XLAL_SUCCESS;
} // XLALSetupFstatDemod() } // XLALSetupFstatDemod()
// export timing constants from internally-stored 'method data' struct
int
XLALGetFstatTiming_Demod ( const void* method_data, REAL8 *tauF1Buf, REAL8 *tauF1NoBuf )
{
XLAL_CHECK ( method_data != NULL, XLAL_EINVAL );
XLAL_CHECK ( (tauF1Buf != NULL) && (tauF1NoBuf != NULL), XLAL_EINVAL );
const DemodMethodData *demod = (const DemodMethodData *)method_data;
(*tauF1Buf) = demod->timingInfo.tauF1Buf;
(*tauF1NoBuf) = demod->timingInfo.tauF1NoBuf;
return XLAL_SUCCESS;
} // XLALGetFstatTiming_Demod()
// append detailed (method-specific) timing info to given file
int
AppendFstatTimingInfo2File_Demod ( const void* method_data, FILE *fp )
{
XLAL_CHECK ( method_data != NULL, XLAL_EINVAL );
XLAL_CHECK ( fp != NULL, XLAL_EINVAL );
const DemodMethodData *demod = (const DemodMethodData *)method_data;
// print header on first call
static BOOLEAN print_header = 1;
if ( print_header ) {
fprintf (fp, "%%%%%8s %4s %10s ", "Nfreq", "Ndet", "Nsft" );
fprintf (fp, "%10s %10s %10s %10s %10s\n",
"tauTotal", "tauBary", "tauF1NoBuf", "tauF1Buf", "tauF0Demod" );
print_header = 0;
}
const DemodTimingInfo *ti = &(demod->timingInfo);
fprintf (fp, "%10d %4d %10d", ti->numFreqBins, ti->numDetectors, ti->numSFTs );
REAL8 numSFTsPerDet = ti->numSFTs / ti->numDetectors;
fprintf (fp, "%10.1e %10.1e %10.1e %10.1e %10.1e\n", ti->tauTotal, ti->tauBary, ti->tauF1NoBuf, ti->tauF1Buf, ti->tauF1Buf / numSFTsPerDet );
return XLAL_SUCCESS;
} // AppendFstatTimingInfo2File_Demod()
...@@ -58,9 +58,15 @@ ...@@ -58,9 +58,15 @@
// ----- local types ---------- // ----- local types ----------
// ----- workspace ----------
typedef struct tagResampTimingInfo typedef struct tagResampTimingInfo
{ // NOTE: all times refer to a single-detector timing case { // NOTE: all times refer to a single-detector timing case
BOOLEAN collectTiming; // turn on/off the collection of F-stat-method-specific timing-data (stored in workspace)
UINT4 numFreqBins; // number of frequency bins to compute F-stat for
UINT4 numSFTs; // total number of SFTs used
UINT4 numDetectors; // number of detectors
UINT4 numSamplesFFT; // length of FFT
REAL8 tauTotal; // total time spent in XLALComputeFstatResamp() REAL8 tauTotal; // total time spent in XLALComputeFstatResamp()
REAL8 tauBary; // time spent in barycentric resampling REAL8 tauBary; // time spent in barycentric resampling
REAL8 tauSpin; // time spent in spindown+frequency correction REAL8 tauSpin; // time spent in spindown+frequency correction
...@@ -73,6 +79,7 @@ typedef struct tagResampTimingInfo ...@@ -73,6 +79,7 @@ typedef struct tagResampTimingInfo
REAL8 tauF1NoBuf; // Resampling timing 'constant': Fstat time per template per detector for an 'unbuffered' usage (different skypos and numFreqBins) REAL8 tauF1NoBuf; // Resampling timing 'constant': Fstat time per template per detector for an 'unbuffered' usage (different skypos and numFreqBins)
} ResampTimingInfo; } ResampTimingInfo;
// ----- workspace ----------
typedef struct tagResampWorkspace typedef struct tagResampWorkspace
{ {
// intermediate quantities to interpolate and operate on SRC-frame timeseries // intermediate quantities to interpolate and operate on SRC-frame timeseries
...@@ -95,7 +102,7 @@ typedef struct tagResampWorkspace ...@@ -95,7 +102,7 @@ typedef struct tagResampWorkspace
COMPLEX8 *Fb_k; // properly normalized F_b(f_k) over output bins COMPLEX8 *Fb_k; // properly normalized F_b(f_k) over output bins
UINT4 numFreqBinsAlloc; // internal: keep track of allocated length of frequency-arrays UINT4 numFreqBinsAlloc; // internal: keep track of allocated length of frequency-arrays
ResampTimingInfo timingInfo; // temporary storage for collecting timing data ResampTimingInfo *timingInfo; // pointer to storage for collecting timing data (which lives in ResampMethodData)
} ResampWorkspace; } ResampWorkspace;
typedef struct typedef struct
...@@ -110,7 +117,7 @@ typedef struct ...@@ -110,7 +117,7 @@ typedef struct
MultiCOMPLEX8TimeSeries *multiTimeSeries_SRC_a; // multi-detector SRC-frame timeseries, multiplied by AM function a(t) MultiCOMPLEX8TimeSeries *multiTimeSeries_SRC_a; // multi-detector SRC-frame timeseries, multiplied by AM function a(t)
MultiCOMPLEX8TimeSeries *multiTimeSeries_SRC_b; // multi-detector SRC-frame timeseries, multiplied by AM function b(t) MultiCOMPLEX8TimeSeries *multiTimeSeries_SRC_b; // multi-detector SRC-frame timeseries, multiplied by AM function b(t)
FILE *timingLogFile; // file to write timing info to ResampTimingInfo timingInfo; // temporary storage for collecting timing data
} ResampMethodData; } ResampMethodData;
...@@ -173,36 +180,6 @@ XLALDestroyResampWorkspace ( void *workspace ) ...@@ -173,36 +180,6 @@ XLALDestroyResampWorkspace ( void *workspace )
} // XLALDestroyResampWorkspace() } // XLALDestroyResampWorkspace()
/// debug/optimizer helper function: dump internal info from resampling code into a file
static void
AppendResampInfo2File ( FILE *fp, const FstatCommon *common, const ResampMethodData *resamp )
{
// print header on first call
static BOOLEAN print_header = 1;
if ( print_header ) {
fprintf (fp, "%%%%%8s %10s %6s %10s %10s ",
"Nfreq", "NsFFT", "Nsft0", "Ns_DET0", "Ns_SRC0" );
fprintf (fp, "%10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s\n",
"tauTotal", "tauFFT", "tauBary", "tauSpin", "tauAM", "tauNorm", "tauFab2F", "tauMem", "tauSumFabX", "tauF1NoBuf", "tauF1Buf" );
print_header = 0;
}
const ResampWorkspace *ws = (ResampWorkspace*) common->workspace;
fprintf (fp, "%10d %10d", ws->numFreqBinsOut, ws->numSamplesFFT );
UINT4 numSamples_DETX0 = resamp->multiTimeSeries_DET->data[0]->data->length;
UINT4 numSFTs_X0 = common->multiTimestamps->data[0]->length;
COMPLEX8TimeSeries *ts_SRCX0 = resamp->multiTimeSeries_SRC_a->data[0];
UINT4 numSamples_SRCX0 = ts_SRCX0->data->length;
fprintf (fp, " %6d %10d %10d ", numSFTs_X0, numSamples_DETX0, numSamples_SRCX0 );
const ResampTimingInfo *ti = &(ws->timingInfo);
fprintf (fp, "%10.1e %10.1e %10.1e %10.1e %10.1e %10.1e %10.1e %10.1e %10.1e %10.1e %10.1e\n",
ti->tauTotal, ti->tauFFT, ti->tauBary, ti->tauSpin, 0.0, ti->tauNorm, ti->tauFab2F, ti->tauMem, ti->tauSumFabX, ti->tauF1NoBuf, ti->tauF1Buf );
} // AppendResampInfo2File()
// ---------- internal functions ---------- // ---------- internal functions ----------
static void static void
XLALDestroyResampMethodData ( void* method_data ) XLALDestroyResampMethodData ( void* method_data )
...@@ -239,6 +216,10 @@ XLALSetupFstatResamp ( void **method_data, ...@@ -239,6 +216,10 @@ XLALSetupFstatResamp ( void **method_data,
ResampMethodData *resamp = *method_data = XLALCalloc( 1, sizeof(*resamp) ); ResampMethodData *resamp = *method_data = XLALCalloc( 1, sizeof(*resamp) );
XLAL_CHECK( resamp != NULL, XLAL_ENOMEM ); XLAL_CHECK( resamp != NULL, XLAL_ENOMEM );
// set flag about collecting timing data
XLAL_INIT_MEM ( resamp->timingInfo );
resamp->timingInfo.collectTiming = optArgs->collectTiming;
// Set method function pointers // Set method function pointers
funcs->compute_func = XLALComputeFstatResamp; funcs->compute_func = XLALComputeFstatResamp;
funcs->method_data_destroy_func = XLALDestroyResampMethodData; funcs->method_data_destroy_func = XLALDestroyResampMethodData;
...@@ -342,6 +323,7 @@ XLALSetupFstatResamp ( void **method_data, ...@@ -342,6 +323,7 @@ XLALSetupFstatResamp ( void **method_data,
XLAL_CHECK ( (ws->SRCtimes_DET->data = XLALRealloc ( ws->SRCtimes_DET->data, numSamplesMax_SRC * sizeof(REAL8) )) != NULL, XLAL_ENOMEM ); XLAL_CHECK ( (ws->SRCtimes_DET->data = XLALRealloc ( ws->SRCtimes_DET->data, numSamplesMax_SRC * sizeof(REAL8) )) != NULL, XLAL_ENOMEM );
ws->SRCtimes_DET->length = numSamplesMax_SRC; ws->SRCtimes_DET->length = numSamplesMax_SRC;
} }
} // end: if shared workspace given } // end: if shared workspace given
else else
{ {
...@@ -362,13 +344,6 @@ XLALSetupFstatResamp ( void **method_data, ...@@ -362,13 +344,6 @@ XLALSetupFstatResamp ( void **method_data,
common->workspace = ws; common->workspace = ws;
} // end: if we create our own workspace } // end: if we create our own workspace
#if COLLECT_TIMING
// Set up timing log file
resamp->timingLogFile = optArgs->timingLogFile;
#else
resamp->timingLogFile = NULL;
#endif
return XLAL_SUCCESS; return XLAL_SUCCESS;
} // XLALSetupFstatResamp() } // XLALSetupFstatResamp()
...@@ -392,20 +367,34 @@ XLALComputeFstatResamp ( FstatResults* Fstats, ...@@ -392,20 +367,34 @@ XLALComputeFstatResamp ( FstatResults* Fstats,
ResampWorkspace *ws = (ResampWorkspace*) common->workspace; ResampWorkspace *ws = (ResampWorkspace*) common->workspace;
#if COLLECT_TIMING
// collect internal timing info
XLAL_INIT_MEM ( ws->timingInfo );
ResampTimingInfo *ti = &(ws->timingInfo);
REAL8 ticStart,tocEnd;
ticStart = XLALGetCPUTime();
REAL8 tic,toc;
#endif
// ----- handy shortcuts ---------- // ----- handy shortcuts ----------
PulsarDopplerParams thisPoint = Fstats->doppler; PulsarDopplerParams thisPoint = Fstats->doppler;
const MultiCOMPLEX8TimeSeries *multiTimeSeries_DET = resamp->multiTimeSeries_DET; const MultiCOMPLEX8TimeSeries *multiTimeSeries_DET = resamp->multiTimeSeries_DET;
UINT4 numDetectors = multiTimeSeries_DET->length; UINT4 numDetectors = multiTimeSeries_DET->length;
UINT4 numSFTs = 0;
for ( UINT4 X = 0; X < numDetectors; X ++ ) {
numSFTs += common->multiDetectorStates->data[X]->length;
}
// collect internal timing info
ResampTimingInfo *ti = &(resamp->timingInfo);
REAL8 ticStart = 0, tocEnd = 0;
REAL8 tic = 0, toc = 0;
if ( ti->collectTiming ) {
XLAL_INIT_MEM ( (*ti) );
ti->collectTiming = 1;
ti->numDetectors = numDetectors;
ti->numFreqBins = Fstats->numFreqBins;
ti->numSFTs = numSFTs;
ti->numSamplesFFT = ws->numSamplesFFT;
ticStart = XLALGetCPUTime();
}
// store pointer to timing-info storage in workspace (for use by XLALComputeFaFb_Resamp() )
ws->timingInfo = ti;
// ============================== BEGIN: handle buffering ============================= // ============================== BEGIN: handle buffering =============================
BOOLEAN same_skypos = (resamp->prev_doppler.Alpha == thisPoint.Alpha) && (resamp->prev_doppler.Delta == thisPoint.Delta); BOOLEAN same_skypos = (resamp->prev_doppler.Alpha == thisPoint.Alpha) && (resamp->prev_doppler.Delta == thisPoint.Delta);
BOOLEAN same_refTime = ( GPSDIFF ( resamp->prev_doppler.refTime, thisPoint.refTime ) == 0 ); BOOLEAN same_refTime = ( GPSDIFF ( resamp->prev_doppler.refTime, thisPoint.refTime ) == 0 );
...@@ -417,17 +406,19 @@ XLALComputeFstatResamp ( FstatResults* Fstats, ...@@ -417,17 +406,19 @@ XLALComputeFstatResamp ( FstatResults* Fstats,
(resamp->prev_doppler.argp == thisPoint.argp); (resamp->prev_doppler.argp == thisPoint.argp);
// ----- not same skypos+binary+refTime? --> re-compute SRC-frame timeseries, AM-coeffs and store in buffer // ----- not same skypos+binary+refTime? --> re-compute SRC-frame timeseries, AM-coeffs and store in buffer
#if COLLECT_TIMING if ( ti->collectTiming ) {
tic = XLALGetCPUTime(); tic = XLALGetCPUTime();
#endif }
if ( ! ( same_skypos && same_refTime && same_binary) ) if ( ! ( same_skypos && same_refTime && same_binary) )
{ {
XLAL_CHECK ( XLALBarycentricResampleMultiCOMPLEX8TimeSeries ( resamp, &thisPoint, common ) == XLAL_SUCCESS, XLAL_EFUNC ); XLAL_CHECK ( XLALBarycentricResampleMultiCOMPLEX8TimeSeries ( resamp, &thisPoint, common ) == XLAL_SUCCESS, XLAL_EFUNC );
} }
#if COLLECT_TIMING
toc = XLALGetCPUTime(); if ( ti->collectTiming ) {
ti->tauBary = (toc-tic); toc = XLALGetCPUTime();
#endif ti->tauBary = (toc-tic);
}
MultiCOMPLEX8TimeSeries *multiTimeSeries_SRC_a = resamp->multiTimeSeries_SRC_a; MultiCOMPLEX8TimeSeries *multiTimeSeries_SRC_a = resamp->multiTimeSeries_SRC_a;
MultiCOMPLEX8TimeSeries *multiTimeSeries_SRC_b = resamp->multiTimeSeries_SRC_b; MultiCOMPLEX8TimeSeries *multiTimeSeries_SRC_b = resamp->multiTimeSeries_SRC_b;
...@@ -436,9 +427,9 @@ XLALComputeFstatResamp ( FstatResults* Fstats, ...@@ -436,9 +427,9 @@ XLALComputeFstatResamp ( FstatResults* Fstats,
// ----- workspace that depends on number of output frequency bins 'numFreqBins' ---------- // ----- workspace that depends on number of output frequency bins 'numFreqBins' ----------
UINT4 numFreqBins = Fstats->numFreqBins; UINT4 numFreqBins = Fstats->numFreqBins;
#if COLLECT_TIMING if ( ti->collectTiming ) {
tic = XLALGetCPUTime(); tic = XLALGetCPUTime();
#endif }
// NOTE: we try to use as much existing memory as possible in FstatResults, so we only // NOTE: we try to use as much existing memory as possible in FstatResults, so we only
// allocate local 'workspace' storage in case there's not already a vector allocated in FstatResults for it // allocate local 'workspace' storage in case there's not already a vector allocated in FstatResults for it
...@@ -480,10 +471,10 @@ XLALComputeFstatResamp ( FstatResults* Fstats, ...@@ -480,10 +471,10 @@ XLALComputeFstatResamp ( FstatResults* Fstats,
ws->numFreqBinsOut = numFreqBins; ws->numFreqBinsOut = numFreqBins;
// ==================================================================================================== // ====================================================================================================
#if COLLECT_TIMING if ( ti->collectTiming ) {
toc = XLALGetCPUTime(); toc = XLALGetCPUTime();
ti->tauMem = (toc-tic); // this one doesn't scale with number of detector! ti->tauMem = (toc-tic); // this one doesn't scale with number of detector!
#endif }
// loop over detectors // loop over detectors
for ( UINT4 X=0; X < numDetectors; X++ ) for ( UINT4 X=0; X < numDetectors; X++ )
...@@ -500,9 +491,9 @@ XLALComputeFstatResamp ( FstatResults* Fstats, ...@@ -500,9 +491,9 @@ XLALComputeFstatResamp ( FstatResults* Fstats,
// compute {Fa^X(f_k), Fb^X(f_k)}: results returned via workspace ws // compute {Fa^X(f_k), Fb^X(f_k)}: results returned via workspace ws
XLAL_CHECK ( XLALComputeFaFb_Resamp ( ws, thisPoint, common->dFreq, TimeSeriesX_SRC_a, TimeSeriesX_SRC_b ) == XLAL_SUCCESS, XLAL_EFUNC ); XLAL_CHECK ( XLALComputeFaFb_Resamp ( ws, thisPoint, common->dFreq, TimeSeriesX_SRC_a, TimeSeriesX_SRC_b ) == XLAL_SUCCESS, XLAL_EFUNC );
#if COLLECT_TIMING if ( ti->collectTiming ) {
tic = XLALGetCPUTime(); tic = XLALGetCPUTime();
#endif }
if ( X == 0 ) if ( X == 0 )
{ // avoid having to memset this array: for the first detector we *copy* results { // avoid having to memset this array: for the first detector we *copy* results
for ( UINT4 k = 0; k < numFreqBins; k++ ) for ( UINT4 k = 0; k < numFreqBins; k++ )
...@@ -519,11 +510,13 @@ XLALComputeFstatResamp ( FstatResults* Fstats, ...@@ -519,11 +510,13 @@ XLALComputeFstatResamp ( FstatResults* Fstats,
ws->Fb_k[k] += ws->FbX_k[k]; ws->Fb_k[k] += ws->FbX_k[k];
} }
} // end:if X>0 } // end:if X>0
#if COLLECT_TIMING
toc = XLALGetCPUTime(); if ( ti->collectTiming ) {
ti->tauSumFabX += (toc-tic); toc = XLALGetCPUTime();
tic = toc; ti->tauSumFabX += (toc-tic);
#endif tic = toc;
}
// ----- if requested: compute per-detector Fstat_X_k // ----- if requested: compute per-detector Fstat_X_k
if ( whatToCompute & FSTATQ_2F_PER_DET ) if ( whatToCompute & FSTATQ_2F_PER_DET )
{ {
...@@ -537,18 +530,20 @@ XLALComputeFstatResamp ( FstatResults* Fstats, ...@@ -537,18 +530,20 @@ XLALComputeFstatResamp ( FstatResults* Fstats,
Fstats->twoFPerDet[X][k] = XLALComputeFstatFromFaFb ( ws->FaX_k[k], ws->FbX_k[k], AdX, BdX, CdX, EdX, DdX_inv ); Fstats->twoFPerDet[X][k] = XLALComputeFstatFromFaFb ( ws->FaX_k[k], ws->FbX_k[k], AdX, BdX, CdX, EdX, DdX_inv );
} // for k < numFreqBins } // for k < numFreqBins
} // end: if compute F_X } // end: if compute F_X
#if COLLECT_TIMING
toc = XLALGetCPUTime(); if ( ti->collectTiming ) {
ti->tauFab2F += ( toc - tic ); toc = XLALGetCPUTime();
#endif