Commit 75f78042 authored by Reinhard Prix
Browse files

ComputeFstat: measure and export timing coefficients from Demod methods as well

- can be output by ComputeFstatBenchmark
- allow overriding COLLECT_TIMING via CFLAGS define
- refs #2001
Original: 979200cb8471c78a4e4e4e2b6ccfea37d2bdc486
parent c58a877e
...@@ -72,6 +72,10 @@ const FstatOptionalArgs FstatOptionalArgsDefaults = { ...@@ -72,6 +72,10 @@ const FstatOptionalArgs FstatOptionalArgsDefaults = {
.prevInput = NULL .prevInput = NULL
}; };
// Hidden global variables used to pass timing coefficients to test/benchmark programs.
// They are shared by the Demod and Resamp F-statistic methods, which assign them inside
// '#if COLLECT_TIMING' sections at the end of an F-statistic computation, so each call
// overwrites the values left by the previous one. Both start out as 0.0.
// NOTE(review): values are differences of XLALGetCPUTime() readings, presumably in seconds -- confirm.
REAL8 Fstat_tauF1Buf = 0.0;     // 'buffered' coefficient: time per frequency bin per detector, excluding the barycentering time (Resamp also excludes its workspace-memory time)
REAL8 Fstat_tauF1NoBuf = 0.0;   // 'unbuffered' coefficient: Demod stores the total time per frequency bin per detector here, barycentering included
// ==================== Function definitions =================== // // ==================== Function definitions =================== //
/// ///
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include "ComputeFstat_internal.h" #include "ComputeFstat_internal.h"
#include <lal/Factorial.h> #include <lal/Factorial.h>
#include <lal/LogPrintf.h>
#include <lal/SinCosLUT.h> #include <lal/SinCosLUT.h>
// ========== Demod internals ========== // ========== Demod internals ==========
...@@ -77,6 +78,11 @@ XLALComputeFstatDemod ( FstatResults* Fstats, ...@@ -77,6 +78,11 @@ XLALComputeFstatDemod ( FstatResults* Fstats,
XLAL_CHECK(common != NULL, XLAL_EFAULT); XLAL_CHECK(common != NULL, XLAL_EFAULT);
XLAL_CHECK(method_data != NULL, XLAL_EFAULT); XLAL_CHECK(method_data != NULL, XLAL_EFAULT);
#if COLLECT_TIMING
// get internal timing info
REAL8 tic, toc, tauBary, tauTotal;
#endif
DemodMethodData *demod = (DemodMethodData*) method_data; DemodMethodData *demod = (DemodMethodData*) method_data;
// Get which F-statistic quantities to compute // Get which F-statistic quantities to compute
...@@ -94,6 +100,9 @@ XLALComputeFstatDemod ( FstatResults* Fstats, ...@@ -94,6 +100,9 @@ XLALComputeFstatDemod ( FstatResults* Fstats,
XLAL_CHECK ( multiDetStates->length == numDetectors, XLAL_EINVAL ); XLAL_CHECK ( multiDetStates->length == numDetectors, XLAL_EINVAL );
XLAL_CHECK ( multiWeights==NULL || (multiWeights->length == numDetectors), XLAL_EINVAL ); XLAL_CHECK ( multiWeights==NULL || (multiWeights->length == numDetectors), XLAL_EINVAL );
#if COLLECT_TIMING
tic = XLALGetCPUTime();
#endif
MultiSSBtimes *multiSSB = NULL; MultiSSBtimes *multiSSB = NULL;
MultiAMCoeffs *multiAMcoef = NULL; MultiAMCoeffs *multiAMcoef = NULL;
// ----- check if we have buffered SSB+AMcoef for current sky-position // ----- check if we have buffered SSB+AMcoef for current sky-position
...@@ -137,6 +146,10 @@ XLALComputeFstatDemod ( FstatResults* Fstats, ...@@ -137,6 +146,10 @@ XLALComputeFstatDemod ( FstatResults* Fstats,
{ {
multiSSBTotal = multiSSB; multiSSBTotal = multiSSB;
} }
#if COLLECT_TIMING
toc = XLALGetCPUTime();
tauBary = (toc - tic);
#endif
// ----- compute final Fstatistic-value ----- // ----- compute final Fstatistic-value -----
REAL4 Ad = multiAMcoef->Mmunu.Ad; REAL4 Ad = multiAMcoef->Mmunu.Ad;
...@@ -232,10 +245,16 @@ XLALComputeFstatDemod ( FstatResults* Fstats, ...@@ -232,10 +245,16 @@ XLALComputeFstatDemod ( FstatResults* Fstats,
// this needs to be free'ed, as it's currently not buffered // this needs to be free'ed, as it's currently not buffered
XLALDestroyMultiSSBtimes ( multiBinary ); XLALDestroyMultiSSBtimes ( multiBinary );
// Return amplitude modulation coefficients // Return amplitude modulation coefficients
Fstats->Mmunu = demod->prevMultiAMcoef->Mmunu; Fstats->Mmunu = demod->prevMultiAMcoef->Mmunu;
#if COLLECT_TIMING
toc = XLALGetCPUTime();
tauTotal = (toc - tic);
Fstat_tauF1NoBuf = tauTotal / ( Fstats->numFreqBins * numDetectors );
Fstat_tauF1Buf = (tauTotal - tauBary) / ( Fstats->numFreqBins * numDetectors );
#endif
return XLAL_SUCCESS; return XLAL_SUCCESS;
} // XLALComputeFstatDemod() } // XLALComputeFstatDemod()
......
...@@ -55,7 +55,6 @@ ...@@ -55,7 +55,6 @@
// ----- local constants // ----- local constants
#define COLLECT_TIMING 1
// ----- local types ---------- // ----- local types ----------
...@@ -74,10 +73,6 @@ typedef struct tagResampTimingInfo ...@@ -74,10 +73,6 @@ typedef struct tagResampTimingInfo
REAL8 tauF1NoBuf; // Resampling timing 'constant': Fstat time per template per detector for an 'unbuffered' usage (different skypos and numFreqBins) REAL8 tauF1NoBuf; // Resampling timing 'constant': Fstat time per template per detector for an 'unbuffered' usage (different skypos and numFreqBins)
} ResampTimingInfo; } ResampTimingInfo;
// hidden global variables used to pass timings to test/benchmark programs
REAL8 Resamp_tauF1Buf = 0.0;
REAL8 Resamp_tauF1NoBuf = 0.0;
typedef struct tagResampWorkspace typedef struct tagResampWorkspace
{ {
// intermediate quantities to interpolate and operate on SRC-frame timeseries // intermediate quantities to interpolate and operate on SRC-frame timeseries
...@@ -350,7 +345,7 @@ XLALSetupFstatResamp ( void **method_data, ...@@ -350,7 +345,7 @@ XLALSetupFstatResamp ( void **method_data,
XLAL_CHECK ( ( common->workspace = XLALCreateResampWorkspace ( numSamplesMax_SRC, numSamplesFFT )) != NULL, XLAL_EFUNC ); XLAL_CHECK ( ( common->workspace = XLALCreateResampWorkspace ( numSamplesMax_SRC, numSamplesFFT )) != NULL, XLAL_EFUNC );
} // end: if we create our own workspace } // end: if we create our own workspace
#ifdef COLLECT_TIMING #if COLLECT_TIMING
// Set up timing log file // Set up timing log file
resamp->timingLogFile = optArgs->timingLogFile; resamp->timingLogFile = optArgs->timingLogFile;
#else #else
...@@ -380,7 +375,7 @@ XLALComputeFstatResamp ( FstatResults* Fstats, ...@@ -380,7 +375,7 @@ XLALComputeFstatResamp ( FstatResults* Fstats,
ResampWorkspace *ws = (ResampWorkspace*) common->workspace; ResampWorkspace *ws = (ResampWorkspace*) common->workspace;
#ifdef COLLECT_TIMING #if COLLECT_TIMING
// collect internal timing info // collect internal timing info
XLAL_INIT_MEM ( ws->timingInfo ); XLAL_INIT_MEM ( ws->timingInfo );
ResampTimingInfo *ti = &(ws->timingInfo); ResampTimingInfo *ti = &(ws->timingInfo);
...@@ -405,14 +400,14 @@ XLALComputeFstatResamp ( FstatResults* Fstats, ...@@ -405,14 +400,14 @@ XLALComputeFstatResamp ( FstatResults* Fstats,
(resamp->prev_doppler.argp == thisPoint.argp); (resamp->prev_doppler.argp == thisPoint.argp);
// ----- not same skypos+binary+refTime? --> re-compute SRC-frame timeseries, AM-coeffs and store in buffer // ----- not same skypos+binary+refTime? --> re-compute SRC-frame timeseries, AM-coeffs and store in buffer
#ifdef COLLECT_TIMING #if COLLECT_TIMING
tic = XLALGetCPUTime(); tic = XLALGetCPUTime();
#endif #endif
if ( ! ( same_skypos && same_refTime && same_binary) ) if ( ! ( same_skypos && same_refTime && same_binary) )
{ {
XLAL_CHECK ( XLALBarycentricResampleMultiCOMPLEX8TimeSeries ( resamp, &thisPoint, common ) == XLAL_SUCCESS, XLAL_EFUNC ); XLAL_CHECK ( XLALBarycentricResampleMultiCOMPLEX8TimeSeries ( resamp, &thisPoint, common ) == XLAL_SUCCESS, XLAL_EFUNC );
} }
#ifdef COLLECT_TIMING #if COLLECT_TIMING
toc = XLALGetCPUTime(); toc = XLALGetCPUTime();
ti->tauBary = (toc-tic); ti->tauBary = (toc-tic);
#endif #endif
...@@ -424,7 +419,7 @@ XLALComputeFstatResamp ( FstatResults* Fstats, ...@@ -424,7 +419,7 @@ XLALComputeFstatResamp ( FstatResults* Fstats,
// ----- workspace that depends on number of output frequency bins 'numFreqBins' ---------- // ----- workspace that depends on number of output frequency bins 'numFreqBins' ----------
UINT4 numFreqBins = Fstats->numFreqBins; UINT4 numFreqBins = Fstats->numFreqBins;
#ifdef COLLECT_TIMING #if COLLECT_TIMING
tic = XLALGetCPUTime(); tic = XLALGetCPUTime();
#endif #endif
...@@ -468,7 +463,7 @@ XLALComputeFstatResamp ( FstatResults* Fstats, ...@@ -468,7 +463,7 @@ XLALComputeFstatResamp ( FstatResults* Fstats,
ws->numFreqBinsOut = numFreqBins; ws->numFreqBinsOut = numFreqBins;
// ==================================================================================================== // ====================================================================================================
#ifdef COLLECT_TIMING #if COLLECT_TIMING
toc = XLALGetCPUTime(); toc = XLALGetCPUTime();
ti->tauMem = (toc-tic); // this one doesn't scale with number of detector! ti->tauMem = (toc-tic); // this one doesn't scale with number of detector!
#endif #endif
...@@ -488,7 +483,7 @@ XLALComputeFstatResamp ( FstatResults* Fstats, ...@@ -488,7 +483,7 @@ XLALComputeFstatResamp ( FstatResults* Fstats,
// compute {Fa^X(f_k), Fb^X(f_k)}: results returned via workspace ws // compute {Fa^X(f_k), Fb^X(f_k)}: results returned via workspace ws
XLAL_CHECK ( XLALComputeFaFb_Resamp ( ws, thisPoint, common->dFreq, TimeSeriesX_SRC_a, TimeSeriesX_SRC_b ) == XLAL_SUCCESS, XLAL_EFUNC ); XLAL_CHECK ( XLALComputeFaFb_Resamp ( ws, thisPoint, common->dFreq, TimeSeriesX_SRC_a, TimeSeriesX_SRC_b ) == XLAL_SUCCESS, XLAL_EFUNC );
#ifdef COLLECT_TIMING #if COLLECT_TIMING
tic = XLALGetCPUTime(); tic = XLALGetCPUTime();
#endif #endif
if ( X == 0 ) if ( X == 0 )
...@@ -507,7 +502,7 @@ XLALComputeFstatResamp ( FstatResults* Fstats, ...@@ -507,7 +502,7 @@ XLALComputeFstatResamp ( FstatResults* Fstats,
ws->Fb_k[k] += ws->FbX_k[k]; ws->Fb_k[k] += ws->FbX_k[k];
} }
} // end:if X>0 } // end:if X>0
#ifdef COLLECT_TIMING #if COLLECT_TIMING
toc = XLALGetCPUTime(); toc = XLALGetCPUTime();
ti->tauSumFabX += (toc-tic); ti->tauSumFabX += (toc-tic);
tic = toc; tic = toc;
...@@ -525,14 +520,14 @@ XLALComputeFstatResamp ( FstatResults* Fstats, ...@@ -525,14 +520,14 @@ XLALComputeFstatResamp ( FstatResults* Fstats,
Fstats->twoFPerDet[X][k] = XLALComputeFstatFromFaFb ( ws->FaX_k[k], ws->FbX_k[k], AdX, BdX, CdX, EdX, DdX_inv ); Fstats->twoFPerDet[X][k] = XLALComputeFstatFromFaFb ( ws->FaX_k[k], ws->FbX_k[k], AdX, BdX, CdX, EdX, DdX_inv );
} // for k < numFreqBins } // for k < numFreqBins
} // end: if compute F_X } // end: if compute F_X
#ifdef COLLECT_TIMING #if COLLECT_TIMING
toc = XLALGetCPUTime(); toc = XLALGetCPUTime();
ti->tauFab2F += ( toc - tic ); ti->tauFab2F += ( toc - tic );
#endif #endif
} // for X < numDetectors } // for X < numDetectors
#ifdef COLLECT_TIMING #if COLLECT_TIMING
ti->tauSumFabX /= numDetectors; ti->tauSumFabX /= numDetectors;
ti->tauFab2F /= numDetectors; ti->tauFab2F /= numDetectors;
tic = XLALGetCPUTime(); tic = XLALGetCPUTime();
...@@ -549,7 +544,7 @@ XLALComputeFstatResamp ( FstatResults* Fstats, ...@@ -549,7 +544,7 @@ XLALComputeFstatResamp ( FstatResults* Fstats,
Fstats->twoF[k] = XLALComputeFstatFromFaFb ( ws->Fa_k[k], ws->Fb_k[k], Ad, Bd, Cd, Ed, Dd_inv ); Fstats->twoF[k] = XLALComputeFstatFromFaFb ( ws->Fa_k[k], ws->Fb_k[k], Ad, Bd, Cd, Ed, Dd_inv );
} }
} // if FSTATQ_2F } // if FSTATQ_2F
#ifdef COLLECT_TIMING #if COLLECT_TIMING
toc = XLALGetCPUTime(); toc = XLALGetCPUTime();
ti->tauFab2F += ( toc - tic ); ti->tauFab2F += ( toc - tic );
#endif #endif
...@@ -576,7 +571,7 @@ XLALComputeFstatResamp ( FstatResults* Fstats, ...@@ -576,7 +571,7 @@ XLALComputeFstatResamp ( FstatResults* Fstats,
ws->FbX_k = NULL; ws->FbX_k = NULL;
} }
#ifdef COLLECT_TIMING #if COLLECT_TIMING
// timings are per-detector // timings are per-detector
tocEnd = XLALGetCPUTime(); tocEnd = XLALGetCPUTime();
ti->tauTotal = (tocEnd - ticStart); ti->tauTotal = (tocEnd - ticStart);
...@@ -592,8 +587,8 @@ XLALComputeFstatResamp ( FstatResults* Fstats, ...@@ -592,8 +587,8 @@ XLALComputeFstatResamp ( FstatResults* Fstats,
ti->tauF1Buf = (ti->tauTotal - ti->tauBary - ti->tauMem) / numFreqBins; ti->tauF1Buf = (ti->tauTotal - ti->tauBary - ti->tauMem) / numFreqBins;
// pass out timings to test/benchmark programs // pass out timings to test/benchmark programs
Resamp_tauF1NoBuf = ti->tauF1NoBuf; Fstat_tauF1NoBuf = ti->tauF1NoBuf;
Resamp_tauF1Buf = ti->tauF1Buf; Fstat_tauF1Buf = ti->tauF1Buf;
// write timing info to log file // write timing info to log file
if ( resamp->timingLogFile != NULL ) { if ( resamp->timingLogFile != NULL ) {
...@@ -627,7 +622,7 @@ XLALComputeFaFb_Resamp ( ResampWorkspace *restrict ws, //!< [in,out] pre-allo ...@@ -627,7 +622,7 @@ XLALComputeFaFb_Resamp ( ResampWorkspace *restrict ws, //!< [in,out] pre-allo
REAL8 fMinFFT = fHet + freqShift - dFreq * (ws->numSamplesFFT/2); // we'll shift DC into the *middle bin* N/2 [N always even!] REAL8 fMinFFT = fHet + freqShift - dFreq * (ws->numSamplesFFT/2); // we'll shift DC into the *middle bin* N/2 [N always even!]
UINT4 offset_bins = (UINT4) lround ( ( FreqOut0 - fMinFFT ) / dFreq ); UINT4 offset_bins = (UINT4) lround ( ( FreqOut0 - fMinFFT ) / dFreq );
#ifdef COLLECT_TIMING #if COLLECT_TIMING
// collect some internal timing info // collect some internal timing info
ResampTimingInfo *ti = &(ws->timingInfo); ResampTimingInfo *ti = &(ws->timingInfo);
REAL8 tic,toc; REAL8 tic,toc;
...@@ -639,7 +634,7 @@ XLALComputeFaFb_Resamp ( ResampWorkspace *restrict ws, //!< [in,out] pre-allo ...@@ -639,7 +634,7 @@ XLALComputeFaFb_Resamp ( ResampWorkspace *restrict ws, //!< [in,out] pre-allo
// apply spindown phase-factors, store result in zero-padded timeseries for 'FFT'ing // apply spindown phase-factors, store result in zero-padded timeseries for 'FFT'ing
XLAL_CHECK ( XLALApplySpindownAndFreqShift ( ws->TS_FFT, TimeSeries_SRC_a, &thisPoint, freqShift ) == XLAL_SUCCESS, XLAL_EFUNC ); XLAL_CHECK ( XLALApplySpindownAndFreqShift ( ws->TS_FFT, TimeSeries_SRC_a, &thisPoint, freqShift ) == XLAL_SUCCESS, XLAL_EFUNC );
#ifdef COLLECT_TIMING #if COLLECT_TIMING
toc = XLALGetCPUTime(); toc = XLALGetCPUTime();
ti->tauSpin += ( toc - tic); ti->tauSpin += ( toc - tic);
tic = toc; tic = toc;
...@@ -652,7 +647,7 @@ XLALComputeFaFb_Resamp ( ResampWorkspace *restrict ws, //!< [in,out] pre-allo ...@@ -652,7 +647,7 @@ XLALComputeFaFb_Resamp ( ResampWorkspace *restrict ws, //!< [in,out] pre-allo
ws->FaX_k[k] = ws->FabX_Raw [ offset_bins + k ]; ws->FaX_k[k] = ws->FabX_Raw [ offset_bins + k ];
} }
#ifdef COLLECT_TIMING #if COLLECT_TIMING
toc = XLALGetCPUTime(); toc = XLALGetCPUTime();
ti->tauFFT += ( toc - tic); ti->tauFFT += ( toc - tic);
tic = toc; tic = toc;
...@@ -662,7 +657,7 @@ XLALComputeFaFb_Resamp ( ResampWorkspace *restrict ws, //!< [in,out] pre-allo ...@@ -662,7 +657,7 @@ XLALComputeFaFb_Resamp ( ResampWorkspace *restrict ws, //!< [in,out] pre-allo
// apply spindown phase-factors, store result in zero-padded timeseries for 'FFT'ing // apply spindown phase-factors, store result in zero-padded timeseries for 'FFT'ing
XLAL_CHECK ( XLALApplySpindownAndFreqShift ( ws->TS_FFT, TimeSeries_SRC_b, &thisPoint, freqShift ) == XLAL_SUCCESS, XLAL_EFUNC ); XLAL_CHECK ( XLALApplySpindownAndFreqShift ( ws->TS_FFT, TimeSeries_SRC_b, &thisPoint, freqShift ) == XLAL_SUCCESS, XLAL_EFUNC );
#ifdef COLLECT_TIMING #if COLLECT_TIMING
toc = XLALGetCPUTime(); toc = XLALGetCPUTime();
ti->tauSpin += ( toc - tic); ti->tauSpin += ( toc - tic);
tic = toc; tic = toc;
...@@ -675,7 +670,7 @@ XLALComputeFaFb_Resamp ( ResampWorkspace *restrict ws, //!< [in,out] pre-allo ...@@ -675,7 +670,7 @@ XLALComputeFaFb_Resamp ( ResampWorkspace *restrict ws, //!< [in,out] pre-allo
ws->FbX_k[k] = ws->FabX_Raw [ offset_bins + k ]; ws->FbX_k[k] = ws->FabX_Raw [ offset_bins + k ];
} }
#ifdef COLLECT_TIMING #if COLLECT_TIMING
toc = XLALGetCPUTime(); toc = XLALGetCPUTime();
ti->tauFFT += ( toc - tic); ti->tauFFT += ( toc - tic);
tic = toc; tic = toc;
...@@ -694,7 +689,7 @@ XLALComputeFaFb_Resamp ( ResampWorkspace *restrict ws, //!< [in,out] pre-allo ...@@ -694,7 +689,7 @@ XLALComputeFaFb_Resamp ( ResampWorkspace *restrict ws, //!< [in,out] pre-allo
ws->FbX_k[k] *= normX_k; ws->FbX_k[k] *= normX_k;
} // for k < numFreqBinsOut } // for k < numFreqBinsOut
#ifdef COLLECT_TIMING #if COLLECT_TIMING
toc = XLALGetCPUTime(); toc = XLALGetCPUTime();
ti->tauNorm += ( toc - tic); ti->tauNorm += ( toc - tic);
tic = toc; tic = toc;
......
...@@ -28,10 +28,20 @@ ...@@ -28,10 +28,20 @@
// // // //
// ================================================================================================= // // ================================================================================================= //
// ---------- Shared constants/defines ---------- //
// COLLECT_TIMING: compile-time switch for the internal timing instrumentation.
// Defaults to 1 (enabled) unless it is already defined, which allows overriding it
// from the build, e.g. via CFLAGS (-DCOLLECT_TIMING=0).
// The timing code tests it with '#if COLLECT_TIMING' rather than '#ifdef', so it has
// to be defined to 0 (not merely left undefined) to switch timing off.
#ifndef COLLECT_TIMING
#define COLLECT_TIMING 1
#endif
// ---------- Shared macro definitions ---------- // // ---------- Shared macro definitions ---------- //
#define SQ(x) ( (x) * (x) ) #define SQ(x) ( (x) * (x) )
// ---------- Shared global variables ---------- //
// Hidden global variables used to pass timings to test/benchmark programs.
// These are declarations only: the variables are defined once in a source file and
// written by the F-statistic methods when COLLECT_TIMING is enabled.
extern REAL8 Fstat_tauF1Buf;    // timing coefficient for 'buffered' usage (barycentering time excluded)
extern REAL8 Fstat_tauF1NoBuf;  // timing coefficient for 'unbuffered' usage
// ---------- Shared struct definitions ---------- // // ---------- Shared struct definitions ---------- //
// Common input data for F-statistic methods // Common input data for F-statistic methods
......
...@@ -52,8 +52,8 @@ typedef struct ...@@ -52,8 +52,8 @@ typedef struct
} UserInput_t; } UserInput_t;
// hidden global variables used to pass timings to test/benchmark programs // hidden global variables used to pass timings to test/benchmark programs
extern REAL8 Resamp_tauF1Buf; extern REAL8 Fstat_tauF1Buf;
extern REAL8 Resamp_tauF1NoBuf; extern REAL8 Fstat_tauF1NoBuf;
// ---------- main ---------- // ---------- main ----------
int int
...@@ -238,8 +238,8 @@ main ( int argc, char *argv[] ) ...@@ -238,8 +238,8 @@ main ( int argc, char *argv[] )
XLAL_CHECK ( XLALComputeFstat ( &results, inputs->data[l], &Doppler, numFreqBins_i, whatToCompute ) == XLAL_SUCCESS, XLAL_EFUNC ); XLAL_CHECK ( XLALComputeFstat ( &results, inputs->data[l], &Doppler, numFreqBins_i, whatToCompute ) == XLAL_SUCCESS, XLAL_EFUNC );
// ----- output timing details if requested // ----- output timing details if requested
tauF1NoBuf_i += Resamp_tauF1NoBuf; tauF1NoBuf_i += Fstat_tauF1NoBuf;
tauF1Buf_i += Resamp_tauF1Buf; tauF1Buf_i += Fstat_tauF1Buf;
} // for l < numSegments } // for l < numSegments
tauF1NoBuf_i /= uvar->numSegments; tauF1NoBuf_i /= uvar->numSegments;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment