Commit 75f78042 authored by Reinhard Prix

ComputeFstat: measure and export timing coefficients from Demod methods as well

- can be output by ComputeFstatBenchmark
- allow overriding COLLECT_TIMING via CFLAGS define
- refs #2001
Original: 979200cb8471c78a4e4e4e2b6ccfea37d2bdc486
parent c58a877e
......@@ -72,6 +72,10 @@ const FstatOptionalArgs FstatOptionalArgsDefaults = {
.prevInput = NULL
};
// Hidden global variables used to pass timing coefficients to test/benchmark programs.
// They are written by XLALComputeFstatDemod() and XLALComputeFstatResamp() when
// COLLECT_TIMING is non-zero. Both methods share this single pair, so the values
// reflect whichever F-statistic call ran most recently.
// 'Buffered' coefficient: barycentering time is excluded
// (Demod: (tauTotal - tauBary) / (numFreqBins * numDetectors)).
REAL8 Fstat_tauF1Buf = 0.0;
// 'Unbuffered' coefficient: based on the total time
// (Demod: tauTotal / (numFreqBins * numDetectors)).
REAL8 Fstat_tauF1NoBuf = 0.0;
// ==================== Function definitions =================== //
///
......
......@@ -28,6 +28,7 @@
#include "ComputeFstat_internal.h"
#include <lal/Factorial.h>
#include <lal/LogPrintf.h>
#include <lal/SinCosLUT.h>
// ========== Demod internals ==========
......@@ -77,6 +78,11 @@ XLALComputeFstatDemod ( FstatResults* Fstats,
XLAL_CHECK(common != NULL, XLAL_EFAULT);
XLAL_CHECK(method_data != NULL, XLAL_EFAULT);
#if COLLECT_TIMING
// get internal timing info
REAL8 tic, toc, tauBary, tauTotal;
#endif
DemodMethodData *demod = (DemodMethodData*) method_data;
// Get which F-statistic quantities to compute
......@@ -94,6 +100,9 @@ XLALComputeFstatDemod ( FstatResults* Fstats,
XLAL_CHECK ( multiDetStates->length == numDetectors, XLAL_EINVAL );
XLAL_CHECK ( multiWeights==NULL || (multiWeights->length == numDetectors), XLAL_EINVAL );
#if COLLECT_TIMING
tic = XLALGetCPUTime();
#endif
MultiSSBtimes *multiSSB = NULL;
MultiAMCoeffs *multiAMcoef = NULL;
// ----- check if we have buffered SSB+AMcoef for current sky-position
......@@ -137,6 +146,10 @@ XLALComputeFstatDemod ( FstatResults* Fstats,
{
multiSSBTotal = multiSSB;
}
#if COLLECT_TIMING
toc = XLALGetCPUTime();
tauBary = (toc - tic);
#endif
// ----- compute final Fstatistic-value -----
REAL4 Ad = multiAMcoef->Mmunu.Ad;
......@@ -232,10 +245,16 @@ XLALComputeFstatDemod ( FstatResults* Fstats,
// this needs to be free'ed, as it's currently not buffered
XLALDestroyMultiSSBtimes ( multiBinary );
// Return amplitude modulation coefficients
Fstats->Mmunu = demod->prevMultiAMcoef->Mmunu;
#if COLLECT_TIMING
toc = XLALGetCPUTime();
tauTotal = (toc - tic);
Fstat_tauF1NoBuf = tauTotal / ( Fstats->numFreqBins * numDetectors );
Fstat_tauF1Buf = (tauTotal - tauBary) / ( Fstats->numFreqBins * numDetectors );
#endif
return XLAL_SUCCESS;
} // XLALComputeFstatDemod()
......
......@@ -55,7 +55,6 @@
// ----- local constants
#define COLLECT_TIMING 1
// ----- local types ----------
......@@ -74,10 +73,6 @@ typedef struct tagResampTimingInfo
REAL8 tauF1NoBuf; // Resampling timing 'constant': Fstat time per template per detector for an 'unbuffered' usage (different skypos and numFreqBins)
} ResampTimingInfo;
// hidden global variables used to pass timings to test/benchmark programs
REAL8 Resamp_tauF1Buf = 0.0;
REAL8 Resamp_tauF1NoBuf = 0.0;
typedef struct tagResampWorkspace
{
// intermediate quantities to interpolate and operate on SRC-frame timeseries
......@@ -350,7 +345,7 @@ XLALSetupFstatResamp ( void **method_data,
XLAL_CHECK ( ( common->workspace = XLALCreateResampWorkspace ( numSamplesMax_SRC, numSamplesFFT )) != NULL, XLAL_EFUNC );
} // end: if we create our own workspace
#ifdef COLLECT_TIMING
#if COLLECT_TIMING
// Set up timing log file
resamp->timingLogFile = optArgs->timingLogFile;
#else
......@@ -380,7 +375,7 @@ XLALComputeFstatResamp ( FstatResults* Fstats,
ResampWorkspace *ws = (ResampWorkspace*) common->workspace;
#ifdef COLLECT_TIMING
#if COLLECT_TIMING
// collect internal timing info
XLAL_INIT_MEM ( ws->timingInfo );
ResampTimingInfo *ti = &(ws->timingInfo);
......@@ -405,14 +400,14 @@ XLALComputeFstatResamp ( FstatResults* Fstats,
(resamp->prev_doppler.argp == thisPoint.argp);
// ----- not same skypos+binary+refTime? --> re-compute SRC-frame timeseries, AM-coeffs and store in buffer
#ifdef COLLECT_TIMING
#if COLLECT_TIMING
tic = XLALGetCPUTime();
#endif
if ( ! ( same_skypos && same_refTime && same_binary) )
{
XLAL_CHECK ( XLALBarycentricResampleMultiCOMPLEX8TimeSeries ( resamp, &thisPoint, common ) == XLAL_SUCCESS, XLAL_EFUNC );
}
#ifdef COLLECT_TIMING
#if COLLECT_TIMING
toc = XLALGetCPUTime();
ti->tauBary = (toc-tic);
#endif
......@@ -424,7 +419,7 @@ XLALComputeFstatResamp ( FstatResults* Fstats,
// ----- workspace that depends on number of output frequency bins 'numFreqBins' ----------
UINT4 numFreqBins = Fstats->numFreqBins;
#ifdef COLLECT_TIMING
#if COLLECT_TIMING
tic = XLALGetCPUTime();
#endif
......@@ -468,7 +463,7 @@ XLALComputeFstatResamp ( FstatResults* Fstats,
ws->numFreqBinsOut = numFreqBins;
// ====================================================================================================
#ifdef COLLECT_TIMING
#if COLLECT_TIMING
toc = XLALGetCPUTime();
ti->tauMem = (toc-tic); // this one doesn't scale with number of detector!
#endif
......@@ -488,7 +483,7 @@ XLALComputeFstatResamp ( FstatResults* Fstats,
// compute {Fa^X(f_k), Fb^X(f_k)}: results returned via workspace ws
XLAL_CHECK ( XLALComputeFaFb_Resamp ( ws, thisPoint, common->dFreq, TimeSeriesX_SRC_a, TimeSeriesX_SRC_b ) == XLAL_SUCCESS, XLAL_EFUNC );
#ifdef COLLECT_TIMING
#if COLLECT_TIMING
tic = XLALGetCPUTime();
#endif
if ( X == 0 )
......@@ -507,7 +502,7 @@ XLALComputeFstatResamp ( FstatResults* Fstats,
ws->Fb_k[k] += ws->FbX_k[k];
}
} // end:if X>0
#ifdef COLLECT_TIMING
#if COLLECT_TIMING
toc = XLALGetCPUTime();
ti->tauSumFabX += (toc-tic);
tic = toc;
......@@ -525,14 +520,14 @@ XLALComputeFstatResamp ( FstatResults* Fstats,
Fstats->twoFPerDet[X][k] = XLALComputeFstatFromFaFb ( ws->FaX_k[k], ws->FbX_k[k], AdX, BdX, CdX, EdX, DdX_inv );
} // for k < numFreqBins
} // end: if compute F_X
#ifdef COLLECT_TIMING
#if COLLECT_TIMING
toc = XLALGetCPUTime();
ti->tauFab2F += ( toc - tic );
#endif
} // for X < numDetectors
#ifdef COLLECT_TIMING
#if COLLECT_TIMING
ti->tauSumFabX /= numDetectors;
ti->tauFab2F /= numDetectors;
tic = XLALGetCPUTime();
......@@ -549,7 +544,7 @@ XLALComputeFstatResamp ( FstatResults* Fstats,
Fstats->twoF[k] = XLALComputeFstatFromFaFb ( ws->Fa_k[k], ws->Fb_k[k], Ad, Bd, Cd, Ed, Dd_inv );
}
} // if FSTATQ_2F
#ifdef COLLECT_TIMING
#if COLLECT_TIMING
toc = XLALGetCPUTime();
ti->tauFab2F += ( toc - tic );
#endif
......@@ -576,7 +571,7 @@ XLALComputeFstatResamp ( FstatResults* Fstats,
ws->FbX_k = NULL;
}
#ifdef COLLECT_TIMING
#if COLLECT_TIMING
// timings are per-detector
tocEnd = XLALGetCPUTime();
ti->tauTotal = (tocEnd - ticStart);
......@@ -592,8 +587,8 @@ XLALComputeFstatResamp ( FstatResults* Fstats,
ti->tauF1Buf = (ti->tauTotal - ti->tauBary - ti->tauMem) / numFreqBins;
// pass out timings to test/benchmark programs
Resamp_tauF1NoBuf = ti->tauF1NoBuf;
Resamp_tauF1Buf = ti->tauF1Buf;
Fstat_tauF1NoBuf = ti->tauF1NoBuf;
Fstat_tauF1Buf = ti->tauF1Buf;
// write timing info to log file
if ( resamp->timingLogFile != NULL ) {
......@@ -627,7 +622,7 @@ XLALComputeFaFb_Resamp ( ResampWorkspace *restrict ws, //!< [in,out] pre-allo
REAL8 fMinFFT = fHet + freqShift - dFreq * (ws->numSamplesFFT/2); // we'll shift DC into the *middle bin* N/2 [N always even!]
UINT4 offset_bins = (UINT4) lround ( ( FreqOut0 - fMinFFT ) / dFreq );
#ifdef COLLECT_TIMING
#if COLLECT_TIMING
// collect some internal timing info
ResampTimingInfo *ti = &(ws->timingInfo);
REAL8 tic,toc;
......@@ -639,7 +634,7 @@ XLALComputeFaFb_Resamp ( ResampWorkspace *restrict ws, //!< [in,out] pre-allo
// apply spindown phase-factors, store result in zero-padded timeseries for 'FFT'ing
XLAL_CHECK ( XLALApplySpindownAndFreqShift ( ws->TS_FFT, TimeSeries_SRC_a, &thisPoint, freqShift ) == XLAL_SUCCESS, XLAL_EFUNC );
#ifdef COLLECT_TIMING
#if COLLECT_TIMING
toc = XLALGetCPUTime();
ti->tauSpin += ( toc - tic);
tic = toc;
......@@ -652,7 +647,7 @@ XLALComputeFaFb_Resamp ( ResampWorkspace *restrict ws, //!< [in,out] pre-allo
ws->FaX_k[k] = ws->FabX_Raw [ offset_bins + k ];
}
#ifdef COLLECT_TIMING
#if COLLECT_TIMING
toc = XLALGetCPUTime();
ti->tauFFT += ( toc - tic);
tic = toc;
......@@ -662,7 +657,7 @@ XLALComputeFaFb_Resamp ( ResampWorkspace *restrict ws, //!< [in,out] pre-allo
// apply spindown phase-factors, store result in zero-padded timeseries for 'FFT'ing
XLAL_CHECK ( XLALApplySpindownAndFreqShift ( ws->TS_FFT, TimeSeries_SRC_b, &thisPoint, freqShift ) == XLAL_SUCCESS, XLAL_EFUNC );
#ifdef COLLECT_TIMING
#if COLLECT_TIMING
toc = XLALGetCPUTime();
ti->tauSpin += ( toc - tic);
tic = toc;
......@@ -675,7 +670,7 @@ XLALComputeFaFb_Resamp ( ResampWorkspace *restrict ws, //!< [in,out] pre-allo
ws->FbX_k[k] = ws->FabX_Raw [ offset_bins + k ];
}
#ifdef COLLECT_TIMING
#if COLLECT_TIMING
toc = XLALGetCPUTime();
ti->tauFFT += ( toc - tic);
tic = toc;
......@@ -694,7 +689,7 @@ XLALComputeFaFb_Resamp ( ResampWorkspace *restrict ws, //!< [in,out] pre-allo
ws->FbX_k[k] *= normX_k;
} // for k < numFreqBinsOut
#ifdef COLLECT_TIMING
#if COLLECT_TIMING
toc = XLALGetCPUTime();
ti->tauNorm += ( toc - tic);
tic = toc;
......
......@@ -28,10 +28,20 @@
// //
// ================================================================================================= //
// ---------- Shared constants/defines ---------- //
// Compile-time switch for the internal timing measurements guarded by
// '#if COLLECT_TIMING' in the F-statistic methods. Defaults to 1 (enabled)
// unless the macro is already defined, e.g. through a CFLAGS define such as
// -DCOLLECT_TIMING=0.
#ifndef COLLECT_TIMING
#define COLLECT_TIMING 1
#endif
// ---------- Shared macro definitions ---------- //
// Square of x. The argument is expanded (and therefore evaluated) twice,
// so do not pass expressions with side effects such as SQ(i++).
#define SQ(x) ( (x) * (x) )
// ---------- Shared global variables ---------- //
// Hidden global variables used to pass timing coefficients to test/benchmark programs.
// These are declarations only; each variable needs exactly one definition in a .c file.
extern REAL8 Fstat_tauF1Buf;	// 'buffered' timing coefficient (barycentering time excluded)
extern REAL8 Fstat_tauF1NoBuf;	// 'unbuffered' timing coefficient (based on the total time)
// ---------- Shared struct definitions ---------- //
// Common input data for F-statistic methods
......
......@@ -52,8 +52,8 @@ typedef struct
} UserInput_t;
// hidden global variables used to pass timings to test/benchmark programs
extern REAL8 Resamp_tauF1Buf;
extern REAL8 Resamp_tauF1NoBuf;
extern REAL8 Fstat_tauF1Buf;
extern REAL8 Fstat_tauF1NoBuf;
// ---------- main ----------
int
......@@ -238,8 +238,8 @@ main ( int argc, char *argv[] )
XLAL_CHECK ( XLALComputeFstat ( &results, inputs->data[l], &Doppler, numFreqBins_i, whatToCompute ) == XLAL_SUCCESS, XLAL_EFUNC );
// ----- output timing details if requested
tauF1NoBuf_i += Resamp_tauF1NoBuf;
tauF1Buf_i += Resamp_tauF1Buf;
tauF1NoBuf_i += Fstat_tauF1NoBuf;
tauF1Buf_i += Fstat_tauF1Buf;
} // for l < numSegments
tauF1NoBuf_i /= uvar->numSegments;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment