Verified Commit 8ff7b39c authored by Thomas Almeida's avatar Thomas Almeida
Browse files

postcohtable: Remove detector names from table

This change removes detector names from the various data structures that
are used throughout the pipeline and ensures that all pointers into the
data structures have their offsets based upon the number of
interferometers.

Previously, adding support for an additional interferometer to the
pipeline would involve modifying all of the below files, and would often
require extensive testing to ensure that the massive number of changes
that would need to be done would be correct.

With these changes, when adding interferometers, the only file that
needs to be changed is `gstlal-spiir/include/pipe_macro.h`, with the
modification of `MAX_NIFO`, `IFOMap` and `IFOComboMap`. By changing
these variables, the next compilation of the pipeline will automatically
support the new interferometer.

-- Tom Almeida
parent 3dd4a5ac
......@@ -52,7 +52,7 @@ int scan_trigger_ifos(int icombo, PostcohInspiralTable *trigger) {
if (icombo & (1 << i)) {
// [THA]: This is a check that the data from this IFO is actually
// valid. If it's not valid, the number will be very *very* small
if (*(&trigger->snglsnr_H + i) > EPSILON) {
if (trigger->snglsnr[i] > EPSILON) {
strncpy(final_ifos + IFO_LEN * nifo, IFOMap[i].name,
one_ifo_size);
nifo++;
......
......@@ -32,6 +32,7 @@
#include <LIGOLwHeader.h>
#include <cohfar/background_stats.h>
#include <glib.h>
#include <postcohtable.h>
Bins1D *bins1D_create_long(double cmin, double cmax, int nbin);
......@@ -53,10 +54,20 @@ TriggerStats **trigger_stats_create(int icombo);
int bins1D_get_idx(double val, Bins1D *bins);
void trigger_stats_feature_rates_update(double snr,
double chisq,
FeatureStats *feature,
TriggerStats *cur_stats);
void trigger_stats_feature_rate_update(double snr,
double chisq,
FeatureStats *feature,
TriggerStats *cur_stats);
double trigger_stats_get_val_from_map(double snr, double chisq, Bins2D *bins);
int scan_trigger_ifos(int icombo, PostcohInspiralTable *trigger);
void trigger_stats_livetime_inc(TriggerStats **stats, const int index);
void trigger_stats_xml_reset(TriggerStatsXML *stats);
void signal_stats_init(TriggerStatsXML *sgstats, int source_type);
void trigger_stats_feature_rates_add(FeatureStats *feature1,
FeatureStats *feature2,
......
......@@ -120,8 +120,8 @@ static void update_stats_icombo(PostcohInspiralTable *intable,
for (ifo = 0, index = 0; ifo < MAX_NIFO; ifo++) {
if ((stats->icombo + 1) & (1 << ifo)) {
trigger_stats_feature_rate_update(
(double)((&intable->snglsnr_H)[ifo]),
(double)((&intable->chisq_H)[ifo]),
(double)(intable->snglsnr[ifo]),
(double)(intable->chisq[ifo]),
stats->multistats[index]->feature, stats->multistats[index]);
++index;
}
......
......@@ -133,72 +133,30 @@ static void update_trigger_fars(PostcohInspiralTable *table,
cur_stats->rank->rank_map);
table->rank = MAX(MAX(rank_1w, rank_1d), rank_2h);
/* FIXME: currently hardcoded for single detectors FAR */
cur_stats = element->bgstats_1w->multistats[0];
far = BOUND(FLT_MIN, gen_fap_from_feature((double)table->snglsnr_H,
(double)table->chisq_H, cur_stats)
* cur_stats->nevent
/ (cur_stats->livetime * hist_trials));
table->far_h_1w = far;
cur_stats = element->bgstats_1w->multistats[1];
far = BOUND(FLT_MIN, gen_fap_from_feature((double)table->snglsnr_L,
(double)table->chisq_L, cur_stats)
* cur_stats->nevent
/ (cur_stats->livetime * hist_trials));
table->far_l_1w = far;
cur_stats = element->bgstats_1w->multistats[2];
far = BOUND(FLT_MIN, gen_fap_from_feature((double)table->snglsnr_V,
(double)table->chisq_V, cur_stats)
* cur_stats->nevent
/ (cur_stats->livetime * hist_trials));
table->far_v_1w = far;
cur_stats = element->bgstats_1d->multistats[0];
far = BOUND(FLT_MIN, gen_fap_from_feature((double)table->snglsnr_H,
(double)table->chisq_H, cur_stats)
* cur_stats->nevent
/ (cur_stats->livetime * hist_trials));
table->far_h_1d = far;
cur_stats = element->bgstats_1d->multistats[1];
far = BOUND(FLT_MIN, gen_fap_from_feature((double)table->snglsnr_L,
(double)table->chisq_L, cur_stats)
* cur_stats->nevent
/ (cur_stats->livetime * hist_trials));
table->far_l_1d = far;
cur_stats = element->bgstats_1d->multistats[2];
far = BOUND(FLT_MIN, gen_fap_from_feature((double)table->snglsnr_V,
(double)table->chisq_V, cur_stats)
* cur_stats->nevent
/ (cur_stats->livetime * hist_trials));
table->far_v_1d = far;
cur_stats = element->bgstats_2h->multistats[0];
if (cur_stats->livetime > 0) {
far = BOUND(
FLT_MIN, gen_fap_from_feature((double)table->snglsnr_H,
(double)table->chisq_H, cur_stats)
for (int i = 0; i < MAX_NIFO; ++i) {
cur_stats = element->bgstats_1w->multistats[i];
far = BOUND(
FLT_MIN, gen_fap_from_feature((double)table->snglsnr[i],
(double)table->chisq[i], cur_stats)
* cur_stats->nevent / (cur_stats->livetime * hist_trials));
table->far_h_2h = far;
}
cur_stats = element->bgstats_2h->multistats[1];
if (cur_stats->livetime > 0) {
far = BOUND(
FLT_MIN, gen_fap_from_feature((double)table->snglsnr_L,
(double)table->chisq_L, cur_stats)
* cur_stats->nevent / (cur_stats->livetime * hist_trials));
table->far_l_2h = far;
}
cur_stats = element->bgstats_2h->multistats[2];
if (cur_stats->livetime > 0) {
far = BOUND(
FLT_MIN, gen_fap_from_feature((double)table->snglsnr_V,
(double)table->chisq_V, cur_stats)
table->far_1w_sngl[i] = far;
cur_stats = element->bgstats_1d->multistats[i];
far = BOUND(
FLT_MIN, gen_fap_from_feature((double)table->snglsnr[i],
(double)table->chisq[i], cur_stats)
* cur_stats->nevent / (cur_stats->livetime * hist_trials));
table->far_v_2h = far;
table->far_1d_sngl[i] = far;
cur_stats = element->bgstats_2h->multistats[i];
if (cur_stats->livetime > 0) {
far = BOUND(FLT_MIN,
gen_fap_from_feature((double)table->snglsnr[i],
(double)table->chisq[i], cur_stats)
* cur_stats->nevent
/ (cur_stats->livetime * hist_trials));
table->far_2h_sngl[i] = far;
}
}
}
......
......@@ -39,7 +39,7 @@
// y_hist_result: one-dimensional histogram
// result: a L*L matrix. result(i,j) =
// count(Bin_i)/h(x_j)*K[x-point(Bin_i)/h(x_j)] (each variable's meaning: see
//document of 'Shimazaki’s method' part.).
// document of 'Shimazaki’s method' part.).
//
// int main()
......@@ -282,7 +282,8 @@ double CostFunction(gsl_vector *y_hist,
// for (i = 0; i < y_hist->size; i++) {
// if (gsl_vector_get(y_hist, i) != 0) {
// gsl_vector_set(y_hist_nz, idx, gsl_vector_get(y_hist,
//i)); gsl_vector_set(t_nz, idx, gsl_vector_get(t, i)); idx++;
// i)); gsl_vector_set(t_nz, idx, gsl_vector_get(t, i));
// idx++;
// }
// }
//
......@@ -905,8 +906,8 @@ void ssvkernel(gsl_vector *x,
// gsl_vector * xb = gsl_vector_alloc(Nb);
// for (j = 0; j < Nb; j++) {
// gsl_vector_set(xb, j,
// gsl_vector_get(x_ab, floor(gsl_ran_flat(r, 0,
//N))));
// gsl_vector_get(x_ab, floor(gsl_ran_flat(r,
//0, N))));
// }
//
// gsl_vector_histc(xb, t_dt2, y_histb);
......@@ -923,7 +924,8 @@ void ssvkernel(gsl_vector *x,
// for (j = 0; j < tin->size; j++) {
// gsl_matrix_set(yb, i, j,
// gsl_interp_eval(linear, t->data,
//yb_buf->data, gsl_vector_get(tin, j), acc));
// yb_buf->data, gsl_vector_get(tin, j),
// acc));
// }
// gsl_vector_free(xb);
// }
......@@ -934,8 +936,8 @@ void ssvkernel(gsl_vector *x,
// gsl_sort_vector(yb_col);
// gsl_vector_set(lower_bound, i,
// gsl_vector_get(yb_col, floor((1 - confidence) *
//nbs))); gsl_vector_set(upper_bound, i, gsl_vector_get(yb_col,
//floor((confidence) * nbs)));
// nbs))); gsl_vector_set(upper_bound, i,
// gsl_vector_get(yb_col, floor((confidence) * nbs)));
// }
// gsl_interp_init(linear, t->data, yv->data, t->size);
// for (i = 0; i < tin->size; i++) {
......
......@@ -19,7 +19,7 @@
// y_hist_result: one-dimensional histogram
// result: a L*L matrix. result(i,j) =
// count(Bin_i)/h(x_j)*K[x-point(Bin_i)/h(x_j)] (each variable's meaning: see
//document of 'Shimazaki’s method' part.).
// document of 'Shimazaki’s method' part.).
//
// int main()
......
......@@ -144,7 +144,7 @@ void getTemp(int *tempPtr, float *histgt0Ptr) {
}
// tempPtr = {2 5 7 9 11 15 16 18 20 22 29 31 35}
// histgt0Ptr = {0.0714 0.0714 0.0714 0.0714 0.0714 0.0714 0.0714 0.0714
//0.0714 0.0714 0.0714 0.0714 0.1429 }
// 0.0714 0.0714 0.0714 0.0714 0.1429 }
}
void getHistaxis(int *tempPtr, int **histaxisPtr) {
......@@ -217,7 +217,7 @@ void getHwidth(float *kthDistPtr, float GlobalHband, float *hwidthPtr) {
// For K=2
// kthDistPtr =
//{1.4142 1.0000 1.4142 1.0000 1.0000 1.0000 1.0000 2.0000 1.4142 1.0000 1.0000
//2.2361 1.0000}
// 2.2361 1.0000}
}
void getPDF(float *hwidthPtr, float *histgt0Ptr, int **histaxisPtr) {
......
......@@ -61,7 +61,7 @@ static gboolean plugin_init(GstPlugin *plugin) {
{ "cuda_iirbank", CUDA_IIRBANK_TYPE },
// {"cuda_audioresample", CUDA_AUDIO_RESAMPLE_TYPE},
// {"gstlal_multidownsample",
//GSTLAL_MULTI_DOWNSAMPLE_TYPE},
// GSTLAL_MULTI_DOWNSAMPLE_TYPE},
{ "cuda_multiratespiir", CUDA_TYPE_MULTIRATESPIIR },
{ "cuda_postcoh", CUDA_TYPE_POSTCOH },
{ "postcoh_filesink", POSTCOH_TYPE_FILESINK },
......
......@@ -73,8 +73,8 @@ static gboolean need_flag_gap(GstPostcohCollectData *data,
for (i = 0; i < flag_segments->len; i++) {
this_segment = &((FlagSegment *)flag_segments->data)[i];
/* | start | stop
* | this_start
*(1) | s | e (2)
* |
*this_start (1) | s | e (2)
* | s | e
* | s | e
* |s | e
......@@ -1144,10 +1144,11 @@ static int cuda_postcoh_select_background(PeakList *pklist,
for (itrial = 1; itrial <= hist_trials; itrial++) {
background_cur = (itrial - 1) * max_npeak + peak_cur;
// FIXME: consider a different threshold for 3-detector
// if (sqrt(pklist->cohsnr_bg[background_cur]) > cohsnr_thresh
// if (sqrt(pklist->cohsnr_bg[background_cur]) >
// cohsnr_thresh
//* pklist->snglsnr_H[iifo*max_npeak + peak_cur])
if (sqrt(pklist->cohsnr_bg[background_cur])
> 1.414 + pklist->snglsnr_H[write_ifo * max_npeak + peak_cur]) {
> 1.414 + pklist->snglsnr[write_ifo][peak_cur]) {
left_backgrounds++;
GST_LOG("mark back,%d ipeak, %d itrial", ipeak, itrial);
} else
......@@ -1207,9 +1208,7 @@ static int cuda_postcoh_select_foreground(PostcohState *state,
peak_cur = peak_pos[ipeak];
// FIXME: consider a different threshold for 3-detector
if (sqrt(pklist->cohsnr[peak_cur])
> 1.414
+ pklist->snglsnr_H[write_ifo * (state->max_npeak)
+ peak_cur]) {
> 1.414 + pklist->snglsnr[write_ifo][peak_cur]) {
cluster_peak_pos[final_peaks++] = peak_cur;
} else
bubbled_peak_pos[bubbled_peaks++] = peak_cur;
......@@ -1290,33 +1289,22 @@ static int cuda_postcoh_write_table_to_buf(CudaPostcoh *postcoh,
len_cur = pklist->len_idx[peak_cur];
XLALGPSAdd(&(end_time), (double)len_cur / exe_len);
output->end_time = end_time;
XLALGPSAdd(&(end_time),
(double)pklist->ntoff_H[peak_cur] / exe_len);
output->end_time_H = end_time;
end_time = output->end_time;
XLALGPSAdd(&(end_time),
(double)pklist->ntoff_L[peak_cur] / exe_len);
output->end_time_L = end_time;
end_time = output->end_time;
XLALGPSAdd(&(end_time),
(double)pklist->ntoff_V[peak_cur] / exe_len);
output->end_time_V = end_time;
output->snglsnr_H = pklist->snglsnr_H[peak_cur];
output->snglsnr_L = pklist->snglsnr_L[peak_cur];
output->snglsnr_V = pklist->snglsnr_V[peak_cur];
output->coaphase_H = pklist->coaphase_H[peak_cur];
output->coaphase_L = pklist->coaphase_L[peak_cur];
output->coaphase_V = pklist->coaphase_V[peak_cur];
output->chisq_H = pklist->chisq_H[peak_cur];
output->chisq_L = pklist->chisq_L[peak_cur];
output->chisq_V = pklist->chisq_V[peak_cur];
for (int i = 0; i < MAX_NIFO; ++i) {
XLALGPSAdd(&(end_time),
(double)pklist->ntoff[i][peak_cur] / exe_len);
output->end_time_sngl[i] = end_time;
end_time = output->end_time;
output->snglsnr[i] = pklist->snglsnr[i][peak_cur];
output->coaphase[i] = pklist->coaphase[i][peak_cur];
output->chisq[i] = pklist->chisq[i][peak_cur];
}
for (jifo = 0; jifo < nifo; jifo++) {
int write_ifo = state->write_ifo_mapping[jifo];
*(&output->deff_H + write_ifo) =
output->deff[write_ifo] =
sqrt(state->sigmasq[jifo][cur_tmplt_idx])
/ *(pklist->snglsnr_H + write_ifo * state->max_npeak
+ peak_cur); // in MPC
/ pklist->snglsnr[write_ifo][peak_cur]; // in MPC
}
output->is_background = FLAG_FOREGROUND;
output->livetime = livetime;
......@@ -1384,20 +1372,20 @@ static int cuda_postcoh_write_table_to_buf(CudaPostcoh *postcoh,
output->skymap_fname[0] = '\0';
output->rank = 0;
GST_LOG_OBJECT(postcoh,
"end_time_L %d, ipeak %d, peak_cur %d, len_cur %d, "
"tmplt_idx %d, pix_idx %d \t,"
"snglsnr_L %f, snglsnr_H %f, snglsnr_V %f,"
"coaphase_L %f, coaphase_H %f, coa_phase_V %f,"
"chisq_L %f, chisq_H %f, chisq_V %f,"
"cohsnr %f, nullsnr %f, cmbchisq %f\n",
output->end_time_L.gpsSeconds, ipeak, peak_cur,
len_cur, output->tmplt_idx, output->pix_idx,
output->snglsnr_L, output->snglsnr_H,
output->snglsnr_V, output->coaphase_L,
output->coaphase_H, output->coaphase_V,
output->chisq_L, output->chisq_H, output->chisq_V,
output->cohsnr, output->nullsnr, output->cmbchisq);
GST_LOG_OBJECT(
postcoh,
"end_time_sngl_0 %d, ipeak %d, peak_cur %d, len_cur %d, "
"tmplt_idx %d, pix_idx %d \t,"
"snglsnr_0 %f, snglsnr_1 %f, snglsnr_2 %f,"
"coaphase_0 %f, coaphase_1 %f, coa_phase_2 %f,"
"chisq_0 %f, chisq_1 %f, chisq_2 %f,"
"cohsnr %f, nullsnr %f, cmbchisq %f\n",
output->end_time_sngl[0].gpsSeconds, ipeak, peak_cur, len_cur,
output->tmplt_idx, output->pix_idx, output->snglsnr[0],
output->snglsnr[1], output->snglsnr[2], output->coaphase[0],
output->coaphase[1], output->coaphase[2], output->chisq[0],
output->chisq[1], output->chisq[2], output->cohsnr,
output->nullsnr, output->cmbchisq);
XLALINT8NSToGPS(&output->epoch, ts);
output->deltaT = 1. / postcoh->rate;
......@@ -1424,15 +1412,12 @@ static int cuda_postcoh_write_table_to_buf(CudaPostcoh *postcoh,
state->all_ifos + IFO_LEN * iifo, one_ifo_size);
output->pivotal_ifo[IFO_LEN] = '\0';
output->tmplt_idx = pklist->tmplt_idx[peak_cur];
output->snglsnr_H = pklist->snglsnr_bg_H[peak_cur_bg];
output->snglsnr_L = pklist->snglsnr_bg_L[peak_cur_bg];
output->snglsnr_V = pklist->snglsnr_bg_V[peak_cur_bg];
output->coaphase_H = pklist->coaphase_bg_H[peak_cur_bg];
output->coaphase_L = pklist->coaphase_bg_L[peak_cur_bg];
output->coaphase_V = pklist->coaphase_bg_V[peak_cur_bg];
output->chisq_H = pklist->chisq_bg_H[peak_cur_bg];
output->chisq_L = pklist->chisq_bg_L[peak_cur_bg];
output->chisq_V = pklist->chisq_bg_V[peak_cur_bg];
for (int i = 0; i < MAX_NIFO; ++i) {
output->snglsnr[i] = pklist->snglsnr_bg[i][peak_cur_bg];
output->coaphase[i] =
pklist->coaphase_bg[i][peak_cur_bg];
output->chisq[i] = pklist->chisq_bg[i][peak_cur_bg];
}
// output->pix_idx = pklist->pix_idx[itrial*max_npeak +
// peak_cur];
......@@ -1448,16 +1433,16 @@ static int cuda_postcoh_write_table_to_buf(CudaPostcoh *postcoh,
postcoh,
"ipeak %d, itrial %d, len_cur %d, tmplt_idx %d, pix_idx "
"%d,"
"snglsnr_L %f, snglsnr_H %f, snglsnr_V %f,"
"coaphase_L %f, coaphase_H %f, coa_phase_V %f,"
"chisq_L %f, chisq_H %f, chisq_V %f,"
"snglsnr[0] %f, snglsnr[1] %f, snglsnr[2] %f,"
"coaphase[0] %f, coaphase[1] %f, coa_phase[2] %f,"
"chisq[0] %f, chisq[1] %f, chisq[2] %f,"
"cohsnr %f, nullsnr %f, cmbchisq %f\n",
ipeak, itrial, len_cur, output->tmplt_idx,
output->pix_idx, output->snglsnr_L, output->snglsnr_H,
output->snglsnr_V, output->coaphase_L, output->coaphase_H,
output->coaphase_V, output->chisq_L, output->chisq_H,
output->chisq_V, output->cohsnr, output->nullsnr,
output->cmbchisq);
output->pix_idx, output->snglsnr[0], output->snglsnr[2],
output->snglsnr[2], output->coaphase[0],
output->coaphase[1], output->coaphase[2],
output->chisq[0], output->chisq[1], output->chisq[2],
output->cohsnr, output->nullsnr, output->cmbchisq);
/* do not dump snr for background */
XLALINT8NSToGPS(&output->epoch, ts);
......
......@@ -88,29 +88,15 @@ typedef struct _PeakList {
int *tmplt_idx;
int *pix_idx;
int *pix_idx_bg; // background Ntoff needs this, do not remove
int *ntoff_H;
int *ntoff_L;
int *ntoff_V;
float *snglsnr_H;
float *snglsnr_L;
float *snglsnr_V;
float *coaphase_H;
float *coaphase_L;
float *coaphase_V;
float *chisq_H;
float *chisq_L;
float *chisq_V;
float *snglsnr_bg_H;
float *snglsnr_bg_L;
float *snglsnr_bg_V;
float *coaphase_bg_H;
float *coaphase_bg_L;
float *coaphase_bg_V;
float *chisq_bg_H;
float *chisq_bg_L;
float *chisq_bg_V;
int *ntoff[MAX_NIFO];
float *snglsnr[MAX_NIFO];
float *coaphase[MAX_NIFO];
float *chisq[MAX_NIFO];
float *snglsnr_bg[MAX_NIFO];
float *coaphase_bg[MAX_NIFO];
float *chisq_bg[MAX_NIFO];
float *cohsnr;
float *nullsnr;
......@@ -124,35 +110,25 @@ typedef struct _PeakList {
float *nullsnr_skymap;
/* structure on GPU device */
// [THA]: It is important to note that pointers on the host device are not
// exposed to the GPU device. For this reason, we can't allocate d_ntoff,
// d_snglsnr, etc. here on the stack with sized arrays. Instead, we need
// to malloc is when PeakList is built.
int *d_npeak;
int *d_peak_pos;
int *d_len_idx;
int *d_tmplt_idx;
int *d_pix_idx;
int *d_pix_idx_bg; // background Ntoff needs this, do not remove
int *d_ntoff_H;
int *d_ntoff_L;
int *d_ntoff_V;
float *d_snglsnr_H;
float *d_snglsnr_L;
float *d_snglsnr_V;
float *d_coaphase_H;
float *d_coaphase_L;
float *d_coaphase_V;
float *d_chisq_H;
float *d_chisq_L;
float *d_chisq_V;
float *d_snglsnr_bg_H;
float *d_snglsnr_bg_L;
float *d_snglsnr_bg_V;
float *d_coaphase_bg_H;
float *d_coaphase_bg_L;
float *d_coaphase_bg_V;
float *d_chisq_bg_H;
float *d_chisq_bg_L;
float *d_chisq_bg_V;
int **d_ntoff; // size (MAX_NIFO)
float **d_snglsnr; // size (MAX_NIFO)
float **d_coaphase; // size (MAX_NIFO)
float **d_chisq; // size (MAX_NIFO)
float **d_snglsnr_bg; // size (MAX_NIFO)
float **d_coaphase_bg; // size (MAX_NIFO)
float **d_chisq_bg; // size (MAX_NIFO)
float *d_cohsnr;
float *d_nullsnr;
......
......@@ -169,71 +169,107 @@ PeakList *create_peak_list(PostcohState *state, cudaStream_t stream) {
#endif
PeakList *pklist = (PeakList *)malloc(sizeof(PeakList));
int peak_intlen = (7 + hist_trials) * max_npeak + 1;
int peak_floatlen = (12 + hist_trials * 12) * max_npeak;
int peak_intlen = (4 + MAX_NIFO + hist_trials) * max_npeak + 1;
int peak_floatlen =
((4 * MAX_NIFO) + (hist_trials * 4 * MAX_NIFO)) * max_npeak;
pklist->peak_intlen = peak_intlen;
pklist->peak_floatlen = peak_floatlen;
// [THA]: Why do we use `cudaMallocManaged()` sometimes below? Well, a large
// number of the below pointers are to 2D arrays that we won't be accessing
// after setting up, but whilst we're setting them up they need to be able
// to be accessed on the CPU. `cudaMallocManaged()` allows us to access the
// memory involved on both the CPU and GPU, which means that we can assign
// the pointers here instead of having to do an awkward `cudaMemcpyAsync()`
// to copy across the right pointer.
//
// It's likely that there will be a small performance hit for having used
// `cudaMallocManaged()` instead of doing an async copy, so if its needed
// to, the below code can definitely can be changed and will work with
// `cudaMemcpyAsync()`.
//
// FIXME: Move to `cudaMemcpyAsync()` instead of `cudaMallocManaged()` for a
// slight performance bump
/* create device space for peak list for int-type variables */
CUDA_CHECK(
cudaMalloc((void **)&(pklist->d_npeak), sizeof(int) * peak_intlen));
CUDA_CHECK(
cudaMemsetAsync(pklist->d_npeak, 0, sizeof(int) * peak_intlen, stream));
pklist->d_peak_pos = pklist->d_npeak + 1;
pklist->d_len_idx = pklist->d_npeak + 1 + max_npeak;
pklist->d_peak_pos = pklist->d_npeak + 1 + 0 * max_npeak;
pklist->d_len_idx = pklist->d_npeak + 1 + 1 * max_npeak;
pklist->d_tmplt_idx = pklist->d_npeak + 1 + 2 * max_npeak;
pklist->d_pix_idx = pklist->d_npeak + 1 + 3 * max_npeak;
pklist->d_pix_idx_bg = pklist->d_npeak + 1 + 4 * max_npeak;
pklist->d_ntoff_H = pklist->d_npeak + 1 + (4 + hist_trials) * max_npeak;
pklist->d_ntoff_L = pklist->d_npeak + 1 + (5 + hist_trials) * max_npeak;
pklist->d_ntoff_V = pklist->d_npeak + 1 + (6 + hist_trials) * max_npeak;
CUDA_CHECK(cudaMallocManaged((void **)&(pklist->d_ntoff),
sizeof(int *) * MAX_NIFO,
cudaMemAttachGlobal));
for (int i = 0; i < MAX_NIFO; ++i) {
pklist->d_ntoff[i] =
pklist->d_npeak + 1 + ((4 + hist_trials + i) * max_npeak);
}
// printf("d_npeak %p\n", pklist->d_npeak);
// CUDA_CHECK(cudaMemsetAsync(pklist->d_npeak, 0, sizeof(int), stream));
/* create device space for peak list for float-type variables */
CUDA_CHECK(cudaMalloc((void **)&(pklist->d_snglsnr_H),
CUDA_CHECK(cudaMallocManaged((void **)&(pklist->d_snglsnr),
sizeof(float *) * MAX_NIFO,
cudaMemAttachGlobal));
CUDA_CHECK(cudaMallocManaged((void **)&(pklist->d_coaphase),
sizeof(float *) * MAX_NIFO,
cudaMemAttachGlobal));
CUDA_CHECK(cudaMallocManaged((void **)&(pklist->d_chisq),
sizeof(float *) * MAX_NIFO,
cudaMemAttachGlobal));
CUDA_CHECK(cudaMallocManaged((void **)&(pklist->d_snglsnr_bg),
sizeof(float *) * MAX_NIFO,
cudaMemAttachGlobal));
CUDA_CHECK(cudaMallocManaged((void **)&(pklist->d_coaphase_bg),
sizeof(float *) * MAX_NIFO,
cudaMemAttachGlobal));
CUDA_CHECK(cudaMallocManaged((void **)&(pklist->d_chisq_bg),
sizeof(float *) * MAX_NIFO,
cudaMemAttachGlobal));
CUDA_CHECK(cudaMalloc((void **)&(pklist->d_snglsnr[0]),
sizeof(float) * peak_floatlen));
CUDA_CHECK(cudaMemsetAsync(pklist->d_snglsnr_H, 0,
CUDA_CHECK(cudaMemsetAsync(pklist->d_snglsnr[0], 0,
sizeof(float) * peak_floatlen, stream));
pklist->d_snglsnr_L = pklist->d_snglsnr_H + max_npeak;
pklist->d_snglsnr_V = pklist->d_snglsnr_H + 2 * max_npeak;