From 8106ba0dd6eafda6613a16a6ef32f4aaf9a26732 Mon Sep 17 00:00:00 2001
From: Patrick Godwin <patrick.godwin@ligo.org>
Date: Mon, 2 Jul 2018 18:20:37 -0700
Subject: [PATCH] gstlal_feature_extractor + utils.py: added functionality to
 increase the sample rate of features being produced above 1 Hz

---
 gstlal-burst/bin/gstlal_feature_extractor | 10 +++++++---
 gstlal-burst/python/fxtools/utils.py      | 14 ++++++++------
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/gstlal-burst/bin/gstlal_feature_extractor b/gstlal-burst/bin/gstlal_feature_extractor
index b5b2905306..2bd3235659 100755
--- a/gstlal-burst/bin/gstlal_feature_extractor
+++ b/gstlal-burst/bin/gstlal_feature_extractor
@@ -220,7 +220,7 @@ class MultiChannelHandler(simplehandler.Handler):
 		self.frame_segments = data_source_info.frame_segments
 		self.keys = kwargs.pop("keys")
 		self.num_samples = len(self.keys)
-		self.sample_rate = 1 # NOTE: hard-coded for now
+		self.sample_rate = options.sample_rate
 		self.waveforms = kwargs.pop("waveforms")
 		self.basename = kwargs.pop("basename")
 		self.waveform_type = options.waveform
@@ -258,7 +258,7 @@ class MultiChannelHandler(simplehandler.Handler):
 
 		# feature saving properties
 		if options.save_format == 'hdf5':
-			self.fdata = utils.HDF5FeatureData(self.columns, keys = self.keys, cadence = self.cadence)
+			self.fdata = utils.HDF5FeatureData(self.columns, keys = self.keys, cadence = self.cadence, sample_rate = self.sample_rate)
 
 		elif options.save_format == 'ascii':
 			self.header = "# %18s\t%20s\t%20s\t%10s\t%8s\t%8s\t%8s\t%10s\t%s\n" % ("start_time", "stop_time", "trigger_time", "frequency", "phase", "q", "chisq", "snr", "channel")
@@ -673,6 +673,7 @@ def parse_command_line():
 	group.add_option("--disable-web-service", action = "store_true", help = "If set, disables web service that allows monitoring of PSDS of aux channels.")
 	group.add_option("-v", "--verbose", action = "store_true", help = "Be verbose.")
 	group.add_option("--nxydump-segment", metavar = "start:stop", help = "Set the time interval to dump from nxydump elements (optional).")
+	group.add_option("--sample-rate", type = "int", metavar = "Hz", help = "Set the sample rate for feature timeseries output, must be a power of 2. Default = 1 Hz.")
 	group.add_option("--feature-start-time", type = "int", metavar = "seconds", help = "Set the start time of the segment to output features in GPS seconds. Required unless --data-source=lvshm")
 	group.add_option("--feature-end-time", type = "int", metavar = "seconds", help = "Set the end time of the segment to output features in GPS seconds.  Required unless --data-source=lvshm")
 	parser.add_option_group(group)
@@ -696,6 +697,9 @@ def parse_command_line():
 	if options.feature_end_time is None:
 		options.feature_end_time = int(options.gps_end_time)
 
+	# check if input sample rate is sensible
+	assert options.sample_rate == 1 or options.sample_rate % 2 == 0
+
 	# check if persist and save cadence times are sensible
 	assert options.persist_cadence >= options.cadence
 	assert (options.persist_cadence % options.cadence) == 0
@@ -916,7 +920,7 @@ for subset_id, channel_subset in enumerate(data_source_info.channel_subsets, 1):
 				pipeparts.mknxydumpsink(pipeline, pipeparts.mkqueue(pipeline, tee), "snrtimeseries_%s_%s.txt" % (channel, repr(rate)), segment = options.nxydump_segment)
 
 			# extract features from time series
-			thishead = pipeparts.mktrigger(pipeline, tee, rate, max_snr = True)
+			thishead = pipeparts.mktrigger(pipeline, tee, int(rate // options.sample_rate), max_snr = True)
 
 			if options.latency_output:
 				thishead = pipeparts.mklatency(pipeline, thishead, name=utils.latency_name('aftertrigger', 5, channel, rate))
diff --git a/gstlal-burst/python/fxtools/utils.py b/gstlal-burst/python/fxtools/utils.py
index 5d40f95863..1d37a28f8f 100644
--- a/gstlal-burst/python/fxtools/utils.py
+++ b/gstlal-burst/python/fxtools/utils.py
@@ -218,8 +218,9 @@ class HDF5FeatureData(FeatureData):
 	def __init__(self, columns, keys, **kwargs):
 		super(HDF5FeatureData, self).__init__(columns, keys = keys, **kwargs)
 		self.cadence = kwargs.pop('cadence')
+		self.sample_rate = kwargs.pop('sample_rate')
 		self.dtype = [(column, '<f8') for column in self.columns]
-		self.feature_data = {key: numpy.empty((self.cadence,), dtype = self.dtype) for key in keys}
+		self.feature_data = {key: numpy.empty((self.cadence * self.sample_rate,), dtype = self.dtype) for key in keys}
 		self.last_save_time = 0
 		self.clear()
 
@@ -237,12 +238,13 @@ class HDF5FeatureData(FeatureData):
 		Append a feature buffer to data structure
 		"""
 		self.last_save_time = floor_div(timestamp, self.cadence)
-		idx = timestamp - self.last_save_time
+		time_idx = (timestamp - self.last_save_time) * self.sample_rate
 
-		### FIXME: assumes there is just one row per channel for now (denoting a sample rate of 1Hz)
 		for key in features.keys():
-			if features[key][0]:
-				self.feature_data[key][idx] = numpy.array(tuple(features[key][0][col] for col in self.columns), dtype=self.dtype)
+			for row_idx, row in enumerate(features[key]):
+				if row:
+					idx = time_idx + row_idx
+					self.feature_data[key][idx] = numpy.array(tuple(row[col] for col in self.columns), dtype=self.dtype)
 
 	def clear(self):
 		for key in self.keys:
@@ -271,7 +273,7 @@ class FeatureQueue(object):
 			self.counter[timestamp] += 1
 
 			### store row, aggregating if necessary
-			idx = self._idx(timestamp)
+			idx = self._idx(row['trigger_time'])
 			if not self.in_queue[timestamp][channel][idx] or (row['snr'] > self.in_queue[timestamp][channel][idx]['snr']):
 				self.in_queue[timestamp][channel][idx] = row
 
-- 
GitLab