From 3d467af54d1f7552a1cfead87999e65a60437c10 Mon Sep 17 00:00:00 2001
From: Patrick Godwin <patrick.godwin@ligo.org>
Date: Thu, 13 Jun 2019 13:40:28 -0700
Subject: [PATCH] feature_extraction: change block_stride for whitening, high
 pass from 1s to 0.25s, allow for a minimum downsampling rate to improve
 latency at expensive of computational cost

---
 gstlal-burst/bin/gstlal_feature_extractor        | 16 +++++++++-------
 gstlal-burst/python/fxtools/feature_extractor.py |  1 +
 .../python/fxtools/multirate_datasource.py       |  6 +++---
 gstlal-burst/python/fxtools/waveforms.py         | 11 +++++++----
 4 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/gstlal-burst/bin/gstlal_feature_extractor b/gstlal-burst/bin/gstlal_feature_extractor
index b544471909..ce2cf04a76 100755
--- a/gstlal-burst/bin/gstlal_feature_extractor
+++ b/gstlal-burst/bin/gstlal_feature_extractor
@@ -221,6 +221,7 @@ def parse_command_line():
 
 	# check if input sample rate is sensible
 	assert options.sample_rate == 1 or options.sample_rate % 2 == 0
+	assert options.min_downsample_rate % 2 == 0
 
 	# check if persist and save cadence times are sensible
 	assert options.persist_cadence >= options.cadence
@@ -418,24 +419,25 @@ for subset_id, channel_subset in enumerate(data_source_info.channel_subsets, 1):
 			head[channel] = pipeparts.mklatency(pipeline, head[channel], name=utils.latency_name('beforewhitening', 2, channel))
 
 		# whiten auxiliary channel data
-		for rate, thishead in multirate_datasource.mkwhitened_multirate_src(pipeline, head[channel], rates, samp_rate, instrument, channel_name = channel, width=32, nxydump_segment=options.nxydump_segment, psd_fft_length=options.psd_fft_length).items():
+		for rate, thishead in multirate_datasource.mkwhitened_multirate_src(pipeline, head[channel], rates, samp_rate, instrument, channel_name = channel, width=32, nxydump_segment=options.nxydump_segment, psd_fft_length=options.psd_fft_length, min_rate=options.min_downsample_rate).items():
+			thisrate = max(options.min_downsample_rate, rate)
 			if options.latency_output:
 				thishead = pipeparts.mklatency(pipeline, thishead, name=utils.latency_name('afterwhitening', 3, channel, rate))
 
 			# determine whether to do time-domain or frequency-domain convolution
-			time_domain = (waveforms[channel].sample_pts(rate)*rate) < (5*waveforms[channel].sample_pts(rate)*numpy.log2(rate))
+			time_domain = (waveforms[channel].sample_pts(thisrate)*thisrate) < (5*waveforms[channel].sample_pts(thisrate)*numpy.log2(thisrate))
 
 			# create FIR bank of half sine-gaussian templates
-			fir_matrix = numpy.array([waveform for waveform in waveforms[channel].generate_templates(rate)])
+			fir_matrix = numpy.array([waveform for waveform in waveforms[channel].generate_templates(rate, sampling_rate=thisrate)])
 			thishead = pipeparts.mkqueue(pipeline, thishead, max_size_buffers = 0, max_size_bytes = 0, max_size_time = Gst.SECOND * 30)
-			thishead = pipeparts.mkfirbank(pipeline, thishead, fir_matrix = fir_matrix, time_domain = time_domain, block_stride = int(rate), latency = waveforms[channel].latency(rate))
+			thishead = pipeparts.mkfirbank(pipeline, thishead, fir_matrix = fir_matrix, time_domain = time_domain, block_stride = int(thisrate), latency = waveforms[channel].latency(thisrate))
 
 			# add queues, change stream format, add tags
 			if options.latency_output:
 				thishead = pipeparts.mklatency(pipeline, thishead, name=utils.latency_name('afterFIRbank', 4, channel, rate))
 			thishead = pipeparts.mkqueue(pipeline, thishead, max_size_buffers = 1, max_size_bytes = 0, max_size_time = 0)
 			thishead = pipeparts.mktogglecomplex(pipeline, thishead)
-			thishead = pipeparts.mkcapsfilter(pipeline, thishead, caps = "audio/x-raw, format=Z64LE, rate=%i" % rate)
+			thishead = pipeparts.mkcapsfilter(pipeline, thishead, caps = "audio/x-raw, format=Z64LE, rate=%i" % thisrate)
 			thishead = pipeparts.mktaginject(pipeline, thishead, "instrument=%s,channel-name=%s" %( instrument, channel))
 
 			# dump segments to disk if specified
@@ -445,9 +447,9 @@ for subset_id, channel_subset in enumerate(data_source_info.channel_subsets, 1):
 
 			# extract features from time series
 			if options.feature_mode == 'timeseries':
-				thishead = pipeparts.mktrigger(pipeline, tee, int(rate // options.sample_rate), max_snr = True)
+				thishead = pipeparts.mktrigger(pipeline, tee, int(thisrate // options.sample_rate), max_snr = True)
 			elif options.feature_mode == 'etg':
-				thishead = pipeparts.mktrigger(pipeline, tee, rate, snr_thresh = options.snr_threshold)
+				thishead = pipeparts.mktrigger(pipeline, tee, thisrate, snr_thresh = options.snr_threshold)
 
 			if options.latency_output:
 				thishead = pipeparts.mklatency(pipeline, thishead, name=utils.latency_name('aftertrigger', 5, channel, rate))
diff --git a/gstlal-burst/python/fxtools/feature_extractor.py b/gstlal-burst/python/fxtools/feature_extractor.py
index 81a9257372..f697a2b624 100644
--- a/gstlal-burst/python/fxtools/feature_extractor.py
+++ b/gstlal-burst/python/fxtools/feature_extractor.py
@@ -534,6 +534,7 @@ def append_options(parser):
 
 	group = optparse.OptionGroup(parser, "Program Behavior")
 	group.add_option("--psd-fft-length", metavar = "seconds", default = 32, type = "int", help = "The length of the FFT used to used to whiten the data (default is 32 s).")
+	group.add_option("--min-downsample-rate", metavar = "Hz", default = 128, type = "int", help = "The minimum sampling rate in which to downsample streams. Default = 128 Hz.")
 	group.add_option("--local-frame-caching", action = "store_true", help = "Pre-reads frame data and stores to local filespace.")
 	group.add_option("--disable-web-service", action = "store_true", help = "If set, disables web service that allows monitoring of PSDS of aux channels.")
 	group.add_option("-v", "--verbose", action = "store_true", help = "Be verbose.")
diff --git a/gstlal-burst/python/fxtools/multirate_datasource.py b/gstlal-burst/python/fxtools/multirate_datasource.py
index d45840a12d..f5a7f86bbf 100644
--- a/gstlal-burst/python/fxtools/multirate_datasource.py
+++ b/gstlal-burst/python/fxtools/multirate_datasource.py
@@ -57,7 +57,7 @@ NATIVE_RATE_CUTOFF = 128
 # =============================================================================
 #
 
-def mkwhitened_multirate_src(pipeline, src, rates, native_rate, instrument, psd = None, psd_fft_length = PSD_FFT_LENGTH, veto_segments = None, nxydump_segment = None, track_psd = True, block_duration = int(1 * Gst.SECOND), width = 64, channel_name = "hoft"):
+def mkwhitened_multirate_src(pipeline, src, rates, native_rate, instrument, psd = None, psd_fft_length = PSD_FFT_LENGTH, veto_segments = None, nxydump_segment = None, track_psd = True, block_duration = 0.25 * Gst.SECOND, width = 64, channel_name = "hoft", min_rate = 128):
 	"""!
 	Build pipeline stage to whiten and downsample auxiliary channels.
 
@@ -157,7 +157,7 @@ def mkwhitened_multirate_src(pipeline, src, rates, native_rate, instrument, psd
 	# high pass filter
 	#
 
-	block_stride = block_duration * max_rate // Gst.SECOND
+	block_stride = int(block_duration * max_rate // Gst.SECOND)
 	if native_rate >= NATIVE_RATE_CUTOFF:
 		kernel = reference_psd.one_second_highpass_kernel(max_rate, cutoff = 12)
 		assert len(kernel) % 2 == 1, "high-pass filter length is not odd"
@@ -266,7 +266,7 @@ def mkwhitened_multirate_src(pipeline, src, rates, native_rate, instrument, psd
 	for rate in sorted(set(rates))[:-1]:
 		head[rate] = pipeparts.mkqueue(pipeline, tee, max_size_buffers = 0, max_size_bytes = 0, max_size_time = Gst.SECOND * 8)
 		head[rate] = pipeparts.mkaudioamplify(pipeline, head[rate], 1. / math.sqrt(pipeparts.audioresample_variance_gain(quality, max_rate, rate)))
-		head[rate] = pipeparts.mkcapsfilter(pipeline, pipeparts.mkinterpolator(pipeline, head[rate]), caps = "audio/x-raw, rate=%d" % rate)
+		head[rate] = pipeparts.mkcapsfilter(pipeline, pipeparts.mkinterpolator(pipeline, head[rate]), caps = "audio/x-raw, rate=%d" % max(min_rate, rate))
 
 	#
 	# return value is a dictionary of elements indexed by sample rate
diff --git a/gstlal-burst/python/fxtools/waveforms.py b/gstlal-burst/python/fxtools/waveforms.py
index e760b972a5..a947335839 100644
--- a/gstlal-burst/python/fxtools/waveforms.py
+++ b/gstlal-burst/python/fxtools/waveforms.py
@@ -105,7 +105,7 @@ class TemplateGenerator(object):
 		"""
 		return NotImplementedError
 
-	def generate_templates(self, rate, quadrature = True):
+	def generate_templates(self, rate, quadrature = True, sampling_rate = None):
 		"""
 		Generate all templates corresponding to a parameter range for a given sampling rate.
 		If quadrature is set, yield two templates corresponding to in-phase and quadrature components of the waveform.
@@ -170,17 +170,20 @@ class HalfSineGaussianGenerator(TemplateGenerator):
 		"""
 		return 0.5 * (q/(2.*numpy.pi*f)) * numpy.log(1./self.tolerance)
 
-	def generate_templates(self, rate, quadrature = True):
+	def generate_templates(self, rate, quadrature = True, sampling_rate = None):
 		"""
 		generate all half sine gaussian templates corresponding to a parameter range and template duration
 		for a given sampling rate.
 		"""
+		if not sampling_rate:
+			sampling_rate = rate
+
 		for template in self.parameter_grid[rate]:
 			if quadrature:
 				for phase in self.phases:
-					yield self.waveform(rate, phase, template['frequency'], template['q'])
+					yield self.waveform(sampling_rate, phase, template['frequency'], template['q'])
 			else:
-				yield self.waveform(rate, self.phases[0], template['frequency'], template['q'])
+				yield self.waveform(sampling_rate, self.phases[0], template['frequency'], template['q'])
 
 	def waveform(self, rate, phase, f, q):
 		"""
-- 
GitLab