From 9d73a2c018731bda21237b247b051f30b0dc6e0d Mon Sep 17 00:00:00 2001 From: Patrick Godwin <patrick.godwin@ligo.org> Date: Fri, 31 Aug 2018 19:47:35 -0700 Subject: [PATCH] fxtools/feature_extractor.py: fix edge cases where wrong durations were being applied to hdf5 files, rename some methods to be more informative --- gstlal-burst/bin/gstlal_feature_extractor | 2 +- .../python/fxtools/feature_extractor.py | 24 +++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/gstlal-burst/bin/gstlal_feature_extractor b/gstlal-burst/bin/gstlal_feature_extractor index 6fe3597de7..5b6d89c547 100755 --- a/gstlal-burst/bin/gstlal_feature_extractor +++ b/gstlal-burst/bin/gstlal_feature_extractor @@ -486,7 +486,7 @@ for subset_id, channel_subset in enumerate(data_source_info.channel_subsets, 1): # save remaining triggers logger.info("persisting features to disk...") - handler.finalize() + handler.flush_and_save_features() # # Shut down pipeline diff --git a/gstlal-burst/python/fxtools/feature_extractor.py b/gstlal-burst/python/fxtools/feature_extractor.py index e681da687f..06dde13542 100644 --- a/gstlal-burst/python/fxtools/feature_extractor.py +++ b/gstlal-burst/python/fxtools/feature_extractor.py @@ -216,14 +216,14 @@ class MultiChannelHandler(simplehandler.Handler): if self.save_format == 'hdf5': if self.timestamp and utils.in_new_epoch(self.timestamp, self.last_save_time, self.cadence) or (self.timestamp == self.feature_end_time): self.logger.info("saving features to disk at timestamp = %d" % self.timestamp) - self.to_hdf_file() + self.save_features() self.last_save_time = self.timestamp # persist triggers once per persist cadence if using hdf5 format if self.save_format == 'hdf5': if self.timestamp and utils.in_new_epoch(self.timestamp, self.last_persist_time, self.persist_cadence): self.logger.info("persisting features to disk at timestamp = %d" % self.timestamp) - self.finish_hdf_file() + self.persist_features() self.last_persist_time = self.timestamp self.set_hdf_file_properties(self.timestamp, self.persist_cadence) @@ -283,44 +283,44 @@ class MultiChannelHandler(simplehandler.Handler): feature_row = {'channel':channel, 'snr':row.snr, 'trigger_time':trigger_time, 'frequency':waveform['frequency'], 'q':waveform['q'], 'phase':row.phase} self.feature_queue.append(timestamp, channel, feature_row) - def to_hdf_file(self): + def save_features(self): """ - Dumps triggers saved in memory to disk in hdf5 format. + Dumps features saved in memory to disk. Uses the T050017 filenaming convention. NOTE: This method should only be called by an instance that is locked. """ self.fdata.dump(self.tmp_path, self.fname, utils.floor_div(self.last_save_time, self.cadence), tmp = True) - def finish_hdf_file(self): + def persist_features(self): """ - Move a temporary hdf5 file to its final location after + Move a temporary file to its final location after all file writes have been completed. """ final_path = os.path.join(self.fpath, self.fname)+".h5" tmp_path = os.path.join(self.tmp_path, self.fname)+".h5.tmp" shutil.move(tmp_path, final_path) - def finalize(self): + def flush_and_save_features(self): """ - Clears out remaining features from the queue for saving to disk. + Flushes out remaining features from the queue for saving to disk. """ - # save remaining triggers if self.save_format == 'hdf5': self.feature_queue.flush() while len(self.feature_queue): feature_subset = self.feature_queue.pop() self.fdata.append(feature_subset['timestamp'], feature_subset['features']) - self.to_hdf_file() - self.finish_hdf_file() + self.save_features() + self.persist_features() def set_hdf_file_properties(self, start_time, duration): """ - Returns the file name, as well as locations of temporary and permanent locations of + Updates the file name, as well as locations of temporary and permanent locations of directories where triggers will live, when given the current gps time and a gps duration. Also takes care of creating new directories as needed and removing any leftover temporary files. """ # set/update file names and directories with new gps time and duration + duration = min(duration, self.feature_end_time - start_time) self.fname = os.path.splitext(utils.to_trigger_filename(self.basename, start_time, duration, 'h5'))[0] self.fpath = utils.to_trigger_path(os.path.abspath(self.out_path), self.basename, start_time, self.job_id, self.subset_id) self.tmp_path = utils.to_trigger_path(self.tmp_dir, self.basename, start_time, self.job_id, self.subset_id) -- GitLab