Changed the directory structure for the new workflow, and use the injection file to add the
sim inspiral table and make reconstruction segments when injection frames are used.

changed directory structure for the new workflow and use injection file to add
7967660f · Divya Singh · 1b3e9754 · 7967660f · 7967660f · 7967660f
Commit 7967660f authored 1 year ago by Divya Singh
--- a/gstlal-inspiral/bin/gstlal_inspiral
+++ b/gstlal-inspiral/bin/gstlal_inspiral
@@ -9,7 +9,7 @@
 #
 # This program is distributed in the hope that it will be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the GNU General
 # Public License for more details.
 #
 # You should have received a copy of the GNU General Public License along
@@ -21,7 +21,7 @@
 #
 # =============================================================================
 #
-#                                   Preamble
+#				    Preamble
 #
 # =============================================================================
 #
@@ -32,89 +32,89 @@
 ### .. graphviz::
 ###
 ###    digraph llpipe {
-###    	labeljust = "r";
-###    	label="gstlal_inspiral"
-###    	rankdir=LR;
-###    	graph [fontname="Roman", fontsize=24];
-###    	edge [ fontname="Roman", fontsize=10 ];
-###    	node [fontname="Roman", shape=box, fontsize=11];
+###	labeljust = "r";
+###	label="gstlal_inspiral"
+###	rankdir=LR;
+###	graph [fontname="Roman", fontsize=24];
+###	edge [ fontname="Roman", fontsize=10 ];
+###	node [fontname="Roman", shape=box, fontsize=11];
 ###
-###    	gracedb [label="GW\nCandidate\nDatabase", shape=oval, color=tomato3, style=filled];
+###	gracedb [label="GW\nCandidate\nDatabase", shape=oval, color=tomato3, style=filled];
 ###
 ###
-###    	subgraph clusterNodeN {
+###	subgraph clusterNodeN {
 ###
-###    		style=rounded;
-###    		label="gstreamer pipeline";
-###    		labeljust = "r";
-###    		fontsize = 14;
+###		style=rounded;
+###		label="gstreamer pipeline";
+###		labeljust = "r";
+###		fontsize = 14;
 ###
-###    		H1src [label="H1 data source:\n mkbasicsrc()", color=red4];
-###    		L1src [label="L1 data source:\n mkbasicsrc()", color=green4];
-###    		V1src [label="V1 data source:\n mkbasicsrc()", color=magenta4];
+###		H1src [label="H1 data source:\n mkbasicsrc()", color=red4];
+###		L1src [label="L1 data source:\n mkbasicsrc()", color=green4];
+###		V1src [label="V1 data source:\n mkbasicsrc()", color=magenta4];
 ###
-###    		H1multirate [label="H1 whitening and downsampling:\nmkwhitened_multirate_src()", color=red4];
-###    		L1multirate [label="L1 whitening and downsampling:\nmkwhitened_multirate_src()", color=green4];
-###    		V1multirate [label="V1 whitening and downsampling:\nmkwhitened_multirate_src()", color=magenta4];
+###		H1multirate [label="H1 whitening and downsampling:\nmkwhitened_multirate_src()", color=red4];
+###		L1multirate [label="L1 whitening and downsampling:\nmkwhitened_multirate_src()", color=green4];
+###		V1multirate [label="V1 whitening and downsampling:\nmkwhitened_multirate_src()", color=magenta4];
 ###
-###    		H1LLOID [label="H1 LLOID filtering engine:\nmkLLOIDmulti()", color=red4];
-###    		L1LLOID [label="L1 LLOID filtering engine:\nmkLLOIDmulti()", color=green4];
-###    		V1LLOID [label="V1 LLOID filtering engine:\nmkLLOIDmulti()", color=magenta4];
+###		H1LLOID [label="H1 LLOID filtering engine:\nmkLLOIDmulti()", color=red4];
+###		L1LLOID [label="L1 LLOID filtering engine:\nmkLLOIDmulti()", color=green4];
+###		V1LLOID [label="V1 LLOID filtering engine:\nmkLLOIDmulti()", color=magenta4];
 ###
-###    		H1Trig1 [label="H1 Triggering:\nsub bank 1", color=red4];
-###    		L1Trig1 [label="L1 Triggering:\nsub bank 1", color=green4];
-###    		V1Trig1 [label="V1 Triggering:\nsub bank 1", color=magenta4];
-###    		H1Trig2 [label="H1 Triggering:\nsub bank 2", color=red4];
-###    		L1Trig2 [label="L1 Triggering:\nsub bank 2", color=green4];
-###    		V1Trig2 [label="V1 Triggering:\nsub bank 2", color=magenta4];
-###    		H1TrigN [label="H1 Triggering:\nsub bank N", color=red4];
-###    		L1TrigN [label="L1 Triggering:\nsub bank N", color=green4];
-###    		V1TrigN [label="V1 Triggering:\nsub bank N", color=magenta4];
+###		H1Trig1 [label="H1 Triggering:\nsub bank 1", color=red4];
+###		L1Trig1 [label="L1 Triggering:\nsub bank 1", color=green4];
+###		V1Trig1 [label="V1 Triggering:\nsub bank 1", color=magenta4];
+###		H1Trig2 [label="H1 Triggering:\nsub bank 2", color=red4];
+###		L1Trig2 [label="L1 Triggering:\nsub bank 2", color=green4];
+###		V1Trig2 [label="V1 Triggering:\nsub bank 2", color=magenta4];
+###		H1TrigN [label="H1 Triggering:\nsub bank N", color=red4];
+###		L1TrigN [label="L1 Triggering:\nsub bank N", color=green4];
+###		V1TrigN [label="V1 Triggering:\nsub bank N", color=magenta4];
 ###
-###    		H1src -> H1multirate;
-###    		L1src -> L1multirate;
-###    		V1src -> V1multirate;
+###		H1src -> H1multirate;
+###		L1src -> L1multirate;
+###		V1src -> V1multirate;
 ###
-###    		H1multirate -> H1LLOID [label="h(t) 4096Hz"];
-###    		L1multirate -> L1LLOID [label="h(t) 4096Hz"];
-###    		V1multirate -> V1LLOID [label="h(t) 4096Hz"];
-###    		H1multirate -> H1LLOID [label="h(t) 2048Hz"];
-###    		L1multirate -> L1LLOID [label="h(t) 2048Hz"];
-###    		V1multirate -> V1LLOID [label="h(t) 2048Hz"];
-###    		H1multirate -> H1LLOID [label="h(t) Nth-pow-of-2 Hz"];
-###    		L1multirate -> L1LLOID [label="h(t) Nth-pow-of-2 Hz"];
-###    		V1multirate -> V1LLOID [label="h(t) Nth-pow-of-2 Hz"];
+###		H1multirate -> H1LLOID [label="h(t) 4096Hz"];
+###		L1multirate -> L1LLOID [label="h(t) 4096Hz"];
+###		V1multirate -> V1LLOID [label="h(t) 4096Hz"];
+###		H1multirate -> H1LLOID [label="h(t) 2048Hz"];
+###		L1multirate -> L1LLOID [label="h(t) 2048Hz"];
+###		V1multirate -> V1LLOID [label="h(t) 2048Hz"];
+###		H1multirate -> H1LLOID [label="h(t) Nth-pow-of-2 Hz"];
+###		L1multirate -> L1LLOID [label="h(t) Nth-pow-of-2 Hz"];
+###		V1multirate -> V1LLOID [label="h(t) Nth-pow-of-2 Hz"];
 ###
-###    		H1LLOID -> H1Trig1 [label="SNRs sub bank 1"];
-###    		L1LLOID -> L1Trig1 [label="SNRs sub bank 1"];
-###    		V1LLOID -> V1Trig1 [label="SNRs sub bank 1"];
-###    		H1LLOID -> H1Trig2 [label="SNRs sub bank 2"];
-###    		L1LLOID -> L1Trig2 [label="SNRs sub bank 2"];
-###    		V1LLOID -> V1Trig2 [label="SNRs sub bank 2"];
-###    		H1LLOID -> H1TrigN [label="SNRs sub bank N"];
-###    		L1LLOID -> L1TrigN [label="SNRs sub bank N"];
-###    		V1LLOID -> V1TrigN [label="SNRs sub bank N"];
-###    	}
+###		H1LLOID -> H1Trig1 [label="SNRs sub bank 1"];
+###		L1LLOID -> L1Trig1 [label="SNRs sub bank 1"];
+###		V1LLOID -> V1Trig1 [label="SNRs sub bank 1"];
+###		H1LLOID -> H1Trig2 [label="SNRs sub bank 2"];
+###		L1LLOID -> L1Trig2 [label="SNRs sub bank 2"];
+###		V1LLOID -> V1Trig2 [label="SNRs sub bank 2"];
+###		H1LLOID -> H1TrigN [label="SNRs sub bank N"];
+###		L1LLOID -> L1TrigN [label="SNRs sub bank N"];
+###		V1LLOID -> V1TrigN [label="SNRs sub bank N"];
+###	}
 ###
 ###
-###    	Coincidence [label="Coincidence\nO(1)s latency"];
-###    	SigEst [label="Significance\nEstimation\nO(1)s latency"];
-###    	Thresh [label="Thresholding\nO(1)s latency"];
-###    	EventGen [label="Event\nGeneration\nO(1)s latency"];
+###	Coincidence [label="Coincidence\nO(1)s latency"];
+###	SigEst [label="Significance\nEstimation\nO(1)s latency"];
+###	Thresh [label="Thresholding\nO(1)s latency"];
+###	EventGen [label="Event\nGeneration\nO(1)s latency"];
 ###
-###    	H1Trig1 -> Coincidence [label="Trigs sub bank 1"];
-###    	L1Trig1 -> Coincidence [label="Trigs sub bank 1"];
-###    	V1Trig1 -> Coincidence [label="Trigs sub bank 1"];
-###    	H1Trig2 -> Coincidence [label="Trigs sub bank 2"];
-###    	L1Trig2 -> Coincidence [label="Trigs sub bank 2"];
-###    	V1Trig2 -> Coincidence [label="Trigs sub bank 2"];
-###    	H1TrigN -> Coincidence [label="Trigs sub bank N"];
-###    	L1TrigN -> Coincidence [label="Trigs sub bank N"];
-###    	V1TrigN -> Coincidence [label="Trigs sub bank N"];
+###	H1Trig1 -> Coincidence [label="Trigs sub bank 1"];
+###	L1Trig1 -> Coincidence [label="Trigs sub bank 1"];
+###	V1Trig1 -> Coincidence [label="Trigs sub bank 1"];
+###	H1Trig2 -> Coincidence [label="Trigs sub bank 2"];
+###	L1Trig2 -> Coincidence [label="Trigs sub bank 2"];
+###	V1Trig2 -> Coincidence [label="Trigs sub bank 2"];
+###	H1TrigN -> Coincidence [label="Trigs sub bank N"];
+###	L1TrigN -> Coincidence [label="Trigs sub bank N"];
+###	V1TrigN -> Coincidence [label="Trigs sub bank N"];
 ###
-###    	Coincidence -> SigEst -> Thresh -> EventGen;
+###	Coincidence -> SigEst -> Thresh -> EventGen;
 ###
-###    	EventGen -> gracedb;
+###	EventGen -> gracedb;
 ###
 ###    }
 ###
@@ -122,7 +122,7 @@
 ### -------------
 ###
 ### +------------------------------------------------+---------------------------------------------+------------+
-### | Names                                          | Hash                                        | Date       |
+### | Names					     | Hash					   | Date	|
 ### +================================================+=============================================+============+
 ### | Florent, Sathya, Duncan Me, Jolien, Kipp, Chad | 9074294d6b57f43651143b5f93210751de1fe55a    | 2014-05-02 |
 ### +------------------------------------------------+---------------------------------------------+------------+
@@ -227,7 +227,7 @@ def service_domain(gracedb_search, gracedb_pipeline):
 #
 # =============================================================================
 #
-#                                 Command Line
+#				  Command Line
 #
 # =============================================================================
 #
@@ -258,7 +258,7 @@ def parse_command_line():
 	group.add_option("--control-peak-time", metavar = "seconds", type = "int", help = "Set a time window in seconds to find peaks in the control signal (optional, default is to disable composite detection statistic).")
 	group.add_option("--output", metavar = "filename", action = "append", default = [], help = "Set the name of the LIGO light-weight XML output file *.{xml,xml.gz} or an SQLite database *.sqlite (required).  Can be given multiple times.  Exactly as many output files must be specified as svd-bank files will be processed (see --svd-bank).")
 	group.add_option("--output-cache", metavar = "filename", help = "Provide a cache of output files.  This can be used instead of giving multiple --output options.  Cannot be combined with --output.")
-	group.add_option("--svd-bank", metavar = "filename", action = "append", default = [], help = "Set the name of the LIGO light-weight XML file from which to load the svd bank for a given instrument.  To analyze multiple instruments, --svd-bank can be called multiple times for svd banks corresponding to different instruments.  If --data-source is lvshm or framexmit, then only svd banks corresponding to a single bin must be given.  If given multiple times, the banks will be processed one-by-one, in order.  At least one svd bank for at least 2 detectors is required, but see also --svd-bank-cache.")
+	group.add_option("--svd-bank", metavar = "filename", action = "append", default = [], help = "Set the name of the LIGO light-weight XML file from which to load the svd bank for a given instrument.  To analyze multiple instruments, --svd-bank can be called multiple times for svd banks corresponding to different instruments.  If --data-source is lvshm or framexmit, then only svd banks corresponding to a single bin must be given.	If given multiple times, the banks will be processed one-by-one, in order.  At least one svd bank for at least 2 detectors is required, but see also --svd-bank-cache.")
 	group.add_option("--svd-bank-cache", metavar = "filename", help = "Provide a cache file of svd-bank files.  This can be used instead of giving multiple --svd-bank options.  Cannot be combined with --svd-bank options.")
 	# NOTE:  the clustering SQL scripts search for this option in the
 	# process_params table to determine the threshold below which it
@@ -275,7 +275,7 @@ def parse_command_line():
 	group.add_option("--coincidence-threshold", metavar = "seconds", type = "float", default = 0.005, help = "Set the coincidence window in seconds (default = 0.005 s).  The light-travel time between instruments will be added automatically in the coincidence test.")
 	group.add_option("--min-instruments", metavar = "count", type = "int", default = 2, help = "Set the minimum number of instruments that must contribute triggers to form a candidate (default = 2).")
 	group.add_option("--ranking-stat-input", metavar = "url", help = "Set the URL from which to load a ranking statistic definition.  When this is enabled, signal candidates will have ranking statistic values assigned on-the-fly.  Required when --data-source is lvshm or framexmit;  must also set --likelihood-snapshot-interval.")
-	group.add_option("--ranking-stat-output", metavar = "filename", action = "append", default = [], help = "Set the name of the file to which to write ranking statistic data collected from triggers (optional).  Can be given more than once.  If given, exactly as many must be provided as there are --svd-bank options and they will be writen to in order.")
+	group.add_option("--ranking-stat-output", metavar = "filename", action = "append", default = [], help = "Set the name of the file to which to write ranking statistic data collected from triggers (optional).	Can be given more than once.  If given, exactly as many must be provided as there are --svd-bank options and they will be writen to in order.")
 	group.add_option("--ranking-stat-output-cache", metavar = "filename", help = "Provide a cache of ranking statistic output files.  This can be used instead of giving multiple --ranking-stat-output options.  Cannot be combined with --ranking-stat-output.")
 	group.add_option("--compress-ranking-stat", action = "store_true", help = "Choose whether to compress the ranking stat upon start up. Only used when --ranking-stat-input is set.")
 	group.add_option("--compress-ranking-stat-threshold", type = "float", default = 0.03, help = "Only keep horizon distance values that differ by this much, fractionally, from their neighbours (default = 0.03).")
@@ -304,7 +304,7 @@ def parse_command_line():
 	group.add_option("--comment", metavar = "message", help = "Set the string to be recorded in comment and tag columns in various places in the output file (optional).")
 	group.add_option("--fir-stride", metavar = "seconds", type = "float", default = 8, help = "Set the length of the fir filter stride in seconds. default = 8")
 	group.add_option("--analysis-tag", metavar = "tag", default = "test", help = "Set the string to identify the analysis in which this job is part of. Used when --output-kafka-server is set. May not contain \".\" nor \"-\". Default is test.")
-	group.add_option("--job-tag", metavar = "tag", help = "Set the string to identify this job and register the resources it provides on a node.  Should be 4 digits of the form 0001, 0002, etc.;  may not contain \".\" nor \"-\".")
+	group.add_option("--job-tag", metavar = "tag", help = "Set the string to identify this job and register the resources it provides on a node.  Should be 4 digits of the form 0001, 0002, etc.;	may not contain \".\" nor \"-\".")
 	group.add_option("--local-frame-caching", action = "store_true", help = "Pre-reads frame data, performs downsampling, and stores to local filespace. ")
 	group.add_option("--nxydump-segment", metavar = "start:stop", default = ":", help = "Set the time interval to dump from nxydump elments (optional).  The default is \":\", i.e. dump all time.")
 	group.add_option("--reconstruction-segment", metavar = "start:stop", action = "append", help = "Only reconstruct the SNRs for this time interval (optional). Can be provided multiple times.")
@@ -384,12 +384,10 @@ def parse_command_line():
 		raise ValueError("must supply exactly as many --svd-bank options as --output")
 	if options.ranking_stat_output and len(options.ranking_stat_output) != len(options.output):
 		raise ValueError("must supply either none or exactly as many --ranking-stat-output options as --output")
-	if options.injections and not (options.injection_file or options.injection_frames):
-		raise ValueError("must supply either --injection-file or --injection-frames when --injections is set.")
-	if (options.injection_file or options.injection_frames) and not options.injections:
-		raise ValueError("must supply --injections when either --injection-file or --injection-frames is set")
-	if options.injection_frames and options.injection_file:
-		raise ValueError("must supply either --injection-file or --injection-frames, but not both")
+	if (options.injections and options.injection_frames) and not options.injection_file:
+		raise ValueError("must supply --injection-file when --injection-frames and --injections is set.")
+	if (options.injection_file and options.injection_frames) and not options.injections:
+		raise ValueError("must supply --injections when --injection-file and --injection-frames is set")
 	if (options.likelihood_snapshot_interval and not options.ranking_stat_output) and not options.injections:
 		raise ValueError("must set --ranking-stat-output when --likelihood-snapshot-interval is set")
 	if options.ranking_stat_output and options.injections:
@@ -564,7 +562,7 @@ def parse_command_line():
 #
 # =============================================================================
 #
-#                                Signal Handler
+#				 Signal Handler
 #
 # =============================================================================
 #
@@ -594,7 +592,7 @@ class OneTimeSignalHandler(object):
 #
 # =============================================================================
 #
-#                              Horizon Distances
+#			       Horizon Distances
 #
 # =============================================================================
 #
@@ -603,7 +601,7 @@ class OneTimeSignalHandler(object):
 #
 # =============================================================================
 #
-#                                     Main
+#				      Main
 #
 # =============================================================================
 #
@@ -846,7 +844,7 @@ for output_file_number, (svd_bank_url_dict, output_url, ranking_stat_output_url,

 	#
 	# construct dictionaries of whitened, conditioned, down-sampled
-	# h(t) streams.  NOTE:  we assume all banks for each instrument
+	# h(t) streams.  NOTE:	we assume all banks for each instrument
 	# were generated with the same processed PSD for that instrument
 	# and just extract the first without checking that this assumption
 	# is correct
@@ -1087,7 +1085,7 @@ for output_file_number, (svd_bank_url_dict, output_url, ranking_stat_output_url,

 	#
 	# Shutdown the web interface servers and garbage collect the Bottle
-	# app.  This should release the references the Bottle app's routes
+	# app.	This should release the references the Bottle app's routes
 	# hold to the pipeline's data (like template banks and so on).
 	#


--- a/gstlal-inspiral/python/config/inspiral.py
+++ b/gstlal-inspiral/python/config/inspiral.py
@@ -7,7 +7,7 @@
 #
 # This program is distributed in the hope that it will be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the GNU General
 # Public License for more details.
 #
 # You should have received a copy of the GNU General Public License along
@@ -50,11 +50,16 @@ class Config(BaseConfig):
 		if "summary" in kwargs:
 			self.summary = dotdict(replace_keys(kwargs["summary"]))

-		# set up analysis directories
-		if not self.data.analysis_dir:
-			self.data.analysis_dir = os.getcwd()
-		if not self.data.rerank_dir:
-			self.data.rerank_dir = self.data.analysis_dir
+		# set up analysis directories: data_dir is required; filter_dir and rank_dir default to the current working directory, and injection_dir defaults to filter_dir when injections are enabled.
+		if not self.data.data_dir:
+			raise ValueError('Specify the data-dir to read and write the staging data products.')
+		if not self.data.filter_dir:
+			self.data.filter_dir = os.getcwd()
+		if not self.data.rank_dir:
+			self.data.rank_dir = os.getcwd()
+
+		if self.filter.injections and not self.data.injection_dir:
+			self.data.injection_dir = self.data.filter_dir

 		# validate config
 		self.validate_inspiral()

--- a/gstlal-inspiral/python/dags/layers/inspiral.py
+++ b/gstlal-inspiral/python/dags/layers/inspiral.py
--- a/gstlal/python/dags/layers/psd.py
+++ b/gstlal/python/dags/layers/psd.py
@@ -34,7 +34,7 @@ def reference_psd_layer(config, dag):
 		transfer_files=config.condor.transfer_files,
 	)

-	psd_cache = DataCache.generate(DataType.REFERENCE_PSD, config.ifo_combos, config.time_bins)
+	psd_cache = DataCache.generate(DataType.REFERENCE_PSD, config.ifo_combos, config.time_bins, root=config.data.data_dir)

 	frame_opts = [
 		Option("data-source", "frames"),
@@ -84,7 +84,7 @@ def median_psd_layer(config, dag, psd_cache):
 		transfer_files=config.condor.transfer_files,
 	)

-	median_psd_cache = DataCache.generate(DataType.MEDIAN_PSD, config.all_ifos, config.span)
+	median_psd_cache = DataCache.generate(DataType.MEDIAN_PSD, config.all_ifos, config.span, root=config.data.data_dir)

 	layer += Node(
 		inputs = Argument("psds", psd_cache.files),