
Online new extinction

Merged Prathamesh Joshi requested to merge o4b-online-new-extinction into o4b-online
10 unresolved threads
@@ -152,23 +152,8 @@ def main():
 # set up the output paths
 #
-marg_pdf_exists = os.path.isfile(options.output)
-pdfs = DataCache.find(DataType.DIST_STAT_PDFS, svd_bins = "*")
-if marg_pdf_exists and len(pdfs) == len(registries):
-    files_exist = True
-elif not marg_pdf_exists and len(pdfs) == 0:
-    files_exist = False
-elif marg_pdf_exists and len(pdfs) != len(registries):
-    raise ValueError(f"Number of registry files provided ({len(registries)}) does not match number of DIST_STAT_PDF files found ({len(pdfs)})")
-else:
-    raise ValueError("Could not find marg DIST_STAT_PDF file")
 svd_bins = [reg[:4] for reg in registries]
-if files_exist:
-    assert set(pdfs.groupby('svd_bin').keys()) == set(svd_bins), "svd bins of registry files are not the same as svd bins of found PDFs"
-else:
-    pdfs = DataCache.generate(DataType.DIST_STAT_PDFS, CacheEntry.from_T050017(options.output).observatory, svd_bins = svd_bins)
+pdfs = DataCache.generate(DataType.DIST_STAT_PDFS, CacheEntry.from_T050017(options.output).observatory, svd_bins = svd_bins)
 pdfs = pdfs.groupby('svd_bin')

 #
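The new setup no longer decides up front whether a previous run left DIST_STAT_PDF files behind; it always builds the expected per-bin cache entries from the registry names and lets each bin check for its own file later in the loop. A minimal sketch of that pattern, with a hypothetical `expected_pdf_path` helper and made-up filenames standing in for `DataCache.generate`:

```python
import os

def expected_pdf_path(svd_bin, outdir="."):
    # hypothetical stand-in for DataCache.generate: the path where this bin's
    # DIST_STAT_PDF is expected to live, whether or not the file exists yet
    return os.path.join(outdir, f"{svd_bin}_GSTLAL_DIST_STAT_PDFS.xml.gz")

registries = ["0000_registry.txt", "0001_registry.txt"]
svd_bins = [reg[:4] for reg in registries]
pdfs = {svd_bin: expected_pdf_path(svd_bin) for svd_bin in svd_bins}

for svd_bin, path in pdfs.items():
    if os.path.isfile(path):
        print(f"{svd_bin}: warm start from {path}")
    else:
        print(f"{svd_bin}: no previous PDF, starting from scratch")
```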
@@ -220,8 +205,12 @@ def main():
 url = url_from_registry(reg, likelihood_path)
 svd_bin = reg[:4]

-# load the old ranking stat pdf for this bin:
-old_pdf = far.parse_likelihood_control_doc(ligolw_utils.load_url(pdfs[svd_bin][0], verbose = options.verbose, contenthandler = far.RankingStat.LIGOLWContentHandler)) if files_exist else None
+if os.path.isfile(pdfs[svd_bin].files[0]):
+    # load the old ranking stat pdf for this bin:
+    _, old_pdf = far.parse_likelihood_control_doc(ligolw_utils.load_url(pdfs[svd_bin].files[0], verbose = options.verbose, contenthandler = far.RankingStat.LIGOLWContentHandler))
+else:
+    logging.warning(f"Couldn't find {pdfs[svd_bin].files[0]}, starting from scratch")
+    old_pdf = None

 # create the new ranking stat pdf and marginalize as we go
 new_pdf_status, pdf = calc_rank_pdfs(url, ranking_stat_samples, options.num_cores, verbose = options.verbose)
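Two things change in this hunk: the existence check moves from the collective `files_exist` flag to a per-bin `os.path.isfile`, and the return of `far.parse_likelihood_control_doc` is now unpacked so that only the PDF half is kept (the switch to `_, old_pdf = ...` suggests the function returns a pair whose second element is the ranking stat PDF). A small sketch of the per-bin warm-start pattern, with a hypothetical `load_pdf` loader in place of the ligolw machinery:

```python
import logging
import os

def load_pdf(path):
    # hypothetical loader standing in for ligolw_utils.load_url +
    # far.parse_likelihood_control_doc; returns whatever object holds the PDF
    return {"path": path}

def warm_start_pdf(path):
    # per-bin warm start: reuse the previous ranking stat PDF if its file
    # exists, otherwise log and let the caller start from scratch
    if os.path.isfile(path):
        return load_pdf(path)
    logging.warning("Couldn't find %s, starting from scratch", path)
    return None

old_pdf = warm_start_pdf("0000_GSTLAL_DIST_STAT_PDFS.xml.gz")
```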
@@ -256,10 +245,14 @@ def main():
 # get the zerolag pdf for this bin and use it to perform bin-specific extinction
 zerolag_counts_url = url_from_registry(reg, zerolag_counts_path)
 pdf += far.RankingStatPDF.from_xml(ligolw_utils.load_url(zerolag_counts_url, verbose = options.verbose, contenthandler = far.RankingStat.LIGOLWContentHandler), u"gstlal_inspiral_likelihood")

-if data:
-    data += pdf.new_with_extinction()
+if sum(pdf.zero_lag_lr_lnpdf.array) > 0:
+    # LR calculation has started and we are ready to perform first-round extinction
+    if data:
+        data += pdf.new_with_extinction()
+    else:
+        data = pdf.new_with_extinction()
 else:
-    data = pdf.new_with_extinction()
+    logging.warning(f'Skipping first-round extinction for {pdfs[svd_bin].files[0]}')

 # while looping through registries
 # send heartbeat messages
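First-round extinction is now gated on the bin actually having zero-lag counts: if the LR calculation has not yet produced any zero-lag samples, the bin is skipped instead of contributing an empty extinction to the running sum, and the inner branch simply accumulates into `data` or initializes it on the first contributing bin. A toy version of that gate, using plain numpy arrays in place of RankingStatPDF objects (`accumulate_extincted` and the histograms are illustrative only):

```python
import numpy

def accumulate_extincted(data, pdf_hist, zero_lag_hist, label):
    # skip bins whose zero-lag LR histogram is still empty; otherwise add
    # this bin's (stand-in) extincted PDF to the running marginalized total
    if zero_lag_hist.sum() > 0:
        extincted = pdf_hist.copy()  # stand-in for pdf.new_with_extinction()
        return extincted if data is None else data + extincted
    print(f"Skipping first-round extinction for {label}")
    return data

data = None
data = accumulate_extincted(data, numpy.ones(4), numpy.zeros(4), "0000")                 # skipped, data stays None
data = accumulate_extincted(data, numpy.ones(4), numpy.array([2., 1., 0., 0.]), "0001")  # initializes data
```

Because skipped bins never initialize `data`, it can still be None after the loop, which is why the later hunks add `if data:` guards before touching it.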
@@ -267,7 +260,8 @@ def main():
 kafka_processor.heartbeat()

 # zero out the zerolag after the first round of extinction is finished
-data.zero_lag_lr_lnpdf.count.array[:] = 0
+if data:
+    data.zero_lag_lr_lnpdf.count.array[:] = 0

 # if we fail to complete more than 1% of the bins,
 # this is a serious problem and we should just quit
@@ -293,7 +287,10 @@ def main():
 zerolag_counts_url = url_from_registry("gstlal_ll_inspiral_trigger_counter_registry.txt", zerolag_counts_path)

 # add zerolag counts url to marginalized data
-data += far.RankingStatPDF.from_xml(ligolw_utils.load_url(zerolag_counts_url, verbose = options.verbose, contenthandler = far.RankingStat.LIGOLWContentHandler), u"gstlal_inspiral_likelihood")
+if data:
+    data += far.RankingStatPDF.from_xml(ligolw_utils.load_url(zerolag_counts_url, verbose = options.verbose, contenthandler = far.RankingStat.LIGOLWContentHandler), u"gstlal_inspiral_likelihood")
+else:
+    data = far.RankingStatPDF.from_xml(ligolw_utils.load_url(zerolag_counts_url, verbose = options.verbose, contenthandler = far.RankingStat.LIGOLWContentHandler), u"gstlal_inspiral_likelihood")

 if kafka_processor:
     kafka_processor.heartbeat()
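The marginalized zerolag counts document gets the same accumulate-or-initialize treatment: if no bin ever initialized `data`, the counts PDF becomes the starting point rather than being added to None. Since both branches parse the same URL, the check could also be written so the document is loaded once and bound conditionally; a refactoring sketch of that idea, not what the patch does, with a hypothetical `load_zerolag_pdf` standing in for the `from_xml`/`load_url` call:

```python
def load_zerolag_pdf(url):
    # hypothetical stand-in for far.RankingStatPDF.from_xml(
    #     ligolw_utils.load_url(url, ...), u"gstlal_inspiral_likelihood")
    return 1.0

data = None  # e.g. no bin contributed during first-round extinction
zerolag_counts_url = "gstlal_ll_inspiral_trigger_counter_registry.txt"

# load once, then either fold the counts into the running marginalized PDF
# or use them as its initial value
zerolag_pdf = load_zerolag_pdf(zerolag_counts_url)
data = zerolag_pdf if data is None else data + zerolag_pdf
```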
@@ -311,10 +308,6 @@ def main():
 ligolw_utils.write_filename(xmldoc, options.output, verbose = options.verbose)
 logging.info(f"Done marginalizing likelihoods.")

-# we just created the bin-specific and marg DIST_STAT_PDFs,
-# so the files definitely exist for the next iteration of the loop
-files_exist = True

 if kafka_processor:
     kafka_processor.heartbeat()