
Online new extinction

Merged: Prathamesh Joshi requested to merge o4b-online-new-extinction into o4b-online
@@ -152,23 +152,8 @@ def main():
 	# set up the output paths
 	#
-	marg_pdf_exists = os.path.isfile(options.output)
-	pdfs = DataCache.find(DataType.DIST_STAT_PDFS, svd_bins = "*")
-	if marg_pdf_exists and len(pdfs) == len(registries):
-		files_exist = True
-	elif not marg_pdf_exists and len(pdfs) == 0:
-		files_exist = False
-	elif marg_pdf_exists and len(pdfs) != len(registries):
-		raise ValueError(f"Number of registry files provided ({len(registries)}) does not match number of DIST_STAT_PDF files found ({len(pdfs)})")
-	else:
-		raise ValueError("Could not find marg DIST_STAT_PDF file")
-
 	svd_bins = [reg[:4] for reg in registries]
-	if files_exist:
-		assert set(pdfs.groupby('svd_bin').keys()) == set(svd_bins), "svd bins of registry files are not the same as svd bins of found PDFs"
-	else:
-		pdfs = DataCache.generate(DataType.DIST_STAT_PDFS, CacheEntry.from_T050017(options.output).observatory, svd_bins = svd_bins)
+	pdfs = DataCache.generate(DataType.DIST_STAT_PDFS, CacheEntry.from_T050017(options.output).observatory, svd_bins = svd_bins)
 
 	pdfs = pdfs.groupby('svd_bin')
 
 	#
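With this change the script no longer probes for existing PDFs up front; it always builds the expected per-bin cache entries and defers the existence check to a per-file os.path.isfile() in the loop below. For context, a minimal sketch of the per-bin bookkeeping that DataCache.generate(...).groupby('svd_bin') is assumed to provide; the mock class and file-name pattern here are hypothetical, not the real gstlal DataCache API.

# Minimal sketch of the assumed per-bin bookkeeping: one entry (with a .files
# list) per 4-character SVD bin prefix taken from each registry file name.
from dataclasses import dataclass, field

@dataclass
class MockCache:
	files: list = field(default_factory=list)

def expected_pdfs_by_bin(registries, observatory = "H1L1"):
	# map each registry's 4-character SVD bin prefix to a hypothetical PDF path
	pdfs = {}
	for reg in registries:
		svd_bin = reg[:4]
		pdfs[svd_bin] = MockCache(files = [f"{observatory}-{svd_bin}_GSTLAL_DIST_STAT_PDFS-0-0.xml.gz"])
	return pdfs

# e.g. expected_pdfs_by_bin(["0001_registry.txt"])["0001"].files[0] gives the path
# that is later checked with os.path.isfile() before loading an old PDF.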
@@ -220,8 +205,12 @@ def main():
 			url = url_from_registry(reg, likelihood_path)
 			svd_bin = reg[:4]
 
-			# load the old ranking stat pdf for this bin:
-			old_pdf = far.parse_likelihood_control_doc(ligolw_utils.load_url(pdfs[svd_bin][0], verbose = options.verbose, contenthandler = far.RankingStat.LIGOLWContentHandler)) if files_exist else None
+			if os.path.isfile(pdfs[svd_bin].files[0]):
+				# load the old ranking stat pdf for this bin:
+				_, old_pdf = far.parse_likelihood_control_doc(ligolw_utils.load_url(pdfs[svd_bin].files[0], verbose = options.verbose, contenthandler = far.RankingStat.LIGOLWContentHandler))
+			else:
+				logging.warning(f"Couldn't find {pdfs[svd_bin].files[0]}, starting from scratch")
+				old_pdf = None
 
 			# create the new ranking stat pdf and marginalize as we go
 			new_pdf_status, pdf = calc_rank_pdfs(url, ranking_stat_samples, options.num_cores, verbose = options.verbose)
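This hunk replaces the global files_exist flag with a per-file check. A hedged sketch of that load-or-start-fresh pattern, with load_pdf as a hypothetical stand-in for the ligolw_utils.load_url() / far.parse_likelihood_control_doc() combination used in the diff:

import logging
import os

def load_old_pdf(path, load_pdf):
	# return the previously accumulated ranking stat PDF at `path`, or None if the
	# file is not on disk yet (first iteration, or a bin that never produced one)
	if os.path.isfile(path):
		return load_pdf(path)
	logging.warning(f"Couldn't find {path}, starting from scratch")
	return None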
@@ -256,16 +245,32 @@ def main():
 			# get the zerolag pdf for this bin and use it to perform bin-specific extinction
 			zerolag_counts_url = url_from_registry(reg, zerolag_counts_path)
 			pdf += far.RankingStatPDF.from_xml(ligolw_utils.load_url(zerolag_counts_url, verbose = options.verbose, contenthandler = far.RankingStat.LIGOLWContentHandler), u"gstlal_inspiral_likelihood")
 
-			if data:
-				data += pdf.new_with_extinction()
-			else:
-				data = pdf.new_with_extinction()
+			if pdf.ready_for_extinction():
+				# LR calculation has started and we are ready to perform first-round extinction
+				if data:
+					data += pdf.new_with_extinction()
+				else:
+					data = pdf.new_with_extinction()
+			else:
+				# add a zeroed-out PDF instead, so that the template ids get added to data
+				logging.warning(f'Skipping first-round extinction for {pdfs[svd_bin].files[0]}, using an empty PDF instead')
+				pdf.noise_lr_lnpdf.array[:] = 0.
+				pdf.signal_lr_lnpdf.array[:] = 0.
+				pdf.zero_lag_lr_lnpdf.array[:] = 0.
+				if data:
+					data += pdf
+				else:
+					data = pdf
 
 			# while looping through registries
 			# send heartbeat messages
 			if kafka_processor:
 				kafka_processor.heartbeat()
 
+		# zero out the zerolag after the first round of extinction is finished
+		if data:
+			data.zero_lag_lr_lnpdf.count.array[:] = 0
 
 		# if we fail to complete more than 1% of the bins,
 		# this is a serious problem and we should just quit
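The new ready_for_extinction() gate decides whether a bin contributes its extincted PDF or only an emptied one, so that its template ids still reach data. A toy illustration of that accumulate-or-initialize control flow, with bare numpy arrays standing in for RankingStatPDF histograms and the actual extinction step (new_with_extinction()) omitted:

import numpy

def accumulate(data, counts, ready):
	if not ready:
		# bin not ready for first-round extinction: contribute zeroed counts so
		# only the bin's structure is merged into the running marginalized sum
		counts = numpy.zeros_like(counts)
	if data is None:
		return counts.copy()
	return data + counts

# mirror the loop over registries: two ready bins and one that is not ready yet
data = None
for counts, ready in [(numpy.ones(4), True), (numpy.ones(4), True), (numpy.ones(4), False)]:
	data = accumulate(data, counts, ready)
print(data)  # [2. 2. 2. 2.] -- the not-ready bin contributed nothing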
@@ -284,7 +289,7 @@
 		# noise and signal model ranking statistic histograms in the
 		# zero-lag counts files downloaded from the jobs must be all 0, and
 		# the zero-lag counts in the output generated by
 		# gstlal_inspiral_calc_rank_pdfs must be 0. NOTE: this is where
 		# the zero-lag counts have the density estimation transform
 		# applied.
 		zerolag_counts_url = url_from_registry("gstlal_ll_inspiral_trigger_counter_registry.txt", zerolag_counts_path)
 
 		# add zerolag counts url to marginalized data
-		data += far.RankingStatPDF.from_xml(ligolw_utils.load_url(zerolag_counts_url, verbose = options.verbose, contenthandler = far.RankingStat.LIGOLWContentHandler), u"gstlal_inspiral_likelihood")
+		if data:
+			data += far.RankingStatPDF.from_xml(ligolw_utils.load_url(zerolag_counts_url, verbose = options.verbose, contenthandler = far.RankingStat.LIGOLWContentHandler), u"gstlal_inspiral_likelihood")
+		else:
+			data = far.RankingStatPDF.from_xml(ligolw_utils.load_url(zerolag_counts_url, verbose = options.verbose, contenthandler = far.RankingStat.LIGOLWContentHandler), u"gstlal_inspiral_likelihood")
 
 		if kafka_processor:
 			kafka_processor.heartbeat()
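A hedged sketch of a consistency check for the invariant stated in the comment above: the zero-lag counts contribution must carry only zero-lag counts, so its noise and signal histograms should be identically zero. Plain numpy arrays stand in for the noise_lr_lnpdf.array and signal_lr_lnpdf.array members.

import numpy

def assert_zerolag_only(noise_array, signal_array):
	assert not numpy.any(noise_array), "noise histogram in the zero-lag counts file must be all 0"
	assert not numpy.any(signal_array), "signal histogram in the zero-lag counts file must be all 0"

# example: an all-zero noise/signal pair passes the check
assert_zerolag_only(numpy.zeros((2, 2)), numpy.zeros((2, 2)))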
@@ -309,10 +317,6 @@ def main():
 		ligolw_utils.write_filename(xmldoc, options.output, verbose = options.verbose)
 		logging.info(f"Done marginalizing likelihoods.")
 
-		# we just created the bin-specific and marg DIST_STAT_PDFs,
-		# so the files definitely exist for the next iteration of the loop
-		files_exist = True
-
 		if kafka_processor:
 			kafka_processor.heartbeat()