Commit b034902d authored by Reed Essick

added executables for ovl training and evaluation from the command line to support apples-to-apples comparison with MLA implementations. This included a bit of debugging in idq.py and ovl.py
Original: cb3afdbd9bae1c9356eea3c96a91b2625b8581d9
parent f0bf2c91
@@ -36,6 +36,8 @@ pybin_scripts = \
laldetchar-idq-gst-relay \
laldetchar-idq-gst-listener \
laldetchar-idq-gdb-local-performance \
+	laldetchar-ovl-train \
+	laldetchar-ovl-evaluate \
$(END_OF_LIST)
endif
usage = "laldetchar-ovl-evaluate.py [--options] vetolist patfile"
description = "generate predictions from OVL based on the vetolist and patfile. This is provided to allow for apples-to-apples comparison with MLA implementations"
author = "Reed Essick (reed.essick@ligo.org)"
#-------------------------------------------------
import os
import numpy as np
from laldetchar.idq import idq
from laldetchar.idq import ovl
from laldetchar.idq import event
from ConfigParser import SafeConfigParser
from optparse import OptionParser
#-------------------------------------------------
parser = OptionParser(usage=usage, description=description)
parser.add_option('-v', '--verbose', default=False, action='store_true')
parser.add_option('-c', '--config', default=None, type='string',
help='the config file used with laldetchar-ovl-train')
parser.add_option('', '--output-filename', default=None, type='string',
help='the name of the output file. If not supplied, a name will be generated automatically from the first patfile.')
opts, args = parser.parse_args()
assert len(args)>=2, "please supply at least 2 input arguments\n%s"%usage
vetolist = args[0]
patfiles = args[1:]
if not opts.config:
opts.config = raw_input('--config=')
config = SafeConfigParser()
config.read(opts.config)
#-------------------------------------------------
### find maximum vwin used in vetolist
if opts.verbose:
print( "finding maximum veto window used in vetolist" )
win=0.001 ### use this as an absolute minimum. Will almost certainly be replaced, unless vetolist is empty
file_obj = open(vetolist, 'r')
for line in file_obj:
if line[0] != "#": ### skip comment lines
    win = max(win, float(line.strip().split()[ovl.vD['vwin']]))
file_obj.close()
#---
### find time ranges of interest from patfile
if opts.verbose:
print( "defining segments in which we need KW triggers" )
gps = idq.slim_load_datfiles(patfiles, columns=['GPS_s', 'GPS_ms'])
gps = np.array(gps['GPS_s'], dtype=float) + 1e-6*np.array(gps['GPS_ms'], dtype=float)
Ngps = len(gps)
if Ngps==0:
raise ValueError('please supply at least one GPS time within : %s'%', '.join(patfiles))
elif opts.verbose:
print( "found %d times"%Ngps )
segs = event.fixsegments([[t-win, t+win] for t in gps]) ### the segments in which we need KW triggers
#---
### look up KW trg files that intersect segs
if opts.verbose:
print( "finding relevant kw_trgfiles" )
kw_trgfiles = []
### iterate over different configurations used in training
for kwconf, dirname in eval(config.get('general', 'kw')).items(): ### this is kinda ugly...
if opts.verbose:
print( " searching for KW trgfiles corresponding to %s in %s within [%.3f, %.3f]"%(kwconf, dirname, segs[0][0], segs[-1][1]) )
### iterate over all trg files found in that directory
for trgfile in idq.get_all_files_in_range(dirname, segs[0][0], segs[-1][1], pad=0, suffix='.trg'):
### check whether there is some overlap
### not guaranteed if there are gaps between min and max gps times
if event.livetime(event.andsegments([[idq.extract_start_stop(trgfile, suffix='.trg')], segs])):
if opts.verbose:
print( " kept : "+trgfile )
kw_trgfiles.append( trgfile )
elif opts.verbose:
print( " discarded : "+trgfile )
#---
if opts.verbose:
print( "evaluating %d times using %d KW trgfiles"%(Ngps, len(kw_trgfiles) ) )
### set up output pointers
if opts.output_filename:
datfile = os.path.basename(opts.output_filename)
output_dir = os.path.dirname(opts.output_filename)
else:
datfile = os.path.basename(patfiles[0]).replace(".pat", ".ovldat")
output_dir = os.path.dirname(patfiles[0])
### actually run the evaluation
idq.ovl_evaluate(
vetolist,
patfiles=patfiles,
kw_trgfiles=kw_trgfiles,
filename=datfile,
output_dir=output_dir,
)
if opts.verbose:
print( "predictions written to : %s/%s"%(output_dir, datfile) )
usage = """ovl-train.py [--options] gps_start gps_stop"""
description = """written to generate an ovl vetolist over the specified range"""
__author__= 'Reed Essick (reed.essick@ligo.org)'
#=================================================
import os
from laldetchar.idq import idq
from laldetchar.idq import ovl
from glue.ligolw import ligolw
from glue.ligolw import utils as ligolw_utils
from glue.ligolw import lsctables
import ConfigParser
import subprocess
from optparse import OptionParser
#=================================================
parser=OptionParser(usage=usage, description=description)
parser.add_option("-v", "--verbose", default=False, action="store_true")
parser.add_option("-c", "--config", default="./config.ini", type="string")
parser.add_option("", "--ignore-science-segments", default=False, action="store_true")
parser.add_option("", "--skip-collect-sngl-chan", default=False, action="store_true", help="if you already have the correct single channel summary files, skip that step here")
parser.add_option("", "--redundancies", default=False, action="store_true")
parser.add_option("", "--safety", default=False, action="store_true")
opts, args = parser.parse_args()
if len(args) != 2:
raise ValueError("please supply exactly 2 arguments")
gpsstart = int(args[0])
gpsstop = int(args[1])
### check config
if not os.path.exists(opts.config):
raise ValueError("--config=%s does not exist"%opts.config)
config = ConfigParser.SafeConfigParser()
config.read(opts.config)
#=================================================
### make output directory
output_dir = "%s/%d_%d/"%(config.get("general","traindir"), gpsstart, gpsstop)
if not os.path.exists(output_dir):
os.makedirs(output_dir)
#========================
### get science segments
if not opts.ignore_science_segments:
### query the database
args = '%s -t %s -q -a %s -s %d -e %d' % (config.get("get_science_segments", "program"),
config.get("get_science_segments", "segdb"),
config.get("get_science_segments", "include"),
gpsstart,
gpsstop)
if opts.verbose:
print "getting science segments"
print "\t", args
segfile = "%s/science_segments-%d-%d.xml"%(output_dir, gpsstart, int(gpsstop-gpsstart))
segfile_obj = open(segfile, "w")
p = subprocess.Popen(args.split(), stdout=segfile_obj)
p.wait()
segfile_obj.close()
### read in segments from xml file
(scisegs, coveredseg) = idq.extract_dq_segments(segfile, config.get('get_science_segments', 'include'))
else:
scisegs = [ [gpsstart, gpsstop] ]
### write segments to ascii list
if opts.verbose:
print "writing segments to ascii list"
sciseg_path = "%s/science_segments-%d-%d.seg"%(output_dir, int(gpsstart), int(gpsstop-gpsstart))
f = open(sciseg_path, 'w')
for line in scisegs:
print >> f, line[0], line[1]
f.close()
#========================
### create single channel summary files
snglchndir = config.get("general","snglchndir")
if opts.skip_collect_sngl_chan:
if opts.verbose:
print "skipping single channel summary file generation. Expecting all data to already exist in : %s"%snglchndir
else:
if opts.verbose:
print "writing single channel summary files into : %s"%snglchndir
if not os.path.exists(snglchndir):
os.makedirs(snglchndir)
new_dirs = []
for kwconfig, kwtrgdir in eval(config.get("general","kw")).items(): ### iterate over all kw directories
if opts.verbose:
print "\tkwconfig : %s\n\tkwtrgdir : %s"%(kwconfig, kwtrgdir)
new_dirs += idq.collect_sngl_chan_kw(gpsstart, gpsstop, kwconfig, source_dir=kwtrgdir, output_dir=snglchndir)
new_dirs = set(new_dirs)
if opts.verbose:
print "created %d new directories :"%len(new_dirs)
for new_dir in new_dirs:
print "\t", new_dir
#=================================================
### launch training job
#========================
### build params object
if opts.verbose:
print "generating params object"
analysis_range = [gpsstart, gpsstop]
### specify ovl pointer
ovlsegs = [sciseg_path]
### load from config object
auxdir = snglchndir
gwdir = snglchndir
gwchans = eval(config.get('general', 'gwchannels'))
gwthr = config.getfloat('general', 'gw_kwsignif_thr')
ifos = [config.get('general', 'ifo')]
gwsets = eval(config.get('ovl_train', 'gwsets'))
safety = config.get('ovl_train', 'safety')
windows = eval(config.get('ovl_train', 'windows'))
thresholds = eval(config.get('ovl_train', 'thresholds'))
Psigthr = config.getfloat('ovl_train', 'Psigthr')
effbydtthr = config.getfloat('ovl_train', 'effbydtthr')
if config.has_option('general', 'selected-channels'):
channels = config.get('general', 'selected-channels')
else:
channels = ''
if config.has_option('general', 'unsafe-channels'):
notused = config.get('general', 'unsafe-channels')
else:
notused = ''
metric = config.get('ovl_train', 'metric')
### parse channels and not used
if channels == '':
channels = False
if notused != '':
notused = [l.strip('\n') for l in open(notused, 'r').readlines() if l.strip("\n")]
else:
notused = []
params = ovl.params(analysis_range,
auxdir,
gwdir,
gwchans,
gwthr,
ifos,
gwsets,
metric=metric,
scisegs=[sciseg_path],
vetosegs=None,
channels=channels,
notused=notused,
windows=windows,
thresholds=thresholds,
Psigthr=Psigthr,
effbydtthr=effbydtthr,
safety=safety)
if opts.safety: ### a safety study
if opts.verbose:
print "launching safety study"
vetolists = ovl.safety(params, output_dir=output_dir, verbose=opts.verbose, write_channels=True)
elif not config.has_option('ovl_train', 'convergent'): # "normal" training
num_runs = config.getint('ovl_train', 'num_runs')
incremental = config.getint('ovl_train', 'incremental')
if opts.verbose:
print "launching \"normal\" training with:\n\t%d runs\n\t%d incremental"%(num_runs, incremental)
### launch training job
vetolists = ovl.train(params, num_runs=num_runs, incremental=incremental, output_dir=output_dir, verbose=opts.verbose, write_channels=True)
else: # "convergent" training
if opts.verbose:
print "launching \"convergent\" training"
vetolists = ovl.convergent_train(params, output_dir=output_dir, verbose=opts.verbose, write_channels=True)
### compute redundancies between channels
if opts.redundancies:
if opts.verbose:
print "launching redundancies job"
ovl.redundancies(params, output_dir=output_dir, verbose=opts.verbose, write_channels=True)
if opts.verbose:
print "Done"
@@ -2066,7 +2066,7 @@ def build_auxmvc_vectors( trigger_dict, main_channel, time_window, signif_thresh
trigger_dict.include([[gps_start_time, gps_end_time]], channels=[main_channel])
# keep only triggers from the science segments if given
-    if science_segments:
+    if science_segments!=None:
science_segments = event.andsegments([[gps_start_time - time_window, gps_end_time + time_window]], science_segments)
trigger_dict.include(science_segments)
@@ -2086,7 +2086,7 @@ def build_auxmvc_vectors( trigger_dict, main_channel, time_window, signif_thresh
if clean_samples_rate:
# generate random times for clean samples
-        if science_segments:
+        if science_segments!=None:
clean_times = event.randomrate(clean_samples_rate,
event.andsegments([[gps_start_time
+ time_window, gps_end_time - time_window]],
[general]
ifo = L1
usertag = er7-safety
kw = {"/gds-l1/dmt/triggers/config/L-ER7_KW_AUX.cfg":"/gds-l1/dmt/triggers/L-KW_TRIGGERS/"}
gwchannels = ["L1_OAF-CAL_DARM_DQ_8_128", "L1_OAF-CAL_DARM_DQ_32_2048", "L1_OAF-CAL_DARM_DQ_1024_4096"]
gw_kwsignif_thr = 35
traindir = /home/reed.essick/ovl-offline/er7/safety/
snglchndir = /home/reed.essick/ovl-offline/er7/safety/sngl_chn/
;full path to a file containing the list of auxiliary channels to be used
;selected-channels = /home/reed.essick/ovl-offline/er7/L1-AUX-ER7_channels.txt
;full path to a file containing the list of unsafe auxiliary channels
;unsafe-channels = /home/reed.essick/ovl-offline/er7/L1-AUX-ER7_unsafe-channels.txt
[get_science_segments]
include = L1:DMT-ANALYSIS_READY:1
program = ligolw_segment_query_dqsegdb
segdb = https://dqsegdb5.phy.syr.edu
[ovl_train]
metric = eff/dt
;convergent =
num_runs = 20
incremental = 1000
gwsets = ["kwl1"]
safety = None
windows = [0.025, 0.050, 0.100, 0.150, 0.200]
thresholds = [15, 25, 30, 50, 100, 200, 400, 800, 1600]
Psigthr = 1e-5
effbydtthr = 1.0
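For orientation, both new executables read this config with SafeConfigParser: scalar options use the standard getters, list- and dict-valued options (kw, gwchannels, windows, thresholds, gwsets) are stored as Python literals and eval'd, and "convergent" training is selected simply by that option being present in [ovl_train]. A minimal sketch of the parsing, assuming the example above is saved locally as config.ini:

### minimal sketch of how the scripts above read this config; assumes config.ini exists locally
import ConfigParser ### Python 2, as in the scripts above

config = ConfigParser.SafeConfigParser()
config.read('config.ini')

kw = eval(config.get('general', 'kw'))                   ### {KW config file : trigger directory}
gwchannels = eval(config.get('general', 'gwchannels'))   ### list of GW (target) channels
windows = eval(config.get('ovl_train', 'windows'))       ### [0.025, 0.050, ...]
thresholds = eval(config.get('ovl_train', 'thresholds'))
Psigthr = config.getfloat('ovl_train', 'Psigthr')
effbydtthr = config.getfloat('ovl_train', 'effbydtthr')
metric = config.get('ovl_train', 'metric')               ### "eff/dt"

### "convergent" training is chosen by the mere presence of the option
### (it is commented out above, so "normal" training runs)
convergent = config.has_option('ovl_train', 'convergent')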
@@ -2539,8 +2539,7 @@ def patfile_to_GPStimes(auxmvc_pat, skip_lines=1):
pat_lines = open(auxmvc_pat).readlines()
pat_lines = pat_lines[skip_lines:]
-    variables = dict([(line, i) for (i, line) in
-                      enumerate(pat_lines[0].split())])
+    variables = dict([(line, i) for (i, line) in enumerate(pat_lines[0].split())])
GPStimes = []
if variables.has_key('i'):
@@ -2548,13 +2547,21 @@
line = line.strip().split()
GPStimes.append([float(line[variables['GPS_s']])
+ float(line[variables['GPS_ms']]) * 1e-3,
-                         int(line[variables['i']])])
+                         int(int(line[variables['i']]))])
+    elif variables.has_key('unclean'):
+        for line in pat_lines[1:]:
+            line = line.strip().split()
+            GPStimes.append([float(line[variables['GPS_s']])
+                             + float(line[variables['GPS_ms']]) * 1e-3,
+                             int(float(line[variables['unclean']]))])
else:
for line in pat_lines[1:]:
line = line.strip().split()
GPStimes.append([float(line[variables['GPS_s']])
+ float(line[variables['GPS_ms']]) * 1e-3,
-                             int(line[-1])]) # glitch/clean stored in line[-1]
+                             int(float(line[-1]))]) # class stored in the last column. This may be buggy...
return GPStimes
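The branching above keys off the column names found in the .pat header line: a column literally named 'i', otherwise one named 'unclean', otherwise the last column is taken as the class label. The sketch below walks one assumed header/row pair through that lookup; the column names other than GPS_s, GPS_ms and i are hypothetical, and int(float(...)) is used throughout only for uniformity.

### illustrative only: an assumed .pat header/row showing how the column map
### built by patfile_to_GPStimes drives the class-label lookup
header = "GPS_s GPS_ms signif SNR i"          ### hypothetical column names
row    = "1126259462 462 25.3 8.1 1"

variables = dict((name, idx) for idx, name in enumerate(header.split()))
fields = row.split()

gps = float(fields[variables['GPS_s']]) + float(fields[variables['GPS_ms']]) * 1e-3
if 'i' in variables:                  ### glitch/clean label stored in column 'i'
    label = int(float(fields[variables['i']]))
elif 'unclean' in variables:          ### some pat files label the column 'unclean'
    label = int(float(fields[variables['unclean']]))
else:                                 ### otherwise fall back to the last column
    label = int(float(fields[-1]))

print( "%.3f %d"%(gps, label) )       ### 1126259462.462 1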