Commit 43682b25 authored by channa's avatar channa
Browse files

rio.py: a module to load a read-only ligolw sqlite database in gstlal inspiral...

rio.py: a module to load a read-only ligolw sqlite database in gstlal inspiral format into a dictionary with some convenient methods to iterate over common data representations
parent d03f5db0
Pipeline #212701 passed with stages
in 32 minutes and 47 seconds
......@@ -20,6 +20,7 @@ pkgpython_PYTHON = \
lloidparts.py \
lvalert_helper.py \
rate_estimation.py \
rio.py \
snglinspiraltable.py \
spawaveform.py \
streamthinca.py \
......
#!/usr/bin/env python
import sqlite3
import sys
from collections import UserDict
from ligo.segments import segment
from ligo.lw.utils import segments as lwseg
from ligo.lw import dbtables
from ligo.lw import lsctables
class load(UserDict):
    """In-memory, dictionary-style view of a ligolw sqlite database.

    On construction the tables listed in ``__init__`` are read into
    ``self.tables`` and the coincidences are reorganised into the nested
    dictionary ``self.d``::

        {coinc_definer description: {time slide offsets: {coinc_event_id: row}}}

    The mapping protocol (``[]``, ``in``, ``len``, iteration, ...) operates
    on ``self.d``.  The generator methods ``zl_events``, ``found_injections``,
    ``missed_injections`` and ``not_analyzed_injections`` provide common
    views of the data.
    """

    def __init__(self, fname, verbose=False, data_segments_name="datasegments", vetoes_name="vetoes"):
        """
        Load the database ``fname`` into memory and close the connection.

        fname: path handed to ``sqlite3.connect``.
        verbose: when true, progress messages are printed to stdout.
        data_segments_name: name of the segment list holding analysed data.
        vetoes_name: name of the segment list holding vetoes; it is
            subtracted from the data segments to form ``self.segs``.

        Raises ``KeyError`` if one of the tables needed to reconstruct the
        coincidences (``time_slide``, ``coinc_definer``, ``coinc_event``,
        ``coinc_event_map``) could not be loaded.

        Example:
        >>> from gstlal import rio;
        >>> D = rio.load('H1L1V1-ALL_LLOID_split_injections-1256655642-732465.sqlite')
        >>> # iterate over zero-lag events
        >>> for event in D.zl_events():
        ...     print (event['coinc_event']['likelihood'], event['coinc_inspiral']['combined_far']); break
        ...
        -8.360537003030897 0.06110694885193467
        >>> # iterate over found injections
        >>> for inj in D.found_injections():
        ...     print (inj['sim_inspiral']['mass1'], inj['coinc_event']['likelihood'], inj['coinc_inspiral']['combined_far']); break
        ...
        35.35828 -23.43134661622628 0.06110694885193467
        >>> # iterate over missed injections
        >>> for inj in D.missed_injections():
        ...     print (inj['sim_inspiral']['mass1']); break
        ...
        1.283698
        >>> # iterate over injections not done (not in analyzed time)
        >>> for inj in D.not_analyzed_injections():
        ...     print (inj['sim_inspiral']['mass1']); break
        ...
        2.621253
        """
        UserDict.__init__(self)
        self.fname = fname
        self.verbose = verbose
        self.con = sqlite3.connect(fname)
        try:
            self.con.row_factory = sqlite3.Row
            self.tables = {}
            # (table name, columns used to index the rows of that table)
            self.__load_tables([
                ('coinc_definer', ('coinc_def_id',)),
                ('coinc_inspiral', ('coinc_event_id',)),
                ('segment', ('segment_def_id',)),
                ('sngl_inspiral', ('event_id',)),
                ('coinc_event', ('coinc_event_id',)),
                ('process', ('process_id',)),
                ('segment_definer', ('segment_def_id',)),
                ('time_slide', ('time_slide_id',)),
                ('coinc_event_map', ('coinc_event_id',)),
                ('process_params', ('process_id',)),
                ('segment_summary', ('segment_def_id',)),
                ('sim_inspiral', ('simulation_id',)),
            ])
            xmldoc = dbtables.get_xml(self.con)
            # analysed time = data segments with the vetoes removed
            self.segs = lwseg.segmenttable_get_by_name(xmldoc, data_segments_name).coalesce() - lwseg.segmenttable_get_by_name(xmldoc, vetoes_name).coalesce()
            xmldoc.unlink()
            self.d = self.__to_dict()
        finally:
            # everything is in memory now; never leave the connection
            # open, even when loading fails part way through
            self.con.close()
        # UserDict's inherited methods (repr, copy, ...) read ``self.data``;
        # point it at the same dictionary so they agree with ``self.d``
        self.data = self.d
        self.__process_injections()

    def __log(self, *args, **kwargs):
        """Forward to ``print`` only when ``self.verbose`` is true."""
        if self.verbose:
            print(*args, **kwargs)

    def __iter__(self):
        return iter(self.d)

    def __len__(self):
        return len(self.d)

    def keys(self):
        return self.d.keys()

    def items(self):
        return self.d.items()

    def values(self):
        return self.d.values()

    def __getitem__(self, key):
        return self.d[key]

    def __setitem__(self, key, item):
        self.d[key] = item

    def __delitem__(self, key):
        del self.d[key]

    def has_key(self, k):
        return k in self.d

    def __cmp__(self, dict_):
        """
        Legacy comparison hook: 0 if the contents equal ``dict_``, else 1.

        Python 3 never calls ``__cmp__`` implicitly and dictionaries have
        no ordering, so only equality can be reported here.
        """
        other = dict_.d if isinstance(dict_, load) else dict_
        return 0 if self.d == other else 1

    def __contains__(self, item):
        return item in self.d

    def __load_tables(self, names):
        """
        Read each table in ``names`` into ``self.tables``.

        ``names`` is a sequence of ``(table name, key columns)`` pairs.  Each
        loaded table is stored as ``{key value: [row dict, ...]}``; a row is
        filed once under every listed key column.  Tables that sqlite
        cannot read (``sqlite3.OperationalError``) are skipped.
        """
        for name, keys in names:
            self.__log('loading ... %s' % name, end = ' ')
            try:
                # table names come from the fixed list in __init__, never
                # from user input, so plain string formatting is safe here
                rows = self.con.cursor().execute('SELECT * FROM %s' % name).fetchall()
            except sqlite3.OperationalError:
                self.__log('... NOT FOUND!')
                continue
            out = {}
            for row in rows:
                rd = dict(row)
                for key in keys:
                    out.setdefault(row[key], []).append(rd)
            self.tables[name] = out
            self.__log('... %d' % len(out))

    def __process_injections(self):
        """
        Sort the simulation ids into found / missed / not analysed.

        An injection counts as analysed when its geocentric end time
        intersects ``self.segs``.  The bookkeeping containers are always
        created, so the injection iterators simply yield nothing for a
        database without a ``sim_inspiral`` table.
        """
        self.__not_analyzed_injections = set()
        self.__analyzed_ids = set()
        self.__found_injections = {}
        self.__missed_injections = set()
        if "sim_inspiral" not in self.tables:
            return
        for sid, row in self.itertable('sim_inspiral'):
            t = lsctables.LIGOTimeGPS(row[0]['geocent_end_time'], row[0]['geocent_end_time_ns'])
            if self.segs.intersects_segment(segment([t, t])):
                self.__analyzed_ids.add(sid)
            else:
                self.__not_analyzed_injections.add(sid)
        # only the first time slide's dictionary of sim<-->coinc events is consulted
        sim_coincs = tuple(self['sim_inspiral<-->coinc_event coincidences (exact)'].values())[0]
        for event in sim_coincs.values():
            self.__found_injections.setdefault(event['sim_inspiral'][0]['simulation_id'], []).append(event)
        self.__missed_injections = self.__analyzed_ids - set(self.__found_injections)

    def missed_injections(self):
        """Yield ``{'sim_inspiral': row}`` for analysed injections with no coinc."""
        for sim in self.__missed_injections:
            yield {'sim_inspiral': self.tables['sim_inspiral'][sim][0]}

    def not_analyzed_injections(self):
        """Yield ``{'sim_inspiral': row}`` for injections outside ``self.segs``."""
        for sim in self.__not_analyzed_injections:
            yield {'sim_inspiral': self.tables['sim_inspiral'][sim][0]}

    def found_injections(self):
        """
        Yield one dictionary per found injection with the keys
        ``sim_inspiral``, ``coinc_event``, ``sngl_inspiral`` and
        ``coinc_inspiral``.

        Asserts that every injection maps to exactly one coinc event.
        """
        for inj in self.__found_injections.values():
            out = {}
            assert (len(inj) == 1)
            out['sim_inspiral'] = inj[0]['sim_inspiral'][0]
            assert (len(inj[0]['coinc_event']) == 1)
            # copy so that detaching the sngl_inspiral list below leaves
            # the shared row dictionary untouched
            out['coinc_event'] = inj[0]['coinc_event'][0].copy()
            out['sngl_inspiral'] = out['coinc_event'].pop('sngl_inspiral')
            out['coinc_inspiral'] = self.tables['coinc_inspiral'][out['coinc_event']['coinc_event_id']][0]
            yield out

    def zl_events(self):
        """
        Yield one dictionary per zero-lag coincidence with the keys
        ``coinc_event``, ``sngl_inspiral`` and ``coinc_inspiral``.

        A time slide is zero-lag when every instrument offset is falsy.
        """
        coincs = self['sngl_inspiral<-->sngl_inspiral coincidences']
        for offsets, events in coincs.items():
            if any(offset for _, offset in offsets):
                continue
            for eid, event in events.items():
                out = {}
                out['coinc_event'] = event.copy()
                out['sngl_inspiral'] = out['coinc_event'].pop('sngl_inspiral')
                out['coinc_inspiral'] = self.tables['coinc_inspiral'][eid][0]
                yield out

    def itertable(self, table):
        """Yield ``(key, [row dict, ...])`` pairs of a loaded table."""
        yield from self.tables[table].items()

    def __to_dict(self):
        """
        Build ``{description: {offsets: {coinc_event_id: row}}}``.

        ``offsets`` is the sorted tuple of ``(instrument, offset)`` pairs of
        a time slide.  Each coinc_event row gains, for every table named in
        its coinc_event_map entries, a list of the rows it points to.
        """
        self.__log('computing time slides ...')
        tids = {}
        for tid, rows in self.itertable('time_slide'):
            for row in rows:
                tids.setdefault(tid, []).append((row['instrument'], row['offset']))
        tids = {tuple(sorted(v)): k for k, v in tids.items()}
        # reconstruct coincs [0] is okay because there will only be one
        coinc_types = {r[0]['description']: {} for r in self.tables['coinc_definer'].values()}
        defids = {r[0]['description']: r[0]['coinc_def_id'] for r in self.tables['coinc_definer'].values()}
        for description, coinc_type in coinc_types.items():
            for offsets, tid in tids.items():
                self.__log('getting coinc events for %s and time slide %s ...' % (description, offsets))
                # get all the coinc events for this timeslide
                # we can use v[0] because there will be one row per coinc_event_id in this case
                # NOTE: the row dictionaries are shared with
                # self.tables['coinc_event'] on purpose; found_injections()
                # relies on seeing the lists attached below
                coinc_type[offsets] = {k: v[0] for k, v in self.tables['coinc_event'].items() if v[0]['time_slide_id'] == tid and v[0]['coinc_def_id'] == defids[description]}
                # get the coinc event maps
                for cid, coinc_event in coinc_type[offsets].items():
                    for row in self.tables['coinc_event_map'][cid]:
                        # [0] should be okay here
                        coinc_event.setdefault(row['table_name'], []).append(self.tables[row['table_name']][row['event_id']][0])
        return coinc_types
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment