Commit 20bb87a9, authored by Tanner Prestegard, committed by GraceDB

Populate database columns for existing VOEvents

Data migration to populate new database columns for existing
VOEvents. We have to read the files in and try to extract the
parameter data, handling all of the changes to the file format
which have occurred over time.

I did a lot of testing by copying over the production database
and associated VOEvent files for all non-Test, non-MDC events
and superevents to make sure we are accounting for all changes
in VOEvent (file) schema that have happened over time.
parent 619080fa
......@@ -9,7 +9,7 @@ from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('events', '0033_pipelinelog_and_pipeline_enabled'),
('events', '0034_add_subgrb_search'),
]
operations = [
......
# -*- coding: utf-8 -*-
# Generated by Django 1.11.20 on 2019-06-10 16:58
from __future__ import unicode_literals
from cStringIO import StringIO
from hashlib import sha1
import os
from lxml import etree
from django.conf import settings
from django.db import migrations
# Lookup table: VOEvent model field -> candidate ElementPath expressions,
# one per VOEvent file schema in which that value may appear.
#
# NOTE: this table carries a few more entries than the one in
# superevents.0004. Superevent VOEvents are relatively new and their schema
# has barely changed, whereas event VOEvents have been around for a while
# and several different schemas have to be accounted for.
PARAMETER_MAPPINGS = {
    # Source properties
    "prob_has_ns": [
        "./What/Group[@type='Properties']/Param[@name='HasNS']",
        "./What/Param[@name='ProbHasNS']",
    ],
    "prob_has_remnant": [
        "./What/Group[@type='Properties']/Param[@name='HasRemnant']",
        "./What/Param[@name='ProbHasRemnant']",
    ],
    # Source classification
    "prob_bns": [
        "./What/Group[@type='Classification']/Param[@name='BNS']",
    ],
    "prob_nsbh": [
        "./What/Group[@type='Classification']/Param[@name='NSBH']",
    ],
    "prob_bbh": [
        "./What/Group[@type='Classification']/Param[@name='BBH']",
    ],
    "prob_mass_gap": [
        "./What/Group[@type='Classification']/Param[@name='MassGap']",
    ],
    "prob_terrestrial": [
        "./What/Group[@type='Classification']/Param[@name='Terrestrial']",
    ],
    # Flags
    "hardware_inj": [
        "./What/Param[@name='HardwareInj']",
    ],
    "internal": [
        "./What/Param[@name='internal']",
    ],
    "open_alert": [
        "./What/Param[@name='OpenAlert']",
    ],
    # Skymap information
    "skymap_type": [
        "./What/Group[@type='GW_SKYMAP']",
    ],
    "skymap_filename": [
        "./What/Group[@type='GW_SKYMAP']/Param[@name='skymap_fits']",
        "./What/Group[@type='GW_SKYMAP']/Param[@name='skymap_fits_shib']",
    ],
}

# Lookup table: VOEvent model field -> substrings searched for in the text of
# the file's ./How/Description elements.
DESCRIPTION_PARAMETER_MAPPINGS = {
    "coinc_comment": ["counterpart GRB"],
}
# Have to write a custom method to get datadir paths
# since that method isn't available in migrations
def get_datadir(event_or_superevent):
    """Return the data directory path for an event or superevent.

    The path is built from the SHA-1 hex digest of the object's id; when
    the object's class name is 'superevent' (compared case-insensitively),
    the id is prefixed with 'superevent' before hashing. The digest is cut
    into consecutive pieces whose lengths are given by
    settings.GRACEDB_DIR_DIGITS, and whatever is left over becomes the
    final ('leaf') directory. All pieces are joined beneath
    settings.GRACEDB_DATA_DIR.

    Args:
        event_or_superevent: object exposing an ``id`` attribute.

    Returns:
        The directory path as a string.
    """
    hash_input = str(event_or_superevent.id)
    if event_or_superevent.__class__.__name__.lower() == 'superevent':
        hash_input = 'superevent' + hash_input

    # sha1() only accepts bytes on Python 3. Encoding explicitly keeps the
    # digest identical on Python 2 (assumes the id renders as ASCII text).
    digest = sha1(hash_input.encode('utf-8')).hexdigest()

    # Build up the nodes of the directory structure by walking the digest.
    nodes = []
    offset = 0
    for width in settings.GRACEDB_DIR_DIGITS:
        nodes.append(digest[offset:offset + width])
        offset += width

    # Whatever is left over is the 'leaf' directory.
    nodes.append(digest[offset:])
    return os.path.join(settings.GRACEDB_DATA_DIR, *nodes)
# Populate VOEvent parameter data from files
def populate_values(voevent, event_or_superevent):
    """Fill in a VOEvent row's parameter fields from its file on disk.

    The file named by ``voevent.filename`` is read from the data directory
    of ``event_or_superevent`` and parsed as XML. Every path listed in
    PARAMETER_MAPPINGS is looked up; each one that matches is converted and
    assigned to the field of the same name (when several paths match, the
    last one listed wins). Fields in DESCRIPTION_PARAMETER_MAPPINGS are set
    to True or False depending on whether any of their search strings
    occurs in the text of a ./How/Description element. The instance is then
    saved.

    Args:
        voevent: VOEvent model instance to update and save.
        event_or_superevent: owner of the VOEvent, used to locate the file.
    """
    # Load file. Read raw bytes so the parser applies the encoding named in
    # the XML declaration itself; lxml refuses already-decoded text that
    # still carries an encoding declaration.
    voevent_file_path = os.path.join(get_datadir(event_or_superevent),
                                     voevent.filename)
    with open(voevent_file_path, 'rb') as f:
        voevent_file = f.read()

    # Convert to etree
    root = etree.fromstring(voevent_file)

    # Parse parameters
    boolean_parameters = ('hardware_inj', 'internal', 'open_alert')
    for parameter, paths in PARAMETER_MAPPINGS.items():
        for path in paths:
            try:
                result = root.find(path)
            except SyntaxError:
                # The path expression could not be evaluated; treat it the
                # same as "no match" and move on to the next candidate.
                result = None
            if result is None:
                # not found, likely due to an old VOEvent schema
                continue

            # Special parameter parsing
            if parameter in boolean_parameters:
                # Parsed as an integer first, then reduced to a boolean
                value = bool(int(result.attrib['value']))
            elif parameter == 'skymap_filename':
                # Keep only the last '/'-separated component
                value = result.attrib['value'].split('/')[-1]
            elif parameter == 'skymap_type':
                # The matched element is the Group itself; use its name
                value = result.attrib['name']
            else:
                value = float(result.attrib['value'])

            # Set VOEvent instance value
            setattr(voevent, parameter, value)

    # Parse description parameters. An empty Description element has
    # text None, so substitute '' to keep the substring test safe.
    desc_text = [dp.text or '' for dp in root.findall('./How/Description')]
    for parameter, str_list in DESCRIPTION_PARAMETER_MAPPINGS.items():
        # True as soon as any search string occurs in any description, so a
        # later non-matching string cannot overwrite an earlier match.
        found = any(s in text for s in str_list for text in desc_text)
        setattr(voevent, parameter, found)

    # Save VOEvent
    voevent.save()
def populate_data(apps, schema_editor):
    """Forward step: run populate_values over every events VOEvent row."""
    # Use the model obtained from the migration's app registry.
    voevent_model = apps.get_model('events', 'VOEvent')
    for voevent in voevent_model.objects.all():
        populate_values(voevent, voevent.event)
class Migration(migrations.Migration):
    """Data migration that fills VOEvent fields from the files on disk."""

    # Must run after the migration named 0035_add_voevent_fields.
    dependencies = [('events', '0035_add_voevent_fields')]

    operations = [
        # Reversing this migration is a no-op.
        migrations.RunPython(
            code=populate_data,
            reverse_code=migrations.RunPython.noop,
        ),
    ]
# -*- coding: utf-8 -*-
# Generated by Django 1.11.20 on 2019-06-10 16:58
from __future__ import unicode_literals
from cStringIO import StringIO
from hashlib import sha1
import os
from lxml import etree
from django.conf import settings
from django.db import migrations
# Lookup table: VOEvent model field -> candidate ElementPath expressions
# under which that value may appear in a VOEvent file.
PARAMETER_MAPPINGS = {
    # Source properties
    "prob_has_ns": [
        "./What/Group[@type='Properties']/Param[@name='HasNS']",
        "./What/Param[@name='ProbHasNS']",
    ],
    "prob_has_remnant": [
        "./What/Group[@type='Properties']/Param[@name='HasRemnant']",
        "./What/Param[@name='ProbHasRemnant']",
    ],
    # Source classification
    "prob_bns": [
        "./What/Group[@type='Classification']/Param[@name='BNS']",
    ],
    "prob_nsbh": [
        "./What/Group[@type='Classification']/Param[@name='NSBH']",
    ],
    "prob_bbh": [
        "./What/Group[@type='Classification']/Param[@name='BBH']",
    ],
    "prob_mass_gap": [
        "./What/Group[@type='Classification']/Param[@name='MassGap']",
    ],
    "prob_terrestrial": [
        "./What/Group[@type='Classification']/Param[@name='Terrestrial']",
    ],
    # Flags
    "hardware_inj": [
        "./What/Param[@name='HardwareInj']",
    ],
    "internal": [
        "./What/Param[@name='internal']",
    ],
    "open_alert": [
        "./What/Param[@name='OpenAlert']",
    ],
    # Skymap information
    "skymap_type": [
        "./What/Group[@type='GW_SKYMAP']",
    ],
    "skymap_filename": [
        "./What/Group[@type='GW_SKYMAP']/Param[@name='skymap_fits']",
    ],
}

# Lookup table: VOEvent model field -> substrings searched for in the text of
# the file's ./How/Description elements.
DESCRIPTION_PARAMETER_MAPPINGS = {
    "coinc_comment": ["counterpart GRB"],
}
# Have to write a custom method to get datadir paths
# since that method isn't available in migrations
def get_datadir(event_or_superevent):
    """Return the data directory path for an event or superevent.

    The path is built from the SHA-1 hex digest of the object's id; when
    the object's class name is 'superevent' (compared case-insensitively),
    the id is prefixed with 'superevent' before hashing. The digest is cut
    into consecutive pieces whose lengths are given by
    settings.GRACEDB_DIR_DIGITS, and whatever is left over becomes the
    final ('leaf') directory. All pieces are joined beneath
    settings.GRACEDB_DATA_DIR.

    Args:
        event_or_superevent: object exposing an ``id`` attribute.

    Returns:
        The directory path as a string.
    """
    hash_input = str(event_or_superevent.id)
    if event_or_superevent.__class__.__name__.lower() == 'superevent':
        hash_input = 'superevent' + hash_input

    # sha1() only accepts bytes on Python 3. Encoding explicitly keeps the
    # digest identical on Python 2 (assumes the id renders as ASCII text).
    digest = sha1(hash_input.encode('utf-8')).hexdigest()

    # Build up the nodes of the directory structure by walking the digest.
    nodes = []
    offset = 0
    for width in settings.GRACEDB_DIR_DIGITS:
        nodes.append(digest[offset:offset + width])
        offset += width

    # Whatever is left over is the 'leaf' directory.
    nodes.append(digest[offset:])
    return os.path.join(settings.GRACEDB_DATA_DIR, *nodes)
# Populate VOEvent parameter data from files
def populate_values(voevent, event_or_superevent):
    """Fill in a VOEvent row's parameter fields from its file on disk.

    The file named by ``voevent.filename`` is read from the data directory
    of ``event_or_superevent`` and parsed as XML. Every path listed in
    PARAMETER_MAPPINGS is looked up; each one that matches is converted and
    assigned to the field of the same name (when several paths match, the
    last one listed wins). Fields in DESCRIPTION_PARAMETER_MAPPINGS are set
    to True or False depending on whether any of their search strings
    occurs in the text of a ./How/Description element. The instance is then
    saved.

    Args:
        voevent: VOEvent model instance to update and save.
        event_or_superevent: owner of the VOEvent, used to locate the file.
    """
    # Load file. Read raw bytes so the parser applies the encoding named in
    # the XML declaration itself; lxml refuses already-decoded text that
    # still carries an encoding declaration.
    voevent_file_path = os.path.join(get_datadir(event_or_superevent),
                                     voevent.filename)
    with open(voevent_file_path, 'rb') as f:
        voevent_file = f.read()

    # Convert to etree
    root = etree.fromstring(voevent_file)

    # Parse parameters
    boolean_parameters = ('hardware_inj', 'internal', 'open_alert')
    for parameter, paths in PARAMETER_MAPPINGS.items():
        for path in paths:
            try:
                result = root.find(path)
            except SyntaxError:
                # The path expression could not be evaluated; treat it the
                # same as "no match" and move on to the next candidate.
                result = None
            if result is None:
                continue

            # Special parameter parsing
            if parameter in boolean_parameters:
                # Parsed as an integer first, then reduced to a boolean
                value = bool(int(result.attrib['value']))
            elif parameter == 'skymap_filename':
                # Keep only the last '/'-separated component
                value = result.attrib['value'].split('/')[-1]
            elif parameter == 'skymap_type':
                # The matched element is the Group itself; use its name
                value = result.attrib['name']
            else:
                value = float(result.attrib['value'])

            # Set VOEvent instance value
            setattr(voevent, parameter, value)

    # Parse description parameters. An empty Description element has
    # text None, so substitute '' to keep the substring test safe.
    desc_text = [dp.text or '' for dp in root.findall('./How/Description')]
    for parameter, str_list in DESCRIPTION_PARAMETER_MAPPINGS.items():
        # True as soon as any search string occurs in any description, so a
        # later non-matching string cannot overwrite an earlier match.
        found = any(s in text for s in str_list for text in desc_text)
        setattr(voevent, parameter, found)

    # Save VOEvent
    voevent.save()
def populate_data(apps, schema_editor):
    """Forward step: run populate_values over every superevents VOEvent row."""
    # Use the model obtained from the migration's app registry.
    voevent_model = apps.get_model('superevents', 'VOEvent')
    for voevent in voevent_model.objects.all():
        populate_values(voevent, voevent.superevent)
class Migration(migrations.Migration):
    """Data migration that fills VOEvent fields from the files on disk."""

    # Must run after the migration named 0003_add_voevent_fields.
    dependencies = [('superevents', '0003_add_voevent_fields')]

    operations = [
        # Reversing this migration is a no-op.
        migrations.RunPython(
            code=populate_data,
            reverse_code=migrations.RunPython.noop,
        ),
    ]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment