Commit 8aacd61b authored by Kipp Cannon

remove glue-ligolw-tools subpackage

- the transition to python-ligo-lw is now complete, with that package being
  in production and installed LDG-wide

- this patch removes ligolw_add, ligolw_cut, ligolw_print, ligolw_sqlite from
  bin/ in the glue source tree, and eliminates the "glue-ligolw-tools"
  sub-package from the packaging scripts

- test scripts used for verifying the four tools are also removed
parent 08efab39
#!/usr/bin/python
#
# Copyright (C) 2006 Kipp Cannon
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# =============================================================================
#
# Preamble
#
# =============================================================================
#
"""
Add (merge) LIGO LW XML files containing LSC tables.
"""
from optparse import OptionParser
import os
import sys
from glue import git_version
from glue.lal import CacheEntry
from glue.ligolw import ligolw
from glue.ligolw import array as ligolw_array
from glue.ligolw import lsctables
from glue.ligolw import utils as ligolw_utils
from glue.ligolw.utils import ligolw_add
__author__ = "Kipp Cannon <kipp.cannon@ligo.org>"
__version__ = "git id %s" % git_version.id
__date__ = git_version.date
#
# =============================================================================
#
# Command Line
#
# =============================================================================
#
def parse_command_line():
    """
    Parse the command line, return an options object and a list of URLs.

    The URLs named on the command line come first, followed by the URL of
    every line of each LAL cache named with --input-cache, in the order
    the caches were given.  --lfn-start-time and --lfn-end-time are
    converted to integers when they are provided.

    Raises ValueError if no input URLs are obtained from either source,
    or if one of the LFN times cannot be converted by int().
    """
    parser = OptionParser(
        version = "Name: %%prog\n%s" % git_version.verbose_msg,
        usage = "%prog [options] [url ...]",
        description = "Combines one or more LIGO Light Weight XML files into a single output file. The output is written to stdout or to the filename specified by --output. In addition to regular files, many common URL types can be read such as http:// and ftp://. Input documents that are gzip-compressed are automatically detected and decompressed. If the output file's name ends in \".gz\", the output document will be gzip-compressed. Table elements contained in the document will be merged so that there is not more than one table of any given name in the output. To accomplish this, any tables in the input documents that share the same name must have compatible columns, meaning the same column names with matching types (but not necessarily in the same order)."
    )
    parser.add_option("--ilwdchar-compat", action = "store_true", help = "Ignored. Included for command-line compatibility with newer versions of this tool. This version always requires ilwd:char row IDs.")
    parser.add_option("-i", "--input-cache", metavar = "filename", action = "append", default = [], help = "Get input files from the LAL cache named filename.")
    parser.add_option("--add-lfn-table", action = "store_true", help = "Add an lfn entry for each process.")
    parser.add_option("--lfn-start-time", metavar = "GPS seconds", help = "Set lfn start_time (optional).")
    parser.add_option("--lfn-end-time", metavar = "GPS seconds", help = "Set lfn end_time (optional).")
    parser.add_option("--lfn-comment", metavar = "string", help = "Set lfn comment (optional).")
    parser.add_option("--non-lsc-tables-ok", action = "store_true", help = "OK to merge documents containing non-LSC tables.")
    parser.add_option("-o", "--output", metavar = "filename", help = "Write output to filename (default = stdout).")
    parser.add_option("-v", "--verbose", action = "store_true", help = "Be verbose.")
    parser.add_option("--remove-input", action = "store_true", help = "Remove input files after writing output (an attempt is made to not delete the output file in the event that it overwrote one of the input files).")
    parser.add_option("--remove-input-except", metavar = "filename", action = "append", default = [], help = "When deleting input files, do not delete this file.")
    options, urls = parser.parse_args()

    # option values arrive as strings;  convert the LFN times to integers
    if options.lfn_start_time:
        options.lfn_start_time = int(options.lfn_start_time)
    if options.lfn_end_time:
        options.lfn_end_time = int(options.lfn_end_time)

    # append the URLs listed in the cache files.  each cache is read
    # inside a with statement so that its file handle is closed as soon
    # as the last line has been consumed instead of being left for the
    # garbage collector
    for cache in options.input_cache:
        with open(cache) as cache_file:
            urls.extend(CacheEntry(line).url for line in cache_file)

    if len(urls) < 1:
        raise ValueError("no input files!")
    return options, urls
#
# =============================================================================
#
# LFN Table
#
# =============================================================================
#
def update_lfn_table(xmldoc, pathname, start_time = None, end_time = None, comment = None):
    """
    Make sure the document's LFN table has a row for every process.

    An LFN table is created and appended to the first LIGO_LW element
    if the document has none.  For every process ID found in the
    document's process tables that is not already present in an LFN
    table, a row is added whose name is the base name of pathname and
    whose start_time, end_time and comment are the values given.

    The required columns are process_id, lfn_id and name, plus one
    column for each of start_time, end_time and comment that is not
    None.  Exception is raised if an LFN table is already present and
    its set of columns differs from the required set.

    Returns the LFN table that received the new rows.
    """
    # work out which columns are needed:  the three mandatory ones,
    # then one for each optional value that was supplied
    cols = ["process_id", "lfn_id", "name"]
    optional = (
        ("start_time", start_time),
        ("end_time", end_time),
        ("comment", comment),
    )
    for colname, colvalue in optional:
        if colvalue is not None:
            cols.append(colname)

    # scan for LFN tables already in the document.  each one has its
    # next_id synchronized and contributes its process IDs to the set
    # of processes that need no new row.  if several tables are found,
    # the last one is the one that gets used below
    lfn_table = None
    existing_pids = set()
    for candidate in lsctables.LfnTable.getTablesByName(xmldoc, lsctables.LfnTable.tableName):
        candidate.sync_next_id()
        existing_pids.update(candidate.getColumnByName("process_id"))
        lfn_table = candidate

    if lfn_table is not None:
        # an LFN table is already present, insist that its columns
        # are exactly the ones required
        if set(lfn_table.columnnames) != set(cols):
            raise Exception("document contains an LFN table with columns %s, but columns %s are required" % (", ".join(lfn_table.columnnames), ", ".join(cols)))
    else:
        # no LFN table in the document, create one
        lfn_table = lsctables.New(lsctables.LfnTable, cols)
        xmldoc.getElementsByTagName(ligolw.LIGO_LW.tagName)[0].appendChild(lfn_table)

    # only the file's base name is recorded
    pathname = os.path.basename(pathname)

    # one new row for each process that does not have one yet
    for process_table in lsctables.ProcessTable.getTablesByName(xmldoc, lsctables.ProcessTable.tableName):
        for pid in process_table.getColumnByName("process_id"):
            if pid not in existing_pids:
                row = lsctables.Lfn()
                row.process_id = pid
                row.lfn_id = lfn_table.get_next_id()
                row.name = pathname
                row.start_time = start_time
                row.end_time = end_time
                row.comment = comment
                lfn_table.append(row)

    return lfn_table
#
# =============================================================================
#
# Main
#
# =============================================================================
#
#
# Command line
#
options, urls = parse_command_line()
#
# Input.  Define a content handler with array and LSC table support
# enabled, select the interning row builder for table streams, then merge
# all of the input URLs into one new document.
#
class ContentHandler(ligolw.LIGOLWContentHandler):
    pass
ligolw_array.use_in(ContentHandler)
lsctables.use_in(ContentHandler)
lsctables.table.TableStream.RowBuilder = lsctables.table.InterningRowBuilder
xmldoc = ligolw_add.ligolw_add(ligolw.Document(), urls, non_lsc_tables_ok = options.non_lsc_tables_ok, verbose = options.verbose, contenthandler = ContentHandler)
#
# LFN table.  The output file's name is what gets recorded in the table,
# so --add-lfn-table cannot be used when writing to stdout.
#
if options.add_lfn_table:
    if not options.output:
        raise Exception("cannot add LFN table when no output filename is given")
    update_lfn_table(xmldoc, options.output, options.lfn_start_time, options.lfn_end_time, options.lfn_comment)
#
# Output.  options.output is None when no --output was given (the help
# text says the document then goes to stdout);  the "stdout" placeholder
# only serves to make the .gz suffix test evaluate to False in that case.
#
ligolw_utils.write_filename(xmldoc, options.output, verbose = options.verbose, gz = (options.output or "stdout").endswith(".gz"))
#
# Remove input.  The output file and every --remove-input-except file are
# passed along as the names that must not be deleted.
#
if options.remove_input:
    ligolw_add.remove_input(urls, [options.output] + options.remove_input_except, options.verbose)
#!/usr/bin/python
#
# Copyright (C) 2006 Kipp Cannon
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# =============================================================================
#
# Preamble
#
# =============================================================================
#
"""
Cut pieces out of LIGO LW XML files containing LSC tables.
"""
from __future__ import print_function
from optparse import OptionParser
import sys
from glue import git_version
from glue.ligolw import ligolw
from glue.ligolw import table
from glue.ligolw import utils
__author__ = "Kipp Cannon <kipp.cannon@ligo.org>"
__version__ = "git id %s" % git_version.id
__date__ = git_version.date
#
# =============================================================================
#
# Command Line
#
# =============================================================================
#
def parse_command_line():
    """
    Parse the command line, return an options object and a list of file
    names.

    On return options.delete_column and options.delete_table are sets of
    names, and options.delete_element is a set of (tag, attrvalues)
    tuples where attrvalues is a tuple of (attribute, value) pairs.  If
    no file names are given the list [None] is returned in their place;
    per the usage text input is then read from stdin and output written
    to stdout.

    Raises ValueError if a --delete-element argument contains an
    attribute that is not of the form attr=value.
    """
    parser = OptionParser(
        version = "Name: %%prog\n%s" % git_version.verbose_msg,
        usage = "%prog [options] [file ...]",
        description = "%prog removes XML elements from a LIGO Light Weight XML file. If file names are given on the command line, those files are read, processed, and rewritten one at a time, otherwise input is read from stdin and output written to stdout. Gzipped files are automatically detected on input, if the file's name ends in \".gz\" it will be gzip-compressed when written."
    )
    parser.add_option("--ilwdchar-compat", action = "store_true", help = "Ignored. Included for command-line compatibility with newer versions of this tool. This version always requires ilwd:char row IDs.")
    parser.add_option("-v", "--verbose", action = "store_true", help = "Be verbose.")
    parser.add_option("--delete-column", metavar = "name", action = "append", default = [], help = "Delete matching columns. Example \"--delete-column sim_burst:dtminus\".")
    parser.add_option("--delete-element", metavar = "tag[,attr=value[,...]]", action = "append", default = [], help = "Delete matching elements. Example \"--delete-element Table,Name=search_summary\".")
    parser.add_option("--delete-table", metavar = "name", action = "append", default = [], help = "Delete matching tables. Example \"--delete-table search_summary\".")
    options, filenames = parser.parse_args()

    # strip column names
    options.delete_column = set(map(table.Column.ColumnName, options.delete_column))

    # parse element specs.  each spec must be hashable because the
    # specs are collected in a set, so the attribute/value pairs are
    # stored as tuples (lists cannot be placed in a set, not even when
    # nested inside a tuple).  each pair is split on the first "="
    # only so that values are allowed to contain "="
    def parse_delete_element(arg):
        fields = arg.split(",")
        attrvalues = []
        for field in fields[1:]:
            attr, sep, value = field.partition("=")
            if not sep:
                raise ValueError("invalid element spec \"%s\": \"%s\" is not of the form attr=value" % (arg, field))
            attrvalues.append((attr, value))
        return fields[0], tuple(attrvalues)
    options.delete_element = set(map(parse_delete_element, options.delete_element))

    # strip table names
    options.delete_table = set(map(table.Table.TableName, options.delete_table))

    return options, (filenames or [None])
#
# =============================================================================
#
# Input
#
# =============================================================================
#
class ElementFilter(object):
    """
    Holds the cuts that can be applied while a document is being
    parsed.  At present that is the removal of tables by name.
    """
    def __init__(self, delete_tables):
        # names of the tables to be rejected;  only ever tested for
        # membership
        self.delete_tables = delete_tables

    def element_filter(self, name, attrs):
        """
        Return False if the element with tag name and attributes
        attrs is a Table whose name is in delete_tables, True for
        everything else.
        """
        # anything that is not a Table is accepted outright
        if name != ligolw.Table.tagName:
            return True
        # a Table is accepted unless it is one of the unwanted ones
        return table.Table.TableName(attrs["Name"]) not in self.delete_tables
class ContentHandler(ligolw.FilteringLIGOLWContentHandler):
    # content handler that applies the --delete-table cuts.  the filter
    # is built from the module-level options object at the time a
    # handler is instantiated, so options must have been set by then.
    # NOTE(review):  the filter returns True for elements that are
    # wanted;  presumably the base class drops the others during
    # parsing --- confirm against glue.ligolw.ligolw
    def __init__(self, xmldoc):
        element_filter = ElementFilter(options.delete_table).element_filter
        super(ContentHandler, self).__init__(xmldoc, element_filter)

# enable table parsing support in the handler
table.use_in(ContentHandler)
#
# Enable attribute interning
#
# Table streams build their rows with the interning row builder.  The main
# loop empties table.InterningRowBuilder.strings after each document is
# loaded.  NOTE(review):  presumably the builder shares identical attribute
# values between rows to save memory --- confirm against glue.ligolw.table.
table.TableStream.RowBuilder = table.InterningRowBuilder
#
# =============================================================================
#
# Cut
#
# =============================================================================
#
#
# Remove unwanted columns
#
def RemoveColumns(doc, columns):
    """
    Remove from every Table element in doc the Column children
    matching any of the names in columns.
    """
    table_elems = doc.getElementsByTagName(ligolw.Table.tagName)
    for table_elem in table_elems:
        for colname in columns:
            # collect the matches first, then detach them one by
            # one from their table
            doomed = table.Column.getColumnsByName(table_elem, colname)
            for column in doomed:
                table_elem.removeChild(column)
#
# Remove unwanted elements
#
def CompareDeleteElement(elem, name, attrvalues):
    """
    Compare an element to a delete spec, cmp()-style:  the result is 0
    if the element matches the spec and 1 if it does not.

    The element matches if its tag name equals name and, for every
    (attribute, value) pair in attrvalues, the element's attribute has
    exactly that value.  A KeyError raised while retrieving an
    attribute is treated as a mismatch.
    """
    # wrong kind of element, no need to look at the attributes
    if elem.tagName != name:
        return 1
    try:
        # stops at the first attribute with the wrong value
        mismatch = any(elem.getAttribute(attr) != value for attr, value in attrvalues)
    except KeyError:
        # the element does not have the attribute at all
        return 1
    return 1 if mismatch else 0
def IsDeleteElement(elem, specs):
    """
    Return True if elem matches at least one of the (name, attrvalues)
    delete specs in specs, False if it matches none of them.
    """
    # CompareDeleteElement() reports a match as 0
    return any(CompareDeleteElement(elem, name, attrvalues) == 0 for name, attrvalues in specs)
def RemoveElements(doc, specs):
    """
    Detach from its parent every element of doc that matches one of
    the delete specs in specs.
    """
    def matches_a_spec(candidate):
        return IsDeleteElement(candidate, specs)

    for doomed in doc.getElements(matches_a_spec):
        doomed.parentNode.removeChild(doomed)
#
# =============================================================================
#
# Main
#
# =============================================================================
#
options, filenames = parse_command_line()
# Process the files one at a time:  load, cut, write back under the same
# name.  filenames is [None] when no names were given on the command line;
# per the usage text input is then read from stdin and output written to
# stdout.
for n, filename in enumerate(filenames):
    if options.verbose:
        # progress counter, left on the same line as whatever is
        # printed to stderr next
        print("%d/%d:" % (n + 1, len(filenames)), end=' ', file=sys.stderr)
    # tables named by --delete-table are rejected by the content handler's
    # element filter while the document is being parsed
    doc = utils.load_filename(filename, verbose = options.verbose, contenthandler = ContentHandler)
    # empty the interning row builder's strings container now that the
    # document has been loaded
    table.InterningRowBuilder.strings.clear()
    if options.verbose:
        print("processing", file=sys.stderr)
    if options.delete_column:
        RemoveColumns(doc, options.delete_column)
    if options.delete_element:
        RemoveElements(doc, options.delete_element)
    # the "stdout" placeholder makes the .gz suffix test evaluate to False
    # when filename is None
    utils.write_filename(doc, filename, verbose = options.verbose, gz = (filename or "stdout").endswith(".gz"))
    # done with this document
    doc.unlink()
#!/usr/bin/python
#
# Copyright (C) 2006 Kipp Cannon
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# =============================================================================
#
# Preamble
#
# =============================================================================
#
"""
Print things from LIGO LW XML files. Inspired by lwtprint from LIGOTools.
"""
from __future__ import print_function
from optparse import OptionParser
from glue import git_version
from glue import segmentsUtils
from glue.lal import CacheEntry
from glue.ligolw import ligolw
from glue.ligolw import table
from glue.ligolw import lsctables
from glue.ligolw import array
from glue.ligolw import utils
from glue import git_version
import six
__author__ = "Kipp Cannon <kipp.cannon@ligo.org>"
__version__ = "git id %s" % git_version.id
__date__ = git_version.date
#
# =============================================================================
#
# Command Line
#
# =============================================================================
#
def parse_command_line():
    """
    Parse the command line, return an options object and a list of URLs.

    The URLs named on the command line come first, followed by the URL of
    every line of each LAL cache named with --input-cache.  If that
    leaves no URLs at all the list [None] is returned in their place;
    per the usage text input is then read from stdin.

    On return options.delimiter is a text string, the names in
    options.table and options.array have been passed through
    TableName() and ArrayName() respectively, and options.rows is the
    result of converting the comma-delimited row ranges with
    segmentsUtils.from_range_strings().

    Raises ValueError if the row specification cannot be parsed.
    """
    parser = OptionParser(
        version = "Name: %%prog\n%s" % git_version.verbose_msg,
        usage = "%prog [options] [url ...]",
        description = "Prints the contents of table elements from one or more LIGO Light Weight XML files to stdout in delimited ASCII format. In addition to regular files, the program can read from many common URLs such as http:// and ftp://. Gzipped files will be automatically detected and decompressed. If no filenames or URLs are given, then input is read from stdin."
    )
    parser.add_option("--ilwdchar-compat", action = "store_true", help = "Ignored. Included for command-line compatibility with newer versions of this tool. This version always requires ilwd:char row IDs.")
    parser.add_option("-i", "--input-cache", metavar = "name", action = "append", default = [], help = "Get URLs from the LAL cache file. Can be provided multiple times to name several caches to iterate over.")
    parser.add_option("-c", "--column", metavar = "name", action = "append", help = "Print only the contents of the given column. Can be provided multiple times to print multiple columns. The default is to print all columns from each table.")
    parser.add_option("-d", "--delimiter", metavar = "string", default = ",", help = "Delimit output with the given string. The default is \",\".")
    parser.add_option("-r", "--rows", metavar = "rowspec", default = ":", help = "Print rows in the given range(s). The format is first:last[,first:last,...]. Rows are numbered from 0. A single first:last pair requests rows in the range [first, last). If the first or last value of a pair is omited it means 0 or infinity respectively. The default is \":\", or to print all rows.")
    parser.add_option("-t", "--table", metavar = "name", action = "append", default = [], help = "Print rows from this table. Can be provided multiple times to print rows from multiple tables. The default is to print the contents of all tables.")
    parser.add_option("-a", "--array", metavar = "name", action = "append", default = [], help = "Print the contents of this array. Can be provided multiple times to print the elements from multiple arrays. The default is to print the contents of all arrays.")
    parser.add_option("-v", "--verbose", action = "store_true", help = "Be verbose.")
    parser.add_option("--constrain-lsc-tables", action = "store_true", help = "Impose additional constraints on official LSC tables. Provides format validation and allows RAM requirements to be reduced.")
    options, urls = parser.parse_args()

    # Ensure the delimiter is unicode compliant as well
    options.delimiter = six.text_type(options.delimiter)

    # add urls from cache files.  each cache is read inside a with
    # statement so that its file handle is closed as soon as the last
    # line has been consumed instead of being left for the garbage
    # collector
    for cache in options.input_cache:
        with open(cache) as cache_file:
            urls.extend(CacheEntry(line).url for line in cache_file)

    # strip table names
    if options.table:
        options.table = list(map(table.Table.TableName, options.table))
    if options.array:
        options.array = list(map(array.Array.ArrayName, options.array))

    # turn row requests into a segment list
    try:
        options.rows = segmentsUtils.from_range_strings(options.rows.split(","))
    except ValueError as e:
        raise ValueError("invalid rowspec: %s" % str(e))

    # success
    return options, (urls or [None])
#
# =============================================================================
#
# Input
#
# =============================================================================
#
#
# Enable column interning
#
# Table streams build their rows with the interning row builder.  The main
# loop empties table.InterningRowBuilder.strings after each document is
# loaded.  NOTE(review):  presumably the builder shares identical attribute
# values between rows to save memory --- confirm against glue.ligolw.table.
table.TableStream.RowBuilder = table.InterningRowBuilder
#
# =============================================================================
#
# How to find things to print
#
# =============================================================================
#
#
# How to print a table
#
def print_table(table_elem, columns, rows):
    """
    Print rows of table_elem to stdout, one line per row, with the
    values separated by the delimiter from the command line options.

    columns names the attributes to print from each row, in order;  if
    it is empty or None then all of the table's columns are printed.
    Only rows whose index, counting from 0, is in rows are printed.
    """
    # fall back to every column in the table
    selected = columns or table_elem.columnnames
    for n, row in enumerate(table_elem):
        if n not in rows:
            # not one of the requested rows
            continue
        fields = [six.text_type(getattr(row, key)) for key in selected]
        print(options.delimiter.join(fields))
#
# How to print an array
#
def print_array(array_elem):
    """
    Print the contents of array_elem.array to stdout.

    A one-dimensional array is printed one element per line.  A
    two-dimensional array is transposed and then printed one row of
    the transposed array per line, with the elements separated by the
    delimiter from the command line options.  Elements are formatted
    with repr().

    Raises ValueError for an array of any other dimensionality.
    """
    data = array_elem.array
    ndims = len(data.shape)

    # only one- and two-dimensional arrays can be printed
    if ndims not in (1, 2):
        raise ValueError("array has more than 2 dimensions")

    if ndims == 1:
        for element in data:
            print(repr(element))
        return

    for line in data.transpose()[:]:
        print(options.delimiter.join(repr(element) for element in line))
#
# =============================================================================
#
# Main
#
# =============================================================================
#
options, urls = parse_command_line()

#
# Decide how much of each document to parse.  When specific tables or
# arrays have been requested only those elements are parsed, which speeds
# up parsing and reduces memory use, and which means that everything found
# in the parsed document is to be printed.  Otherwise the whole document
# is parsed.
#

if options.table or options.array:
    class ContentHandler(ligolw.PartialLIGOLWContentHandler):
        def __init__(self, xmldoc):
            def requested(name, attrs):
                # only Table and Array elements are candidates
                if name not in (ligolw.Table.tagName, ligolw.Array.tagName):
                    return False
                # the name is tried as a table name first, then
                # as an array name
                if table.Table.TableName(attrs["Name"]) in options.table:
                    return True
                return array.Array.ArrayName(attrs["Name"]) in options.array
            super(ContentHandler, self).__init__(xmldoc, requested)
else:
    # parse the entire document
    ContentHandler = ligolw.LIGOLWContentHandler

#
# Array support is always enabled.  Table support comes from lsctables
# when --constrain-lsc-tables was given, from the generic table module
# otherwise.
#

array.use_in(ContentHandler)
(lsctables if options.constrain_lsc_tables else table).use_in(ContentHandler)

#
# If specific columns have been requested, don't load any others.
#

if options.column is not None:
    table.Table.loadcolumns = set(options.column)

#
# Load the URLs one at a time and print every table and array found in
# each.  urls is [None] when no URLs were given;  per the usage text input
# is then read from stdin.
#

for url in urls:
    xmldoc = utils.load_url(
        url,
        verbose = options.verbose,
        contenthandler = ContentHandler
    )
    # empty the interning row builder's strings container now that the
    # document has been loaded
    table.InterningRowBuilder.strings.clear()
    for elem in ligolw.WalkChildren(xmldoc):
        if elem.tagName == ligolw.Table.tagName:
            print_table(elem, options.column, options.rows)
        elif elem.tagName == ligolw.Array.tagName:
            print_array(elem)
    # done with this document
    xmldoc.unlink()
#!/usr/bin/python
#
# Copyright (C) 2007-2011 Kipp Cannon
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# =============================================================================
#
# Preamble
#
# =============================================================================
#
"""
Transfer table data between LIGO Light Weight XML files and SQLite
databases.
"""
from __future__ import print_function
from optparse import OptionParser
import os
import sqlite3
import sys
from glue import git_version
from glue.lal import CacheEntry
from glue.ligolw import ligolw
from glue.ligolw import dbtables
from glue.ligolw.utils import local_path_from_url
from glue.ligolw.utils import ligolw_sqlite
import six
# map the "ilwd:char" type to the text type in the type conversion table
# so that values of that type can be inserted into a database
dbtables.ligolwtypes.ToPyType["ilwd:char"] = six.text_type
__author__ = "Kipp Cannon <kipp.cannon@ligo.org>"
__version__ = "git id %s" % git_version.id
__date__ = git_version.date
#
# =============================================================================
#
# Command Line
#
# =============================================================================
#
def parse_command_line():
    """
    Parse the command line, return an options object and a list of file
    names.

    The URLs named on the command line come first, followed by the URL of
    every line of each LAL cache named with --input-cache.  If that
    leaves no URLs at all the list [None] is returned in their place.

    Raises ValueError if --database is not given.
    """
    parser = OptionParser(
        version = "Name: %%prog\n%s" % git_version.verbose_msg,
        usage = "%prog -d|--database filename [options] [url ...]",
        description = "Transfers data between LIGO Light Weight XML files and SQLite database files. The contents of the XML documents listed on the command line will be inserted, in order, into the SQLite database file identified by the --database argument. If the database exists the contents of the XML documents will be added to it, otherwise a new database file is created. If --extract is given on the command line, then the database contents will be converted to XML and written to the file named by this argument. The input XML documents can be regular files or many common URLs such as ftp:// and http://. If no input documents are named then input is read from stdin unless --extract is given in which case the datase contents are extracted to XML without reading any input documents. Input XML documents ending in .gz will be gzip-decompressed while being read, and if the file named by --extract ends in .gz then it will be gzip-compressed when written."
    )
    parser.add_option("-d", "--database", metavar = "filename", help = "Set the name of the SQLite3 database file (required).")
    parser.add_option("--ilwdchar-compat", action = "store_true", help = "Ignored. Included for command-line compatibility with newer versions of this tool. This version always requires ilwd:char row IDs.")
    parser.add_option("-i", "--input-cache", metavar = "filename", action = "append", default = [], help = "Get the names of XML documents to insert into the database from this LAL cache. This option can be given multiple times, and all files from all caches will be loaded.")
    parser.add_option("-p", "--preserve-ids", action = "store_true", help = "Preserve row IDs from the XML in the database. The default is to assign new IDs to prevent collisisions. Inserts will fail if collisions occur.")
    parser.add_option("-r", "--replace", action = "store_true", help = "If the database file already exists, over-write it instead of inserting into it.")
    parser.add_option("-t", "--tmp-space", metavar = "path", help = "Path to a directory suitable for use as a work area while manipulating the database file. The database file will be worked on in this directory, and then moved to the final location when complete. This option is intended to improve performance when running in a networked environment, where there might be a local disk with higher bandwidth than is available to the filesystem on which the final output will reside.")
    parser.add_option("-v", "--verbose", action = "store_true", help = "Be verbose.")
    parser.add_option("-x", "--extract", metavar = "filename", default = None, help = "Extract database contents to the given XML file, \"-\" == stdout (use \"./-\" if you want to write to a file named \"-\"). Extraction is done after any inserts.")
    options, urls = parser.parse_args()

    # append the URLs listed in the cache files.  each cache is read
    # inside a with statement so that its file handle is closed as soon
    # as the last line has been consumed instead of being left for the
    # garbage collector
    for cache in options.input_cache:
        with open(cache) as cache_file:
            urls.extend(CacheEntry(line).url for line in cache_file)

    if not options.database:
        raise ValueError("missing required argument --database")

    return options, (urls or [None])
#
# =============================================================================
#