Commit 729048c8 authored by Leo Pound Singer's avatar Leo Pound Singer

Deprecate gz kwarg and replace with compress kwarg

This is to prepare us to support multiple compression methods
including Zstandard.

See !13.
parent c1337801
Pipeline #144168 passed with stages
in 4 minutes and 54 seconds
......@@ -135,7 +135,7 @@ ligolw_utils.write_filename(
xmldoc,
options.output,
verbose = options.verbose,
gz = (options.output or "stdout").endswith(".gz")
compress = 'gz' if (options.output or "stdout").endswith(".gz") else False
)
......
......@@ -193,5 +193,5 @@ for n, filename in enumerate(filenames, 1):
RemoveColumns(doc, options.delete_column)
if options.delete_element:
RemoveElements(doc, options.delete_element)
ligolw_utils.write_filename(doc, filename, verbose = options.verbose, gz = (filename or "stdout").endswith(".gz"))
ligolw_utils.write_filename(doc, filename, verbose = options.verbose, compress = 'gz' if (filename or "stdout").endswith(".gz") else False)
doc.unlink()
......@@ -87,5 +87,5 @@ for filename in filenames or [None]:
ilwd.strip_ilwdchar(xmldoc)
ligolw_utils.write_filename(xmldoc, filename, gz = (filename or "stdout").endswith(".gz"), verbose = options.verbose)
ligolw_utils.write_filename(xmldoc, filename, compress = 'gz' if (filename or "stdout").endswith(".gz") else False, verbose = options.verbose)
xmldoc.unlink()
......@@ -218,8 +218,8 @@ for url in urls:
if not options.output:
ligolw_utils.write_url(xmldoc, url, verbose = options.verbose, gz = (url or "stdout").endswith(".gz"))
ligolw_utils.write_url(xmldoc, url, verbose = options.verbose, compress = 'gz' if (url or "stdout").endswith(".gz") else False)
elif options.output == "-":
ligolw_utils.write_filename(xmldoc, None, verbose = options.verbose)
else:
ligolw_utils.write_filename(xmldoc, options.output, verbose = options.verbose, gz = options.output.endswith(".gz"))
ligolw_utils.write_filename(xmldoc, options.output, verbose = options.verbose, compress = 'gz' if options.output.endswith(".gz") else False)
......@@ -73,7 +73,7 @@ Example:
...
>>> # write document. must explicitly state whether or not the file is to be
>>> # gzip compressed
>>> ligolw_utils.write_filename(xmldoc, filename, gz = filename.endswith(".gz"), verbose = True)
>>> ligolw_utils.write_filename(xmldoc, filename, compress = 'gz' if filename.endswith(".gz") else False, verbose = True)
"""
......
......@@ -309,17 +309,34 @@ class SignalsTrap(object):
return False
def load_fileobj(fileobj, gz = None, xmldoc = None, contenthandler = None):
def _normalize_compress_kwarg(compress = None, gz = None):
    """
    Reconcile the deprecated ``gz`` keyword argument with its
    replacement, ``compress``, and return the effective value of
    ``compress``.

    The result is ``'gz'`` (gzip), ``False`` (no compression), or
    whatever was passed as ``compress`` when ``gz`` plays no role
    (including None, which is returned unchanged).

    Rules:

    - ``gz`` is None:  ``compress`` is returned as-is.
    - ``compress`` is None and ``gz`` is not None:  a
      DeprecationWarning is issued and ``gz`` is translated to
      ``'gz'`` (truthy) or ``False`` (falsy).
    - ``compress`` is not None and ``gz`` is falsy:  ``compress`` is
      returned as-is with no warning.  Some callers of this helper
      still declare ``gz = False`` as their default, so a falsy ``gz``
      cannot be told apart from "not supplied";  treating it as a
      conflict would make every call that uses ``compress`` fail.
    - ``compress`` is not None and ``gz`` is truthy:  a
      DeprecationWarning is issued and ValueError is raised, because
      the caller really did supply both.

    Raises ValueError if both ``compress`` and a truthy ``gz`` are
    given.
    """
    # Legacy keyword absent:  nothing to translate.
    if gz is None:
        return compress

    # New-style call.  A falsy gz here is indistinguishable from a
    # legacy default of False, so let the explicit compress value win
    # instead of raising.
    if compress is not None and not gz:
        return compress

    # stacklevel=3 skips this helper and the function that called it,
    # so the warning is attributed to that function's caller.
    warnings.warn(
        'The gz keyword argument is deprecated. '
        'Use the compress keyword argument instead.',
        DeprecationWarning,
        stacklevel=3)
    if compress is not None:
        raise ValueError(
            'must not specify both compress and gz keyword arguments')
    return 'gz' if gz else False
def load_fileobj(fileobj, compress = None, gz = None, xmldoc = None, contenthandler = None):
"""
Parse the contents of the file object fileobj, and return the
contents as a LIGO Light Weight document tree. The file object
does not need to be seekable. The file object must be in binary
mode.
If the gz parameter is None (the default) then gzip compressed data
If the compress parameter is None (the default) then gzip compressed data
will be automatically detected and decompressed, otherwise
decompression can be forced on or off by setting gz to True or
False respectively.
decompression can be forced on by setting compress to the string ``gz``,
or forced off by setting compress to False.
If the optional xmldoc argument is provided and not None, the
parsed XML tree will be appended to that document, otherwise a new
......@@ -341,9 +358,12 @@ def load_fileobj(fileobj, gz = None, xmldoc = None, contenthandler = None):
ligo.lw.ligolw.FilteringLIGOLWContentHandler for examples of custom
content handlers used to load subsets of documents into memory.
"""
# FIXME: remove the following once we drop the ``gz`` keyword argument.
compress = _normalize_compress_kwarg(compress = compress, gz = gz)
if contenthandler is None:
raise ValueError("missing required keyword argument \"contenthandler\"")
if gz or gz is None:
if compress == 'gz' or compress is None:
fileobj = RewindableInputFile(fileobj)
magic = fileobj.read(2)
fileobj.seek(0, os.SEEK_SET)
......@@ -411,14 +431,14 @@ def load_url(url, verbose = False, **kwargs):
return load_fileobj(fileobj, **kwargs)
def write_fileobj(xmldoc, fileobj, gz = False, compresslevel = 3, **kwargs):
def write_fileobj(xmldoc, fileobj, compress = None, gz = False, compresslevel = 3, **kwargs):
"""
Writes the LIGO Light Weight document tree rooted at xmldoc to the
given file object. Internally, the .write() method of the xmldoc
object is invoked and any additional keyword arguments are passed
to that method. The file object need not be seekable. The file
object must be in binary mode. The output data is gzip compressed
on the fly if gz is True, and in that case the compresslevel
on the fly if compress is ``gz``, and in that case the compresslevel
parameter sets the gzip compression level (the default is 3).
Example:
......@@ -428,8 +448,11 @@ def write_fileobj(xmldoc, fileobj, gz = False, compresslevel = 3, **kwargs):
>>> xmldoc = load_filename("demo.xml", contenthandler = ligolw.LIGOLWContentHandler)
>>> write_fileobj(xmldoc, open("/dev/null","wb"))
"""
# FIXME: remove the following once we drop the ``gz`` keyword argument.
compress = _normalize_compress_kwarg(compress = compress, gz = gz)
with NoCloseFlushWrapper(fileobj) as fileobj:
if gz:
if compress == 'gz':
fileobj = gzip.GzipFile(mode = "wb", fileobj = fileobj, compresslevel = compresslevel)
with codecs.getwriter("utf_8")(fileobj) as fileobj:
xmldoc.write(fileobj, **kwargs)
......@@ -465,13 +488,13 @@ class tildefile(object):
return False
def write_filename(xmldoc, filename, verbose = False, gz = False, with_mv = True, trap_signals = SignalsTrap.default_signals, **kwargs):
def write_filename(xmldoc, filename, verbose = False, compress = None, gz = False, with_mv = True, trap_signals = SignalsTrap.default_signals, **kwargs):
"""
Writes the LIGO Light Weight document tree rooted at xmldoc to the
file name filename. If filename is None the file is written to
stdout, otherwise it is written to the named file. Friendly
verbosity messages are printed while writing the file if verbose is
True. The output data is gzip compressed on the fly if gz is True.
True. The output data is gzip compressed on the fly if compress is ``gz``.
If with_mv is True and filename is not None the filename has a "~"
appended to it and the file is written to that name then moved to
the requested name once the write has completed successfully.
......@@ -494,21 +517,24 @@ def write_filename(xmldoc, filename, verbose = False, gz = False, with_mv = True
Example:
>>> write_filename(xmldoc, "demo.xml") # doctest: +SKIP
>>> write_filename(xmldoc, "demo.xml.gz", gz = True) # doctest: +SKIP
>>> write_filename(xmldoc, "demo.xml.gz", compress = 'gz') # doctest: +SKIP
"""
# FIXME: remove the following once we drop the ``gz`` keyword argument.
compress = _normalize_compress_kwarg(compress = compress, gz = gz)
if verbose:
sys.stderr.write("writing %s ...\n" % (("'%s'" % filename) if filename is not None else "stdout"))
with SignalsTrap(trap_signals):
if filename is None:
# In Python 3, sys.stdout has a .buffer attribute
# that is the underlying byte-oriented stream.
write_fileobj(xmldoc, sys.stdout.buffer if hasattr(sys.stdout, "buffer") else sys.stdout, gz = gz, **kwargs)
write_fileobj(xmldoc, sys.stdout.buffer if hasattr(sys.stdout, "buffer") else sys.stdout, compress = compress, **kwargs)
else:
if not gz and filename.endswith(".gz"):
if compress != 'gz' and filename.endswith(".gz"):
warnings.warn("filename '%s' ends in '.gz' but file is not being gzip-compressed" % filename, UserWarning)
binary_open = lambda filename: open(filename, "wb")
with (binary_open if not with_mv else tildefile)(filename) as fileobj:
write_fileobj(xmldoc, fileobj, gz = gz, **kwargs)
write_fileobj(xmldoc, fileobj, compress = compress, **kwargs)
def write_url(xmldoc, url, **kwargs):
......@@ -525,6 +551,6 @@ def write_url(xmldoc, url, **kwargs):
Example:
>>> write_url(xmldoc, "file:///data.xml") # doctest: +SKIP
>>> write_url(xmldoc, "file:///data.xml.gz", gz = True) # doctest: +SKIP
>>> write_url(xmldoc, "file:///data.xml.gz", compress = 'gz') # doctest: +SKIP
"""
return write_filename(xmldoc, local_path_from_url(url), **kwargs)
......@@ -301,7 +301,7 @@ def extract(connection, filename, table_names = None, verbose = False, xsl_file
"""
xmldoc = ligolw.Document()
xmldoc.appendChild(dbtables.get_xml(connection, table_names))
ligolw_utils.write_filename(xmldoc, filename, gz = (filename or "stdout").endswith(".gz"), verbose = verbose, xsl_file = xsl_file)
ligolw_utils.write_filename(xmldoc, filename, compress = 'gz' if (filename or "stdout").endswith(".gz") else False, verbose = verbose, xsl_file = xsl_file)
# delete cursors
xmldoc.unlink()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment