types.py 9.9 KB
Newer Older
Kipp Cannon's avatar
Kipp Cannon committed
1
# Copyright (C) 2006--2013,2016,2017  Kipp Cannon
kipp's avatar
kipp committed
2 3 4
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
duncan's avatar
duncan committed
5
# Free Software Foundation; either version 3 of the License, or (at your
kipp's avatar
kipp committed
6 7 8 9 10 11 12 13 14 15 16
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

kipp's avatar
kipp committed
17

18 19 20 21 22 23 24 25
#
# =============================================================================
#
#                                   Preamble
#
# =============================================================================
#

kipp's avatar
kipp committed
26

27 28
"""
Definitions of type strings found in LIGO Light Weight XML files.
29

kipp's avatar
kipp committed
30 31 32 33 34 35 36 37
Notes.  To guarantee that a double-precision floating-point number can be
reconstructed exactly from its representation as a decimal number, one must
use 17 decimal digits;  for single-precision, the number is 9.  Python uses
only double-precision numbers, but LIGO Light Weight XML allows for
single-precision values, so I provide distinct format specifiers for those
cases here.  In both cases, I have elected to use 1 fewer digits than are
required to uniquely reconstruct the number:  the XML written by this
library is lossy.  I made this choice to reduce the file size, for example
38 39 40 41 42 43 44 45 46

>>> "%.17g" % 0.1
'0.10000000000000001'

while

>>> "%.16g" % 0.1
'0.1'

kipp's avatar
kipp committed
47 48 49
In this worst case, storing full precision increases the size of the XML by
more than an order of magnitude.  If you wish to make a different choice
for your files, for example if you wish your XML files to be lossless,
50
simply include the lines::
51

52
	ligo.lw.types.FormatFunc.update({
53 54 55
		"real_4": u"%.9g".__mod__,
		"real_8": u"%.17g".__mod__,
		"float": u"%.9g".__mod__,
56
		"double": u"%.17g".__mod__,
57 58
		u"complex_8": ligo.lw.types.mk_complex_format_func(u"%.9g"),
		u"complex_16": ligo.lw.types.mk_complex_format_func(u"%.17g")
59 60 61 62 63 64 65
	})

anywhere in your code, but before you write the document to a file.

References:

	- http://docs.sun.com/source/806-3568/ncg_goldberg.html
66 67
"""

kipp's avatar
kipp committed
68

69 70 71
import base64


72
from . import __author__, __date__, __version__
73
from . import ilwd
74 75 76 77
import six


# Make the name ``long`` usable on every Python version:  where the
# built-in does not exist the look-up below raises NameError and ``long``
# is bound to ``int`` instead.  FromPyType, later in this module, uses
# ``long`` as one of its keys.
try:  # python < 3
	long
except NameError:  # python >= 3
	long = int
81 82


83 84 85
#
# =============================================================================
#
kipp's avatar
kipp committed
86
#                               Type Categories
87 88 89 90
#
# =============================================================================
#

kipp's avatar
kipp committed
91

kipp's avatar
kipp committed
92
IDTypes = {u"ilwd:char", u"ilwd:char_u"}
"""LIGO Light-Weight XML type strings for ID-like data."""

BlobTypes = {u"blob", u"ilwd:char_u"}
"""LIGO Light-Weight XML type strings for binary blob-like data."""

StringTypes = {u"char_s", u"char_v", u"lstring", u"string", u"ilwd:char"}
"""LIGO Light-Weight XML type strings for string-like data."""

IntTypes = {u"int_2s", u"int_2u", u"int_4s", u"int_4u", u"int_8s", u"int_8u", u"int"}
"""LIGO Light-Weight XML type strings for integer-like data."""

FloatTypes = {u"real_4", u"real_8", u"float", u"double"}
"""LIGO Light-Weight XML type strings for floating-point-like data."""

ComplexTypes = {u"complex_8", u"complex_16"}
"""LIGO Light-Weight XML type strings for complex-like data."""

# the union of the three numeric categories above
NumericTypes = IntTypes.union(FloatTypes, ComplexTypes)
"""LIGO Light-Weight XML type strings for number-like data."""

TimeTypes = {u"GPS", u"Unix", u"ISO-8601"}
"""LIGO Light-Weight XML type strings for time-like data."""

# IDTypes is not listed explicitly:  both of its members are already
# present by way of BlobTypes and StringTypes
Types = BlobTypes.union(StringTypes, NumericTypes, TimeTypes)
"""All valid LIGO Light-Weight XML type strings."""
118

kipp's avatar
kipp committed
119

kipp's avatar
kipp committed
120 121 122 123 124 125 126 127 128 129
#
# =============================================================================
#
#                         Output Format Look-up Table
#
# =============================================================================
#


def string_format_func(s):
	"""
	Function used internally to format string data for output to XML.
	The argument is converted to text, back-slashes and double quotes
	in it are escaped with a back-slash, and the result is returned
	wrapped in double quotes.
	"""
	text = six.text_type(s)
	# back-slashes must be doubled first, otherwise the back-slashes
	# introduced by escaping the quotes would themselves be doubled
	escaped = text.replace(u"\\", u"\\\\")
	escaped = escaped.replace(u"\"", u"\\\"")
	return u"\"%s\"" % escaped
136 137


kipp's avatar
kipp committed
138
def blob_format_func(b):
	"""
	Function used internally to format binary data.  Base64-encodes the
	data and wraps the resulting string in quotes.

	b is the bytes-like object to encode.  The return value is text,
	for example

	>>> blob_format_func(b"hello") == u'"aGVsbG8="'
	True
	"""
	# standard_b64encode() returns a byte string.  It must be decoded
	# to text before being interpolated, otherwise on Python 3 the
	# repr() of the bytes object (b'...') ends up between the quotes
	# and the document no longer contains valid base64.  Base64 output
	# is pure ASCII so the decode cannot fail.
	return u"\"%s\"" % base64.standard_b64encode(b).decode("ascii")


146
def mk_complex_format_func(fmt):
	"""
	Function used internally to generate functions to format complex
	valued data.  fmt is the format specifier for one real number;  the
	function that is returned formats its argument's .real and .imag
	attributes with that specifier and joins the two with "+i".
	"""
	# one copy of the specifier for each of the two parts
	pair_fmt = u"+i".join((fmt, fmt))

	def complex_format_func(z):
		return pair_fmt % (z.real, z.imag)

	return complex_format_func


157
# Maps each type string to a callable that takes one Python value and
# returns its text representation.
FormatFunc = {
	# string-like types:  escaped and wrapped in quotes
	u"char_s": string_format_func,
	u"char_v": string_format_func,
	# IDs:  wrapped in quotes without any escaping
	u"ilwd:char": u"\"%s\"".__mod__,
	# binary types:  base64-encoded and wrapped in quotes
	u"ilwd:char_u": blob_format_func,
	u"blob": blob_format_func,
	u"lstring": string_format_func,
	u"string": string_format_func,
	# integer types
	u"int_2s": u"%d".__mod__,
	u"int_2u": u"%u".__mod__,
	u"int_4s": u"%d".__mod__,
	u"int_4u": u"%u".__mod__,
	u"int_8s": u"%d".__mod__,
	u"int_8u": u"%u".__mod__,
	u"int": u"%d".__mod__,
	# floating-point types:  8 and 16 significant digits for single-
	# and double-precision respectively, see the module docstring for
	# the reasoning
	u"real_4": u"%.8g".__mod__,
	u"real_8": u"%.16g".__mod__,
	u"float": u"%.8g".__mod__,
	u"double": u"%.16g".__mod__,
	# complex types:  real and imaginary parts joined by "+i", each
	# with the same number of digits as the matching real type
	u"complex_8": mk_complex_format_func(u"%.8g"),
	u"complex_16": mk_complex_format_func(u"%.16g")
}
Kipp Cannon's avatar
Kipp Cannon committed
179 180 181
"""
Look-up table mapping LIGO Light-Weight XML data type strings to functions
for formatting Python data for output.  This table is used universally by
ligo.lw XML writing codes.
"""
kipp's avatar
kipp committed
184

kipp's avatar
kipp committed
185

kipp's avatar
kipp committed
186 187 188 189 190 191 192 193 194
#
# =============================================================================
#
#                  Conversion To And From Native Python Types
#
# =============================================================================
#


kipp's avatar
kipp committed
195
# Maps each type string to a callable that takes the text of one value and
# returns the corresponding Python object.
ToPyType = {
	u"char_s": six.text_type,
	u"char_v": six.text_type,
	u"ilwd:char": ilwd.ilwdchar,
	# binary types:  the text is base64-decoded and the resulting bytes
	# are wrapped in a memoryview
	u"ilwd:char_u": lambda s: memoryview(base64.b64decode(s)),
	u"blob": lambda s: memoryview(base64.b64decode(s)),
	u"lstring": six.text_type,
	u"string": six.text_type,
	# all integer widths become Python int
	u"int_2s": int,
	u"int_2u": int,
	u"int_4s": int,
	u"int_4u": int,
	u"int_8s": int,
	u"int_8u": int,
	u"int": int,
	# both floating-point precisions become Python float
	u"real_4": float,
	u"real_8": float,
	u"float": float,
	u"double": float,
	# complex types:  the text is split on "+i" and the pieces are
	# converted to floats and passed to complex() as its arguments
	u"complex_8": lambda s: complex(*map(float, s.split(u"+i"))),
	u"complex_16": lambda s: complex(*map(float, s.split(u"+i")))
}
Kipp Cannon's avatar
Kipp Cannon committed
217 218 219
"""
Look-up table mapping LIGO Light-Weight XML data type strings to functions
for parsing Python data from input.  This table is used universally by
220
ligo.lw XML parsing codes.
Kipp Cannon's avatar
Kipp Cannon committed
221
"""
kipp's avatar
kipp committed
222

kipp's avatar
kipp committed
223

224 225 226 227 228
class FromPyTypeCls(dict):
	"""
	Dictionary keyed by Python types whose item look-up also matches
	subclasses of the keys.  Only the [] operator is affected;  the
	other dict methods, for example .get() and the in operator, are
	inherited unchanged and match exact keys only.
	"""
	def __getitem__(self, key):
		"""
		Return the value stored for key.  If key itself is not in
		the dictionary, return the value of the first entry, in
		iteration order, whose key is a base class of key.  Raises
		KeyError if there is no match.
		"""
		try:
			return super(FromPyTypeCls, self).__getitem__(key)
		except KeyError:
			pass
		try:
			for test_key, val in self.items():
				if issubclass(key, test_key):
					return val
		except TypeError:
			# issubclass() refuses arguments that are not
			# classes.  Such a key cannot match any entry, so
			# report it as missing, as any mapping would,
			# instead of letting the TypeError escape.
			pass
		raise KeyError(key)


# Exact-type entries.  Look-ups with the [] operator go through
# FromPyTypeCls.__getitem__(), which falls back to an issubclass() search
# of these keys when the exact type is not listed.
FromPyType = FromPyTypeCls({
	ilwd._ilwd.ilwdchar: u"ilwd:char",
	memoryview: u"blob",
	# str and six.text_type both map to lstring
	str: u"lstring",
	six.text_type: u"lstring",
	# bool is listed explicitly, with a narrower type than int
	bool: u"int_4s",
	int: u"int_8s",
	# long is an alias of int when the built-in long does not exist
	# (see the top of this module), in which case this entry repeats
	# the int key with the same value
	long: u"int_8s",
	float: u"real_8",
	complex: u"complex_16"
})
Kipp Cannon's avatar
Kipp Cannon committed
246 247 248 249 250
"""
Look-up table used to guess LIGO Light-Weight XML data type strings from
Python types.  This table is used when auto-generating XML from Python
objects.
"""
251

kipp's avatar
kipp committed
252

kipp's avatar
kipp committed
253 254 255 256 257 258 259 260 261
#
# =============================================================================
#
#                  Conversion To and From Native Numpy Types
#
# =============================================================================
#


kipp's avatar
kipp committed
262
# Only numeric type strings have entries;  string-, blob- and time-like
# types are absent from this table.
ToNumPyType = {
	# integer types:  the digit in the type string is the width in
	# bytes, the numpy name gives the width in bits
	u"int_2s": "int16",
	u"int_2u": "uint16",
	u"int_4s": "int32",
	u"int_4u": "uint32",
	u"int_8s": "int64",
	u"int_8u": "uint64",
	# plain "int" is treated as int_4s
	u"int": "int32",
	# floating-point types
	u"real_4": "float32",
	u"real_8": "float64",
	u"float": "float32",
	u"double": "float64",
	# complex types
	u"complex_8": "complex64",
	u"complex_16": "complex128"
}
Kipp Cannon's avatar
Kipp Cannon committed
277 278
"""
Look-up table mapping LIGO Light-Weight XML data type strings to numpy
279
array type strings.  Used by ligo.lw array reading codes.
Kipp Cannon's avatar
Kipp Cannon committed
280
"""
kipp's avatar
kipp committed
281

kipp's avatar
kipp committed
282

kipp's avatar
kipp committed
283
# Each numpy type name maps to a single type string, so the aliases "int",
# "float" and "double" that appear as keys in ToNumPyType are never
# produced by this table.
FromNumPyType = {
	# integer types
	"int16": u"int_2s",
	"uint16": u"int_2u",
	"int32": u"int_4s",
	"uint32": u"int_4u",
	"int64": u"int_8s",
	"uint64": u"int_8u",
	# floating-point types
	"float32": u"real_4",
	"float64": u"real_8",
	# complex types
	"complex64": u"complex_8",
	"complex128": u"complex_16"
}
Kipp Cannon's avatar
Kipp Cannon committed
295 296
"""
Look-up table mapping numpy array type strings to LIGO Light-Weight XML
data type strings.  Used by ligo.lw array writing codes.
"""
kipp's avatar
kipp committed
299

kipp's avatar
kipp committed
300

kipp's avatar
kipp committed
301 302 303
#
# =============================================================================
#
Kipp Cannon's avatar
Kipp Cannon committed
304
#                 Conversion To and From Native Database Types
kipp's avatar
kipp committed
305 306 307 308 309
#
# =============================================================================
#


310 311 312 313 314 315 316
#
# SQL does not support complex numbers.  Documents containing
# complex-valued table columns cannot be stored in SQL databases at this
# time.
#


Kipp Cannon's avatar
Kipp Cannon committed
317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336
# There are no entries for complex_8 and complex_16.
ToMySQLType = {
	# string-like types
	u"char_s": "CHAR(20)",
	u"char_v": "VARCHAR(64)",
	u"ilwd:char": "VARCHAR(64)",
	# binary types
	u"ilwd:char_u": "BLOB",
	u"blob": "BLOB",
	u"lstring": "VARCHAR(255)",
	u"string": "VARCHAR(255)",
	# integer types
	# NOTE(review):  each unsigned type maps to the same column type
	# as its signed counterpart -- confirm that the upper half of the
	# unsigned range can be stored
	u"int_2s": "SMALLINT",
	u"int_2u": "SMALLINT",
	u"int_4s": "INTEGER",
	u"int_4u": "INTEGER",
	u"int_8s": "BIGINT",
	u"int_8u": "BIGINT",
	u"int": "INTEGER",
	# floating-point types
	u"real_4": "FLOAT",
	u"real_8": "DOUBLE",
	u"float": "FLOAT",
	u"double": "DOUBLE"
}
Kipp Cannon's avatar
Kipp Cannon committed
337 338 339 340
"""
Look-up table mapping LIGO Light-Weight XML data type strings to MySQL
column types.  Used by XML --> MySQL conversion codes.
"""
Kipp Cannon's avatar
Kipp Cannon committed
341 342


343
# There are no entries for complex_8 and complex_16.  Every type string is
# mapped to one of TEXT, BLOB, INTEGER or REAL.
ToSQLiteType = {
	# string-like types
	u"char_s": "TEXT",
	u"char_v": "TEXT",
	u"ilwd:char": "TEXT",
	# binary types
	u"ilwd:char_u": "BLOB",
	u"blob": "BLOB",
	u"lstring": "TEXT",
	u"string": "TEXT",
	# integer types:  all widths, signed and unsigned, share one
	# column type
	u"int_2s": "INTEGER",
	u"int_2u": "INTEGER",
	u"int_4s": "INTEGER",
	u"int_4u": "INTEGER",
	u"int_8s": "INTEGER",
	u"int_8u": "INTEGER",
	u"int": "INTEGER",
	# floating-point types:  both precisions share one column type
	u"real_4": "REAL",
	u"real_8": "REAL",
	u"float": "REAL",
	u"double": "REAL"
}
Kipp Cannon's avatar
Kipp Cannon committed
363 364 365 366
"""
Look-up table mapping LIGO Light-Weight XML data type strings to SQLite
column types.  Used by XML --> SQLite conversion codes.
"""
367

kipp's avatar
kipp committed
368

369
# This is not the inverse of ToSQLiteType:  that table maps many type
# strings to each column type, here one representative is chosen for each.
FromSQLiteType = {
	"BLOB": u"blob",
	"TEXT": u"lstring",
	"STRING": u"lstring",
	# NOTE(review):  ToSQLiteType also stores the 8-byte integer types
	# in INTEGER columns -- confirm that int_4s is the intended guess
	"INTEGER": u"int_4s",
	"REAL": u"real_8"
}
Kipp Cannon's avatar
Kipp Cannon committed
376 377 378 379 380
"""
Look-up table used to guess LIGO Light-Weight XML data type strings from
SQLite column types.  Used when auto-generating XML from the contents of an
SQLite database.
"""