/*
 * Copyright (C) 2006-2009,2016,2017  Kipp C. Cannon
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2 of the License, or (at your
 * option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */


/*
 * ============================================================================
 *
 *                     ligo.lw.tokenizer Extension Module
 *
 * ============================================================================
 */

28

kipp's avatar
kipp committed
29
#include <Python.h>
kipp's avatar
kipp committed
30 31
#include <ctype.h>
#include <stdio.h>
kipp's avatar
kipp committed
32
#include <stdlib.h>
kipp's avatar
kipp committed
33
#include <wchar.h>
34
#include <tokenizer.h>
kipp's avatar
kipp committed
35 36


/*
 * ============================================================================
 *
 *                              Helper Functions
 *
 * ============================================================================
 */


/*
47
 * Convert a sequence of unicode and/or strings to a tuple of unicodes.
48
 * Creates a reference to a new object, does not decref its argument.
49 50 51
 */


52
PyObject *llwtokenizer_build_attributes(PyObject *sequence)
53
{
54 55
	PyObject *result;
	int i;
56

57 58
	/* guaranteed to produce a new object */
	sequence = PySequence_List(sequence);
59 60 61
	if(!sequence)
		return NULL;

62 63 64 65 66 67
	for(i = 0; i < PyList_GET_SIZE(sequence); i++) {
		PyObject *item = PyList_GET_ITEM(sequence, i);
		if(!item) {
			Py_DECREF(sequence);
			return NULL;
		}
68 69
		if(!PyUnicode_Check(item)) {
			PyObject *str = PyUnicode_FromObject(item);
70 71 72
			if(!str) {
				Py_DECREF(sequence);
				return NULL;
73
			}
74 75
			Py_DECREF(item);
			PyList_SET_ITEM(sequence, i, str);
76 77 78
		}
	}

79
	result = PySequence_Tuple(sequence);
80 81
	Py_DECREF(sequence);

82
	return result;
83 84 85 86
}


/*
87 88
 * Convert a sequence of functions to a tuple of functions.  Creates a
 * reference to a new object, does not decref its argument.
89 90 91
 */


92
PyObject *llwtokenizer_build_formats(PyObject *sequence)
93
{
94
	return PySequence_Tuple(sequence);
95 96 97
}


/*
 * ============================================================================
 *
 *                            Module Registration
 *
 * ============================================================================
 */


/*
 * Documentation string for the module.  Passed to Py_InitModule3() or
 * placed in the PyModuleDef by the module initialization function.
 */
#define MODULE_DOC \
"This module provides a tokenizer for LIGO Light Weight XML Stream and Array\n" \
"elements, as well as other utilities to assist in packing parsed tokens into\n" \
"various data storage units."


Kipp Cannon's avatar
Kipp Cannon committed
113 114 115 116
#if PY_MAJOR_VERSION < 3
PyMODINIT_FUNC inittokenizer(void); /* Silence -Wmissing-prototypes */
PyMODINIT_FUNC inittokenizer(void)
#else
117 118
PyMODINIT_FUNC PyInit_tokenizer(void); /* Silence -Wmissing-prototypes */
PyMODINIT_FUNC PyInit_tokenizer(void)
Kipp Cannon's avatar
Kipp Cannon committed
119
#endif
120
{
121 122 123 124 125 126 127
	/*
	 * Create the module.
	 */

#if PY_MAJOR_VERSION < 3
	PyObject *module = Py_InitModule3(MODULE_NAME, NULL, MODULE_DOC);
#else
Kipp Cannon's avatar
Kipp Cannon committed
128 129 130 131
	static PyModuleDef moduledef = {
		PyModuleDef_HEAD_INIT,
		MODULE_NAME, MODULE_DOC, -1, NULL
	};
132 133 134 135 136 137 138 139
	PyObject *module = PyModule_Create(&moduledef);
#endif
	if (!module)
		goto done;

	/*
	 * Initialize the classes
	 */
140 141 142 143 144 145 146 147

	if(PyType_Ready(&ligolw_Tokenizer_Type) < 0)
		goto done;
	if(PyType_Ready(&ligolw_RowBuilder_Type) < 0)
		goto done;
	if(PyType_Ready(&ligolw_RowDumper_Type) < 0)
		goto done;

148
	/*
149
	 * Add the Tokenizer class.
150 151
	 */

152 153
	Py_INCREF(&ligolw_Tokenizer_Type);
	PyModule_AddObject(module, "Tokenizer", (PyObject *) &ligolw_Tokenizer_Type);
154 155 156 157 158 159 160

	/*
	 * Add the RowBuilder class.
	 */

	Py_INCREF(&ligolw_RowBuilder_Type);
	PyModule_AddObject(module, "RowBuilder", (PyObject *) &ligolw_RowBuilder_Type);
161 162 163 164 165 166 167

	/*
	 * Add the RowDumper class.
	 */

	Py_INCREF(&ligolw_RowDumper_Type);
	PyModule_AddObject(module, "RowDumper", (PyObject *) &ligolw_RowDumper_Type);
168

169 170 171 172
	/*
	 * Done.
	 */

173
done:
Kipp Cannon's avatar
Kipp Cannon committed
174 175 176
#if PY_MAJOR_VERSION < 3
	return;
#else
177
	return module;
Kipp Cannon's avatar
Kipp Cannon committed
178
#endif
kipp's avatar
kipp committed
179
}