/*
 * Copyright (C) 2007-2009,2011,2015-2017  Kipp C. Cannon
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2 of the License, or (at your
 * option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */


/*
 * ============================================================================
 *
 *                         tokenizer.RowDumper Class
 *
 * ============================================================================
 */


#include <Python.h>
#include <structmember.h>
#include <stdlib.h>
#include <tokenizer.h>


/*
 * ============================================================================
 *
 *                              Row Dumper Type
 *
 * ============================================================================
 */


/*
 * Structure
 */


typedef struct {
	PyObject_HEAD
51
	/* delimiter character to be used in row construction */
52
	PyObject *delimiter;
kipp's avatar
kipp committed
53
	/* tuple of attribute names as Python strings */
54
	PyObject *attributes;
55
	/* tuple of row element format functions */
56
	PyObject *formats;
kipp's avatar
kipp committed
57
	/* the source of row objects to be turned to unicode strings */
58
	PyObject *iter;
59 60
	/* number of rows converted so far.  not used here, but helpful for
	 * constructing error messages in the calling code */
61
	Py_ssize_t rows_converted;
62 63
	/* tuple of unicode tokens from most recently converted row */
	PyObject *tokens;
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
} ligolw_RowDumper;


/*
 * __del__() method
 */


/*
 * Deallocator:  release every object reference held by the instance, then
 * hand the memory back to the type's tp_free slot.  Py_XDECREF() is used
 * throughout because any member may still be NULL if initialization never
 * completed.
 */
static void __del__(PyObject *self)
{
	ligolw_RowDumper *rowdumper = (ligolw_RowDumper *) self;

	Py_XDECREF(rowdumper->delimiter);
	Py_XDECREF(rowdumper->attributes);
	Py_XDECREF(rowdumper->formats);
	Py_XDECREF(rowdumper->iter);
	Py_XDECREF(rowdumper->tokens);

	/* Py_TYPE() is the portable accessor for the object's type */
	Py_TYPE(self)->tp_free(self);
}


/*
 * __init__() method
 */


/*
 * Initializer:  RowDumper(attributes, formats[, delimiter]).
 *
 * The arguments are parsed into local variables and the new member values
 * are built completely before anything is stored in the instance.  The
 * deallocator drops a reference from every non-NULL member, so storing
 * the borrowed references produced by PyArg_ParseTuple() directly in the
 * instance would lead to references being released that were never owned
 * if initialization failed part-way through.
 *
 * Returns 0 on success, -1 with a Python exception set on failure.  On
 * failure the instance is left exactly as it was before the call.
 * Keyword arguments are ignored.
 */
static int __init__(PyObject *self, PyObject *args, PyObject *kwds)
{
	ligolw_RowDumper *rowdumper = (ligolw_RowDumper *) self;
	/* borrowed references filled in by the argument parser */
	PyObject *attributes_arg = NULL;
	PyObject *formats_arg = NULL;
	PyObject *delimiter_arg = NULL;
	/* owned references that will become the new member values */
	PyObject *delimiter = NULL;
	PyObject *attributes = NULL;
	PyObject *formats = NULL;
	/* previous member values, released after the new ones are in place */
	PyObject *stale[5];
	size_t i;

	(void) kwds;

	if(!PyArg_ParseTuple(args, "OO|U", &attributes_arg, &formats_arg, &delimiter_arg))
		return -1;

	if(delimiter_arg) {
		delimiter = delimiter_arg;
		Py_INCREF(delimiter);
	} else
		delimiter = PyUnicode_FromString(",");
	if(!delimiter)
		goto error;

	/* NOTE(review): both helpers are assumed to return a new reference
	 * to a tuple, or NULL with an exception set --- confirm against
	 * their definitions */
	attributes = llwtokenizer_build_attributes(attributes_arg);
	if(!attributes)
		goto error;
	formats = llwtokenizer_build_formats(formats_arg);
	if(!formats)
		goto error;

	if(PyTuple_GET_SIZE(attributes) != PyTuple_GET_SIZE(formats)) {
		PyErr_SetString(PyExc_ValueError, "len(attributes) != len(formats)");
		goto error;
	}

	/*
	 * nothing can fail from here on.  install the new state first and
	 * only then release whatever an earlier __init__() call left
	 * behind, so that the instance is never observed half updated.
	 */

	stale[0] = rowdumper->delimiter;
	stale[1] = rowdumper->attributes;
	stale[2] = rowdumper->formats;
	stale[3] = rowdumper->iter;
	stale[4] = rowdumper->tokens;

	rowdumper->delimiter = delimiter;
	rowdumper->attributes = attributes;
	rowdumper->formats = formats;
	rowdumper->rows_converted = 0;
	rowdumper->iter = Py_None;
	Py_INCREF(rowdumper->iter);
	rowdumper->tokens = Py_None;
	Py_INCREF(rowdumper->tokens);

	for(i = 0; i < sizeof(stale) / sizeof(*stale); i++)
		Py_XDECREF(stale[i]);

	return 0;

error:
	Py_XDECREF(delimiter);
	Py_XDECREF(attributes);
	Py_XDECREF(formats);
	return -1;
}


/*
 * dump() method
 */


/*
 * Set the iterable from which subsequent rows are drawn.
 *
 * An iterator is obtained from the argument and stored in the instance,
 * replacing any previous one.  Returns a new reference to self so that the
 * call can be used directly as the subject of a for loop, or NULL with an
 * exception set if the argument is not iterable.
 */
static PyObject *dump(PyObject *self, PyObject *iterable)
{
	ligolw_RowDumper *rowdumper = (ligolw_RowDumper *) self;
	PyObject *iter = PyObject_GetIter(iterable);
	PyObject *previous;

	if(!iter)
		return NULL;

	/*
	 * the iter member is exposed to Python as a writable T_OBJECT, so
	 * it is NULL after "del rowdumper.iter", and it is also NULL if
	 * __init__() never ran;  Py_XDECREF() copes with both.  the new
	 * iterator is stored before the old one is released so the member
	 * never refers to a dead object.
	 */
	previous = rowdumper->iter;
	rowdumper->iter = iter;
	Py_XDECREF(previous);

	Py_INCREF(self);
	return self;
}


/*
 * __iter__() method
 */


static PyObject *__iter__(PyObject *self)
{
	/* a RowDumper is its own iterator:  hand the caller a new
	 * reference to the very same object */
	PyObject *iterator = self;

	Py_INCREF(iterator);

	return iterator;
}


/*
 * next() method
 */


/*
 * Convert the next row to a delimited unicode string.
 *
 * One object is pulled from the stored iterator.  Each attribute named in
 * the attributes tuple is fetched from it and passed to the matching entry
 * of the formats tuple;  an attribute whose value is None becomes an empty
 * string without the format function being called.  The resulting tuple of
 * tokens is stored in the tokens member and the tokens joined with the
 * delimiter are returned.
 *
 * Returns a new reference, or NULL with an exception set:  StopIteration
 * when the iterator is exhausted (the iter member is then reset to None),
 * TypeError carrying the iter value if it is not an iterator,
 * RuntimeError if the instance was never successfully initialized, or
 * whatever exception attribute retrieval, a format function or the join
 * raised.
 */
static PyObject *next(PyObject *self)
{
	ligolw_RowDumper *rowdumper = (ligolw_RowDumper *) self;
	PyObject *tokens = NULL;
	PyObject *row = NULL;
	PyObject *iter;
	PyObject *previous;
	PyObject *result;
	Py_ssize_t n;
	Py_ssize_t i;

	/*
	 * the type can be subclassed, so __init__() might never have been
	 * run (or might have failed).  refuse to continue rather than
	 * dereference NULL members or index past the end of a tuple.
	 */

	if(!rowdumper->delimiter || !rowdumper->attributes || !rowdumper->formats || PyTuple_GET_SIZE(rowdumper->attributes) != PyTuple_GET_SIZE(rowdumper->formats)) {
		PyErr_SetString(PyExc_RuntimeError, "RowDumper is not properly initialized");
		return NULL;
	}
	n = PyTuple_GET_SIZE(rowdumper->attributes);

	/*
	 * retrieve the next row object.  the iter member is writable from
	 * Python and is NULL after "del rowdumper.iter";  that case is
	 * reported the same way as iter = None.  a reference is held
	 * across the call because the iterator can run arbitrary code,
	 * including code that rebinds the iter member.
	 */

	iter = rowdumper->iter;
	if(!iter || !PyIter_Check(iter)) {
		PyErr_SetObject(PyExc_TypeError, iter ? iter : Py_None);
		return NULL;
	}
	Py_INCREF(iter);
	row = PyIter_Next(iter);
	Py_DECREF(iter);
	if(!row) {
		if(!PyErr_Occurred()) {
			/* exhausted:  forget the iterator */
			previous = rowdumper->iter;
			rowdumper->iter = Py_None;
			Py_INCREF(rowdumper->iter);
			Py_XDECREF(previous);
			PyErr_SetNone(PyExc_StopIteration);
		}
		return NULL;
	}

	/*
	 * wipe out the tuple of tokens from the previous row, and start a
	 * new tuple
	 */

	previous = rowdumper->tokens;
	rowdumper->tokens = Py_None;
	Py_INCREF(rowdumper->tokens);
	Py_XDECREF(previous);

	tokens = PyTuple_New(n);
	if(!tokens)
		goto error;

	/*
	 * retrieve attributes from the row object one-by-one, convert to
	 * strings, and insert into new token tuple
	 */

	for(i = 0; i < n; i++) {
		PyObject *val = PyObject_GetAttr(row, PyTuple_GET_ITEM(rowdumper->attributes, i));
		PyObject *token;

		if(!val)
			goto error;

		if(val == Py_None)
			token = PyUnicode_FromString(""); /* u"" */
		else
			token = PyObject_CallFunctionObjArgs(PyTuple_GET_ITEM(rowdumper->formats, i), val, NULL);
		Py_DECREF(val);

		if(!token)
			goto error;

		/* the tuple takes ownership of the token */
		PyTuple_SET_ITEM(tokens, i, token);
	}
	Py_DECREF(row);
	row = NULL;

	/*
	 * that worked, so expose the new token tuple
	 */

	previous = rowdumper->tokens;
	rowdumper->tokens = tokens;
	Py_XDECREF(previous);

	/*
	 * return tokens concatenated into a single string using the
	 * delimiter
	 */

	result = PyUnicode_Join(rowdumper->delimiter, rowdumper->tokens);

	if(result)
		rowdumper->rows_converted++;

	return result;

error:
	/* the partially filled tuple releases the tokens it already owns */
	Py_XDECREF(tokens);
	Py_XDECREF(row);
	return NULL;
}


/*
 * Type information
 */


static struct PyMemberDef members[] = {
kipp's avatar
kipp committed
264
	{"delimiter", T_OBJECT, offsetof(ligolw_RowDumper, delimiter), READONLY, "The delimiter as a unicode string."},
kipp's avatar
kipp committed
265
	{"attributes", T_OBJECT, offsetof(ligolw_RowDumper, attributes), READONLY, "In-order tuple of attribute names as strings."},
266
	{"formats", T_OBJECT, offsetof(ligolw_RowDumper, formats), READONLY, "In-order tuple of row element format functions."},
kipp's avatar
kipp committed
267
	{"iter", T_OBJECT, offsetof(ligolw_RowDumper, iter), 0, "The iterator being used to provide rows for conversion."},
268
	{"rows_converted", T_LONG, offsetof(ligolw_RowDumper, rows_converted), 0, "Count of rows converted."},
269
	{"tokens", T_OBJECT, offsetof(ligolw_RowDumper, tokens), READONLY, "In-order tuple of unicode tokens from most recently converted row."},
270 271 272 273
	{NULL,}
};


kipp's avatar
kipp committed
274 275 276 277 278 279
static struct PyMethodDef methods[] = {
	{"dump", dump, METH_O, "Set the Python iterable from which row objects will be retrieved for dumping."},
	{NULL,}
};


280 281 282 283 284
PyTypeObject ligolw_RowDumper_Type = {
	PyObject_HEAD_INIT(NULL)
	.tp_basicsize = sizeof(ligolw_RowDumper),
	.tp_dealloc = __del__,
	.tp_doc =
kipp's avatar
kipp committed
285 286 287 288 289 290 291 292 293 294 295 296 297 298
"An iterator for converting row objects into string tokens.\n" \
"\n" \
"Example:\n" \
"\n" \
">>> class Row(object):\n" \
"...     pass\n" \
"... \n" \
">>> rows = [Row(), Row(), Row()]\n" \
">>> rows[0].snr = 10.1\n" \
">>> rows[1].snr = 15.2\n" \
">>> rows[2].snr = 20.3\n" \
">>> rows[0].status = \"bad\"\n" \
">>> rows[1].status = \"bad\"\n" \
">>> rows[2].status = \"good\"\n" \
299
">>> rowdumper = RowDumper((\"snr\", \"status\"), (\"%.16g\".__mod__, \"\\\"%s\\\"\".__mod__))\n" \
kipp's avatar
kipp committed
300
">>> for line in rowdumper.dump(rows):\n" \
301
"...     print(line)\n" \
kipp's avatar
kipp committed
302 303 304
"... \n" \
"10.1,\"bad\"\n" \
"15.2,\"bad\"\n" \
kipp's avatar
kipp committed
305 306
"20.3,\"good\"\n" \
"\n" \
kipp's avatar
kipp committed
307 308 309 310 311 312 313 314 315 316
"An instance of RowDumper is initialized with two arguments and an optional\n" \
"third argument.  The first argument is a sequence of attribute names.  The\n" \
"second argument is a sequence of Python format strings.  The third, optional,\n" \
"argument is the unicode string to use as the delimiter between tokens (the\n" \
"default is u\",\").  The row dumper is started by calling the .dump() method\n" \
"which takes a Python iterable as its single argument.  After the .dump()\n" \
"method has been called, when a RowDumper instance is iterated over it\n" \
"retrieves objects, one-by-one, from the iterable passed to the .dump() method\n" \
"and yields a sequence of unicode strings containing the delimited string\n" \
"representations of the values of the attributes of those objects.  The\n" \
kipp's avatar
kipp committed
317 318 319
"attribute values are printed in the order specified when the RowDumper was\n" \
"created, and using the formats specified.  An attribute whose value is None\n" \
"is printed as an empty string regardless of the requested format.",
320 321 322 323 324
	.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
	.tp_init = __init__,
	.tp_iter = __iter__,
	.tp_iternext = next,
	.tp_members = members,
kipp's avatar
kipp committed
325
	.tp_methods = methods,
326 327 328
	.tp_name = MODULE_NAME ".RowDumper",
	.tp_new = PyType_GenericNew,
};