Line data Source code
1 : /* csv module */
2 :
3 : /*
4 :
5 : This module provides the low-level underpinnings of a CSV reading/writing
6 : module. Users should not use this module directly, but import the csv.py
7 : module instead.
8 :
9 : */
10 :
11 : #define MODULE_VERSION "1.0"
12 :
13 : #include "Python.h"
14 : #include "structmember.h"
15 :
/* True when o is a str instance (PyUnicode_Check accepts subclasses too). */
16 : #define IS_BASESTRING(o) \
17 : PyUnicode_Check(o)
18 :
/* Per-module state.  The two object members are released in _csv_clear(). */
19 : typedef struct {
20 : PyObject *error_obj; /* exception type raised for CSV errors */
21 : PyObject *dialects; /* dialect registry: dict looked up by dialect name */
22 : long field_limit; /* max number of characters a parsed field may hold */
23 : } _csvstate;
24 :
/* Fetch the state block of module object o.  This function-like macro shares
   its name with the typedef; it only expands when followed by '('. */
25 : #define _csvstate(o) ((_csvstate *)PyModule_GetState(o))
26 :
27 : static int
28 0 : _csv_clear(PyObject *m)
29 : {
30 0 : Py_CLEAR(_csvstate(m)->error_obj);
31 0 : Py_CLEAR(_csvstate(m)->dialects);
32 0 : return 0;
33 : }
34 :
35 : static int
36 0 : _csv_traverse(PyObject *m, visitproc visit, void *arg)
37 : {
38 0 : Py_VISIT(_csvstate(m)->error_obj);
39 0 : Py_VISIT(_csvstate(m)->dialects);
40 0 : return 0;
41 : }
42 :
43 : static void
44 0 : _csv_free(void *m)
45 : {
46 0 : _csv_clear((PyObject *)m);
47 0 : }
48 :
/* Forward declaration so the macro below can take the definition's address. */
49 : static struct PyModuleDef _csvmodule;
50 :
/* State of the module instance located through PyState_FindModule().  Every
   use repeats the lookup. */
51 : #define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
52 :
/* States of the reader's per-character parser (see parse_process_char). */
53 : typedef enum {
54 : START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
55 : IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
56 : EAT_CRNL
57 : } ParserState;
58 :
/* Quoting policies accepted for a dialect's "quoting" option. */
59 : typedef enum {
60 : QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE
61 : } QuoteStyle;
62 :
/* Pairs a QuoteStyle value with its textual name. */
63 : typedef struct {
64 : QuoteStyle style;
65 : char *name;
66 : } StyleDesc;
67 :
/* Table of the valid quoting styles.  The final entry has a NULL name, which
   is what ends the scan in dialect_check_quoting(). */
68 : static StyleDesc quote_styles[] = {
69 : { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
70 : { QUOTE_ALL, "QUOTE_ALL" },
71 : { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
72 : { QUOTE_NONE, "QUOTE_NONE" },
73 : { 0 }
74 : };
75 :
/* Instance layout of _csv.Dialect.  For the three Py_UCS4 members the value 0
   means "not set"; the getters expose that as None. */
76 : typedef struct {
77 : PyObject_HEAD
78 :
79 : int doublequote; /* is " represented by ""? */
80 : Py_UCS4 delimiter; /* field separator */
81 : Py_UCS4 quotechar; /* quote character */
82 : Py_UCS4 escapechar; /* escape character */
83 : int skipinitialspace; /* ignore spaces following delimiter? */
84 : PyObject *lineterminator; /* str appended after each written record; released in Dialect_dealloc */
85 : int quoting; /* a QuoteStyle value, checked by dialect_check_quoting() */
86 :
87 : int strict; /* raise exception on bad CSV */
88 : } DialectObj;
89 :
90 : static PyTypeObject Dialect_Type;
91 :
/* Instance layout of _csv.reader: the input iterator, the dialect and the
   state of the record currently being parsed. */
92 : typedef struct {
93 : PyObject_HEAD
94 :
95 : PyObject *input_iter; /* iterate over this for input lines */
96 :
97 : DialectObj *dialect; /* parsing dialect */
98 :
99 : PyObject *fields; /* field list for current record */
100 : ParserState state; /* current CSV parse state */
101 : Py_UCS4 *field; /* buffer collecting the characters of the current field */
102 : Py_ssize_t field_size; /* capacity of the field buffer, in Py_UCS4 units */
103 : Py_ssize_t field_len; /* number of characters stored for the current field */
104 : int numeric_field; /* convert the current field with PyNumber_Float when saved */
105 : unsigned long line_num; /* count of lines pulled from input_iter so far */
106 : } ReaderObj;
107 :
108 : static PyTypeObject Reader_Type;
109 :
110 : #define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type)
111 :
/* Instance layout of the writer object: the output callable, the dialect and
   the buffer in which one output record is assembled. */
112 : typedef struct {
113 : PyObject_HEAD
114 :
115 : PyObject *writeline; /* write output lines to this file */
116 :
117 : DialectObj *dialect; /* dialect controlling how records are formatted */
118 :
119 : Py_UCS4 *rec; /* record buffer filled by the join_* helpers */
120 : Py_ssize_t rec_size; /* capacity of rec, in Py_UCS4 units */
121 : Py_ssize_t rec_len; /* number of characters currently stored in rec */
122 : int num_fields; /* number of fields in record */
123 : } WriterObj;
124 :
125 : static PyTypeObject Writer_Type;
126 :
127 : /*
128 : * DIALECT class
129 : */
130 :
131 : static PyObject *
132 0 : get_dialect_from_registry(PyObject * name_obj)
133 : {
134 : PyObject *dialect_obj;
135 :
136 0 : dialect_obj = PyDict_GetItem(_csvstate_global->dialects, name_obj);
137 0 : if (dialect_obj == NULL) {
138 0 : if (!PyErr_Occurred())
139 0 : PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
140 : }
141 : else
142 0 : Py_INCREF(dialect_obj);
143 0 : return dialect_obj;
144 : }
145 :
146 : static PyObject *
147 0 : get_string(PyObject *str)
148 : {
149 0 : Py_XINCREF(str);
150 0 : return str;
151 : }
152 :
153 : static PyObject *
154 0 : get_nullchar_as_None(Py_UCS4 c)
155 : {
156 0 : if (c == '\0') {
157 0 : Py_INCREF(Py_None);
158 0 : return Py_None;
159 : }
160 : else
161 0 : return PyUnicode_FromOrdinal(c);
162 : }
163 :
164 : static PyObject *
165 0 : Dialect_get_lineterminator(DialectObj *self)
166 : {
167 0 : return get_string(self->lineterminator);
168 : }
169 :
170 : static PyObject *
171 0 : Dialect_get_delimiter(DialectObj *self)
172 : {
173 0 : return get_nullchar_as_None(self->delimiter);
174 : }
175 :
176 : static PyObject *
177 0 : Dialect_get_escapechar(DialectObj *self)
178 : {
179 0 : return get_nullchar_as_None(self->escapechar);
180 : }
181 :
182 : static PyObject *
183 0 : Dialect_get_quotechar(DialectObj *self)
184 : {
185 0 : return get_nullchar_as_None(self->quotechar);
186 : }
187 :
188 : static PyObject *
189 0 : Dialect_get_quoting(DialectObj *self)
190 : {
191 0 : return PyLong_FromLong(self->quoting);
192 : }
193 :
194 : static int
195 0 : _set_bool(const char *name, int *target, PyObject *src, int dflt)
196 : {
197 0 : if (src == NULL)
198 0 : *target = dflt;
199 : else {
200 0 : int b = PyObject_IsTrue(src);
201 0 : if (b < 0)
202 0 : return -1;
203 0 : *target = b;
204 : }
205 0 : return 0;
206 : }
207 :
208 : static int
209 0 : _set_int(const char *name, int *target, PyObject *src, int dflt)
210 : {
211 0 : if (src == NULL)
212 0 : *target = dflt;
213 : else {
214 : long value;
215 0 : if (!PyLong_CheckExact(src)) {
216 0 : PyErr_Format(PyExc_TypeError,
217 : "\"%s\" must be an integer", name);
218 0 : return -1;
219 : }
220 0 : value = PyLong_AsLong(src);
221 0 : if (value == -1 && PyErr_Occurred())
222 0 : return -1;
223 : #if SIZEOF_LONG > SIZEOF_INT
224 : if (value > INT_MAX || value < INT_MIN) {
225 : PyErr_Format(PyExc_ValueError,
226 : "integer out of range for \"%s\"", name);
227 : return -1;
228 : }
229 : #endif
230 0 : *target = (int)value;
231 : }
232 0 : return 0;
233 : }
234 :
235 : static int
236 0 : _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
237 : {
238 0 : if (src == NULL)
239 0 : *target = dflt;
240 : else {
241 0 : *target = '\0';
242 0 : if (src != Py_None) {
243 : Py_ssize_t len;
244 0 : len = PyUnicode_GetLength(src);
245 0 : if (len > 1) {
246 0 : PyErr_Format(PyExc_TypeError,
247 : "\"%s\" must be an 1-character string",
248 : name);
249 0 : return -1;
250 : }
251 0 : if (len > 0)
252 0 : *target = PyUnicode_READ_CHAR(src, 0);
253 : }
254 : }
255 0 : return 0;
256 : }
257 :
258 : static int
259 0 : _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
260 : {
261 0 : if (src == NULL)
262 0 : *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
263 : else {
264 0 : if (src == Py_None)
265 0 : *target = NULL;
266 0 : else if (!IS_BASESTRING(src)) {
267 0 : PyErr_Format(PyExc_TypeError,
268 : "\"%s\" must be a string", name);
269 0 : return -1;
270 : }
271 : else {
272 0 : Py_XDECREF(*target);
273 0 : Py_INCREF(src);
274 0 : *target = src;
275 : }
276 : }
277 0 : return 0;
278 : }
279 :
280 : static int
281 0 : dialect_check_quoting(int quoting)
282 : {
283 : StyleDesc *qs;
284 :
285 0 : for (qs = quote_styles; qs->name; qs++) {
286 0 : if (qs->style == quoting)
287 0 : return 0;
288 : }
289 0 : PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
290 0 : return -1;
291 : }
292 :
/* Byte offset of member x inside DialectObj. */
293 : #define D_OFF(x) offsetof(DialectObj, x)
294 :
/* Integer options exposed directly from the struct as read-only attributes. */
295 : static struct PyMemberDef Dialect_memberlist[] = {
296 : { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
297 : { "doublequote", T_INT, D_OFF(doublequote), READONLY },
298 : { "strict", T_INT, D_OFF(strict), READONLY },
299 : { NULL }
300 : };
301 :
/* Attributes computed by getter functions.  No setter is listed, so the
   remaining fields of each entry are zero-initialized. */
302 : static PyGetSetDef Dialect_getsetlist[] = {
303 : { "delimiter", (getter)Dialect_get_delimiter},
304 : { "escapechar", (getter)Dialect_get_escapechar},
305 : { "lineterminator", (getter)Dialect_get_lineterminator},
306 : { "quotechar", (getter)Dialect_get_quotechar},
307 : { "quoting", (getter)Dialect_get_quoting},
308 : {NULL},
309 : };
310 :
311 : static void
312 0 : Dialect_dealloc(DialectObj *self)
313 : {
314 0 : Py_XDECREF(self->lineterminator);
315 0 : Py_TYPE(self)->tp_free((PyObject *)self);
316 0 : }
317 :
/* Keyword names accepted by dialect_new().  The order must match the order of
   the output pointers passed to PyArg_ParseTupleAndKeywords() there. */
318 : static char *dialect_kws[] = {
319 : "dialect",
320 : "delimiter",
321 : "doublequote",
322 : "escapechar",
323 : "lineterminator",
324 : "quotechar",
325 : "quoting",
326 : "skipinitialspace",
327 : "strict",
328 : NULL
329 : };
330 :
331 : static PyObject *
332 0 : dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
333 : {
334 : DialectObj *self;
335 0 : PyObject *ret = NULL;
336 0 : PyObject *dialect = NULL;
337 0 : PyObject *delimiter = NULL;
338 0 : PyObject *doublequote = NULL;
339 0 : PyObject *escapechar = NULL;
340 0 : PyObject *lineterminator = NULL;
341 0 : PyObject *quotechar = NULL;
342 0 : PyObject *quoting = NULL;
343 0 : PyObject *skipinitialspace = NULL;
344 0 : PyObject *strict = NULL;
345 :
346 0 : if (!PyArg_ParseTupleAndKeywords(args, kwargs,
347 : "|OOOOOOOOO", dialect_kws,
348 : &dialect,
349 : &delimiter,
350 : &doublequote,
351 : &escapechar,
352 : &lineterminator,
353 : "echar,
354 : "ing,
355 : &skipinitialspace,
356 : &strict))
357 0 : return NULL;
358 :
359 0 : if (dialect != NULL) {
360 0 : if (IS_BASESTRING(dialect)) {
361 0 : dialect = get_dialect_from_registry(dialect);
362 0 : if (dialect == NULL)
363 0 : return NULL;
364 : }
365 : else
366 0 : Py_INCREF(dialect);
367 : /* Can we reuse this instance? */
368 0 : if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
369 0 : delimiter == 0 &&
370 0 : doublequote == 0 &&
371 0 : escapechar == 0 &&
372 0 : lineterminator == 0 &&
373 0 : quotechar == 0 &&
374 0 : quoting == 0 &&
375 0 : skipinitialspace == 0 &&
376 0 : strict == 0)
377 0 : return dialect;
378 : }
379 :
380 0 : self = (DialectObj *)type->tp_alloc(type, 0);
381 0 : if (self == NULL) {
382 0 : Py_XDECREF(dialect);
383 0 : return NULL;
384 : }
385 0 : self->lineterminator = NULL;
386 :
387 0 : Py_XINCREF(delimiter);
388 0 : Py_XINCREF(doublequote);
389 0 : Py_XINCREF(escapechar);
390 0 : Py_XINCREF(lineterminator);
391 0 : Py_XINCREF(quotechar);
392 0 : Py_XINCREF(quoting);
393 0 : Py_XINCREF(skipinitialspace);
394 0 : Py_XINCREF(strict);
395 0 : if (dialect != NULL) {
396 : #define DIALECT_GETATTR(v, n) \
397 : if (v == NULL) \
398 : v = PyObject_GetAttrString(dialect, n)
399 0 : DIALECT_GETATTR(delimiter, "delimiter");
400 0 : DIALECT_GETATTR(doublequote, "doublequote");
401 0 : DIALECT_GETATTR(escapechar, "escapechar");
402 0 : DIALECT_GETATTR(lineterminator, "lineterminator");
403 0 : DIALECT_GETATTR(quotechar, "quotechar");
404 0 : DIALECT_GETATTR(quoting, "quoting");
405 0 : DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
406 0 : DIALECT_GETATTR(strict, "strict");
407 0 : PyErr_Clear();
408 : }
409 :
410 : /* check types and convert to C values */
411 : #define DIASET(meth, name, target, src, dflt) \
412 : if (meth(name, target, src, dflt)) \
413 : goto err
414 0 : DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
415 0 : DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
416 0 : DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
417 0 : DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
418 0 : DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
419 0 : DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
420 0 : DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
421 0 : DIASET(_set_bool, "strict", &self->strict, strict, 0);
422 :
423 : /* validate options */
424 0 : if (dialect_check_quoting(self->quoting))
425 0 : goto err;
426 0 : if (self->delimiter == 0) {
427 0 : PyErr_SetString(PyExc_TypeError, "delimiter must be set");
428 0 : goto err;
429 : }
430 0 : if (quotechar == Py_None && quoting == NULL)
431 0 : self->quoting = QUOTE_NONE;
432 0 : if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
433 0 : PyErr_SetString(PyExc_TypeError,
434 : "quotechar must be set if quoting enabled");
435 0 : goto err;
436 : }
437 0 : if (self->lineterminator == 0) {
438 0 : PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
439 0 : goto err;
440 : }
441 :
442 0 : ret = (PyObject *)self;
443 0 : Py_INCREF(self);
444 : err:
445 0 : Py_XDECREF(self);
446 0 : Py_XDECREF(dialect);
447 0 : Py_XDECREF(delimiter);
448 0 : Py_XDECREF(doublequote);
449 0 : Py_XDECREF(escapechar);
450 0 : Py_XDECREF(lineterminator);
451 0 : Py_XDECREF(quotechar);
452 0 : Py_XDECREF(quoting);
453 0 : Py_XDECREF(skipinitialspace);
454 0 : Py_XDECREF(strict);
455 0 : return ret;
456 : }
457 :
458 :
459 : PyDoc_STRVAR(Dialect_Type_doc,
460 : "CSV dialect\n"
461 : "\n"
462 : "The Dialect type records CSV parsing and generation options.\n");
463 :
/* Type object for _csv.Dialect.  Instances are built by dialect_new(); the
   type allows subclassing (Py_TPFLAGS_BASETYPE) and is not GC-enabled. */
464 : static PyTypeObject Dialect_Type = {
465 : PyVarObject_HEAD_INIT(NULL, 0)
466 : "_csv.Dialect", /* tp_name */
467 : sizeof(DialectObj), /* tp_basicsize */
468 : 0, /* tp_itemsize */
469 : /* methods */
470 : (destructor)Dialect_dealloc, /* tp_dealloc */
471 : (printfunc)0, /* tp_print */
472 : (getattrfunc)0, /* tp_getattr */
473 : (setattrfunc)0, /* tp_setattr */
474 : 0, /* tp_reserved */
475 : (reprfunc)0, /* tp_repr */
476 : 0, /* tp_as_number */
477 : 0, /* tp_as_sequence */
478 : 0, /* tp_as_mapping */
479 : (hashfunc)0, /* tp_hash */
480 : (ternaryfunc)0, /* tp_call */
481 : (reprfunc)0, /* tp_str */
482 : 0, /* tp_getattro */
483 : 0, /* tp_setattro */
484 : 0, /* tp_as_buffer */
485 : Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
486 : Dialect_Type_doc, /* tp_doc */
487 : 0, /* tp_traverse */
488 : 0, /* tp_clear */
489 : 0, /* tp_richcompare */
490 : 0, /* tp_weaklistoffset */
491 : 0, /* tp_iter */
492 : 0, /* tp_iternext */
493 : 0, /* tp_methods */
494 : Dialect_memberlist, /* tp_members */
495 : Dialect_getsetlist, /* tp_getset */
496 : 0, /* tp_base */
497 : 0, /* tp_dict */
498 : 0, /* tp_descr_get */
499 : 0, /* tp_descr_set */
500 : 0, /* tp_dictoffset */
501 : 0, /* tp_init */
502 : 0, /* tp_alloc */
503 : dialect_new, /* tp_new */
504 : 0, /* tp_free */
505 : };
506 :
507 : /*
508 : * Return an instance of the dialect type, given a Python instance or kwarg
509 : * description of the dialect
510 : */
511 : static PyObject *
512 0 : _call_dialect(PyObject *dialect_inst, PyObject *kwargs)
513 : {
514 : PyObject *ctor_args;
515 : PyObject *dialect;
516 :
517 0 : ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
518 0 : if (ctor_args == NULL)
519 0 : return NULL;
520 0 : dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
521 0 : Py_DECREF(ctor_args);
522 0 : return dialect;
523 : }
524 :
525 : /*
526 : * READER
527 : */
528 : static int
529 0 : parse_save_field(ReaderObj *self)
530 : {
531 : PyObject *field;
532 :
533 0 : field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
534 0 : (void *) self->field, self->field_len);
535 0 : if (field == NULL)
536 0 : return -1;
537 0 : self->field_len = 0;
538 0 : if (self->numeric_field) {
539 : PyObject *tmp;
540 :
541 0 : self->numeric_field = 0;
542 0 : tmp = PyNumber_Float(field);
543 0 : Py_DECREF(field);
544 0 : if (tmp == NULL)
545 0 : return -1;
546 0 : field = tmp;
547 : }
548 0 : PyList_Append(self->fields, field);
549 0 : Py_DECREF(field);
550 0 : return 0;
551 : }
552 :
553 : static int
554 0 : parse_grow_buff(ReaderObj *self)
555 : {
556 0 : if (self->field_size == 0) {
557 0 : self->field_size = 4096;
558 0 : if (self->field != NULL)
559 0 : PyMem_Free(self->field);
560 0 : self->field = PyMem_New(Py_UCS4, self->field_size);
561 : }
562 : else {
563 0 : Py_UCS4 *field = self->field;
564 0 : if (self->field_size > PY_SSIZE_T_MAX / 2) {
565 0 : PyErr_NoMemory();
566 0 : return 0;
567 : }
568 0 : self->field_size *= 2;
569 0 : self->field = PyMem_Resize(field, Py_UCS4, self->field_size);
570 : }
571 0 : if (self->field == NULL) {
572 0 : PyErr_NoMemory();
573 0 : return 0;
574 : }
575 0 : return 1;
576 : }
577 :
578 : static int
579 0 : parse_add_char(ReaderObj *self, Py_UCS4 c)
580 : {
581 0 : if (self->field_len >= _csvstate_global->field_limit) {
582 0 : PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
583 0 : _csvstate_global->field_limit);
584 0 : return -1;
585 : }
586 0 : if (self->field_len == self->field_size && !parse_grow_buff(self))
587 0 : return -1;
588 0 : self->field[self->field_len++] = c;
589 0 : return 0;
590 : }
591 :
/*
 * Feed one character into the reader's state machine.
 *
 * c is a character of the current input line, or '\0' to mark the end of the
 * line (Reader_iternext passes 0 after the last character of each line).
 * Completed fields are appended to self->fields through parse_save_field().
 * A record is complete when the state is START_RECORD again after the
 * end-of-line call.
 *
 * Returns 0 on success, -1 with an exception set on failure.
 */
592 : static int
593 0 : parse_process_char(ReaderObj *self, Py_UCS4 c)
594 : {
595 0 : DialectObj *dialect = self->dialect;
596 :
597 0 : switch (self->state) {
598 : case START_RECORD:
599 : /* start of record */
600 0 : if (c == '\0')
601 : /* empty line - return [] */
602 0 : break;
603 0 : else if (c == '\n' || c == '\r') {
604 0 : self->state = EAT_CRNL;
605 0 : break;
606 : }
607 : /* normal character - handle as START_FIELD */
608 0 : self->state = START_FIELD;
609 : /* fallthru */
610 : case START_FIELD:
611 : /* expecting field */
612 0 : if (c == '\n' || c == '\r' || c == '\0') {
613 : /* save empty field - return [fields] */
614 0 : if (parse_save_field(self) < 0)
615 0 : return -1;
616 0 : self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
617 : }
618 0 : else if (c == dialect->quotechar &&
619 0 : dialect->quoting != QUOTE_NONE) {
620 : /* start quoted field */
621 0 : self->state = IN_QUOTED_FIELD;
622 : }
623 0 : else if (c == dialect->escapechar) {
624 : /* possible escaped character */
625 0 : self->state = ESCAPED_CHAR;
626 : }
627 0 : else if (c == ' ' && dialect->skipinitialspace)
628 : /* ignore space at start of field */
629 : ;
630 0 : else if (c == dialect->delimiter) {
631 : /* save empty field */
632 0 : if (parse_save_field(self) < 0)
633 0 : return -1;
634 : }
635 : else {
636 : /* begin new unquoted field */
/* under QUOTE_NONNUMERIC an unquoted field is converted to float when saved */
637 0 : if (dialect->quoting == QUOTE_NONNUMERIC)
638 0 : self->numeric_field = 1;
639 0 : if (parse_add_char(self, c) < 0)
640 0 : return -1;
641 0 : self->state = IN_FIELD;
642 : }
643 0 : break;
644 :
645 : case ESCAPED_CHAR:
/* an escape character followed by end of line stores a newline */
646 0 : if (c == '\0')
647 0 : c = '\n';
648 0 : if (parse_add_char(self, c) < 0)
649 0 : return -1;
650 0 : self->state = IN_FIELD;
651 0 : break;
652 :
653 : case IN_FIELD:
654 : /* in unquoted field */
655 0 : if (c == '\n' || c == '\r' || c == '\0') {
656 : /* end of line - return [fields] */
657 0 : if (parse_save_field(self) < 0)
658 0 : return -1;
659 0 : self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
660 : }
661 0 : else if (c == dialect->escapechar) {
662 : /* possible escaped character */
663 0 : self->state = ESCAPED_CHAR;
664 : }
665 0 : else if (c == dialect->delimiter) {
666 : /* save field - wait for new field */
667 0 : if (parse_save_field(self) < 0)
668 0 : return -1;
669 0 : self->state = START_FIELD;
670 : }
671 : else {
672 : /* normal character - save in field */
673 0 : if (parse_add_char(self, c) < 0)
674 0 : return -1;
675 : }
676 0 : break;
677 :
678 : case IN_QUOTED_FIELD:
679 : /* in quoted field */
/* end of line inside quotes: keep the state, so the caller reads another
   line and the record continues */
680 0 : if (c == '\0')
681 : ;
682 0 : else if (c == dialect->escapechar) {
683 : /* Possible escape character */
684 0 : self->state = ESCAPE_IN_QUOTED_FIELD;
685 : }
686 0 : else if (c == dialect->quotechar &&
687 0 : dialect->quoting != QUOTE_NONE) {
688 0 : if (dialect->doublequote) {
689 : /* doublequote; " represented by "" */
690 0 : self->state = QUOTE_IN_QUOTED_FIELD;
691 : }
692 : else {
693 : /* end of quote part of field */
694 0 : self->state = IN_FIELD;
695 : }
696 : }
697 : else {
698 : /* normal character - save in field */
699 0 : if (parse_add_char(self, c) < 0)
700 0 : return -1;
701 : }
702 0 : break;
703 :
704 : case ESCAPE_IN_QUOTED_FIELD:
/* as in ESCAPED_CHAR, an escape at end of line stores a newline */
705 0 : if (c == '\0')
706 0 : c = '\n';
707 0 : if (parse_add_char(self, c) < 0)
708 0 : return -1;
709 0 : self->state = IN_QUOTED_FIELD;
710 0 : break;
711 :
712 : case QUOTE_IN_QUOTED_FIELD:
713 : /* doublequote - seen a quote in a quoted field */
714 0 : if (dialect->quoting != QUOTE_NONE &&
715 0 : c == dialect->quotechar) {
716 : /* save "" as " */
717 0 : if (parse_add_char(self, c) < 0)
718 0 : return -1;
719 0 : self->state = IN_QUOTED_FIELD;
720 : }
721 0 : else if (c == dialect->delimiter) {
722 : /* save field - wait for new field */
723 0 : if (parse_save_field(self) < 0)
724 0 : return -1;
725 0 : self->state = START_FIELD;
726 : }
727 0 : else if (c == '\n' || c == '\r' || c == '\0') {
728 : /* end of line - return [fields] */
729 0 : if (parse_save_field(self) < 0)
730 0 : return -1;
731 0 : self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
732 : }
733 0 : else if (!dialect->strict) {
/* non-strict mode: keep the stray character and continue as an unquoted field */
734 0 : if (parse_add_char(self, c) < 0)
735 0 : return -1;
736 0 : self->state = IN_FIELD;
737 : }
738 : else {
739 : /* illegal */
740 0 : PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
741 : dialect->delimiter,
742 : dialect->quotechar);
743 0 : return -1;
744 : }
745 0 : break;
746 :
747 : case EAT_CRNL:
/* after a line break only more line breaks or the end of the line may follow */
748 0 : if (c == '\n' || c == '\r')
749 : ;
750 0 : else if (c == '\0')
751 0 : self->state = START_RECORD;
752 : else {
753 0 : PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
754 0 : return -1;
755 : }
756 0 : break;
757 :
758 : }
759 0 : return 0;
760 : }
761 :
762 : static int
763 0 : parse_reset(ReaderObj *self)
764 : {
765 0 : Py_XDECREF(self->fields);
766 0 : self->fields = PyList_New(0);
767 0 : if (self->fields == NULL)
768 0 : return -1;
769 0 : self->field_len = 0;
770 0 : self->state = START_RECORD;
771 0 : self->numeric_field = 0;
772 0 : return 0;
773 : }
774 :
775 : static PyObject *
776 0 : Reader_iternext(ReaderObj *self)
777 : {
778 0 : PyObject *fields = NULL;
779 : Py_UCS4 c;
780 : Py_ssize_t pos, linelen;
781 : unsigned int kind;
782 : void *data;
783 : PyObject *lineobj;
784 :
785 0 : if (parse_reset(self) < 0)
786 0 : return NULL;
787 : do {
788 0 : lineobj = PyIter_Next(self->input_iter);
789 0 : if (lineobj == NULL) {
790 : /* End of input OR exception */
791 0 : if (!PyErr_Occurred() && self->field_len != 0)
792 0 : PyErr_Format(_csvstate_global->error_obj,
793 : "newline inside string");
794 0 : return NULL;
795 : }
796 0 : if (!PyUnicode_Check(lineobj)) {
797 0 : PyErr_Format(_csvstate_global->error_obj,
798 : "iterator should return strings, "
799 : "not %.200s "
800 : "(did you open the file in text mode?)",
801 0 : lineobj->ob_type->tp_name
802 : );
803 0 : Py_DECREF(lineobj);
804 0 : return NULL;
805 : }
806 0 : ++self->line_num;
807 0 : kind = PyUnicode_KIND(lineobj);
808 0 : data = PyUnicode_DATA(lineobj);
809 0 : pos = 0;
810 0 : linelen = PyUnicode_GET_LENGTH(lineobj);
811 0 : while (linelen--) {
812 0 : c = PyUnicode_READ(kind, data, pos);
813 0 : if (c == '\0') {
814 0 : Py_DECREF(lineobj);
815 0 : PyErr_Format(_csvstate_global->error_obj,
816 : "line contains NULL byte");
817 0 : goto err;
818 : }
819 0 : if (parse_process_char(self, c) < 0) {
820 0 : Py_DECREF(lineobj);
821 0 : goto err;
822 : }
823 0 : pos++;
824 : }
825 0 : Py_DECREF(lineobj);
826 0 : if (parse_process_char(self, 0) < 0)
827 0 : goto err;
828 0 : } while (self->state != START_RECORD);
829 :
830 0 : fields = self->fields;
831 0 : self->fields = NULL;
832 : err:
833 0 : return fields;
834 : }
835 :
836 : static void
837 0 : Reader_dealloc(ReaderObj *self)
838 : {
839 0 : PyObject_GC_UnTrack(self);
840 0 : Py_XDECREF(self->dialect);
841 0 : Py_XDECREF(self->input_iter);
842 0 : Py_XDECREF(self->fields);
843 0 : if (self->field != NULL)
844 0 : PyMem_Free(self->field);
845 0 : PyObject_GC_Del(self);
846 0 : }
847 :
848 : static int
849 0 : Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
850 : {
851 0 : Py_VISIT(self->dialect);
852 0 : Py_VISIT(self->input_iter);
853 0 : Py_VISIT(self->fields);
854 0 : return 0;
855 : }
856 :
857 : static int
858 0 : Reader_clear(ReaderObj *self)
859 : {
860 0 : Py_CLEAR(self->dialect);
861 0 : Py_CLEAR(self->input_iter);
862 0 : Py_CLEAR(self->fields);
863 0 : return 0;
864 : }
865 :
866 : PyDoc_STRVAR(Reader_Type_doc,
867 : "CSV reader\n"
868 : "\n"
869 : "Reader objects are responsible for reading and parsing tabular data\n"
870 : "in CSV format.\n"
871 : );
872 :
/* The reader has no methods of its own; the table holds only the sentinel. */
873 : static struct PyMethodDef Reader_methods[] = {
874 : { NULL, NULL }
875 : };
/* Byte offset of member x inside ReaderObj. */
876 : #define R_OFF(x) offsetof(ReaderObj, x)
877 :
/* Read-only attributes exposed directly from the struct. */
878 : static struct PyMemberDef Reader_memberlist[] = {
879 : { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
880 : { "line_num", T_ULONG, R_OFF(line_num), READONLY },
881 : { NULL }
882 : };
883 :
884 :
885 : static PyTypeObject Reader_Type = {
886 : PyVarObject_HEAD_INIT(NULL, 0)
887 : "_csv.reader", /*tp_name*/
888 : sizeof(ReaderObj), /*tp_basicsize*/
889 : 0, /*tp_itemsize*/
890 : /* methods */
891 : (destructor)Reader_dealloc, /*tp_dealloc*/
892 : (printfunc)0, /*tp_print*/
893 : (getattrfunc)0, /*tp_getattr*/
894 : (setattrfunc)0, /*tp_setattr*/
895 : 0, /*tp_reserved*/
896 : (reprfunc)0, /*tp_repr*/
897 : 0, /*tp_as_number*/
898 : 0, /*tp_as_sequence*/
899 : 0, /*tp_as_mapping*/
900 : (hashfunc)0, /*tp_hash*/
901 : (ternaryfunc)0, /*tp_call*/
902 : (reprfunc)0, /*tp_str*/
903 : 0, /*tp_getattro*/
904 : 0, /*tp_setattro*/
905 : 0, /*tp_as_buffer*/
906 : Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
907 : Py_TPFLAGS_HAVE_GC, /*tp_flags*/
908 : Reader_Type_doc, /*tp_doc*/
909 : (traverseproc)Reader_traverse, /*tp_traverse*/
910 : (inquiry)Reader_clear, /*tp_clear*/
911 : 0, /*tp_richcompare*/
912 : 0, /*tp_weaklistoffset*/
913 : PyObject_SelfIter, /*tp_iter*/
914 : (getiterfunc)Reader_iternext, /*tp_iternext*/
915 : Reader_methods, /*tp_methods*/
916 : Reader_memberlist, /*tp_members*/
917 : 0, /*tp_getset*/
918 :
919 : };
920 :
921 : static PyObject *
922 0 : csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
923 : {
924 0 : PyObject * iterator, * dialect = NULL;
925 0 : ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
926 :
927 0 : if (!self)
928 0 : return NULL;
929 :
930 0 : self->dialect = NULL;
931 0 : self->fields = NULL;
932 0 : self->input_iter = NULL;
933 0 : self->field = NULL;
934 0 : self->field_size = 0;
935 0 : self->line_num = 0;
936 :
937 0 : if (parse_reset(self) < 0) {
938 0 : Py_DECREF(self);
939 0 : return NULL;
940 : }
941 :
942 0 : if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
943 0 : Py_DECREF(self);
944 0 : return NULL;
945 : }
946 0 : self->input_iter = PyObject_GetIter(iterator);
947 0 : if (self->input_iter == NULL) {
948 0 : PyErr_SetString(PyExc_TypeError,
949 : "argument 1 must be an iterator");
950 0 : Py_DECREF(self);
951 0 : return NULL;
952 : }
953 0 : self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
954 0 : if (self->dialect == NULL) {
955 0 : Py_DECREF(self);
956 0 : return NULL;
957 : }
958 :
959 0 : PyObject_GC_Track(self);
960 0 : return (PyObject *)self;
961 : }
962 :
963 : /*
964 : * WRITER
965 : */
966 : /* ---------------------------------------------------------------- */
967 : static void
968 0 : join_reset(WriterObj *self)
969 : {
970 0 : self->rec_len = 0;
971 0 : self->num_fields = 0;
972 0 : }
973 :
974 : #define MEM_INCR 32768
975 :
976 : /* Calculate new record length or append field to record. Return new
977 : * record length.
978 : */
979 : static Py_ssize_t
980 0 : join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
981 : Py_ssize_t field_len, int quote_empty, int *quoted,
982 : int copy_phase)
983 : {
984 0 : DialectObj *dialect = self->dialect;
985 : int i;
986 : Py_ssize_t rec_len;
987 :
988 : #define ADDCH(c) \
989 : do {\
990 : if (copy_phase) \
991 : self->rec[rec_len] = c;\
992 : rec_len++;\
993 : } while(0)
994 :
995 0 : rec_len = self->rec_len;
996 :
997 : /* If this is not the first field we need a field separator */
998 0 : if (self->num_fields > 0)
999 0 : ADDCH(dialect->delimiter);
1000 :
1001 : /* Handle preceding quote */
1002 0 : if (copy_phase && *quoted)
1003 0 : ADDCH(dialect->quotechar);
1004 :
1005 : /* Copy/count field data */
1006 : /* If field is null just pass over */
1007 0 : for (i = 0; field_data && (i < field_len); i++) {
1008 0 : Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
1009 0 : int want_escape = 0;
1010 :
1011 0 : if (c == dialect->delimiter ||
1012 0 : c == dialect->escapechar ||
1013 0 : c == dialect->quotechar ||
1014 0 : PyUnicode_FindChar(
1015 : dialect->lineterminator, c, 0,
1016 0 : PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
1017 0 : if (dialect->quoting == QUOTE_NONE)
1018 0 : want_escape = 1;
1019 : else {
1020 0 : if (c == dialect->quotechar) {
1021 0 : if (dialect->doublequote)
1022 0 : ADDCH(dialect->quotechar);
1023 : else
1024 0 : want_escape = 1;
1025 : }
1026 0 : if (!want_escape)
1027 0 : *quoted = 1;
1028 : }
1029 0 : if (want_escape) {
1030 0 : if (!dialect->escapechar) {
1031 0 : PyErr_Format(_csvstate_global->error_obj,
1032 : "need to escape, but no escapechar set");
1033 0 : return -1;
1034 : }
1035 0 : ADDCH(dialect->escapechar);
1036 : }
1037 : }
1038 : /* Copy field character into record buffer.
1039 : */
1040 0 : ADDCH(c);
1041 : }
1042 :
1043 : /* If field is empty check if it needs to be quoted.
1044 : */
1045 0 : if (i == 0 && quote_empty) {
1046 0 : if (dialect->quoting == QUOTE_NONE) {
1047 0 : PyErr_Format(_csvstate_global->error_obj,
1048 : "single empty field record must be quoted");
1049 0 : return -1;
1050 : }
1051 : else
1052 0 : *quoted = 1;
1053 : }
1054 :
1055 0 : if (*quoted) {
1056 0 : if (copy_phase)
1057 0 : ADDCH(dialect->quotechar);
1058 : else
1059 0 : rec_len += 2;
1060 : }
1061 0 : return rec_len;
1062 : #undef ADDCH
1063 : }
1064 :
1065 : static int
1066 0 : join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
1067 : {
1068 :
1069 0 : if (rec_len < 0 || rec_len > PY_SSIZE_T_MAX - MEM_INCR) {
1070 0 : PyErr_NoMemory();
1071 0 : return 0;
1072 : }
1073 :
1074 0 : if (rec_len > self->rec_size) {
1075 0 : if (self->rec_size == 0) {
1076 0 : self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1077 0 : if (self->rec != NULL)
1078 0 : PyMem_Free(self->rec);
1079 0 : self->rec = PyMem_New(Py_UCS4, self->rec_size);
1080 : }
1081 : else {
1082 0 : Py_UCS4* old_rec = self->rec;
1083 :
1084 0 : self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1085 0 : self->rec = PyMem_Resize(old_rec, Py_UCS4, self->rec_size);
1086 0 : if (self->rec == NULL)
1087 0 : PyMem_Free(old_rec);
1088 : }
1089 0 : if (self->rec == NULL) {
1090 0 : PyErr_NoMemory();
1091 0 : return 0;
1092 : }
1093 : }
1094 0 : return 1;
1095 : }
1096 :
1097 : static int
1098 0 : join_append(WriterObj *self, PyObject *field, int *quoted, int quote_empty)
1099 : {
1100 0 : unsigned int field_kind = -1;
1101 0 : void *field_data = NULL;
1102 0 : Py_ssize_t field_len = 0;
1103 : Py_ssize_t rec_len;
1104 :
1105 0 : if (field != NULL) {
1106 0 : field_kind = PyUnicode_KIND(field);
1107 0 : field_data = PyUnicode_DATA(field);
1108 0 : field_len = PyUnicode_GET_LENGTH(field);
1109 : }
1110 0 : rec_len = join_append_data(self, field_kind, field_data, field_len,
1111 : quote_empty, quoted, 0);
1112 0 : if (rec_len < 0)
1113 0 : return 0;
1114 :
1115 : /* grow record buffer if necessary */
1116 0 : if (!join_check_rec_size(self, rec_len))
1117 0 : return 0;
1118 :
1119 0 : self->rec_len = join_append_data(self, field_kind, field_data, field_len,
1120 : quote_empty, quoted, 1);
1121 0 : self->num_fields++;
1122 :
1123 0 : return 1;
1124 : }
1125 :
1126 : static int
1127 0 : join_append_lineterminator(WriterObj *self)
1128 : {
1129 : Py_ssize_t terminator_len, i;
1130 : unsigned int term_kind;
1131 : void *term_data;
1132 :
1133 0 : terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
1134 0 : if (terminator_len == -1)
1135 0 : return 0;
1136 :
1137 : /* grow record buffer if necessary */
1138 0 : if (!join_check_rec_size(self, self->rec_len + terminator_len))
1139 0 : return 0;
1140 :
1141 0 : term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1142 0 : term_data = PyUnicode_DATA(self->dialect->lineterminator);
1143 0 : for (i = 0; i < terminator_len; i++)
1144 0 : self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
1145 0 : self->rec_len += terminator_len;
1146 :
1147 0 : return 1;
1148 : }
1149 :
1150 : PyDoc_STRVAR(csv_writerow_doc,
1151 : "writerow(sequence)\n"
1152 : "\n"
1153 : "Construct and write a CSV record from a sequence of fields. Non-string\n"
1154 : "elements will be converted to string.");
1155 :
1156 : static PyObject *
1157 0 : csv_writerow(WriterObj *self, PyObject *seq)
1158 : {
1159 0 : DialectObj *dialect = self->dialect;
1160 : Py_ssize_t len, i;
1161 : PyObject *line, *result;
1162 :
1163 0 : if (!PySequence_Check(seq))
1164 0 : return PyErr_Format(_csvstate_global->error_obj, "sequence expected");
1165 :
1166 0 : len = PySequence_Length(seq);
1167 0 : if (len < 0)
1168 0 : return NULL;
1169 :
1170 : /* Join all fields in internal buffer.
1171 : */
1172 0 : join_reset(self);
1173 0 : for (i = 0; i < len; i++) {
1174 : PyObject *field;
1175 : int append_ok;
1176 : int quoted;
1177 :
1178 0 : field = PySequence_GetItem(seq, i);
1179 0 : if (field == NULL)
1180 0 : return NULL;
1181 :
1182 0 : switch (dialect->quoting) {
1183 : case QUOTE_NONNUMERIC:
1184 0 : quoted = !PyNumber_Check(field);
1185 0 : break;
1186 : case QUOTE_ALL:
1187 0 : quoted = 1;
1188 0 : break;
1189 : default:
1190 0 : quoted = 0;
1191 0 : break;
1192 : }
1193 :
1194 0 : if (PyUnicode_Check(field)) {
1195 0 : append_ok = join_append(self, field, "ed, len == 1);
1196 0 : Py_DECREF(field);
1197 : }
1198 0 : else if (field == Py_None) {
1199 0 : append_ok = join_append(self, NULL, "ed, len == 1);
1200 0 : Py_DECREF(field);
1201 : }
1202 : else {
1203 : PyObject *str;
1204 :
1205 0 : str = PyObject_Str(field);
1206 0 : Py_DECREF(field);
1207 0 : if (str == NULL)
1208 0 : return NULL;
1209 0 : append_ok = join_append(self, str, "ed, len == 1);
1210 0 : Py_DECREF(str);
1211 : }
1212 0 : if (!append_ok)
1213 0 : return NULL;
1214 : }
1215 :
1216 : /* Add line terminator.
1217 : */
1218 0 : if (!join_append_lineterminator(self))
1219 0 : return 0;
1220 :
1221 0 : line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1222 0 : (void *) self->rec, self->rec_len);
1223 0 : if (line == NULL)
1224 0 : return NULL;
1225 0 : result = PyObject_CallFunctionObjArgs(self->writeline, line, NULL);
1226 0 : Py_DECREF(line);
1227 0 : return result;
1228 : }
1229 :
1230 : PyDoc_STRVAR(csv_writerows_doc,
1231 : "writerows(sequence of sequences)\n"
1232 : "\n"
1233 : "Construct and write a series of sequences to a csv file. Non-string\n"
1234 : "elements will be converted to string.");
1235 :
1236 : static PyObject *
1237 0 : csv_writerows(WriterObj *self, PyObject *seqseq)
1238 : {
1239 : PyObject *row_iter, *row_obj, *result;
1240 :
1241 0 : row_iter = PyObject_GetIter(seqseq);
1242 0 : if (row_iter == NULL) {
1243 0 : PyErr_SetString(PyExc_TypeError,
1244 : "writerows() argument must be iterable");
1245 0 : return NULL;
1246 : }
1247 0 : while ((row_obj = PyIter_Next(row_iter))) {
1248 0 : result = csv_writerow(self, row_obj);
1249 0 : Py_DECREF(row_obj);
1250 0 : if (!result) {
1251 0 : Py_DECREF(row_iter);
1252 0 : return NULL;
1253 : }
1254 : else
1255 0 : Py_DECREF(result);
1256 : }
1257 0 : Py_DECREF(row_iter);
1258 0 : if (PyErr_Occurred())
1259 0 : return NULL;
1260 0 : Py_INCREF(Py_None);
1261 0 : return Py_None;
1262 : }
1263 :
1264 : static struct PyMethodDef Writer_methods[] = {
1265 : { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1266 : { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1267 : { NULL, NULL }
1268 : };
1269 :
1270 : #define W_OFF(x) offsetof(WriterObj, x)
1271 :
1272 : static struct PyMemberDef Writer_memberlist[] = {
1273 : { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1274 : { NULL }
1275 : };
1276 :
1277 : static void
1278 0 : Writer_dealloc(WriterObj *self)
1279 : {
1280 0 : PyObject_GC_UnTrack(self);
1281 0 : Py_XDECREF(self->dialect);
1282 0 : Py_XDECREF(self->writeline);
1283 0 : if (self->rec != NULL)
1284 0 : PyMem_Free(self->rec);
1285 0 : PyObject_GC_Del(self);
1286 0 : }
1287 :
1288 : static int
1289 0 : Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1290 : {
1291 0 : Py_VISIT(self->dialect);
1292 0 : Py_VISIT(self->writeline);
1293 0 : return 0;
1294 : }
1295 :
1296 : static int
1297 0 : Writer_clear(WriterObj *self)
1298 : {
1299 0 : Py_CLEAR(self->dialect);
1300 0 : Py_CLEAR(self->writeline);
1301 0 : return 0;
1302 : }
1303 :
1304 : PyDoc_STRVAR(Writer_Type_doc,
1305 : "CSV writer\n"
1306 : "\n"
1307 : "Writer objects are responsible for generating tabular data\n"
1308 : "in CSV format from sequence input.\n"
1309 : );
1310 :
1311 : static PyTypeObject Writer_Type = {
1312 : PyVarObject_HEAD_INIT(NULL, 0)
1313 : "_csv.writer", /*tp_name*/
1314 : sizeof(WriterObj), /*tp_basicsize*/
1315 : 0, /*tp_itemsize*/
1316 : /* methods */
1317 : (destructor)Writer_dealloc, /*tp_dealloc*/
1318 : (printfunc)0, /*tp_print*/
1319 : (getattrfunc)0, /*tp_getattr*/
1320 : (setattrfunc)0, /*tp_setattr*/
1321 : 0, /*tp_reserved*/
1322 : (reprfunc)0, /*tp_repr*/
1323 : 0, /*tp_as_number*/
1324 : 0, /*tp_as_sequence*/
1325 : 0, /*tp_as_mapping*/
1326 : (hashfunc)0, /*tp_hash*/
1327 : (ternaryfunc)0, /*tp_call*/
1328 : (reprfunc)0, /*tp_str*/
1329 : 0, /*tp_getattro*/
1330 : 0, /*tp_setattro*/
1331 : 0, /*tp_as_buffer*/
1332 : Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1333 : Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1334 : Writer_Type_doc,
1335 : (traverseproc)Writer_traverse, /*tp_traverse*/
1336 : (inquiry)Writer_clear, /*tp_clear*/
1337 : 0, /*tp_richcompare*/
1338 : 0, /*tp_weaklistoffset*/
1339 : (getiterfunc)0, /*tp_iter*/
1340 : (getiterfunc)0, /*tp_iternext*/
1341 : Writer_methods, /*tp_methods*/
1342 : Writer_memberlist, /*tp_members*/
1343 : 0, /*tp_getset*/
1344 : };
1345 :
1346 : static PyObject *
1347 0 : csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1348 : {
1349 0 : PyObject * output_file, * dialect = NULL;
1350 0 : WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
1351 : _Py_IDENTIFIER(write);
1352 :
1353 0 : if (!self)
1354 0 : return NULL;
1355 :
1356 0 : self->dialect = NULL;
1357 0 : self->writeline = NULL;
1358 :
1359 0 : self->rec = NULL;
1360 0 : self->rec_size = 0;
1361 0 : self->rec_len = 0;
1362 0 : self->num_fields = 0;
1363 :
1364 0 : if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1365 0 : Py_DECREF(self);
1366 0 : return NULL;
1367 : }
1368 0 : self->writeline = _PyObject_GetAttrId(output_file, &PyId_write);
1369 0 : if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1370 0 : PyErr_SetString(PyExc_TypeError,
1371 : "argument 1 must have a \"write\" method");
1372 0 : Py_DECREF(self);
1373 0 : return NULL;
1374 : }
1375 0 : self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1376 0 : if (self->dialect == NULL) {
1377 0 : Py_DECREF(self);
1378 0 : return NULL;
1379 : }
1380 0 : PyObject_GC_Track(self);
1381 0 : return (PyObject *)self;
1382 : }
1383 :
1384 : /*
1385 : * DIALECT REGISTRY
1386 : */
1387 : static PyObject *
1388 0 : csv_list_dialects(PyObject *module, PyObject *args)
1389 : {
1390 0 : return PyDict_Keys(_csvstate_global->dialects);
1391 : }
1392 :
1393 : static PyObject *
1394 0 : csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
1395 : {
1396 0 : PyObject *name_obj, *dialect_obj = NULL;
1397 : PyObject *dialect;
1398 :
1399 0 : if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1400 0 : return NULL;
1401 0 : if (!IS_BASESTRING(name_obj)) {
1402 0 : PyErr_SetString(PyExc_TypeError,
1403 : "dialect name must be a string or unicode");
1404 0 : return NULL;
1405 : }
1406 0 : dialect = _call_dialect(dialect_obj, kwargs);
1407 0 : if (dialect == NULL)
1408 0 : return NULL;
1409 0 : if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
1410 0 : Py_DECREF(dialect);
1411 0 : return NULL;
1412 : }
1413 0 : Py_DECREF(dialect);
1414 0 : Py_INCREF(Py_None);
1415 0 : return Py_None;
1416 : }
1417 :
1418 : static PyObject *
1419 0 : csv_unregister_dialect(PyObject *module, PyObject *name_obj)
1420 : {
1421 0 : if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0)
1422 0 : return PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
1423 0 : Py_INCREF(Py_None);
1424 0 : return Py_None;
1425 : }
1426 :
1427 : static PyObject *
1428 0 : csv_get_dialect(PyObject *module, PyObject *name_obj)
1429 : {
1430 0 : return get_dialect_from_registry(name_obj);
1431 : }
1432 :
1433 : static PyObject *
1434 0 : csv_field_size_limit(PyObject *module, PyObject *args)
1435 : {
1436 0 : PyObject *new_limit = NULL;
1437 0 : long old_limit = _csvstate_global->field_limit;
1438 :
1439 0 : if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1440 0 : return NULL;
1441 0 : if (new_limit != NULL) {
1442 0 : if (!PyLong_CheckExact(new_limit)) {
1443 0 : PyErr_Format(PyExc_TypeError,
1444 : "limit must be an integer");
1445 0 : return NULL;
1446 : }
1447 0 : _csvstate_global->field_limit = PyLong_AsLong(new_limit);
1448 0 : if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
1449 0 : _csvstate_global->field_limit = old_limit;
1450 0 : return NULL;
1451 : }
1452 : }
1453 0 : return PyLong_FromLong(old_limit);
1454 : }
1455 :
1456 : /*
1457 : * MODULE
1458 : */
1459 :
1460 : PyDoc_STRVAR(csv_module_doc,
1461 : "CSV parsing and writing.\n"
1462 : "\n"
1463 : "This module provides classes that assist in the reading and writing\n"
1464 : "of Comma Separated Value (CSV) files, and implements the interface\n"
1465 : "described by PEP 305. Although many CSV files are simple to parse,\n"
1466 : "the format is not formally defined by a stable specification and\n"
1467 : "is subtle enough that parsing lines of a CSV file with something\n"
1468 : "like line.split(\",\") is bound to fail. The module supports three\n"
1469 : "basic APIs: reading, writing, and registration of dialects.\n"
1470 : "\n"
1471 : "\n"
1472 : "DIALECT REGISTRATION:\n"
1473 : "\n"
1474 : "Readers and writers support a dialect argument, which is a convenient\n"
1475 : "handle on a group of settings. When the dialect argument is a string,\n"
1476 : "it identifies one of the dialects previously registered with the module.\n"
1477 : "If it is a class or instance, the attributes of the argument are used as\n"
1478 : "the settings for the reader or writer:\n"
1479 : "\n"
1480 : " class excel:\n"
1481 : " delimiter = ','\n"
1482 : " quotechar = '\"'\n"
1483 : " escapechar = None\n"
1484 : " doublequote = True\n"
1485 : " skipinitialspace = False\n"
1486 : " lineterminator = '\\r\\n'\n"
1487 : " quoting = QUOTE_MINIMAL\n"
1488 : "\n"
1489 : "SETTINGS:\n"
1490 : "\n"
1491 : " * quotechar - specifies a one-character string to use as the \n"
1492 : " quoting character. It defaults to '\"'.\n"
1493 : " * delimiter - specifies a one-character string to use as the \n"
1494 : " field separator. It defaults to ','.\n"
1495 : " * skipinitialspace - specifies how to interpret whitespace which\n"
1496 : " immediately follows a delimiter. It defaults to False, which\n"
1497 : " means that whitespace immediately following a delimiter is part\n"
1498 : " of the following field.\n"
1499 : " * lineterminator - specifies the character sequence which should \n"
1500 : " terminate rows.\n"
1501 : " * quoting - controls when quotes should be generated by the writer.\n"
1502 : " It can take on any of the following module constants:\n"
1503 : "\n"
1504 : " csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1505 : " field contains either the quotechar or the delimiter\n"
1506 : " csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1507 : " csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
1508 : " fields which do not parse as integers or floating point\n"
1509 : " numbers.\n"
1510 : " csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1511 : " * escapechar - specifies a one-character string used to escape \n"
1512 : " the delimiter when quoting is set to QUOTE_NONE.\n"
1513 : " * doublequote - controls the handling of quotes inside fields. When\n"
1514 : " True, two consecutive quotes are interpreted as one during read,\n"
1515 : " and when writing, each quote character embedded in the data is\n"
1516 : " written as two quotes\n");
1517 :
1518 : PyDoc_STRVAR(csv_reader_doc,
1519 : " csv_reader = reader(iterable [, dialect='excel']\n"
1520 : " [optional keyword args])\n"
1521 : " for row in csv_reader:\n"
1522 : " process(row)\n"
1523 : "\n"
1524 : "The \"iterable\" argument can be any object that returns a line\n"
1525 : "of input for each iteration, such as a file object or a list. The\n"
1526 : "optional \"dialect\" parameter is discussed below. The function\n"
1527 : "also accepts optional keyword arguments which override settings\n"
1528 : "provided by the dialect.\n"
1529 : "\n"
1530 : "The returned object is an iterator. Each iteration returns a row\n"
1531 : "of the CSV file (which can span multiple input lines):\n");
1532 :
1533 : PyDoc_STRVAR(csv_writer_doc,
1534 : " csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1535 : " [optional keyword args])\n"
1536 : " for row in sequence:\n"
1537 : " csv_writer.writerow(row)\n"
1538 : "\n"
1539 : " [or]\n"
1540 : "\n"
1541 : " csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1542 : " [optional keyword args])\n"
1543 : " csv_writer.writerows(rows)\n"
1544 : "\n"
1545 : "The \"fileobj\" argument can be any object that supports the file API.\n");
1546 :
1547 : PyDoc_STRVAR(csv_list_dialects_doc,
1548 : "Return a list of all know dialect names.\n"
1549 : " names = csv.list_dialects()");
1550 :
1551 : PyDoc_STRVAR(csv_get_dialect_doc,
1552 : "Return the dialect instance associated with name.\n"
1553 : " dialect = csv.get_dialect(name)");
1554 :
1555 : PyDoc_STRVAR(csv_register_dialect_doc,
1556 : "Create a mapping from a string name to a dialect class.\n"
1557 : " dialect = csv.register_dialect(name, dialect)");
1558 :
1559 : PyDoc_STRVAR(csv_unregister_dialect_doc,
1560 : "Delete the name/dialect mapping associated with a string name.\n"
1561 : " csv.unregister_dialect(name)");
1562 :
1563 : PyDoc_STRVAR(csv_field_size_limit_doc,
1564 : "Sets an upper limit on parsed fields.\n"
1565 : " csv.field_size_limit([limit])\n"
1566 : "\n"
1567 : "Returns old limit. If limit is not given, no new limit is set and\n"
1568 : "the old limit is returned");
1569 :
1570 : static struct PyMethodDef csv_methods[] = {
1571 : { "reader", (PyCFunction)csv_reader,
1572 : METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1573 : { "writer", (PyCFunction)csv_writer,
1574 : METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1575 : { "list_dialects", (PyCFunction)csv_list_dialects,
1576 : METH_NOARGS, csv_list_dialects_doc},
1577 : { "register_dialect", (PyCFunction)csv_register_dialect,
1578 : METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1579 : { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1580 : METH_O, csv_unregister_dialect_doc},
1581 : { "get_dialect", (PyCFunction)csv_get_dialect,
1582 : METH_O, csv_get_dialect_doc},
1583 : { "field_size_limit", (PyCFunction)csv_field_size_limit,
1584 : METH_VARARGS, csv_field_size_limit_doc},
1585 : { NULL, NULL }
1586 : };
1587 :
1588 : static struct PyModuleDef _csvmodule = {
1589 : PyModuleDef_HEAD_INIT,
1590 : "_csv",
1591 : csv_module_doc,
1592 : sizeof(_csvstate),
1593 : csv_methods,
1594 : NULL,
1595 : _csv_traverse,
1596 : _csv_clear,
1597 : _csv_free
1598 : };
1599 :
1600 : PyMODINIT_FUNC
1601 0 : PyInit__csv(void)
1602 : {
1603 : PyObject *module;
1604 : StyleDesc *style;
1605 :
1606 0 : if (PyType_Ready(&Dialect_Type) < 0)
1607 0 : return NULL;
1608 :
1609 0 : if (PyType_Ready(&Reader_Type) < 0)
1610 0 : return NULL;
1611 :
1612 0 : if (PyType_Ready(&Writer_Type) < 0)
1613 0 : return NULL;
1614 :
1615 : /* Create the module and add the functions */
1616 0 : module = PyModule_Create(&_csvmodule);
1617 0 : if (module == NULL)
1618 0 : return NULL;
1619 :
1620 : /* Add version to the module. */
1621 0 : if (PyModule_AddStringConstant(module, "__version__",
1622 : MODULE_VERSION) == -1)
1623 0 : return NULL;
1624 :
1625 : /* Set the field limit */
1626 0 : _csvstate(module)->field_limit = 128 * 1024;
1627 : /* Do I still need to add this var to the Module Dict? */
1628 :
1629 : /* Add _dialects dictionary */
1630 0 : _csvstate(module)->dialects = PyDict_New();
1631 0 : if (_csvstate(module)->dialects == NULL)
1632 0 : return NULL;
1633 0 : Py_INCREF(_csvstate(module)->dialects);
1634 0 : if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects))
1635 0 : return NULL;
1636 :
1637 : /* Add quote styles into dictionary */
1638 0 : for (style = quote_styles; style->name; style++) {
1639 0 : if (PyModule_AddIntConstant(module, style->name,
1640 0 : style->style) == -1)
1641 0 : return NULL;
1642 : }
1643 :
1644 : /* Add the Dialect type */
1645 0 : Py_INCREF(&Dialect_Type);
1646 0 : if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1647 0 : return NULL;
1648 :
1649 : /* Add the CSV exception object to the module. */
1650 0 : _csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1651 0 : if (_csvstate(module)->error_obj == NULL)
1652 0 : return NULL;
1653 0 : Py_INCREF(_csvstate(module)->error_obj);
1654 0 : PyModule_AddObject(module, "Error", _csvstate(module)->error_obj);
1655 0 : return module;
1656 : }
|