LCOV - code coverage report
Current view: top level - libreoffice/workdir/unxlngi6.pro/UnpackedTarball/python3/Modules - _csv.c (source / functions) Hit Total Coverage
Test: libreoffice_filtered.info Lines: 0 667 0.0 %
Date: 2012-12-17 Functions: 0 46 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* csv module */
       2             : 
       3             : /*
       4             : 
       5             : This module provides the low-level underpinnings of a CSV reading/writing
       6             : module.  Users should not use this module directly, but import the csv.py
       7             : module instead.
       8             : 
       9             : */
      10             : 
      11             : #define MODULE_VERSION "1.0"
      12             : 
      13             : #include "Python.h"
      14             : #include "structmember.h"
      15             : 
      16             : #define IS_BASESTRING(o) \
      17             :     PyUnicode_Check(o)
      18             : 
      19             : typedef struct {   /* per-module state, obtained through PyModule_GetState() */
      20             :     PyObject *error_obj;   /* exception type passed to PyErr_Format() for CSV errors */
      21             :     PyObject *dialects;   /* Dialect registry: dict looked up by dialect name */
      22             :     long field_limit;   /* max parsed field size (compared against the field length) */
      23             : } _csvstate;
      24             : /* Accessor: the _csvstate block attached to module object `o`. */
      25             : #define _csvstate(o) ((_csvstate *)PyModule_GetState(o))
      26             : 
      27             : static int
      28           0 : _csv_clear(PyObject *m)
      29             : {
      30           0 :     Py_CLEAR(_csvstate(m)->error_obj);
      31           0 :     Py_CLEAR(_csvstate(m)->dialects);
      32           0 :     return 0;
      33             : }
      34             : 
      35             : static int
      36           0 : _csv_traverse(PyObject *m, visitproc visit, void *arg)
      37             : {
      38           0 :     Py_VISIT(_csvstate(m)->error_obj);
      39           0 :     Py_VISIT(_csvstate(m)->dialects);
      40           0 :     return 0;
      41             : }
      42             : 
      43             : static void
      44           0 : _csv_free(void *m)
      45             : {
      46           0 :    _csv_clear((PyObject *)m);
      47           0 : }
      48             : 
      49             : static struct PyModuleDef _csvmodule;   /* forward declaration, needed by the macro below */
      50             : /* State of the module located with PyState_FindModule(&_csvmodule); re-evaluated at every use. */
      51             : #define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
      52             : 
      53             : typedef enum {   /* values stored in ReaderObj.state and switched on by parse_process_char() */
      54             :     START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
      55             :     IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
      56             :     EAT_CRNL
      57             : } ParserState;
      58             : 
      59             : typedef enum {   /* values accepted for the dialect "quoting" option */
      60             :     QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE
      61             : } QuoteStyle;
      62             : 
      63             : typedef struct {   /* one row of the quote_styles table */
      64             :     QuoteStyle style;   /* enum value */
      65             :     char *name;   /* its name as a C string; NULL in the terminating row */
      66             : } StyleDesc;
      67             : 
      68             : static StyleDesc quote_styles[] = {   /* scanned by dialect_check_quoting() */
      69             :     { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
      70             :     { QUOTE_ALL,        "QUOTE_ALL" },
      71             :     { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
      72             :     { QUOTE_NONE,       "QUOTE_NONE" },
      73             :     { 0 }   /* sentinel: name == NULL stops the scan */
      74             : };
      75             : 
      76             : typedef struct {   /* instance layout of _csv.Dialect; filled in by dialect_new() */
      77             :     PyObject_HEAD
      78             : 
      79             :     int doublequote;            /* is " represented by ""? */
      80             :     Py_UCS4 delimiter;       /* field separator */
      81             :     Py_UCS4 quotechar;       /* quote character; 0 is reported to Python as None */
      82             :     Py_UCS4 escapechar;      /* escape character; 0 is reported to Python as None */
      83             :     int skipinitialspace;       /* ignore spaces following delimiter? */
      84             :     PyObject *lineterminator; /* string to write between records (owned reference) */
      85             :     int quoting;                /* style of quoting to write (a QuoteStyle value) */
      86             : 
      87             :     int strict;                 /* raise exception on bad CSV */
      88             : } DialectObj;
      89             : 
      90             : static PyTypeObject Dialect_Type;   /* forward declaration; defined after dialect_new() */
      91             : 
      92             : typedef struct {   /* instance layout of the reader object */
      93             :     PyObject_HEAD
      94             : 
      95             :     PyObject *input_iter;   /* iterate over this for input lines */
      96             : 
      97             :     DialectObj *dialect;    /* parsing dialect */
      98             : 
      99             :     PyObject *fields;           /* field list for current record */
     100             :     ParserState state;          /* current CSV parse state */
     101             :     Py_UCS4 *field;             /* temporary buffer holding the field being parsed */
     102             :     Py_ssize_t field_size;      /* size of allocated buffer, in Py_UCS4 elements */
     103             :     Py_ssize_t field_len;       /* length of current field */
     104             :     int numeric_field;          /* treat field as numeric (converted with PyNumber_Float) */
     105             :     unsigned long line_num;     /* Source-file line number */
     106             : } ReaderObj;
     107             : 
     108             : static PyTypeObject Reader_Type;   /* forward declaration */
     109             : /* Exact type test: true only when v's type is Reader_Type itself. */
     110             : #define ReaderObject_Check(v)   (Py_TYPE(v) == &Reader_Type)
     111             : 
     112             : typedef struct {   /* instance layout of the writer object */
     113             :     PyObject_HEAD
     114             : 
     115             :     PyObject *writeline;    /* write output lines to this file */
     116             : 
     117             :     DialectObj *dialect;    /* parsing dialect */
     118             : 
     119             :     Py_UCS4 *rec;            /* buffer for parser.join */
     120             :     Py_ssize_t rec_size;        /* size of allocated record */
     121             :     Py_ssize_t rec_len;         /* length of record */
     122             :     int num_fields;             /* number of fields in record */
     123             : } WriterObj;
     124             : 
     125             : static PyTypeObject Writer_Type;   /* forward declaration */
     126             : 
     127             : /*
     128             :  * DIALECT class
     129             :  */
     130             : 
     131             : static PyObject *
     132           0 : get_dialect_from_registry(PyObject * name_obj)
     133             : {
     134             :     PyObject *dialect_obj;
     135             : 
     136           0 :     dialect_obj = PyDict_GetItem(_csvstate_global->dialects, name_obj);
     137           0 :     if (dialect_obj == NULL) {
     138           0 :         if (!PyErr_Occurred())
     139           0 :             PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
     140             :     }
     141             :     else
     142           0 :         Py_INCREF(dialect_obj);
     143           0 :     return dialect_obj;
     144             : }
     145             : 
     146             : static PyObject *
     147           0 : get_string(PyObject *str)
     148             : {
     149           0 :     Py_XINCREF(str);
     150           0 :     return str;
     151             : }
     152             : 
     153             : static PyObject *
     154           0 : get_nullchar_as_None(Py_UCS4 c)
     155             : {
     156           0 :     if (c == '\0') {
     157           0 :         Py_INCREF(Py_None);
     158           0 :         return Py_None;
     159             :     }
     160             :     else
     161           0 :         return PyUnicode_FromOrdinal(c);
     162             : }
     163             : 
     164             : static PyObject *
     165           0 : Dialect_get_lineterminator(DialectObj *self)
     166             : {
     167           0 :     return get_string(self->lineterminator);
     168             : }
     169             : 
     170             : static PyObject *
     171           0 : Dialect_get_delimiter(DialectObj *self)
     172             : {
     173           0 :     return get_nullchar_as_None(self->delimiter);
     174             : }
     175             : 
     176             : static PyObject *
     177           0 : Dialect_get_escapechar(DialectObj *self)
     178             : {
     179           0 :     return get_nullchar_as_None(self->escapechar);
     180             : }
     181             : 
     182             : static PyObject *
     183           0 : Dialect_get_quotechar(DialectObj *self)
     184             : {
     185           0 :     return get_nullchar_as_None(self->quotechar);
     186             : }
     187             : 
     188             : static PyObject *
     189           0 : Dialect_get_quoting(DialectObj *self)
     190             : {
     191           0 :     return PyLong_FromLong(self->quoting);
     192             : }
     193             : 
     194             : static int
     195           0 : _set_bool(const char *name, int *target, PyObject *src, int dflt)
     196             : {
     197           0 :     if (src == NULL)
     198           0 :         *target = dflt;
     199             :     else {
     200           0 :         int b = PyObject_IsTrue(src);
     201           0 :         if (b < 0)
     202           0 :             return -1;
     203           0 :         *target = b;
     204             :     }
     205           0 :     return 0;
     206             : }
     207             : 
     208             : static int
     209           0 : _set_int(const char *name, int *target, PyObject *src, int dflt)
     210             : {
     211           0 :     if (src == NULL)
     212           0 :         *target = dflt;
     213             :     else {
     214             :         long value;
     215           0 :         if (!PyLong_CheckExact(src)) {
     216           0 :             PyErr_Format(PyExc_TypeError,
     217             :                          "\"%s\" must be an integer", name);
     218           0 :             return -1;
     219             :         }
     220           0 :         value = PyLong_AsLong(src);
     221           0 :         if (value == -1 && PyErr_Occurred())
     222           0 :             return -1;
     223             : #if SIZEOF_LONG > SIZEOF_INT
     224             :         if (value > INT_MAX || value < INT_MIN) {
     225             :             PyErr_Format(PyExc_ValueError,
     226             :                          "integer out of range for \"%s\"", name);
     227             :             return -1;
     228             :         }
     229             : #endif
     230           0 :         *target = (int)value;
     231             :     }
     232           0 :     return 0;
     233             : }
     234             : 
     235             : static int
     236           0 : _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
     237             : {
     238           0 :     if (src == NULL)
     239           0 :         *target = dflt;
     240             :     else {
     241           0 :         *target = '\0';
     242           0 :         if (src != Py_None) {
     243             :             Py_ssize_t len;
     244           0 :             len = PyUnicode_GetLength(src);
     245           0 :             if (len > 1) {
     246           0 :                 PyErr_Format(PyExc_TypeError,
     247             :                     "\"%s\" must be an 1-character string",
     248             :                     name);
     249           0 :                 return -1;
     250             :             }
     251           0 :             if (len > 0)
     252           0 :                 *target = PyUnicode_READ_CHAR(src, 0);
     253             :         }
     254             :     }
     255           0 :     return 0;
     256             : }
     257             : 
     258             : static int
     259           0 : _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
     260             : {
     261           0 :     if (src == NULL)
     262           0 :         *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
     263             :     else {
     264           0 :         if (src == Py_None)
     265           0 :             *target = NULL;
     266           0 :         else if (!IS_BASESTRING(src)) {
     267           0 :             PyErr_Format(PyExc_TypeError,
     268             :                          "\"%s\" must be a string", name);
     269           0 :             return -1;
     270             :         }
     271             :         else {
     272           0 :             Py_XDECREF(*target);
     273           0 :             Py_INCREF(src);
     274           0 :             *target = src;
     275             :         }
     276             :     }
     277           0 :     return 0;
     278             : }
     279             : 
     280             : static int
     281           0 : dialect_check_quoting(int quoting)
     282             : {
     283             :     StyleDesc *qs;
     284             : 
     285           0 :     for (qs = quote_styles; qs->name; qs++) {
     286           0 :         if (qs->style == quoting)
     287           0 :             return 0;
     288             :     }
     289           0 :     PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
     290           0 :     return -1;
     291             : }
     292             : 
     293             : #define D_OFF(x) offsetof(DialectObj, x)
     294             : /* Members exposed directly from DialectObj int fields; all are READONLY. */
     295             : static struct PyMemberDef Dialect_memberlist[] = {
     296             :     { "skipinitialspace",   T_INT, D_OFF(skipinitialspace), READONLY },
     297             :     { "doublequote",        T_INT, D_OFF(doublequote), READONLY },
     298             :     { "strict",             T_INT, D_OFF(strict), READONLY },
     299             :     { NULL }   /* sentinel */
     300             : };
     301             : 
     302             : static PyGetSetDef Dialect_getsetlist[] = {   /* computed attributes; only getters are supplied */
     303             :     { "delimiter",          (getter)Dialect_get_delimiter},
     304             :     { "escapechar",             (getter)Dialect_get_escapechar},
     305             :     { "lineterminator",         (getter)Dialect_get_lineterminator},
     306             :     { "quotechar",              (getter)Dialect_get_quotechar},
     307             :     { "quoting",                (getter)Dialect_get_quoting},
     308             :     {NULL},   /* sentinel */
     309             : };
     310             : 
     311             : static void
     312           0 : Dialect_dealloc(DialectObj *self)
     313             : {
     314           0 :     Py_XDECREF(self->lineterminator);
     315           0 :     Py_TYPE(self)->tp_free((PyObject *)self);
     316           0 : }
     317             : 
     318             : static char *dialect_kws[] = {   /* keyword names for dialect_new(); order matches its output pointers */
     319             :     "dialect",
     320             :     "delimiter",
     321             :     "doublequote",
     322             :     "escapechar",
     323             :     "lineterminator",
     324             :     "quotechar",
     325             :     "quoting",
     326             :     "skipinitialspace",
     327             :     "strict",
     328             :     NULL   /* terminator */
     329             : };
     330             : 
     331             : static PyObject *
     332           0 : dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
     333             : {
     334             :     DialectObj *self;
     335           0 :     PyObject *ret = NULL;
     336           0 :     PyObject *dialect = NULL;
     337           0 :     PyObject *delimiter = NULL;
     338           0 :     PyObject *doublequote = NULL;
     339           0 :     PyObject *escapechar = NULL;
     340           0 :     PyObject *lineterminator = NULL;
     341           0 :     PyObject *quotechar = NULL;
     342           0 :     PyObject *quoting = NULL;
     343           0 :     PyObject *skipinitialspace = NULL;
     344           0 :     PyObject *strict = NULL;
     345             : 
     346           0 :     if (!PyArg_ParseTupleAndKeywords(args, kwargs,
     347             :                                      "|OOOOOOOOO", dialect_kws,
     348             :                                      &dialect,
     349             :                                      &delimiter,
     350             :                                      &doublequote,
     351             :                                      &escapechar,
     352             :                                      &lineterminator,
     353             :                                      &quotechar,
     354             :                                      &quoting,
     355             :                                      &skipinitialspace,
     356             :                                      &strict))
     357           0 :         return NULL;
     358             : 
     359           0 :     if (dialect != NULL) {
     360           0 :         if (IS_BASESTRING(dialect)) {
     361           0 :             dialect = get_dialect_from_registry(dialect);
     362           0 :             if (dialect == NULL)
     363           0 :                 return NULL;
     364             :         }
     365             :         else
     366           0 :             Py_INCREF(dialect);
     367             :         /* Can we reuse this instance? */
     368           0 :         if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
     369           0 :             delimiter == 0 &&
     370           0 :             doublequote == 0 &&
     371           0 :             escapechar == 0 &&
     372           0 :             lineterminator == 0 &&
     373           0 :             quotechar == 0 &&
     374           0 :             quoting == 0 &&
     375           0 :             skipinitialspace == 0 &&
     376           0 :             strict == 0)
     377           0 :             return dialect;
     378             :     }
     379             : 
     380           0 :     self = (DialectObj *)type->tp_alloc(type, 0);
     381           0 :     if (self == NULL) {
     382           0 :         Py_XDECREF(dialect);
     383           0 :         return NULL;
     384             :     }
     385           0 :     self->lineterminator = NULL;
     386             : 
     387           0 :     Py_XINCREF(delimiter);
     388           0 :     Py_XINCREF(doublequote);
     389           0 :     Py_XINCREF(escapechar);
     390           0 :     Py_XINCREF(lineterminator);
     391           0 :     Py_XINCREF(quotechar);
     392           0 :     Py_XINCREF(quoting);
     393           0 :     Py_XINCREF(skipinitialspace);
     394           0 :     Py_XINCREF(strict);
     395           0 :     if (dialect != NULL) {
     396             : #define DIALECT_GETATTR(v, n) \
     397             :         if (v == NULL) \
     398             :             v = PyObject_GetAttrString(dialect, n)
     399           0 :         DIALECT_GETATTR(delimiter, "delimiter");
     400           0 :         DIALECT_GETATTR(doublequote, "doublequote");
     401           0 :         DIALECT_GETATTR(escapechar, "escapechar");
     402           0 :         DIALECT_GETATTR(lineterminator, "lineterminator");
     403           0 :         DIALECT_GETATTR(quotechar, "quotechar");
     404           0 :         DIALECT_GETATTR(quoting, "quoting");
     405           0 :         DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
     406           0 :         DIALECT_GETATTR(strict, "strict");
     407           0 :         PyErr_Clear();
     408             :     }
     409             : 
     410             :     /* check types and convert to C values */
     411             : #define DIASET(meth, name, target, src, dflt) \
     412             :     if (meth(name, target, src, dflt)) \
     413             :         goto err
     414           0 :     DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
     415           0 :     DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
     416           0 :     DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
     417           0 :     DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
     418           0 :     DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
     419           0 :     DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
     420           0 :     DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
     421           0 :     DIASET(_set_bool, "strict", &self->strict, strict, 0);
     422             : 
     423             :     /* validate options */
     424           0 :     if (dialect_check_quoting(self->quoting))
     425           0 :         goto err;
     426           0 :     if (self->delimiter == 0) {
     427           0 :         PyErr_SetString(PyExc_TypeError, "delimiter must be set");
     428           0 :         goto err;
     429             :     }
     430           0 :     if (quotechar == Py_None && quoting == NULL)
     431           0 :         self->quoting = QUOTE_NONE;
     432           0 :     if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
     433           0 :         PyErr_SetString(PyExc_TypeError,
     434             :                         "quotechar must be set if quoting enabled");
     435           0 :         goto err;
     436             :     }
     437           0 :     if (self->lineterminator == 0) {
     438           0 :         PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
     439           0 :         goto err;
     440             :     }
     441             : 
     442           0 :     ret = (PyObject *)self;
     443           0 :     Py_INCREF(self);
     444             : err:
     445           0 :     Py_XDECREF(self);
     446           0 :     Py_XDECREF(dialect);
     447           0 :     Py_XDECREF(delimiter);
     448           0 :     Py_XDECREF(doublequote);
     449           0 :     Py_XDECREF(escapechar);
     450           0 :     Py_XDECREF(lineterminator);
     451           0 :     Py_XDECREF(quotechar);
     452           0 :     Py_XDECREF(quoting);
     453           0 :     Py_XDECREF(skipinitialspace);
     454           0 :     Py_XDECREF(strict);
     455           0 :     return ret;
     456             : }
     457             : 
     458             : 
     459             : PyDoc_STRVAR(Dialect_Type_doc,
     460             : "CSV dialect\n"
     461             : "\n"
     462             : "The Dialect type records CSV parsing and generation options.\n");
     463             : /* Type object for "_csv.Dialect": instances are built by dialect_new and released by Dialect_dealloc. */
     464             : static PyTypeObject Dialect_Type = {
     465             :     PyVarObject_HEAD_INIT(NULL, 0)
     466             :     "_csv.Dialect",                         /* tp_name */
     467             :     sizeof(DialectObj),                     /* tp_basicsize */
     468             :     0,                                      /* tp_itemsize */
     469             :     /*  methods  */
     470             :     (destructor)Dialect_dealloc,            /* tp_dealloc */
     471             :     (printfunc)0,                           /* tp_print */
     472             :     (getattrfunc)0,                         /* tp_getattr */
     473             :     (setattrfunc)0,                         /* tp_setattr */
     474             :     0,                                      /* tp_reserved */
     475             :     (reprfunc)0,                            /* tp_repr */
     476             :     0,                                      /* tp_as_number */
     477             :     0,                                      /* tp_as_sequence */
     478             :     0,                                      /* tp_as_mapping */
     479             :     (hashfunc)0,                            /* tp_hash */
     480             :     (ternaryfunc)0,                         /* tp_call */
     481             :     (reprfunc)0,                                /* tp_str */
     482             :     0,                                      /* tp_getattro */
     483             :     0,                                      /* tp_setattro */
     484             :     0,                                      /* tp_as_buffer */
     485             :     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
     486             :     Dialect_Type_doc,                       /* tp_doc */
     487             :     0,                                      /* tp_traverse */
     488             :     0,                                      /* tp_clear */
     489             :     0,                                      /* tp_richcompare */
     490             :     0,                                      /* tp_weaklistoffset */
     491             :     0,                                      /* tp_iter */
     492             :     0,                                      /* tp_iternext */
     493             :     0,                                          /* tp_methods */
     494             :     Dialect_memberlist,                     /* tp_members */
     495             :     Dialect_getsetlist,                     /* tp_getset */
     496             :     0,                                          /* tp_base */
     497             :     0,                                          /* tp_dict */
     498             :     0,                                          /* tp_descr_get */
     499             :     0,                                          /* tp_descr_set */
     500             :     0,                                          /* tp_dictoffset */
     501             :     0,                                          /* tp_init */
     502             :     0,                                          /* tp_alloc */
     503             :     dialect_new,                                /* tp_new */
     504             :     0,                                          /* tp_free */
     505             : };
     506             : 
     507             : /*
     508             :  * Return an instance of the dialect type, given a Python instance or kwarg
     509             :  * description of the dialect
     510             :  */
     511             : static PyObject *
     512           0 : _call_dialect(PyObject *dialect_inst, PyObject *kwargs)
     513             : {
     514             :     PyObject *ctor_args;
     515             :     PyObject *dialect;
     516             : 
     517           0 :     ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
     518           0 :     if (ctor_args == NULL)
     519           0 :         return NULL;
     520           0 :     dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
     521           0 :     Py_DECREF(ctor_args);
     522           0 :     return dialect;
     523             : }
     524             : 
     525             : /*
     526             :  * READER
     527             :  */
     528             : static int
     529           0 : parse_save_field(ReaderObj *self)
     530             : {
     531             :     PyObject *field;
     532             : 
     533           0 :     field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
     534           0 :                                       (void *) self->field, self->field_len);
     535           0 :     if (field == NULL)
     536           0 :         return -1;
     537           0 :     self->field_len = 0;
     538           0 :     if (self->numeric_field) {
     539             :         PyObject *tmp;
     540             : 
     541           0 :         self->numeric_field = 0;
     542           0 :         tmp = PyNumber_Float(field);
     543           0 :         Py_DECREF(field);
     544           0 :         if (tmp == NULL)
     545           0 :             return -1;
     546           0 :         field = tmp;
     547             :     }
     548           0 :     PyList_Append(self->fields, field);
     549           0 :     Py_DECREF(field);
     550           0 :     return 0;
     551             : }
     552             : 
     553             : static int
     554           0 : parse_grow_buff(ReaderObj *self)
     555             : {
     556           0 :     if (self->field_size == 0) {
     557           0 :         self->field_size = 4096;
     558           0 :         if (self->field != NULL)
     559           0 :             PyMem_Free(self->field);
     560           0 :         self->field = PyMem_New(Py_UCS4, self->field_size);
     561             :     }
     562             :     else {
     563           0 :         Py_UCS4 *field = self->field;
     564           0 :         if (self->field_size > PY_SSIZE_T_MAX / 2) {
     565           0 :             PyErr_NoMemory();
     566           0 :             return 0;
     567             :         }
     568           0 :         self->field_size *= 2;
     569           0 :         self->field = PyMem_Resize(field, Py_UCS4, self->field_size);
     570             :     }
     571           0 :     if (self->field == NULL) {
     572           0 :         PyErr_NoMemory();
     573           0 :         return 0;
     574             :     }
     575           0 :     return 1;
     576             : }
     577             : 
     578             : static int
     579           0 : parse_add_char(ReaderObj *self, Py_UCS4 c)
     580             : {
     581           0 :     if (self->field_len >= _csvstate_global->field_limit) {
     582           0 :         PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
     583           0 :                      _csvstate_global->field_limit);
     584           0 :         return -1;
     585             :     }
     586           0 :     if (self->field_len == self->field_size && !parse_grow_buff(self))
     587           0 :         return -1;
     588           0 :     self->field[self->field_len++] = c;
     589           0 :     return 0;
     590             : }
     591             : 
     592             : static int
     593           0 : parse_process_char(ReaderObj *self, Py_UCS4 c)
     594             : {
     595           0 :     DialectObj *dialect = self->dialect;
     596             : 
     597           0 :     switch (self->state) {
     598             :     case START_RECORD:
     599             :         /* start of record */
     600           0 :         if (c == '\0')
     601             :             /* empty line - return [] */
     602           0 :             break;
     603           0 :         else if (c == '\n' || c == '\r') {
     604           0 :             self->state = EAT_CRNL;
     605           0 :             break;
     606             :         }
     607             :         /* normal character - handle as START_FIELD */
     608           0 :         self->state = START_FIELD;
     609             :         /* fallthru */
     610             :     case START_FIELD:
     611             :         /* expecting field */
     612           0 :         if (c == '\n' || c == '\r' || c == '\0') {
     613             :             /* save empty field - return [fields] */
     614           0 :             if (parse_save_field(self) < 0)
     615           0 :                 return -1;
     616           0 :             self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
     617             :         }
     618           0 :         else if (c == dialect->quotechar &&
     619           0 :                  dialect->quoting != QUOTE_NONE) {
     620             :             /* start quoted field */
     621           0 :             self->state = IN_QUOTED_FIELD;
     622             :         }
     623           0 :         else if (c == dialect->escapechar) {
     624             :             /* possible escaped character */
     625           0 :             self->state = ESCAPED_CHAR;
     626             :         }
     627           0 :         else if (c == ' ' && dialect->skipinitialspace)
     628             :             /* ignore space at start of field */
     629             :             ;
     630           0 :         else if (c == dialect->delimiter) {
     631             :             /* save empty field */
     632           0 :             if (parse_save_field(self) < 0)
     633           0 :                 return -1;
     634             :         }
     635             :         else {
     636             :             /* begin new unquoted field */
     637           0 :             if (dialect->quoting == QUOTE_NONNUMERIC)
     638           0 :                 self->numeric_field = 1;
     639           0 :             if (parse_add_char(self, c) < 0)
     640           0 :                 return -1;
     641           0 :             self->state = IN_FIELD;
     642             :         }
     643           0 :         break;
     644             : 
     645             :     case ESCAPED_CHAR:
     646           0 :         if (c == '\0')
     647           0 :             c = '\n';
     648           0 :         if (parse_add_char(self, c) < 0)
     649           0 :             return -1;
     650           0 :         self->state = IN_FIELD;
     651           0 :         break;
     652             : 
     653             :     case IN_FIELD:
     654             :         /* in unquoted field */
     655           0 :         if (c == '\n' || c == '\r' || c == '\0') {
     656             :             /* end of line - return [fields] */
     657           0 :             if (parse_save_field(self) < 0)
     658           0 :                 return -1;
     659           0 :             self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
     660             :         }
     661           0 :         else if (c == dialect->escapechar) {
     662             :             /* possible escaped character */
     663           0 :             self->state = ESCAPED_CHAR;
     664             :         }
     665           0 :         else if (c == dialect->delimiter) {
     666             :             /* save field - wait for new field */
     667           0 :             if (parse_save_field(self) < 0)
     668           0 :                 return -1;
     669           0 :             self->state = START_FIELD;
     670             :         }
     671             :         else {
     672             :             /* normal character - save in field */
     673           0 :             if (parse_add_char(self, c) < 0)
     674           0 :                 return -1;
     675             :         }
     676           0 :         break;
     677             : 
     678             :     case IN_QUOTED_FIELD:
     679             :         /* in quoted field */
     680           0 :         if (c == '\0')
     681             :             ;
     682           0 :         else if (c == dialect->escapechar) {
     683             :             /* Possible escape character */
     684           0 :             self->state = ESCAPE_IN_QUOTED_FIELD;
     685             :         }
     686           0 :         else if (c == dialect->quotechar &&
     687           0 :                  dialect->quoting != QUOTE_NONE) {
     688           0 :             if (dialect->doublequote) {
     689             :                 /* doublequote; " represented by "" */
     690           0 :                 self->state = QUOTE_IN_QUOTED_FIELD;
     691             :             }
     692             :             else {
     693             :                 /* end of quote part of field */
     694           0 :                 self->state = IN_FIELD;
     695             :             }
     696             :         }
     697             :         else {
     698             :             /* normal character - save in field */
     699           0 :             if (parse_add_char(self, c) < 0)
     700           0 :                 return -1;
     701             :         }
     702           0 :         break;
     703             : 
     704             :     case ESCAPE_IN_QUOTED_FIELD:
     705           0 :         if (c == '\0')
     706           0 :             c = '\n';
     707           0 :         if (parse_add_char(self, c) < 0)
     708           0 :             return -1;
     709           0 :         self->state = IN_QUOTED_FIELD;
     710           0 :         break;
     711             : 
     712             :     case QUOTE_IN_QUOTED_FIELD:
     713             :         /* doublequote - seen a quote in an quoted field */
     714           0 :         if (dialect->quoting != QUOTE_NONE &&
     715           0 :             c == dialect->quotechar) {
     716             :             /* save "" as " */
     717           0 :             if (parse_add_char(self, c) < 0)
     718           0 :                 return -1;
     719           0 :             self->state = IN_QUOTED_FIELD;
     720             :         }
     721           0 :         else if (c == dialect->delimiter) {
     722             :             /* save field - wait for new field */
     723           0 :             if (parse_save_field(self) < 0)
     724           0 :                 return -1;
     725           0 :             self->state = START_FIELD;
     726             :         }
     727           0 :         else if (c == '\n' || c == '\r' || c == '\0') {
     728             :             /* end of line - return [fields] */
     729           0 :             if (parse_save_field(self) < 0)
     730           0 :                 return -1;
     731           0 :             self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
     732             :         }
     733           0 :         else if (!dialect->strict) {
     734           0 :             if (parse_add_char(self, c) < 0)
     735           0 :                 return -1;
     736           0 :             self->state = IN_FIELD;
     737             :         }
     738             :         else {
     739             :             /* illegal */
     740           0 :             PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
     741             :                             dialect->delimiter,
     742             :                             dialect->quotechar);
     743           0 :             return -1;
     744             :         }
     745           0 :         break;
     746             : 
     747             :     case EAT_CRNL:
     748           0 :         if (c == '\n' || c == '\r')
     749             :             ;
     750           0 :         else if (c == '\0')
     751           0 :             self->state = START_RECORD;
     752             :         else {
     753           0 :             PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
     754           0 :             return -1;
     755             :         }
     756           0 :         break;
     757             : 
     758             :     }
     759           0 :     return 0;
     760             : }
     761             : 
     762             : static int
     763           0 : parse_reset(ReaderObj *self)
     764             : {
     765           0 :     Py_XDECREF(self->fields);
     766           0 :     self->fields = PyList_New(0);
     767           0 :     if (self->fields == NULL)
     768           0 :         return -1;
     769           0 :     self->field_len = 0;
     770           0 :     self->state = START_RECORD;
     771           0 :     self->numeric_field = 0;
     772           0 :     return 0;
     773             : }
     774             : 
     775             : static PyObject *
     776           0 : Reader_iternext(ReaderObj *self)
     777             : {
     778           0 :     PyObject *fields = NULL;
     779             :     Py_UCS4 c;
     780             :     Py_ssize_t pos, linelen;
     781             :     unsigned int kind;
     782             :     void *data;
     783             :     PyObject *lineobj;
     784             : 
     785           0 :     if (parse_reset(self) < 0)
     786           0 :         return NULL;
     787             :     do {
     788           0 :         lineobj = PyIter_Next(self->input_iter);
     789           0 :         if (lineobj == NULL) {
     790             :             /* End of input OR exception */
     791           0 :             if (!PyErr_Occurred() && self->field_len != 0)
     792           0 :                 PyErr_Format(_csvstate_global->error_obj,
     793             :                              "newline inside string");
     794           0 :             return NULL;
     795             :         }
     796           0 :         if (!PyUnicode_Check(lineobj)) {
     797           0 :             PyErr_Format(_csvstate_global->error_obj,
     798             :                          "iterator should return strings, "
     799             :                          "not %.200s "
     800             :                          "(did you open the file in text mode?)",
     801           0 :                          lineobj->ob_type->tp_name
     802             :                 );
     803           0 :             Py_DECREF(lineobj);
     804           0 :             return NULL;
     805             :         }
     806           0 :         ++self->line_num;
     807           0 :         kind = PyUnicode_KIND(lineobj);
     808           0 :         data = PyUnicode_DATA(lineobj);
     809           0 :         pos = 0;
     810           0 :         linelen = PyUnicode_GET_LENGTH(lineobj);
     811           0 :         while (linelen--) {
     812           0 :             c = PyUnicode_READ(kind, data, pos);
     813           0 :             if (c == '\0') {
     814           0 :                 Py_DECREF(lineobj);
     815           0 :                 PyErr_Format(_csvstate_global->error_obj,
     816             :                              "line contains NULL byte");
     817           0 :                 goto err;
     818             :             }
     819           0 :             if (parse_process_char(self, c) < 0) {
     820           0 :                 Py_DECREF(lineobj);
     821           0 :                 goto err;
     822             :             }
     823           0 :             pos++;
     824             :         }
     825           0 :         Py_DECREF(lineobj);
     826           0 :         if (parse_process_char(self, 0) < 0)
     827           0 :             goto err;
     828           0 :     } while (self->state != START_RECORD);
     829             : 
     830           0 :     fields = self->fields;
     831           0 :     self->fields = NULL;
     832             : err:
     833           0 :     return fields;
     834             : }
     835             : 
     836             : static void
     837           0 : Reader_dealloc(ReaderObj *self)
     838             : {
     839           0 :     PyObject_GC_UnTrack(self);
     840           0 :     Py_XDECREF(self->dialect);
     841           0 :     Py_XDECREF(self->input_iter);
     842           0 :     Py_XDECREF(self->fields);
     843           0 :     if (self->field != NULL)
     844           0 :         PyMem_Free(self->field);
     845           0 :     PyObject_GC_Del(self);
     846           0 : }
     847             : 
     848             : static int
     849           0 : Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
     850             : {
     851           0 :     Py_VISIT(self->dialect);
     852           0 :     Py_VISIT(self->input_iter);
     853           0 :     Py_VISIT(self->fields);
     854           0 :     return 0;
     855             : }
     856             : 
     857             : static int
     858           0 : Reader_clear(ReaderObj *self)
     859             : {
     860           0 :     Py_CLEAR(self->dialect);
     861           0 :     Py_CLEAR(self->input_iter);
     862           0 :     Py_CLEAR(self->fields);
     863           0 :     return 0;
     864             : }
     865             : 
     866             : PyDoc_STRVAR(Reader_Type_doc,
     867             : "CSV reader\n"
     868             : "\n"
     869             : "Reader objects are responsible for reading and parsing tabular data\n"
     870             : "in CSV format.\n"
     871             : );
     872             : 
     873             : static struct PyMethodDef Reader_methods[] = {
     874             :     { NULL, NULL }
     875             : };
     876             : #define R_OFF(x) offsetof(ReaderObj, x)
     877             : 
     878             : static struct PyMemberDef Reader_memberlist[] = {
     879             :     { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
     880             :     { "line_num", T_ULONG, R_OFF(line_num), READONLY },
     881             :     { NULL }
     882             : };
     883             : 
     884             : 
     885             : static PyTypeObject Reader_Type = {
     886             :     PyVarObject_HEAD_INIT(NULL, 0)
     887             :     "_csv.reader",                          /*tp_name*/
     888             :     sizeof(ReaderObj),                      /*tp_basicsize*/
     889             :     0,                                      /*tp_itemsize*/
     890             :     /* methods */
     891             :     (destructor)Reader_dealloc,             /*tp_dealloc*/
     892             :     (printfunc)0,                           /*tp_print*/
     893             :     (getattrfunc)0,                         /*tp_getattr*/
     894             :     (setattrfunc)0,                         /*tp_setattr*/
     895             :     0,                                     /*tp_reserved*/
     896             :     (reprfunc)0,                            /*tp_repr*/
     897             :     0,                                      /*tp_as_number*/
     898             :     0,                                      /*tp_as_sequence*/
     899             :     0,                                      /*tp_as_mapping*/
     900             :     (hashfunc)0,                            /*tp_hash*/
     901             :     (ternaryfunc)0,                         /*tp_call*/
     902             :     (reprfunc)0,                                /*tp_str*/
     903             :     0,                                      /*tp_getattro*/
     904             :     0,                                      /*tp_setattro*/
     905             :     0,                                      /*tp_as_buffer*/
     906             :     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
     907             :         Py_TPFLAGS_HAVE_GC,                     /*tp_flags*/
     908             :     Reader_Type_doc,                        /*tp_doc*/
     909             :     (traverseproc)Reader_traverse,          /*tp_traverse*/
     910             :     (inquiry)Reader_clear,                  /*tp_clear*/
     911             :     0,                                      /*tp_richcompare*/
     912             :     0,                                      /*tp_weaklistoffset*/
     913             :     PyObject_SelfIter,                          /*tp_iter*/
     914             :     (getiterfunc)Reader_iternext,           /*tp_iternext*/
     915             :     Reader_methods,                         /*tp_methods*/
     916             :     Reader_memberlist,                      /*tp_members*/
     917             :     0,                                      /*tp_getset*/
     918             : 
     919             : };
     920             : 
     921             : static PyObject *
     922           0 : csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
     923             : {
     924           0 :     PyObject * iterator, * dialect = NULL;
     925           0 :     ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
     926             : 
     927           0 :     if (!self)
     928           0 :         return NULL;
     929             : 
     930           0 :     self->dialect = NULL;
     931           0 :     self->fields = NULL;
     932           0 :     self->input_iter = NULL;
     933           0 :     self->field = NULL;
     934           0 :     self->field_size = 0;
     935           0 :     self->line_num = 0;
     936             : 
     937           0 :     if (parse_reset(self) < 0) {
     938           0 :         Py_DECREF(self);
     939           0 :         return NULL;
     940             :     }
     941             : 
     942           0 :     if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
     943           0 :         Py_DECREF(self);
     944           0 :         return NULL;
     945             :     }
     946           0 :     self->input_iter = PyObject_GetIter(iterator);
     947           0 :     if (self->input_iter == NULL) {
     948           0 :         PyErr_SetString(PyExc_TypeError,
     949             :                         "argument 1 must be an iterator");
     950           0 :         Py_DECREF(self);
     951           0 :         return NULL;
     952             :     }
     953           0 :     self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
     954           0 :     if (self->dialect == NULL) {
     955           0 :         Py_DECREF(self);
     956           0 :         return NULL;
     957             :     }
     958             : 
     959           0 :     PyObject_GC_Track(self);
     960           0 :     return (PyObject *)self;
     961             : }
     962             : 
     963             : /*
     964             :  * WRITER
     965             :  */
     966             : /* ---------------------------------------------------------------- */
     967             : static void
     968           0 : join_reset(WriterObj *self)
     969             : {
     970           0 :     self->rec_len = 0;
     971           0 :     self->num_fields = 0;
     972           0 : }
     973             : 
     974             : #define MEM_INCR 32768
     975             : 
     976             : /* Calculate new record length or append field to record.  Return new
     977             :  * record length.
     978             :  */
     979             : static Py_ssize_t
     980           0 : join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
     981             :                  Py_ssize_t field_len, int quote_empty, int *quoted,
     982             :                  int copy_phase)
     983             : {
     984           0 :     DialectObj *dialect = self->dialect;
     985             :     int i;
     986             :     Py_ssize_t rec_len;
     987             : 
     988             : #define ADDCH(c) \
     989             :     do {\
     990             :         if (copy_phase) \
     991             :             self->rec[rec_len] = c;\
     992             :         rec_len++;\
     993             :     } while(0)
     994             : 
     995           0 :     rec_len = self->rec_len;
     996             : 
     997             :     /* If this is not the first field we need a field separator */
     998           0 :     if (self->num_fields > 0)
     999           0 :         ADDCH(dialect->delimiter);
    1000             : 
    1001             :     /* Handle preceding quote */
    1002           0 :     if (copy_phase && *quoted)
    1003           0 :         ADDCH(dialect->quotechar);
    1004             : 
    1005             :     /* Copy/count field data */
    1006             :     /* If field is null just pass over */
    1007           0 :     for (i = 0; field_data && (i < field_len); i++) {
    1008           0 :         Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
    1009           0 :         int want_escape = 0;
    1010             : 
    1011           0 :         if (c == dialect->delimiter ||
    1012           0 :             c == dialect->escapechar ||
    1013           0 :             c == dialect->quotechar  ||
    1014           0 :             PyUnicode_FindChar(
    1015             :                 dialect->lineterminator, c, 0,
    1016           0 :                 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
    1017           0 :             if (dialect->quoting == QUOTE_NONE)
    1018           0 :                 want_escape = 1;
    1019             :             else {
    1020           0 :                 if (c == dialect->quotechar) {
    1021           0 :                     if (dialect->doublequote)
    1022           0 :                         ADDCH(dialect->quotechar);
    1023             :                     else
    1024           0 :                         want_escape = 1;
    1025             :                 }
    1026           0 :                 if (!want_escape)
    1027           0 :                     *quoted = 1;
    1028             :             }
    1029           0 :             if (want_escape) {
    1030           0 :                 if (!dialect->escapechar) {
    1031           0 :                     PyErr_Format(_csvstate_global->error_obj,
    1032             :                                  "need to escape, but no escapechar set");
    1033           0 :                     return -1;
    1034             :                 }
    1035           0 :                 ADDCH(dialect->escapechar);
    1036             :             }
    1037             :         }
    1038             :         /* Copy field character into record buffer.
    1039             :          */
    1040           0 :         ADDCH(c);
    1041             :     }
    1042             : 
    1043             :     /* If field is empty check if it needs to be quoted.
    1044             :      */
    1045           0 :     if (i == 0 && quote_empty) {
    1046           0 :         if (dialect->quoting == QUOTE_NONE) {
    1047           0 :             PyErr_Format(_csvstate_global->error_obj,
    1048             :                 "single empty field record must be quoted");
    1049           0 :             return -1;
    1050             :         }
    1051             :         else
    1052           0 :             *quoted = 1;
    1053             :     }
    1054             : 
    1055           0 :     if (*quoted) {
    1056           0 :         if (copy_phase)
    1057           0 :             ADDCH(dialect->quotechar);
    1058             :         else
    1059           0 :             rec_len += 2;
    1060             :     }
    1061           0 :     return rec_len;
    1062             : #undef ADDCH
    1063             : }
    1064             : 
    1065             : static int
    1066           0 : join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
    1067             : {
    1068             : 
    1069           0 :     if (rec_len < 0 || rec_len > PY_SSIZE_T_MAX - MEM_INCR) {
    1070           0 :         PyErr_NoMemory();
    1071           0 :         return 0;
    1072             :     }
    1073             : 
    1074           0 :     if (rec_len > self->rec_size) {
    1075           0 :         if (self->rec_size == 0) {
    1076           0 :             self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
    1077           0 :             if (self->rec != NULL)
    1078           0 :                 PyMem_Free(self->rec);
    1079           0 :             self->rec = PyMem_New(Py_UCS4, self->rec_size);
    1080             :         }
    1081             :         else {
    1082           0 :             Py_UCS4* old_rec = self->rec;
    1083             : 
    1084           0 :             self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
    1085           0 :             self->rec = PyMem_Resize(old_rec, Py_UCS4, self->rec_size);
    1086           0 :             if (self->rec == NULL)
    1087           0 :                 PyMem_Free(old_rec);
    1088             :         }
    1089           0 :         if (self->rec == NULL) {
    1090           0 :             PyErr_NoMemory();
    1091           0 :             return 0;
    1092             :         }
    1093             :     }
    1094           0 :     return 1;
    1095             : }
    1096             : 
    1097             : static int
    1098           0 : join_append(WriterObj *self, PyObject *field, int *quoted, int quote_empty)
    1099             : {
    1100           0 :     unsigned int field_kind = -1;
    1101           0 :     void *field_data = NULL;
    1102           0 :     Py_ssize_t field_len = 0;
    1103             :     Py_ssize_t rec_len;
    1104             : 
    1105           0 :     if (field != NULL) {
    1106           0 :         field_kind = PyUnicode_KIND(field);
    1107           0 :         field_data = PyUnicode_DATA(field);
    1108           0 :         field_len = PyUnicode_GET_LENGTH(field);
    1109             :     }
    1110           0 :     rec_len = join_append_data(self, field_kind, field_data, field_len,
    1111             :                                quote_empty, quoted, 0);
    1112           0 :     if (rec_len < 0)
    1113           0 :         return 0;
    1114             : 
    1115             :     /* grow record buffer if necessary */
    1116           0 :     if (!join_check_rec_size(self, rec_len))
    1117           0 :         return 0;
    1118             : 
    1119           0 :     self->rec_len = join_append_data(self, field_kind, field_data, field_len,
    1120             :                                      quote_empty, quoted, 1);
    1121           0 :     self->num_fields++;
    1122             : 
    1123           0 :     return 1;
    1124             : }
    1125             : 
    1126             : static int
    1127           0 : join_append_lineterminator(WriterObj *self)
    1128             : {
    1129             :     Py_ssize_t terminator_len, i;
    1130             :     unsigned int term_kind;
    1131             :     void *term_data;
    1132             : 
    1133           0 :     terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
    1134           0 :     if (terminator_len == -1)
    1135           0 :         return 0;
    1136             : 
    1137             :     /* grow record buffer if necessary */
    1138           0 :     if (!join_check_rec_size(self, self->rec_len + terminator_len))
    1139           0 :         return 0;
    1140             : 
    1141           0 :     term_kind = PyUnicode_KIND(self->dialect->lineterminator);
    1142           0 :     term_data = PyUnicode_DATA(self->dialect->lineterminator);
    1143           0 :     for (i = 0; i < terminator_len; i++)
    1144           0 :         self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
    1145           0 :     self->rec_len += terminator_len;
    1146             : 
    1147           0 :     return 1;
    1148             : }
    1149             : 
    1150             : PyDoc_STRVAR(csv_writerow_doc,
    1151             : "writerow(sequence)\n"
    1152             : "\n"
    1153             : "Construct and write a CSV record from a sequence of fields.  Non-string\n"
    1154             : "elements will be converted to string.");
    1155             : 
    1156             : static PyObject *
    1157           0 : csv_writerow(WriterObj *self, PyObject *seq)
    1158             : {
    1159           0 :     DialectObj *dialect = self->dialect;
    1160             :     Py_ssize_t len, i;
    1161             :     PyObject *line, *result;
    1162             : 
    1163           0 :     if (!PySequence_Check(seq))
    1164           0 :         return PyErr_Format(_csvstate_global->error_obj, "sequence expected");
    1165             : 
    1166           0 :     len = PySequence_Length(seq);
    1167           0 :     if (len < 0)
    1168           0 :         return NULL;
    1169             : 
    1170             :     /* Join all fields in internal buffer.
    1171             :      */
    1172           0 :     join_reset(self);
    1173           0 :     for (i = 0; i < len; i++) {
    1174             :         PyObject *field;
    1175             :         int append_ok;
    1176             :         int quoted;
    1177             : 
    1178           0 :         field = PySequence_GetItem(seq, i);
    1179           0 :         if (field == NULL)
    1180           0 :             return NULL;
    1181             : 
    1182           0 :         switch (dialect->quoting) {
    1183             :         case QUOTE_NONNUMERIC:
    1184           0 :             quoted = !PyNumber_Check(field);
    1185           0 :             break;
    1186             :         case QUOTE_ALL:
    1187           0 :             quoted = 1;
    1188           0 :             break;
    1189             :         default:
    1190           0 :             quoted = 0;
    1191           0 :             break;
    1192             :         }
    1193             : 
    1194           0 :         if (PyUnicode_Check(field)) {
    1195           0 :             append_ok = join_append(self, field, &quoted, len == 1);
    1196           0 :             Py_DECREF(field);
    1197             :         }
    1198           0 :         else if (field == Py_None) {
    1199           0 :             append_ok = join_append(self, NULL, &quoted, len == 1);
    1200           0 :             Py_DECREF(field);
    1201             :         }
    1202             :         else {
    1203             :             PyObject *str;
    1204             : 
    1205           0 :             str = PyObject_Str(field);
    1206           0 :             Py_DECREF(field);
    1207           0 :             if (str == NULL)
    1208           0 :                 return NULL;
    1209           0 :             append_ok = join_append(self, str, &quoted, len == 1);
    1210           0 :             Py_DECREF(str);
    1211             :         }
    1212           0 :         if (!append_ok)
    1213           0 :             return NULL;
    1214             :     }
    1215             : 
    1216             :     /* Add line terminator.
    1217             :      */
    1218           0 :     if (!join_append_lineterminator(self))
    1219           0 :         return 0;
    1220             : 
    1221           0 :     line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
    1222           0 :                                      (void *) self->rec, self->rec_len);
    1223           0 :     if (line == NULL)
    1224           0 :         return NULL;
    1225           0 :     result = PyObject_CallFunctionObjArgs(self->writeline, line, NULL);
    1226           0 :     Py_DECREF(line);
    1227           0 :     return result;
    1228             : }
    1229             : 
    1230             : PyDoc_STRVAR(csv_writerows_doc,
    1231             : "writerows(sequence of sequences)\n"
    1232             : "\n"
    1233             : "Construct and write a series of sequences to a csv file.  Non-string\n"
    1234             : "elements will be converted to string.");
    1235             : 
    1236             : static PyObject *
    1237           0 : csv_writerows(WriterObj *self, PyObject *seqseq)
    1238             : {
    1239             :     PyObject *row_iter, *row_obj, *result;
    1240             : 
    1241           0 :     row_iter = PyObject_GetIter(seqseq);
    1242           0 :     if (row_iter == NULL) {
    1243           0 :         PyErr_SetString(PyExc_TypeError,
    1244             :                         "writerows() argument must be iterable");
    1245           0 :         return NULL;
    1246             :     }
    1247           0 :     while ((row_obj = PyIter_Next(row_iter))) {
    1248           0 :         result = csv_writerow(self, row_obj);
    1249           0 :         Py_DECREF(row_obj);
    1250           0 :         if (!result) {
    1251           0 :             Py_DECREF(row_iter);
    1252           0 :             return NULL;
    1253             :         }
    1254             :         else
    1255           0 :              Py_DECREF(result);
    1256             :     }
    1257           0 :     Py_DECREF(row_iter);
    1258           0 :     if (PyErr_Occurred())
    1259           0 :         return NULL;
    1260           0 :     Py_INCREF(Py_None);
    1261           0 :     return Py_None;
    1262             : }
    1263             : 
    1264             : static struct PyMethodDef Writer_methods[] = {
    1265             :     { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
    1266             :     { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
    1267             :     { NULL, NULL }
    1268             : };
    1269             : 
    1270             : #define W_OFF(x) offsetof(WriterObj, x)
    1271             : 
    1272             : static struct PyMemberDef Writer_memberlist[] = {
    1273             :     { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
    1274             :     { NULL }
    1275             : };
    1276             : 
    1277             : static void
    1278           0 : Writer_dealloc(WriterObj *self)
    1279             : {
    1280           0 :     PyObject_GC_UnTrack(self);
    1281           0 :     Py_XDECREF(self->dialect);
    1282           0 :     Py_XDECREF(self->writeline);
    1283           0 :     if (self->rec != NULL)
    1284           0 :         PyMem_Free(self->rec);
    1285           0 :     PyObject_GC_Del(self);
    1286           0 : }
    1287             : 
    1288             : static int
    1289           0 : Writer_traverse(WriterObj *self, visitproc visit, void *arg)
    1290             : {
    1291           0 :     Py_VISIT(self->dialect);
    1292           0 :     Py_VISIT(self->writeline);
    1293           0 :     return 0;
    1294             : }
    1295             : 
    1296             : static int
    1297           0 : Writer_clear(WriterObj *self)
    1298             : {
    1299           0 :     Py_CLEAR(self->dialect);
    1300           0 :     Py_CLEAR(self->writeline);
    1301           0 :     return 0;
    1302             : }
    1303             : 
    1304             : PyDoc_STRVAR(Writer_Type_doc,
    1305             : "CSV writer\n"
    1306             : "\n"
    1307             : "Writer objects are responsible for generating tabular data\n"
    1308             : "in CSV format from sequence input.\n"
    1309             : );
    1310             : 
    1311             : static PyTypeObject Writer_Type = {
    1312             :     PyVarObject_HEAD_INIT(NULL, 0)
    1313             :     "_csv.writer",                          /*tp_name*/
    1314             :     sizeof(WriterObj),                      /*tp_basicsize*/
    1315             :     0,                                      /*tp_itemsize*/
    1316             :     /* methods */
    1317             :     (destructor)Writer_dealloc,             /*tp_dealloc*/
    1318             :     (printfunc)0,                           /*tp_print*/
    1319             :     (getattrfunc)0,                         /*tp_getattr*/
    1320             :     (setattrfunc)0,                         /*tp_setattr*/
    1321             :     0,                                      /*tp_reserved*/
    1322             :     (reprfunc)0,                            /*tp_repr*/
    1323             :     0,                                      /*tp_as_number*/
    1324             :     0,                                      /*tp_as_sequence*/
    1325             :     0,                                      /*tp_as_mapping*/
    1326             :     (hashfunc)0,                            /*tp_hash*/
    1327             :     (ternaryfunc)0,                         /*tp_call*/
    1328             :     (reprfunc)0,                            /*tp_str*/
    1329             :     0,                                      /*tp_getattro*/
    1330             :     0,                                      /*tp_setattro*/
    1331             :     0,                                      /*tp_as_buffer*/
    1332             :     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
    1333             :         Py_TPFLAGS_HAVE_GC,                     /*tp_flags*/
    1334             :     Writer_Type_doc,
    1335             :     (traverseproc)Writer_traverse,          /*tp_traverse*/
    1336             :     (inquiry)Writer_clear,                  /*tp_clear*/
    1337             :     0,                                      /*tp_richcompare*/
    1338             :     0,                                      /*tp_weaklistoffset*/
    1339             :     (getiterfunc)0,                         /*tp_iter*/
    1340             :     (getiterfunc)0,                         /*tp_iternext*/
    1341             :     Writer_methods,                         /*tp_methods*/
    1342             :     Writer_memberlist,                      /*tp_members*/
    1343             :     0,                                      /*tp_getset*/
    1344             : };
    1345             : 
    1346             : static PyObject *
    1347           0 : csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
    1348             : {
    1349           0 :     PyObject * output_file, * dialect = NULL;
    1350           0 :     WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
    1351             :     _Py_IDENTIFIER(write);
    1352             : 
    1353           0 :     if (!self)
    1354           0 :         return NULL;
    1355             : 
    1356           0 :     self->dialect = NULL;
    1357           0 :     self->writeline = NULL;
    1358             : 
    1359           0 :     self->rec = NULL;
    1360           0 :     self->rec_size = 0;
    1361           0 :     self->rec_len = 0;
    1362           0 :     self->num_fields = 0;
    1363             : 
    1364           0 :     if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
    1365           0 :         Py_DECREF(self);
    1366           0 :         return NULL;
    1367             :     }
    1368           0 :     self->writeline = _PyObject_GetAttrId(output_file, &PyId_write);
    1369           0 :     if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
    1370           0 :         PyErr_SetString(PyExc_TypeError,
    1371             :                         "argument 1 must have a \"write\" method");
    1372           0 :         Py_DECREF(self);
    1373           0 :         return NULL;
    1374             :     }
    1375           0 :     self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
    1376           0 :     if (self->dialect == NULL) {
    1377           0 :         Py_DECREF(self);
    1378           0 :         return NULL;
    1379             :     }
    1380           0 :     PyObject_GC_Track(self);
    1381           0 :     return (PyObject *)self;
    1382             : }
    1383             : 
    1384             : /*
    1385             :  * DIALECT REGISTRY
    1386             :  */
    1387             : static PyObject *
    1388           0 : csv_list_dialects(PyObject *module, PyObject *args)
    1389             : {
    1390           0 :     return PyDict_Keys(_csvstate_global->dialects);
    1391             : }
    1392             : 
    1393             : static PyObject *
    1394           0 : csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
    1395             : {
    1396           0 :     PyObject *name_obj, *dialect_obj = NULL;
    1397             :     PyObject *dialect;
    1398             : 
    1399           0 :     if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
    1400           0 :         return NULL;
    1401           0 :     if (!IS_BASESTRING(name_obj)) {
    1402           0 :         PyErr_SetString(PyExc_TypeError,
    1403             :                         "dialect name must be a string or unicode");
    1404           0 :         return NULL;
    1405             :     }
    1406           0 :     dialect = _call_dialect(dialect_obj, kwargs);
    1407           0 :     if (dialect == NULL)
    1408           0 :         return NULL;
    1409           0 :     if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
    1410           0 :         Py_DECREF(dialect);
    1411           0 :         return NULL;
    1412             :     }
    1413           0 :     Py_DECREF(dialect);
    1414           0 :     Py_INCREF(Py_None);
    1415           0 :     return Py_None;
    1416             : }
    1417             : 
    1418             : static PyObject *
    1419           0 : csv_unregister_dialect(PyObject *module, PyObject *name_obj)
    1420             : {
    1421           0 :     if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0)
    1422           0 :         return PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
    1423           0 :     Py_INCREF(Py_None);
    1424           0 :     return Py_None;
    1425             : }
    1426             : 
    1427             : static PyObject *
    1428           0 : csv_get_dialect(PyObject *module, PyObject *name_obj)
    1429             : {
    1430           0 :     return get_dialect_from_registry(name_obj);
    1431             : }
    1432             : 
    1433             : static PyObject *
    1434           0 : csv_field_size_limit(PyObject *module, PyObject *args)
    1435             : {
    1436           0 :     PyObject *new_limit = NULL;
    1437           0 :     long old_limit = _csvstate_global->field_limit;
    1438             : 
    1439           0 :     if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
    1440           0 :         return NULL;
    1441           0 :     if (new_limit != NULL) {
    1442           0 :         if (!PyLong_CheckExact(new_limit)) {
    1443           0 :             PyErr_Format(PyExc_TypeError,
    1444             :                          "limit must be an integer");
    1445           0 :             return NULL;
    1446             :         }
    1447           0 :         _csvstate_global->field_limit = PyLong_AsLong(new_limit);
    1448           0 :         if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
    1449           0 :             _csvstate_global->field_limit = old_limit;
    1450           0 :             return NULL;
    1451             :         }
    1452             :     }
    1453           0 :     return PyLong_FromLong(old_limit);
    1454             : }
    1455             : 
    1456             : /*
    1457             :  * MODULE
    1458             :  */
    1459             : 
    1460             : PyDoc_STRVAR(csv_module_doc,
    1461             : "CSV parsing and writing.\n"
    1462             : "\n"
    1463             : "This module provides classes that assist in the reading and writing\n"
    1464             : "of Comma Separated Value (CSV) files, and implements the interface\n"
    1465             : "described by PEP 305.  Although many CSV files are simple to parse,\n"
    1466             : "the format is not formally defined by a stable specification and\n"
    1467             : "is subtle enough that parsing lines of a CSV file with something\n"
    1468             : "like line.split(\",\") is bound to fail.  The module supports three\n"
    1469             : "basic APIs: reading, writing, and registration of dialects.\n"
    1470             : "\n"
    1471             : "\n"
    1472             : "DIALECT REGISTRATION:\n"
    1473             : "\n"
    1474             : "Readers and writers support a dialect argument, which is a convenient\n"
    1475             : "handle on a group of settings.  When the dialect argument is a string,\n"
    1476             : "it identifies one of the dialects previously registered with the module.\n"
    1477             : "If it is a class or instance, the attributes of the argument are used as\n"
    1478             : "the settings for the reader or writer:\n"
    1479             : "\n"
    1480             : "    class excel:\n"
    1481             : "        delimiter = ','\n"
    1482             : "        quotechar = '\"'\n"
    1483             : "        escapechar = None\n"
    1484             : "        doublequote = True\n"
    1485             : "        skipinitialspace = False\n"
    1486             : "        lineterminator = '\\r\\n'\n"
    1487             : "        quoting = QUOTE_MINIMAL\n"
    1488             : "\n"
    1489             : "SETTINGS:\n"
    1490             : "\n"
    1491             : "    * quotechar - specifies a one-character string to use as the \n"
    1492             : "        quoting character.  It defaults to '\"'.\n"
    1493             : "    * delimiter - specifies a one-character string to use as the \n"
    1494             : "        field separator.  It defaults to ','.\n"
    1495             : "    * skipinitialspace - specifies how to interpret whitespace which\n"
    1496             : "        immediately follows a delimiter.  It defaults to False, which\n"
    1497             : "        means that whitespace immediately following a delimiter is part\n"
    1498             : "        of the following field.\n"
    1499             : "    * lineterminator -  specifies the character sequence which should \n"
    1500             : "        terminate rows.\n"
    1501             : "    * quoting - controls when quotes should be generated by the writer.\n"
    1502             : "        It can take on any of the following module constants:\n"
    1503             : "\n"
    1504             : "        csv.QUOTE_MINIMAL means only when required, for example, when a\n"
    1505             : "            field contains either the quotechar or the delimiter\n"
    1506             : "        csv.QUOTE_ALL means that quotes are always placed around fields.\n"
    1507             : "        csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
    1508             : "            fields which do not parse as integers or floating point\n"
    1509             : "            numbers.\n"
    1510             : "        csv.QUOTE_NONE means that quotes are never placed around fields.\n"
    1511             : "    * escapechar - specifies a one-character string used to escape \n"
    1512             : "        the delimiter when quoting is set to QUOTE_NONE.\n"
    1513             : "    * doublequote - controls the handling of quotes inside fields.  When\n"
    1514             : "        True, two consecutive quotes are interpreted as one during read,\n"
    1515             : "        and when writing, each quote character embedded in the data is\n"
    1516             : "        written as two quotes\n");
    1517             : 
    1518             : PyDoc_STRVAR(csv_reader_doc,
    1519             : "    csv_reader = reader(iterable [, dialect='excel']\n"
    1520             : "                        [optional keyword args])\n"
    1521             : "    for row in csv_reader:\n"
    1522             : "        process(row)\n"
    1523             : "\n"
    1524             : "The \"iterable\" argument can be any object that returns a line\n"
    1525             : "of input for each iteration, such as a file object or a list.  The\n"
    1526             : "optional \"dialect\" parameter is discussed below.  The function\n"
    1527             : "also accepts optional keyword arguments which override settings\n"
    1528             : "provided by the dialect.\n"
    1529             : "\n"
    1530             : "The returned object is an iterator.  Each iteration returns a row\n"
    1531             : "of the CSV file (which can span multiple input lines):\n");
    1532             : 
    1533             : PyDoc_STRVAR(csv_writer_doc,
    1534             : "    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
    1535             : "                            [optional keyword args])\n"
    1536             : "    for row in sequence:\n"
    1537             : "        csv_writer.writerow(row)\n"
    1538             : "\n"
    1539             : "    [or]\n"
    1540             : "\n"
    1541             : "    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
    1542             : "                            [optional keyword args])\n"
    1543             : "    csv_writer.writerows(rows)\n"
    1544             : "\n"
    1545             : "The \"fileobj\" argument can be any object that supports the file API.\n");
    1546             : 
    1547             : PyDoc_STRVAR(csv_list_dialects_doc,
    1548             : "Return a list of all know dialect names.\n"
    1549             : "    names = csv.list_dialects()");
    1550             : 
    1551             : PyDoc_STRVAR(csv_get_dialect_doc,
    1552             : "Return the dialect instance associated with name.\n"
    1553             : "    dialect = csv.get_dialect(name)");
    1554             : 
    1555             : PyDoc_STRVAR(csv_register_dialect_doc,
    1556             : "Create a mapping from a string name to a dialect class.\n"
    1557             : "    dialect = csv.register_dialect(name, dialect)");
    1558             : 
    1559             : PyDoc_STRVAR(csv_unregister_dialect_doc,
    1560             : "Delete the name/dialect mapping associated with a string name.\n"
    1561             : "    csv.unregister_dialect(name)");
    1562             : 
    1563             : PyDoc_STRVAR(csv_field_size_limit_doc,
    1564             : "Sets an upper limit on parsed fields.\n"
    1565             : "    csv.field_size_limit([limit])\n"
    1566             : "\n"
    1567             : "Returns old limit. If limit is not given, no new limit is set and\n"
    1568             : "the old limit is returned");
    1569             : 
    1570             : static struct PyMethodDef csv_methods[] = {
    1571             :     { "reader", (PyCFunction)csv_reader,
    1572             :         METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
    1573             :     { "writer", (PyCFunction)csv_writer,
    1574             :         METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
    1575             :     { "list_dialects", (PyCFunction)csv_list_dialects,
    1576             :         METH_NOARGS, csv_list_dialects_doc},
    1577             :     { "register_dialect", (PyCFunction)csv_register_dialect,
    1578             :         METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
    1579             :     { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
    1580             :         METH_O, csv_unregister_dialect_doc},
    1581             :     { "get_dialect", (PyCFunction)csv_get_dialect,
    1582             :         METH_O, csv_get_dialect_doc},
    1583             :     { "field_size_limit", (PyCFunction)csv_field_size_limit,
    1584             :         METH_VARARGS, csv_field_size_limit_doc},
    1585             :     { NULL, NULL }
    1586             : };
    1587             : 
    1588             : static struct PyModuleDef _csvmodule = {
    1589             :     PyModuleDef_HEAD_INIT,
    1590             :     "_csv",
    1591             :     csv_module_doc,
    1592             :     sizeof(_csvstate),
    1593             :     csv_methods,
    1594             :     NULL,
    1595             :     _csv_traverse,
    1596             :     _csv_clear,
    1597             :     _csv_free
    1598             : };
    1599             : 
    1600             : PyMODINIT_FUNC
    1601           0 : PyInit__csv(void)
    1602             : {
    1603             :     PyObject *module;
    1604             :     StyleDesc *style;
    1605             : 
    1606           0 :     if (PyType_Ready(&Dialect_Type) < 0)
    1607           0 :         return NULL;
    1608             : 
    1609           0 :     if (PyType_Ready(&Reader_Type) < 0)
    1610           0 :         return NULL;
    1611             : 
    1612           0 :     if (PyType_Ready(&Writer_Type) < 0)
    1613           0 :         return NULL;
    1614             : 
    1615             :     /* Create the module and add the functions */
    1616           0 :     module = PyModule_Create(&_csvmodule);
    1617           0 :     if (module == NULL)
    1618           0 :         return NULL;
    1619             : 
    1620             :     /* Add version to the module. */
    1621           0 :     if (PyModule_AddStringConstant(module, "__version__",
    1622             :                                    MODULE_VERSION) == -1)
    1623           0 :         return NULL;
    1624             : 
    1625             :     /* Set the field limit */
    1626           0 :     _csvstate(module)->field_limit = 128 * 1024;
    1627             :     /* Do I still need to add this var to the Module Dict? */
    1628             : 
    1629             :     /* Add _dialects dictionary */
    1630           0 :     _csvstate(module)->dialects = PyDict_New();
    1631           0 :     if (_csvstate(module)->dialects == NULL)
    1632           0 :         return NULL;
    1633           0 :     Py_INCREF(_csvstate(module)->dialects);
    1634           0 :     if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects))
    1635           0 :         return NULL;
    1636             : 
    1637             :     /* Add quote styles into dictionary */
    1638           0 :     for (style = quote_styles; style->name; style++) {
    1639           0 :         if (PyModule_AddIntConstant(module, style->name,
    1640           0 :                                     style->style) == -1)
    1641           0 :             return NULL;
    1642             :     }
    1643             : 
    1644             :     /* Add the Dialect type */
    1645           0 :     Py_INCREF(&Dialect_Type);
    1646           0 :     if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
    1647           0 :         return NULL;
    1648             : 
    1649             :     /* Add the CSV exception object to the module. */
    1650           0 :     _csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
    1651           0 :     if (_csvstate(module)->error_obj == NULL)
    1652           0 :         return NULL;
    1653           0 :     Py_INCREF(_csvstate(module)->error_obj);
    1654           0 :     PyModule_AddObject(module, "Error", _csvstate(module)->error_obj);
    1655           0 :     return module;
    1656             : }

Generated by: LCOV version 1.10