Line data Source code
1 : #define PY_SSIZE_T_CLEAN
2 : #include "Python.h"
3 : #include "structmember.h"
4 : #include "accu.h"
5 : #include "_iomodule.h"
6 :
7 : /* Implementation note: the buffer is always at least one character longer
8 : than the enclosed string, for proper functioning of _PyIO_find_line_ending.
9 : */
10 :
11 : #define STATE_REALIZED 1
12 : #define STATE_ACCUMULATING 2
13 :
14 : typedef struct {
15 : PyObject_HEAD
16 : Py_UCS4 *buf;
17 : Py_ssize_t pos;
18 : Py_ssize_t string_size;
19 : size_t buf_size;
20 :
21 : /* The stringio object can be in two states: accumulating or realized.
22 : In accumulating state, the internal buffer contains nothing and
23 : the contents are given by the embedded _PyAccu structure.
24 : In realized state, the internal buffer is meaningful and the
25 : _PyAccu is destroyed.
26 : */
27 : int state;
28 : _PyAccu accu;
29 :
30 : char ok; /* initialized? */
31 : char closed;
32 : char readuniversal;
33 : char readtranslate;
34 : PyObject *decoder;
35 : PyObject *readnl;
36 : PyObject *writenl;
37 :
38 : PyObject *dict;
39 : PyObject *weakreflist;
40 : } stringio;
41 :
/* Guard macros used at the top of the method implementations.

   Each macro makes the *enclosing function* return NULL when its check
   fails, so it may only be used inside functions returning a pointer.
   CHECK_INITIALIZED and CHECK_CLOSED set a ValueError themselves;
   ENSURE_REALIZED relies on realize() to have set the exception.

   The bodies are wrapped in do { ... } while (0) so that `MACRO(self);`
   always behaves as one statement (no dangling-else surprises), and the
   argument is parenthesized so any pointer expression may be passed. */

#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)

#define CHECK_CLOSED(self) \
    do { \
        if ((self)->closed) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on closed file"); \
            return NULL; \
        } \
    } while (0)

#define ENSURE_REALIZED(self) \
    do { \
        if (realize(self) < 0) { \
            return NULL; \
        } \
    } while (0)
60 :
61 : PyDoc_STRVAR(stringio_doc,
62 : "Text I/O implementation using an in-memory buffer.\n"
63 : "\n"
64 : "The initial_value argument sets the value of object. The newline\n"
65 : "argument is like the one of TextIOWrapper's constructor.");
66 :
67 :
68 : /* Internal routine for changing the size, in terms of characters, of the
69 : buffer of StringIO objects. The caller should ensure that the 'size'
70 : argument is non-negative. Returns 0 on success, -1 otherwise. */
71 : static int
72 0 : resize_buffer(stringio *self, size_t size)
73 : {
74 : /* Here, unsigned types are used to avoid dealing with signed integer
75 : overflow, which is undefined in C. */
76 0 : size_t alloc = self->buf_size;
77 0 : Py_UCS4 *new_buf = NULL;
78 :
79 : assert(self->buf != NULL);
80 :
81 : /* Reserve one more char for line ending detection. */
82 0 : size = size + 1;
83 : /* For simplicity, stay in the range of the signed type. Anyway, Python
84 : doesn't allow strings to be longer than this. */
85 0 : if (size > PY_SSIZE_T_MAX)
86 0 : goto overflow;
87 :
88 0 : if (size < alloc / 2) {
89 : /* Major downsize; resize down to exact size. */
90 0 : alloc = size + 1;
91 : }
92 0 : else if (size < alloc) {
93 : /* Within allocated size; quick exit */
94 0 : return 0;
95 : }
96 0 : else if (size <= alloc * 1.125) {
97 : /* Moderate upsize; overallocate similar to list_resize() */
98 0 : alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
99 : }
100 : else {
101 : /* Major upsize; resize up to exact size */
102 0 : alloc = size + 1;
103 : }
104 :
105 0 : if (alloc > PY_SIZE_MAX / sizeof(Py_UCS4))
106 0 : goto overflow;
107 0 : new_buf = (Py_UCS4 *)PyMem_Realloc(self->buf, alloc * sizeof(Py_UCS4));
108 0 : if (new_buf == NULL) {
109 0 : PyErr_NoMemory();
110 0 : return -1;
111 : }
112 0 : self->buf_size = alloc;
113 0 : self->buf = new_buf;
114 :
115 0 : return 0;
116 :
117 : overflow:
118 0 : PyErr_SetString(PyExc_OverflowError,
119 : "new buffer size too large");
120 0 : return -1;
121 : }
122 :
123 : static PyObject *
124 0 : make_intermediate(stringio *self)
125 : {
126 0 : PyObject *intermediate = _PyAccu_Finish(&self->accu);
127 0 : self->state = STATE_REALIZED;
128 0 : if (intermediate == NULL)
129 0 : return NULL;
130 0 : if (_PyAccu_Init(&self->accu) ||
131 0 : _PyAccu_Accumulate(&self->accu, intermediate)) {
132 0 : Py_DECREF(intermediate);
133 0 : return NULL;
134 : }
135 0 : self->state = STATE_ACCUMULATING;
136 0 : return intermediate;
137 : }
138 :
139 : static int
140 0 : realize(stringio *self)
141 : {
142 : Py_ssize_t len;
143 : PyObject *intermediate;
144 :
145 0 : if (self->state == STATE_REALIZED)
146 0 : return 0;
147 : assert(self->state == STATE_ACCUMULATING);
148 0 : self->state = STATE_REALIZED;
149 :
150 0 : intermediate = _PyAccu_Finish(&self->accu);
151 0 : if (intermediate == NULL)
152 0 : return -1;
153 :
154 : /* Append the intermediate string to the internal buffer.
155 : The length should be equal to the current cursor position.
156 : */
157 0 : len = PyUnicode_GET_LENGTH(intermediate);
158 0 : if (resize_buffer(self, len) < 0) {
159 0 : Py_DECREF(intermediate);
160 0 : return -1;
161 : }
162 0 : if (!PyUnicode_AsUCS4(intermediate, self->buf, len, 0)) {
163 0 : Py_DECREF(intermediate);
164 0 : return -1;
165 : }
166 :
167 0 : Py_DECREF(intermediate);
168 0 : return 0;
169 : }
170 :
171 : /* Internal routine for writing a whole PyUnicode object to the buffer of a
172 : StringIO object. Returns 0 on success, or -1 on error. */
173 : static Py_ssize_t
174 0 : write_str(stringio *self, PyObject *obj)
175 : {
176 : Py_ssize_t len;
177 0 : PyObject *decoded = NULL;
178 :
179 : assert(self->buf != NULL);
180 : assert(self->pos >= 0);
181 :
182 0 : if (self->decoder != NULL) {
183 0 : decoded = _PyIncrementalNewlineDecoder_decode(
184 : self->decoder, obj, 1 /* always final */);
185 : }
186 : else {
187 0 : decoded = obj;
188 0 : Py_INCREF(decoded);
189 : }
190 0 : if (self->writenl) {
191 0 : PyObject *translated = PyUnicode_Replace(
192 : decoded, _PyIO_str_nl, self->writenl, -1);
193 0 : Py_DECREF(decoded);
194 0 : decoded = translated;
195 : }
196 0 : if (decoded == NULL)
197 0 : return -1;
198 :
199 : assert(PyUnicode_Check(decoded));
200 0 : if (PyUnicode_READY(decoded)) {
201 0 : Py_DECREF(decoded);
202 0 : return -1;
203 : }
204 0 : len = PyUnicode_GET_LENGTH(decoded);
205 : assert(len >= 0);
206 :
207 : /* This overflow check is not strictly necessary. However, it avoids us to
208 : deal with funky things like comparing an unsigned and a signed
209 : integer. */
210 0 : if (self->pos > PY_SSIZE_T_MAX - len) {
211 0 : PyErr_SetString(PyExc_OverflowError,
212 : "new position too large");
213 0 : goto fail;
214 : }
215 :
216 0 : if (self->state == STATE_ACCUMULATING) {
217 0 : if (self->string_size == self->pos) {
218 0 : if (_PyAccu_Accumulate(&self->accu, decoded))
219 0 : goto fail;
220 0 : goto success;
221 : }
222 0 : if (realize(self))
223 0 : goto fail;
224 : }
225 :
226 0 : if (self->pos + len > self->string_size) {
227 0 : if (resize_buffer(self, self->pos + len) < 0)
228 0 : goto fail;
229 : }
230 :
231 0 : if (self->pos > self->string_size) {
232 : /* In case of overseek, pad with null bytes the buffer region between
233 : the end of stream and the current position.
234 :
235 : 0 lo string_size hi
236 : | |<---used--->|<----------available----------->|
237 : | | <--to pad-->|<---to write---> |
238 : 0 buf position
239 :
240 : */
241 0 : memset(self->buf + self->string_size, '\0',
242 0 : (self->pos - self->string_size) * sizeof(Py_UCS4));
243 : }
244 :
245 : /* Copy the data to the internal buffer, overwriting some of the
246 : existing data if self->pos < self->string_size. */
247 0 : if (!PyUnicode_AsUCS4(decoded,
248 0 : self->buf + self->pos,
249 0 : self->buf_size - self->pos,
250 : 0))
251 0 : goto fail;
252 :
253 : success:
254 : /* Set the new length of the internal string if it has changed. */
255 0 : self->pos += len;
256 0 : if (self->string_size < self->pos)
257 0 : self->string_size = self->pos;
258 :
259 0 : Py_DECREF(decoded);
260 0 : return 0;
261 :
262 : fail:
263 0 : Py_XDECREF(decoded);
264 0 : return -1;
265 : }
266 :
267 : PyDoc_STRVAR(stringio_getvalue_doc,
268 : "Retrieve the entire contents of the object.");
269 :
270 : static PyObject *
271 0 : stringio_getvalue(stringio *self)
272 : {
273 0 : CHECK_INITIALIZED(self);
274 0 : CHECK_CLOSED(self);
275 0 : if (self->state == STATE_ACCUMULATING)
276 0 : return make_intermediate(self);
277 0 : return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, self->buf,
278 : self->string_size);
279 : }
280 :
281 : PyDoc_STRVAR(stringio_tell_doc,
282 : "Tell the current file position.");
283 :
284 : static PyObject *
285 0 : stringio_tell(stringio *self)
286 : {
287 0 : CHECK_INITIALIZED(self);
288 0 : CHECK_CLOSED(self);
289 0 : return PyLong_FromSsize_t(self->pos);
290 : }
291 :
292 : PyDoc_STRVAR(stringio_read_doc,
293 : "Read at most n characters, returned as a string.\n"
294 : "\n"
295 : "If the argument is negative or omitted, read until EOF\n"
296 : "is reached. Return an empty string at EOF.\n");
297 :
298 : static PyObject *
299 0 : stringio_read(stringio *self, PyObject *args)
300 : {
301 : Py_ssize_t size, n;
302 : Py_UCS4 *output;
303 0 : PyObject *arg = Py_None;
304 :
305 0 : CHECK_INITIALIZED(self);
306 0 : if (!PyArg_ParseTuple(args, "|O:read", &arg))
307 0 : return NULL;
308 0 : CHECK_CLOSED(self);
309 :
310 0 : if (PyNumber_Check(arg)) {
311 0 : size = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
312 0 : if (size == -1 && PyErr_Occurred())
313 0 : return NULL;
314 : }
315 0 : else if (arg == Py_None) {
316 : /* Read until EOF is reached, by default. */
317 0 : size = -1;
318 : }
319 : else {
320 0 : PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
321 0 : Py_TYPE(arg)->tp_name);
322 0 : return NULL;
323 : }
324 :
325 : /* adjust invalid sizes */
326 0 : n = self->string_size - self->pos;
327 0 : if (size < 0 || size > n) {
328 0 : size = n;
329 0 : if (size < 0)
330 0 : size = 0;
331 : }
332 :
333 : /* Optimization for seek(0); read() */
334 0 : if (self->state == STATE_ACCUMULATING && self->pos == 0 && size == n) {
335 0 : PyObject *result = make_intermediate(self);
336 0 : self->pos = self->string_size;
337 0 : return result;
338 : }
339 :
340 0 : ENSURE_REALIZED(self);
341 0 : output = self->buf + self->pos;
342 0 : self->pos += size;
343 0 : return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, size);
344 : }
345 :
346 : /* Internal helper, used by stringio_readline and stringio_iternext */
347 : static PyObject *
348 0 : _stringio_readline(stringio *self, Py_ssize_t limit)
349 : {
350 : Py_UCS4 *start, *end, old_char;
351 : Py_ssize_t len, consumed;
352 :
353 : /* In case of overseek, return the empty string */
354 0 : if (self->pos >= self->string_size)
355 0 : return PyUnicode_New(0, 0);
356 :
357 0 : start = self->buf + self->pos;
358 0 : if (limit < 0 || limit > self->string_size - self->pos)
359 0 : limit = self->string_size - self->pos;
360 :
361 0 : end = start + limit;
362 0 : old_char = *end;
363 0 : *end = '\0';
364 0 : len = _PyIO_find_line_ending(
365 0 : self->readtranslate, self->readuniversal, self->readnl,
366 : PyUnicode_4BYTE_KIND, (char*)start, (char*)end, &consumed);
367 0 : *end = old_char;
368 : /* If we haven't found any line ending, we just return everything
369 : (`consumed` is ignored). */
370 0 : if (len < 0)
371 0 : len = limit;
372 0 : self->pos += len;
373 0 : return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, start, len);
374 : }
375 :
376 : PyDoc_STRVAR(stringio_readline_doc,
377 : "Read until newline or EOF.\n"
378 : "\n"
379 : "Returns an empty string if EOF is hit immediately.\n");
380 :
381 : static PyObject *
382 0 : stringio_readline(stringio *self, PyObject *args)
383 : {
384 0 : PyObject *arg = Py_None;
385 0 : Py_ssize_t limit = -1;
386 :
387 0 : CHECK_INITIALIZED(self);
388 0 : if (!PyArg_ParseTuple(args, "|O:readline", &arg))
389 0 : return NULL;
390 0 : CHECK_CLOSED(self);
391 0 : ENSURE_REALIZED(self);
392 :
393 0 : if (PyNumber_Check(arg)) {
394 0 : limit = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
395 0 : if (limit == -1 && PyErr_Occurred())
396 0 : return NULL;
397 : }
398 0 : else if (arg != Py_None) {
399 0 : PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
400 0 : Py_TYPE(arg)->tp_name);
401 0 : return NULL;
402 : }
403 0 : return _stringio_readline(self, limit);
404 : }
405 :
406 : static PyObject *
407 0 : stringio_iternext(stringio *self)
408 : {
409 : PyObject *line;
410 :
411 0 : CHECK_INITIALIZED(self);
412 0 : CHECK_CLOSED(self);
413 0 : ENSURE_REALIZED(self);
414 :
415 0 : if (Py_TYPE(self) == &PyStringIO_Type) {
416 : /* Skip method call overhead for speed */
417 0 : line = _stringio_readline(self, -1);
418 : }
419 : else {
420 : /* XXX is subclassing StringIO really supported? */
421 0 : line = PyObject_CallMethodObjArgs((PyObject *)self,
422 : _PyIO_str_readline, NULL);
423 0 : if (line && !PyUnicode_Check(line)) {
424 0 : PyErr_Format(PyExc_IOError,
425 : "readline() should have returned an str object, "
426 0 : "not '%.200s'", Py_TYPE(line)->tp_name);
427 0 : Py_DECREF(line);
428 0 : return NULL;
429 : }
430 : }
431 :
432 0 : if (line == NULL)
433 0 : return NULL;
434 :
435 0 : if (PyUnicode_GET_LENGTH(line) == 0) {
436 : /* Reached EOF */
437 0 : Py_DECREF(line);
438 0 : return NULL;
439 : }
440 :
441 0 : return line;
442 : }
443 :
444 : PyDoc_STRVAR(stringio_truncate_doc,
445 : "Truncate size to pos.\n"
446 : "\n"
447 : "The pos argument defaults to the current file position, as\n"
448 : "returned by tell(). The current file position is unchanged.\n"
449 : "Returns the new absolute position.\n");
450 :
451 : static PyObject *
452 0 : stringio_truncate(stringio *self, PyObject *args)
453 : {
454 : Py_ssize_t size;
455 0 : PyObject *arg = Py_None;
456 :
457 0 : CHECK_INITIALIZED(self);
458 0 : if (!PyArg_ParseTuple(args, "|O:truncate", &arg))
459 0 : return NULL;
460 0 : CHECK_CLOSED(self);
461 :
462 0 : if (PyNumber_Check(arg)) {
463 0 : size = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
464 0 : if (size == -1 && PyErr_Occurred())
465 0 : return NULL;
466 : }
467 0 : else if (arg == Py_None) {
468 : /* Truncate to current position if no argument is passed. */
469 0 : size = self->pos;
470 : }
471 : else {
472 0 : PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
473 0 : Py_TYPE(arg)->tp_name);
474 0 : return NULL;
475 : }
476 :
477 0 : if (size < 0) {
478 0 : PyErr_Format(PyExc_ValueError,
479 : "Negative size value %zd", size);
480 0 : return NULL;
481 : }
482 :
483 0 : if (size < self->string_size) {
484 0 : ENSURE_REALIZED(self);
485 0 : if (resize_buffer(self, size) < 0)
486 0 : return NULL;
487 0 : self->string_size = size;
488 : }
489 :
490 0 : return PyLong_FromSsize_t(size);
491 : }
492 :
493 : PyDoc_STRVAR(stringio_seek_doc,
494 : "Change stream position.\n"
495 : "\n"
496 : "Seek to character offset pos relative to position indicated by whence:\n"
497 : " 0 Start of stream (the default). pos should be >= 0;\n"
498 : " 1 Current position - pos must be 0;\n"
499 : " 2 End of stream - pos must be 0.\n"
500 : "Returns the new absolute position.\n");
501 :
502 : static PyObject *
503 0 : stringio_seek(stringio *self, PyObject *args)
504 : {
505 : Py_ssize_t pos;
506 0 : int mode = 0;
507 :
508 0 : CHECK_INITIALIZED(self);
509 0 : if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &mode))
510 0 : return NULL;
511 0 : CHECK_CLOSED(self);
512 :
513 0 : if (mode != 0 && mode != 1 && mode != 2) {
514 0 : PyErr_Format(PyExc_ValueError,
515 : "Invalid whence (%i, should be 0, 1 or 2)", mode);
516 0 : return NULL;
517 : }
518 0 : else if (pos < 0 && mode == 0) {
519 0 : PyErr_Format(PyExc_ValueError,
520 : "Negative seek position %zd", pos);
521 0 : return NULL;
522 : }
523 0 : else if (mode != 0 && pos != 0) {
524 0 : PyErr_SetString(PyExc_IOError,
525 : "Can't do nonzero cur-relative seeks");
526 0 : return NULL;
527 : }
528 :
529 : /* mode 0: offset relative to beginning of the string.
530 : mode 1: no change to current position.
531 : mode 2: change position to end of file. */
532 0 : if (mode == 1) {
533 0 : pos = self->pos;
534 : }
535 0 : else if (mode == 2) {
536 0 : pos = self->string_size;
537 : }
538 :
539 0 : self->pos = pos;
540 :
541 0 : return PyLong_FromSsize_t(self->pos);
542 : }
543 :
544 : PyDoc_STRVAR(stringio_write_doc,
545 : "Write string to file.\n"
546 : "\n"
547 : "Returns the number of characters written, which is always equal to\n"
548 : "the length of the string.\n");
549 :
550 : static PyObject *
551 0 : stringio_write(stringio *self, PyObject *obj)
552 : {
553 : Py_ssize_t size;
554 :
555 0 : CHECK_INITIALIZED(self);
556 0 : if (!PyUnicode_Check(obj)) {
557 0 : PyErr_Format(PyExc_TypeError, "string argument expected, got '%s'",
558 0 : Py_TYPE(obj)->tp_name);
559 0 : return NULL;
560 : }
561 0 : if (PyUnicode_READY(obj))
562 0 : return NULL;
563 0 : CHECK_CLOSED(self);
564 0 : size = PyUnicode_GET_LENGTH(obj);
565 :
566 0 : if (size > 0 && write_str(self, obj) < 0)
567 0 : return NULL;
568 :
569 0 : return PyLong_FromSsize_t(size);
570 : }
571 :
572 : PyDoc_STRVAR(stringio_close_doc,
573 : "Close the IO object. Attempting any further operation after the\n"
574 : "object is closed will raise a ValueError.\n"
575 : "\n"
576 : "This method has no effect if the file is already closed.\n");
577 :
578 : static PyObject *
579 0 : stringio_close(stringio *self)
580 : {
581 0 : self->closed = 1;
582 : /* Free up some memory */
583 0 : if (resize_buffer(self, 0) < 0)
584 0 : return NULL;
585 0 : _PyAccu_Destroy(&self->accu);
586 0 : Py_CLEAR(self->readnl);
587 0 : Py_CLEAR(self->writenl);
588 0 : Py_CLEAR(self->decoder);
589 0 : Py_RETURN_NONE;
590 : }
591 :
592 : static int
593 0 : stringio_traverse(stringio *self, visitproc visit, void *arg)
594 : {
595 0 : Py_VISIT(self->dict);
596 0 : return 0;
597 : }
598 :
599 : static int
600 0 : stringio_clear(stringio *self)
601 : {
602 0 : Py_CLEAR(self->dict);
603 0 : return 0;
604 : }
605 :
606 : static void
607 0 : stringio_dealloc(stringio *self)
608 : {
609 0 : _PyObject_GC_UNTRACK(self);
610 0 : self->ok = 0;
611 0 : if (self->buf) {
612 0 : PyMem_Free(self->buf);
613 0 : self->buf = NULL;
614 : }
615 0 : _PyAccu_Destroy(&self->accu);
616 0 : Py_CLEAR(self->readnl);
617 0 : Py_CLEAR(self->writenl);
618 0 : Py_CLEAR(self->decoder);
619 0 : Py_CLEAR(self->dict);
620 0 : if (self->weakreflist != NULL)
621 0 : PyObject_ClearWeakRefs((PyObject *) self);
622 0 : Py_TYPE(self)->tp_free(self);
623 0 : }
624 :
625 : static PyObject *
626 0 : stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
627 : {
628 : stringio *self;
629 :
630 : assert(type != NULL && type->tp_alloc != NULL);
631 0 : self = (stringio *)type->tp_alloc(type, 0);
632 0 : if (self == NULL)
633 0 : return NULL;
634 :
635 : /* tp_alloc initializes all the fields to zero. So we don't have to
636 : initialize them here. */
637 :
638 0 : self->buf = (Py_UCS4 *)PyMem_Malloc(0);
639 0 : if (self->buf == NULL) {
640 0 : Py_DECREF(self);
641 0 : return PyErr_NoMemory();
642 : }
643 :
644 0 : return (PyObject *)self;
645 : }
646 :
647 : static int
648 0 : stringio_init(stringio *self, PyObject *args, PyObject *kwds)
649 : {
650 0 : char *kwlist[] = {"initial_value", "newline", NULL};
651 0 : PyObject *value = NULL;
652 0 : PyObject *newline_obj = NULL;
653 0 : char *newline = "\n";
654 : Py_ssize_t value_len;
655 :
656 0 : if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OO:__init__", kwlist,
657 : &value, &newline_obj))
658 0 : return -1;
659 :
660 : /* Parse the newline argument. This used to be done with the 'z'
661 : specifier, however this allowed any object with the buffer interface to
662 : be converted. Thus we have to parse it manually since we only want to
663 : allow unicode objects or None. */
664 0 : if (newline_obj == Py_None) {
665 0 : newline = NULL;
666 : }
667 0 : else if (newline_obj) {
668 0 : if (!PyUnicode_Check(newline_obj)) {
669 0 : PyErr_Format(PyExc_TypeError,
670 : "newline must be str or None, not %.200s",
671 0 : Py_TYPE(newline_obj)->tp_name);
672 0 : return -1;
673 : }
674 0 : newline = _PyUnicode_AsString(newline_obj);
675 0 : if (newline == NULL)
676 0 : return -1;
677 : }
678 :
679 0 : if (newline && newline[0] != '\0'
680 0 : && !(newline[0] == '\n' && newline[1] == '\0')
681 0 : && !(newline[0] == '\r' && newline[1] == '\0')
682 0 : && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
683 0 : PyErr_Format(PyExc_ValueError,
684 : "illegal newline value: %R", newline_obj);
685 0 : return -1;
686 : }
687 0 : if (value && value != Py_None && !PyUnicode_Check(value)) {
688 0 : PyErr_Format(PyExc_TypeError,
689 : "initial_value must be str or None, not %.200s",
690 0 : Py_TYPE(value)->tp_name);
691 0 : return -1;
692 : }
693 :
694 0 : self->ok = 0;
695 :
696 0 : _PyAccu_Destroy(&self->accu);
697 0 : Py_CLEAR(self->readnl);
698 0 : Py_CLEAR(self->writenl);
699 0 : Py_CLEAR(self->decoder);
700 :
701 : assert((newline != NULL && newline_obj != Py_None) ||
702 : (newline == NULL && newline_obj == Py_None));
703 :
704 0 : if (newline) {
705 0 : self->readnl = PyUnicode_FromString(newline);
706 0 : if (self->readnl == NULL)
707 0 : return -1;
708 : }
709 0 : self->readuniversal = (newline == NULL || newline[0] == '\0');
710 0 : self->readtranslate = (newline == NULL);
711 : /* If newline == "", we don't translate anything.
712 : If newline == "\n" or newline == None, we translate to "\n", which is
713 : a no-op.
714 : (for newline == None, TextIOWrapper translates to os.sepline, but it
715 : is pointless for StringIO)
716 : */
717 0 : if (newline != NULL && newline[0] == '\r') {
718 0 : self->writenl = self->readnl;
719 0 : Py_INCREF(self->writenl);
720 : }
721 :
722 0 : if (self->readuniversal) {
723 0 : self->decoder = PyObject_CallFunction(
724 : (PyObject *)&PyIncrementalNewlineDecoder_Type,
725 0 : "Oi", Py_None, (int) self->readtranslate);
726 0 : if (self->decoder == NULL)
727 0 : return -1;
728 : }
729 :
730 : /* Now everything is set up, resize buffer to size of initial value,
731 : and copy it */
732 0 : self->string_size = 0;
733 0 : if (value && value != Py_None)
734 0 : value_len = PyUnicode_GetLength(value);
735 : else
736 0 : value_len = 0;
737 0 : if (value_len > 0) {
738 : /* This is a heuristic, for newline translation might change
739 : the string length. */
740 0 : if (resize_buffer(self, 0) < 0)
741 0 : return -1;
742 0 : self->state = STATE_REALIZED;
743 0 : self->pos = 0;
744 0 : if (write_str(self, value) < 0)
745 0 : return -1;
746 : }
747 : else {
748 : /* Empty stringio object, we can start by accumulating */
749 0 : if (resize_buffer(self, 0) < 0)
750 0 : return -1;
751 0 : if (_PyAccu_Init(&self->accu))
752 0 : return -1;
753 0 : self->state = STATE_ACCUMULATING;
754 : }
755 0 : self->pos = 0;
756 :
757 0 : self->closed = 0;
758 0 : self->ok = 1;
759 0 : return 0;
760 : }
761 :
762 : /* Properties and pseudo-properties */
763 : static PyObject *
764 0 : stringio_seekable(stringio *self, PyObject *args)
765 : {
766 0 : CHECK_INITIALIZED(self);
767 0 : Py_RETURN_TRUE;
768 : }
769 :
770 : static PyObject *
771 0 : stringio_readable(stringio *self, PyObject *args)
772 : {
773 0 : CHECK_INITIALIZED(self);
774 0 : Py_RETURN_TRUE;
775 : }
776 :
777 : static PyObject *
778 0 : stringio_writable(stringio *self, PyObject *args)
779 : {
780 0 : CHECK_INITIALIZED(self);
781 0 : Py_RETURN_TRUE;
782 : }
783 :
/* Pickling support.

   The implementation of __getstate__ is similar to the one for BytesIO,
   except that we also save the newline parameter. For __setstate__ and unlike
   BytesIO, we call __init__ to restore the object's state. Doing so allows us
   to avoid decoding the complex newline state while keeping the object
   representation compact.

   See the comment in bytesio.c regarding why only pickle protocol 2 and
   onward are supported.
*/
795 :
796 : static PyObject *
797 0 : stringio_getstate(stringio *self)
798 : {
799 0 : PyObject *initvalue = stringio_getvalue(self);
800 : PyObject *dict;
801 : PyObject *state;
802 :
803 0 : if (initvalue == NULL)
804 0 : return NULL;
805 0 : if (self->dict == NULL) {
806 0 : Py_INCREF(Py_None);
807 0 : dict = Py_None;
808 : }
809 : else {
810 0 : dict = PyDict_Copy(self->dict);
811 0 : if (dict == NULL)
812 0 : return NULL;
813 : }
814 :
815 0 : state = Py_BuildValue("(OOnN)", initvalue,
816 0 : self->readnl ? self->readnl : Py_None,
817 : self->pos, dict);
818 0 : Py_DECREF(initvalue);
819 0 : return state;
820 : }
821 :
822 : static PyObject *
823 0 : stringio_setstate(stringio *self, PyObject *state)
824 : {
825 : PyObject *initarg;
826 : PyObject *position_obj;
827 : PyObject *dict;
828 : Py_ssize_t pos;
829 :
830 : assert(state != NULL);
831 0 : CHECK_CLOSED(self);
832 :
833 : /* We allow the state tuple to be longer than 4, because we may need
834 : someday to extend the object's state without breaking
835 : backward-compatibility. */
836 0 : if (!PyTuple_Check(state) || Py_SIZE(state) < 4) {
837 0 : PyErr_Format(PyExc_TypeError,
838 : "%.200s.__setstate__ argument should be 4-tuple, got %.200s",
839 0 : Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
840 0 : return NULL;
841 : }
842 :
843 : /* Initialize the object's state. */
844 0 : initarg = PyTuple_GetSlice(state, 0, 2);
845 0 : if (initarg == NULL)
846 0 : return NULL;
847 0 : if (stringio_init(self, initarg, NULL) < 0) {
848 0 : Py_DECREF(initarg);
849 0 : return NULL;
850 : }
851 0 : Py_DECREF(initarg);
852 :
853 : /* Restore the buffer state. Even if __init__ did initialize the buffer,
854 : we have to initialize it again since __init__ may translates the
855 : newlines in the inital_value string. We clearly do not want that
856 : because the string value in the state tuple has already been translated
857 : once by __init__. So we do not take any chance and replace object's
858 : buffer completely. */
859 : {
860 : PyObject *item;
861 : Py_UCS4 *buf;
862 : Py_ssize_t bufsize;
863 :
864 0 : item = PyTuple_GET_ITEM(state, 0);
865 0 : buf = PyUnicode_AsUCS4Copy(item);
866 0 : if (buf == NULL)
867 0 : return NULL;
868 0 : bufsize = PyUnicode_GET_LENGTH(item);
869 :
870 0 : if (resize_buffer(self, bufsize) < 0) {
871 0 : PyMem_Free(buf);
872 0 : return NULL;
873 : }
874 0 : memcpy(self->buf, buf, bufsize * sizeof(Py_UCS4));
875 0 : PyMem_Free(buf);
876 0 : self->string_size = bufsize;
877 : }
878 :
879 : /* Set carefully the position value. Alternatively, we could use the seek
880 : method instead of modifying self->pos directly to better protect the
881 : object internal state against errneous (or malicious) inputs. */
882 0 : position_obj = PyTuple_GET_ITEM(state, 2);
883 0 : if (!PyLong_Check(position_obj)) {
884 0 : PyErr_Format(PyExc_TypeError,
885 : "third item of state must be an integer, got %.200s",
886 0 : Py_TYPE(position_obj)->tp_name);
887 0 : return NULL;
888 : }
889 0 : pos = PyLong_AsSsize_t(position_obj);
890 0 : if (pos == -1 && PyErr_Occurred())
891 0 : return NULL;
892 0 : if (pos < 0) {
893 0 : PyErr_SetString(PyExc_ValueError,
894 : "position value cannot be negative");
895 0 : return NULL;
896 : }
897 0 : self->pos = pos;
898 :
899 : /* Set the dictionary of the instance variables. */
900 0 : dict = PyTuple_GET_ITEM(state, 3);
901 0 : if (dict != Py_None) {
902 0 : if (!PyDict_Check(dict)) {
903 0 : PyErr_Format(PyExc_TypeError,
904 : "fourth item of state should be a dict, got a %.200s",
905 0 : Py_TYPE(dict)->tp_name);
906 0 : return NULL;
907 : }
908 0 : if (self->dict) {
909 : /* Alternatively, we could replace the internal dictionary
910 : completely. However, it seems more practical to just update it. */
911 0 : if (PyDict_Update(self->dict, dict) < 0)
912 0 : return NULL;
913 : }
914 : else {
915 0 : Py_INCREF(dict);
916 0 : self->dict = dict;
917 : }
918 : }
919 :
920 0 : Py_RETURN_NONE;
921 : }
922 :
923 :
924 : static PyObject *
925 0 : stringio_closed(stringio *self, void *context)
926 : {
927 0 : CHECK_INITIALIZED(self);
928 0 : return PyBool_FromLong(self->closed);
929 : }
930 :
931 : static PyObject *
932 0 : stringio_line_buffering(stringio *self, void *context)
933 : {
934 0 : CHECK_INITIALIZED(self);
935 0 : CHECK_CLOSED(self);
936 0 : Py_RETURN_FALSE;
937 : }
938 :
939 : static PyObject *
940 0 : stringio_newlines(stringio *self, void *context)
941 : {
942 0 : CHECK_INITIALIZED(self);
943 0 : CHECK_CLOSED(self);
944 0 : if (self->decoder == NULL)
945 0 : Py_RETURN_NONE;
946 0 : return PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
947 : }
948 :
949 : static struct PyMethodDef stringio_methods[] = {
950 : {"close", (PyCFunction)stringio_close, METH_NOARGS, stringio_close_doc},
951 : {"getvalue", (PyCFunction)stringio_getvalue, METH_NOARGS, stringio_getvalue_doc},
952 : {"read", (PyCFunction)stringio_read, METH_VARARGS, stringio_read_doc},
953 : {"readline", (PyCFunction)stringio_readline, METH_VARARGS, stringio_readline_doc},
954 : {"tell", (PyCFunction)stringio_tell, METH_NOARGS, stringio_tell_doc},
955 : {"truncate", (PyCFunction)stringio_truncate, METH_VARARGS, stringio_truncate_doc},
956 : {"seek", (PyCFunction)stringio_seek, METH_VARARGS, stringio_seek_doc},
957 : {"write", (PyCFunction)stringio_write, METH_O, stringio_write_doc},
958 :
959 : {"seekable", (PyCFunction)stringio_seekable, METH_NOARGS},
960 : {"readable", (PyCFunction)stringio_readable, METH_NOARGS},
961 : {"writable", (PyCFunction)stringio_writable, METH_NOARGS},
962 :
963 : {"__getstate__", (PyCFunction)stringio_getstate, METH_NOARGS},
964 : {"__setstate__", (PyCFunction)stringio_setstate, METH_O},
965 : {NULL, NULL} /* sentinel */
966 : };
967 :
968 : static PyGetSetDef stringio_getset[] = {
969 : {"closed", (getter)stringio_closed, NULL, NULL},
970 : {"newlines", (getter)stringio_newlines, NULL, NULL},
971 : /* (following comments straight off of the original Python wrapper:)
972 : XXX Cruft to support the TextIOWrapper API. This would only
973 : be meaningful if StringIO supported the buffer attribute.
974 : Hopefully, a better solution, than adding these pseudo-attributes,
975 : will be found.
976 : */
977 : {"line_buffering", (getter)stringio_line_buffering, NULL, NULL},
978 : {NULL}
979 : };
980 :
981 : PyTypeObject PyStringIO_Type = {
982 : PyVarObject_HEAD_INIT(NULL, 0)
983 : "_io.StringIO", /*tp_name*/
984 : sizeof(stringio), /*tp_basicsize*/
985 : 0, /*tp_itemsize*/
986 : (destructor)stringio_dealloc, /*tp_dealloc*/
987 : 0, /*tp_print*/
988 : 0, /*tp_getattr*/
989 : 0, /*tp_setattr*/
990 : 0, /*tp_reserved*/
991 : 0, /*tp_repr*/
992 : 0, /*tp_as_number*/
993 : 0, /*tp_as_sequence*/
994 : 0, /*tp_as_mapping*/
995 : 0, /*tp_hash*/
996 : 0, /*tp_call*/
997 : 0, /*tp_str*/
998 : 0, /*tp_getattro*/
999 : 0, /*tp_setattro*/
1000 : 0, /*tp_as_buffer*/
1001 : Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
1002 : | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1003 : stringio_doc, /*tp_doc*/
1004 : (traverseproc)stringio_traverse, /*tp_traverse*/
1005 : (inquiry)stringio_clear, /*tp_clear*/
1006 : 0, /*tp_richcompare*/
1007 : offsetof(stringio, weakreflist), /*tp_weaklistoffset*/
1008 : 0, /*tp_iter*/
1009 : (iternextfunc)stringio_iternext, /*tp_iternext*/
1010 : stringio_methods, /*tp_methods*/
1011 : 0, /*tp_members*/
1012 : stringio_getset, /*tp_getset*/
1013 : 0, /*tp_base*/
1014 : 0, /*tp_dict*/
1015 : 0, /*tp_descr_get*/
1016 : 0, /*tp_descr_set*/
1017 : offsetof(stringio, dict), /*tp_dictoffset*/
1018 : (initproc)stringio_init, /*tp_init*/
1019 : 0, /*tp_alloc*/
1020 : stringio_new, /*tp_new*/
1021 : };
|