Line data Source code
1 : /*
2 : An implementation of Text I/O as defined by PEP 3116 - "New I/O"
3 :
4 : Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
5 :
6 : Written by Amaury Forgeot d'Arc and Antoine Pitrou
7 : */
8 :
9 : #define PY_SSIZE_T_CLEAN
10 : #include "Python.h"
11 : #include "structmember.h"
12 : #include "_iomodule.h"
13 :
14 : _Py_IDENTIFIER(close);
15 : _Py_IDENTIFIER(_dealloc_warn);
16 : _Py_IDENTIFIER(decode);
17 : _Py_IDENTIFIER(fileno);
18 : _Py_IDENTIFIER(flush);
19 : _Py_IDENTIFIER(getpreferredencoding);
20 : _Py_IDENTIFIER(isatty);
21 : _Py_IDENTIFIER(mode);
22 : _Py_IDENTIFIER(name);
23 : _Py_IDENTIFIER(raw);
24 : _Py_IDENTIFIER(read);
25 : _Py_IDENTIFIER(read1);
26 : _Py_IDENTIFIER(readable);
27 : _Py_IDENTIFIER(replace);
28 : _Py_IDENTIFIER(reset);
29 : _Py_IDENTIFIER(seek);
30 : _Py_IDENTIFIER(seekable);
31 : _Py_IDENTIFIER(setstate);
32 : _Py_IDENTIFIER(tell);
33 : _Py_IDENTIFIER(writable);
34 :
35 : /* TextIOBase */
36 :
37 : PyDoc_STRVAR(textiobase_doc,
38 : "Base class for text I/O.\n"
39 : "\n"
40 : "This class provides a character and line based interface to stream\n"
41 : "I/O. There is no readinto method because Python's character strings\n"
42 : "are immutable. There is no public constructor.\n"
43 : );
44 :
45 : static PyObject *
46 0 : _unsupported(const char *message)
47 : {
48 0 : PyErr_SetString(IO_STATE->unsupported_operation, message);
49 0 : return NULL;
50 : }
51 :
52 : PyDoc_STRVAR(textiobase_detach_doc,
53 : "Separate the underlying buffer from the TextIOBase and return it.\n"
54 : "\n"
55 : "After the underlying buffer has been detached, the TextIO is in an\n"
56 : "unusable state.\n"
57 : );
58 :
59 : static PyObject *
60 0 : textiobase_detach(PyObject *self)
61 : {
62 0 : return _unsupported("detach");
63 : }
64 :
65 : PyDoc_STRVAR(textiobase_read_doc,
66 : "Read at most n characters from stream.\n"
67 : "\n"
68 : "Read from underlying buffer until we have n characters or we hit EOF.\n"
69 : "If n is negative or omitted, read until EOF.\n"
70 : );
71 :
72 : static PyObject *
73 0 : textiobase_read(PyObject *self, PyObject *args)
74 : {
75 0 : return _unsupported("read");
76 : }
77 :
78 : PyDoc_STRVAR(textiobase_readline_doc,
79 : "Read until newline or EOF.\n"
80 : "\n"
81 : "Returns an empty string if EOF is hit immediately.\n"
82 : );
83 :
84 : static PyObject *
85 0 : textiobase_readline(PyObject *self, PyObject *args)
86 : {
87 0 : return _unsupported("readline");
88 : }
89 :
90 : PyDoc_STRVAR(textiobase_write_doc,
91 : "Write string to stream.\n"
92 : "Returns the number of characters written (which is always equal to\n"
93 : "the length of the string).\n"
94 : );
95 :
96 : static PyObject *
97 0 : textiobase_write(PyObject *self, PyObject *args)
98 : {
99 0 : return _unsupported("write");
100 : }
101 :
102 : PyDoc_STRVAR(textiobase_encoding_doc,
103 : "Encoding of the text stream.\n"
104 : "\n"
105 : "Subclasses should override.\n"
106 : );
107 :
108 : static PyObject *
109 0 : textiobase_encoding_get(PyObject *self, void *context)
110 : {
111 0 : Py_RETURN_NONE;
112 : }
113 :
114 : PyDoc_STRVAR(textiobase_newlines_doc,
115 : "Line endings translated so far.\n"
116 : "\n"
117 : "Only line endings translated during reading are considered.\n"
118 : "\n"
119 : "Subclasses should override.\n"
120 : );
121 :
122 : static PyObject *
123 0 : textiobase_newlines_get(PyObject *self, void *context)
124 : {
125 0 : Py_RETURN_NONE;
126 : }
127 :
128 : PyDoc_STRVAR(textiobase_errors_doc,
129 : "The error setting of the decoder or encoder.\n"
130 : "\n"
131 : "Subclasses should override.\n"
132 : );
133 :
134 : static PyObject *
135 0 : textiobase_errors_get(PyObject *self, void *context)
136 : {
137 0 : Py_RETURN_NONE;
138 : }
139 :
140 :
/* Methods exposed by _io._TextIOBase.  Every entry is a placeholder that
   raises the module's unsupported_operation exception. */
static PyMethodDef textiobase_methods[] = {
    {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
    {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
    {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
    {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
    {NULL, NULL}
};

/* Read-only attributes (no setter is installed); each getter returns
   None. */
static PyGetSetDef textiobase_getset[] = {
    {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
    {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
    {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
    {NULL}
};

/* Type object for _io._TextIOBase.  It derives from PyIOBase_Type, may be
   subclassed (Py_TPFLAGS_BASETYPE), and fills in only the documentation,
   method and getset slots; every other slot is left at 0. */
PyTypeObject PyTextIOBase_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io._TextIOBase",          /*tp_name*/
    0,                          /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    0,                          /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    textiobase_doc,             /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    textiobase_methods,         /* tp_methods */
    0,                          /* tp_members */
    textiobase_getset,          /* tp_getset */
    &PyIOBase_Type,             /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    0,                          /* tp_init */
    0,                          /* tp_alloc */
    0,                          /* tp_new */
};
196 :
197 :
198 : /* IncrementalNewlineDecoder */
199 :
200 : PyDoc_STRVAR(incrementalnewlinedecoder_doc,
201 : "Codec used when reading a file in universal newlines mode. It wraps\n"
202 : "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
203 : "records the types of newlines encountered. When used with\n"
204 : "translate=False, it ensures that the newline sequence is returned in\n"
205 : "one piece. When used with decoder=None, it expects unicode strings as\n"
206 : "decode input and translates newlines without first invoking an external\n"
207 : "decoder.\n"
208 : );
209 :
210 : typedef struct {
211 : PyObject_HEAD
212 : PyObject *decoder;
213 : PyObject *errors;
214 : signed int pendingcr: 1;
215 : signed int translate: 1;
216 : unsigned int seennl: 3;
217 : } nldecoder_object;
218 :
219 : static int
220 1 : incrementalnewlinedecoder_init(nldecoder_object *self,
221 : PyObject *args, PyObject *kwds)
222 : {
223 : PyObject *decoder;
224 : int translate;
225 1 : PyObject *errors = NULL;
226 1 : char *kwlist[] = {"decoder", "translate", "errors", NULL};
227 :
228 1 : if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
229 : kwlist, &decoder, &translate, &errors))
230 0 : return -1;
231 :
232 1 : self->decoder = decoder;
233 1 : Py_INCREF(decoder);
234 :
235 1 : if (errors == NULL) {
236 1 : self->errors = PyUnicode_FromString("strict");
237 1 : if (self->errors == NULL)
238 0 : return -1;
239 : }
240 : else {
241 0 : Py_INCREF(errors);
242 0 : self->errors = errors;
243 : }
244 :
245 1 : self->translate = translate;
246 1 : self->seennl = 0;
247 1 : self->pendingcr = 0;
248 :
249 1 : return 0;
250 : }
251 :
252 : static void
253 1 : incrementalnewlinedecoder_dealloc(nldecoder_object *self)
254 : {
255 1 : Py_CLEAR(self->decoder);
256 1 : Py_CLEAR(self->errors);
257 1 : Py_TYPE(self)->tp_free((PyObject *)self);
258 1 : }
259 :
260 : #define SEEN_CR 1
261 : #define SEEN_LF 2
262 : #define SEEN_CRLF 4
263 : #define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
264 :
265 : PyObject *
266 1 : _PyIncrementalNewlineDecoder_decode(PyObject *_self,
267 : PyObject *input, int final)
268 : {
269 : PyObject *output;
270 : Py_ssize_t output_len;
271 1 : nldecoder_object *self = (nldecoder_object *) _self;
272 :
273 1 : if (self->decoder == NULL) {
274 0 : PyErr_SetString(PyExc_ValueError,
275 : "IncrementalNewlineDecoder.__init__ not called");
276 0 : return NULL;
277 : }
278 :
279 : /* decode input (with the eventual \r from a previous pass) */
280 1 : if (self->decoder != Py_None) {
281 1 : output = PyObject_CallMethodObjArgs(self->decoder,
282 : _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
283 : }
284 : else {
285 0 : output = input;
286 0 : Py_INCREF(output);
287 : }
288 :
289 1 : if (output == NULL)
290 0 : return NULL;
291 :
292 1 : if (!PyUnicode_Check(output)) {
293 0 : PyErr_SetString(PyExc_TypeError,
294 : "decoder should return a string result");
295 0 : goto error;
296 : }
297 :
298 1 : if (PyUnicode_READY(output) == -1)
299 0 : goto error;
300 :
301 1 : output_len = PyUnicode_GET_LENGTH(output);
302 1 : if (self->pendingcr && (final || output_len > 0)) {
303 : /* Prefix output with CR */
304 : int kind;
305 : PyObject *modified;
306 : char *out;
307 :
308 0 : modified = PyUnicode_New(output_len + 1,
309 0 : PyUnicode_MAX_CHAR_VALUE(output));
310 0 : if (modified == NULL)
311 0 : goto error;
312 0 : kind = PyUnicode_KIND(modified);
313 0 : out = PyUnicode_DATA(modified);
314 0 : PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
315 0 : memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
316 0 : Py_DECREF(output);
317 0 : output = modified; /* output remains ready */
318 0 : self->pendingcr = 0;
319 0 : output_len++;
320 : }
321 :
322 : /* retain last \r even when not translating data:
323 : * then readline() is sure to get \r\n in one pass
324 : */
325 1 : if (!final) {
326 0 : if (output_len > 0
327 0 : && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
328 : {
329 0 : PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
330 0 : if (modified == NULL)
331 0 : goto error;
332 0 : Py_DECREF(output);
333 0 : output = modified;
334 0 : self->pendingcr = 1;
335 : }
336 : }
337 :
338 : /* Record which newlines are read and do newline translation if desired,
339 : all in one pass. */
340 : {
341 : void *in_str;
342 : Py_ssize_t len;
343 1 : int seennl = self->seennl;
344 1 : int only_lf = 0;
345 : int kind;
346 :
347 1 : in_str = PyUnicode_DATA(output);
348 1 : len = PyUnicode_GET_LENGTH(output);
349 1 : kind = PyUnicode_KIND(output);
350 :
351 1 : if (len == 0)
352 0 : return output;
353 :
354 : /* If, up to now, newlines are consistently \n, do a quick check
355 : for the \r *byte* with the libc's optimized memchr.
356 : */
357 1 : if (seennl == SEEN_LF || seennl == 0) {
358 1 : only_lf = (memchr(in_str, '\r', kind * len) == NULL);
359 : }
360 :
361 1 : if (only_lf) {
362 : /* If not already seen, quick scan for a possible "\n" character.
363 : (there's nothing else to be done, even when in translation mode)
364 : */
365 2 : if (seennl == 0 &&
366 1 : memchr(in_str, '\n', kind * len) != NULL) {
367 1 : if (kind == PyUnicode_1BYTE_KIND)
368 1 : seennl |= SEEN_LF;
369 : else {
370 0 : Py_ssize_t i = 0;
371 : for (;;) {
372 : Py_UCS4 c;
373 : /* Fast loop for non-control characters */
374 0 : while (PyUnicode_READ(kind, in_str, i) > '\n')
375 0 : i++;
376 0 : c = PyUnicode_READ(kind, in_str, i++);
377 0 : if (c == '\n') {
378 0 : seennl |= SEEN_LF;
379 0 : break;
380 : }
381 0 : if (i >= len)
382 0 : break;
383 0 : }
384 : }
385 : }
386 : /* Finished: we have scanned for newlines, and none of them
387 : need translating */
388 : }
389 0 : else if (!self->translate) {
390 0 : Py_ssize_t i = 0;
391 : /* We have already seen all newline types, no need to scan again */
392 0 : if (seennl == SEEN_ALL)
393 0 : goto endscan;
394 : for (;;) {
395 : Py_UCS4 c;
396 : /* Fast loop for non-control characters */
397 0 : while (PyUnicode_READ(kind, in_str, i) > '\r')
398 0 : i++;
399 0 : c = PyUnicode_READ(kind, in_str, i++);
400 0 : if (c == '\n')
401 0 : seennl |= SEEN_LF;
402 0 : else if (c == '\r') {
403 0 : if (PyUnicode_READ(kind, in_str, i) == '\n') {
404 0 : seennl |= SEEN_CRLF;
405 0 : i++;
406 : }
407 : else
408 0 : seennl |= SEEN_CR;
409 : }
410 0 : if (i >= len)
411 0 : break;
412 0 : if (seennl == SEEN_ALL)
413 0 : break;
414 0 : }
415 : endscan:
416 : ;
417 : }
418 : else {
419 : void *translated;
420 0 : int kind = PyUnicode_KIND(output);
421 0 : void *in_str = PyUnicode_DATA(output);
422 : Py_ssize_t in, out;
423 : /* XXX: Previous in-place translation here is disabled as
424 : resizing is not possible anymore */
425 : /* We could try to optimize this so that we only do a copy
426 : when there is something to translate. On the other hand,
427 : we already know there is a \r byte, so chances are high
428 : that something needs to be done. */
429 0 : translated = PyMem_Malloc(kind * len);
430 0 : if (translated == NULL) {
431 0 : PyErr_NoMemory();
432 0 : goto error;
433 : }
434 0 : in = out = 0;
435 : for (;;) {
436 : Py_UCS4 c;
437 : /* Fast loop for non-control characters */
438 0 : while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
439 0 : PyUnicode_WRITE(kind, translated, out++, c);
440 0 : if (c == '\n') {
441 0 : PyUnicode_WRITE(kind, translated, out++, c);
442 0 : seennl |= SEEN_LF;
443 0 : continue;
444 : }
445 0 : if (c == '\r') {
446 0 : if (PyUnicode_READ(kind, in_str, in) == '\n') {
447 0 : in++;
448 0 : seennl |= SEEN_CRLF;
449 : }
450 : else
451 0 : seennl |= SEEN_CR;
452 0 : PyUnicode_WRITE(kind, translated, out++, '\n');
453 0 : continue;
454 : }
455 0 : if (in > len)
456 0 : break;
457 0 : PyUnicode_WRITE(kind, translated, out++, c);
458 0 : }
459 0 : Py_DECREF(output);
460 0 : output = PyUnicode_FromKindAndData(kind, translated, out);
461 0 : PyMem_Free(translated);
462 0 : if (!output)
463 0 : return NULL;
464 : }
465 1 : self->seennl |= seennl;
466 : }
467 :
468 1 : return output;
469 :
470 : error:
471 0 : Py_DECREF(output);
472 0 : return NULL;
473 : }
474 :
475 : static PyObject *
476 0 : incrementalnewlinedecoder_decode(nldecoder_object *self,
477 : PyObject *args, PyObject *kwds)
478 : {
479 0 : char *kwlist[] = {"input", "final", NULL};
480 : PyObject *input;
481 0 : int final = 0;
482 :
483 0 : if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
484 : kwlist, &input, &final))
485 0 : return NULL;
486 0 : return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
487 : }
488 :
489 : static PyObject *
490 0 : incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
491 : {
492 : PyObject *buffer;
493 : unsigned PY_LONG_LONG flag;
494 :
495 0 : if (self->decoder != Py_None) {
496 0 : PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
497 : _PyIO_str_getstate, NULL);
498 0 : if (state == NULL)
499 0 : return NULL;
500 0 : if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
501 0 : Py_DECREF(state);
502 0 : return NULL;
503 : }
504 0 : Py_INCREF(buffer);
505 0 : Py_DECREF(state);
506 : }
507 : else {
508 0 : buffer = PyBytes_FromString("");
509 0 : flag = 0;
510 : }
511 0 : flag <<= 1;
512 0 : if (self->pendingcr)
513 0 : flag |= 1;
514 0 : return Py_BuildValue("NK", buffer, flag);
515 : }
516 :
517 : static PyObject *
518 0 : incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
519 : {
520 : PyObject *buffer;
521 : unsigned PY_LONG_LONG flag;
522 :
523 0 : if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
524 0 : return NULL;
525 :
526 0 : self->pendingcr = (int) flag & 1;
527 0 : flag >>= 1;
528 :
529 0 : if (self->decoder != Py_None)
530 0 : return _PyObject_CallMethodId(self->decoder,
531 : &PyId_setstate, "((OK))", buffer, flag);
532 : else
533 0 : Py_RETURN_NONE;
534 : }
535 :
536 : static PyObject *
537 0 : incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
538 : {
539 0 : self->seennl = 0;
540 0 : self->pendingcr = 0;
541 0 : if (self->decoder != Py_None)
542 0 : return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
543 : else
544 0 : Py_RETURN_NONE;
545 : }
546 :
547 : static PyObject *
548 0 : incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
549 : {
550 0 : switch (self->seennl) {
551 : case SEEN_CR:
552 0 : return PyUnicode_FromString("\r");
553 : case SEEN_LF:
554 0 : return PyUnicode_FromString("\n");
555 : case SEEN_CRLF:
556 0 : return PyUnicode_FromString("\r\n");
557 : case SEEN_CR | SEEN_LF:
558 0 : return Py_BuildValue("ss", "\r", "\n");
559 : case SEEN_CR | SEEN_CRLF:
560 0 : return Py_BuildValue("ss", "\r", "\r\n");
561 : case SEEN_LF | SEEN_CRLF:
562 0 : return Py_BuildValue("ss", "\n", "\r\n");
563 : case SEEN_CR | SEEN_LF | SEEN_CRLF:
564 0 : return Py_BuildValue("sss", "\r", "\n", "\r\n");
565 : default:
566 0 : Py_RETURN_NONE;
567 : }
568 :
569 : }
570 :
571 :
/* Methods of _io.IncrementalNewlineDecoder.  No docstrings are attached
   (the doc field of each entry is left unset). */
static PyMethodDef incrementalnewlinedecoder_methods[] = {
    {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
    {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
    {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
    {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
    {NULL}
};

/* `newlines` is read-only: only a getter is installed. */
static PyGetSetDef incrementalnewlinedecoder_getset[] = {
    {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
    {NULL}
};

/* Type object for _io.IncrementalNewlineDecoder.  Instances are created by
   PyType_GenericNew and set up by incrementalnewlinedecoder_init; the type
   may be subclassed (Py_TPFLAGS_BASETYPE) and names no explicit base. */
PyTypeObject PyIncrementalNewlineDecoder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.IncrementalNewlineDecoder", /*tp_name*/
    sizeof(nldecoder_object), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    incrementalnewlinedecoder_doc,          /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    incrementalnewlinedecoder_methods, /* tp_methods */
    0,                          /* tp_members */
    incrementalnewlinedecoder_getset, /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    (initproc)incrementalnewlinedecoder_init, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};
625 :
626 :
627 : /* TextIOWrapper */
628 :
629 : PyDoc_STRVAR(textiowrapper_doc,
630 : "Character and line based layer over a BufferedIOBase object, buffer.\n"
631 : "\n"
632 : "encoding gives the name of the encoding that the stream will be\n"
633 : "decoded or encoded with. It defaults to locale.getpreferredencoding(False).\n"
634 : "\n"
635 : "errors determines the strictness of encoding and decoding (see the\n"
636 : "codecs.register) and defaults to \"strict\".\n"
637 : "\n"
638 : "newline controls how line endings are handled. It can be None, '',\n"
639 : "'\\n', '\\r', and '\\r\\n'. It works as follows:\n"
640 : "\n"
641 : "* On input, if newline is None, universal newlines mode is\n"
642 : " enabled. Lines in the input can end in '\\n', '\\r', or '\\r\\n', and\n"
643 : " these are translated into '\\n' before being returned to the\n"
644 : " caller. If it is '', universal newline mode is enabled, but line\n"
645 : " endings are returned to the caller untranslated. If it has any of\n"
646 : " the other legal values, input lines are only terminated by the given\n"
647 : " string, and the line ending is returned to the caller untranslated.\n"
648 : "\n"
649 : "* On output, if newline is None, any '\\n' characters written are\n"
650 : " translated to the system default line separator, os.linesep. If\n"
651 : " newline is '' or '\n', no translation takes place. If newline is any\n"
652 : " of the other legal values, any '\\n' characters written are translated\n"
653 : " to the given string.\n"
654 : "\n"
655 : "If line_buffering is True, a call to flush is implied when a call to\n"
656 : "write contains a newline character."
657 : );
658 :
/* Signature shared by the specialized encoders defined below: they receive
   the wrapper object and the text to encode. */
typedef PyObject *
        (*encodefunc_t)(PyObject *, PyObject *);

/* Instance layout of TextIOWrapper. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;
    PyObject *encoding;
    PyObject *encoder;
    PyObject *decoder;
    PyObject *readnl;
    PyObject *errors;
    const char *writenl; /* utf-8 encoded, NULL stands for \n */
    char line_buffering;
    char write_through;
    char readuniversal;
    char readtranslate;
    char writetranslate;
    char seekable;
    char has_read1;
    char telling;
    char deallocating;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       /* list of bytes objects waiting to be
                                      written, or NULL */
    Py_ssize_t pending_bytes_count;

    /* snapshot is either None, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */
    PyObject *snapshot;
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
       the heuristic in tell(). */
    double b2cratio;

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;
720 :
721 :
722 : /* A couple of specialized cases in order to bypass the slow incremental
723 : encoding methods for the most popular encodings. */
724 :
725 : static PyObject *
726 0 : ascii_encode(textio *self, PyObject *text)
727 : {
728 0 : return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors));
729 : }
730 :
731 : static PyObject *
732 0 : utf16be_encode(textio *self, PyObject *text)
733 : {
734 0 : return _PyUnicode_EncodeUTF16(text,
735 0 : PyBytes_AS_STRING(self->errors), 1);
736 : }
737 :
738 : static PyObject *
739 0 : utf16le_encode(textio *self, PyObject *text)
740 : {
741 0 : return _PyUnicode_EncodeUTF16(text,
742 0 : PyBytes_AS_STRING(self->errors), -1);
743 : }
744 :
745 : static PyObject *
746 0 : utf16_encode(textio *self, PyObject *text)
747 : {
748 0 : if (!self->encoding_start_of_stream) {
749 : /* Skip the BOM and use native byte ordering */
750 : #if defined(WORDS_BIGENDIAN)
751 : return utf16be_encode(self, text);
752 : #else
753 0 : return utf16le_encode(self, text);
754 : #endif
755 : }
756 0 : return _PyUnicode_EncodeUTF16(text,
757 0 : PyBytes_AS_STRING(self->errors), 0);
758 : }
759 :
760 : static PyObject *
761 0 : utf32be_encode(textio *self, PyObject *text)
762 : {
763 0 : return _PyUnicode_EncodeUTF32(text,
764 0 : PyBytes_AS_STRING(self->errors), 1);
765 : }
766 :
767 : static PyObject *
768 0 : utf32le_encode(textio *self, PyObject *text)
769 : {
770 0 : return _PyUnicode_EncodeUTF32(text,
771 0 : PyBytes_AS_STRING(self->errors), -1);
772 : }
773 :
774 : static PyObject *
775 0 : utf32_encode(textio *self, PyObject *text)
776 : {
777 0 : if (!self->encoding_start_of_stream) {
778 : /* Skip the BOM and use native byte ordering */
779 : #if defined(WORDS_BIGENDIAN)
780 : return utf32be_encode(self, text);
781 : #else
782 0 : return utf32le_encode(self, text);
783 : #endif
784 : }
785 0 : return _PyUnicode_EncodeUTF32(text,
786 0 : PyBytes_AS_STRING(self->errors), 0);
787 : }
788 :
789 : static PyObject *
790 0 : utf8_encode(textio *self, PyObject *text)
791 : {
792 0 : return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors));
793 : }
794 :
795 : static PyObject *
796 0 : latin1_encode(textio *self, PyObject *text)
797 : {
798 0 : return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors));
799 : }
800 :
/* Map normalized encoding names onto the specialized encoding funcs */

/* One table row: a codec name and the fast encoder to use for it. */
typedef struct {
    const char *name;
    encodefunc_t encodefunc;
} encodefuncentry;

/* Searched linearly by name; the {NULL, NULL} row marks the end of the
   table. */
static encodefuncentry encodefuncs[] = {
    {"ascii",       (encodefunc_t) ascii_encode},
    {"iso8859-1",   (encodefunc_t) latin1_encode},
    {"utf-8",       (encodefunc_t) utf8_encode},
    {"utf-16-be",   (encodefunc_t) utf16be_encode},
    {"utf-16-le",   (encodefunc_t) utf16le_encode},
    {"utf-16",      (encodefunc_t) utf16_encode},
    {"utf-32-be",   (encodefunc_t) utf32be_encode},
    {"utf-32-le",   (encodefunc_t) utf32le_encode},
    {"utf-32",      (encodefunc_t) utf32_encode},
    {NULL, NULL}
};
820 :
821 :
822 : static int
823 4 : textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
824 : {
825 4 : char *kwlist[] = {"buffer", "encoding", "errors",
826 : "newline", "line_buffering", "write_through",
827 : NULL};
828 : PyObject *buffer, *raw;
829 4 : char *encoding = NULL;
830 4 : char *errors = NULL;
831 4 : char *newline = NULL;
832 4 : int line_buffering = 0, write_through = 0;
833 4 : _PyIO_State *state = IO_STATE;
834 :
835 : PyObject *res;
836 : int r;
837 :
838 4 : self->ok = 0;
839 4 : self->detached = 0;
840 4 : if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzii:fileio",
841 : kwlist, &buffer, &encoding, &errors,
842 : &newline, &line_buffering, &write_through))
843 0 : return -1;
844 :
845 4 : if (newline && newline[0] != '\0'
846 3 : && !(newline[0] == '\n' && newline[1] == '\0')
847 0 : && !(newline[0] == '\r' && newline[1] == '\0')
848 0 : && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
849 0 : PyErr_Format(PyExc_ValueError,
850 : "illegal newline value: %s", newline);
851 0 : return -1;
852 : }
853 :
854 4 : Py_CLEAR(self->buffer);
855 4 : Py_CLEAR(self->encoding);
856 4 : Py_CLEAR(self->encoder);
857 4 : Py_CLEAR(self->decoder);
858 4 : Py_CLEAR(self->readnl);
859 4 : Py_CLEAR(self->decoded_chars);
860 4 : Py_CLEAR(self->pending_bytes);
861 4 : Py_CLEAR(self->snapshot);
862 4 : Py_CLEAR(self->errors);
863 4 : Py_CLEAR(self->raw);
864 4 : self->decoded_chars_used = 0;
865 4 : self->pending_bytes_count = 0;
866 4 : self->encodefunc = NULL;
867 4 : self->b2cratio = 0.0;
868 :
869 4 : if (encoding == NULL) {
870 : /* Try os.device_encoding(fileno) */
871 : PyObject *fileno;
872 4 : fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
873 : /* Ignore only AttributeError and UnsupportedOperation */
874 4 : if (fileno == NULL) {
875 0 : if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
876 0 : PyErr_ExceptionMatches(state->unsupported_operation)) {
877 0 : PyErr_Clear();
878 : }
879 : else {
880 : goto error;
881 : }
882 : }
883 : else {
884 4 : int fd = (int) PyLong_AsLong(fileno);
885 4 : Py_DECREF(fileno);
886 4 : if (fd == -1 && PyErr_Occurred()) {
887 0 : goto error;
888 : }
889 :
890 4 : self->encoding = _Py_device_encoding(fd);
891 4 : if (self->encoding == NULL)
892 0 : goto error;
893 4 : else if (!PyUnicode_Check(self->encoding))
894 4 : Py_CLEAR(self->encoding);
895 : }
896 : }
897 4 : if (encoding == NULL && self->encoding == NULL) {
898 4 : if (state->locale_module == NULL) {
899 1 : state->locale_module = PyImport_ImportModule("locale");
900 1 : if (state->locale_module == NULL)
901 0 : goto catch_ImportError;
902 : else
903 1 : goto use_locale;
904 : }
905 : else {
906 : use_locale:
907 4 : self->encoding = _PyObject_CallMethodId(
908 : state->locale_module, &PyId_getpreferredencoding, "O", Py_False);
909 4 : if (self->encoding == NULL) {
910 : catch_ImportError:
911 : /*
912 : Importing locale can raise a ImportError because of
913 : _functools, and locale.getpreferredencoding can raise a
914 : ImportError if _locale is not available. These will happen
915 : during module building.
916 : */
917 0 : if (PyErr_ExceptionMatches(PyExc_ImportError)) {
918 0 : PyErr_Clear();
919 0 : self->encoding = PyUnicode_FromString("ascii");
920 : }
921 : else
922 0 : goto error;
923 : }
924 4 : else if (!PyUnicode_Check(self->encoding))
925 0 : Py_CLEAR(self->encoding);
926 : }
927 : }
928 4 : if (self->encoding != NULL) {
929 4 : encoding = _PyUnicode_AsString(self->encoding);
930 4 : if (encoding == NULL)
931 0 : goto error;
932 : }
933 0 : else if (encoding != NULL) {
934 0 : self->encoding = PyUnicode_FromString(encoding);
935 0 : if (self->encoding == NULL)
936 0 : goto error;
937 : }
938 : else {
939 0 : PyErr_SetString(PyExc_IOError,
940 : "could not determine default encoding");
941 : }
942 :
943 4 : if (errors == NULL)
944 3 : errors = "strict";
945 4 : self->errors = PyBytes_FromString(errors);
946 4 : if (self->errors == NULL)
947 0 : goto error;
948 :
949 4 : self->chunk_size = 8192;
950 4 : self->readuniversal = (newline == NULL || newline[0] == '\0');
951 4 : self->line_buffering = line_buffering;
952 4 : self->write_through = write_through;
953 4 : self->readtranslate = (newline == NULL);
954 4 : if (newline) {
955 3 : self->readnl = PyUnicode_FromString(newline);
956 3 : if (self->readnl == NULL)
957 0 : return -1;
958 : }
959 4 : self->writetranslate = (newline == NULL || newline[0] != '\0');
960 4 : if (!self->readuniversal && self->readnl) {
961 3 : self->writenl = _PyUnicode_AsString(self->readnl);
962 3 : if (self->writenl == NULL)
963 0 : goto error;
964 3 : if (!strcmp(self->writenl, "\n"))
965 3 : self->writenl = NULL;
966 : }
967 : #ifdef MS_WINDOWS
968 : else
969 : self->writenl = "\r\n";
970 : #endif
971 :
972 : /* Build the decoder object */
973 4 : res = _PyObject_CallMethodId(buffer, &PyId_readable, NULL);
974 4 : if (res == NULL)
975 0 : goto error;
976 4 : r = PyObject_IsTrue(res);
977 4 : Py_DECREF(res);
978 4 : if (r == -1)
979 0 : goto error;
980 4 : if (r == 1) {
981 2 : self->decoder = PyCodec_IncrementalDecoder(
982 : encoding, errors);
983 2 : if (self->decoder == NULL)
984 0 : goto error;
985 :
986 2 : if (self->readuniversal) {
987 1 : PyObject *incrementalDecoder = PyObject_CallFunction(
988 : (PyObject *)&PyIncrementalNewlineDecoder_Type,
989 1 : "Oi", self->decoder, (int)self->readtranslate);
990 1 : if (incrementalDecoder == NULL)
991 0 : goto error;
992 1 : Py_CLEAR(self->decoder);
993 1 : self->decoder = incrementalDecoder;
994 : }
995 : }
996 :
997 : /* Build the encoder object */
998 4 : res = _PyObject_CallMethodId(buffer, &PyId_writable, NULL);
999 4 : if (res == NULL)
1000 0 : goto error;
1001 4 : r = PyObject_IsTrue(res);
1002 4 : Py_DECREF(res);
1003 4 : if (r == -1)
1004 0 : goto error;
1005 4 : if (r == 1) {
1006 : PyObject *ci;
1007 2 : self->encoder = PyCodec_IncrementalEncoder(
1008 : encoding, errors);
1009 2 : if (self->encoder == NULL)
1010 0 : goto error;
1011 : /* Get the normalized named of the codec */
1012 2 : ci = _PyCodec_Lookup(encoding);
1013 2 : if (ci == NULL)
1014 0 : goto error;
1015 2 : res = _PyObject_GetAttrId(ci, &PyId_name);
1016 2 : Py_DECREF(ci);
1017 2 : if (res == NULL) {
1018 0 : if (PyErr_ExceptionMatches(PyExc_AttributeError))
1019 0 : PyErr_Clear();
1020 : else
1021 0 : goto error;
1022 : }
1023 2 : else if (PyUnicode_Check(res)) {
1024 2 : encodefuncentry *e = encodefuncs;
1025 8 : while (e->name != NULL) {
1026 6 : if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1027 2 : self->encodefunc = e->encodefunc;
1028 2 : break;
1029 : }
1030 4 : e++;
1031 : }
1032 : }
1033 2 : Py_XDECREF(res);
1034 : }
1035 :
1036 4 : self->buffer = buffer;
1037 4 : Py_INCREF(buffer);
1038 :
1039 6 : if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1040 2 : Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1041 0 : Py_TYPE(buffer) == &PyBufferedRandom_Type) {
1042 4 : raw = _PyObject_GetAttrId(buffer, &PyId_raw);
1043 : /* Cache the raw FileIO object to speed up 'closed' checks */
1044 4 : if (raw == NULL) {
1045 0 : if (PyErr_ExceptionMatches(PyExc_AttributeError))
1046 0 : PyErr_Clear();
1047 : else
1048 0 : goto error;
1049 : }
1050 4 : else if (Py_TYPE(raw) == &PyFileIO_Type)
1051 4 : self->raw = raw;
1052 : else
1053 0 : Py_DECREF(raw);
1054 : }
1055 :
1056 4 : res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
1057 4 : if (res == NULL)
1058 0 : goto error;
1059 4 : r = PyObject_IsTrue(res);
1060 4 : Py_DECREF(res);
1061 4 : if (r < 0)
1062 0 : goto error;
1063 4 : self->seekable = self->telling = r;
1064 :
1065 4 : self->has_read1 = _PyObject_HasAttrId(buffer, &PyId_read1);
1066 :
1067 4 : self->encoding_start_of_stream = 0;
1068 4 : if (self->seekable && self->encoder) {
1069 : PyObject *cookieObj;
1070 : int cmp;
1071 :
1072 0 : self->encoding_start_of_stream = 1;
1073 :
1074 0 : cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1075 0 : if (cookieObj == NULL)
1076 0 : goto error;
1077 :
1078 0 : cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1079 0 : Py_DECREF(cookieObj);
1080 0 : if (cmp < 0) {
1081 0 : goto error;
1082 : }
1083 :
1084 0 : if (cmp == 0) {
1085 0 : self->encoding_start_of_stream = 0;
1086 0 : res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1087 : _PyIO_zero, NULL);
1088 0 : if (res == NULL)
1089 0 : goto error;
1090 0 : Py_DECREF(res);
1091 : }
1092 : }
1093 :
1094 4 : self->ok = 1;
1095 4 : return 0;
1096 :
1097 : error:
1098 0 : return -1;
1099 : }
1100 :
1101 : static int
1102 1 : _textiowrapper_clear(textio *self)
1103 : {
1104 1 : if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1105 0 : return -1;
1106 1 : self->ok = 0;
1107 1 : Py_CLEAR(self->buffer);
1108 1 : Py_CLEAR(self->encoding);
1109 1 : Py_CLEAR(self->encoder);
1110 1 : Py_CLEAR(self->decoder);
1111 1 : Py_CLEAR(self->readnl);
1112 1 : Py_CLEAR(self->decoded_chars);
1113 1 : Py_CLEAR(self->pending_bytes);
1114 1 : Py_CLEAR(self->snapshot);
1115 1 : Py_CLEAR(self->errors);
1116 1 : Py_CLEAR(self->raw);
1117 1 : return 0;
1118 : }
1119 :
1120 : static void
1121 1 : textiowrapper_dealloc(textio *self)
1122 : {
1123 1 : self->deallocating = 1;
1124 1 : if (_textiowrapper_clear(self) < 0)
1125 1 : return;
1126 1 : _PyObject_GC_UNTRACK(self);
1127 1 : if (self->weakreflist != NULL)
1128 0 : PyObject_ClearWeakRefs((PyObject *)self);
1129 1 : Py_CLEAR(self->dict);
1130 1 : Py_TYPE(self)->tp_free((PyObject *)self);
1131 : }
1132 :
1133 : static int
1134 10 : textiowrapper_traverse(textio *self, visitproc visit, void *arg)
1135 : {
1136 10 : Py_VISIT(self->buffer);
1137 10 : Py_VISIT(self->encoding);
1138 10 : Py_VISIT(self->encoder);
1139 10 : Py_VISIT(self->decoder);
1140 10 : Py_VISIT(self->readnl);
1141 10 : Py_VISIT(self->decoded_chars);
1142 10 : Py_VISIT(self->pending_bytes);
1143 10 : Py_VISIT(self->snapshot);
1144 10 : Py_VISIT(self->errors);
1145 10 : Py_VISIT(self->raw);
1146 :
1147 10 : Py_VISIT(self->dict);
1148 10 : return 0;
1149 : }
1150 :
1151 : static int
1152 0 : textiowrapper_clear(textio *self)
1153 : {
1154 0 : if (_textiowrapper_clear(self) < 0)
1155 0 : return -1;
1156 0 : Py_CLEAR(self->dict);
1157 0 : return 0;
1158 : }
1159 :
/* Forward declaration: the CHECK_CLOSED macro below calls this getter
   before its definition appears in the file. */
1160 : static PyObject *
1161 : textiowrapper_closed_get(textio *self, void *context);
1162 :
1163 : /* This macro takes some shortcuts to make the common case faster. */
/* CHECK_CLOSED(self): make the enclosing function return NULL when the
   stream is closed.

   - When `self` is exactly a PyTextIOWrapper_Type instance, the closed state
     comes from _PyFileIO_closed(self->raw) if a raw object is cached, and
     otherwise from the truth value of textiowrapper_closed_get(self, NULL).
     A closed stream raises ValueError("I/O operation on closed file.").
   - For any other type, the check is delegated to
     _PyIOBase_check_closed().

   The expansion contains `return NULL` statements, so it can only be used
   inside functions that return a pointer.  It also declares locals named
   `r` and `_res` inside its own do { } while (0) scope. */
1164 : #define CHECK_CLOSED(self) \
1165 : do { \
1166 : int r; \
1167 : PyObject *_res; \
1168 : if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1169 : if (self->raw != NULL) \
1170 : r = _PyFileIO_closed(self->raw); \
1171 : else { \
1172 : _res = textiowrapper_closed_get(self, NULL); \
1173 : if (_res == NULL) \
1174 : return NULL; \
1175 : r = PyObject_IsTrue(_res); \
1176 : Py_DECREF(_res); \
1177 : if (r < 0) \
1178 : return NULL; \
1179 : } \
1180 : if (r > 0) { \
1181 : PyErr_SetString(PyExc_ValueError, \
1182 : "I/O operation on closed file."); \
1183 : return NULL; \
1184 : } \
1185 : } \
1186 : else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
1187 : return NULL; \
1188 : } while (0)
1189 :
/* CHECK_INITIALIZED(self): if self->ok is not positive, raise ValueError and
   make the enclosing function return NULL.  The message distinguishes a
   detached buffer (self->detached set) from an uninitialized object.

   NOTE(review): this expands to a bare `if` block rather than a
   do { } while (0) statement, so it should only be used as a standalone
   statement (never as the body of an unbraced if/else). */
1190 : #define CHECK_INITIALIZED(self) \
1191 : if (self->ok <= 0) { \
1192 : if (self->detached) { \
1193 : PyErr_SetString(PyExc_ValueError, \
1194 : "underlying buffer has been detached"); \
1195 : } else { \
1196 : PyErr_SetString(PyExc_ValueError, \
1197 : "I/O operation on uninitialized object"); \
1198 : } \
1199 : return NULL; \
1200 : }
1201 :
/* CHECK_INITIALIZED_INT(self): same check and same error messages as
   CHECK_INITIALIZED, but the enclosing function returns -1, for use in
   functions with an int return type. */
1202 : #define CHECK_INITIALIZED_INT(self) \
1203 : if (self->ok <= 0) { \
1204 : if (self->detached) { \
1205 : PyErr_SetString(PyExc_ValueError, \
1206 : "underlying buffer has been detached"); \
1207 : } else { \
1208 : PyErr_SetString(PyExc_ValueError, \
1209 : "I/O operation on uninitialized object"); \
1210 : } \
1211 : return -1; \
1212 : }
1213 :
1214 :
1215 : static PyObject *
1216 0 : textiowrapper_detach(textio *self)
1217 : {
1218 : PyObject *buffer, *res;
1219 0 : CHECK_INITIALIZED(self);
1220 0 : res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1221 0 : if (res == NULL)
1222 0 : return NULL;
1223 0 : Py_DECREF(res);
1224 0 : buffer = self->buffer;
1225 0 : self->buffer = NULL;
1226 0 : self->detached = 1;
1227 0 : self->ok = 0;
1228 0 : return buffer;
1229 : }
1230 :
1231 : /* Flush the internal write buffer. This doesn't explicitly flush the
1232 : underlying buffered object, though. */
1233 : static int
1234 2 : _textiowrapper_writeflush(textio *self)
1235 : {
1236 : PyObject *pending, *b, *ret;
1237 :
1238 2 : if (self->pending_bytes == NULL)
1239 2 : return 0;
1240 :
1241 0 : pending = self->pending_bytes;
1242 0 : Py_INCREF(pending);
1243 0 : self->pending_bytes_count = 0;
1244 0 : Py_CLEAR(self->pending_bytes);
1245 :
1246 0 : b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1247 0 : Py_DECREF(pending);
1248 0 : if (b == NULL)
1249 0 : return -1;
1250 0 : ret = PyObject_CallMethodObjArgs(self->buffer,
1251 : _PyIO_str_write, b, NULL);
1252 0 : Py_DECREF(b);
1253 0 : if (ret == NULL)
1254 0 : return -1;
1255 0 : Py_DECREF(ret);
1256 0 : return 0;
1257 : }
1258 :
1259 : static PyObject *
1260 0 : textiowrapper_write(textio *self, PyObject *args)
1261 : {
1262 : PyObject *ret;
1263 : PyObject *text; /* owned reference */
1264 : PyObject *b;
1265 : Py_ssize_t textlen;
1266 0 : int haslf = 0;
1267 0 : int needflush = 0;
1268 :
1269 0 : CHECK_INITIALIZED(self);
1270 :
1271 0 : if (!PyArg_ParseTuple(args, "U:write", &text)) {
1272 0 : return NULL;
1273 : }
1274 :
1275 0 : if (PyUnicode_READY(text) == -1)
1276 0 : return NULL;
1277 :
1278 0 : CHECK_CLOSED(self);
1279 :
1280 0 : if (self->encoder == NULL)
1281 0 : return _unsupported("not writable");
1282 :
1283 0 : Py_INCREF(text);
1284 :
1285 0 : textlen = PyUnicode_GET_LENGTH(text);
1286 :
1287 0 : if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1288 0 : if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
1289 0 : haslf = 1;
1290 :
1291 0 : if (haslf && self->writetranslate && self->writenl != NULL) {
1292 0 : PyObject *newtext = _PyObject_CallMethodId(
1293 : text, &PyId_replace, "ss", "\n", self->writenl);
1294 0 : Py_DECREF(text);
1295 0 : if (newtext == NULL)
1296 0 : return NULL;
1297 0 : text = newtext;
1298 : }
1299 :
1300 0 : if (self->write_through)
1301 0 : needflush = 1;
1302 0 : else if (self->line_buffering &&
1303 0 : (haslf ||
1304 0 : PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
1305 0 : needflush = 1;
1306 :
1307 : /* XXX What if we were just reading? */
1308 0 : if (self->encodefunc != NULL) {
1309 0 : b = (*self->encodefunc)((PyObject *) self, text);
1310 0 : self->encoding_start_of_stream = 0;
1311 : }
1312 : else
1313 0 : b = PyObject_CallMethodObjArgs(self->encoder,
1314 : _PyIO_str_encode, text, NULL);
1315 0 : Py_DECREF(text);
1316 0 : if (b == NULL)
1317 0 : return NULL;
1318 :
1319 0 : if (self->pending_bytes == NULL) {
1320 0 : self->pending_bytes = PyList_New(0);
1321 0 : if (self->pending_bytes == NULL) {
1322 0 : Py_DECREF(b);
1323 0 : return NULL;
1324 : }
1325 0 : self->pending_bytes_count = 0;
1326 : }
1327 0 : if (PyList_Append(self->pending_bytes, b) < 0) {
1328 0 : Py_DECREF(b);
1329 0 : return NULL;
1330 : }
1331 0 : self->pending_bytes_count += PyBytes_GET_SIZE(b);
1332 0 : Py_DECREF(b);
1333 0 : if (self->pending_bytes_count > self->chunk_size || needflush) {
1334 0 : if (_textiowrapper_writeflush(self) < 0)
1335 0 : return NULL;
1336 : }
1337 :
1338 0 : if (needflush) {
1339 0 : ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1340 0 : if (ret == NULL)
1341 0 : return NULL;
1342 0 : Py_DECREF(ret);
1343 : }
1344 :
1345 0 : Py_CLEAR(self->snapshot);
1346 :
1347 0 : if (self->decoder) {
1348 0 : ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
1349 0 : if (ret == NULL)
1350 0 : return NULL;
1351 0 : Py_DECREF(ret);
1352 : }
1353 :
1354 0 : return PyLong_FromSsize_t(textlen);
1355 : }
1356 :
1357 : /* Steal a reference to chars and store it in the decoded_char buffer;
1358 : */
1359 : static void
1360 0 : textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1361 : {
1362 0 : Py_CLEAR(self->decoded_chars);
1363 0 : self->decoded_chars = chars;
1364 0 : self->decoded_chars_used = 0;
1365 0 : }
1366 :
1367 : static PyObject *
1368 1 : textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1369 : {
1370 : PyObject *chars;
1371 : Py_ssize_t avail;
1372 :
1373 1 : if (self->decoded_chars == NULL)
1374 1 : return PyUnicode_FromStringAndSize(NULL, 0);
1375 :
1376 : /* decoded_chars is guaranteed to be "ready". */
1377 0 : avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
1378 0 : - self->decoded_chars_used);
1379 :
1380 : assert(avail >= 0);
1381 :
1382 0 : if (n < 0 || n > avail)
1383 0 : n = avail;
1384 :
1385 0 : if (self->decoded_chars_used > 0 || n < avail) {
1386 0 : chars = PyUnicode_Substring(self->decoded_chars,
1387 : self->decoded_chars_used,
1388 0 : self->decoded_chars_used + n);
1389 0 : if (chars == NULL)
1390 0 : return NULL;
1391 : }
1392 : else {
1393 0 : chars = self->decoded_chars;
1394 0 : Py_INCREF(chars);
1395 : }
1396 :
1397 0 : self->decoded_chars_used += n;
1398 0 : return chars;
1399 : }
1400 :
1401 : /* Read and decode the next chunk of data from the BufferedReader.
1402 : */
1403 : static int
1404 0 : textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
1405 : {
1406 0 : PyObject *dec_buffer = NULL;
1407 0 : PyObject *dec_flags = NULL;
1408 0 : PyObject *input_chunk = NULL;
1409 : PyObject *decoded_chars, *chunk_size;
1410 : Py_ssize_t nbytes, nchars;
1411 : int eof;
1412 :
1413 : /* The return value is True unless EOF was reached. The decoded string is
1414 : * placed in self._decoded_chars (replacing its previous value). The
1415 : * entire input chunk is sent to the decoder, though some of it may remain
1416 : * buffered in the decoder, yet to be converted.
1417 : */
1418 :
1419 0 : if (self->decoder == NULL) {
1420 0 : _unsupported("not readable");
1421 0 : return -1;
1422 : }
1423 :
1424 0 : if (self->telling) {
1425 : /* To prepare for tell(), we need to snapshot a point in the file
1426 : * where the decoder's input buffer is empty.
1427 : */
1428 :
1429 0 : PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1430 : _PyIO_str_getstate, NULL);
1431 0 : if (state == NULL)
1432 0 : return -1;
1433 : /* Given this, we know there was a valid snapshot point
1434 : * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1435 : */
1436 0 : if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1437 0 : Py_DECREF(state);
1438 0 : return -1;
1439 : }
1440 0 : Py_INCREF(dec_buffer);
1441 0 : Py_INCREF(dec_flags);
1442 0 : Py_DECREF(state);
1443 : }
1444 :
1445 : /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1446 0 : if (size_hint > 0) {
1447 0 : size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
1448 : }
1449 0 : chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
1450 0 : if (chunk_size == NULL)
1451 0 : goto fail;
1452 0 : input_chunk = PyObject_CallMethodObjArgs(self->buffer,
1453 0 : (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1454 : chunk_size, NULL);
1455 0 : Py_DECREF(chunk_size);
1456 0 : if (input_chunk == NULL)
1457 0 : goto fail;
1458 : assert(PyBytes_Check(input_chunk));
1459 :
1460 0 : nbytes = PyBytes_Size(input_chunk);
1461 0 : eof = (nbytes == 0);
1462 :
1463 0 : if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1464 0 : decoded_chars = _PyIncrementalNewlineDecoder_decode(
1465 : self->decoder, input_chunk, eof);
1466 : }
1467 : else {
1468 0 : decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1469 : _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1470 : }
1471 :
1472 : /* TODO sanity check: isinstance(decoded_chars, unicode) */
1473 0 : if (decoded_chars == NULL)
1474 0 : goto fail;
1475 0 : if (PyUnicode_READY(decoded_chars) == -1)
1476 0 : goto fail;
1477 0 : textiowrapper_set_decoded_chars(self, decoded_chars);
1478 0 : nchars = PyUnicode_GET_LENGTH(decoded_chars);
1479 0 : if (nchars > 0)
1480 0 : self->b2cratio = (double) nbytes / nchars;
1481 : else
1482 0 : self->b2cratio = 0.0;
1483 0 : if (nchars > 0)
1484 0 : eof = 0;
1485 :
1486 0 : if (self->telling) {
1487 : /* At the snapshot point, len(dec_buffer) bytes before the read, the
1488 : * next input to be decoded is dec_buffer + input_chunk.
1489 : */
1490 0 : PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1491 0 : if (next_input == NULL)
1492 0 : goto fail;
1493 : assert (PyBytes_Check(next_input));
1494 0 : Py_DECREF(dec_buffer);
1495 0 : Py_CLEAR(self->snapshot);
1496 0 : self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1497 : }
1498 0 : Py_DECREF(input_chunk);
1499 :
1500 0 : return (eof == 0);
1501 :
1502 : fail:
1503 0 : Py_XDECREF(dec_buffer);
1504 0 : Py_XDECREF(dec_flags);
1505 0 : Py_XDECREF(input_chunk);
1506 0 : return -1;
1507 : }
1508 :
1509 : static PyObject *
1510 1 : textiowrapper_read(textio *self, PyObject *args)
1511 : {
1512 1 : Py_ssize_t n = -1;
1513 1 : PyObject *result = NULL, *chunks = NULL;
1514 :
1515 1 : CHECK_INITIALIZED(self);
1516 :
1517 1 : if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
1518 0 : return NULL;
1519 :
1520 1 : CHECK_CLOSED(self);
1521 :
1522 1 : if (self->decoder == NULL)
1523 0 : return _unsupported("not readable");
1524 :
1525 1 : if (_textiowrapper_writeflush(self) < 0)
1526 0 : return NULL;
1527 :
1528 1 : if (n < 0) {
1529 : /* Read everything */
1530 1 : PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
1531 : PyObject *decoded;
1532 1 : if (bytes == NULL)
1533 0 : goto fail;
1534 :
1535 1 : if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1536 1 : decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1537 : bytes, 1);
1538 : else
1539 0 : decoded = PyObject_CallMethodObjArgs(
1540 : self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
1541 1 : Py_DECREF(bytes);
1542 1 : if (decoded == NULL)
1543 0 : goto fail;
1544 :
1545 1 : result = textiowrapper_get_decoded_chars(self, -1);
1546 :
1547 1 : if (result == NULL) {
1548 0 : Py_DECREF(decoded);
1549 0 : return NULL;
1550 : }
1551 :
1552 1 : PyUnicode_AppendAndDel(&result, decoded);
1553 1 : if (result == NULL)
1554 0 : goto fail;
1555 :
1556 1 : Py_CLEAR(self->snapshot);
1557 1 : return result;
1558 : }
1559 : else {
1560 0 : int res = 1;
1561 0 : Py_ssize_t remaining = n;
1562 :
1563 0 : result = textiowrapper_get_decoded_chars(self, n);
1564 0 : if (result == NULL)
1565 0 : goto fail;
1566 0 : if (PyUnicode_READY(result) == -1)
1567 0 : goto fail;
1568 0 : remaining -= PyUnicode_GET_LENGTH(result);
1569 :
1570 : /* Keep reading chunks until we have n characters to return */
1571 0 : while (remaining > 0) {
1572 0 : res = textiowrapper_read_chunk(self, remaining);
1573 0 : if (res < 0) {
1574 : /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1575 : when EINTR occurs so we needn't do it ourselves. */
1576 0 : if (_PyIO_trap_eintr()) {
1577 0 : continue;
1578 : }
1579 0 : goto fail;
1580 : }
1581 0 : if (res == 0) /* EOF */
1582 0 : break;
1583 0 : if (chunks == NULL) {
1584 0 : chunks = PyList_New(0);
1585 0 : if (chunks == NULL)
1586 0 : goto fail;
1587 : }
1588 0 : if (PyUnicode_GET_LENGTH(result) > 0 &&
1589 0 : PyList_Append(chunks, result) < 0)
1590 0 : goto fail;
1591 0 : Py_DECREF(result);
1592 0 : result = textiowrapper_get_decoded_chars(self, remaining);
1593 0 : if (result == NULL)
1594 0 : goto fail;
1595 0 : remaining -= PyUnicode_GET_LENGTH(result);
1596 : }
1597 0 : if (chunks != NULL) {
1598 0 : if (result != NULL && PyList_Append(chunks, result) < 0)
1599 0 : goto fail;
1600 0 : Py_CLEAR(result);
1601 0 : result = PyUnicode_Join(_PyIO_empty_str, chunks);
1602 0 : if (result == NULL)
1603 0 : goto fail;
1604 0 : Py_CLEAR(chunks);
1605 : }
1606 0 : return result;
1607 : }
1608 : fail:
1609 0 : Py_XDECREF(result);
1610 0 : Py_XDECREF(chunks);
1611 0 : return NULL;
1612 : }
1613 :
1614 :
1615 : /* NOTE: `end` must point to the real end of the Py_UCS4 storage,
1616 : that is to the NUL character. Otherwise the function will produce
1617 : incorrect results. */
1618 : static char *
1619 0 : find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
1620 : {
1621 0 : if (kind == PyUnicode_1BYTE_KIND) {
1622 : assert(ch < 256);
1623 0 : return (char *) memchr((void *) s, (char) ch, end - s);
1624 : }
1625 : for (;;) {
1626 0 : while (PyUnicode_READ(kind, s, 0) > ch)
1627 0 : s += kind;
1628 0 : if (PyUnicode_READ(kind, s, 0) == ch)
1629 0 : return s;
1630 0 : if (s == end)
1631 0 : return NULL;
1632 0 : s += kind;
1633 0 : }
1634 : }
1635 :
/* Search the character data in [start, end) for the first line ending.

   Parameters:
     translated  non-zero when newlines were already translated, so only
                 '\n' is searched for;
     universal   non-zero for universal-newline search ('\r', '\r\n', '\n');
                 only consulted when `translated` is zero;
     readnl      the line terminator string, used only in the
                 non-universal case (asserted to be 1-byte kind);
     kind        character width in bytes of the data at `start`;
     start, end  bounds of the data; the code reads the character at `end`
                 and relies on it being the string's NUL terminator;
     consumed    output, written only when no line ending is found.

   Returns the number of characters from `start` up to and including the
   line ending when one is found.  Otherwise returns -1 and stores in
   *consumed the number of leading characters that contain no part of a
   line ending. */
1636 : Py_ssize_t
1637 0 : _PyIO_find_line_ending(
1638 : int translated, int universal, PyObject *readnl,
1639 : int kind, char *start, char *end, Py_ssize_t *consumed)
1640 : {
/* Length of the searched range, in characters. */
1641 0 : Py_ssize_t len = ((char*)end - (char*)start)/kind;
1642 :
1643 0 : if (translated) {
1644 : /* Newlines are already translated, only search for \n */
1645 0 : char *pos = find_control_char(kind, start, end, '\n');
1646 0 : if (pos != NULL)
1647 0 : return (pos - start)/kind + 1;
1648 : else {
1649 0 : *consumed = len;
1650 0 : return -1;
1651 : }
1652 : }
1653 0 : else if (universal) {
1654 : /* Universal newline search. Find any of \r, \r\n, \n
1655 : * The decoder ensures that \r\n are not split in two pieces
1656 : */
1657 0 : char *s = start;
1658 : for (;;) {
1659 : Py_UCS4 ch;
1660 : /* Fast path for non-control chars. The loop always ends
1661 : since the Unicode string is NUL-terminated. */
1662 0 : while (PyUnicode_READ(kind, s, 0) > '\r')
1663 0 : s += kind;
1664 0 : if (s >= end) {
1665 0 : *consumed = len;
1666 0 : return -1;
1667 : }
1668 0 : ch = PyUnicode_READ(kind, s, 0);
/* `s` now points just past `ch`, so (s - start)/kind already counts it. */
1669 0 : s += kind;
1670 0 : if (ch == '\n')
1671 0 : return (s - start)/kind;
1672 0 : if (ch == '\r') {
1673 0 : if (PyUnicode_READ(kind, s, 0) == '\n')
1674 0 : return (s - start)/kind + 1;
1675 : else
1676 0 : return (s - start)/kind;
1677 : }
1678 0 : }
1679 : }
1680 : else {
1681 : /* Non-universal mode. */
1682 0 : Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
1683 0 : char *nl = PyUnicode_DATA(readnl);
1684 : /* Assume that readnl is an ASCII character. */
1685 : assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
1686 0 : if (readnl_len == 1) {
1687 0 : char *pos = find_control_char(kind, start, end, nl[0]);
1688 0 : if (pos != NULL)
1689 0 : return (pos - start)/kind + 1;
1690 0 : *consumed = len;
1691 0 : return -1;
1692 : }
1693 : else {
1694 0 : char *s = start;
/* `e` is the last position at which a complete terminator could still
   start; it is clamped to `s` when the data is shorter than that. */
1695 0 : char *e = end - (readnl_len - 1)*kind;
1696 : char *pos;
1697 0 : if (e < s)
1698 0 : e = s;
1699 0 : while (s < e) {
1700 : Py_ssize_t i;
/* NOTE(review): this inner `pos` shadows the outer declaration above. */
1701 0 : char *pos = find_control_char(kind, s, end, nl[0]);
1702 0 : if (pos == NULL || pos >= e)
1703 : break;
1704 0 : for (i = 1; i < readnl_len; i++) {
1705 0 : if (PyUnicode_READ(kind, pos, i) != nl[i])
1706 0 : break;
1707 : }
1708 0 : if (i == readnl_len)
1709 0 : return (pos - start)/kind + readnl_len;
1710 0 : s = pos + kind;
1711 : }
/* No complete terminator found.  If its first character occurs in the
   tail [e, end), only the data before that occurrence is reported as
   consumed, since it may be the beginning of a terminator. */
1712 0 : pos = find_control_char(kind, e, end, nl[0]);
1713 0 : if (pos == NULL)
1714 0 : *consumed = len;
1715 : else
1716 0 : *consumed = (pos - start)/kind;
1717 0 : return -1;
1718 : }
1719 : }
1720 : }
1721 :
/* Read and return one line from the stream.

   `limit` < 0 means no limit; otherwise the returned string is cut so that
   it holds at most `limit` characters.  Returns a new str reference (the
   empty string when no data could be read), or NULL with an exception set.

   Outline: decoded data is scanned with _PyIO_find_line_ending().  While no
   line ending is found, the scanned part is appended to `chunks` and any
   unscanned tail is kept in `remaining`, to be prepended to the next
   decoded chunk.  Once the line end (or the limit, or EOF) is reached, the
   pieces are joined and self->decoded_chars_used is updated to point just
   past the returned data. */
1722 : static PyObject *
1723 0 : _textiowrapper_readline(textio *self, Py_ssize_t limit)
1724 : {
1725 0 : PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1726 : Py_ssize_t start, endpos, chunked, offset_to_buffer;
1727 : int res;
1728 :
1729 0 : CHECK_CLOSED(self);
1730 :
1731 0 : if (_textiowrapper_writeflush(self) < 0)
1732 0 : return NULL;
1733 :
/* Number of characters already put aside in `chunks`. */
1734 0 : chunked = 0;
1735 :
1736 : while (1) {
1737 : char *ptr;
1738 : Py_ssize_t line_len;
1739 : int kind;
1740 0 : Py_ssize_t consumed = 0;
1741 :
1742 : /* First, get some data if necessary */
1743 0 : res = 1;
1744 0 : while (!self->decoded_chars ||
1745 0 : !PyUnicode_GET_LENGTH(self->decoded_chars)) {
1746 0 : res = textiowrapper_read_chunk(self, 0);
1747 0 : if (res < 0) {
1748 : /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1749 : when EINTR occurs so we needn't do it ourselves. */
1750 0 : if (_PyIO_trap_eintr()) {
1751 0 : continue;
1752 : }
1753 : goto error;
1754 : }
1755 0 : if (res == 0)
1756 0 : break;
1757 : }
1758 0 : if (res == 0) {
1759 : /* end of file */
1760 0 : textiowrapper_set_decoded_chars(self, NULL);
1761 0 : Py_CLEAR(self->snapshot);
1762 0 : start = endpos = offset_to_buffer = 0;
1763 : break;
1764 : }
1765 :
/* Choose the string to scan: either the decoded buffer itself, starting
   at the current read position, or `remaining` + buffer.  In the second
   case offset_to_buffer records how many leading characters of `line`
   do not belong to self->decoded_chars. */
1766 0 : if (remaining == NULL) {
1767 0 : line = self->decoded_chars;
1768 0 : start = self->decoded_chars_used;
1769 0 : offset_to_buffer = 0;
1770 0 : Py_INCREF(line);
1771 : }
1772 : else {
1773 : assert(self->decoded_chars_used == 0);
1774 0 : line = PyUnicode_Concat(remaining, self->decoded_chars);
1775 0 : start = 0;
1776 0 : offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
1777 0 : Py_CLEAR(remaining);
1778 0 : if (line == NULL)
1779 : goto error;
1780 0 : if (PyUnicode_READY(line) == -1)
1781 : goto error;
1782 : }
1783 :
1784 0 : ptr = PyUnicode_DATA(line);
1785 0 : line_len = PyUnicode_GET_LENGTH(line);
1786 0 : kind = PyUnicode_KIND(line);
1787 :
1788 0 : endpos = _PyIO_find_line_ending(
1789 0 : self->readtranslate, self->readuniversal, self->readnl,
1790 : kind,
1791 0 : ptr + kind * start,
1792 0 : ptr + kind * line_len,
1793 : &consumed);
1794 0 : if (endpos >= 0) {
/* Line ending found: convert the result (relative to `start`) into an
   index into `line`, then apply the length limit if it is hit first. */
1795 0 : endpos += start;
1796 0 : if (limit >= 0 && (endpos - start) + chunked >= limit)
1797 0 : endpos = start + limit - chunked;
1798 : break;
1799 : }
1800 :
1801 : /* We can put aside up to `endpos` */
1802 0 : endpos = consumed + start;
1803 0 : if (limit >= 0 && (endpos - start) + chunked >= limit) {
1804 : /* Didn't find line ending, but reached length limit */
1805 0 : endpos = start + limit - chunked;
1806 : break;
1807 : }
1808 :
1809 0 : if (endpos > start) {
1810 : /* No line ending seen yet - put aside current data */
1811 : PyObject *s;
1812 0 : if (chunks == NULL) {
1813 0 : chunks = PyList_New(0);
1814 0 : if (chunks == NULL)
1815 : goto error;
1816 : }
1817 0 : s = PyUnicode_Substring(line, start, endpos);
1818 0 : if (s == NULL)
1819 : goto error;
1820 0 : if (PyList_Append(chunks, s) < 0) {
1821 0 : Py_DECREF(s);
1822 : goto error;
1823 : }
1824 0 : chunked += PyUnicode_GET_LENGTH(s);
1825 0 : Py_DECREF(s);
1826 : }
1827 : /* There may be some remaining bytes we'll have to prepend to the
1828 : next chunk of data */
1829 0 : if (endpos < line_len) {
1830 0 : remaining = PyUnicode_Substring(line, endpos, line_len);
1831 0 : if (remaining == NULL)
1832 : goto error;
1833 : }
1834 0 : Py_CLEAR(line);
1835 : /* We have consumed the buffer */
1836 0 : textiowrapper_set_decoded_chars(self, NULL);
1837 0 : }
1838 :
/* After the loop: `line` is non-NULL when the loop was left with a line
   ending or the limit inside the current data; it is NULL after EOF. */
1839 0 : if (line != NULL) {
1840 : /* Our line ends in the current buffer */
1841 0 : self->decoded_chars_used = endpos - offset_to_buffer;
1842 0 : if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
1843 0 : PyObject *s = PyUnicode_Substring(line, start, endpos);
1844 0 : Py_CLEAR(line);
1845 0 : if (s == NULL)
1846 0 : goto error;
1847 0 : line = s;
1848 : }
1849 : }
/* A tail left over at EOF is part of the result as well. */
1850 0 : if (remaining != NULL) {
1851 0 : if (chunks == NULL) {
1852 0 : chunks = PyList_New(0);
1853 0 : if (chunks == NULL)
1854 0 : goto error;
1855 : }
1856 0 : if (PyList_Append(chunks, remaining) < 0)
1857 0 : goto error;
1858 0 : Py_CLEAR(remaining);
1859 : }
/* Join the pieces put aside earlier with the final piece, if any. */
1860 0 : if (chunks != NULL) {
1861 0 : if (line != NULL) {
1862 0 : if (PyList_Append(chunks, line) < 0)
1863 0 : goto error;
1864 0 : Py_DECREF(line);
1865 : }
1866 0 : line = PyUnicode_Join(_PyIO_empty_str, chunks);
1867 0 : if (line == NULL)
1868 0 : goto error;
1869 0 : Py_CLEAR(chunks);
1870 : }
/* Nothing was read at all: return the shared empty string. */
1871 0 : if (line == NULL) {
1872 0 : Py_INCREF(_PyIO_empty_str);
1873 0 : line = _PyIO_empty_str;
1874 : }
1875 :
1876 0 : return line;
1877 :
1878 : error:
1879 0 : Py_XDECREF(chunks);
1880 0 : Py_XDECREF(remaining);
1881 0 : Py_XDECREF(line);
1882 0 : return NULL;
1883 : }
1884 :
1885 : static PyObject *
1886 0 : textiowrapper_readline(textio *self, PyObject *args)
1887 : {
1888 0 : Py_ssize_t limit = -1;
1889 :
1890 0 : CHECK_INITIALIZED(self);
1891 0 : if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1892 0 : return NULL;
1893 : }
1894 0 : return _textiowrapper_readline(self, limit);
1895 : }
1896 :
1897 : /* Seek and Tell */
1898 :
/* Unpacked form of the opaque position "cookie" consumed by seek().
   The fields are packed into / unpacked from a single Python int by
   textiowrapper_build_cookie() and textiowrapper_parse_cookie(). */
1899 : typedef struct {
1900 : Py_off_t start_pos; /* position in the underlying buffer to seek back to */
1901 : int dec_flags; /* flags value handed to decoder.setstate() */
1902 : int bytes_to_feed; /* presumably bytes to re-feed to the decoder after seeking; verify in seek() */
1903 : int chars_to_skip; /* seek() replays the effect of read(chars_to_skip) from start_pos */
1904 : char need_eof; /* NOTE(review): usage not visible in this part of the file; confirm */
1905 : } cookie_type;
1906 :
1907 : /*
1908 : To speed up cookie packing/unpacking, we store the fields in a temporary
1909 : string and call _PyLong_FromByteArray() or _PyLong_AsByteArray() (resp.).
1910 : The following macros define at which offsets in the intermediary byte
1911 : string the various cookie_type fields will be stored.
1912 : */
1913 :
/* Size in bytes of the packed cookie: one Py_off_t, three ints and one char
   laid out back to back (the offsets below leave no gaps between them). */
1914 : #define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))
1915 :
1916 : #if defined(WORDS_BIGENDIAN)
1917 :
/* Byte-order flag handed to _PyLong_AsByteArray()/_PyLong_FromByteArray();
   it is #undef'ed again after the cookie helpers. */
1918 : # define IS_LITTLE_ENDIAN 0
1919 :
1920 : /* We want the least significant byte of start_pos to also be the least
1921 : significant byte of the cookie, which means that in big-endian mode we
1922 : must copy the fields in reverse order. */
1923 :
1924 : # define OFF_START_POS (sizeof(char) + 3 * sizeof(int))
1925 : # define OFF_DEC_FLAGS (sizeof(char) + 2 * sizeof(int))
1926 : # define OFF_BYTES_TO_FEED (sizeof(char) + sizeof(int))
1927 : # define OFF_CHARS_TO_SKIP (sizeof(char))
1928 : # define OFF_NEED_EOF 0
1929 :
1930 : #else
1931 :
1932 : # define IS_LITTLE_ENDIAN 1
1933 :
1934 : /* Little-endian mode: the least significant byte of start_pos will
1935 : naturally end up the least significant byte of the cookie. */
1936 :
1937 : # define OFF_START_POS 0
1938 : # define OFF_DEC_FLAGS (sizeof(Py_off_t))
1939 : # define OFF_BYTES_TO_FEED (sizeof(Py_off_t) + sizeof(int))
1940 : # define OFF_CHARS_TO_SKIP (sizeof(Py_off_t) + 2 * sizeof(int))
1941 : # define OFF_NEED_EOF (sizeof(Py_off_t) + 3 * sizeof(int))
1942 :
1943 : #endif
1944 :
1945 : static int
1946 0 : textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
1947 : {
1948 : unsigned char buffer[COOKIE_BUF_LEN];
1949 0 : PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1950 0 : if (cookieLong == NULL)
1951 0 : return -1;
1952 :
1953 0 : if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1954 : IS_LITTLE_ENDIAN, 0) < 0) {
1955 0 : Py_DECREF(cookieLong);
1956 0 : return -1;
1957 : }
1958 0 : Py_DECREF(cookieLong);
1959 :
1960 0 : memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1961 0 : memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1962 0 : memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1963 0 : memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1964 0 : memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
1965 :
1966 0 : return 0;
1967 : }
1968 :
1969 : static PyObject *
1970 0 : textiowrapper_build_cookie(cookie_type *cookie)
1971 : {
1972 : unsigned char buffer[COOKIE_BUF_LEN];
1973 :
1974 0 : memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1975 0 : memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1976 0 : memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1977 0 : memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1978 0 : memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
1979 :
1980 0 : return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1981 : }
1982 : #undef IS_LITTLE_ENDIAN
1983 :
1984 : static int
1985 0 : _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
1986 : {
1987 : PyObject *res;
1988 : /* When seeking to the start of the stream, we call decoder.reset()
1989 : rather than decoder.getstate().
1990 : This is for a few decoders such as utf-16 for which the state value
1991 : at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1992 : utf-16, that we are expecting a BOM).
1993 : */
1994 0 : if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1995 0 : res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1996 : else
1997 0 : res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
1998 : "((yi))", "", cookie->dec_flags);
1999 0 : if (res == NULL)
2000 0 : return -1;
2001 0 : Py_DECREF(res);
2002 0 : return 0;
2003 : }
2004 :
2005 : static int
2006 0 : _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2007 : {
2008 : PyObject *res;
2009 : /* Same as _textiowrapper_decoder_setstate() above. */
2010 0 : if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
2011 0 : res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2012 0 : self->encoding_start_of_stream = 1;
2013 : }
2014 : else {
2015 0 : res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
2016 : _PyIO_zero, NULL);
2017 0 : self->encoding_start_of_stream = 0;
2018 : }
2019 0 : if (res == NULL)
2020 0 : return -1;
2021 0 : Py_DECREF(res);
2022 0 : return 0;
2023 : }
2024 :
2025 : static PyObject *
2026 0 : textiowrapper_seek(textio *self, PyObject *args)
2027 : {
2028 : PyObject *cookieObj, *posobj;
2029 : cookie_type cookie;
2030 0 : int whence = 0;
2031 : PyObject *res;
2032 : int cmp;
2033 :
2034 0 : CHECK_INITIALIZED(self);
2035 :
2036 0 : if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
2037 0 : return NULL;
2038 0 : CHECK_CLOSED(self);
2039 :
2040 0 : Py_INCREF(cookieObj);
2041 :
2042 0 : if (!self->seekable) {
2043 0 : _unsupported("underlying stream is not seekable");
2044 0 : goto fail;
2045 : }
2046 :
2047 0 : if (whence == 1) {
2048 : /* seek relative to current position */
2049 0 : cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
2050 0 : if (cmp < 0)
2051 0 : goto fail;
2052 :
2053 0 : if (cmp == 0) {
2054 0 : _unsupported("can't do nonzero cur-relative seeks");
2055 0 : goto fail;
2056 : }
2057 :
2058 : /* Seeking to the current position should attempt to
2059 : * sync the underlying buffer with the current position.
2060 : */
2061 0 : Py_DECREF(cookieObj);
2062 0 : cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
2063 0 : if (cookieObj == NULL)
2064 0 : goto fail;
2065 : }
2066 0 : else if (whence == 2) {
2067 : /* seek relative to end of file */
2068 0 : cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
2069 0 : if (cmp < 0)
2070 0 : goto fail;
2071 :
2072 0 : if (cmp == 0) {
2073 0 : _unsupported("can't do nonzero end-relative seeks");
2074 0 : goto fail;
2075 : }
2076 :
2077 0 : res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
2078 0 : if (res == NULL)
2079 0 : goto fail;
2080 0 : Py_DECREF(res);
2081 :
2082 0 : textiowrapper_set_decoded_chars(self, NULL);
2083 0 : Py_CLEAR(self->snapshot);
2084 0 : if (self->decoder) {
2085 0 : res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
2086 0 : if (res == NULL)
2087 0 : goto fail;
2088 0 : Py_DECREF(res);
2089 : }
2090 :
2091 0 : res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
2092 0 : Py_XDECREF(cookieObj);
2093 0 : return res;
2094 : }
2095 0 : else if (whence != 0) {
2096 0 : PyErr_Format(PyExc_ValueError,
2097 : "invalid whence (%d, should be 0, 1 or 2)", whence);
2098 0 : goto fail;
2099 : }
2100 :
2101 0 : cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
2102 0 : if (cmp < 0)
2103 0 : goto fail;
2104 :
2105 0 : if (cmp == 1) {
2106 0 : PyErr_Format(PyExc_ValueError,
2107 : "negative seek position %R", cookieObj);
2108 0 : goto fail;
2109 : }
2110 :
2111 0 : res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2112 0 : if (res == NULL)
2113 0 : goto fail;
2114 0 : Py_DECREF(res);
2115 :
2116 : /* The strategy of seek() is to go back to the safe start point
2117 : * and replay the effect of read(chars_to_skip) from there.
2118 : */
2119 0 : if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2120 0 : goto fail;
2121 :
2122 : /* Seek back to the safe start point. */
2123 0 : posobj = PyLong_FromOff_t(cookie.start_pos);
2124 0 : if (posobj == NULL)
2125 0 : goto fail;
2126 0 : res = PyObject_CallMethodObjArgs(self->buffer,
2127 : _PyIO_str_seek, posobj, NULL);
2128 0 : Py_DECREF(posobj);
2129 0 : if (res == NULL)
2130 0 : goto fail;
2131 0 : Py_DECREF(res);
2132 :
2133 0 : textiowrapper_set_decoded_chars(self, NULL);
2134 0 : Py_CLEAR(self->snapshot);
2135 :
2136 : /* Restore the decoder to its state from the safe start point. */
2137 0 : if (self->decoder) {
2138 0 : if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2139 0 : goto fail;
2140 : }
2141 :
2142 0 : if (cookie.chars_to_skip) {
2143 : /* Just like _read_chunk, feed the decoder and save a snapshot. */
2144 0 : PyObject *input_chunk = _PyObject_CallMethodId(
2145 : self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
2146 : PyObject *decoded;
2147 :
2148 0 : if (input_chunk == NULL)
2149 0 : goto fail;
2150 :
2151 : assert (PyBytes_Check(input_chunk));
2152 :
2153 0 : self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2154 0 : if (self->snapshot == NULL) {
2155 0 : Py_DECREF(input_chunk);
2156 0 : goto fail;
2157 : }
2158 :
2159 0 : decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2160 0 : "Oi", input_chunk, (int)cookie.need_eof);
2161 :
2162 0 : if (decoded == NULL)
2163 0 : goto fail;
2164 0 : if (PyUnicode_READY(decoded) == -1) {
2165 0 : Py_DECREF(decoded);
2166 0 : goto fail;
2167 : }
2168 :
2169 0 : textiowrapper_set_decoded_chars(self, decoded);
2170 :
2171 : /* Skip chars_to_skip of the decoded characters. */
2172 0 : if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
2173 0 : PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2174 0 : goto fail;
2175 : }
2176 0 : self->decoded_chars_used = cookie.chars_to_skip;
2177 : }
2178 : else {
2179 0 : self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2180 0 : if (self->snapshot == NULL)
2181 0 : goto fail;
2182 : }
2183 :
2184 : /* Finally, reset the encoder (merely useful for proper BOM handling) */
2185 0 : if (self->encoder) {
2186 0 : if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2187 0 : goto fail;
2188 : }
2189 0 : return cookieObj;
2190 : fail:
2191 0 : Py_XDECREF(cookieObj);
2192 0 : return NULL;
2193 :
2194 : }
2195 :
2196 : static PyObject *
2197 0 : textiowrapper_tell(textio *self, PyObject *args)
2198 : {
2199 : PyObject *res;
2200 0 : PyObject *posobj = NULL;
2201 0 : cookie_type cookie = {0,0,0,0,0};
2202 : PyObject *next_input;
2203 : Py_ssize_t chars_to_skip, chars_decoded;
2204 : Py_ssize_t skip_bytes, skip_back;
2205 0 : PyObject *saved_state = NULL;
2206 : char *input, *input_end;
2207 : char *dec_buffer;
2208 : Py_ssize_t dec_buffer_len;
2209 : int dec_flags;
2210 :
2211 0 : CHECK_INITIALIZED(self);
2212 0 : CHECK_CLOSED(self);
2213 :
2214 0 : if (!self->seekable) {
2215 0 : _unsupported("underlying stream is not seekable");
2216 0 : goto fail;
2217 : }
2218 0 : if (!self->telling) {
2219 0 : PyErr_SetString(PyExc_IOError,
2220 : "telling position disabled by next() call");
2221 0 : goto fail;
2222 : }
2223 :
2224 0 : if (_textiowrapper_writeflush(self) < 0)
2225 0 : return NULL;
2226 0 : res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
2227 0 : if (res == NULL)
2228 0 : goto fail;
2229 0 : Py_DECREF(res);
2230 :
2231 0 : posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
2232 0 : if (posobj == NULL)
2233 0 : goto fail;
2234 :
2235 0 : if (self->decoder == NULL || self->snapshot == NULL) {
2236 : assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
2237 0 : return posobj;
2238 : }
2239 :
2240 : #if defined(HAVE_LARGEFILE_SUPPORT)
2241 0 : cookie.start_pos = PyLong_AsLongLong(posobj);
2242 : #else
2243 : cookie.start_pos = PyLong_AsLong(posobj);
2244 : #endif
2245 0 : Py_DECREF(posobj);
2246 0 : if (PyErr_Occurred())
2247 0 : goto fail;
2248 :
2249 : /* Skip backward to the snapshot point (see _read_chunk). */
2250 0 : if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2251 0 : goto fail;
2252 :
2253 : assert (PyBytes_Check(next_input));
2254 :
2255 0 : cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2256 :
2257 : /* How many decoded characters have been used up since the snapshot? */
2258 0 : if (self->decoded_chars_used == 0) {
2259 : /* We haven't moved from the snapshot point. */
2260 0 : return textiowrapper_build_cookie(&cookie);
2261 : }
2262 :
2263 0 : chars_to_skip = self->decoded_chars_used;
2264 :
2265 : /* Decoder state will be restored at the end */
2266 0 : saved_state = PyObject_CallMethodObjArgs(self->decoder,
2267 : _PyIO_str_getstate, NULL);
2268 0 : if (saved_state == NULL)
2269 0 : goto fail;
2270 :
2271 : #define DECODER_GETSTATE() do { \
2272 : PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2273 : _PyIO_str_getstate, NULL); \
2274 : if (_state == NULL) \
2275 : goto fail; \
2276 : if (!PyArg_Parse(_state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { \
2277 : Py_DECREF(_state); \
2278 : goto fail; \
2279 : } \
2280 : Py_DECREF(_state); \
2281 : } while (0)
2282 :
2283 : /* TODO: replace assert with exception */
2284 : #define DECODER_DECODE(start, len, res) do { \
2285 : PyObject *_decoded = _PyObject_CallMethodId( \
2286 : self->decoder, &PyId_decode, "y#", start, len); \
2287 : if (_decoded == NULL) \
2288 : goto fail; \
2289 : assert (PyUnicode_Check(_decoded)); \
2290 : res = PyUnicode_GET_LENGTH(_decoded); \
2291 : Py_DECREF(_decoded); \
2292 : } while (0)
2293 :
2294 : /* Fast search for an acceptable start point, close to our
2295 : current pos */
2296 0 : skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2297 0 : skip_back = 1;
2298 : assert(skip_back <= PyBytes_GET_SIZE(next_input));
2299 0 : input = PyBytes_AS_STRING(next_input);
2300 0 : while (skip_bytes > 0) {
2301 : /* Decode up to temptative start point */
2302 0 : if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2303 0 : goto fail;
2304 0 : DECODER_DECODE(input, skip_bytes, chars_decoded);
2305 0 : if (chars_decoded <= chars_to_skip) {
2306 0 : DECODER_GETSTATE();
2307 0 : if (dec_buffer_len == 0) {
2308 : /* Before pos and no bytes buffered in decoder => OK */
2309 0 : cookie.dec_flags = dec_flags;
2310 0 : chars_to_skip -= chars_decoded;
2311 0 : break;
2312 : }
2313 : /* Skip back by buffered amount and reset heuristic */
2314 0 : skip_bytes -= dec_buffer_len;
2315 0 : skip_back = 1;
2316 : }
2317 : else {
2318 : /* We're too far ahead, skip back a bit */
2319 0 : skip_bytes -= skip_back;
2320 0 : skip_back *= 2;
2321 : }
2322 : }
2323 0 : if (skip_bytes <= 0) {
2324 0 : skip_bytes = 0;
2325 0 : if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2326 0 : goto fail;
2327 : }
2328 :
2329 : /* Note our initial start point. */
2330 0 : cookie.start_pos += skip_bytes;
2331 0 : cookie.chars_to_skip = chars_to_skip;
2332 0 : if (chars_to_skip == 0)
2333 0 : goto finally;
2334 :
2335 : /* We should be close to the desired position. Now feed the decoder one
2336 : * byte at a time until we reach the `chars_to_skip` target.
2337 : * As we go, note the nearest "safe start point" before the current
2338 : * location (a point where the decoder has nothing buffered, so seek()
2339 : * can safely start from there and advance to this location).
2340 : */
2341 0 : chars_decoded = 0;
2342 0 : input = PyBytes_AS_STRING(next_input);
2343 0 : input_end = input + PyBytes_GET_SIZE(next_input);
2344 0 : input += skip_bytes;
2345 0 : while (input < input_end) {
2346 : Py_ssize_t n;
2347 :
2348 0 : DECODER_DECODE(input, 1, n);
2349 : /* We got n chars for 1 byte */
2350 0 : chars_decoded += n;
2351 0 : cookie.bytes_to_feed += 1;
2352 0 : DECODER_GETSTATE();
2353 :
2354 0 : if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2355 : /* Decoder buffer is empty, so this is a safe start point. */
2356 0 : cookie.start_pos += cookie.bytes_to_feed;
2357 0 : chars_to_skip -= chars_decoded;
2358 0 : cookie.dec_flags = dec_flags;
2359 0 : cookie.bytes_to_feed = 0;
2360 0 : chars_decoded = 0;
2361 : }
2362 0 : if (chars_decoded >= chars_to_skip)
2363 0 : break;
2364 0 : input++;
2365 : }
2366 0 : if (input == input_end) {
2367 : /* We didn't get enough decoded data; signal EOF to get more. */
2368 0 : PyObject *decoded = _PyObject_CallMethodId(
2369 : self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
2370 0 : if (decoded == NULL)
2371 0 : goto fail;
2372 : assert (PyUnicode_Check(decoded));
2373 0 : chars_decoded += PyUnicode_GET_LENGTH(decoded);
2374 0 : Py_DECREF(decoded);
2375 0 : cookie.need_eof = 1;
2376 :
2377 0 : if (chars_decoded < chars_to_skip) {
2378 0 : PyErr_SetString(PyExc_IOError,
2379 : "can't reconstruct logical file position");
2380 0 : goto fail;
2381 : }
2382 : }
2383 :
2384 : finally:
2385 0 : res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
2386 0 : Py_DECREF(saved_state);
2387 0 : if (res == NULL)
2388 0 : return NULL;
2389 0 : Py_DECREF(res);
2390 :
2391 : /* The returned cookie corresponds to the last safe start point. */
2392 0 : cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2393 0 : return textiowrapper_build_cookie(&cookie);
2394 :
2395 : fail:
2396 0 : if (saved_state) {
2397 : PyObject *type, *value, *traceback;
2398 0 : PyErr_Fetch(&type, &value, &traceback);
2399 :
2400 0 : res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
2401 0 : Py_DECREF(saved_state);
2402 0 : if (res == NULL)
2403 0 : return NULL;
2404 0 : Py_DECREF(res);
2405 :
2406 0 : PyErr_Restore(type, value, traceback);
2407 : }
2408 0 : return NULL;
2409 : }
2410 :
2411 : static PyObject *
2412 0 : textiowrapper_truncate(textio *self, PyObject *args)
2413 : {
2414 0 : PyObject *pos = Py_None;
2415 : PyObject *res;
2416 :
2417 0 : CHECK_INITIALIZED(self)
2418 0 : if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2419 0 : return NULL;
2420 : }
2421 :
2422 0 : res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2423 0 : if (res == NULL)
2424 0 : return NULL;
2425 0 : Py_DECREF(res);
2426 :
2427 0 : return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
2428 : }
2429 :
2430 : static PyObject *
2431 1 : textiowrapper_repr(textio *self)
2432 : {
2433 : PyObject *nameobj, *modeobj, *res, *s;
2434 :
2435 1 : CHECK_INITIALIZED(self);
2436 :
2437 1 : res = PyUnicode_FromString("<_io.TextIOWrapper");
2438 1 : if (res == NULL)
2439 0 : return NULL;
2440 1 : nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
2441 1 : if (nameobj == NULL) {
2442 0 : if (PyErr_ExceptionMatches(PyExc_AttributeError))
2443 0 : PyErr_Clear();
2444 : else
2445 0 : goto error;
2446 : }
2447 : else {
2448 1 : s = PyUnicode_FromFormat(" name=%R", nameobj);
2449 1 : Py_DECREF(nameobj);
2450 1 : if (s == NULL)
2451 0 : goto error;
2452 1 : PyUnicode_AppendAndDel(&res, s);
2453 1 : if (res == NULL)
2454 0 : return NULL;
2455 : }
2456 1 : modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
2457 1 : if (modeobj == NULL) {
2458 0 : if (PyErr_ExceptionMatches(PyExc_AttributeError))
2459 0 : PyErr_Clear();
2460 : else
2461 0 : goto error;
2462 : }
2463 : else {
2464 1 : s = PyUnicode_FromFormat(" mode=%R", modeobj);
2465 1 : Py_DECREF(modeobj);
2466 1 : if (s == NULL)
2467 0 : goto error;
2468 1 : PyUnicode_AppendAndDel(&res, s);
2469 1 : if (res == NULL)
2470 0 : return NULL;
2471 : }
2472 1 : s = PyUnicode_FromFormat("%U encoding=%R>",
2473 : res, self->encoding);
2474 1 : Py_DECREF(res);
2475 1 : return s;
2476 : error:
2477 0 : Py_XDECREF(res);
2478 0 : return NULL;
2479 : }
2480 :
2481 :
2482 : /* Inquiries */
2483 :
2484 : static PyObject *
2485 0 : textiowrapper_fileno(textio *self, PyObject *args)
2486 : {
2487 0 : CHECK_INITIALIZED(self);
2488 0 : return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
2489 : }
2490 :
2491 : static PyObject *
2492 0 : textiowrapper_seekable(textio *self, PyObject *args)
2493 : {
2494 0 : CHECK_INITIALIZED(self);
2495 0 : return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
2496 : }
2497 :
2498 : static PyObject *
2499 0 : textiowrapper_readable(textio *self, PyObject *args)
2500 : {
2501 0 : CHECK_INITIALIZED(self);
2502 0 : return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
2503 : }
2504 :
2505 : static PyObject *
2506 0 : textiowrapper_writable(textio *self, PyObject *args)
2507 : {
2508 0 : CHECK_INITIALIZED(self);
2509 0 : return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
2510 : }
2511 :
2512 : static PyObject *
2513 0 : textiowrapper_isatty(textio *self, PyObject *args)
2514 : {
2515 0 : CHECK_INITIALIZED(self);
2516 0 : return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
2517 : }
2518 :
2519 : static PyObject *
2520 0 : textiowrapper_getstate(textio *self, PyObject *args)
2521 : {
2522 0 : PyErr_Format(PyExc_TypeError,
2523 0 : "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2524 0 : return NULL;
2525 : }
2526 :
2527 : static PyObject *
2528 1 : textiowrapper_flush(textio *self, PyObject *args)
2529 : {
2530 1 : CHECK_INITIALIZED(self);
2531 1 : CHECK_CLOSED(self);
2532 1 : self->telling = self->seekable;
2533 1 : if (_textiowrapper_writeflush(self) < 0)
2534 0 : return NULL;
2535 1 : return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
2536 : }
2537 :
2538 : static PyObject *
2539 1 : textiowrapper_close(textio *self, PyObject *args)
2540 : {
2541 : PyObject *res;
2542 : int r;
2543 1 : CHECK_INITIALIZED(self);
2544 :
2545 1 : res = textiowrapper_closed_get(self, NULL);
2546 1 : if (res == NULL)
2547 0 : return NULL;
2548 1 : r = PyObject_IsTrue(res);
2549 1 : Py_DECREF(res);
2550 1 : if (r < 0)
2551 0 : return NULL;
2552 :
2553 1 : if (r > 0) {
2554 0 : Py_RETURN_NONE; /* stream already closed */
2555 : }
2556 : else {
2557 1 : if (self->deallocating) {
2558 1 : res = _PyObject_CallMethodId(self->buffer, &PyId__dealloc_warn, "O", self);
2559 1 : if (res)
2560 1 : Py_DECREF(res);
2561 : else
2562 0 : PyErr_Clear();
2563 : }
2564 1 : res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
2565 1 : if (res == NULL) {
2566 0 : return NULL;
2567 : }
2568 : else
2569 1 : Py_DECREF(res);
2570 :
2571 1 : return _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
2572 : }
2573 : }
2574 :
2575 : static PyObject *
2576 0 : textiowrapper_iternext(textio *self)
2577 : {
2578 : PyObject *line;
2579 :
2580 0 : CHECK_INITIALIZED(self);
2581 :
2582 0 : self->telling = 0;
2583 0 : if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2584 : /* Skip method call overhead for speed */
2585 0 : line = _textiowrapper_readline(self, -1);
2586 : }
2587 : else {
2588 0 : line = PyObject_CallMethodObjArgs((PyObject *)self,
2589 : _PyIO_str_readline, NULL);
2590 0 : if (line && !PyUnicode_Check(line)) {
2591 0 : PyErr_Format(PyExc_IOError,
2592 : "readline() should have returned an str object, "
2593 0 : "not '%.200s'", Py_TYPE(line)->tp_name);
2594 0 : Py_DECREF(line);
2595 0 : return NULL;
2596 : }
2597 : }
2598 :
2599 0 : if (line == NULL || PyUnicode_READY(line) == -1)
2600 0 : return NULL;
2601 :
2602 0 : if (PyUnicode_GET_LENGTH(line) == 0) {
2603 : /* Reached EOF or would have blocked */
2604 0 : Py_DECREF(line);
2605 0 : Py_CLEAR(self->snapshot);
2606 0 : self->telling = self->seekable;
2607 0 : return NULL;
2608 : }
2609 :
2610 0 : return line;
2611 : }
2612 :
2613 : static PyObject *
2614 1 : textiowrapper_name_get(textio *self, void *context)
2615 : {
2616 1 : CHECK_INITIALIZED(self);
2617 1 : return _PyObject_GetAttrId(self->buffer, &PyId_name);
2618 : }
2619 :
2620 : static PyObject *
2621 2 : textiowrapper_closed_get(textio *self, void *context)
2622 : {
2623 2 : CHECK_INITIALIZED(self);
2624 2 : return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2625 : }
2626 :
2627 : static PyObject *
2628 0 : textiowrapper_newlines_get(textio *self, void *context)
2629 : {
2630 : PyObject *res;
2631 0 : CHECK_INITIALIZED(self);
2632 0 : if (self->decoder == NULL)
2633 0 : Py_RETURN_NONE;
2634 0 : res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2635 0 : if (res == NULL) {
2636 0 : if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2637 0 : PyErr_Clear();
2638 0 : Py_RETURN_NONE;
2639 : }
2640 : else {
2641 0 : return NULL;
2642 : }
2643 : }
2644 0 : return res;
2645 : }
2646 :
2647 : static PyObject *
2648 0 : textiowrapper_errors_get(textio *self, void *context)
2649 : {
2650 0 : CHECK_INITIALIZED(self);
2651 0 : return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2652 : }
2653 :
2654 : static PyObject *
2655 0 : textiowrapper_chunk_size_get(textio *self, void *context)
2656 : {
2657 0 : CHECK_INITIALIZED(self);
2658 0 : return PyLong_FromSsize_t(self->chunk_size);
2659 : }
2660 :
2661 : static int
2662 0 : textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
2663 : {
2664 : Py_ssize_t n;
2665 0 : CHECK_INITIALIZED_INT(self);
2666 0 : n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
2667 0 : if (n == -1 && PyErr_Occurred())
2668 0 : return -1;
2669 0 : if (n <= 0) {
2670 0 : PyErr_SetString(PyExc_ValueError,
2671 : "a strictly positive integer is required");
2672 0 : return -1;
2673 : }
2674 0 : self->chunk_size = n;
2675 0 : return 0;
2676 : }
2677 :
2678 : static PyMethodDef textiowrapper_methods[] = {
2679 : {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2680 : {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2681 : {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2682 : {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2683 : {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2684 : {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
2685 :
2686 : {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2687 : {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2688 : {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2689 : {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2690 : {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
2691 : {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},
2692 :
2693 : {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2694 : {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2695 : {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
2696 : {NULL, NULL}
2697 : };
2698 :
2699 : static PyMemberDef textiowrapper_members[] = {
2700 : {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2701 : {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2702 : {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
2703 : {NULL}
2704 : };
2705 :
2706 : static PyGetSetDef textiowrapper_getset[] = {
2707 : {"name", (getter)textiowrapper_name_get, NULL, NULL},
2708 : {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
2709 : /* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2710 : */
2711 : {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2712 : {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2713 : {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2714 : (setter)textiowrapper_chunk_size_set, NULL},
2715 : {NULL}
2716 : };
2717 :
2718 : PyTypeObject PyTextIOWrapper_Type = {
2719 : PyVarObject_HEAD_INIT(NULL, 0)
2720 : "_io.TextIOWrapper", /*tp_name*/
2721 : sizeof(textio), /*tp_basicsize*/
2722 : 0, /*tp_itemsize*/
2723 : (destructor)textiowrapper_dealloc, /*tp_dealloc*/
2724 : 0, /*tp_print*/
2725 : 0, /*tp_getattr*/
2726 : 0, /*tps_etattr*/
2727 : 0, /*tp_compare */
2728 : (reprfunc)textiowrapper_repr,/*tp_repr*/
2729 : 0, /*tp_as_number*/
2730 : 0, /*tp_as_sequence*/
2731 : 0, /*tp_as_mapping*/
2732 : 0, /*tp_hash */
2733 : 0, /*tp_call*/
2734 : 0, /*tp_str*/
2735 : 0, /*tp_getattro*/
2736 : 0, /*tp_setattro*/
2737 : 0, /*tp_as_buffer*/
2738 : Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2739 : | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
2740 : textiowrapper_doc, /* tp_doc */
2741 : (traverseproc)textiowrapper_traverse, /* tp_traverse */
2742 : (inquiry)textiowrapper_clear, /* tp_clear */
2743 : 0, /* tp_richcompare */
2744 : offsetof(textio, weakreflist), /*tp_weaklistoffset*/
2745 : 0, /* tp_iter */
2746 : (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2747 : textiowrapper_methods, /* tp_methods */
2748 : textiowrapper_members, /* tp_members */
2749 : textiowrapper_getset, /* tp_getset */
2750 : 0, /* tp_base */
2751 : 0, /* tp_dict */
2752 : 0, /* tp_descr_get */
2753 : 0, /* tp_descr_set */
2754 : offsetof(textio, dict), /*tp_dictoffset*/
2755 : (initproc)textiowrapper_init, /* tp_init */
2756 : 0, /* tp_alloc */
2757 : PyType_GenericNew, /* tp_new */
2758 : };
|