Line data Source code
1 : /* bytes object implementation */
2 :
3 : #define PY_SSIZE_T_CLEAN
4 :
5 : #include "Python.h"
6 :
7 : #include "bytes_methods.h"
8 : #include <stddef.h>
9 :
10 : static Py_ssize_t
11 2 : _getbuffer(PyObject *obj, Py_buffer *view)
12 : {
13 2 : PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
14 :
15 2 : if (buffer == NULL || buffer->bf_getbuffer == NULL)
16 : {
17 0 : PyErr_Format(PyExc_TypeError,
18 : "Type %.100s doesn't support the buffer API",
19 0 : Py_TYPE(obj)->tp_name);
20 0 : return -1;
21 : }
22 :
23 2 : if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
24 0 : return -1;
25 2 : return view->len;
26 : }
27 :
28 : #ifdef COUNT_ALLOCS
29 : Py_ssize_t null_strings, one_strings;
30 : #endif
31 :
32 : static PyBytesObject *characters[UCHAR_MAX + 1];
33 : static PyBytesObject *nullstring;
34 :
35 : /* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
36 : for a string of length n should request PyBytesObject_SIZE + n bytes.
37 :
38 : Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
39 : 3 bytes per string allocation on a typical system.
40 : */
41 : #define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
42 :
43 : /*
44 : For PyBytes_FromString(), the parameter `str' points to a null-terminated
45 : string containing exactly `size' bytes.
46 :
47 : For PyBytes_FromStringAndSize(), the parameter `str' is
48 : either NULL or else points to a string containing at least `size' bytes.
49 : For PyBytes_FromStringAndSize(), the string in the `str' parameter does
50 : not have to be null-terminated. (Therefore it is safe to construct a
51 : substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
52 : If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
53 : bytes (setting the last byte to the null terminating character) and you can
54 : fill in the data yourself. If `str' is non-NULL then the resulting
55 : PyBytes object must be treated as immutable and you must not fill in nor
56 : alter the data yourself, since the strings may be shared.
57 :
58 : The PyObject member `op->ob_size', which denotes the number of "extra
59 : items" in a variable-size object, will contain the number of bytes
60 : allocated for string data, not counting the null terminating character.
61 : It is therefore equal to the `size' parameter (for
62 : PyBytes_FromStringAndSize()) or the length of the string in the `str'
63 : parameter (for PyBytes_FromString()).
64 : */
65 : PyObject *
66 5019 : PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
67 : {
68 : register PyBytesObject *op;
69 5019 : if (size < 0) {
70 0 : PyErr_SetString(PyExc_SystemError,
71 : "Negative size passed to PyBytes_FromStringAndSize");
72 0 : return NULL;
73 : }
74 5019 : if (size == 0 && (op = nullstring) != NULL) {
75 : #ifdef COUNT_ALLOCS
76 : null_strings++;
77 : #endif
78 312 : Py_INCREF(op);
79 312 : return (PyObject *)op;
80 : }
81 4774 : if (size == 1 && str != NULL &&
82 67 : (op = characters[*str & UCHAR_MAX]) != NULL)
83 : {
84 : #ifdef COUNT_ALLOCS
85 : one_strings++;
86 : #endif
87 58 : Py_INCREF(op);
88 58 : return (PyObject *)op;
89 : }
90 :
91 4649 : if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
92 0 : PyErr_SetString(PyExc_OverflowError,
93 : "byte string is too large");
94 0 : return NULL;
95 : }
96 :
97 : /* Inline PyObject_NewVar */
98 4649 : op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
99 4649 : if (op == NULL)
100 0 : return PyErr_NoMemory();
101 4649 : PyObject_INIT_VAR(op, &PyBytes_Type, size);
102 4649 : op->ob_shash = -1;
103 4649 : if (str != NULL)
104 1894 : Py_MEMCPY(op->ob_sval, str, size);
105 4649 : op->ob_sval[size] = '\0';
106 : /* share short strings */
107 4649 : if (size == 0) {
108 1 : nullstring = op;
109 1 : Py_INCREF(op);
110 4648 : } else if (size == 1 && str != NULL) {
111 9 : characters[*str & UCHAR_MAX] = op;
112 9 : Py_INCREF(op);
113 : }
114 4649 : return (PyObject *) op;
115 : }
116 :
117 : PyObject *
118 30 : PyBytes_FromString(const char *str)
119 : {
120 : register size_t size;
121 : register PyBytesObject *op;
122 :
123 : assert(str != NULL);
124 30 : size = strlen(str);
125 30 : if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
126 0 : PyErr_SetString(PyExc_OverflowError,
127 : "byte string is too long");
128 0 : return NULL;
129 : }
130 30 : if (size == 0 && (op = nullstring) != NULL) {
131 : #ifdef COUNT_ALLOCS
132 : null_strings++;
133 : #endif
134 0 : Py_INCREF(op);
135 0 : return (PyObject *)op;
136 : }
137 30 : if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
138 : #ifdef COUNT_ALLOCS
139 : one_strings++;
140 : #endif
141 0 : Py_INCREF(op);
142 0 : return (PyObject *)op;
143 : }
144 :
145 : /* Inline PyObject_NewVar */
146 30 : op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
147 30 : if (op == NULL)
148 0 : return PyErr_NoMemory();
149 30 : PyObject_INIT_VAR(op, &PyBytes_Type, size);
150 30 : op->ob_shash = -1;
151 30 : Py_MEMCPY(op->ob_sval, str, size+1);
152 : /* share short strings */
153 30 : if (size == 0) {
154 0 : nullstring = op;
155 0 : Py_INCREF(op);
156 30 : } else if (size == 1) {
157 0 : characters[*str & UCHAR_MAX] = op;
158 0 : Py_INCREF(op);
159 : }
160 30 : return (PyObject *) op;
161 : }
162 :
163 : PyObject *
164 0 : PyBytes_FromFormatV(const char *format, va_list vargs)
165 : {
166 : va_list count;
167 0 : Py_ssize_t n = 0;
168 : const char* f;
169 : char *s;
170 : PyObject* string;
171 :
172 0 : Py_VA_COPY(count, vargs);
173 : /* step 1: figure out how large a buffer we need */
174 0 : for (f = format; *f; f++) {
175 0 : if (*f == '%') {
176 0 : const char* p = f;
177 0 : while (*++f && *f != '%' && !Py_ISALPHA(*f))
178 : ;
179 :
180 : /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
181 : * they don't affect the amount of space we reserve.
182 : */
183 0 : if ((*f == 'l' || *f == 'z') &&
184 0 : (f[1] == 'd' || f[1] == 'u'))
185 0 : ++f;
186 :
187 0 : switch (*f) {
188 : case 'c':
189 0 : (void)va_arg(count, int);
190 : /* fall through... */
191 : case '%':
192 0 : n++;
193 0 : break;
194 : case 'd': case 'u': case 'i': case 'x':
195 0 : (void) va_arg(count, int);
196 : /* 20 bytes is enough to hold a 64-bit
197 : integer. Decimal takes the most space.
198 : This isn't enough for octal. */
199 0 : n += 20;
200 0 : break;
201 : case 's':
202 0 : s = va_arg(count, char*);
203 0 : n += strlen(s);
204 0 : break;
205 : case 'p':
206 0 : (void) va_arg(count, int);
207 : /* maximum 64-bit pointer representation:
208 : * 0xffffffffffffffff
209 : * so 19 characters is enough.
210 : * XXX I count 18 -- what's the extra for?
211 : */
212 0 : n += 19;
213 0 : break;
214 : default:
215 : /* if we stumble upon an unknown
216 : formatting code, copy the rest of
217 : the format string to the output
218 : string. (we cannot just skip the
219 : code, since there's no way to know
220 : what's in the argument list) */
221 0 : n += strlen(p);
222 0 : goto expand;
223 : }
224 : } else
225 0 : n++;
226 : }
227 : expand:
228 : /* step 2: fill the buffer */
229 : /* Since we've analyzed how much space we need for the worst case,
230 : use sprintf directly instead of the slower PyOS_snprintf. */
231 0 : string = PyBytes_FromStringAndSize(NULL, n);
232 0 : if (!string)
233 0 : return NULL;
234 :
235 0 : s = PyBytes_AsString(string);
236 :
237 0 : for (f = format; *f; f++) {
238 0 : if (*f == '%') {
239 0 : const char* p = f++;
240 : Py_ssize_t i;
241 0 : int longflag = 0;
242 0 : int size_tflag = 0;
243 : /* parse the width.precision part (we're only
244 : interested in the precision value, if any) */
245 0 : n = 0;
246 0 : while (Py_ISDIGIT(*f))
247 0 : n = (n*10) + *f++ - '0';
248 0 : if (*f == '.') {
249 0 : f++;
250 0 : n = 0;
251 0 : while (Py_ISDIGIT(*f))
252 0 : n = (n*10) + *f++ - '0';
253 : }
254 0 : while (*f && *f != '%' && !Py_ISALPHA(*f))
255 0 : f++;
256 : /* handle the long flag, but only for %ld and %lu.
257 : others can be added when necessary. */
258 0 : if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
259 0 : longflag = 1;
260 0 : ++f;
261 : }
262 : /* handle the size_t flag. */
263 0 : if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
264 0 : size_tflag = 1;
265 0 : ++f;
266 : }
267 :
268 0 : switch (*f) {
269 : case 'c':
270 0 : *s++ = va_arg(vargs, int);
271 0 : break;
272 : case 'd':
273 0 : if (longflag)
274 0 : sprintf(s, "%ld", va_arg(vargs, long));
275 0 : else if (size_tflag)
276 0 : sprintf(s, "%" PY_FORMAT_SIZE_T "d",
277 : va_arg(vargs, Py_ssize_t));
278 : else
279 0 : sprintf(s, "%d", va_arg(vargs, int));
280 0 : s += strlen(s);
281 0 : break;
282 : case 'u':
283 0 : if (longflag)
284 0 : sprintf(s, "%lu",
285 : va_arg(vargs, unsigned long));
286 0 : else if (size_tflag)
287 0 : sprintf(s, "%" PY_FORMAT_SIZE_T "u",
288 : va_arg(vargs, size_t));
289 : else
290 0 : sprintf(s, "%u",
291 : va_arg(vargs, unsigned int));
292 0 : s += strlen(s);
293 0 : break;
294 : case 'i':
295 0 : sprintf(s, "%i", va_arg(vargs, int));
296 0 : s += strlen(s);
297 0 : break;
298 : case 'x':
299 0 : sprintf(s, "%x", va_arg(vargs, int));
300 0 : s += strlen(s);
301 0 : break;
302 : case 's':
303 0 : p = va_arg(vargs, char*);
304 0 : i = strlen(p);
305 0 : if (n > 0 && i > n)
306 0 : i = n;
307 0 : Py_MEMCPY(s, p, i);
308 0 : s += i;
309 0 : break;
310 : case 'p':
311 0 : sprintf(s, "%p", va_arg(vargs, void*));
312 : /* %p is ill-defined: ensure leading 0x. */
313 0 : if (s[1] == 'X')
314 0 : s[1] = 'x';
315 0 : else if (s[1] != 'x') {
316 0 : memmove(s+2, s, strlen(s)+1);
317 0 : s[0] = '0';
318 0 : s[1] = 'x';
319 : }
320 0 : s += strlen(s);
321 0 : break;
322 : case '%':
323 0 : *s++ = '%';
324 0 : break;
325 : default:
326 0 : strcpy(s, p);
327 0 : s += strlen(s);
328 0 : goto end;
329 : }
330 : } else
331 0 : *s++ = *f;
332 : }
333 :
334 : end:
335 0 : _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
336 0 : return string;
337 : }
338 :
339 : PyObject *
340 0 : PyBytes_FromFormat(const char *format, ...)
341 : {
342 : PyObject* ret;
343 : va_list vargs;
344 :
345 : #ifdef HAVE_STDARG_PROTOTYPES
346 0 : va_start(vargs, format);
347 : #else
348 : va_start(vargs);
349 : #endif
350 0 : ret = PyBytes_FromFormatV(format, vargs);
351 0 : va_end(vargs);
352 0 : return ret;
353 : }
354 :
355 : static void
356 1224 : bytes_dealloc(PyObject *op)
357 : {
358 1224 : Py_TYPE(op)->tp_free(op);
359 1224 : }
360 :
361 : /* Unescape a backslash-escaped string. If unicode is non-zero,
362 : the string is a u-literal. If recode_encoding is non-zero,
363 : the string is UTF-8 encoded and should be re-encoded in the
364 : specified encoding. */
365 :
366 0 : PyObject *PyBytes_DecodeEscape(const char *s,
367 : Py_ssize_t len,
368 : const char *errors,
369 : Py_ssize_t unicode,
370 : const char *recode_encoding)
371 : {
372 : int c;
373 : char *p, *buf;
374 : const char *end;
375 : PyObject *v;
376 0 : Py_ssize_t newlen = recode_encoding ? 4*len:len;
377 0 : v = PyBytes_FromStringAndSize((char *)NULL, newlen);
378 0 : if (v == NULL)
379 0 : return NULL;
380 0 : p = buf = PyBytes_AsString(v);
381 0 : end = s + len;
382 0 : while (s < end) {
383 0 : if (*s != '\\') {
384 : non_esc:
385 0 : if (recode_encoding && (*s & 0x80)) {
386 : PyObject *u, *w;
387 : char *r;
388 : const char* t;
389 : Py_ssize_t rn;
390 0 : t = s;
391 : /* Decode non-ASCII bytes as UTF-8. */
392 0 : while (t < end && (*t & 0x80)) t++;
393 0 : u = PyUnicode_DecodeUTF8(s, t - s, errors);
394 0 : if(!u) goto failed;
395 :
396 : /* Recode them in target encoding. */
397 0 : w = PyUnicode_AsEncodedString(
398 : u, recode_encoding, errors);
399 0 : Py_DECREF(u);
400 0 : if (!w) goto failed;
401 :
402 : /* Append bytes to output buffer. */
403 : assert(PyBytes_Check(w));
404 0 : r = PyBytes_AS_STRING(w);
405 0 : rn = PyBytes_GET_SIZE(w);
406 0 : Py_MEMCPY(p, r, rn);
407 0 : p += rn;
408 0 : Py_DECREF(w);
409 0 : s = t;
410 : } else {
411 0 : *p++ = *s++;
412 : }
413 0 : continue;
414 : }
415 0 : s++;
416 0 : if (s==end) {
417 0 : PyErr_SetString(PyExc_ValueError,
418 : "Trailing \\ in string");
419 0 : goto failed;
420 : }
421 0 : switch (*s++) {
422 : /* XXX This assumes ASCII! */
423 0 : case '\n': break;
424 0 : case '\\': *p++ = '\\'; break;
425 0 : case '\'': *p++ = '\''; break;
426 0 : case '\"': *p++ = '\"'; break;
427 0 : case 'b': *p++ = '\b'; break;
428 0 : case 'f': *p++ = '\014'; break; /* FF */
429 0 : case 't': *p++ = '\t'; break;
430 0 : case 'n': *p++ = '\n'; break;
431 0 : case 'r': *p++ = '\r'; break;
432 0 : case 'v': *p++ = '\013'; break; /* VT */
433 0 : case 'a': *p++ = '\007'; break; /* BEL, not classic C */
434 : case '0': case '1': case '2': case '3':
435 : case '4': case '5': case '6': case '7':
436 0 : c = s[-1] - '0';
437 0 : if (s < end && '0' <= *s && *s <= '7') {
438 0 : c = (c<<3) + *s++ - '0';
439 0 : if (s < end && '0' <= *s && *s <= '7')
440 0 : c = (c<<3) + *s++ - '0';
441 : }
442 0 : *p++ = c;
443 0 : break;
444 : case 'x':
445 0 : if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
446 0 : unsigned int x = 0;
447 0 : c = Py_CHARMASK(*s);
448 0 : s++;
449 0 : if (Py_ISDIGIT(c))
450 0 : x = c - '0';
451 0 : else if (Py_ISLOWER(c))
452 0 : x = 10 + c - 'a';
453 : else
454 0 : x = 10 + c - 'A';
455 0 : x = x << 4;
456 0 : c = Py_CHARMASK(*s);
457 0 : s++;
458 0 : if (Py_ISDIGIT(c))
459 0 : x += c - '0';
460 0 : else if (Py_ISLOWER(c))
461 0 : x += 10 + c - 'a';
462 : else
463 0 : x += 10 + c - 'A';
464 0 : *p++ = x;
465 0 : break;
466 : }
467 0 : if (!errors || strcmp(errors, "strict") == 0) {
468 0 : PyErr_SetString(PyExc_ValueError,
469 : "invalid \\x escape");
470 0 : goto failed;
471 : }
472 0 : if (strcmp(errors, "replace") == 0) {
473 0 : *p++ = '?';
474 0 : } else if (strcmp(errors, "ignore") == 0)
475 : /* do nothing */;
476 : else {
477 0 : PyErr_Format(PyExc_ValueError,
478 : "decoding error; unknown "
479 : "error handling code: %.400s",
480 : errors);
481 0 : goto failed;
482 : }
483 : default:
484 0 : *p++ = '\\';
485 0 : s--;
486 0 : goto non_esc; /* an arbitrary number of unescaped
487 : UTF-8 bytes may follow. */
488 : }
489 : }
490 0 : if (p-buf < newlen)
491 0 : _PyBytes_Resize(&v, p - buf);
492 0 : return v;
493 : failed:
494 0 : Py_DECREF(v);
495 0 : return NULL;
496 : }
497 :
498 : /* -------------------------------------------------------------------- */
499 : /* object api */
500 :
501 : Py_ssize_t
502 1275 : PyBytes_Size(register PyObject *op)
503 : {
504 1275 : if (!PyBytes_Check(op)) {
505 0 : PyErr_Format(PyExc_TypeError,
506 0 : "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
507 0 : return -1;
508 : }
509 1275 : return Py_SIZE(op);
510 : }
511 :
512 : char *
513 1352 : PyBytes_AsString(register PyObject *op)
514 : {
515 1352 : if (!PyBytes_Check(op)) {
516 0 : PyErr_Format(PyExc_TypeError,
517 0 : "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
518 0 : return NULL;
519 : }
520 1352 : return ((PyBytesObject *)op)->ob_sval;
521 : }
522 :
523 : int
524 0 : PyBytes_AsStringAndSize(register PyObject *obj,
525 : register char **s,
526 : register Py_ssize_t *len)
527 : {
528 0 : if (s == NULL) {
529 0 : PyErr_BadInternalCall();
530 0 : return -1;
531 : }
532 :
533 0 : if (!PyBytes_Check(obj)) {
534 0 : PyErr_Format(PyExc_TypeError,
535 0 : "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
536 0 : return -1;
537 : }
538 :
539 0 : *s = PyBytes_AS_STRING(obj);
540 0 : if (len != NULL)
541 0 : *len = PyBytes_GET_SIZE(obj);
542 0 : else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
543 0 : PyErr_SetString(PyExc_TypeError,
544 : "expected bytes with no null");
545 0 : return -1;
546 : }
547 0 : return 0;
548 : }
549 :
550 : /* -------------------------------------------------------------------- */
551 : /* Methods */
552 :
553 : #include "stringlib/stringdefs.h"
554 :
555 : #include "stringlib/fastsearch.h"
556 : #include "stringlib/count.h"
557 : #include "stringlib/find.h"
558 : #include "stringlib/partition.h"
559 : #include "stringlib/split.h"
560 : #include "stringlib/ctype.h"
561 :
562 : #include "stringlib/transmogrify.h"
563 :
564 : PyObject *
565 0 : PyBytes_Repr(PyObject *obj, int smartquotes)
566 : {
567 0 : register PyBytesObject* op = (PyBytesObject*) obj;
568 0 : Py_ssize_t i, length = Py_SIZE(op);
569 : size_t newsize, squotes, dquotes;
570 : PyObject *v;
571 : unsigned char quote, *s, *p;
572 :
573 : /* Compute size of output string */
574 0 : squotes = dquotes = 0;
575 0 : newsize = 3; /* b'' */
576 0 : s = (unsigned char*)op->ob_sval;
577 0 : for (i = 0; i < length; i++) {
578 0 : switch(s[i]) {
579 0 : case '\'': squotes++; newsize++; break;
580 0 : case '"': dquotes++; newsize++; break;
581 : case '\\': case '\t': case '\n': case '\r':
582 0 : newsize += 2; break; /* \C */
583 : default:
584 0 : if (s[i] < ' ' || s[i] >= 0x7f)
585 0 : newsize += 4; /* \xHH */
586 : else
587 0 : newsize++;
588 : }
589 : }
590 0 : quote = '\'';
591 0 : if (smartquotes && squotes && !dquotes)
592 0 : quote = '"';
593 0 : if (squotes && quote == '\'')
594 0 : newsize += squotes;
595 :
596 0 : if (newsize > (PY_SSIZE_T_MAX - sizeof(PyUnicodeObject) - 1)) {
597 0 : PyErr_SetString(PyExc_OverflowError,
598 : "bytes object is too large to make repr");
599 0 : return NULL;
600 : }
601 :
602 0 : v = PyUnicode_New(newsize, 127);
603 0 : if (v == NULL) {
604 0 : return NULL;
605 : }
606 0 : p = PyUnicode_1BYTE_DATA(v);
607 :
608 0 : *p++ = 'b', *p++ = quote;
609 0 : for (i = 0; i < length; i++) {
610 0 : unsigned char c = op->ob_sval[i];
611 0 : if (c == quote || c == '\\')
612 0 : *p++ = '\\', *p++ = c;
613 0 : else if (c == '\t')
614 0 : *p++ = '\\', *p++ = 't';
615 0 : else if (c == '\n')
616 0 : *p++ = '\\', *p++ = 'n';
617 0 : else if (c == '\r')
618 0 : *p++ = '\\', *p++ = 'r';
619 0 : else if (c < ' ' || c >= 0x7f) {
620 0 : *p++ = '\\';
621 0 : *p++ = 'x';
622 0 : *p++ = Py_hexdigits[(c & 0xf0) >> 4];
623 0 : *p++ = Py_hexdigits[c & 0xf];
624 : }
625 : else
626 0 : *p++ = c;
627 : }
628 0 : *p++ = quote;
629 : assert(_PyUnicode_CheckConsistency(v, 1));
630 0 : return v;
631 : }
632 :
633 : static PyObject *
634 0 : bytes_repr(PyObject *op)
635 : {
636 0 : return PyBytes_Repr(op, 1);
637 : }
638 :
639 : static PyObject *
640 0 : bytes_str(PyObject *op)
641 : {
642 0 : if (Py_BytesWarningFlag) {
643 0 : if (PyErr_WarnEx(PyExc_BytesWarning,
644 : "str() on a bytes instance", 1))
645 0 : return NULL;
646 : }
647 0 : return bytes_repr(op);
648 : }
649 :
650 : static Py_ssize_t
651 90 : bytes_length(PyBytesObject *a)
652 : {
653 90 : return Py_SIZE(a);
654 : }
655 :
656 : /* This is also used by PyBytes_Concat() */
657 : static PyObject *
658 1 : bytes_concat(PyObject *a, PyObject *b)
659 : {
660 : Py_ssize_t size;
661 : Py_buffer va, vb;
662 1 : PyObject *result = NULL;
663 :
664 1 : va.len = -1;
665 1 : vb.len = -1;
666 2 : if (_getbuffer(a, &va) < 0 ||
667 1 : _getbuffer(b, &vb) < 0) {
668 0 : PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
669 0 : Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
670 0 : goto done;
671 : }
672 :
673 : /* Optimize end cases */
674 1 : if (va.len == 0 && PyBytes_CheckExact(b)) {
675 1 : result = b;
676 1 : Py_INCREF(result);
677 1 : goto done;
678 : }
679 0 : if (vb.len == 0 && PyBytes_CheckExact(a)) {
680 0 : result = a;
681 0 : Py_INCREF(result);
682 0 : goto done;
683 : }
684 :
685 0 : size = va.len + vb.len;
686 0 : if (size < 0) {
687 0 : PyErr_NoMemory();
688 0 : goto done;
689 : }
690 :
691 0 : result = PyBytes_FromStringAndSize(NULL, size);
692 0 : if (result != NULL) {
693 0 : memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
694 0 : memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
695 : }
696 :
697 : done:
698 1 : if (va.len != -1)
699 1 : PyBuffer_Release(&va);
700 1 : if (vb.len != -1)
701 1 : PyBuffer_Release(&vb);
702 1 : return result;
703 : }
704 :
705 : static PyObject *
706 0 : bytes_repeat(register PyBytesObject *a, register Py_ssize_t n)
707 : {
708 : register Py_ssize_t i;
709 : register Py_ssize_t j;
710 : register Py_ssize_t size;
711 : register PyBytesObject *op;
712 : size_t nbytes;
713 0 : if (n < 0)
714 0 : n = 0;
715 : /* watch out for overflows: the size can overflow int,
716 : * and the # of bytes needed can overflow size_t
717 : */
718 0 : if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
719 0 : PyErr_SetString(PyExc_OverflowError,
720 : "repeated bytes are too long");
721 0 : return NULL;
722 : }
723 0 : size = Py_SIZE(a) * n;
724 0 : if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
725 0 : Py_INCREF(a);
726 0 : return (PyObject *)a;
727 : }
728 0 : nbytes = (size_t)size;
729 0 : if (nbytes + PyBytesObject_SIZE <= nbytes) {
730 0 : PyErr_SetString(PyExc_OverflowError,
731 : "repeated bytes are too long");
732 0 : return NULL;
733 : }
734 0 : op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
735 0 : if (op == NULL)
736 0 : return PyErr_NoMemory();
737 0 : PyObject_INIT_VAR(op, &PyBytes_Type, size);
738 0 : op->ob_shash = -1;
739 0 : op->ob_sval[size] = '\0';
740 0 : if (Py_SIZE(a) == 1 && n > 0) {
741 0 : memset(op->ob_sval, a->ob_sval[0] , n);
742 0 : return (PyObject *) op;
743 : }
744 0 : i = 0;
745 0 : if (i < size) {
746 0 : Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
747 0 : i = Py_SIZE(a);
748 : }
749 0 : while (i < size) {
750 0 : j = (i <= size-i) ? i : size-i;
751 0 : Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
752 0 : i += j;
753 : }
754 0 : return (PyObject *) op;
755 : }
756 :
757 : static int
758 0 : bytes_contains(PyObject *self, PyObject *arg)
759 : {
760 0 : Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
761 0 : if (ival == -1 && PyErr_Occurred()) {
762 : Py_buffer varg;
763 : Py_ssize_t pos;
764 0 : PyErr_Clear();
765 0 : if (_getbuffer(arg, &varg) < 0)
766 0 : return -1;
767 0 : pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
768 0 : varg.buf, varg.len, 0);
769 0 : PyBuffer_Release(&varg);
770 0 : return pos >= 0;
771 : }
772 0 : if (ival < 0 || ival >= 256) {
773 0 : PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
774 0 : return -1;
775 : }
776 :
777 0 : return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL;
778 : }
779 :
780 : static PyObject *
781 0 : bytes_item(PyBytesObject *a, register Py_ssize_t i)
782 : {
783 0 : if (i < 0 || i >= Py_SIZE(a)) {
784 0 : PyErr_SetString(PyExc_IndexError, "index out of range");
785 0 : return NULL;
786 : }
787 0 : return PyLong_FromLong((unsigned char)a->ob_sval[i]);
788 : }
789 :
790 : static PyObject*
791 47 : bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
792 : {
793 : int c;
794 : Py_ssize_t len_a, len_b;
795 : Py_ssize_t min_len;
796 : PyObject *result;
797 :
798 : /* Make sure both arguments are strings. */
799 47 : if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
800 0 : if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
801 0 : (PyObject_IsInstance((PyObject*)a,
802 0 : (PyObject*)&PyUnicode_Type) ||
803 0 : PyObject_IsInstance((PyObject*)b,
804 : (PyObject*)&PyUnicode_Type))) {
805 0 : if (PyErr_WarnEx(PyExc_BytesWarning,
806 : "Comparison between bytes and string", 1))
807 0 : return NULL;
808 : }
809 0 : result = Py_NotImplemented;
810 0 : goto out;
811 : }
812 47 : if (a == b) {
813 0 : switch (op) {
814 : case Py_EQ:case Py_LE:case Py_GE:
815 0 : result = Py_True;
816 0 : goto out;
817 : case Py_NE:case Py_LT:case Py_GT:
818 0 : result = Py_False;
819 0 : goto out;
820 : }
821 : }
822 47 : if (op == Py_EQ) {
823 : /* Supporting Py_NE here as well does not save
824 : much time, since Py_NE is rarely used. */
825 2 : if (Py_SIZE(a) == Py_SIZE(b)
826 2 : && (a->ob_sval[0] == b->ob_sval[0]
827 2 : && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
828 2 : result = Py_True;
829 : } else {
830 0 : result = Py_False;
831 : }
832 2 : goto out;
833 : }
834 45 : len_a = Py_SIZE(a); len_b = Py_SIZE(b);
835 45 : min_len = (len_a < len_b) ? len_a : len_b;
836 45 : if (min_len > 0) {
837 45 : c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
838 45 : if (c==0)
839 45 : c = memcmp(a->ob_sval, b->ob_sval, min_len);
840 : } else
841 0 : c = 0;
842 45 : if (c == 0)
843 45 : c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
844 45 : switch (op) {
845 0 : case Py_LT: c = c < 0; break;
846 0 : case Py_LE: c = c <= 0; break;
847 0 : case Py_EQ: assert(0); break; /* unreachable */
848 45 : case Py_NE: c = c != 0; break;
849 0 : case Py_GT: c = c > 0; break;
850 0 : case Py_GE: c = c >= 0; break;
851 : default:
852 0 : result = Py_NotImplemented;
853 0 : goto out;
854 : }
855 45 : result = c ? Py_True : Py_False;
856 : out:
857 47 : Py_INCREF(result);
858 47 : return result;
859 : }
860 :
861 : static Py_hash_t
862 1598 : bytes_hash(PyBytesObject *a)
863 : {
864 1598 : if (a->ob_shash == -1) {
865 : /* Can't fail */
866 777 : a->ob_shash = _Py_HashBytes((unsigned char *) a->ob_sval, Py_SIZE(a));
867 : }
868 1598 : return a->ob_shash;
869 : }
870 :
871 : static PyObject*
872 541 : bytes_subscript(PyBytesObject* self, PyObject* item)
873 : {
874 541 : if (PyIndex_Check(item)) {
875 360 : Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
876 360 : if (i == -1 && PyErr_Occurred())
877 0 : return NULL;
878 360 : if (i < 0)
879 0 : i += PyBytes_GET_SIZE(self);
880 360 : if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
881 0 : PyErr_SetString(PyExc_IndexError,
882 : "index out of range");
883 0 : return NULL;
884 : }
885 360 : return PyLong_FromLong((unsigned char)self->ob_sval[i]);
886 : }
887 181 : else if (PySlice_Check(item)) {
888 : Py_ssize_t start, stop, step, slicelength, cur, i;
889 : char* source_buf;
890 : char* result_buf;
891 : PyObject* result;
892 :
893 181 : if (PySlice_GetIndicesEx(item,
894 : PyBytes_GET_SIZE(self),
895 : &start, &stop, &step, &slicelength) < 0) {
896 0 : return NULL;
897 : }
898 :
899 181 : if (slicelength <= 0) {
900 1 : return PyBytes_FromStringAndSize("", 0);
901 : }
902 225 : else if (start == 0 && step == 1 &&
903 45 : slicelength == PyBytes_GET_SIZE(self) &&
904 0 : PyBytes_CheckExact(self)) {
905 0 : Py_INCREF(self);
906 0 : return (PyObject *)self;
907 : }
908 180 : else if (step == 1) {
909 360 : return PyBytes_FromStringAndSize(
910 180 : PyBytes_AS_STRING(self) + start,
911 : slicelength);
912 : }
913 : else {
914 0 : source_buf = PyBytes_AS_STRING(self);
915 0 : result = PyBytes_FromStringAndSize(NULL, slicelength);
916 0 : if (result == NULL)
917 0 : return NULL;
918 :
919 0 : result_buf = PyBytes_AS_STRING(result);
920 0 : for (cur = start, i = 0; i < slicelength;
921 0 : cur += step, i++) {
922 0 : result_buf[i] = source_buf[cur];
923 : }
924 :
925 0 : return result;
926 : }
927 : }
928 : else {
929 0 : PyErr_Format(PyExc_TypeError,
930 : "byte indices must be integers, not %.200s",
931 0 : Py_TYPE(item)->tp_name);
932 0 : return NULL;
933 : }
934 : }
935 :
936 : static int
937 1432 : bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
938 : {
939 1432 : return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
940 : 1, flags);
941 : }
942 :
943 : static PySequenceMethods bytes_as_sequence = {
944 : (lenfunc)bytes_length, /*sq_length*/
945 : (binaryfunc)bytes_concat, /*sq_concat*/
946 : (ssizeargfunc)bytes_repeat, /*sq_repeat*/
947 : (ssizeargfunc)bytes_item, /*sq_item*/
948 : 0, /*sq_slice*/
949 : 0, /*sq_ass_item*/
950 : 0, /*sq_ass_slice*/
951 : (objobjproc)bytes_contains /*sq_contains*/
952 : };
953 :
954 : static PyMappingMethods bytes_as_mapping = {
955 : (lenfunc)bytes_length,
956 : (binaryfunc)bytes_subscript,
957 : 0,
958 : };
959 :
960 : static PyBufferProcs bytes_as_buffer = {
961 : (getbufferproc)bytes_buffer_getbuffer,
962 : NULL,
963 : };
964 :
965 :
966 : #define LEFTSTRIP 0
967 : #define RIGHTSTRIP 1
968 : #define BOTHSTRIP 2
969 :
970 : /* Arrays indexed by above */
971 : static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
972 :
973 : #define STRIPNAME(i) (stripformat[i]+3)
974 :
975 : PyDoc_STRVAR(split__doc__,
976 : "B.split(sep=None, maxsplit=-1) -> list of bytes\n\
977 : \n\
978 : Return a list of the sections in B, using sep as the delimiter.\n\
979 : If sep is not specified or is None, B is split on ASCII whitespace\n\
980 : characters (space, tab, return, newline, formfeed, vertical tab).\n\
981 : If maxsplit is given, at most maxsplit splits are done.");
982 :
983 : static PyObject *
984 0 : bytes_split(PyBytesObject *self, PyObject *args, PyObject *kwds)
985 : {
986 : static char *kwlist[] = {"sep", "maxsplit", 0};
987 0 : Py_ssize_t len = PyBytes_GET_SIZE(self), n;
988 0 : Py_ssize_t maxsplit = -1;
989 0 : const char *s = PyBytes_AS_STRING(self), *sub;
990 : Py_buffer vsub;
991 0 : PyObject *list, *subobj = Py_None;
992 :
993 0 : if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:split",
994 : kwlist, &subobj, &maxsplit))
995 0 : return NULL;
996 0 : if (maxsplit < 0)
997 0 : maxsplit = PY_SSIZE_T_MAX;
998 0 : if (subobj == Py_None)
999 0 : return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1000 0 : if (_getbuffer(subobj, &vsub) < 0)
1001 0 : return NULL;
1002 0 : sub = vsub.buf;
1003 0 : n = vsub.len;
1004 :
1005 0 : list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1006 0 : PyBuffer_Release(&vsub);
1007 0 : return list;
1008 : }
1009 :
1010 : PyDoc_STRVAR(partition__doc__,
1011 : "B.partition(sep) -> (head, sep, tail)\n\
1012 : \n\
1013 : Search for the separator sep in B, and return the part before it,\n\
1014 : the separator itself, and the part after it. If the separator is not\n\
1015 : found, returns B and two empty bytes objects.");
1016 :
1017 : static PyObject *
1018 0 : bytes_partition(PyBytesObject *self, PyObject *sep_obj)
1019 : {
1020 : const char *sep;
1021 : Py_ssize_t sep_len;
1022 :
1023 0 : if (PyBytes_Check(sep_obj)) {
1024 0 : sep = PyBytes_AS_STRING(sep_obj);
1025 0 : sep_len = PyBytes_GET_SIZE(sep_obj);
1026 : }
1027 0 : else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1028 0 : return NULL;
1029 :
1030 0 : return stringlib_partition(
1031 : (PyObject*) self,
1032 0 : PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1033 : sep_obj, sep, sep_len
1034 : );
1035 : }
1036 :
1037 : PyDoc_STRVAR(rpartition__doc__,
1038 : "B.rpartition(sep) -> (head, sep, tail)\n\
1039 : \n\
1040 : Search for the separator sep in B, starting at the end of B,\n\
1041 : and return the part before it, the separator itself, and the\n\
1042 : part after it. If the separator is not found, returns two empty\n\
1043 : bytes objects and B.");
1044 :
1045 : static PyObject *
1046 0 : bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
1047 : {
1048 : const char *sep;
1049 : Py_ssize_t sep_len;
1050 :
1051 0 : if (PyBytes_Check(sep_obj)) {
1052 0 : sep = PyBytes_AS_STRING(sep_obj);
1053 0 : sep_len = PyBytes_GET_SIZE(sep_obj);
1054 : }
1055 0 : else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1056 0 : return NULL;
1057 :
1058 0 : return stringlib_rpartition(
1059 : (PyObject*) self,
1060 0 : PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1061 : sep_obj, sep, sep_len
1062 : );
1063 : }
1064 :
1065 : PyDoc_STRVAR(rsplit__doc__,
1066 : "B.rsplit(sep=None, maxsplit=-1) -> list of bytes\n\
1067 : \n\
1068 : Return a list of the sections in B, using sep as the delimiter,\n\
1069 : starting at the end of B and working to the front.\n\
1070 : If sep is not given, B is split on ASCII whitespace characters\n\
1071 : (space, tab, return, newline, formfeed, vertical tab).\n\
1072 : If maxsplit is given, at most maxsplit splits are done.");
1073 :
1074 :
1075 : static PyObject *
1076 0 : bytes_rsplit(PyBytesObject *self, PyObject *args, PyObject *kwds)
1077 : {
1078 : static char *kwlist[] = {"sep", "maxsplit", 0};
1079 0 : Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1080 0 : Py_ssize_t maxsplit = -1;
1081 0 : const char *s = PyBytes_AS_STRING(self), *sub;
1082 : Py_buffer vsub;
1083 0 : PyObject *list, *subobj = Py_None;
1084 :
1085 0 : if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:rsplit",
1086 : kwlist, &subobj, &maxsplit))
1087 0 : return NULL;
1088 0 : if (maxsplit < 0)
1089 0 : maxsplit = PY_SSIZE_T_MAX;
1090 0 : if (subobj == Py_None)
1091 0 : return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1092 0 : if (_getbuffer(subobj, &vsub) < 0)
1093 0 : return NULL;
1094 0 : sub = vsub.buf;
1095 0 : n = vsub.len;
1096 :
1097 0 : list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1098 0 : PyBuffer_Release(&vsub);
1099 0 : return list;
1100 : }
1101 :
1102 :
1103 : PyDoc_STRVAR(join__doc__,
1104 : "B.join(iterable_of_bytes) -> bytes\n\
1105 : \n\
1106 : Concatenate any number of bytes objects, with B in between each pair.\n\
1107 : Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
1108 :
1109 : static PyObject *
1110 0 : bytes_join(PyObject *self, PyObject *orig)
1111 : {
1112 0 : char *sep = PyBytes_AS_STRING(self);
1113 0 : const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
1114 0 : PyObject *res = NULL;
1115 : char *p;
1116 0 : Py_ssize_t seqlen = 0;
1117 0 : size_t sz = 0;
1118 : Py_ssize_t i;
1119 : PyObject *seq, *item;
1120 :
1121 0 : seq = PySequence_Fast(orig, "");
1122 0 : if (seq == NULL) {
1123 0 : return NULL;
1124 : }
1125 :
1126 0 : seqlen = PySequence_Size(seq);
1127 0 : if (seqlen == 0) {
1128 0 : Py_DECREF(seq);
1129 0 : return PyBytes_FromString("");
1130 : }
1131 0 : if (seqlen == 1) {
1132 0 : item = PySequence_Fast_GET_ITEM(seq, 0);
1133 0 : if (PyBytes_CheckExact(item)) {
1134 0 : Py_INCREF(item);
1135 0 : Py_DECREF(seq);
1136 0 : return item;
1137 : }
1138 : }
1139 :
1140 : /* There are at least two things to join, or else we have a subclass
1141 : * of the builtin types in the sequence.
1142 : * Do a pre-pass to figure out the total amount of space we'll
1143 : * need (sz), and see whether all argument are bytes.
1144 : */
1145 : /* XXX Shouldn't we use _getbuffer() on these items instead? */
1146 0 : for (i = 0; i < seqlen; i++) {
1147 0 : const size_t old_sz = sz;
1148 0 : item = PySequence_Fast_GET_ITEM(seq, i);
1149 0 : if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
1150 0 : PyErr_Format(PyExc_TypeError,
1151 : "sequence item %zd: expected bytes,"
1152 : " %.80s found",
1153 0 : i, Py_TYPE(item)->tp_name);
1154 0 : Py_DECREF(seq);
1155 0 : return NULL;
1156 : }
1157 0 : sz += Py_SIZE(item);
1158 0 : if (i != 0)
1159 0 : sz += seplen;
1160 0 : if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1161 0 : PyErr_SetString(PyExc_OverflowError,
1162 : "join() result is too long for bytes");
1163 0 : Py_DECREF(seq);
1164 0 : return NULL;
1165 : }
1166 : }
1167 :
1168 : /* Allocate result space. */
1169 0 : res = PyBytes_FromStringAndSize((char*)NULL, sz);
1170 0 : if (res == NULL) {
1171 0 : Py_DECREF(seq);
1172 0 : return NULL;
1173 : }
1174 :
1175 : /* Catenate everything. */
1176 : /* I'm not worried about a PyByteArray item growing because there's
1177 : nowhere in this function where we release the GIL. */
1178 0 : p = PyBytes_AS_STRING(res);
1179 0 : for (i = 0; i < seqlen; ++i) {
1180 : size_t n;
1181 : char *q;
1182 0 : if (i) {
1183 0 : Py_MEMCPY(p, sep, seplen);
1184 0 : p += seplen;
1185 : }
1186 0 : item = PySequence_Fast_GET_ITEM(seq, i);
1187 0 : n = Py_SIZE(item);
1188 0 : if (PyBytes_Check(item))
1189 0 : q = PyBytes_AS_STRING(item);
1190 : else
1191 0 : q = PyByteArray_AS_STRING(item);
1192 0 : Py_MEMCPY(p, q, n);
1193 0 : p += n;
1194 : }
1195 :
1196 0 : Py_DECREF(seq);
1197 0 : return res;
1198 : }
1199 :
1200 : PyObject *
1201 0 : _PyBytes_Join(PyObject *sep, PyObject *x)
1202 : {
1203 : assert(sep != NULL && PyBytes_Check(sep));
1204 : assert(x != NULL);
1205 0 : return bytes_join(sep, x);
1206 : }
1207 :
/* Helper macro to fix up start/end slice values against a length `len`.
 *
 *   - end greater than len is clamped to len;
 *   - a negative end has len added to it and is then clamped to 0;
 *   - a negative start has len added to it and is then clamped to 0.
 *
 * Note that start is NOT clamped from above: it may still exceed len (or
 * end) afterwards, so callers must be prepared for end - start < 0.
 *
 * `start` and `end` are assigned to and must be modifiable lvalues.  All
 * three arguments are expanded several times, unparenthesized.  The
 * expansion is a bare sequence of if-statements (no do/while wrapper), so
 * use it only as a complete statement of its own, never as the unbraced
 * body of an if/else.
 */
#define ADJUST_INDICES(start, end, len)         \
    if (end > len)                              \
        end = len;                              \
    else if (end < 0) {                         \
        end += len;                             \
        if (end < 0)                            \
            end = 0;                            \
    }                                           \
    if (start < 0) {                            \
        start += len;                           \
        if (start < 0)                          \
            start = 0;                          \
    }
1222 :
1223 : Py_LOCAL_INLINE(Py_ssize_t)
1224 0 : bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
1225 : {
1226 : PyObject *subobj;
1227 : char byte;
1228 : Py_buffer subbuf;
1229 : const char *sub;
1230 : Py_ssize_t sub_len;
1231 0 : Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1232 : Py_ssize_t res;
1233 :
1234 0 : if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
1235 : args, &subobj, &byte, &start, &end))
1236 0 : return -2;
1237 :
1238 0 : if (subobj) {
1239 0 : if (_getbuffer(subobj, &subbuf) < 0)
1240 0 : return -2;
1241 :
1242 0 : sub = subbuf.buf;
1243 0 : sub_len = subbuf.len;
1244 : }
1245 : else {
1246 0 : sub = &byte;
1247 0 : sub_len = 1;
1248 : }
1249 :
1250 0 : if (dir > 0)
1251 0 : res = stringlib_find_slice(
1252 0 : PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1253 : sub, sub_len, start, end);
1254 : else
1255 0 : res = stringlib_rfind_slice(
1256 0 : PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1257 : sub, sub_len, start, end);
1258 :
1259 0 : if (subobj)
1260 0 : PyBuffer_Release(&subbuf);
1261 :
1262 0 : return res;
1263 : }
1264 :
1265 :
1266 : PyDoc_STRVAR(find__doc__,
1267 : "B.find(sub[, start[, end]]) -> int\n\
1268 : \n\
1269 : Return the lowest index in B where substring sub is found,\n\
1270 : such that sub is contained within B[start:end]. Optional\n\
1271 : arguments start and end are interpreted as in slice notation.\n\
1272 : \n\
1273 : Return -1 on failure.");
1274 :
1275 : static PyObject *
1276 0 : bytes_find(PyBytesObject *self, PyObject *args)
1277 : {
1278 0 : Py_ssize_t result = bytes_find_internal(self, args, +1);
1279 0 : if (result == -2)
1280 0 : return NULL;
1281 0 : return PyLong_FromSsize_t(result);
1282 : }
1283 :
1284 :
1285 : PyDoc_STRVAR(index__doc__,
1286 : "B.index(sub[, start[, end]]) -> int\n\
1287 : \n\
1288 : Like B.find() but raise ValueError when the substring is not found.");
1289 :
1290 : static PyObject *
1291 0 : bytes_index(PyBytesObject *self, PyObject *args)
1292 : {
1293 0 : Py_ssize_t result = bytes_find_internal(self, args, +1);
1294 0 : if (result == -2)
1295 0 : return NULL;
1296 0 : if (result == -1) {
1297 0 : PyErr_SetString(PyExc_ValueError,
1298 : "substring not found");
1299 0 : return NULL;
1300 : }
1301 0 : return PyLong_FromSsize_t(result);
1302 : }
1303 :
1304 :
1305 : PyDoc_STRVAR(rfind__doc__,
1306 : "B.rfind(sub[, start[, end]]) -> int\n\
1307 : \n\
1308 : Return the highest index in B where substring sub is found,\n\
1309 : such that sub is contained within B[start:end]. Optional\n\
1310 : arguments start and end are interpreted as in slice notation.\n\
1311 : \n\
1312 : Return -1 on failure.");
1313 :
1314 : static PyObject *
1315 0 : bytes_rfind(PyBytesObject *self, PyObject *args)
1316 : {
1317 0 : Py_ssize_t result = bytes_find_internal(self, args, -1);
1318 0 : if (result == -2)
1319 0 : return NULL;
1320 0 : return PyLong_FromSsize_t(result);
1321 : }
1322 :
1323 :
1324 : PyDoc_STRVAR(rindex__doc__,
1325 : "B.rindex(sub[, start[, end]]) -> int\n\
1326 : \n\
1327 : Like B.rfind() but raise ValueError when the substring is not found.");
1328 :
1329 : static PyObject *
1330 0 : bytes_rindex(PyBytesObject *self, PyObject *args)
1331 : {
1332 0 : Py_ssize_t result = bytes_find_internal(self, args, -1);
1333 0 : if (result == -2)
1334 0 : return NULL;
1335 0 : if (result == -1) {
1336 0 : PyErr_SetString(PyExc_ValueError,
1337 : "substring not found");
1338 0 : return NULL;
1339 : }
1340 0 : return PyLong_FromSsize_t(result);
1341 : }
1342 :
1343 :
1344 : Py_LOCAL_INLINE(PyObject *)
1345 0 : do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
1346 : {
1347 : Py_buffer vsep;
1348 0 : char *s = PyBytes_AS_STRING(self);
1349 0 : Py_ssize_t len = PyBytes_GET_SIZE(self);
1350 : char *sep;
1351 : Py_ssize_t seplen;
1352 : Py_ssize_t i, j;
1353 :
1354 0 : if (_getbuffer(sepobj, &vsep) < 0)
1355 0 : return NULL;
1356 0 : sep = vsep.buf;
1357 0 : seplen = vsep.len;
1358 :
1359 0 : i = 0;
1360 0 : if (striptype != RIGHTSTRIP) {
1361 0 : while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1362 0 : i++;
1363 : }
1364 : }
1365 :
1366 0 : j = len;
1367 0 : if (striptype != LEFTSTRIP) {
1368 : do {
1369 0 : j--;
1370 0 : } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1371 0 : j++;
1372 : }
1373 :
1374 0 : PyBuffer_Release(&vsep);
1375 :
1376 0 : if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1377 0 : Py_INCREF(self);
1378 0 : return (PyObject*)self;
1379 : }
1380 : else
1381 0 : return PyBytes_FromStringAndSize(s+i, j-i);
1382 : }
1383 :
1384 :
1385 : Py_LOCAL_INLINE(PyObject *)
1386 0 : do_strip(PyBytesObject *self, int striptype)
1387 : {
1388 0 : char *s = PyBytes_AS_STRING(self);
1389 0 : Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
1390 :
1391 0 : i = 0;
1392 0 : if (striptype != RIGHTSTRIP) {
1393 0 : while (i < len && Py_ISSPACE(s[i])) {
1394 0 : i++;
1395 : }
1396 : }
1397 :
1398 0 : j = len;
1399 0 : if (striptype != LEFTSTRIP) {
1400 : do {
1401 0 : j--;
1402 0 : } while (j >= i && Py_ISSPACE(s[j]));
1403 0 : j++;
1404 : }
1405 :
1406 0 : if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1407 0 : Py_INCREF(self);
1408 0 : return (PyObject*)self;
1409 : }
1410 : else
1411 0 : return PyBytes_FromStringAndSize(s+i, j-i);
1412 : }
1413 :
1414 :
1415 : Py_LOCAL_INLINE(PyObject *)
1416 0 : do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
1417 : {
1418 0 : PyObject *sep = NULL;
1419 :
1420 0 : if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1421 0 : return NULL;
1422 :
1423 0 : if (sep != NULL && sep != Py_None) {
1424 0 : return do_xstrip(self, striptype, sep);
1425 : }
1426 0 : return do_strip(self, striptype);
1427 : }
1428 :
1429 :
1430 : PyDoc_STRVAR(strip__doc__,
1431 : "B.strip([bytes]) -> bytes\n\
1432 : \n\
1433 : Strip leading and trailing bytes contained in the argument.\n\
1434 : If the argument is omitted, strip leading and trailing ASCII whitespace.");
1435 : static PyObject *
1436 0 : bytes_strip(PyBytesObject *self, PyObject *args)
1437 : {
1438 0 : if (PyTuple_GET_SIZE(args) == 0)
1439 0 : return do_strip(self, BOTHSTRIP); /* Common case */
1440 : else
1441 0 : return do_argstrip(self, BOTHSTRIP, args);
1442 : }
1443 :
1444 :
1445 : PyDoc_STRVAR(lstrip__doc__,
1446 : "B.lstrip([bytes]) -> bytes\n\
1447 : \n\
1448 : Strip leading bytes contained in the argument.\n\
1449 : If the argument is omitted, strip leading ASCII whitespace.");
1450 : static PyObject *
1451 0 : bytes_lstrip(PyBytesObject *self, PyObject *args)
1452 : {
1453 0 : if (PyTuple_GET_SIZE(args) == 0)
1454 0 : return do_strip(self, LEFTSTRIP); /* Common case */
1455 : else
1456 0 : return do_argstrip(self, LEFTSTRIP, args);
1457 : }
1458 :
1459 :
1460 : PyDoc_STRVAR(rstrip__doc__,
1461 : "B.rstrip([bytes]) -> bytes\n\
1462 : \n\
1463 : Strip trailing bytes contained in the argument.\n\
1464 : If the argument is omitted, strip trailing ASCII whitespace.");
1465 : static PyObject *
1466 0 : bytes_rstrip(PyBytesObject *self, PyObject *args)
1467 : {
1468 0 : if (PyTuple_GET_SIZE(args) == 0)
1469 0 : return do_strip(self, RIGHTSTRIP); /* Common case */
1470 : else
1471 0 : return do_argstrip(self, RIGHTSTRIP, args);
1472 : }
1473 :
1474 :
1475 : PyDoc_STRVAR(count__doc__,
1476 : "B.count(sub[, start[, end]]) -> int\n\
1477 : \n\
1478 : Return the number of non-overlapping occurrences of substring sub in\n\
1479 : string B[start:end]. Optional arguments start and end are interpreted\n\
1480 : as in slice notation.");
1481 :
1482 : static PyObject *
1483 0 : bytes_count(PyBytesObject *self, PyObject *args)
1484 : {
1485 : PyObject *sub_obj;
1486 0 : const char *str = PyBytes_AS_STRING(self), *sub;
1487 : Py_ssize_t sub_len;
1488 : char byte;
1489 0 : Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1490 :
1491 : Py_buffer vsub;
1492 : PyObject *count_obj;
1493 :
1494 0 : if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
1495 : &start, &end))
1496 0 : return NULL;
1497 :
1498 0 : if (sub_obj) {
1499 0 : if (_getbuffer(sub_obj, &vsub) < 0)
1500 0 : return NULL;
1501 :
1502 0 : sub = vsub.buf;
1503 0 : sub_len = vsub.len;
1504 : }
1505 : else {
1506 0 : sub = &byte;
1507 0 : sub_len = 1;
1508 : }
1509 :
1510 0 : ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
1511 :
1512 0 : count_obj = PyLong_FromSsize_t(
1513 : stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
1514 : );
1515 :
1516 0 : if (sub_obj)
1517 0 : PyBuffer_Release(&vsub);
1518 :
1519 0 : return count_obj;
1520 : }
1521 :
1522 :
1523 : PyDoc_STRVAR(translate__doc__,
1524 : "B.translate(table[, deletechars]) -> bytes\n\
1525 : \n\
1526 : Return a copy of B, where all characters occurring in the\n\
1527 : optional argument deletechars are removed, and the remaining\n\
1528 : characters have been mapped through the given translation\n\
1529 : table, which must be a bytes object of length 256.");
1530 :
1531 : static PyObject *
1532 0 : bytes_translate(PyBytesObject *self, PyObject *args)
1533 : {
1534 : register char *input, *output;
1535 : const char *table;
1536 0 : register Py_ssize_t i, c, changed = 0;
1537 0 : PyObject *input_obj = (PyObject*)self;
1538 0 : const char *output_start, *del_table=NULL;
1539 0 : Py_ssize_t inlen, tablen, dellen = 0;
1540 : PyObject *result;
1541 : int trans_table[256];
1542 0 : PyObject *tableobj, *delobj = NULL;
1543 :
1544 0 : if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1545 : &tableobj, &delobj))
1546 0 : return NULL;
1547 :
1548 0 : if (PyBytes_Check(tableobj)) {
1549 0 : table = PyBytes_AS_STRING(tableobj);
1550 0 : tablen = PyBytes_GET_SIZE(tableobj);
1551 : }
1552 0 : else if (tableobj == Py_None) {
1553 0 : table = NULL;
1554 0 : tablen = 256;
1555 : }
1556 0 : else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
1557 0 : return NULL;
1558 :
1559 0 : if (tablen != 256) {
1560 0 : PyErr_SetString(PyExc_ValueError,
1561 : "translation table must be 256 characters long");
1562 0 : return NULL;
1563 : }
1564 :
1565 0 : if (delobj != NULL) {
1566 0 : if (PyBytes_Check(delobj)) {
1567 0 : del_table = PyBytes_AS_STRING(delobj);
1568 0 : dellen = PyBytes_GET_SIZE(delobj);
1569 : }
1570 0 : else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1571 0 : return NULL;
1572 : }
1573 : else {
1574 0 : del_table = NULL;
1575 0 : dellen = 0;
1576 : }
1577 :
1578 0 : inlen = PyBytes_GET_SIZE(input_obj);
1579 0 : result = PyBytes_FromStringAndSize((char *)NULL, inlen);
1580 0 : if (result == NULL)
1581 0 : return NULL;
1582 0 : output_start = output = PyBytes_AsString(result);
1583 0 : input = PyBytes_AS_STRING(input_obj);
1584 :
1585 0 : if (dellen == 0 && table != NULL) {
1586 : /* If no deletions are required, use faster code */
1587 0 : for (i = inlen; --i >= 0; ) {
1588 0 : c = Py_CHARMASK(*input++);
1589 0 : if (Py_CHARMASK((*output++ = table[c])) != c)
1590 0 : changed = 1;
1591 : }
1592 0 : if (changed || !PyBytes_CheckExact(input_obj))
1593 0 : return result;
1594 0 : Py_DECREF(result);
1595 0 : Py_INCREF(input_obj);
1596 0 : return input_obj;
1597 : }
1598 :
1599 0 : if (table == NULL) {
1600 0 : for (i = 0; i < 256; i++)
1601 0 : trans_table[i] = Py_CHARMASK(i);
1602 : } else {
1603 0 : for (i = 0; i < 256; i++)
1604 0 : trans_table[i] = Py_CHARMASK(table[i]);
1605 : }
1606 :
1607 0 : for (i = 0; i < dellen; i++)
1608 0 : trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1609 :
1610 0 : for (i = inlen; --i >= 0; ) {
1611 0 : c = Py_CHARMASK(*input++);
1612 0 : if (trans_table[c] != -1)
1613 0 : if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1614 0 : continue;
1615 0 : changed = 1;
1616 : }
1617 0 : if (!changed && PyBytes_CheckExact(input_obj)) {
1618 0 : Py_DECREF(result);
1619 0 : Py_INCREF(input_obj);
1620 0 : return input_obj;
1621 : }
1622 : /* Fix the size of the resulting string */
1623 0 : if (inlen > 0)
1624 0 : _PyBytes_Resize(&result, output - output_start);
1625 0 : return result;
1626 : }
1627 :
1628 :
1629 : static PyObject *
1630 0 : bytes_maketrans(PyObject *null, PyObject *args)
1631 : {
1632 0 : return _Py_bytes_maketrans(args);
1633 : }
1634 :
1635 : /* find and count characters and substrings */
1636 :
/* Locate the first occurrence of byte c in the first target_len bytes of
 * target.  Evaluates to a char * to the match, or NULL if there is none
 * (this is memchr() with the result cast to char *).
 */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1639 :
1640 : /* String ops must return a string. */
1641 : /* If the object is subclass of string, create a copy */
1642 : Py_LOCAL(PyBytesObject *)
1643 0 : return_self(PyBytesObject *self)
1644 : {
1645 0 : if (PyBytes_CheckExact(self)) {
1646 0 : Py_INCREF(self);
1647 0 : return self;
1648 : }
1649 0 : return (PyBytesObject *)PyBytes_FromStringAndSize(
1650 0 : PyBytes_AS_STRING(self),
1651 : PyBytes_GET_SIZE(self));
1652 : }
1653 :
1654 : Py_LOCAL_INLINE(Py_ssize_t)
1655 0 : countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
1656 : {
1657 0 : Py_ssize_t count=0;
1658 0 : const char *start=target;
1659 0 : const char *end=target+target_len;
1660 :
1661 0 : while ( (start=findchar(start, end-start, c)) != NULL ) {
1662 0 : count++;
1663 0 : if (count >= maxcount)
1664 0 : break;
1665 0 : start += 1;
1666 : }
1667 0 : return count;
1668 : }
1669 :
1670 :
1671 : /* Algorithms for different cases of string replacement */
1672 :
1673 : /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1674 : Py_LOCAL(PyBytesObject *)
1675 0 : replace_interleave(PyBytesObject *self,
1676 : const char *to_s, Py_ssize_t to_len,
1677 : Py_ssize_t maxcount)
1678 : {
1679 : char *self_s, *result_s;
1680 : Py_ssize_t self_len, result_len;
1681 : Py_ssize_t count, i;
1682 : PyBytesObject *result;
1683 :
1684 0 : self_len = PyBytes_GET_SIZE(self);
1685 :
1686 : /* 1 at the end plus 1 after every character;
1687 : count = min(maxcount, self_len + 1) */
1688 0 : if (maxcount <= self_len)
1689 0 : count = maxcount;
1690 : else
1691 : /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
1692 0 : count = self_len + 1;
1693 :
1694 : /* Check for overflow */
1695 : /* result_len = count * to_len + self_len; */
1696 : assert(count > 0);
1697 0 : if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
1698 0 : PyErr_SetString(PyExc_OverflowError,
1699 : "replacement bytes are too long");
1700 0 : return NULL;
1701 : }
1702 0 : result_len = count * to_len + self_len;
1703 :
1704 0 : if (! (result = (PyBytesObject *)
1705 : PyBytes_FromStringAndSize(NULL, result_len)) )
1706 0 : return NULL;
1707 :
1708 0 : self_s = PyBytes_AS_STRING(self);
1709 0 : result_s = PyBytes_AS_STRING(result);
1710 :
1711 : /* TODO: special case single character, which doesn't need memcpy */
1712 :
1713 : /* Lay the first one down (guaranteed this will occur) */
1714 0 : Py_MEMCPY(result_s, to_s, to_len);
1715 0 : result_s += to_len;
1716 0 : count -= 1;
1717 :
1718 0 : for (i=0; i<count; i++) {
1719 0 : *result_s++ = *self_s++;
1720 0 : Py_MEMCPY(result_s, to_s, to_len);
1721 0 : result_s += to_len;
1722 : }
1723 :
1724 : /* Copy the rest of the original string */
1725 0 : Py_MEMCPY(result_s, self_s, self_len-i);
1726 :
1727 0 : return result;
1728 : }
1729 :
1730 : /* Special case for deleting a single character */
1731 : /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1732 : Py_LOCAL(PyBytesObject *)
1733 0 : replace_delete_single_character(PyBytesObject *self,
1734 : char from_c, Py_ssize_t maxcount)
1735 : {
1736 : char *self_s, *result_s;
1737 : char *start, *next, *end;
1738 : Py_ssize_t self_len, result_len;
1739 : Py_ssize_t count;
1740 : PyBytesObject *result;
1741 :
1742 0 : self_len = PyBytes_GET_SIZE(self);
1743 0 : self_s = PyBytes_AS_STRING(self);
1744 :
1745 0 : count = countchar(self_s, self_len, from_c, maxcount);
1746 0 : if (count == 0) {
1747 0 : return return_self(self);
1748 : }
1749 :
1750 0 : result_len = self_len - count; /* from_len == 1 */
1751 : assert(result_len>=0);
1752 :
1753 0 : if ( (result = (PyBytesObject *)
1754 : PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1755 0 : return NULL;
1756 0 : result_s = PyBytes_AS_STRING(result);
1757 :
1758 0 : start = self_s;
1759 0 : end = self_s + self_len;
1760 0 : while (count-- > 0) {
1761 0 : next = findchar(start, end-start, from_c);
1762 0 : if (next == NULL)
1763 0 : break;
1764 0 : Py_MEMCPY(result_s, start, next-start);
1765 0 : result_s += (next-start);
1766 0 : start = next+1;
1767 : }
1768 0 : Py_MEMCPY(result_s, start, end-start);
1769 :
1770 0 : return result;
1771 : }
1772 :
1773 : /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1774 :
1775 : Py_LOCAL(PyBytesObject *)
1776 0 : replace_delete_substring(PyBytesObject *self,
1777 : const char *from_s, Py_ssize_t from_len,
1778 : Py_ssize_t maxcount) {
1779 : char *self_s, *result_s;
1780 : char *start, *next, *end;
1781 : Py_ssize_t self_len, result_len;
1782 : Py_ssize_t count, offset;
1783 : PyBytesObject *result;
1784 :
1785 0 : self_len = PyBytes_GET_SIZE(self);
1786 0 : self_s = PyBytes_AS_STRING(self);
1787 :
1788 0 : count = stringlib_count(self_s, self_len,
1789 : from_s, from_len,
1790 : maxcount);
1791 :
1792 0 : if (count == 0) {
1793 : /* no matches */
1794 0 : return return_self(self);
1795 : }
1796 :
1797 0 : result_len = self_len - (count * from_len);
1798 : assert (result_len>=0);
1799 :
1800 0 : if ( (result = (PyBytesObject *)
1801 : PyBytes_FromStringAndSize(NULL, result_len)) == NULL )
1802 0 : return NULL;
1803 :
1804 0 : result_s = PyBytes_AS_STRING(result);
1805 :
1806 0 : start = self_s;
1807 0 : end = self_s + self_len;
1808 0 : while (count-- > 0) {
1809 0 : offset = stringlib_find(start, end-start,
1810 : from_s, from_len,
1811 : 0);
1812 0 : if (offset == -1)
1813 0 : break;
1814 0 : next = start + offset;
1815 :
1816 0 : Py_MEMCPY(result_s, start, next-start);
1817 :
1818 0 : result_s += (next-start);
1819 0 : start = next+from_len;
1820 : }
1821 0 : Py_MEMCPY(result_s, start, end-start);
1822 0 : return result;
1823 : }
1824 :
1825 : /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1826 : Py_LOCAL(PyBytesObject *)
1827 0 : replace_single_character_in_place(PyBytesObject *self,
1828 : char from_c, char to_c,
1829 : Py_ssize_t maxcount)
1830 : {
1831 : char *self_s, *result_s, *start, *end, *next;
1832 : Py_ssize_t self_len;
1833 : PyBytesObject *result;
1834 :
1835 : /* The result string will be the same size */
1836 0 : self_s = PyBytes_AS_STRING(self);
1837 0 : self_len = PyBytes_GET_SIZE(self);
1838 :
1839 0 : next = findchar(self_s, self_len, from_c);
1840 :
1841 0 : if (next == NULL) {
1842 : /* No matches; return the original string */
1843 0 : return return_self(self);
1844 : }
1845 :
1846 : /* Need to make a new string */
1847 0 : result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1848 0 : if (result == NULL)
1849 0 : return NULL;
1850 0 : result_s = PyBytes_AS_STRING(result);
1851 0 : Py_MEMCPY(result_s, self_s, self_len);
1852 :
1853 : /* change everything in-place, starting with this one */
1854 0 : start = result_s + (next-self_s);
1855 0 : *start = to_c;
1856 0 : start++;
1857 0 : end = result_s + self_len;
1858 :
1859 0 : while (--maxcount > 0) {
1860 0 : next = findchar(start, end-start, from_c);
1861 0 : if (next == NULL)
1862 0 : break;
1863 0 : *next = to_c;
1864 0 : start = next+1;
1865 : }
1866 :
1867 0 : return result;
1868 : }
1869 :
1870 : /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1871 : Py_LOCAL(PyBytesObject *)
1872 0 : replace_substring_in_place(PyBytesObject *self,
1873 : const char *from_s, Py_ssize_t from_len,
1874 : const char *to_s, Py_ssize_t to_len,
1875 : Py_ssize_t maxcount)
1876 : {
1877 : char *result_s, *start, *end;
1878 : char *self_s;
1879 : Py_ssize_t self_len, offset;
1880 : PyBytesObject *result;
1881 :
1882 : /* The result string will be the same size */
1883 :
1884 0 : self_s = PyBytes_AS_STRING(self);
1885 0 : self_len = PyBytes_GET_SIZE(self);
1886 :
1887 0 : offset = stringlib_find(self_s, self_len,
1888 : from_s, from_len,
1889 : 0);
1890 0 : if (offset == -1) {
1891 : /* No matches; return the original string */
1892 0 : return return_self(self);
1893 : }
1894 :
1895 : /* Need to make a new string */
1896 0 : result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len);
1897 0 : if (result == NULL)
1898 0 : return NULL;
1899 0 : result_s = PyBytes_AS_STRING(result);
1900 0 : Py_MEMCPY(result_s, self_s, self_len);
1901 :
1902 : /* change everything in-place, starting with this one */
1903 0 : start = result_s + offset;
1904 0 : Py_MEMCPY(start, to_s, from_len);
1905 0 : start += from_len;
1906 0 : end = result_s + self_len;
1907 :
1908 0 : while ( --maxcount > 0) {
1909 0 : offset = stringlib_find(start, end-start,
1910 : from_s, from_len,
1911 : 0);
1912 0 : if (offset==-1)
1913 0 : break;
1914 0 : Py_MEMCPY(start+offset, to_s, from_len);
1915 0 : start += offset+from_len;
1916 : }
1917 :
1918 0 : return result;
1919 : }
1920 :
1921 : /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1922 : Py_LOCAL(PyBytesObject *)
1923 0 : replace_single_character(PyBytesObject *self,
1924 : char from_c,
1925 : const char *to_s, Py_ssize_t to_len,
1926 : Py_ssize_t maxcount)
1927 : {
1928 : char *self_s, *result_s;
1929 : char *start, *next, *end;
1930 : Py_ssize_t self_len, result_len;
1931 : Py_ssize_t count;
1932 : PyBytesObject *result;
1933 :
1934 0 : self_s = PyBytes_AS_STRING(self);
1935 0 : self_len = PyBytes_GET_SIZE(self);
1936 :
1937 0 : count = countchar(self_s, self_len, from_c, maxcount);
1938 0 : if (count == 0) {
1939 : /* no matches, return unchanged */
1940 0 : return return_self(self);
1941 : }
1942 :
1943 : /* use the difference between current and new, hence the "-1" */
1944 : /* result_len = self_len + count * (to_len-1) */
1945 : assert(count > 0);
1946 0 : if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
1947 0 : PyErr_SetString(PyExc_OverflowError,
1948 : "replacement bytes are too long");
1949 0 : return NULL;
1950 : }
1951 0 : result_len = self_len + count * (to_len - 1);
1952 :
1953 0 : if ( (result = (PyBytesObject *)
1954 : PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
1955 0 : return NULL;
1956 0 : result_s = PyBytes_AS_STRING(result);
1957 :
1958 0 : start = self_s;
1959 0 : end = self_s + self_len;
1960 0 : while (count-- > 0) {
1961 0 : next = findchar(start, end-start, from_c);
1962 0 : if (next == NULL)
1963 0 : break;
1964 :
1965 0 : if (next == start) {
1966 : /* replace with the 'to' */
1967 0 : Py_MEMCPY(result_s, to_s, to_len);
1968 0 : result_s += to_len;
1969 0 : start += 1;
1970 : } else {
1971 : /* copy the unchanged old then the 'to' */
1972 0 : Py_MEMCPY(result_s, start, next-start);
1973 0 : result_s += (next-start);
1974 0 : Py_MEMCPY(result_s, to_s, to_len);
1975 0 : result_s += to_len;
1976 0 : start = next+1;
1977 : }
1978 : }
1979 : /* Copy the remainder of the remaining string */
1980 0 : Py_MEMCPY(result_s, start, end-start);
1981 :
1982 0 : return result;
1983 : }
1984 :
1985 : /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1986 : Py_LOCAL(PyBytesObject *)
1987 0 : replace_substring(PyBytesObject *self,
1988 : const char *from_s, Py_ssize_t from_len,
1989 : const char *to_s, Py_ssize_t to_len,
1990 : Py_ssize_t maxcount) {
1991 : char *self_s, *result_s;
1992 : char *start, *next, *end;
1993 : Py_ssize_t self_len, result_len;
1994 : Py_ssize_t count, offset;
1995 : PyBytesObject *result;
1996 :
1997 0 : self_s = PyBytes_AS_STRING(self);
1998 0 : self_len = PyBytes_GET_SIZE(self);
1999 :
2000 0 : count = stringlib_count(self_s, self_len,
2001 : from_s, from_len,
2002 : maxcount);
2003 :
2004 0 : if (count == 0) {
2005 : /* no matches, return unchanged */
2006 0 : return return_self(self);
2007 : }
2008 :
2009 : /* Check for overflow */
2010 : /* result_len = self_len + count * (to_len-from_len) */
2011 : assert(count > 0);
2012 0 : if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
2013 0 : PyErr_SetString(PyExc_OverflowError,
2014 : "replacement bytes are too long");
2015 0 : return NULL;
2016 : }
2017 0 : result_len = self_len + count * (to_len-from_len);
2018 :
2019 0 : if ( (result = (PyBytesObject *)
2020 : PyBytes_FromStringAndSize(NULL, result_len)) == NULL)
2021 0 : return NULL;
2022 0 : result_s = PyBytes_AS_STRING(result);
2023 :
2024 0 : start = self_s;
2025 0 : end = self_s + self_len;
2026 0 : while (count-- > 0) {
2027 0 : offset = stringlib_find(start, end-start,
2028 : from_s, from_len,
2029 : 0);
2030 0 : if (offset == -1)
2031 0 : break;
2032 0 : next = start+offset;
2033 0 : if (next == start) {
2034 : /* replace with the 'to' */
2035 0 : Py_MEMCPY(result_s, to_s, to_len);
2036 0 : result_s += to_len;
2037 0 : start += from_len;
2038 : } else {
2039 : /* copy the unchanged old then the 'to' */
2040 0 : Py_MEMCPY(result_s, start, next-start);
2041 0 : result_s += (next-start);
2042 0 : Py_MEMCPY(result_s, to_s, to_len);
2043 0 : result_s += to_len;
2044 0 : start = next+from_len;
2045 : }
2046 : }
2047 : /* Copy the remainder of the remaining string */
2048 0 : Py_MEMCPY(result_s, start, end-start);
2049 :
2050 0 : return result;
2051 : }
2052 :
2053 :
2054 : Py_LOCAL(PyBytesObject *)
2055 0 : replace(PyBytesObject *self,
2056 : const char *from_s, Py_ssize_t from_len,
2057 : const char *to_s, Py_ssize_t to_len,
2058 : Py_ssize_t maxcount)
2059 : {
2060 0 : if (maxcount < 0) {
2061 0 : maxcount = PY_SSIZE_T_MAX;
2062 0 : } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
2063 : /* nothing to do; return the original string */
2064 0 : return return_self(self);
2065 : }
2066 :
2067 0 : if (maxcount == 0 ||
2068 0 : (from_len == 0 && to_len == 0)) {
2069 : /* nothing to do; return the original string */
2070 0 : return return_self(self);
2071 : }
2072 :
2073 : /* Handle zero-length special cases */
2074 :
2075 0 : if (from_len == 0) {
2076 : /* insert the 'to' string everywhere. */
2077 : /* >>> "Python".replace("", ".") */
2078 : /* '.P.y.t.h.o.n.' */
2079 0 : return replace_interleave(self, to_s, to_len, maxcount);
2080 : }
2081 :
2082 : /* Except for "".replace("", "A") == "A" there is no way beyond this */
2083 : /* point for an empty self string to generate a non-empty string */
2084 : /* Special case so the remaining code always gets a non-empty string */
2085 0 : if (PyBytes_GET_SIZE(self) == 0) {
2086 0 : return return_self(self);
2087 : }
2088 :
2089 0 : if (to_len == 0) {
2090 : /* delete all occurrences of 'from' string */
2091 0 : if (from_len == 1) {
2092 0 : return replace_delete_single_character(
2093 0 : self, from_s[0], maxcount);
2094 : } else {
2095 0 : return replace_delete_substring(self, from_s,
2096 : from_len, maxcount);
2097 : }
2098 : }
2099 :
2100 : /* Handle special case where both strings have the same length */
2101 :
2102 0 : if (from_len == to_len) {
2103 0 : if (from_len == 1) {
2104 0 : return replace_single_character_in_place(
2105 : self,
2106 0 : from_s[0],
2107 0 : to_s[0],
2108 : maxcount);
2109 : } else {
2110 0 : return replace_substring_in_place(
2111 : self, from_s, from_len, to_s, to_len,
2112 : maxcount);
2113 : }
2114 : }
2115 :
2116 : /* Otherwise use the more generic algorithms */
2117 0 : if (from_len == 1) {
2118 0 : return replace_single_character(self, from_s[0],
2119 : to_s, to_len, maxcount);
2120 : } else {
2121 : /* len('from')>=2, len('to')>=1 */
2122 0 : return replace_substring(self, from_s, from_len, to_s, to_len,
2123 : maxcount);
2124 : }
2125 : }
2126 :
2127 : PyDoc_STRVAR(replace__doc__,
2128 : "B.replace(old, new[, count]) -> bytes\n\
2129 : \n\
2130 : Return a copy of B with all occurrences of subsection\n\
2131 : old replaced by new. If the optional argument count is\n\
2132 : given, only first count occurances are replaced.");
2133 :
2134 : static PyObject *
2135 0 : bytes_replace(PyBytesObject *self, PyObject *args)
2136 : {
2137 0 : Py_ssize_t count = -1;
2138 : PyObject *from, *to;
2139 : const char *from_s, *to_s;
2140 : Py_ssize_t from_len, to_len;
2141 :
2142 0 : if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2143 0 : return NULL;
2144 :
2145 0 : if (PyBytes_Check(from)) {
2146 0 : from_s = PyBytes_AS_STRING(from);
2147 0 : from_len = PyBytes_GET_SIZE(from);
2148 : }
2149 0 : else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2150 0 : return NULL;
2151 :
2152 0 : if (PyBytes_Check(to)) {
2153 0 : to_s = PyBytes_AS_STRING(to);
2154 0 : to_len = PyBytes_GET_SIZE(to);
2155 : }
2156 0 : else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2157 0 : return NULL;
2158 :
2159 0 : return (PyObject *)replace((PyBytesObject *) self,
2160 : from_s, from_len,
2161 : to_s, to_len, count);
2162 : }
2163 :
2164 : /** End DALKE **/
2165 :
2166 : /* Matches the end (direction >= 0) or start (direction < 0) of self
2167 : * against substr, using the start and end arguments. Returns
2168 : * -1 on error, 0 if not found and 1 if found.
2169 : */
2170 : Py_LOCAL(int)
2171 0 : _bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
2172 : Py_ssize_t end, int direction)
2173 : {
2174 0 : Py_ssize_t len = PyBytes_GET_SIZE(self);
2175 : Py_ssize_t slen;
2176 : const char* sub;
2177 : const char* str;
2178 :
2179 0 : if (PyBytes_Check(substr)) {
2180 0 : sub = PyBytes_AS_STRING(substr);
2181 0 : slen = PyBytes_GET_SIZE(substr);
2182 : }
2183 0 : else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2184 0 : return -1;
2185 0 : str = PyBytes_AS_STRING(self);
2186 :
2187 0 : ADJUST_INDICES(start, end, len);
2188 :
2189 0 : if (direction < 0) {
2190 : /* startswith */
2191 0 : if (start+slen > len)
2192 0 : return 0;
2193 : } else {
2194 : /* endswith */
2195 0 : if (end-start < slen || start > len)
2196 0 : return 0;
2197 :
2198 0 : if (end-slen > start)
2199 0 : start = end - slen;
2200 : }
2201 0 : if (end-start >= slen)
2202 0 : return ! memcmp(str+start, sub, slen);
2203 0 : return 0;
2204 : }
2205 :
2206 :
2207 : PyDoc_STRVAR(startswith__doc__,
2208 : "B.startswith(prefix[, start[, end]]) -> bool\n\
2209 : \n\
2210 : Return True if B starts with the specified prefix, False otherwise.\n\
2211 : With optional start, test B beginning at that position.\n\
2212 : With optional end, stop comparing B at that position.\n\
2213 : prefix can also be a tuple of bytes to try.");
2214 :
2215 : static PyObject *
2216 0 : bytes_startswith(PyBytesObject *self, PyObject *args)
2217 : {
2218 0 : Py_ssize_t start = 0;
2219 0 : Py_ssize_t end = PY_SSIZE_T_MAX;
2220 : PyObject *subobj;
2221 : int result;
2222 :
2223 0 : if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
2224 0 : return NULL;
2225 0 : if (PyTuple_Check(subobj)) {
2226 : Py_ssize_t i;
2227 0 : for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2228 0 : result = _bytes_tailmatch(self,
2229 0 : PyTuple_GET_ITEM(subobj, i),
2230 : start, end, -1);
2231 0 : if (result == -1)
2232 0 : return NULL;
2233 0 : else if (result) {
2234 0 : Py_RETURN_TRUE;
2235 : }
2236 : }
2237 0 : Py_RETURN_FALSE;
2238 : }
2239 0 : result = _bytes_tailmatch(self, subobj, start, end, -1);
2240 0 : if (result == -1) {
2241 0 : if (PyErr_ExceptionMatches(PyExc_TypeError))
2242 0 : PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes "
2243 0 : "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
2244 0 : return NULL;
2245 : }
2246 : else
2247 0 : return PyBool_FromLong(result);
2248 : }
2249 :
2250 :
2251 : PyDoc_STRVAR(endswith__doc__,
2252 : "B.endswith(suffix[, start[, end]]) -> bool\n\
2253 : \n\
2254 : Return True if B ends with the specified suffix, False otherwise.\n\
2255 : With optional start, test B beginning at that position.\n\
2256 : With optional end, stop comparing B at that position.\n\
2257 : suffix can also be a tuple of bytes to try.");
2258 :
2259 : static PyObject *
2260 0 : bytes_endswith(PyBytesObject *self, PyObject *args)
2261 : {
2262 0 : Py_ssize_t start = 0;
2263 0 : Py_ssize_t end = PY_SSIZE_T_MAX;
2264 : PyObject *subobj;
2265 : int result;
2266 :
2267 0 : if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
2268 0 : return NULL;
2269 0 : if (PyTuple_Check(subobj)) {
2270 : Py_ssize_t i;
2271 0 : for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2272 0 : result = _bytes_tailmatch(self,
2273 0 : PyTuple_GET_ITEM(subobj, i),
2274 : start, end, +1);
2275 0 : if (result == -1)
2276 0 : return NULL;
2277 0 : else if (result) {
2278 0 : Py_RETURN_TRUE;
2279 : }
2280 : }
2281 0 : Py_RETURN_FALSE;
2282 : }
2283 0 : result = _bytes_tailmatch(self, subobj, start, end, +1);
2284 0 : if (result == -1) {
2285 0 : if (PyErr_ExceptionMatches(PyExc_TypeError))
2286 0 : PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or "
2287 0 : "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name);
2288 0 : return NULL;
2289 : }
2290 : else
2291 0 : return PyBool_FromLong(result);
2292 : }
2293 :
2294 :
2295 : PyDoc_STRVAR(decode__doc__,
2296 : "B.decode(encoding='utf-8', errors='strict') -> str\n\
2297 : \n\
2298 : Decode B using the codec registered for encoding. Default encoding\n\
2299 : is 'utf-8'. errors may be given to set a different error\n\
2300 : handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2301 : a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2302 : as well as any other name registerd with codecs.register_error that is\n\
2303 : able to handle UnicodeDecodeErrors.");
2304 :
2305 : static PyObject *
2306 2 : bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
2307 : {
2308 2 : const char *encoding = NULL;
2309 2 : const char *errors = NULL;
2310 : static char *kwlist[] = {"encoding", "errors", 0};
2311 :
2312 2 : if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2313 0 : return NULL;
2314 2 : if (encoding == NULL)
2315 0 : encoding = PyUnicode_GetDefaultEncoding();
2316 2 : return PyUnicode_FromEncodedObject(self, encoding, errors);
2317 : }
2318 :
2319 :
2320 : PyDoc_STRVAR(splitlines__doc__,
2321 : "B.splitlines([keepends]) -> list of lines\n\
2322 : \n\
2323 : Return a list of the lines in B, breaking at line boundaries.\n\
2324 : Line breaks are not included in the resulting list unless keepends\n\
2325 : is given and true.");
2326 :
2327 : static PyObject*
2328 0 : bytes_splitlines(PyObject *self, PyObject *args, PyObject *kwds)
2329 : {
2330 : static char *kwlist[] = {"keepends", 0};
2331 0 : int keepends = 0;
2332 :
2333 0 : if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:splitlines",
2334 : kwlist, &keepends))
2335 0 : return NULL;
2336 :
2337 0 : return stringlib_splitlines(
2338 0 : (PyObject*) self, PyBytes_AS_STRING(self),
2339 : PyBytes_GET_SIZE(self), keepends
2340 : );
2341 : }
2342 :
2343 :
2344 : PyDoc_STRVAR(fromhex_doc,
2345 : "bytes.fromhex(string) -> bytes\n\
2346 : \n\
2347 : Create a bytes object from a string of hexadecimal numbers.\n\
2348 : Spaces between two numbers are accepted.\n\
2349 : Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
2350 :
2351 : static int
2352 0 : hex_digit_to_int(Py_UCS4 c)
2353 : {
2354 0 : if (c >= 128)
2355 0 : return -1;
2356 0 : if (Py_ISDIGIT(c))
2357 0 : return c - '0';
2358 : else {
2359 0 : if (Py_ISUPPER(c))
2360 0 : c = Py_TOLOWER(c);
2361 0 : if (c >= 'a' && c <= 'f')
2362 0 : return c - 'a' + 10;
2363 : }
2364 0 : return -1;
2365 : }
2366 :
2367 : static PyObject *
2368 0 : bytes_fromhex(PyObject *cls, PyObject *args)
2369 : {
2370 : PyObject *newstring, *hexobj;
2371 : char *buf;
2372 : Py_ssize_t hexlen, byteslen, i, j;
2373 : int top, bot;
2374 : void *data;
2375 : unsigned int kind;
2376 :
2377 0 : if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
2378 0 : return NULL;
2379 : assert(PyUnicode_Check(hexobj));
2380 0 : if (PyUnicode_READY(hexobj))
2381 0 : return NULL;
2382 0 : kind = PyUnicode_KIND(hexobj);
2383 0 : data = PyUnicode_DATA(hexobj);
2384 0 : hexlen = PyUnicode_GET_LENGTH(hexobj);
2385 :
2386 0 : byteslen = hexlen/2; /* This overestimates if there are spaces */
2387 0 : newstring = PyBytes_FromStringAndSize(NULL, byteslen);
2388 0 : if (!newstring)
2389 0 : return NULL;
2390 0 : buf = PyBytes_AS_STRING(newstring);
2391 0 : for (i = j = 0; i < hexlen; i += 2) {
2392 : /* skip over spaces in the input */
2393 0 : while (PyUnicode_READ(kind, data, i) == ' ')
2394 0 : i++;
2395 0 : if (i >= hexlen)
2396 0 : break;
2397 0 : top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
2398 0 : bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
2399 0 : if (top == -1 || bot == -1) {
2400 0 : PyErr_Format(PyExc_ValueError,
2401 : "non-hexadecimal number found in "
2402 : "fromhex() arg at position %zd", i);
2403 0 : goto error;
2404 : }
2405 0 : buf[j++] = (top << 4) + bot;
2406 : }
2407 0 : if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
2408 0 : goto error;
2409 0 : return newstring;
2410 :
2411 : error:
2412 0 : Py_XDECREF(newstring);
2413 0 : return NULL;
2414 : }
2415 :
2416 : PyDoc_STRVAR(sizeof__doc__,
2417 : "B.__sizeof__() -> size of B in memory, in bytes");
2418 :
2419 : static PyObject *
2420 0 : bytes_sizeof(PyBytesObject *v)
2421 : {
2422 : Py_ssize_t res;
2423 0 : res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
2424 0 : return PyLong_FromSsize_t(res);
2425 : }
2426 :
2427 :
2428 : static PyObject *
2429 0 : bytes_getnewargs(PyBytesObject *v)
2430 : {
2431 0 : return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
2432 : }
2433 :
2434 :
/* Method table for the bytes type; it is installed through the
   tp_methods slot of PyBytes_Type.  Each entry gives the Python-level
   name, the C implementation, the calling-convention flags and (where
   present) the docstring.  Entries named stringlib_* and the _Py_*__doc__
   docstrings are defined outside this part of the file. */
static PyMethodDef
bytes_methods[] = {
    {"__getnewargs__",          (PyCFunction)bytes_getnewargs,  METH_NOARGS},
    {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
     _Py_capitalize__doc__},
    {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
    {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__},
    {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
    {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
     endswith__doc__},
    {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
     expandtabs__doc__},
    {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
    /* fromhex is a class method: it receives the type, not an instance. */
    {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
     fromhex_doc},
    {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__},
    {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
     _Py_isalnum__doc__},
    {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
     _Py_isalpha__doc__},
    {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
     _Py_isdigit__doc__},
    {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
     _Py_islower__doc__},
    {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
     _Py_isspace__doc__},
    {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
     _Py_istitle__doc__},
    {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
     _Py_isupper__doc__},
    {"join", (PyCFunction)bytes_join, METH_O, join__doc__},
    {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
    {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
    {"lstrip", (PyCFunction)bytes_lstrip, METH_VARARGS, lstrip__doc__},
    /* maketrans is a static method: no self/cls is passed. */
    {"maketrans", (PyCFunction)bytes_maketrans, METH_VARARGS|METH_STATIC,
     _Py_maketrans__doc__},
    {"partition", (PyCFunction)bytes_partition, METH_O, partition__doc__},
    {"replace", (PyCFunction)bytes_replace, METH_VARARGS, replace__doc__},
    {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__},
    {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
    {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
     rpartition__doc__},
    {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS | METH_KEYWORDS, rsplit__doc__},
    {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
    {"split", (PyCFunction)bytes_split, METH_VARARGS | METH_KEYWORDS, split__doc__},
    {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS | METH_KEYWORDS,
     splitlines__doc__},
    {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
     startswith__doc__},
    {"strip", (PyCFunction)bytes_strip, METH_VARARGS, strip__doc__},
    {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
     _Py_swapcase__doc__},
    {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
    {"translate", (PyCFunction)bytes_translate, METH_VARARGS,
     translate__doc__},
    {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
    {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
    {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
     sizeof__doc__},
    {NULL,     NULL}                         /* sentinel */
};
2497 :
2498 : static PyObject *
2499 : str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2500 :
2501 : static PyObject *
2502 1 : bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2503 : {
2504 1 : PyObject *x = NULL;
2505 1 : const char *encoding = NULL;
2506 1 : const char *errors = NULL;
2507 1 : PyObject *new = NULL;
2508 : Py_ssize_t size;
2509 : static char *kwlist[] = {"source", "encoding", "errors", 0};
2510 :
2511 1 : if (type != &PyBytes_Type)
2512 0 : return str_subtype_new(type, args, kwds);
2513 1 : if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2514 : &encoding, &errors))
2515 0 : return NULL;
2516 1 : if (x == NULL) {
2517 0 : if (encoding != NULL || errors != NULL) {
2518 0 : PyErr_SetString(PyExc_TypeError,
2519 : "encoding or errors without sequence "
2520 : "argument");
2521 0 : return NULL;
2522 : }
2523 0 : return PyBytes_FromString("");
2524 : }
2525 :
2526 1 : if (PyUnicode_Check(x)) {
2527 : /* Encode via the codec registry */
2528 0 : if (encoding == NULL) {
2529 0 : PyErr_SetString(PyExc_TypeError,
2530 : "string argument without an encoding");
2531 0 : return NULL;
2532 : }
2533 0 : new = PyUnicode_AsEncodedString(x, encoding, errors);
2534 0 : if (new == NULL)
2535 0 : return NULL;
2536 : assert(PyBytes_Check(new));
2537 0 : return new;
2538 : }
2539 : /* Is it an integer? */
2540 1 : size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2541 1 : if (size == -1 && PyErr_Occurred()) {
2542 1 : if (PyErr_ExceptionMatches(PyExc_OverflowError))
2543 0 : return NULL;
2544 1 : PyErr_Clear();
2545 : }
2546 0 : else if (size < 0) {
2547 0 : PyErr_SetString(PyExc_ValueError, "negative count");
2548 0 : return NULL;
2549 : }
2550 : else {
2551 0 : new = PyBytes_FromStringAndSize(NULL, size);
2552 0 : if (new == NULL) {
2553 0 : return NULL;
2554 : }
2555 0 : if (size > 0) {
2556 0 : memset(((PyBytesObject*)new)->ob_sval, 0, size);
2557 : }
2558 0 : return new;
2559 : }
2560 :
2561 : /* If it's not unicode, there can't be encoding or errors */
2562 2 : if (encoding != NULL || errors != NULL) {
2563 0 : PyErr_SetString(PyExc_TypeError,
2564 : "encoding or errors without a string argument");
2565 0 : return NULL;
2566 : }
2567 1 : return PyObject_Bytes(x);
2568 : }
2569 :
2570 : PyObject *
2571 3 : PyBytes_FromObject(PyObject *x)
2572 : {
2573 : PyObject *new, *it;
2574 : Py_ssize_t i, size;
2575 :
2576 3 : if (x == NULL) {
2577 0 : PyErr_BadInternalCall();
2578 0 : return NULL;
2579 : }
2580 :
2581 3 : if (PyBytes_CheckExact(x)) {
2582 2 : Py_INCREF(x);
2583 2 : return x;
2584 : }
2585 :
2586 : /* Use the modern buffer interface */
2587 1 : if (PyObject_CheckBuffer(x)) {
2588 : Py_buffer view;
2589 0 : if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2590 0 : return NULL;
2591 0 : new = PyBytes_FromStringAndSize(NULL, view.len);
2592 0 : if (!new)
2593 0 : goto fail;
2594 0 : if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2595 : &view, view.len, 'C') < 0)
2596 0 : goto fail;
2597 0 : PyBuffer_Release(&view);
2598 0 : return new;
2599 : fail:
2600 0 : Py_XDECREF(new);
2601 0 : PyBuffer_Release(&view);
2602 0 : return NULL;
2603 : }
2604 1 : if (PyUnicode_Check(x)) {
2605 0 : PyErr_SetString(PyExc_TypeError,
2606 : "cannot convert unicode object to bytes");
2607 0 : return NULL;
2608 : }
2609 :
2610 1 : if (PyList_CheckExact(x)) {
2611 0 : new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2612 0 : if (new == NULL)
2613 0 : return NULL;
2614 0 : for (i = 0; i < Py_SIZE(x); i++) {
2615 0 : Py_ssize_t value = PyNumber_AsSsize_t(
2616 0 : PyList_GET_ITEM(x, i), PyExc_ValueError);
2617 0 : if (value == -1 && PyErr_Occurred()) {
2618 0 : Py_DECREF(new);
2619 0 : return NULL;
2620 : }
2621 0 : if (value < 0 || value >= 256) {
2622 0 : PyErr_SetString(PyExc_ValueError,
2623 : "bytes must be in range(0, 256)");
2624 0 : Py_DECREF(new);
2625 0 : return NULL;
2626 : }
2627 0 : ((PyBytesObject *)new)->ob_sval[i] = (char) value;
2628 : }
2629 0 : return new;
2630 : }
2631 1 : if (PyTuple_CheckExact(x)) {
2632 0 : new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
2633 0 : if (new == NULL)
2634 0 : return NULL;
2635 0 : for (i = 0; i < Py_SIZE(x); i++) {
2636 0 : Py_ssize_t value = PyNumber_AsSsize_t(
2637 : PyTuple_GET_ITEM(x, i), PyExc_ValueError);
2638 0 : if (value == -1 && PyErr_Occurred()) {
2639 0 : Py_DECREF(new);
2640 0 : return NULL;
2641 : }
2642 0 : if (value < 0 || value >= 256) {
2643 0 : PyErr_SetString(PyExc_ValueError,
2644 : "bytes must be in range(0, 256)");
2645 0 : Py_DECREF(new);
2646 0 : return NULL;
2647 : }
2648 0 : ((PyBytesObject *)new)->ob_sval[i] = (char) value;
2649 : }
2650 0 : return new;
2651 : }
2652 :
2653 : /* For iterator version, create a string object and resize as needed */
2654 1 : size = _PyObject_LengthHint(x, 64);
2655 1 : if (size == -1 && PyErr_Occurred())
2656 0 : return NULL;
2657 : /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
2658 : returning a shared empty bytes string. This required because we
2659 : want to call _PyBytes_Resize() the returned object, which we can
2660 : only do on bytes objects with refcount == 1. */
2661 1 : size += 1;
2662 1 : new = PyBytes_FromStringAndSize(NULL, size);
2663 1 : if (new == NULL)
2664 0 : return NULL;
2665 :
2666 : /* Get the iterator */
2667 1 : it = PyObject_GetIter(x);
2668 1 : if (it == NULL)
2669 0 : goto error;
2670 :
2671 : /* Run the iterator to exhaustion */
2672 5 : for (i = 0; ; i++) {
2673 : PyObject *item;
2674 : Py_ssize_t value;
2675 :
2676 : /* Get the next item */
2677 5 : item = PyIter_Next(it);
2678 5 : if (item == NULL) {
2679 1 : if (PyErr_Occurred())
2680 0 : goto error;
2681 1 : break;
2682 : }
2683 :
2684 : /* Interpret it as an int (__index__) */
2685 4 : value = PyNumber_AsSsize_t(item, PyExc_ValueError);
2686 4 : Py_DECREF(item);
2687 4 : if (value == -1 && PyErr_Occurred())
2688 0 : goto error;
2689 :
2690 : /* Range check */
2691 4 : if (value < 0 || value >= 256) {
2692 0 : PyErr_SetString(PyExc_ValueError,
2693 : "bytes must be in range(0, 256)");
2694 0 : goto error;
2695 : }
2696 :
2697 : /* Append the byte */
2698 4 : if (i >= size) {
2699 0 : size = 2 * size + 1;
2700 0 : if (_PyBytes_Resize(&new, size) < 0)
2701 0 : goto error;
2702 : }
2703 4 : ((PyBytesObject *)new)->ob_sval[i] = (char) value;
2704 4 : }
2705 1 : _PyBytes_Resize(&new, i);
2706 :
2707 : /* Clean up and return success */
2708 1 : Py_DECREF(it);
2709 1 : return new;
2710 :
2711 : error:
2712 : /* Error handling when new != NULL */
2713 0 : Py_XDECREF(it);
2714 0 : Py_DECREF(new);
2715 0 : return NULL;
2716 : }
2717 :
2718 : static PyObject *
2719 0 : str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2720 : {
2721 : PyObject *tmp, *pnew;
2722 : Py_ssize_t n;
2723 :
2724 : assert(PyType_IsSubtype(type, &PyBytes_Type));
2725 0 : tmp = bytes_new(&PyBytes_Type, args, kwds);
2726 0 : if (tmp == NULL)
2727 0 : return NULL;
2728 : assert(PyBytes_CheckExact(tmp));
2729 0 : n = PyBytes_GET_SIZE(tmp);
2730 0 : pnew = type->tp_alloc(type, n);
2731 0 : if (pnew != NULL) {
2732 0 : Py_MEMCPY(PyBytes_AS_STRING(pnew),
2733 0 : PyBytes_AS_STRING(tmp), n+1);
2734 0 : ((PyBytesObject *)pnew)->ob_shash =
2735 0 : ((PyBytesObject *)tmp)->ob_shash;
2736 : }
2737 0 : Py_DECREF(tmp);
2738 0 : return pnew;
2739 : }
2740 :
/* Docstring of the bytes type; referenced from the tp_doc slot below. */
PyDoc_STRVAR(bytes_doc,
"bytes(iterable_of_ints) -> bytes\n\
bytes(string, encoding[, errors]) -> bytes\n\
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
bytes() -> empty bytes object\n\
\n\
Construct an immutable array of bytes from:\n\
  - an iterable yielding integers in range(256)\n\
  - a text string encoded using the specified encoding\n\
  - any object implementing the buffer API.\n\
  - an integer");

/* Defined at the end of the file; needed here for the tp_iter slot. */
static PyObject *bytes_iter(PyObject *seq);

/* Type object for bytes.  The initializer is positional, so the order of
   the entries must match the slot order named in the trailing comments. */
PyTypeObject PyBytes_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes",                                    /* tp_name */
    PyBytesObject_SIZE,                         /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    bytes_dealloc,                      /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    (reprfunc)bytes_repr,                       /* tp_repr */
    0,                                          /* tp_as_number */
    &bytes_as_sequence,                         /* tp_as_sequence */
    &bytes_as_mapping,                          /* tp_as_mapping */
    (hashfunc)bytes_hash,                       /* tp_hash */
    0,                                          /* tp_call */
    bytes_str,                                  /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &bytes_as_buffer,                           /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_BYTES_SUBCLASS,              /* tp_flags */
    bytes_doc,                                  /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)bytes_richcompare,             /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    bytes_iter,                                 /* tp_iter */
    0,                                          /* tp_iternext */
    bytes_methods,                              /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    bytes_new,                                  /* tp_new */
    PyObject_Del,                               /* tp_free */
};
2798 :
2799 : void
2800 0 : PyBytes_Concat(register PyObject **pv, register PyObject *w)
2801 : {
2802 : register PyObject *v;
2803 : assert(pv != NULL);
2804 0 : if (*pv == NULL)
2805 0 : return;
2806 0 : if (w == NULL) {
2807 0 : Py_DECREF(*pv);
2808 0 : *pv = NULL;
2809 0 : return;
2810 : }
2811 0 : v = bytes_concat(*pv, w);
2812 0 : Py_DECREF(*pv);
2813 0 : *pv = v;
2814 : }
2815 :
2816 : void
2817 0 : PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
2818 : {
2819 0 : PyBytes_Concat(pv, w);
2820 0 : Py_XDECREF(w);
2821 0 : }
2822 :
2823 :
2824 : /* The following function breaks the notion that strings are immutable:
2825 : it changes the size of a string. We get away with this only if there
2826 : is only one module referencing the object. You can also think of it
2827 : as creating a new string object and destroying the old one, only
2828 : more efficiently. In any case, don't use this if the string may
2829 : already be known to some other part of the code...
2830 : Note that if there's not enough memory to resize the string, the original
2831 : string object at *pv is deallocated, *pv is set to NULL, an "out of
2832 : memory" exception is set, and -1 is returned. Else (on success) 0 is
2833 : returned, and the value in *pv may or may not be the same as on input.
2834 : As always, an extra byte is allocated for a trailing \0 byte (newsize
2835 : does *not* include that), and a trailing \0 byte is stored.
2836 : */
2837 :
2838 : int
2839 175 : _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2840 : {
2841 : register PyObject *v;
2842 : register PyBytesObject *sv;
2843 175 : v = *pv;
2844 175 : if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
2845 0 : *pv = 0;
2846 0 : Py_DECREF(v);
2847 0 : PyErr_BadInternalCall();
2848 0 : return -1;
2849 : }
2850 : /* XXX UNREF/NEWREF interface should be more symmetrical */
2851 : _Py_DEC_REFTOTAL;
2852 : _Py_ForgetReference(v);
2853 175 : *pv = (PyObject *)
2854 175 : PyObject_REALLOC((char *)v, PyBytesObject_SIZE + newsize);
2855 175 : if (*pv == NULL) {
2856 0 : PyObject_Del(v);
2857 0 : PyErr_NoMemory();
2858 0 : return -1;
2859 : }
2860 175 : _Py_NewReference(*pv);
2861 175 : sv = (PyBytesObject *) *pv;
2862 175 : Py_SIZE(sv) = newsize;
2863 175 : sv->ob_sval[newsize] = '\0';
2864 175 : sv->ob_shash = -1; /* invalidate cached hash value */
2865 175 : return 0;
2866 : }
2867 :
2868 : void
2869 0 : PyBytes_Fini(void)
2870 : {
2871 : int i;
2872 0 : for (i = 0; i < UCHAR_MAX + 1; i++) {
2873 0 : Py_XDECREF(characters[i]);
2874 0 : characters[i] = NULL;
2875 : }
2876 0 : Py_XDECREF(nullstring);
2877 0 : nullstring = NULL;
2878 0 : }
2879 :
2880 : /*********************** Bytes Iterator ****************************/
2881 :
/* State of an iterator over a bytes object. */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;    /* index of the next byte to yield */
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
2887 :
/* tp_dealloc for the bytes iterator: untrack it from the GC, drop the
   reference to the underlying bytes object (if any is still held) and
   free the iterator itself. */
static void
striter_dealloc(striterobject *it)
{
    _PyObject_GC_UNTRACK(it);
    Py_XDECREF(it->it_seq);
    PyObject_GC_Del(it);
}
2895 :
/* tp_traverse for the bytes iterator: the only object reference it
   holds is it_seq. */
static int
striter_traverse(striterobject *it, visitproc visit, void *arg)
{
    Py_VISIT(it->it_seq);
    return 0;
}
2902 :
2903 : static PyObject *
2904 65 : striter_next(striterobject *it)
2905 : {
2906 : PyBytesObject *seq;
2907 : PyObject *item;
2908 :
2909 : assert(it != NULL);
2910 65 : seq = it->it_seq;
2911 65 : if (seq == NULL)
2912 0 : return NULL;
2913 : assert(PyBytes_Check(seq));
2914 :
2915 65 : if (it->it_index < PyBytes_GET_SIZE(seq)) {
2916 64 : item = PyLong_FromLong(
2917 64 : (unsigned char)seq->ob_sval[it->it_index]);
2918 64 : if (item != NULL)
2919 64 : ++it->it_index;
2920 64 : return item;
2921 : }
2922 :
2923 1 : Py_DECREF(seq);
2924 1 : it->it_seq = NULL;
2925 1 : return NULL;
2926 : }
2927 :
2928 : static PyObject *
2929 0 : striter_len(striterobject *it)
2930 : {
2931 0 : Py_ssize_t len = 0;
2932 0 : if (it->it_seq)
2933 0 : len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
2934 0 : return PyLong_FromSsize_t(len);
2935 : }
2936 :
2937 : PyDoc_STRVAR(length_hint_doc,
2938 : "Private method returning an estimate of len(list(it)).");
2939 :
2940 : static PyObject *
2941 0 : striter_reduce(striterobject *it)
2942 : {
2943 0 : if (it->it_seq != NULL) {
2944 0 : return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
2945 : it->it_seq, it->it_index);
2946 : } else {
2947 0 : PyObject *u = PyUnicode_FromUnicode(NULL, 0);
2948 0 : if (u == NULL)
2949 0 : return NULL;
2950 0 : return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
2951 : }
2952 : }
2953 :
2954 : PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2955 :
2956 : static PyObject *
2957 0 : striter_setstate(striterobject *it, PyObject *state)
2958 : {
2959 0 : Py_ssize_t index = PyLong_AsSsize_t(state);
2960 0 : if (index == -1 && PyErr_Occurred())
2961 0 : return NULL;
2962 0 : if (index < 0)
2963 0 : index = 0;
2964 0 : it->it_index = index;
2965 0 : Py_RETURN_NONE;
2966 : }
2967 :
2968 : PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
2969 :
/* Method table of the bytes iterator type (length hint plus the two
   pickling hooks); installed through tp_methods of PyBytesIter_Type. */
static PyMethodDef striter_methods[] = {
    {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
     length_hint_doc},
    {"__reduce__",      (PyCFunction)striter_reduce, METH_NOARGS,
     reduce_doc},
    {"__setstate__",    (PyCFunction)striter_setstate, METH_O,
     setstate_doc},
    {NULL,              NULL}           /* sentinel */
};
2979 :
/* Type object of the iterator returned by bytes_iter().  The initializer
   is positional, so the order of the entries must match the slot order
   named in the trailing comments.  The type takes part in garbage
   collection (Py_TPFLAGS_HAVE_GC with striter_traverse). */
PyTypeObject PyBytesIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes_iterator",                           /* tp_name */
    sizeof(striterobject),                      /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)striter_dealloc,                /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)striter_traverse,     /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)striter_next,                 /* tp_iternext */
    striter_methods,                            /* tp_methods */
    0,                                          /* tp_members */
};
3012 :
3013 : static PyObject *
3014 2 : bytes_iter(PyObject *seq)
3015 : {
3016 : striterobject *it;
3017 :
3018 2 : if (!PyBytes_Check(seq)) {
3019 0 : PyErr_BadInternalCall();
3020 0 : return NULL;
3021 : }
3022 2 : it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3023 2 : if (it == NULL)
3024 0 : return NULL;
3025 2 : it->it_index = 0;
3026 2 : Py_INCREF(seq);
3027 2 : it->it_seq = (PyBytesObject *)seq;
3028 2 : _PyObject_GC_TRACK(it);
3029 2 : return (PyObject *)it;
3030 : }
|