Line data Source code
1 : /*
2 : unicode_format.h -- implementation of str.format().
3 : */
4 :
5 : /* Defines for more efficiently reallocating the string buffer */
6 : #define INITIAL_SIZE_INCREMENT 100
7 : #define SIZE_MULTIPLIER 2
8 : #define MAX_SIZE_INCREMENT 3200
9 :
10 :
11 : /************************************************************************/
12 : /*********** Global data structures and forward declarations *********/
13 : /************************************************************************/
14 :
15 : /*
16 : A SubString consists of the characters between two string or
17 : unicode pointers.
18 : */
19 : typedef struct {
20 : PyObject *str; /* borrowed reference */
21 : Py_ssize_t start, end;
22 : } SubString;
23 :
24 :
/* Keep track if we're auto-numbering fields. */
typedef enum {
    ANS_INIT = 0,   /* no numbered (or empty) field seen yet */
    ANS_AUTO = 1,   /* fields are numbered automatically: "{}" */
    ANS_MANUAL = 2  /* fields carry explicit numbers: "{0}" */
} AutoNumberState;
30 :
31 : /* Keeps track of our auto-numbering state, and which number field we're on */
32 : typedef struct {
33 : AutoNumberState an_state;
34 : int an_field_number;
35 : } AutoNumber;
36 :
37 :
38 : /* forward declaration for recursion */
39 : static PyObject *
40 : build_string(SubString *input, PyObject *args, PyObject *kwargs,
41 : int recursion_depth, AutoNumber *auto_number);
42 :
43 :
44 :
45 : /************************************************************************/
46 : /************************** Utility functions ************************/
47 : /************************************************************************/
48 :
49 : static void
50 58 : AutoNumber_Init(AutoNumber *auto_number)
51 : {
52 58 : auto_number->an_state = ANS_INIT;
53 58 : auto_number->an_field_number = 0;
54 58 : }
55 :
56 : /* fill in a SubString from a pointer and length */
57 : Py_LOCAL_INLINE(void)
58 1201 : SubString_init(SubString *str, PyObject *s, Py_ssize_t start, Py_ssize_t end)
59 : {
60 1201 : str->str = s;
61 1201 : str->start = start;
62 1201 : str->end = end;
63 1201 : }
64 :
65 : /* return a new string. if str->str is NULL, return None */
66 : Py_LOCAL_INLINE(PyObject *)
67 94 : SubString_new_object(SubString *str)
68 : {
69 94 : if (str->str == NULL) {
70 0 : Py_INCREF(Py_None);
71 0 : return Py_None;
72 : }
73 94 : return PyUnicode_Substring(str->str, str->start, str->end);
74 : }
75 :
76 : /* return a new string. if str->str is NULL, return None */
77 : Py_LOCAL_INLINE(PyObject *)
78 0 : SubString_new_object_or_empty(SubString *str)
79 : {
80 0 : if (str->str == NULL) {
81 0 : return PyUnicode_New(0, 0);
82 : }
83 0 : return SubString_new_object(str);
84 : }
85 :
86 : /* Return 1 if an error has been detected switching between automatic
87 : field numbering and manual field specification, else return 0. Set
88 : ValueError on error. */
89 : static int
90 25 : autonumber_state_error(AutoNumberState state, int field_name_is_empty)
91 : {
92 25 : if (state == ANS_MANUAL) {
93 0 : if (field_name_is_empty) {
94 0 : PyErr_SetString(PyExc_ValueError, "cannot switch from "
95 : "manual field specification to "
96 : "automatic field numbering");
97 0 : return 1;
98 : }
99 : }
100 : else {
101 25 : if (!field_name_is_empty) {
102 0 : PyErr_SetString(PyExc_ValueError, "cannot switch from "
103 : "automatic field numbering to "
104 : "manual field specification");
105 0 : return 1;
106 : }
107 : }
108 25 : return 0;
109 : }
110 :
111 :
112 : /************************************************************************/
113 : /*********** Format string parsing -- integers and identifiers *********/
114 : /************************************************************************/
115 :
116 : static Py_ssize_t
117 119 : get_integer(const SubString *str)
118 : {
119 119 : Py_ssize_t accumulator = 0;
120 : Py_ssize_t digitval;
121 : Py_ssize_t i;
122 :
123 : /* empty string is an error */
124 119 : if (str->start >= str->end)
125 25 : return -1;
126 :
127 94 : for (i = str->start; i < str->end; i++) {
128 94 : digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str->str, i));
129 94 : if (digitval < 0)
130 94 : return -1;
131 : /*
132 : Detect possible overflow before it happens:
133 :
134 : accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
135 : accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
136 : */
137 0 : if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
138 0 : PyErr_Format(PyExc_ValueError,
139 : "Too many decimal digits in format string");
140 0 : return -1;
141 : }
142 0 : accumulator = accumulator * 10 + digitval;
143 : }
144 0 : return accumulator;
145 : }
146 :
147 : /************************************************************************/
148 : /******** Functions to get field objects and specification strings ******/
149 : /************************************************************************/
150 :
151 : /* do the equivalent of obj.name */
152 : static PyObject *
153 0 : getattr(PyObject *obj, SubString *name)
154 : {
155 : PyObject *newobj;
156 0 : PyObject *str = SubString_new_object(name);
157 0 : if (str == NULL)
158 0 : return NULL;
159 0 : newobj = PyObject_GetAttr(obj, str);
160 0 : Py_DECREF(str);
161 0 : return newobj;
162 : }
163 :
164 : /* do the equivalent of obj[idx], where obj is a sequence */
165 : static PyObject *
166 0 : getitem_sequence(PyObject *obj, Py_ssize_t idx)
167 : {
168 0 : return PySequence_GetItem(obj, idx);
169 : }
170 :
171 : /* do the equivalent of obj[idx], where obj is not a sequence */
172 : static PyObject *
173 0 : getitem_idx(PyObject *obj, Py_ssize_t idx)
174 : {
175 : PyObject *newobj;
176 0 : PyObject *idx_obj = PyLong_FromSsize_t(idx);
177 0 : if (idx_obj == NULL)
178 0 : return NULL;
179 0 : newobj = PyObject_GetItem(obj, idx_obj);
180 0 : Py_DECREF(idx_obj);
181 0 : return newobj;
182 : }
183 :
184 : /* do the equivalent of obj[name] */
185 : static PyObject *
186 0 : getitem_str(PyObject *obj, SubString *name)
187 : {
188 : PyObject *newobj;
189 0 : PyObject *str = SubString_new_object(name);
190 0 : if (str == NULL)
191 0 : return NULL;
192 0 : newobj = PyObject_GetItem(obj, str);
193 0 : Py_DECREF(str);
194 0 : return newobj;
195 : }
196 :
197 : typedef struct {
198 : /* the entire string we're parsing. we assume that someone else
199 : is managing its lifetime, and that it will exist for the
200 : lifetime of the iterator. can be empty */
201 : SubString str;
202 :
203 : /* index to where we are inside field_name */
204 : Py_ssize_t index;
205 : } FieldNameIterator;
206 :
207 :
208 : static int
209 119 : FieldNameIterator_init(FieldNameIterator *self, PyObject *s,
210 : Py_ssize_t start, Py_ssize_t end)
211 : {
212 119 : SubString_init(&self->str, s, start, end);
213 119 : self->index = start;
214 119 : return 1;
215 : }
216 :
217 : static int
218 0 : _FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
219 : {
220 : Py_UCS4 c;
221 :
222 0 : name->str = self->str.str;
223 0 : name->start = self->index;
224 :
225 : /* return everything until '.' or '[' */
226 0 : while (self->index < self->str.end) {
227 0 : c = PyUnicode_READ_CHAR(self->str.str, self->index++);
228 0 : switch (c) {
229 : case '[':
230 : case '.':
231 : /* backup so that we this character will be seen next time */
232 0 : self->index--;
233 0 : break;
234 : default:
235 0 : continue;
236 : }
237 0 : break;
238 : }
239 : /* end of string is okay */
240 0 : name->end = self->index;
241 0 : return 1;
242 : }
243 :
244 : static int
245 0 : _FieldNameIterator_item(FieldNameIterator *self, SubString *name)
246 : {
247 0 : int bracket_seen = 0;
248 : Py_UCS4 c;
249 :
250 0 : name->str = self->str.str;
251 0 : name->start = self->index;
252 :
253 : /* return everything until ']' */
254 0 : while (self->index < self->str.end) {
255 0 : c = PyUnicode_READ_CHAR(self->str.str, self->index++);
256 0 : switch (c) {
257 : case ']':
258 0 : bracket_seen = 1;
259 0 : break;
260 : default:
261 0 : continue;
262 : }
263 0 : break;
264 : }
265 : /* make sure we ended with a ']' */
266 0 : if (!bracket_seen) {
267 0 : PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
268 0 : return 0;
269 : }
270 :
271 : /* end of string is okay */
272 : /* don't include the ']' */
273 0 : name->end = self->index-1;
274 0 : return 1;
275 : }
276 :
277 : /* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
278 : static int
279 119 : FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
280 : Py_ssize_t *name_idx, SubString *name)
281 : {
282 : /* check at end of input */
283 119 : if (self->index >= self->str.end)
284 119 : return 1;
285 :
286 0 : switch (PyUnicode_READ_CHAR(self->str.str, self->index++)) {
287 : case '.':
288 0 : *is_attribute = 1;
289 0 : if (_FieldNameIterator_attr(self, name) == 0)
290 0 : return 0;
291 0 : *name_idx = -1;
292 0 : break;
293 : case '[':
294 0 : *is_attribute = 0;
295 0 : if (_FieldNameIterator_item(self, name) == 0)
296 0 : return 0;
297 0 : *name_idx = get_integer(name);
298 0 : if (*name_idx == -1 && PyErr_Occurred())
299 0 : return 0;
300 0 : break;
301 : default:
302 : /* Invalid character follows ']' */
303 0 : PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
304 : "follow ']' in format field specifier");
305 0 : return 0;
306 : }
307 :
308 : /* empty string is an error */
309 0 : if (name->start == name->end) {
310 0 : PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
311 0 : return 0;
312 : }
313 :
314 0 : return 2;
315 : }
316 :
317 :
318 : /* input: field_name
319 : output: 'first' points to the part before the first '[' or '.'
320 : 'first_idx' is -1 if 'first' is not an integer, otherwise
321 : it's the value of first converted to an integer
322 : 'rest' is an iterator to return the rest
323 : */
324 : static int
325 119 : field_name_split(PyObject *str, Py_ssize_t start, Py_ssize_t end, SubString *first,
326 : Py_ssize_t *first_idx, FieldNameIterator *rest,
327 : AutoNumber *auto_number)
328 : {
329 : Py_UCS4 c;
330 119 : Py_ssize_t i = start;
331 : int field_name_is_empty;
332 : int using_numeric_index;
333 :
334 : /* find the part up until the first '.' or '[' */
335 986 : while (i < end) {
336 748 : switch (c = PyUnicode_READ_CHAR(str, i++)) {
337 : case '[':
338 : case '.':
339 : /* backup so that we this character is available to the
340 : "rest" iterator */
341 0 : i--;
342 0 : break;
343 : default:
344 748 : continue;
345 : }
346 0 : break;
347 : }
348 :
349 : /* set up the return values */
350 119 : SubString_init(first, str, start, i);
351 119 : FieldNameIterator_init(rest, str, i, end);
352 :
353 : /* see if "first" is an integer, in which case it's used as an index */
354 119 : *first_idx = get_integer(first);
355 119 : if (*first_idx == -1 && PyErr_Occurred())
356 0 : return 0;
357 :
358 119 : field_name_is_empty = first->start >= first->end;
359 :
360 : /* If the field name is omitted or if we have a numeric index
361 : specified, then we're doing numeric indexing into args. */
362 119 : using_numeric_index = field_name_is_empty || *first_idx != -1;
363 :
364 : /* We always get here exactly one time for each field we're
365 : processing. And we get here in field order (counting by left
366 : braces). So this is the perfect place to handle automatic field
367 : numbering if the field name is omitted. */
368 :
369 : /* Check if we need to do the auto-numbering. It's not needed if
370 : we're called from string.Format routines, because it's handled
371 : in that class by itself. */
372 119 : if (auto_number) {
373 : /* Initialize our auto numbering state if this is the first
374 : time we're either auto-numbering or manually numbering. */
375 119 : if (auto_number->an_state == ANS_INIT && using_numeric_index)
376 23 : auto_number->an_state = field_name_is_empty ?
377 : ANS_AUTO : ANS_MANUAL;
378 :
379 : /* Make sure our state is consistent with what we're doing
380 : this time through. Only check if we're using a numeric
381 : index. */
382 119 : if (using_numeric_index)
383 25 : if (autonumber_state_error(auto_number->an_state,
384 : field_name_is_empty))
385 0 : return 0;
386 : /* Zero length field means we want to do auto-numbering of the
387 : fields. */
388 119 : if (field_name_is_empty)
389 25 : *first_idx = (auto_number->an_field_number)++;
390 : }
391 :
392 119 : return 1;
393 : }
394 :
395 :
396 : /*
397 : get_field_object returns the object inside {}, before the
398 : format_spec. It handles getindex and getattr lookups and consumes
399 : the entire input string.
400 : */
401 : static PyObject *
402 119 : get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
403 : AutoNumber *auto_number)
404 : {
405 119 : PyObject *obj = NULL;
406 : int ok;
407 : int is_attribute;
408 : SubString name;
409 : SubString first;
410 : Py_ssize_t index;
411 : FieldNameIterator rest;
412 :
413 119 : if (!field_name_split(input->str, input->start, input->end, &first,
414 : &index, &rest, auto_number)) {
415 0 : goto error;
416 : }
417 :
418 119 : if (index == -1) {
419 : /* look up in kwargs */
420 94 : PyObject *key = SubString_new_object(&first);
421 94 : if (key == NULL)
422 0 : goto error;
423 :
424 : /* Use PyObject_GetItem instead of PyDict_GetItem because this
425 : code is no longer just used with kwargs. It might be passed
426 : a non-dict when called through format_map. */
427 94 : if ((kwargs == NULL) || (obj = PyObject_GetItem(kwargs, key)) == NULL) {
428 0 : PyErr_SetObject(PyExc_KeyError, key);
429 0 : Py_DECREF(key);
430 0 : goto error;
431 : }
432 94 : Py_DECREF(key);
433 : }
434 : else {
435 : /* If args is NULL, we have a format string with a positional field
436 : with only kwargs to retrieve it from. This can only happen when
437 : used with format_map(), where positional arguments are not
438 : allowed. */
439 25 : if (args == NULL) {
440 0 : PyErr_SetString(PyExc_ValueError, "Format string contains "
441 : "positional fields");
442 0 : goto error;
443 : }
444 :
445 : /* look up in args */
446 25 : obj = PySequence_GetItem(args, index);
447 25 : if (obj == NULL)
448 0 : goto error;
449 : }
450 :
451 : /* iterate over the rest of the field_name */
452 238 : while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
453 : &name)) == 2) {
454 : PyObject *tmp;
455 :
456 0 : if (is_attribute)
457 : /* getattr lookup "." */
458 0 : tmp = getattr(obj, &name);
459 : else
460 : /* getitem lookup "[]" */
461 0 : if (index == -1)
462 0 : tmp = getitem_str(obj, &name);
463 : else
464 0 : if (PySequence_Check(obj))
465 0 : tmp = getitem_sequence(obj, index);
466 : else
467 : /* not a sequence */
468 0 : tmp = getitem_idx(obj, index);
469 0 : if (tmp == NULL)
470 0 : goto error;
471 :
472 : /* assign to obj */
473 0 : Py_DECREF(obj);
474 0 : obj = tmp;
475 : }
476 : /* end of iterator, this is the non-error case */
477 119 : if (ok == 1)
478 119 : return obj;
479 : error:
480 0 : Py_XDECREF(obj);
481 0 : return NULL;
482 : }
483 :
484 : /************************************************************************/
485 : /***************** Field rendering functions **************************/
486 : /************************************************************************/
487 :
488 : /*
489 : render_field() is the main function in this section. It takes the
490 : field object and field specification string generated by
491 : get_field_and_spec, and renders the field into the output string.
492 :
493 : render_field calls fieldobj.__format__(format_spec) method, and
494 : appends to the output.
495 : */
496 : static int
497 119 : render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *writer)
498 : {
499 119 : int ok = 0;
500 119 : PyObject *result = NULL;
501 119 : PyObject *format_spec_object = NULL;
502 119 : int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
503 : int err;
504 :
505 : /* If we know the type exactly, skip the lookup of __format__ and just
506 : call the formatter directly. */
507 119 : if (PyUnicode_CheckExact(fieldobj))
508 97 : formatter = _PyUnicode_FormatAdvancedWriter;
509 22 : else if (PyLong_CheckExact(fieldobj))
510 22 : formatter = _PyLong_FormatAdvancedWriter;
511 0 : else if (PyFloat_CheckExact(fieldobj))
512 0 : formatter = _PyFloat_FormatAdvancedWriter;
513 0 : else if (PyComplex_CheckExact(fieldobj))
514 0 : formatter = _PyComplex_FormatAdvancedWriter;
515 :
516 119 : if (formatter) {
517 : /* we know exactly which formatter will be called when __format__ is
518 : looked up, so call it directly, instead. */
519 119 : err = formatter(writer, fieldobj, format_spec->str,
520 : format_spec->start, format_spec->end);
521 119 : return (err == 0);
522 : }
523 : else {
524 : /* We need to create an object out of the pointers we have, because
525 : __format__ takes a string/unicode object for format_spec. */
526 0 : if (format_spec->str)
527 0 : format_spec_object = PyUnicode_Substring(format_spec->str,
528 : format_spec->start,
529 : format_spec->end);
530 : else
531 0 : format_spec_object = PyUnicode_New(0, 0);
532 0 : if (format_spec_object == NULL)
533 0 : goto done;
534 :
535 0 : result = PyObject_Format(fieldobj, format_spec_object);
536 : }
537 0 : if (result == NULL)
538 0 : goto done;
539 :
540 0 : if (_PyUnicodeWriter_WriteStr(writer, result) == -1)
541 0 : goto done;
542 0 : ok = 1;
543 :
544 : done:
545 0 : Py_XDECREF(format_spec_object);
546 0 : Py_XDECREF(result);
547 0 : return ok;
548 : }
549 :
550 : static int
551 119 : parse_field(SubString *str, SubString *field_name, SubString *format_spec,
552 : Py_UCS4 *conversion)
553 : {
554 : /* Note this function works if the field name is zero length,
555 : which is good. Zero length field names are handled later, in
556 : field_name_split. */
557 :
558 119 : Py_UCS4 c = 0;
559 :
560 : /* initialize these, as they may be empty */
561 119 : *conversion = '\0';
562 119 : SubString_init(format_spec, NULL, 0, 0);
563 :
564 : /* Search for the field name. it's terminated by the end of
565 : the string, or a ':' or '!' */
566 119 : field_name->str = str->str;
567 119 : field_name->start = str->start;
568 986 : while (str->start < str->end) {
569 795 : switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
570 : case ':':
571 : case '!':
572 47 : break;
573 : default:
574 748 : continue;
575 : }
576 47 : break;
577 : }
578 :
579 119 : if (c == '!' || c == ':') {
580 : /* we have a format specifier and/or a conversion */
581 : /* don't include the last character */
582 47 : field_name->end = str->start-1;
583 :
584 : /* the format specifier is the rest of the string */
585 47 : format_spec->str = str->str;
586 47 : format_spec->start = str->start;
587 47 : format_spec->end = str->end;
588 :
589 : /* see if there's a conversion specifier */
590 94 : if (c == '!') {
591 : /* there must be another character present */
592 25 : if (format_spec->start >= format_spec->end) {
593 0 : PyErr_SetString(PyExc_ValueError,
594 : "end of format while looking for conversion "
595 : "specifier");
596 0 : return 0;
597 : }
598 25 : *conversion = PyUnicode_READ_CHAR(format_spec->str, format_spec->start++);
599 :
600 : /* if there is another character, it must be a colon */
601 25 : if (format_spec->start < format_spec->end) {
602 0 : c = PyUnicode_READ_CHAR(format_spec->str, format_spec->start++);
603 0 : if (c != ':') {
604 0 : PyErr_SetString(PyExc_ValueError,
605 : "expected ':' after format specifier");
606 0 : return 0;
607 : }
608 : }
609 : }
610 : }
611 : else
612 : /* end of string, there's no format_spec or conversion */
613 72 : field_name->end = str->start;
614 :
615 119 : return 1;
616 : }
617 :
618 : /************************************************************************/
619 : /******* Output string allocation and escape-to-markup processing ******/
620 : /************************************************************************/
621 :
622 : /* MarkupIterator breaks the string into pieces of either literal
623 : text, or things inside {} that need to be marked up. it is
624 : designed to make it easy to wrap a Python iterator around it, for
625 : use with the Formatter class */
626 :
627 : typedef struct {
628 : SubString str;
629 : } MarkupIterator;
630 :
631 : static int
632 58 : MarkupIterator_init(MarkupIterator *self, PyObject *str,
633 : Py_ssize_t start, Py_ssize_t end)
634 : {
635 58 : SubString_init(&self->str, str, start, end);
636 58 : return 1;
637 : }
638 :
639 : /* returns 0 on error, 1 on non-error termination, and 2 if it got a
640 : string (or something to be expanded) */
641 : static int
642 203 : MarkupIterator_next(MarkupIterator *self, SubString *literal,
643 : int *field_present, SubString *field_name,
644 : SubString *format_spec, Py_UCS4 *conversion,
645 : int *format_spec_needs_expanding)
646 : {
647 : int at_end;
648 203 : Py_UCS4 c = 0;
649 : Py_ssize_t start;
650 : int count;
651 : Py_ssize_t len;
652 203 : int markup_follows = 0;
653 :
654 : /* initialize all of the output variables */
655 203 : SubString_init(literal, NULL, 0, 0);
656 203 : SubString_init(field_name, NULL, 0, 0);
657 203 : SubString_init(format_spec, NULL, 0, 0);
658 203 : *conversion = '\0';
659 203 : *format_spec_needs_expanding = 0;
660 203 : *field_present = 0;
661 :
662 : /* No more input, end of iterator. This is the normal exit
663 : path. */
664 203 : if (self->str.start >= self->str.end)
665 58 : return 1;
666 :
667 145 : start = self->str.start;
668 :
669 : /* First read any literal text. Read until the end of string, an
670 : escaped '{' or '}', or an unescaped '{'. In order to never
671 : allocate memory and so I can just pass pointers around, if
672 : there's an escaped '{' or '}' then we'll return the literal
673 : including the brace, but no format object. The next time
674 : through, we'll return the rest of the literal, skipping past
675 : the second consecutive brace. */
676 4059 : while (self->str.start < self->str.end) {
677 3888 : switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
678 : case '{':
679 : case '}':
680 119 : markup_follows = 1;
681 119 : break;
682 : default:
683 3769 : continue;
684 : }
685 119 : break;
686 : }
687 :
688 145 : at_end = self->str.start >= self->str.end;
689 145 : len = self->str.start - start;
690 :
691 145 : if ((c == '}') && (at_end ||
692 0 : (c != PyUnicode_READ_CHAR(self->str.str,
693 : self->str.start)))) {
694 0 : PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
695 : "in format string");
696 0 : return 0;
697 : }
698 145 : if (at_end && c == '{') {
699 0 : PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
700 : "in format string");
701 0 : return 0;
702 : }
703 145 : if (!at_end) {
704 119 : if (c == PyUnicode_READ_CHAR(self->str.str, self->str.start)) {
705 : /* escaped } or {, skip it in the input. there is no
706 : markup object following us, just this literal text */
707 0 : self->str.start++;
708 0 : markup_follows = 0;
709 : }
710 : else
711 119 : len--;
712 : }
713 :
714 : /* record the literal text */
715 145 : literal->str = self->str.str;
716 145 : literal->start = start;
717 145 : literal->end = start + len;
718 :
719 145 : if (!markup_follows)
720 26 : return 2;
721 :
722 : /* this is markup, find the end of the string by counting nested
723 : braces. note that this prohibits escaped braces, so that
724 : format_specs cannot have braces in them. */
725 119 : *field_present = 1;
726 119 : count = 1;
727 :
728 119 : start = self->str.start;
729 :
730 : /* we know we can't have a zero length string, so don't worry
731 : about that case */
732 1080 : while (self->str.start < self->str.end) {
733 961 : switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
734 : case '{':
735 : /* the format spec needs to be recursively expanded.
736 : this is an optimization, and not strictly needed */
737 0 : *format_spec_needs_expanding = 1;
738 0 : count++;
739 0 : break;
740 : case '}':
741 119 : count--;
742 119 : if (count <= 0) {
743 : /* we're done. parse and get out */
744 : SubString s;
745 :
746 119 : SubString_init(&s, self->str.str, start, self->str.start - 1);
747 119 : if (parse_field(&s, field_name, format_spec, conversion) == 0)
748 0 : return 0;
749 :
750 : /* success */
751 119 : return 2;
752 : }
753 0 : break;
754 : }
755 : }
756 :
757 : /* end of string while searching for matching '}' */
758 0 : PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
759 0 : return 0;
760 : }
761 :
762 :
763 : /* do the !r or !s conversion on obj */
764 : static PyObject *
765 25 : do_conversion(PyObject *obj, Py_UCS4 conversion)
766 : {
767 : /* XXX in pre-3.0, do we need to convert this to unicode, since it
768 : might have returned a string? */
769 25 : switch (conversion) {
770 : case 'r':
771 25 : return PyObject_Repr(obj);
772 : case 's':
773 0 : return PyObject_Str(obj);
774 : case 'a':
775 0 : return PyObject_ASCII(obj);
776 : default:
777 0 : if (conversion > 32 && conversion < 127) {
778 : /* It's the ASCII subrange; casting to char is safe
779 : (assuming the execution character set is an ASCII
780 : superset). */
781 0 : PyErr_Format(PyExc_ValueError,
782 : "Unknown conversion specifier %c",
783 0 : (char)conversion);
784 : } else
785 0 : PyErr_Format(PyExc_ValueError,
786 : "Unknown conversion specifier \\x%x",
787 : (unsigned int)conversion);
788 0 : return NULL;
789 : }
790 : }
791 :
792 : /* given:
793 :
794 : {field_name!conversion:format_spec}
795 :
796 : compute the result and write it to output.
797 : format_spec_needs_expanding is an optimization. if it's false,
798 : just output the string directly, otherwise recursively expand the
799 : format_spec string.
800 :
801 : field_name is allowed to be zero length, in which case we
802 : are doing auto field numbering.
803 : */
804 :
805 : static int
806 119 : output_markup(SubString *field_name, SubString *format_spec,
807 : int format_spec_needs_expanding, Py_UCS4 conversion,
808 : _PyUnicodeWriter *writer, PyObject *args, PyObject *kwargs,
809 : int recursion_depth, AutoNumber *auto_number)
810 : {
811 119 : PyObject *tmp = NULL;
812 119 : PyObject *fieldobj = NULL;
813 : SubString expanded_format_spec;
814 : SubString *actual_format_spec;
815 119 : int result = 0;
816 :
817 : /* convert field_name to an object */
818 119 : fieldobj = get_field_object(field_name, args, kwargs, auto_number);
819 119 : if (fieldobj == NULL)
820 0 : goto done;
821 :
822 119 : if (conversion != '\0') {
823 25 : tmp = do_conversion(fieldobj, conversion);
824 25 : if (tmp == NULL || PyUnicode_READY(tmp) == -1)
825 : goto done;
826 :
827 : /* do the assignment, transferring ownership: fieldobj = tmp */
828 25 : Py_DECREF(fieldobj);
829 25 : fieldobj = tmp;
830 25 : tmp = NULL;
831 : }
832 :
833 : /* if needed, recurively compute the format_spec */
834 119 : if (format_spec_needs_expanding) {
835 0 : tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
836 : auto_number);
837 0 : if (tmp == NULL || PyUnicode_READY(tmp) == -1)
838 : goto done;
839 :
840 : /* note that in the case we're expanding the format string,
841 : tmp must be kept around until after the call to
842 : render_field. */
843 0 : SubString_init(&expanded_format_spec, tmp, 0, PyUnicode_GET_LENGTH(tmp));
844 0 : actual_format_spec = &expanded_format_spec;
845 : }
846 : else
847 119 : actual_format_spec = format_spec;
848 :
849 119 : if (render_field(fieldobj, actual_format_spec, writer) == 0)
850 0 : goto done;
851 :
852 119 : result = 1;
853 :
854 : done:
855 119 : Py_XDECREF(fieldobj);
856 119 : Py_XDECREF(tmp);
857 :
858 119 : return result;
859 : }
860 :
861 : /*
862 : do_markup is the top-level loop for the format() method. It
863 : searches through the format string for escapes to markup codes, and
864 : calls other functions to move non-markup text to the output,
865 : and to perform the markup to the output.
866 : */
867 : static int
868 58 : do_markup(SubString *input, PyObject *args, PyObject *kwargs,
869 : _PyUnicodeWriter *writer, int recursion_depth, AutoNumber *auto_number)
870 : {
871 : MarkupIterator iter;
872 : int format_spec_needs_expanding;
873 : int result;
874 : int field_present;
875 : SubString literal;
876 : SubString field_name;
877 : SubString format_spec;
878 : Py_UCS4 conversion, maxchar;
879 : Py_ssize_t sublen;
880 : int err;
881 :
882 58 : MarkupIterator_init(&iter, input->str, input->start, input->end);
883 58 : while ((result = MarkupIterator_next(&iter, &literal, &field_present,
884 : &field_name, &format_spec,
885 : &conversion,
886 : &format_spec_needs_expanding)) == 2) {
887 145 : sublen = literal.end - literal.start;
888 145 : if (sublen) {
889 117 : maxchar = _PyUnicode_FindMaxChar(literal.str,
890 : literal.start, literal.end);
891 117 : err = _PyUnicodeWriter_Prepare(writer, sublen, maxchar);
892 117 : if (err == -1)
893 0 : return 0;
894 117 : _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
895 : literal.str, literal.start, sublen);
896 117 : writer->pos += sublen;
897 : }
898 :
899 145 : if (field_present) {
900 119 : if (iter.str.start == iter.str.end)
901 32 : writer->overallocate = 0;
902 119 : if (!output_markup(&field_name, &format_spec,
903 : format_spec_needs_expanding, conversion, writer,
904 : args, kwargs, recursion_depth, auto_number))
905 0 : return 0;
906 : }
907 : }
908 58 : return result;
909 : }
910 :
911 :
912 : /*
913 : build_string allocates the output string and then
914 : calls do_markup to do the heavy lifting.
915 : */
916 : static PyObject *
917 58 : build_string(SubString *input, PyObject *args, PyObject *kwargs,
918 : int recursion_depth, AutoNumber *auto_number)
919 : {
920 : _PyUnicodeWriter writer;
921 : Py_ssize_t minlen;
922 :
923 : /* check the recursion level */
924 58 : if (recursion_depth <= 0) {
925 0 : PyErr_SetString(PyExc_ValueError,
926 : "Max string recursion exceeded");
927 0 : return NULL;
928 : }
929 :
930 58 : minlen = PyUnicode_GET_LENGTH(input->str) + 100;
931 58 : _PyUnicodeWriter_Init(&writer, minlen);
932 :
933 58 : if (!do_markup(input, args, kwargs, &writer, recursion_depth,
934 : auto_number)) {
935 0 : _PyUnicodeWriter_Dealloc(&writer);
936 0 : return NULL;
937 : }
938 :
939 58 : return _PyUnicodeWriter_Finish(&writer);
940 : }
941 :
942 : /************************************************************************/
943 : /*********** main routine ***********************************************/
944 : /************************************************************************/
945 :
946 : /* this is the main entry point */
947 : static PyObject *
948 58 : do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
949 : {
950 : SubString input;
951 :
952 : /* PEP 3101 says only 2 levels, so that
953 : "{0:{1}}".format('abc', 's') # works
954 : "{0:{1:{2}}}".format('abc', 's', '') # fails
955 : */
956 58 : int recursion_depth = 2;
957 :
958 : AutoNumber auto_number;
959 :
960 58 : if (PyUnicode_READY(self) == -1)
961 0 : return NULL;
962 :
963 58 : AutoNumber_Init(&auto_number);
964 58 : SubString_init(&input, self, 0, PyUnicode_GET_LENGTH(self));
965 58 : return build_string(&input, args, kwargs, recursion_depth, &auto_number);
966 : }
967 :
968 : static PyObject *
969 0 : do_string_format_map(PyObject *self, PyObject *obj)
970 : {
971 0 : return do_string_format(self, NULL, obj);
972 : }
973 :
974 :
975 : /************************************************************************/
976 : /*********** formatteriterator ******************************************/
977 : /************************************************************************/
978 :
979 : /* This is used to implement string.Formatter.vparse(). It exists so
980 : Formatter can share code with the built in unicode.format() method.
981 : It's really just a wrapper around MarkupIterator that is callable
982 : from Python. */
983 :
984 : typedef struct {
985 : PyObject_HEAD
986 : PyObject *str;
987 : MarkupIterator it_markup;
988 : } formatteriterobject;
989 :
990 : static void
991 0 : formatteriter_dealloc(formatteriterobject *it)
992 : {
993 0 : Py_XDECREF(it->str);
994 0 : PyObject_FREE(it);
995 0 : }
996 :
997 : /* returns a tuple:
998 : (literal, field_name, format_spec, conversion)
999 :
1000 : literal is any literal text to output. might be zero length
1001 : field_name is the string before the ':'. might be None
1002 : format_spec is the string after the ':'. mibht be None
1003 : conversion is either None, or the string after the '!'
1004 : */
1005 : static PyObject *
1006 0 : formatteriter_next(formatteriterobject *it)
1007 : {
1008 : SubString literal;
1009 : SubString field_name;
1010 : SubString format_spec;
1011 : Py_UCS4 conversion;
1012 : int format_spec_needs_expanding;
1013 : int field_present;
1014 0 : int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
1015 : &field_name, &format_spec, &conversion,
1016 : &format_spec_needs_expanding);
1017 :
1018 : /* all of the SubString objects point into it->str, so no
1019 : memory management needs to be done on them */
1020 : assert(0 <= result && result <= 2);
1021 0 : if (result == 0 || result == 1)
1022 : /* if 0, error has already been set, if 1, iterator is empty */
1023 0 : return NULL;
1024 : else {
1025 0 : PyObject *literal_str = NULL;
1026 0 : PyObject *field_name_str = NULL;
1027 0 : PyObject *format_spec_str = NULL;
1028 0 : PyObject *conversion_str = NULL;
1029 0 : PyObject *tuple = NULL;
1030 :
1031 0 : literal_str = SubString_new_object(&literal);
1032 0 : if (literal_str == NULL)
1033 0 : goto done;
1034 :
1035 0 : field_name_str = SubString_new_object(&field_name);
1036 0 : if (field_name_str == NULL)
1037 0 : goto done;
1038 :
1039 : /* if field_name is non-zero length, return a string for
1040 : format_spec (even if zero length), else return None */
1041 0 : format_spec_str = (field_present ?
1042 0 : SubString_new_object_or_empty :
1043 : SubString_new_object)(&format_spec);
1044 0 : if (format_spec_str == NULL)
1045 0 : goto done;
1046 :
1047 : /* if the conversion is not specified, return a None,
1048 : otherwise create a one length string with the conversion
1049 : character */
1050 0 : if (conversion == '\0') {
1051 0 : conversion_str = Py_None;
1052 0 : Py_INCREF(conversion_str);
1053 : }
1054 : else
1055 0 : conversion_str = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1056 : &conversion, 1);
1057 0 : if (conversion_str == NULL)
1058 0 : goto done;
1059 :
1060 0 : tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
1061 : conversion_str);
1062 : done:
1063 0 : Py_XDECREF(literal_str);
1064 0 : Py_XDECREF(field_name_str);
1065 0 : Py_XDECREF(format_spec_str);
1066 0 : Py_XDECREF(conversion_str);
1067 0 : return tuple;
1068 : }
1069 : }
1070 :
1071 : static PyMethodDef formatteriter_methods[] = {
1072 : {NULL, NULL} /* sentinel */
1073 : };
1074 :
1075 : static PyTypeObject PyFormatterIter_Type = {
1076 : PyVarObject_HEAD_INIT(&PyType_Type, 0)
1077 : "formatteriterator", /* tp_name */
1078 : sizeof(formatteriterobject), /* tp_basicsize */
1079 : 0, /* tp_itemsize */
1080 : /* methods */
1081 : (destructor)formatteriter_dealloc, /* tp_dealloc */
1082 : 0, /* tp_print */
1083 : 0, /* tp_getattr */
1084 : 0, /* tp_setattr */
1085 : 0, /* tp_reserved */
1086 : 0, /* tp_repr */
1087 : 0, /* tp_as_number */
1088 : 0, /* tp_as_sequence */
1089 : 0, /* tp_as_mapping */
1090 : 0, /* tp_hash */
1091 : 0, /* tp_call */
1092 : 0, /* tp_str */
1093 : PyObject_GenericGetAttr, /* tp_getattro */
1094 : 0, /* tp_setattro */
1095 : 0, /* tp_as_buffer */
1096 : Py_TPFLAGS_DEFAULT, /* tp_flags */
1097 : 0, /* tp_doc */
1098 : 0, /* tp_traverse */
1099 : 0, /* tp_clear */
1100 : 0, /* tp_richcompare */
1101 : 0, /* tp_weaklistoffset */
1102 : PyObject_SelfIter, /* tp_iter */
1103 : (iternextfunc)formatteriter_next, /* tp_iternext */
1104 : formatteriter_methods, /* tp_methods */
1105 : 0,
1106 : };
1107 :
1108 : /* unicode_formatter_parser is used to implement
1109 : string.Formatter.vformat. it parses a string and returns tuples
1110 : describing the parsed elements. It's a wrapper around
1111 : stringlib/string_format.h's MarkupIterator */
1112 : static PyObject *
1113 0 : formatter_parser(PyObject *ignored, PyObject *self)
1114 : {
1115 : formatteriterobject *it;
1116 :
1117 0 : if (!PyUnicode_Check(self)) {
1118 0 : PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1119 0 : return NULL;
1120 : }
1121 :
1122 0 : if (PyUnicode_READY(self) == -1)
1123 0 : return NULL;
1124 :
1125 0 : it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1126 0 : if (it == NULL)
1127 0 : return NULL;
1128 :
1129 : /* take ownership, give the object to the iterator */
1130 0 : Py_INCREF(self);
1131 0 : it->str = self;
1132 :
1133 : /* initialize the contained MarkupIterator */
1134 0 : MarkupIterator_init(&it->it_markup, (PyObject*)self, 0, PyUnicode_GET_LENGTH(self));
1135 0 : return (PyObject *)it;
1136 : }
1137 :
1138 :
1139 : /************************************************************************/
1140 : /*********** fieldnameiterator ******************************************/
1141 : /************************************************************************/
1142 :
1143 :
1144 : /* This is used to implement string.Formatter.vparse(). It parses the
1145 : field name into attribute and item values. It's a Python-callable
1146 : wrapper around FieldNameIterator */
1147 :
1148 : typedef struct {
1149 : PyObject_HEAD
1150 : PyObject *str;
1151 : FieldNameIterator it_field;
1152 : } fieldnameiterobject;
1153 :
1154 : static void
1155 0 : fieldnameiter_dealloc(fieldnameiterobject *it)
1156 : {
1157 0 : Py_XDECREF(it->str);
1158 0 : PyObject_FREE(it);
1159 0 : }
1160 :
1161 : /* returns a tuple:
1162 : (is_attr, value)
1163 : is_attr is true if we used attribute syntax (e.g., '.foo')
1164 : false if we used index syntax (e.g., '[foo]')
1165 : value is an integer or string
1166 : */
1167 : static PyObject *
1168 0 : fieldnameiter_next(fieldnameiterobject *it)
1169 : {
1170 : int result;
1171 : int is_attr;
1172 : Py_ssize_t idx;
1173 : SubString name;
1174 :
1175 0 : result = FieldNameIterator_next(&it->it_field, &is_attr,
1176 : &idx, &name);
1177 0 : if (result == 0 || result == 1)
1178 : /* if 0, error has already been set, if 1, iterator is empty */
1179 0 : return NULL;
1180 : else {
1181 0 : PyObject* result = NULL;
1182 0 : PyObject* is_attr_obj = NULL;
1183 0 : PyObject* obj = NULL;
1184 :
1185 0 : is_attr_obj = PyBool_FromLong(is_attr);
1186 0 : if (is_attr_obj == NULL)
1187 0 : goto done;
1188 :
1189 : /* either an integer or a string */
1190 0 : if (idx != -1)
1191 0 : obj = PyLong_FromSsize_t(idx);
1192 : else
1193 0 : obj = SubString_new_object(&name);
1194 0 : if (obj == NULL)
1195 0 : goto done;
1196 :
1197 : /* return a tuple of values */
1198 0 : result = PyTuple_Pack(2, is_attr_obj, obj);
1199 :
1200 : done:
1201 0 : Py_XDECREF(is_attr_obj);
1202 0 : Py_XDECREF(obj);
1203 0 : return result;
1204 : }
1205 : }
1206 :
1207 : static PyMethodDef fieldnameiter_methods[] = {
1208 : {NULL, NULL} /* sentinel */
1209 : };
1210 :
1211 : static PyTypeObject PyFieldNameIter_Type = {
1212 : PyVarObject_HEAD_INIT(&PyType_Type, 0)
1213 : "fieldnameiterator", /* tp_name */
1214 : sizeof(fieldnameiterobject), /* tp_basicsize */
1215 : 0, /* tp_itemsize */
1216 : /* methods */
1217 : (destructor)fieldnameiter_dealloc, /* tp_dealloc */
1218 : 0, /* tp_print */
1219 : 0, /* tp_getattr */
1220 : 0, /* tp_setattr */
1221 : 0, /* tp_reserved */
1222 : 0, /* tp_repr */
1223 : 0, /* tp_as_number */
1224 : 0, /* tp_as_sequence */
1225 : 0, /* tp_as_mapping */
1226 : 0, /* tp_hash */
1227 : 0, /* tp_call */
1228 : 0, /* tp_str */
1229 : PyObject_GenericGetAttr, /* tp_getattro */
1230 : 0, /* tp_setattro */
1231 : 0, /* tp_as_buffer */
1232 : Py_TPFLAGS_DEFAULT, /* tp_flags */
1233 : 0, /* tp_doc */
1234 : 0, /* tp_traverse */
1235 : 0, /* tp_clear */
1236 : 0, /* tp_richcompare */
1237 : 0, /* tp_weaklistoffset */
1238 : PyObject_SelfIter, /* tp_iter */
1239 : (iternextfunc)fieldnameiter_next, /* tp_iternext */
1240 : fieldnameiter_methods, /* tp_methods */
1241 : 0};
1242 :
1243 : /* unicode_formatter_field_name_split is used to implement
1244 : string.Formatter.vformat. it takes an PEP 3101 "field name", and
1245 : returns a tuple of (first, rest): "first", the part before the
1246 : first '.' or '['; and "rest", an iterator for the rest of the field
1247 : name. it's a wrapper around stringlib/string_format.h's
1248 : field_name_split. The iterator it returns is a
1249 : FieldNameIterator */
1250 : static PyObject *
1251 0 : formatter_field_name_split(PyObject *ignored, PyObject *self)
1252 : {
1253 : SubString first;
1254 : Py_ssize_t first_idx;
1255 : fieldnameiterobject *it;
1256 :
1257 0 : PyObject *first_obj = NULL;
1258 0 : PyObject *result = NULL;
1259 :
1260 0 : if (!PyUnicode_Check(self)) {
1261 0 : PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1262 0 : return NULL;
1263 : }
1264 :
1265 0 : if (PyUnicode_READY(self) == -1)
1266 0 : return NULL;
1267 :
1268 0 : it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1269 0 : if (it == NULL)
1270 0 : return NULL;
1271 :
1272 : /* take ownership, give the object to the iterator. this is
1273 : just to keep the field_name alive */
1274 0 : Py_INCREF(self);
1275 0 : it->str = self;
1276 :
1277 : /* Pass in auto_number = NULL. We'll return an empty string for
1278 : first_obj in that case. */
1279 0 : if (!field_name_split((PyObject*)self, 0, PyUnicode_GET_LENGTH(self),
1280 : &first, &first_idx, &it->it_field, NULL))
1281 0 : goto done;
1282 :
1283 : /* first becomes an integer, if possible; else a string */
1284 0 : if (first_idx != -1)
1285 0 : first_obj = PyLong_FromSsize_t(first_idx);
1286 : else
1287 : /* convert "first" into a string object */
1288 0 : first_obj = SubString_new_object(&first);
1289 0 : if (first_obj == NULL)
1290 0 : goto done;
1291 :
1292 : /* return a tuple of values */
1293 0 : result = PyTuple_Pack(2, first_obj, it);
1294 :
1295 : done:
1296 0 : Py_XDECREF(it);
1297 0 : Py_XDECREF(first_obj);
1298 0 : return result;
1299 : }
|