LCOV - code coverage report
Current view: top level - libreoffice/workdir/unxlngi6.pro/UnpackedTarball/python3/Objects/stringlib - codecs.h (source / functions) Hit Total Coverage
Test: libreoffice_filtered.info Lines: 75 251 29.9 %
Date: 2012-12-17 Functions: 4 14 28.6 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* stringlib: codec implementations */
       2             : 
       3             : #if STRINGLIB_IS_UNICODE
       4             : 
       5             : /* Mask to check or force alignment of a pointer to C 'long' boundaries */
       6             : #define LONG_PTR_MASK (size_t) (SIZEOF_LONG - 1)
       7             : 
       8             : /* Mask to quickly check whether a C 'long' contains a
       9             :    non-ASCII, UTF8-encoded char. */
      10             : #if (SIZEOF_LONG == 8)
      11             : # define ASCII_CHAR_MASK 0x8080808080808080UL
      12             : #elif (SIZEOF_LONG == 4)
      13             : # define ASCII_CHAR_MASK 0x80808080UL
      14             : #else
      15             : # error C 'long' size should be either 4 or 8!
      16             : #endif
      17             : 
      18             : /* 10xxxxxx */
      19             : #define IS_CONTINUATION_BYTE(ch) ((ch) >= 0x80 && (ch) < 0xC0)
      20             : 
      21             : Py_LOCAL_INLINE(Py_UCS4)
      22         163 : STRINGLIB(utf8_decode)(const char **inptr, const char *end,
      23             :                        STRINGLIB_CHAR *dest,
      24             :                        Py_ssize_t *outpos)
      25             : {
      26             :     Py_UCS4 ch;
      27         163 :     const char *s = *inptr;
      28         163 :     const char *aligned_end = (const char *) ((size_t) end & ~LONG_PTR_MASK);
      29         163 :     STRINGLIB_CHAR *p = dest + *outpos;
      30             : 
      31        1054 :     while (s < end) {
      32         822 :         ch = (unsigned char)*s;
      33             : 
      34         822 :         if (ch < 0x80) {
      35             :             /* Fast path for runs of ASCII characters. Given that common UTF-8
      36             :                input will consist of an overwhelming majority of ASCII
      37             :                characters, we try to optimize for this case by checking
      38             :                as many characters as a C 'long' can contain.
      39             :                First, check if we can do an aligned read, as most CPUs have
      40             :                a penalty for unaligned reads.
      41             :             */
      42         544 :             if (!((size_t) s & LONG_PTR_MASK)) {
      43             :                 /* Help register allocation */
      44         187 :                 register const char *_s = s;
      45         187 :                 register STRINGLIB_CHAR *_p = p;
      46        4788 :                 while (_s < aligned_end) {
      47             :                     /* Read a whole long at a time (either 4 or 8 bytes),
      48             :                        and do a fast unrolled copy if it only contains ASCII
      49             :                        characters. */
      50        4551 :                     unsigned long value = *(unsigned long *) _s;
      51        4551 :                     if (value & ASCII_CHAR_MASK)
      52         137 :                         break;
      53             : #ifdef BYTEORDER_IS_LITTLE_ENDIAN
      54        4414 :                     _p[0] = (STRINGLIB_CHAR)(value & 0xFFu);
      55        4414 :                     _p[1] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);
      56        4414 :                     _p[2] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);
      57        4414 :                     _p[3] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);
      58             : # if SIZEOF_LONG == 8
      59             :                     _p[4] = (STRINGLIB_CHAR)((value >> 32) & 0xFFu);
      60             :                     _p[5] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu);
      61             :                     _p[6] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu);
      62             :                     _p[7] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu);
      63             : # endif
      64             : #else
      65             : # if SIZEOF_LONG == 8
      66             :                     _p[0] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu);
      67             :                     _p[1] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu);
      68             :                     _p[2] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu);
      69             :                     _p[3] = (STRINGLIB_CHAR)((value >> 32) & 0xFFu);
      70             :                     _p[4] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);
      71             :                     _p[5] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);
      72             :                     _p[6] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);
      73             :                     _p[7] = (STRINGLIB_CHAR)(value & 0xFFu);
      74             : # else
      75             :                     _p[0] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);
      76             :                     _p[1] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);
      77             :                     _p[2] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);
      78             :                     _p[3] = (STRINGLIB_CHAR)(value & 0xFFu);
      79             : # endif
      80             : #endif
      81        4414 :                     _s += SIZEOF_LONG;
      82        4414 :                     _p += SIZEOF_LONG;
      83             :                 }
      84         187 :                 s = _s;
      85         187 :                 p = _p;
      86         187 :                 if (s == end)
      87           7 :                     break;
      88         180 :                 ch = (unsigned char)*s;
      89             :             }
      90         537 :             if (ch < 0x80) {
      91         505 :                 s++;
      92         505 :                 *p++ = ch;
      93         505 :                 continue;
      94             :             }
      95             :         }
      96             : 
      97         310 :         if (ch < 0xC2) {
      98             :             /* invalid sequence
      99             :                \x80-\xBF -- continuation byte
     100             :                \xC0-\xC1 -- fake 0000-007F */
     101           0 :             goto InvalidStart;
     102             :         }
     103             : 
     104         310 :         if (ch < 0xE0) {
     105             :             /* \xC2\x80-\xDF\xBF -- 0080-07FF */
     106             :             Py_UCS4 ch2;
     107         102 :             if (end - s < 2) {
     108             :                 /* unexpected end of data: the caller will decide whether
     109             :                    it's an error or not */
     110           0 :                 break;
     111             :             }
     112         102 :             ch2 = (unsigned char)s[1];
     113         102 :             if (!IS_CONTINUATION_BYTE(ch2))
     114             :                 /* invalid continuation byte */
     115             :                 goto InvalidContinuation;
     116         102 :             ch = (ch << 6) + ch2 -
     117             :                  ((0xC0 << 6) + 0x80);
     118             :             assert ((ch > 0x007F) && (ch <= 0x07FF));
     119         102 :             s += 2;
     120           4 :             if (STRINGLIB_MAX_CHAR <= 0x007F ||
     121             :                 (STRINGLIB_MAX_CHAR < 0x07FF && ch > STRINGLIB_MAX_CHAR))
     122          12 :                 goto Overflow;
     123          90 :             *p++ = ch;
     124          90 :             continue;
     125             :         }
     126             : 
     127         208 :         if (ch < 0xF0) {
     128             :             /* \xE0\xA0\x80-\xEF\xBF\xBF -- 0800-FFFF */
     129             :             Py_UCS4 ch2, ch3;
     130         208 :             if (end - s < 3) {
     131             :                 /* unexpected end of data: the caller will decide whether
     132             :                    it's an error or not */
     133           0 :                 break;
     134             :             }
     135         208 :             ch2 = (unsigned char)s[1];
     136         208 :             ch3 = (unsigned char)s[2];
     137         208 :             if (!IS_CONTINUATION_BYTE(ch2) ||
     138         208 :                 !IS_CONTINUATION_BYTE(ch3)) {
     139             :                 /* invalid continuation byte */
     140             :                 goto InvalidContinuation;
     141             :             }
     142         208 :             if (ch == 0xE0) {
     143           0 :                 if (ch2 < 0xA0)
     144             :                     /* invalid sequence
     145             :                        \xE0\x80\x80-\xE0\x9F\xBF -- fake 0000-07FF */
     146           0 :                     goto InvalidContinuation;
     147             :             }
     148         208 :             else if (ch == 0xED && ch2 > 0x9F) {
     149             :                 /* Decoding UTF-8 sequences in range \xED\xA0\x80-\xED\xBF\xBF
     150             :                    will result in surrogates in range D800-DFFF. Surrogates are
     151             :                    not valid UTF-8 so they are rejected.
     152             :                    See http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf
     153             :                    (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt */
     154           0 :                 goto InvalidContinuation;
     155             :             }
     156         208 :             ch = (ch << 12) + (ch2 << 6) + ch3 -
     157             :                  ((0xE0 << 12) + (0x80 << 6) + 0x80);
     158             :             assert ((ch > 0x07FF) && (ch <= 0xFFFF));
     159         208 :             s += 3;
     160             :             if (STRINGLIB_MAX_CHAR <= 0x07FF ||
     161             :                 (STRINGLIB_MAX_CHAR < 0xFFFF && ch > STRINGLIB_MAX_CHAR))
     162          75 :                 goto Overflow;
     163         133 :             *p++ = ch;
     164         133 :             continue;
     165             :         }
     166             : 
     167           0 :         if (ch < 0xF5) {
     168             :             /* \xF0\x90\x80\x80-\xF4\x8F\xBF\xBF -- 10000-10FFFF */
     169             :             Py_UCS4 ch2, ch3, ch4;
     170           0 :             if (end - s < 4) {
     171             :                 /* unexpected end of data: the caller will decide whether
     172             :                    it's an error or not */
     173           0 :                 break;
     174             :             }
     175           0 :             ch2 = (unsigned char)s[1];
     176           0 :             ch3 = (unsigned char)s[2];
     177           0 :             ch4 = (unsigned char)s[3];
     178           0 :             if (!IS_CONTINUATION_BYTE(ch2) ||
     179           0 :                 !IS_CONTINUATION_BYTE(ch3) ||
     180           0 :                 !IS_CONTINUATION_BYTE(ch4)) {
     181             :                 /* invalid continuation byte */
     182             :                 goto InvalidContinuation;
     183             :             }
     184           0 :             if (ch == 0xF0) {
     185           0 :                 if (ch2 < 0x90)
     186             :                     /* invalid sequence
     187             :                        \xF0\x80\x80\x80-\xF0\x8F\xBF\xBF -- fake 0000-FFFF */
     188           0 :                     goto InvalidContinuation;
     189             :             }
     190           0 :             else if (ch == 0xF4 && ch2 > 0x8F) {
     191             :                 /* invalid sequence
     192             :                    \xF4\x90\x80\x80- -- 110000- overflow */
     193           0 :                 goto InvalidContinuation;
     194             :             }
     195           0 :             ch = (ch << 18) + (ch2 << 12) + (ch3 << 6) + ch4 -
     196             :                  ((0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80);
     197             :             assert ((ch > 0xFFFF) && (ch <= 0x10FFFF));
     198           0 :             s += 4;
     199             :             if (STRINGLIB_MAX_CHAR <= 0xFFFF ||
     200             :                 (STRINGLIB_MAX_CHAR < 0x10FFFF && ch > STRINGLIB_MAX_CHAR))
     201           0 :                 goto Overflow;
     202           0 :             *p++ = ch;
     203           0 :             continue;
     204             :         }
     205           0 :         goto InvalidStart;
     206             :     }
     207          76 :     ch = 0;
     208             : Overflow:
     209             : Return:
     210         163 :     *inptr = s;
     211         163 :     *outpos = p - dest;
     212         326 :     return ch;
     213             : InvalidStart:
     214           0 :     ch = 1;
     215           0 :     goto Return;
     216             : InvalidContinuation:
     217           0 :     ch = 2;
     218           0 :     goto Return;
     219             : }
     220             : 
     221             : #undef ASCII_CHAR_MASK
     222             : #undef IS_CONTINUATION_BYTE
     223             : 
     224             : 
     225             : /* UTF-8 encoder specialized for a Unicode kind to avoid the slow
     226             :    PyUnicode_READ() macro. Some parts of the code are compiled out depending on the kind:
     227             :    UCS-1 strings don't need to handle surrogates for example. */
     228             : Py_LOCAL_INLINE(PyObject *)
     229           1 : STRINGLIB(utf8_encoder)(PyObject *unicode,
     230             :                         STRINGLIB_CHAR *data,
     231             :                         Py_ssize_t size,
     232             :                         const char *errors)
     233             : {
     234             : #define MAX_SHORT_UNICHARS 300  /* largest size we'll do on the stack */
     235             : 
     236             :     Py_ssize_t i;                /* index into data of next input character */
     237             :     PyObject *result;            /* result string object */
     238             :     char *p;                     /* next free byte in output buffer */
     239             :     Py_ssize_t nallocated;      /* number of result bytes allocated */
     240             :     Py_ssize_t nneeded;            /* number of result bytes needed */
     241             : #if STRINGLIB_SIZEOF_CHAR > 1
     242           0 :     PyObject *errorHandler = NULL;
     243           0 :     PyObject *exc = NULL;
     244           0 :     PyObject *rep = NULL;
     245             : #endif
     246             : #if STRINGLIB_SIZEOF_CHAR == 1
     247           1 :     const Py_ssize_t max_char_size = 2;
     248             :     char stackbuf[MAX_SHORT_UNICHARS * 2];
     249             : #elif STRINGLIB_SIZEOF_CHAR == 2
     250           0 :     const Py_ssize_t max_char_size = 3;
     251             :     char stackbuf[MAX_SHORT_UNICHARS * 3];
     252             : #else /*  STRINGLIB_SIZEOF_CHAR == 4 */
     253           0 :     const Py_ssize_t max_char_size = 4;
     254             :     char stackbuf[MAX_SHORT_UNICHARS * 4];
     255             : #endif
     256             : 
     257             :     assert(size >= 0);
     258             : 
     259           1 :     if (size <= MAX_SHORT_UNICHARS) {
     260             :         /* Write into the stack buffer; nallocated can't overflow.
     261             :          * At the end, we'll allocate exactly as much heap space as it
     262             :          * turns out we need.
     263             :          */
     264           0 :         nallocated = Py_SAFE_DOWNCAST(sizeof(stackbuf), size_t, int);
     265           0 :         result = NULL;   /* will allocate after we're done */
     266           0 :         p = stackbuf;
     267             :     }
     268             :     else {
     269           1 :         if (size > PY_SSIZE_T_MAX / max_char_size) {
     270             :             /* integer overflow */
     271           0 :             return PyErr_NoMemory();
     272             :         }
     273             :         /* Overallocate on the heap, and give the excess back at the end. */
     274           1 :         nallocated = size * max_char_size;
     275           1 :         result = PyBytes_FromStringAndSize(NULL, nallocated);
     276           1 :         if (result == NULL)
     277           0 :             return NULL;
     278           1 :         p = PyBytes_AS_STRING(result);
     279             :     }
     280             : 
     281        6520 :     for (i = 0; i < size;) {
     282        6518 :         Py_UCS4 ch = data[i++];
     283             : 
     284        6518 :         if (ch < 0x80) {
     285             :             /* Encode ASCII */
     286        6515 :             *p++ = (char) ch;
     287             : 
     288             :         }
     289             :         else
     290             : #if STRINGLIB_SIZEOF_CHAR > 1
     291           0 :         if (ch < 0x0800)
     292             : #endif
     293             :         {
     294             :             /* Encode Latin-1 (for wider kinds: any ch < 0x0800) as two bytes */
     295           3 :             *p++ = (char)(0xc0 | (ch >> 6));
     296           3 :             *p++ = (char)(0x80 | (ch & 0x3f));
     297             :         }
     298             : #if STRINGLIB_SIZEOF_CHAR > 1
     299           0 :         else if (Py_UNICODE_IS_SURROGATE(ch)) {
     300             :             Py_ssize_t newpos;
     301             :             Py_ssize_t repsize, k, startpos;
     302           0 :             startpos = i-1;
     303           0 :             rep = unicode_encode_call_errorhandler(
     304             :                   errors, &errorHandler, "utf-8", "surrogates not allowed",
     305             :                   unicode, &exc, startpos, startpos+1, &newpos);
     306           0 :             if (!rep)
     307             :                 goto error;
     308             : 
     309           0 :             if (PyBytes_Check(rep))
     310           0 :                 repsize = PyBytes_GET_SIZE(rep);
     311             :             else
     312           0 :                 repsize = PyUnicode_GET_LENGTH(rep);
     313             : 
     314           0 :             if (repsize > max_char_size) {
     315             :                 Py_ssize_t offset;
     316             : 
     317           0 :                 if (result == NULL)
     318           0 :                     offset = p - stackbuf;
     319             :                 else
     320           0 :                     offset = p - PyBytes_AS_STRING(result);
     321             : 
     322           0 :                 if (nallocated > PY_SSIZE_T_MAX - repsize + max_char_size) {
     323             :                     /* integer overflow */
     324           0 :                     PyErr_NoMemory();
     325             :                     goto error;
     326             :                 }
     327           0 :                 nallocated += repsize - max_char_size;
     328           0 :                 if (result != NULL) {
     329           0 :                     if (_PyBytes_Resize(&result, nallocated) < 0)
     330             :                         goto error;
     331             :                 } else {
     332           0 :                     result = PyBytes_FromStringAndSize(NULL, nallocated);
     333           0 :                     if (result == NULL)
     334             :                         goto error;
     335           0 :                     Py_MEMCPY(PyBytes_AS_STRING(result), stackbuf, offset);
     336             :                 }
     337           0 :                 p = PyBytes_AS_STRING(result) + offset;
     338             :             }
     339             : 
     340           0 :             if (PyBytes_Check(rep)) {
     341           0 :                 char *prep = PyBytes_AS_STRING(rep);
     342           0 :                 for(k = repsize; k > 0; k--)
     343           0 :                     *p++ = *prep++;
     344             :             } else /* rep is unicode */ {
     345             :                 enum PyUnicode_Kind repkind;
     346             :                 void *repdata;
     347             : 
     348           0 :                 if (PyUnicode_READY(rep) < 0)
     349             :                     goto error;
     350           0 :                 repkind = PyUnicode_KIND(rep);
     351           0 :                 repdata = PyUnicode_DATA(rep);
     352             : 
     353           0 :                 for(k=0; k<repsize; k++) {
     354           0 :                     Py_UCS4 c = PyUnicode_READ(repkind, repdata, k);
     355           0 :                     if (0x80 <= c) {
     356           0 :                         raise_encode_exception(&exc, "utf-8",
     357             :                                                unicode,
     358             :                                                i-1, i,
     359             :                                                "surrogates not allowed");
     360             :                         goto error;
     361             :                     }
     362           0 :                     *p++ = (char)c;
     363             :                 }
     364             :             }
     365           0 :             Py_CLEAR(rep);
     366             :         }
     367             :         else
     368             : #if STRINGLIB_SIZEOF_CHAR > 2
     369           0 :         if (ch < 0x10000)
     370             : #endif
     371             :         {
     372           0 :             *p++ = (char)(0xe0 | (ch >> 12));
     373           0 :             *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
     374           0 :             *p++ = (char)(0x80 | (ch & 0x3f));
     375             :         }
     376             : #if STRINGLIB_SIZEOF_CHAR > 2
     377             :         else /* ch >= 0x10000 */
     378             :         {
     379             :             assert(ch <= MAX_UNICODE);
     380             :             /* Encode UCS4 Unicode ordinals */
     381           0 :             *p++ = (char)(0xf0 | (ch >> 18));
     382           0 :             *p++ = (char)(0x80 | ((ch >> 12) & 0x3f));
     383           0 :             *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
     384           0 :             *p++ = (char)(0x80 | (ch & 0x3f));
     385             :         }
     386             : #endif /* STRINGLIB_SIZEOF_CHAR > 2 */
     387             : #endif /* STRINGLIB_SIZEOF_CHAR > 1 */
     388             :     }
     389             : 
     390           1 :     if (result == NULL) {
     391             :         /* This was stack allocated. */
     392           0 :         nneeded = p - stackbuf;
     393             :         assert(nneeded <= nallocated);
     394           0 :         result = PyBytes_FromStringAndSize(stackbuf, nneeded);
     395             :     }
     396             :     else {
     397             :         /* Cut back to size actually needed. */
     398           1 :         nneeded = p - PyBytes_AS_STRING(result);
     399             :         assert(nneeded <= nallocated);
     400           1 :         _PyBytes_Resize(&result, nneeded);
     401             :     }
     402             : 
     403             : #if STRINGLIB_SIZEOF_CHAR > 1
     404           0 :     Py_XDECREF(errorHandler);
     405           0 :     Py_XDECREF(exc);
     406             : #endif
     407           1 :     return result;
     408             : 
     409             : #if STRINGLIB_SIZEOF_CHAR > 1
     410             :  error:
     411           0 :     Py_XDECREF(rep);
     412           0 :     Py_XDECREF(errorHandler);
     413           0 :     Py_XDECREF(exc);
     414           0 :     Py_XDECREF(result);
     415           0 :     return NULL;
     416             : #endif
     417             : 
     418             : #undef MAX_SHORT_UNICHARS
     419             : }
     420             : 
     421             : /* The pattern for constructing UCS2-repeated masks. */
     422             : #if SIZEOF_LONG == 8
     423             : # define UCS2_REPEAT_MASK 0x0001000100010001ul
     424             : #elif SIZEOF_LONG == 4
     425             : # define UCS2_REPEAT_MASK 0x00010001ul
     426             : #else
     427             : # error C 'long' size should be either 4 or 8!
     428             : #endif
     429             : 
     430             : /* The mask for fast checking. */
     431             : #if STRINGLIB_SIZEOF_CHAR == 1
     432             : /* The mask for fast checking of whether a C 'long' contains any
     433             :    non-ASCII or non-Latin1 UTF16-encoded characters. */
     434             : # define FAST_CHAR_MASK         (UCS2_REPEAT_MASK * (0xFFFFu & ~STRINGLIB_MAX_CHAR))
     435             : #else
     436             : /* The mask for fast checking of whether a C 'long' may contain
     437             :    UTF16-encoded surrogate characters. This is an efficient heuristic,
     438             :    assuming that non-surrogate characters with a code point >= 0x8000 are
     439             :    rare in most input.
     440             : */
     441             : # define FAST_CHAR_MASK         (UCS2_REPEAT_MASK * 0x8000u)
     442             : #endif
     443             : /* The mask for fast byte-swapping. */
     444             : #define STRIPPED_MASK           (UCS2_REPEAT_MASK * 0x00FFu)
     445             : /* Swap bytes. */
     446             : #define SWAB(value)             ((((value) >> 8) & STRIPPED_MASK) | \
     447             :                                  (((value) & STRIPPED_MASK) << 8))
     448             : 
     449             : Py_LOCAL_INLINE(Py_UCS4)
     450           0 : STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,
     451             :                         STRINGLIB_CHAR *dest, Py_ssize_t *outpos,
     452             :                         int native_ordering)
     453             : {
     454             :     Py_UCS4 ch;
     455           0 :     const unsigned char *aligned_end =
     456           0 :             (const unsigned char *) ((size_t) e & ~LONG_PTR_MASK);
     457           0 :     const unsigned char *q = *inptr;
     458           0 :     STRINGLIB_CHAR *p = dest + *outpos;
     459             :     /* Offsets from q for retrieving byte pairs in the right order. */
     460             : #ifdef BYTEORDER_IS_LITTLE_ENDIAN
     461           0 :     int ihi = !!native_ordering, ilo = !native_ordering;
     462             : #else
     463             :     int ihi = !native_ordering, ilo = !!native_ordering;
     464             : #endif
     465           0 :     --e;
     466             : 
     467           0 :     while (q < e) {
     468             :         Py_UCS4 ch2;
     469             :         /* First check for possible aligned read of a C 'long'. Unaligned
     470             :            reads are more expensive, better to defer to another iteration. */
     471           0 :         if (!((size_t) q & LONG_PTR_MASK)) {
     472             :             /* Fast path for runs of in-range non-surrogate chars. */
     473           0 :             register const unsigned char *_q = q;
     474           0 :             while (_q < aligned_end) {
     475           0 :                 unsigned long block = * (unsigned long *) _q;
     476           0 :                 if (native_ordering) {
     477             :                     /* Can use buffer directly */
     478           0 :                     if (block & FAST_CHAR_MASK)
     479           0 :                         break;
     480             :                 }
     481             :                 else {
     482             :                     /* Need to byte-swap */
     483           0 :                     if (block & SWAB(FAST_CHAR_MASK))
     484           0 :                         break;
     485             : #if STRINGLIB_SIZEOF_CHAR == 1
     486           0 :                     block >>= 8;
     487             : #else
     488           0 :                     block = SWAB(block);
     489             : #endif
     490             :                 }
     491             : #ifdef BYTEORDER_IS_LITTLE_ENDIAN
     492             : # if SIZEOF_LONG == 4
     493           0 :                 p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);
     494           0 :                 p[1] = (STRINGLIB_CHAR)(block >> 16);
     495             : # elif SIZEOF_LONG == 8
     496             :                 p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);
     497             :                 p[1] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);
     498             :                 p[2] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);
     499             :                 p[3] = (STRINGLIB_CHAR)(block >> 48);
     500             : # endif
     501             : #else
     502             : # if SIZEOF_LONG == 4
     503             :                 p[0] = (STRINGLIB_CHAR)(block >> 16);
     504             :                 p[1] = (STRINGLIB_CHAR)(block & 0xFFFFu);
     505             : # elif SIZEOF_LONG == 8
     506             :                 p[0] = (STRINGLIB_CHAR)(block >> 48);
     507             :                 p[1] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);
     508             :                 p[2] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);
     509             :                 p[3] = (STRINGLIB_CHAR)(block & 0xFFFFu);
     510             : # endif
     511             : #endif
     512           0 :                 _q += SIZEOF_LONG;
     513           0 :                 p += SIZEOF_LONG / 2;
     514             :             }
     515           0 :             q = _q;
     516           0 :             if (q >= e)
     517           0 :                 break;
     518             :         }
     519             : 
     520           0 :         ch = (q[ihi] << 8) | q[ilo];
     521           0 :         q += 2;
     522           0 :         if (!Py_UNICODE_IS_SURROGATE(ch)) {
     523             : #if STRINGLIB_SIZEOF_CHAR < 2
     524           0 :             if (ch > STRINGLIB_MAX_CHAR)
     525             :                 /* Out-of-range */
     526           0 :                 goto Return;
     527             : #endif
     528           0 :             *p++ = (STRINGLIB_CHAR)ch;
     529           0 :             continue;
     530             :         }
     531             : 
     532             :         /* UTF-16 code pair: */
     533           0 :         if (q >= e)
     534           0 :             goto UnexpectedEnd;
     535           0 :         if (!Py_UNICODE_IS_HIGH_SURROGATE(ch))
     536             :             goto IllegalEncoding;
     537           0 :         ch2 = (q[ihi] << 8) | q[ilo];
     538           0 :         q += 2;
     539           0 :         if (!Py_UNICODE_IS_LOW_SURROGATE(ch2))
     540             :             goto IllegalSurrogate;
     541           0 :         ch = Py_UNICODE_JOIN_SURROGATES(ch, ch2);
     542             : #if STRINGLIB_SIZEOF_CHAR < 4
     543             :         /* Out-of-range */
     544           0 :         goto Return;
     545             : #else
     546           0 :         *p++ = (STRINGLIB_CHAR)ch;
     547             : #endif
     548             :     }
     549           0 :     ch = 0;
     550             : Return:
     551           0 :     *inptr = q;
     552           0 :     *outpos = p - dest;
     553           0 :     return ch;
     554             : UnexpectedEnd:
     555           0 :     ch = 1;
     556           0 :     goto Return;
     557             : IllegalEncoding:
     558           0 :     ch = 2;
     559           0 :     goto Return;
     560             : IllegalSurrogate:
     561           0 :     ch = 3;
     562           0 :     goto Return;
     563             : }
     564             : #undef UCS2_REPEAT_MASK  /* drop the helper macros here so they are no longer defined for the code that follows */
     565             : #undef FAST_CHAR_MASK
     566             : #undef STRIPPED_MASK
     567             : #undef SWAB
     568             : #undef LONG_PTR_MASK
     569             : 
     570             : 
     571             : Py_LOCAL_INLINE(void)  /* Encode the characters in[0..len) as UTF-16 code units written to out; returns nothing. */
     572           0 : STRINGLIB(utf16_encode)(unsigned short *out,  /* out: destination code units; no capacity is passed in or checked here */
     573             :                         const STRINGLIB_CHAR *in,  /* in: first source character */
     574             :                         Py_ssize_t len,  /* len: number of source characters */
     575             :                         int native_ordering)  /* non-zero: store units as-is; zero: swap the two bytes of every unit */
     576             : {
     577           0 :     const STRINGLIB_CHAR *end = in + len;  /* one past the last source character */
     578             : #if STRINGLIB_SIZEOF_CHAR == 1  /* SWAB2(CH): CH with its two bytes exchanged */
     579             : # define SWAB2(CH)  ((CH) << 8)  /* a one-byte character has no high byte, so the swap reduces to a shift */
     580             : #else
     581             : # define SWAB2(CH)  (((CH) << 8) | ((CH) >> 8))  /* bits above the low 16 are dropped when the result is stored in an unsigned short */
     582             : #endif
     583             : #if STRINGLIB_MAX_CHAR < 0x10000  /* no character needs a surrogate pair: one output unit per input character */
     584           0 :     if (native_ordering) {
     585             : # if STRINGLIB_SIZEOF_CHAR == 2
     586           0 :         Py_MEMCPY(out, in, 2 * len);  /* source is already two bytes per character: copy 2 * len bytes unchanged */
     587             : # else
     588           0 :         _PyUnicode_CONVERT_BYTES(STRINGLIB_CHAR, unsigned short, in, end, out);  /* NOTE(review): presumably copies each element of [in, end) to out as unsigned short -- confirm against the macro definition */
     589             : # endif
     590             :     } else {
     591           0 :         const STRINGLIB_CHAR *unrolled_end = in + (len & ~ (Py_ssize_t) 3);  /* len rounded down to a multiple of 4 */
     592           0 :         while (in < unrolled_end) {  /* main loop: four characters per iteration */
     593           0 :             out[0] = SWAB2(in[0]);
     594           0 :             out[1] = SWAB2(in[1]);
     595           0 :             out[2] = SWAB2(in[2]);
     596           0 :             out[3] = SWAB2(in[3]);
     597           0 :             in += 4; out += 4;
     598             :         }
     599           0 :         while (in < end) {  /* tail loop: the remaining 0-3 characters */
     600           0 :             *out++ = SWAB2(*in);
     601           0 :             ++in;
     602             :         }
     603             :     }
     604             : #else  /* characters may be 0x10000 or above: those are written as two units */
     605           0 :     if (native_ordering) {
     606           0 :         while (in < end) {
     607           0 :             Py_UCS4 ch = *in++;
     608           0 :             if (ch < 0x10000)
     609           0 :                 *out++ = ch;  /* value below 0x10000: stored as a single unit */
     610             :             else {
     611           0 :                 out[0] = Py_UNICODE_HIGH_SURROGATE(ch);  /* pair order: high surrogate first, low surrogate second */
     612           0 :                 out[1] = Py_UNICODE_LOW_SURROGATE(ch);
     613           0 :                 out += 2;
     614             :             }
     615             :         }
     616             :     } else {  /* same as the loop above, but every unit is passed through SWAB2 before being stored */
     617           0 :         while (in < end) {
     618           0 :             Py_UCS4 ch = *in++;
     619           0 :             if (ch < 0x10000)
     620           0 :                 *out++ = SWAB2((Py_UCS2)ch);  /* cast to Py_UCS2 before swapping */
     621             :             else {
     622           0 :                 Py_UCS2 ch1 = Py_UNICODE_HIGH_SURROGATE(ch);
     623           0 :                 Py_UCS2 ch2 = Py_UNICODE_LOW_SURROGATE(ch);
     624           0 :                 out[0] = SWAB2(ch1);  /* each surrogate is swapped on its own; the high-then-low order is kept */
     625           0 :                 out[1] = SWAB2(ch2);
     626           0 :                 out += 2;
     627             :             }
     628             :         }
     629             :     }
     630             : #endif
     631             : #undef SWAB2  /* SWAB2 is local to this function */
     632           0 : }
     633             : #endif /* STRINGLIB_IS_UNICODE */

Generated by: LCOV version 1.10