LCOV - code coverage report
Current view: top level - libreoffice/workdir/unxlngi6.pro/UnpackedTarball/python3/Objects - unicodectype.c (source / functions) Hit Total Coverage
Test: libreoffice_filtered.info Lines: 34 104 32.7 %
Date: 2012-12-17 Functions: 10 22 45.5 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :    Unicode character type helpers.
       3             : 
       4             :    Written by Marc-Andre Lemburg (mal@lemburg.com).
       5             :    Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
       6             : 
       7             :    Copyright (c) Corporation for National Research Initiatives.
       8             : 
       9             : */
      10             : 
      11             : #include "Python.h"
      12             : 
      13             : #define ALPHA_MASK 0x01
      14             : #define DECIMAL_MASK 0x02
      15             : #define DIGIT_MASK 0x04
      16             : #define LOWER_MASK 0x08
      17             : #define LINEBREAK_MASK 0x10
      18             : #define SPACE_MASK 0x20
      19             : #define TITLE_MASK 0x40
      20             : #define UPPER_MASK 0x80
      21             : #define XID_START_MASK 0x100
      22             : #define XID_CONTINUE_MASK 0x200
      23             : #define PRINTABLE_MASK 0x400
      24             : #define NUMERIC_MASK 0x800
      25             : #define CASE_IGNORABLE_MASK 0x1000
      26             : #define CASED_MASK 0x2000
      27             : #define EXTENDED_CASE_MASK 0x4000
      28             : 
      29             : typedef struct {
      30             :     /* 
      31             :        These are either deltas to the character or offsets in
      32             :        _PyUnicode_ExtendedCase.
      33             :     */
      34             :     const int upper;
      35             :     const int lower;
      36             :     const int title;
      37             :     /* Note if more flag space is needed, decimal and digit could be unified. */
      38             :     const unsigned char decimal;
      39             :     const unsigned char digit;
      40             :     const unsigned short flags;
      41             : } _PyUnicode_TypeRecord;
      42             : 
      43             : #include "unicodetype_db.h"
      44             : 
      45             : static const _PyUnicode_TypeRecord *
      46       15599 : gettyperecord(Py_UCS4 code)
      47             : {
      48             :     int index;
      49             : 
      50       15599 :     if (code >= 0x110000)
      51           0 :         index = 0;
      52             :     else
      53             :     {
      54       15599 :         index = index1[(code>>SHIFT)];
      55       15599 :         index = index2[(index<<SHIFT)+(code&((1<<SHIFT)-1))];
      56             :     }
      57             : 
      58       15599 :     return &_PyUnicode_TypeRecords[index];
      59             : }
      60             : 
      61             : /* Returns the titlecase Unicode character corresponding to ch, or just
      62             :    ch itself if no titlecase mapping is known. */
      63             : 
      64           0 : Py_UCS4 _PyUnicode_ToTitlecase(register Py_UCS4 ch)
      65             : {
      66           0 :     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
      67             : 
      68           0 :     if (ctype->flags & EXTENDED_CASE_MASK)
      69           0 :         return _PyUnicode_ExtendedCase[ctype->title & 0xFFFF];
      70           0 :     return ch + ctype->title;
      71             : }
      72             : 
      73             : /* Returns 1 for Unicode characters having the category 'Lt', 0
      74             :    otherwise. */
      75             : 
      76           0 : int _PyUnicode_IsTitlecase(Py_UCS4 ch)
      77             : {
      78           0 :     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
      79             : 
      80           0 :     return (ctype->flags & TITLE_MASK) != 0;
      81             : }
      82             : 
      83             : /* Returns 1 for Unicode characters having the XID_Start property, 0
      84             :    otherwise. */
      85             : 
      86           9 : int _PyUnicode_IsXidStart(Py_UCS4 ch)
      87             : {
      88           9 :     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
      89             : 
      90           9 :     return (ctype->flags & XID_START_MASK) != 0;
      91             : }
      92             : 
      93             : /* Returns 1 for Unicode characters having the XID_Continue property,
      94             :    0 otherwise. */
      95             : 
      96          51 : int _PyUnicode_IsXidContinue(Py_UCS4 ch)
      97             : {
      98          51 :     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
      99             : 
     100          51 :     return (ctype->flags & XID_CONTINUE_MASK) != 0;
     101             : }
     102             : 
     103             : /* Returns the integer decimal (0-9) for Unicode characters having
     104             :    this property, -1 otherwise. */
     105             : 
     106        3372 : int _PyUnicode_ToDecimalDigit(Py_UCS4 ch)
     107             : {
     108        3372 :     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
     109             : 
     110        3372 :     return (ctype->flags & DECIMAL_MASK) ? ctype->decimal : -1;
     111             : }
     112             : 
     113        3256 : int _PyUnicode_IsDecimalDigit(Py_UCS4 ch)
     114             : {
     115        3256 :     if (_PyUnicode_ToDecimalDigit(ch) < 0)
     116        2473 :         return 0;
     117         783 :     return 1;
     118             : }
     119             : 
     120             : /* Returns the integer digit (0-9) for Unicode characters having
     121             :    this property, -1 otherwise. */
     122             : 
     123        1408 : int _PyUnicode_ToDigit(Py_UCS4 ch)
     124             : {
     125        1408 :     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
     126             : 
     127        1408 :     return (ctype->flags & DIGIT_MASK) ? ctype->digit : -1;
     128             : }
     129             : 
     130        1408 : int _PyUnicode_IsDigit(Py_UCS4 ch)
     131             : {
     132        1408 :     if (_PyUnicode_ToDigit(ch) < 0)
     133        1408 :         return 0;
     134           0 :     return 1;
     135             : }
     136             : 
     137             : /* Returns 1 for Unicode characters flagged as numeric (NUMERIC_MASK),
     138             :    0 otherwise. */
     139             : 
     140        1397 : int _PyUnicode_IsNumeric(Py_UCS4 ch)
     141             : {
     142        1397 :     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
     143             : 
     144        1397 :     return (ctype->flags & NUMERIC_MASK) != 0;
     145             : }
     146             : 
     147             : /* Returns 1 for printable Unicode characters, i.e. those that do not
     148             :    need to be hex-escaped when repr()ed, 0 otherwise.
     149             :    All characters except those characters defined in the Unicode character
     150             :    database as following categories are considered printable.
     151             :       * Cc (Other, Control)
     152             :       * Cf (Other, Format)
     153             :       * Cs (Other, Surrogate)
     154             :       * Co (Other, Private Use)
     155             :       * Cn (Other, Not Assigned)
     156             :       * Zl Separator, Line ('\u2028', LINE SEPARATOR)
     157             :       * Zp Separator, Paragraph ('\u2029', PARAGRAPH SEPARATOR)
     158             :       * Zs (Separator, Space) other than ASCII space('\x20').
     159             : */
     160           0 : int _PyUnicode_IsPrintable(Py_UCS4 ch)
     161             : {
     162           0 :     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
     163             : 
     164           0 :     return (ctype->flags & PRINTABLE_MASK) != 0;
     165             : }
     166             : 
     167             : /* Returns 1 for Unicode characters having the category 'Ll', 0
     168             :    otherwise. */
     169             : 
     170           0 : int _PyUnicode_IsLowercase(Py_UCS4 ch)
     171             : {
     172           0 :     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
     173             : 
     174           0 :     return (ctype->flags & LOWER_MASK) != 0;
     175             : }
     176             : 
     177             : /* Returns 1 for Unicode characters having the category 'Lu', 0
     178             :    otherwise. */
     179             : 
     180           0 : int _PyUnicode_IsUppercase(Py_UCS4 ch)
     181             : {
     182           0 :     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
     183             : 
     184           0 :     return (ctype->flags & UPPER_MASK) != 0;
     185             : }
     186             : 
     187             : /* Returns the uppercase Unicode character corresponding to ch, or just
     188             :    ch itself if no uppercase mapping is known. */
     189             : 
     190           0 : Py_UCS4 _PyUnicode_ToUppercase(Py_UCS4 ch)
     191             : {
     192           0 :     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
     193             : 
     194           0 :     if (ctype->flags & EXTENDED_CASE_MASK)
     195           0 :         return _PyUnicode_ExtendedCase[ctype->upper & 0xFFFF];
     196           0 :     return ch + ctype->upper;
     197             : }
     198             : 
     199             : /* Returns the lowercase Unicode character corresponding to ch, or just
     200             :    ch itself if no lowercase mapping is known. */
     201             : 
     202         269 : Py_UCS4 _PyUnicode_ToLowercase(Py_UCS4 ch)
     203             : {
     204         269 :     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
     205             : 
     206         269 :     if (ctype->flags & EXTENDED_CASE_MASK)
     207           0 :         return _PyUnicode_ExtendedCase[ctype->lower & 0xFFFF];
     208         269 :     return ch + ctype->lower;
     209             : }
     210             : 
     211           0 : int _PyUnicode_ToLowerFull(Py_UCS4 ch, Py_UCS4 *res)
     212             : {
     213           0 :     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
     214             : 
     215           0 :     if (ctype->flags & EXTENDED_CASE_MASK) {
     216           0 :         int index = ctype->lower & 0xFFFF;
     217           0 :         int n = ctype->lower >> 24;
     218             :         int i;
     219           0 :         for (i = 0; i < n; i++)
     220           0 :             res[i] = _PyUnicode_ExtendedCase[index + i];
     221           0 :         return n;
     222             :     }
     223           0 :     res[0] = ch + ctype->lower;
     224           0 :     return 1;
     225             : }
     226             : 
     227           0 : int _PyUnicode_ToTitleFull(Py_UCS4 ch, Py_UCS4 *res)
     228             : {
     229           0 :     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
     230             : 
     231           0 :     if (ctype->flags & EXTENDED_CASE_MASK) {
     232           0 :         int index = ctype->title & 0xFFFF;
     233           0 :         int n = ctype->title >> 24;
     234             :         int i;
     235           0 :         for (i = 0; i < n; i++)
     236           0 :             res[i] = _PyUnicode_ExtendedCase[index + i];
     237           0 :         return n;
     238             :     }
     239           0 :     res[0] = ch + ctype->title;
     240           0 :     return 1;
     241             : }
     242             : 
     243           0 : int _PyUnicode_ToUpperFull(Py_UCS4 ch, Py_UCS4 *res)
     244             : {
     245           0 :     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
     246             : 
     247           0 :     if (ctype->flags & EXTENDED_CASE_MASK) {
     248           0 :         int index = ctype->upper & 0xFFFF;
     249           0 :         int n = ctype->upper >> 24;
     250             :         int i;
     251           0 :         for (i = 0; i < n; i++)
     252           0 :             res[i] = _PyUnicode_ExtendedCase[index + i];
     253           0 :         return n;
     254             :     }
     255           0 :     res[0] = ch + ctype->upper;
     256           0 :     return 1;
     257             : }
     258             : 
     259           0 : int _PyUnicode_ToFoldedFull(Py_UCS4 ch, Py_UCS4 *res)
     260             : {
     261           0 :     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
     262             : 
     263           0 :     if (ctype->flags & EXTENDED_CASE_MASK && (ctype->lower >> 20) & 7) {
     264           0 :         int index = (ctype->lower & 0xFFFF) + (ctype->lower >> 24);
     265           0 :         int n = (ctype->lower >> 20) & 7;
     266             :         int i;
     267           0 :         for (i = 0; i < n; i++)
     268           0 :             res[i] = _PyUnicode_ExtendedCase[index + i];
     269           0 :         return n;
     270             :     }
     271           0 :     return _PyUnicode_ToLowerFull(ch, res);
     272             : }
     273             : 
     274           0 : int _PyUnicode_IsCased(Py_UCS4 ch)
     275             : {
     276           0 :     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
     277             : 
     278           0 :     return (ctype->flags & CASED_MASK) != 0;
     279             : }
     280             : 
     281           0 : int _PyUnicode_IsCaseIgnorable(Py_UCS4 ch)
     282             : {
     283           0 :     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
     284             : 
     285           0 :     return (ctype->flags & CASE_IGNORABLE_MASK) != 0;
     286             : }
     287             : 
     288             : /* Returns 1 for Unicode characters having the category 'Ll', 'Lu', 'Lt',
     289             :    'Lo' or 'Lm',  0 otherwise. */
     290             : 
     291        9093 : int _PyUnicode_IsAlpha(Py_UCS4 ch)
     292             : {
     293        9093 :     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
     294             : 
     295        9093 :     return (ctype->flags & ALPHA_MASK) != 0;
     296             : }
     297             : 

Generated by: LCOV version 1.10