| File: | workdir/unxlngi6.pro/CustomTarget/ucpp/source/lexer.c |
| Location: | line 966, column 37 |
| Description: | Branch condition evaluates to a garbage value |
| 1 | /* | |||
| 2 | * (c) Thomas Pornin 1999 - 2002 | |||
| 3 | * | |||
| 4 | * Redistribution and use in source and binary forms, with or without | |||
| 5 | * modification, are permitted provided that the following conditions | |||
| 6 | * are met: | |||
| 7 | * 1. Redistributions of source code must retain the above copyright | |||
| 8 | * notice, this list of conditions and the following disclaimer. | |||
| 9 | * 2. Redistributions in binary form must reproduce the above copyright | |||
| 10 | * notice, this list of conditions and the following disclaimer in the | |||
| 11 | * documentation and/or other materials provided with the distribution. | |||
| 12 | * 4. The name of the authors may not be used to endorse or promote | |||
| 13 | * products derived from this software without specific prior written | |||
| 14 | * permission. | |||
| 15 | * | |||
| 16 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR | |||
| 17 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |||
| 18 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE | |||
| 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |||
| 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT | |||
| 22 | * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR | |||
| 23 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, | |||
| 24 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE | |||
| 25 | * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, | |||
| 26 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| 27 | * | |||
| 28 | */ | |||
| 29 | ||||
| 30 | #include "tune.h" | |||
| 31 | #include <stdio.h> | |||
| 32 | #include <string.h> | |||
| 33 | #include <stddef.h> | |||
| 34 | #include <limits.h> | |||
| 35 | #include "ucppi.h" | |||
| 36 | #include "mem.h" | |||
| 37 | #ifdef UCPP_MMAP | |||
| 38 | #include <unistd.h> | |||
| 39 | #include <sys/types.h> | |||
| 40 | #include <sys/mman.h> | |||
| 41 | #endif | |||
| 42 | ||||
| 43 | /* | |||
| 44 | * Character classes for description of the automaton. | |||
| 45 | * The characters used for representing classes should not appear | |||
| 46 | * explicitly in an automaton rule. | |||
| 47 | */ | |||
| 48 | #define SPC' ' ' ' /* whitespace characters */ | |||
| 49 | #define ALP'Z' 'Z' /* A-Z, a-z, _ */ | |||
| 50 | #define NUM'9' '9' /* 0-9 */ | |||
| 51 | #define ANY'Y' 'Y' /* any character */ | |||
| 52 | #define VCH'F' 'F' /* void character (for end of input) */ | |||
| 53 | ||||
| 54 | /* | |||
| 55 | * flags and macros to test those flags | |||
| 56 | * STO: the currently read string is a complete token | |||
| 57 | * PUT: the currently read character must be added to the string | |||
| 58 | * FRZ: the currently read character must be kept and read again | |||
| 59 | */ | |||
| 60 | #define MOD_MK255 255 | |||
| 61 | #define noMOD(x)((x) & 255) ((x) & 255) | |||
| 62 | #define STO(x)((x) | 256) ((x) | 256) | |||
| 63 | #define ttSTO(x)((x) & 256) ((x) & 256) | |||
| 64 | #define FRZ(x)((x) | 512) ((x) | 512) | |||
| 65 | #define ttFRZ(x)((x) & 512) ((x) & 512) | |||
| 66 | #define PUT(x)((x) | 1024) ((x) | 1024) | |||
| 67 | #define ttPUT(x)((x) & 1024) ((x) & 1024) | |||
| 68 | ||||
| 69 | /* order is important */ | |||
| 70 | enum { | |||
| 71 | S_START, S_SPACE, S_BANG, S_STRING, S_STRING2, S_COLON, | |||
| 72 | S_SHARP, S_PCT, S_PCT2, S_PCT3, S_AMPER, S_CHAR, S_CHAR2, S_STAR, | |||
| 73 | S_PLUS, S_MINUS, S_DOT, S_DOT2, S_SLASH, S_NUMBER, S_NUMBER2, S_LT, | |||
| 74 | S_LT2, S_EQ, S_GT, S_GT2, S_CIRC, S_PIPE, S_BACKSLASH, | |||
| 75 | S_COMMENT, S_COMMENT2, S_COMMENT3, S_COMMENT4, S_COMMENT5, | |||
| 76 | S_NAME, S_NAME_BS, S_LCHAR, | |||
| 77 | MSTATE, | |||
| 78 | S_ILL, S_DDOT, S_DDSHARP, S_BS, S_ROGUE_BS, S_BEHEAD, S_DECAY, | |||
| 79 | S_TRUNC, S_TRUNCC, S_OUCH | |||
| 80 | }; | |||
| 81 | ||||
| 82 | #define CMT(x)((x) >= S_COMMENT && (x) <= S_COMMENT5) ((x) >= S_COMMENT && (x) <= S_COMMENT5) | |||
| 83 | ||||
| 84 | #define CMCR2 2 | |||
| 85 | ||||
| 86 | /* | |||
| 87 | * This is the description of the automaton. It is not used "as is" | |||
| 88 | * but copied at execution time into a table. | |||
| 89 | * | |||
| 90 | * To my utmost displeasure, there are a few hacks in read_token() | |||
| 91 | * (which uses the transformed automaton) about the special handling | |||
| 92 | * of slashes, sharps, and the letter L. | |||
| 93 | */ | |||
| 94 | static struct machine_state { | |||
| 95 | int state; | |||
| 96 | unsigned char input[CMCR2]; | |||
| 97 | int new_state; | |||
| 98 | } cppms[] = { | |||
| 99 | /* S_START is the generic beginning state */ | |||
| 100 | { S_START, { ANY'Y' }, S_ILL }, | |||
| 101 | #ifdef SEMPER_FIDELIS | |||
| 102 | { S_START, { SPC' ' }, PUT(S_SPACE)((S_SPACE) | 1024) }, | |||
| 103 | #else | |||
| 104 | { S_START, { SPC' ' }, S_SPACE }, | |||
| 105 | #endif | |||
| 106 | { S_START, { '\n' }, STO(NEWLINE)((NEWLINE) | 256) }, | |||
| 107 | { S_START, { '!' }, S_BANG }, | |||
| 108 | { S_START, { '"' }, PUT(S_STRING)((S_STRING) | 1024) }, | |||
| 109 | { S_START, { '#' }, S_SHARP }, | |||
| 110 | { S_START, { '%' }, S_PCT }, | |||
| 111 | { S_START, { '&' }, S_AMPER }, | |||
| 112 | { S_START, { '\'' }, PUT(S_CHAR)((S_CHAR) | 1024) }, | |||
| 113 | { S_START, { '(' }, STO(LPAR)((LPAR) | 256) }, | |||
| 114 | { S_START, { ')' }, STO(RPAR)((RPAR) | 256) }, | |||
| 115 | { S_START, { '*' }, S_STAR }, | |||
| 116 | { S_START, { '+' }, S_PLUS }, | |||
| 117 | { S_START, { ',' }, STO(COMMA)((COMMA) | 256) }, | |||
| 118 | { S_START, { '-' }, S_MINUS }, | |||
| 119 | { S_START, { '.' }, PUT(S_DOT)((S_DOT) | 1024) }, | |||
| 120 | #ifdef SEMPER_FIDELIS | |||
| 121 | { S_START, { '/' }, PUT(S_SLASH)((S_SLASH) | 1024) }, | |||
| 122 | #else | |||
| 123 | { S_START, { '/' }, S_SLASH }, | |||
| 124 | #endif | |||
| 125 | { S_START, { NUM'9' }, PUT(S_NUMBER)((S_NUMBER) | 1024) }, | |||
| 126 | { S_START, { ':' }, S_COLON }, | |||
| 127 | { S_START, { ';' }, STO(SEMIC)((SEMIC) | 256) }, | |||
| 128 | { S_START, { '<' }, S_LT }, | |||
| 129 | { S_START, { '=' }, S_EQ }, | |||
| 130 | { S_START, { '>' }, S_GT }, | |||
| 131 | { S_START, { '?' }, STO(QUEST)((QUEST) | 256) }, | |||
| 132 | { S_START, { ALP'Z' }, PUT(S_NAME)((S_NAME) | 1024) }, | |||
| 133 | { S_START, { 'L' }, PUT(S_LCHAR)((S_LCHAR) | 1024) }, | |||
| 134 | { S_START, { '[' }, STO(LBRK)((LBRK) | 256) }, | |||
| 135 | { S_START, { ']' }, STO(RBRK)((RBRK) | 256) }, | |||
| 136 | { S_START, { '^' }, S_CIRC }, | |||
| 137 | { S_START, { '{' }, STO(LBRA)((LBRA) | 256) }, | |||
| 138 | { S_START, { '|' }, S_PIPE }, | |||
| 139 | { S_START, { '}' }, STO(RBRA)((RBRA) | 256) }, | |||
| 140 | { S_START, { '~' }, STO(NOT)((NOT) | 256) }, | |||
| 141 | { S_START, { '\\' }, S_BACKSLASH }, | |||
| 142 | ||||
| 143 | /* after a space */ | |||
| 144 | { S_SPACE, { ANY'Y' }, FRZ(STO(NONE))((((NONE) | 256)) | 512) }, | |||
| 145 | #ifdef SEMPER_FIDELIS | |||
| 146 | { S_SPACE, { SPC' ' }, PUT(S_SPACE)((S_SPACE) | 1024) }, | |||
| 147 | #else | |||
| 148 | { S_SPACE, { SPC' ' }, S_SPACE }, | |||
| 149 | #endif | |||
| 150 | ||||
| 151 | /* after a ! */ | |||
| 152 | { S_BANG, { ANY'Y' }, FRZ(STO(LNOT))((((LNOT) | 256)) | 512) }, | |||
| 153 | { S_BANG, { '=' }, STO(NEQ)((NEQ) | 256) }, | |||
| 154 | ||||
| 155 | /* after a " */ | |||
| 156 | { S_STRING, { ANY'Y' }, PUT(S_STRING)((S_STRING) | 1024) }, | |||
| 157 | { S_STRING, { VCH'F' }, FRZ(S_TRUNC)((S_TRUNC) | 512) }, | |||
| 158 | { S_STRING, { '\n' }, FRZ(S_BEHEAD)((S_BEHEAD) | 512) }, | |||
| 159 | { S_STRING, { '\\' }, PUT(S_STRING2)((S_STRING2) | 1024) }, | |||
| 160 | { S_STRING, { '"' }, PUT(STO(STRING))((((STRING) | 256)) | 1024) }, | |||
| 161 | ||||
| 162 | { S_STRING2, { ANY'Y' }, PUT(S_STRING)((S_STRING) | 1024) }, | |||
| 163 | { S_STRING2, { VCH'F' }, FRZ(S_TRUNC)((S_TRUNC) | 512) }, | |||
| 164 | ||||
| 165 | /* after a # */ | |||
| 166 | { S_SHARP, { ANY'Y' }, FRZ(STO(SHARP))((((SHARP) | 256)) | 512) }, | |||
| 167 | { S_SHARP, { '#' }, STO(DSHARP)((DSHARP) | 256) }, | |||
| 168 | ||||
| 169 | /* after a : */ | |||
| 170 | { S_COLON, { ANY'Y' }, FRZ(STO(COLON))((((COLON) | 256)) | 512) }, | |||
| 171 | { S_COLON, { '>' }, STO(DIG_RBRK)((DIG_RBRK) | 256) }, | |||
| 172 | ||||
| 173 | /* after a % */ | |||
| 174 | { S_PCT, { ANY'Y' }, FRZ(STO(PCT))((((PCT) | 256)) | 512) }, | |||
| 175 | { S_PCT, { '=' }, STO(ASPCT)((ASPCT) | 256) }, | |||
| 176 | { S_PCT, { '>' }, STO(DIG_RBRA)((DIG_RBRA) | 256) }, | |||
| 177 | { S_PCT, { ':' }, S_PCT2 }, | |||
| 178 | ||||
| 179 | /* after a %: */ | |||
| 180 | { S_PCT2, { ANY'Y' }, FRZ(STO(DIG_SHARP))((((DIG_SHARP) | 256)) | 512) }, | |||
| 181 | { S_PCT2, { '%' }, S_PCT3 }, | |||
| 182 | ||||
| 183 | /* after a %:% */ | |||
| 184 | { S_PCT3, { ANY'Y' }, FRZ(S_DDSHARP)((S_DDSHARP) | 512) }, | |||
| 185 | { S_PCT3, { ':' }, STO(DIG_DSHARP)((DIG_DSHARP) | 256) }, | |||
| 186 | ||||
| 187 | /* after a & */ | |||
| 188 | { S_AMPER, { ANY'Y' }, FRZ(STO(AND))((((AND) | 256)) | 512) }, | |||
| 189 | { S_AMPER, { '=' }, STO(ASAND)((ASAND) | 256) }, | |||
| 190 | { S_AMPER, { '&' }, STO(LAND)((LAND) | 256) }, | |||
| 191 | ||||
| 192 | /* after a ' */ | |||
| 193 | { S_CHAR, { ANY'Y' }, PUT(S_CHAR)((S_CHAR) | 1024) }, | |||
| 194 | { S_CHAR, { VCH'F' }, FRZ(S_TRUNC)((S_TRUNC) | 512) }, | |||
| 195 | { S_CHAR, { '\'' }, PUT(STO(CHAR))((((CHAR) | 256)) | 1024) }, | |||
| 196 | { S_CHAR, { '\\' }, PUT(S_CHAR2)((S_CHAR2) | 1024) }, | |||
| 197 | ||||
| 198 | /* after a \ in a character constant | |||
| 199 | useful only for '\'' */ | |||
| 200 | { S_CHAR2, { ANY'Y' }, PUT(S_CHAR)((S_CHAR) | 1024) }, | |||
| 201 | { S_CHAR2, { VCH'F' }, FRZ(S_TRUNC)((S_TRUNC) | 512) }, | |||
| 202 | ||||
| 203 | /* after a * */ | |||
| 204 | { S_STAR, { ANY'Y' }, FRZ(STO(STAR))((((STAR) | 256)) | 512) }, | |||
| 205 | { S_STAR, { '=' }, STO(ASSTAR)((ASSTAR) | 256) }, | |||
| 206 | ||||
| 207 | /* after a + */ | |||
| 208 | { S_PLUS, { ANY'Y' }, FRZ(STO(PLUS))((((PLUS) | 256)) | 512) }, | |||
| 209 | { S_PLUS, { '+' }, STO(PPLUS)((PPLUS) | 256) }, | |||
| 210 | { S_PLUS, { '=' }, STO(ASPLUS)((ASPLUS) | 256) }, | |||
| 211 | ||||
| 212 | /* after a - */ | |||
| 213 | { S_MINUS, { ANY'Y' }, FRZ(STO(MINUS))((((MINUS) | 256)) | 512) }, | |||
| 214 | { S_MINUS, { '-' }, STO(MMINUS)((MMINUS) | 256) }, | |||
| 215 | { S_MINUS, { '=' }, STO(ASMINUS)((ASMINUS) | 256) }, | |||
| 216 | { S_MINUS, { '>' }, STO(ARROW)((ARROW) | 256) }, | |||
| 217 | ||||
| 218 | /* after a . */ | |||
| 219 | { S_DOT, { ANY'Y' }, FRZ(STO(DOT))((((DOT) | 256)) | 512) }, | |||
| 220 | { S_DOT, { NUM'9' }, PUT(S_NUMBER)((S_NUMBER) | 1024) }, | |||
| 221 | { S_DOT, { '.' }, S_DOT2 }, | |||
| 222 | ||||
| 223 | /* after .. */ | |||
| 224 | { S_DOT2, { ANY'Y' }, FRZ(S_DDOT)((S_DDOT) | 512) }, | |||
| 225 | { S_DOT2, { '.' }, STO(MDOTS)((MDOTS) | 256) }, | |||
| 226 | ||||
| 227 | /* after a / */ | |||
| 228 | { S_SLASH, { ANY'Y' }, FRZ(STO(SLASH))((((SLASH) | 256)) | 512) }, | |||
| 229 | { S_SLASH, { '=' }, STO(ASSLASH)((ASSLASH) | 256) }, | |||
| 230 | #ifdef SEMPER_FIDELIS | |||
| 231 | { S_SLASH, { '*' }, PUT(S_COMMENT)((S_COMMENT) | 1024) }, | |||
| 232 | { S_SLASH, { '/' }, PUT(S_COMMENT5)((S_COMMENT5) | 1024) }, | |||
| 233 | #else | |||
| 234 | { S_SLASH, { '*' }, S_COMMENT }, | |||
| 235 | { S_SLASH, { '/' }, S_COMMENT5 }, | |||
| 236 | #endif | |||
| 237 | /* | |||
| 238 | * There is a little hack in read_token() to disable | |||
| 239 | * this last rule, if C++ (C99) comments are not enabled. | |||
| 240 | */ | |||
| 241 | ||||
| 242 | /* after a number */ | |||
| 243 | { S_NUMBER, { ANY'Y' }, FRZ(STO(NUMBER))((((NUMBER) | 256)) | 512) }, | |||
| 244 | { S_NUMBER, { ALP'Z', NUM'9' }, PUT(S_NUMBER)((S_NUMBER) | 1024) }, | |||
| 245 | { S_NUMBER, { '.' }, PUT(S_NUMBER)((S_NUMBER) | 1024) }, | |||
| 246 | { S_NUMBER, { 'E', 'e' }, PUT(S_NUMBER2)((S_NUMBER2) | 1024) }, | |||
| 247 | { S_NUMBER, { 'P', 'p' }, PUT(S_NUMBER2)((S_NUMBER2) | 1024) }, | |||
| 248 | ||||
| 249 | { S_NUMBER2, { ANY'Y' }, FRZ(STO(NUMBER))((((NUMBER) | 256)) | 512) }, | |||
| 250 | { S_NUMBER2, { ALP'Z', NUM'9' }, PUT(S_NUMBER)((S_NUMBER) | 1024) }, | |||
| 251 | { S_NUMBER2, { '+', '-' }, PUT(S_NUMBER)((S_NUMBER) | 1024) }, | |||
| 252 | ||||
| 253 | /* after a < */ | |||
| 254 | { S_LT, { ANY'Y' }, FRZ(STO(LT))((((LT) | 256)) | 512) }, | |||
| 255 | { S_LT, { '=' }, STO(LEQ)((LEQ) | 256) }, | |||
| 256 | { S_LT, { '<' }, S_LT2 }, | |||
| 257 | { S_LT, { ':' }, STO(DIG_LBRK)((DIG_LBRK) | 256) }, | |||
| 258 | { S_LT, { '%' }, STO(DIG_LBRA)((DIG_LBRA) | 256) }, | |||
| 259 | ||||
| 260 | { S_LT2, { ANY'Y' }, FRZ(STO(LSH))((((LSH) | 256)) | 512) }, | |||
| 261 | { S_LT2, { '=' }, STO(ASLSH)((ASLSH) | 256) }, | |||
| 262 | ||||
| 263 | /* after a > */ | |||
| 264 | { S_GT, { ANY'Y' }, FRZ(STO(GT))((((GT) | 256)) | 512) }, | |||
| 265 | { S_GT, { '=' }, STO(GEQ)((GEQ) | 256) }, | |||
| 266 | { S_GT, { '>' }, S_GT2 }, | |||
| 267 | ||||
| 268 | { S_GT2, { ANY'Y' }, FRZ(STO(RSH))((((RSH) | 256)) | 512) }, | |||
| 269 | { S_GT2, { '=' }, STO(ASRSH)((ASRSH) | 256) }, | |||
| 270 | ||||
| 271 | /* after a = */ | |||
| 272 | { S_EQ, { ANY'Y' }, FRZ(STO(ASGN))((((ASGN) | 256)) | 512) }, | |||
| 273 | { S_EQ, { '=' }, STO(SAME)((SAME) | 256) }, | |||
| 274 | #ifdef CAST_OP | |||
| 275 | { S_EQ, { '>' }, STO(CAST)((CAST) | 256) }, | |||
| 276 | #endif | |||
| 277 | ||||
| 278 | /* after a \ */ | |||
| 279 | { S_BACKSLASH, { ANY'Y' }, FRZ(S_BS)((S_BS) | 512) }, | |||
| 280 | { S_BACKSLASH, { 'U', 'u' }, FRZ(S_NAME_BS)((S_NAME_BS) | 512) }, | |||
| 281 | ||||
| 282 | /* after a letter */ | |||
| 283 | { S_NAME, { ANY'Y' }, FRZ(STO(NAME))((((NAME) | 256)) | 512) }, | |||
| 284 | { S_NAME, { ALP'Z', NUM'9' }, PUT(S_NAME)((S_NAME) | 1024) }, | |||
| 285 | { S_NAME, { '\\' }, S_NAME_BS }, | |||
| 286 | ||||
| 287 | /* after a \ in an identifier */ | |||
| 288 | { S_NAME_BS, { ANY'Y' }, FRZ(S_ROGUE_BS)((S_ROGUE_BS) | 512) }, | |||
| 289 | { S_NAME_BS, { 'u', 'U' }, PUT(S_NAME)((S_NAME) | 1024) }, | |||
| 290 | ||||
| 291 | /* after a L */ | |||
| 292 | { S_LCHAR, { ANY'Y' }, FRZ(S_NAME)((S_NAME) | 512) }, | |||
| 293 | { S_LCHAR, { '"' }, PUT(S_STRING)((S_STRING) | 1024) }, | |||
| 294 | { S_LCHAR, { '\'' }, PUT(S_CHAR)((S_CHAR) | 1024) }, | |||
| 295 | ||||
| 296 | /* after a ^ */ | |||
| 297 | { S_CIRC, { ANY'Y' }, FRZ(STO(CIRC))((((CIRC) | 256)) | 512) }, | |||
| 298 | { S_CIRC, { '=' }, STO(ASCIRC)((ASCIRC) | 256) }, | |||
| 299 | ||||
| 300 | /* after a | */ | |||
| 301 | { S_PIPE, { ANY'Y' }, FRZ(STO(OR))((((OR) | 256)) | 512) }, | |||
| 302 | { S_PIPE, { '=' }, STO(ASOR)((ASOR) | 256) }, | |||
| 303 | { S_PIPE, { '|' }, STO(LOR)((LOR) | 256) }, | |||
| 304 | ||||
| 305 | /* after a / and * */ | |||
| 306 | #ifdef SEMPER_FIDELIS | |||
| 307 | { S_COMMENT, { ANY'Y' }, PUT(S_COMMENT)((S_COMMENT) | 1024) }, | |||
| 308 | { S_COMMENT, { VCH'F' }, FRZ(S_TRUNCC)((S_TRUNCC) | 512) }, | |||
| 309 | { S_COMMENT, { '*' }, PUT(S_COMMENT2)((S_COMMENT2) | 1024) }, | |||
| 310 | ||||
| 311 | { S_COMMENT2, { ANY'Y' }, FRZ(S_COMMENT)((S_COMMENT) | 512) }, | |||
| 312 | { S_COMMENT2, { VCH'F' }, FRZ(S_TRUNCC)((S_TRUNCC) | 512) }, | |||
| 313 | { S_COMMENT2, { '*' }, PUT(S_COMMENT2)((S_COMMENT2) | 1024) }, | |||
| 314 | { S_COMMENT2, { '/' }, STO(PUT(COMMENT))((((COMMENT) | 1024)) | 256) }, | |||
| 315 | ||||
| 316 | { S_COMMENT5, { ANY'Y' }, PUT(S_COMMENT5)((S_COMMENT5) | 1024) }, | |||
| 317 | { S_COMMENT5, { VCH'F' }, FRZ(S_DECAY)((S_DECAY) | 512) }, | |||
| 318 | { S_COMMENT5, { '\n' }, FRZ(STO(COMMENT))((((COMMENT) | 256)) | 512) }, | |||
| 319 | #else | |||
| 320 | { S_COMMENT, { ANY'Y' }, S_COMMENT }, | |||
| 321 | { S_COMMENT, { VCH'F' }, FRZ(S_TRUNCC)((S_TRUNCC) | 512) }, | |||
| 322 | { S_COMMENT, { '*' }, S_COMMENT2 }, | |||
| 323 | ||||
| 324 | { S_COMMENT2, { ANY'Y' }, FRZ(S_COMMENT)((S_COMMENT) | 512) }, | |||
| 325 | { S_COMMENT2, { VCH'F' }, FRZ(S_TRUNCC)((S_TRUNCC) | 512) }, | |||
| 326 | { S_COMMENT2, { '*' }, S_COMMENT2 }, | |||
| 327 | { S_COMMENT2, { '/' }, STO(COMMENT)((COMMENT) | 256) }, | |||
| 328 | ||||
| 329 | { S_COMMENT5, { ANY'Y' }, S_COMMENT5 }, | |||
| 330 | { S_COMMENT5, { VCH'F' }, FRZ(S_DECAY)((S_DECAY) | 512) }, | |||
| 331 | { S_COMMENT5, { '\n' }, FRZ(STO(COMMENT))((((COMMENT) | 256)) | 512) }, | |||
| 332 | #endif | |||
| 333 | ||||
| 334 | /* dummy end of machine description */ | |||
| 335 | { 0, { 0 }, 0 } | |||
| 336 | }; | |||
| 337 | ||||
| 338 | /* | |||
| 339 | * cppm is the table used to store the automaton: if we are in state s | |||
| 340 | * and we read character c, we apply the action cppm[s][c] (jumping to | |||
| 341 | * another state, or emitting a token). | |||
| 342 | * cppm_vch is the table for the special virtual character "end of input" | |||
| 343 | */ | |||
| 344 | static int cppm[MSTATE][MAX_CHAR_VAL128]; | |||
| 345 | static int cppm_vch[MSTATE]; | |||
| 346 | ||||
| 347 | /* | |||
| 348 | * init_cppm() fills cppm[][] with the information stored in cppms[]. | |||
| 349 | * It must be called before beginning the lexing process. | |||
| 350 | */ | |||
| 351 | void init_cppmucpp_init_cppm(void) | |||
| 352 | { | |||
| 353 | int i, j, k, c; | |||
| 354 | static unsigned char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; | |||
| 355 | static unsigned char lower[] = "abcdefghijklmnopqrstuvwxyz"; | |||
| 356 | unsigned char *cp; | |||
| 357 | ||||
| 358 | for (i = 0; i < MSTATE; i ++) { | |||
| 359 | for (j = 0; j < MAX_CHAR_VAL128; j ++) cppm[i][j] = S_OUCH; | |||
| 360 | cppm_vch[i] = S_OUCH; | |||
| 361 | } | |||
| 362 | for (i = 0; cppms[i].input[0]; i ++) for (k = 0; k < CMCR2; k ++) { | |||
| 363 | int s = cppms[i].state; | |||
| 364 | int ns = cppms[i].new_state; | |||
| 365 | ||||
| 366 | switch (c = cppms[i].input[k]) { | |||
| 367 | case 0: | |||
| 368 | break; | |||
| 369 | case SPC' ': | |||
| 370 | /* see space_char() also */ | |||
| 371 | cppm[s][' '] = ns; | |||
| 372 | cppm[s]['\t'] = ns; | |||
| 373 | cppm[s]['\v'] = ns; | |||
| 374 | cppm[s]['\f'] = ns; | |||
| 375 | #ifdef UNBREAKABLE_SPACE | |||
| 376 | if (MAX_CHAR_VAL128 > UNBREAKABLE_SPACE) | |||
| 377 | cppm[s][UNBREAKABLE_SPACE] = ns; | |||
| 378 | #endif | |||
| 379 | break; | |||
| 380 | case ALP'Z': | |||
| 381 | for (cp = upper; *cp; cp ++) cppm[s][(int)*cp] = ns; | |||
| 382 | for (cp = lower; *cp; cp ++) cppm[s][(int)*cp] = ns; | |||
| 383 | cppm[s]['_'] = ns; | |||
| 384 | break; | |||
| 385 | case NUM'9': | |||
| 386 | for (j = '0'; j <= '9'; j ++) cppm[s][j] = ns; | |||
| 387 | break; | |||
| 388 | case ANY'Y': | |||
| 389 | for (j = 0; j < MAX_CHAR_VAL128; j ++) cppm[s][j] = ns; | |||
| 390 | cppm_vch[s] = ns; | |||
| 391 | break; | |||
| 392 | case VCH'F': | |||
| 393 | cppm_vch[s] = ns; | |||
| 394 | break; | |||
| 395 | default: | |||
| 396 | cppm[s][c] = ns; | |||
| 397 | break; | |||
| 398 | } | |||
| 399 | } | |||
| 400 | } | |||
| 401 | ||||
| 402 | /* | |||
| 403 | * Make a character equivalent to a letter in identifiers. | |||
| 404 | */ | |||
| 405 | void set_identifier_char(int c) | |||
| 406 | { | |||
| 407 | cppm[S_START][c] = PUT(S_NAME)((S_NAME) | 1024); | |||
| 408 | cppm[S_NAME][c] = PUT(S_NAME)((S_NAME) | 1024); | |||
| 409 | } | |||
| 410 | ||||
| 411 | /* | |||
| 412 | * Remove the "identifier" status from a character. | |||
| 413 | */ | |||
| 414 | void unset_identifier_char(int c) | |||
| 415 | { | |||
| 416 | cppm[S_START][c] = S_ILL; | |||
| 417 | cppm[S_NAME][c] = FRZ(STO(NAME))((((NAME) | 256)) | 512); | |||
| 418 | } | |||
| 419 | ||||
| 420 | int space_charucpp_space_char(int c) | |||
| 421 | { | |||
| 422 | if (c == ' ' || c == '\t' || c == '\v' || c == '\f' | |||
| 423 | #ifdef UNBREAKABLE_SPACE | |||
| 424 | || c == UNBREAKABLE_SPACE | |||
| 425 | #endif | |||
| 426 | ) return 1; | |||
| 427 | return 0; | |||
| 428 | } | |||
| 429 | ||||
| 430 | #ifndef NO_UCPP_BUF1 | |||
| 431 | /* | |||
| 432 | * our output buffer is full, flush it | |||
| 433 | */ | |||
| 434 | void flush_output(struct lexer_state *ls) | |||
| 435 | { | |||
| 436 | size_t x = ls->sbuf, y = 0, z; | |||
| 437 | ||||
| 438 | if (ls->sbuf == 0) return; | |||
| 439 | do { | |||
| 440 | z = fwrite(ls->output_buf + y, 1, x, ls->output); | |||
| 441 | x -= z; | |||
| 442 | y += z; | |||
| 443 | } while (z && x > 0); | |||
| 444 | if (!y) { | |||
| 445 | errorucpp_error(ls->line, "could not flush output (disk full ?)"); | |||
| 446 | die(); | |||
| 447 | } | |||
| 448 | ls->sbuf = 0; | |||
| 449 | } | |||
| 450 | #endif | |||
| 451 | ||||
| 452 | /* | |||
| 453 | * Output one character; flush the buffer if needed. | |||
| 454 | * This function should not be called, except by put_char(). | |||
| 455 | */ | |||
| 456 | static inline void write_char(struct lexer_state *ls, unsigned char c) | |||
| 457 | { | |||
| 458 | #ifndef NO_UCPP_BUF1 | |||
| 459 | ls->output_buf[ls->sbuf ++] = c; | |||
| 460 | if (ls->sbuf == OUTPUT_BUF_MEMG8192) flush_output(ls); | |||
| 461 | #else | |||
| 462 | if (putc((int)c, ls->output)_IO_putc ((int)c, ls->output) == EOF(-1)) { | |||
| 463 | errorucpp_error(ls->line, "output write error (disk full ?)"); | |||
| 464 | die(); | |||
| 465 | } | |||
| 466 | #endif | |||
| 467 | if (c == '\n') { | |||
| 468 | ls->oline ++; | |||
| 469 | } | |||
| 470 | } | |||
| 471 | ||||
| 472 | /* | |||
| 473 | * schedule a character for output | |||
| 474 | */ | |||
| 475 | void put_charucpp_put_char(struct lexer_state *ls, unsigned char c) | |||
| 476 | { | |||
| 477 | if (ls->flags & KEEP_OUTPUT0x020000UL) write_char(ls, c); | |||
| 478 | } | |||
| 479 | ||||
| 480 | /* | |||
| 481 | * get next raw input character | |||
| 482 | */ | |||
| 483 | static inline int read_char(struct lexer_state *ls) | |||
| 484 | { | |||
| 485 | unsigned char c; | |||
| 486 | ||||
| 487 | if (!ls->input) { | |||
| 488 | return ((ls->pbuf ++) < ls->ebuf) ? | |||
| 489 | ls->input_string[ls->pbuf - 1] : -1; | |||
| 490 | } | |||
| 491 | while (1) { | |||
| 492 | #ifndef NO_UCPP_BUF1 | |||
| 493 | if (ls->pbuf == ls->ebuf) { | |||
| 494 | #ifdef UCPP_MMAP | |||
| 495 | if (ls->from_mmap) { | |||
| 496 | munmap((void *)ls->input_buf, ls->ebuf); | |||
| 497 | ls->from_mmap = 0; | |||
| 498 | ls->input_buf = ls->input_buf_sav; | |||
| 499 | } | |||
| 500 | #endif | |||
| 501 | ls->ebuf = fread(ls->input_buf, 1, | |||
| 502 | INPUT_BUF_MEMG8192, ls->input); | |||
| 503 | ls->pbuf = 0; | |||
| 504 | } | |||
| 505 | if (ls->ebuf == 0) return -1; | |||
| 506 | c = ls->input_buf[ls->pbuf ++]; | |||
| 507 | #else | |||
| 508 | int x = getc(ls->input)_IO_getc (ls->input); | |||
| 509 | ||||
| 510 | if (x == EOF(-1)) return -1; | |||
| 511 | c = x; | |||
| 512 | #endif | |||
| 513 | if (ls->flags & COPY_LINE0x040000UL) { | |||
| 514 | if (c == '\n') { | |||
| 515 | ls->copy_line[ls->cli] = 0; | |||
| 516 | ls->cli = 0; | |||
| 517 | } else if (ls->cli < (COPY_LINE_LENGTH80 - 1)) { | |||
| 518 | ls->copy_line[ls->cli ++] = c; | |||
| 519 | } | |||
| 520 | } | |||
| 521 | if (ls->macfile && c == '\n') { | |||
| 522 | ls->macfile = 0; | |||
| 523 | continue; | |||
| 524 | } | |||
| 525 | ls->macfile = 0; | |||
| 526 | if (c == '\r') { | |||
| 527 | /* | |||
| 528 | * We found a '\r'; we handle it as a newline | |||
| 529 | * and ignore the next newline. This should work | |||
| 530 | * with all combinations of MS-DOS, Macintosh and | |||
| 531 | * Unix files on these three platforms. On other | |||
| 532 | * platforms, native file formats are always | |||
| 533 | * supported. | |||
| 534 | */ | |||
| 535 | ls->macfile = 1; | |||
| 536 | c = '\n'; | |||
| 537 | } | |||
| 538 | break; | |||
| 539 | } | |||
| 540 | return c; | |||
| 541 | } | |||
| 542 | ||||
| 543 | /* | |||
| 544 | * next_fifo_char(), char_lka1() and char_lka2() give a two-character | |||
| 545 | * look-ahead on the input stream; this is needed for trigraphs | |||
| 546 | */ | |||
| 547 | static inline int next_fifo_char(struct lexer_state *ls) | |||
| 548 | { | |||
| 549 | int c; | |||
| 550 | ||||
| 551 | if (ls->nlka != 0) { | |||
| 552 | c = ls->lka[0]; | |||
| 553 | ls->lka[0] = ls->lka[1]; | |||
| 554 | ls->nlka --; | |||
| 555 | } else c = read_char(ls); | |||
| 556 | return c; | |||
| 557 | } | |||
| 558 | ||||
| 559 | static inline int char_lka1(struct lexer_state *ls) | |||
| 560 | { | |||
| 561 | if (ls->nlka == 0) { | |||
| 562 | ls->lka[0] = read_char(ls); | |||
| 563 | ls->nlka ++; | |||
| 564 | } | |||
| 565 | return ls->lka[0]; | |||
| 566 | } | |||
| 567 | ||||
| 568 | static inline int char_lka2(struct lexer_state *ls) | |||
| 569 | { | |||
| 570 | #ifdef AUDIT | |||
| 571 | if (ls->nlka == 0) ouchucpp_ouch("always in motion future is"); | |||
| 572 | #endif | |||
| 573 | if (ls->nlka == 1) { | |||
| 574 | ls->lka[1] = read_char(ls); | |||
| 575 | ls->nlka ++; | |||
| 576 | } | |||
| 577 | return ls->lka[1]; | |||
| 578 | } | |||
| 579 | ||||
| 580 | static struct trigraph { | |||
| 581 | int old, new; | |||
| 582 | } trig[9] = { | |||
| 583 | { '=', '#' }, | |||
| 584 | { '/', '\\' }, | |||
| 585 | { '\'', '^' }, | |||
| 586 | { '(', '[' }, | |||
| 587 | { ')', ']' }, | |||
| 588 | { '!', '|' }, | |||
| 589 | { '<', '{' }, | |||
| 590 | { '>', '}' }, | |||
| 591 | { '-', '~' } | |||
| 592 | }; | |||
| 593 | ||||
| 594 | /* | |||
| 595 | * Returns the next character, after treatment of trigraphs and terminating | |||
| 596 | * backslashes. Return value is -1 if there is no more input. | |||
| 597 | */ | |||
| 598 | static inline int next_char(struct lexer_state *ls) | |||
| 599 | { | |||
| 600 | int c; | |||
| 601 | ||||
| 602 | if (!ls->discard) return ls->last; | |||
| 603 | ls->discard = 0; | |||
| 604 | do { | |||
| 605 | c = next_fifo_char(ls); | |||
| 606 | /* check trigraphs */ | |||
| 607 | if (c == '?' && char_lka1(ls) == '?' | |||
| 608 | && (ls->flags & HANDLE_TRIGRAPHS0x008000UL)) { | |||
| 609 | int i, d; | |||
| 610 | ||||
| 611 | d = char_lka2(ls); | |||
| 612 | for (i = 0; i < 9; i ++) if (d == trig[i].old) { | |||
| 613 | if (ls->flags & WARN_TRIGRAPHS0x000004UL) { | |||
| 614 | ls->count_trigraphs ++; | |||
| 615 | } | |||
| 616 | if (ls->flags & WARN_TRIGRAPHS_MORE0x000008UL) { | |||
| 617 | warningucpp_warning(ls->line, "trigraph ?""?%c " | |||
| 618 | "encountered", d); | |||
| 619 | } | |||
| 620 | next_fifo_char(ls); | |||
| 621 | next_fifo_char(ls); | |||
| 622 | c = trig[i].new; | |||
| 623 | break; | |||
| 624 | } | |||
| 625 | } | |||
| 626 | if (c == '\\' && char_lka1(ls) == '\n') { | |||
| 627 | ls->line ++; | |||
| 628 | next_fifo_char(ls); | |||
| 629 | } else if (c == '\r' && char_lka1(ls) == '\n') { | |||
| 630 | ls->line ++; | |||
| 631 | next_fifo_char(ls); | |||
| 632 | c = '\n'; | |||
| 633 | return c; | |||
| 634 | } else { | |||
| 635 | ls->last = c; | |||
| 636 | return c; | |||
| 637 | } | |||
| 638 | } while (1); | |||
| 639 | } | |||
| 640 | ||||
| 641 | /* | |||
| 642 | * wrapper for next_char(), to be called from outside | |||
| 643 | * (used by #error, #include directives) | |||
| 644 | */ | |||
| 645 | int grap_charucpp_grap_char(struct lexer_state *ls) | |||
| 646 | { | |||
| 647 | return next_char(ls); | |||
| 648 | } | |||
| 649 | ||||
| 650 | /* | |||
| 651 | * Discard the current character, so that the next call to next_char() | |||
| 652 | * will step into the input stream. | |||
| 653 | */ | |||
| 654 | void discard_charucpp_discard_char(struct lexer_state *ls) | |||
| 655 | { | |||
| 656 | #ifdef AUDIT | |||
| 657 | if (ls->discard) ouchucpp_ouch("overcollecting garbage"); | |||
| 658 | #endif | |||
| 659 | ls->discard = 1; | |||
| 660 | ls->utf8 = 0; | |||
| 661 | if (ls->last == '\n') ls->line ++; | |||
| 662 | } | |||
| 663 | ||||
| 664 | /* | |||
| 665 | * Convert a UTF-8 encoded character to a Universal Character Name | |||
| 666 | * using \u (or \U when appropriate). | |||
| 667 | */ | |||
| 668 | static int utf8_to_string(unsigned char buf[], unsigned long utf8) | |||
| 669 | { | |||
| 670 | unsigned long val = 0; | |||
| 671 | static char hex[16] = "0123456789abcdef"; | |||
| 672 | ||||
| 673 | if (utf8 & 0x80UL) { | |||
| 674 | unsigned long x1, x2, x3, x4; | |||
| 675 | ||||
| 676 | x1 = (utf8 >> 24) & 0x7fUL; | |||
| 677 | x2 = (utf8 >> 16) & 0x7fUL; | |||
| 678 | x3 = (utf8 >> 8) & 0x7fUL; | |||
| 679 | x4 = (utf8) & 0x3fUL; | |||
| 680 | x1 &= 0x07UL; | |||
| 681 | if (x2 & 0x40UL) x2 &= 0x0fUL; | |||
| 682 | if (x3 & 0x40UL) x3 &= 0x1fUL; | |||
| 683 | val = x4 | (x3 << 6) | (x2 << 12) | (x1 << 16); | |||
| 684 | } else val = utf8; | |||
| 685 | if (val < 128) { | |||
| 686 | buf[0] = val; | |||
| 687 | buf[1] = 0; | |||
| 688 | return 1; | |||
| 689 | } else if (val < 0xffffUL) { | |||
| 690 | buf[0] = '\\'; | |||
| 691 | buf[1] = 'u'; | |||
| 692 | buf[2] = hex[(size_t)(val >> 12)]; | |||
| 693 | buf[3] = hex[(size_t)((val >> 8) & 0xfU)]; | |||
| 694 | buf[4] = hex[(size_t)((val >> 4) & 0xfU)]; | |||
| 695 | buf[5] = hex[(size_t)(val & 0xfU)]; | |||
| 696 | buf[6] = 0; | |||
| 697 | return 6; | |||
| 698 | } | |||
| 699 | buf[0] = '\\'; | |||
| 700 | buf[1] = 'U'; | |||
| 701 | buf[2] = '0'; | |||
| 702 | buf[3] = '0'; | |||
| 703 | buf[4] = hex[(size_t)(val >> 20)]; | |||
| 704 | buf[5] = hex[(size_t)((val >> 16) & 0xfU)]; | |||
| 705 | buf[6] = hex[(size_t)((val >> 12) & 0xfU)]; | |||
| 706 | buf[7] = hex[(size_t)((val >> 8) & 0xfU)]; | |||
| 707 | buf[8] = hex[(size_t)((val >> 4) & 0xfU)]; | |||
| 708 | buf[9] = hex[(size_t)(val & 0xfU)]; | |||
| 709 | buf[10] = 0; | |||
| 710 | return 10; | |||
| 711 | } | |||
| 712 | ||||
| 713 | /* | |||
| 714 | * Scan the identifier and put it in canonical form: | |||
| 715 | * -- transform \U0000xxxx into \uxxxx | |||
| 716 | * -- inside \u and \U, make letters lower case | |||
| 717 | * -- report (some) incorrect use of UCN | |||
| 718 | */ | |||
| 719 | static void canonize_id(struct lexer_state *ls, char *id) | |||
| 720 | { | |||
| 721 | char *c, *d; | |||
| 722 | ||||
| 723 | for (c = d = id; *c;) { | |||
| 724 | if (*c == '\\') { | |||
| 725 | int i; | |||
| 726 | ||||
| 727 | if (!*(c + 1)) goto canon_error; | |||
| 728 | if (*(c + 1) == 'U') { | |||
| 729 | for (i = 0; i < 8 && *(c + i + 2); i ++); | |||
| 730 | if (i != 8) goto canon_error; | |||
| 731 | *(d ++) = '\\'; | |||
| 732 | c += 2; | |||
| 733 | for (i = 0; i < 4 && *(c + i) == '0'; i ++); | |||
| 734 | if (i == 4) { | |||
| 735 | *(d ++) = 'u'; | |||
| 736 | c += 4; | |||
| 737 | } else { | |||
| 738 | *(d ++) = 'U'; | |||
| 739 | i = 8; | |||
| 740 | } | |||
| 741 | for (; i > 0; i --) { | |||
| 742 | switch (*c) { | |||
| 743 | case 'A': *(d ++) = 'a'; break; | |||
| 744 | case 'B': *(d ++) = 'b'; break; | |||
| 745 | case 'C': *(d ++) = 'c'; break; | |||
| 746 | case 'D': *(d ++) = 'd'; break; | |||
| 747 | case 'E': *(d ++) = 'e'; break; | |||
| 748 | case 'F': *(d ++) = 'f'; break; | |||
| 749 | default: *(d ++) = *c; break; | |||
| 750 | } | |||
| 751 | c ++; | |||
| 752 | } | |||
| 753 | } else if (*(c + 1) == 'u') { | |||
| 754 | for (i = 0; i < 4 && *(c + i + 2); i ++); | |||
| 755 | if (i != 4) goto canon_error; | |||
| 756 | *(d ++) = '\\'; | |||
| 757 | *(d ++) = 'u'; | |||
| 758 | c += 2; | |||
| 759 | for (; i > 0; i --) { | |||
| 760 | switch (*c) { | |||
| 761 | case 'A': *(d ++) = 'a'; break; | |||
| 762 | case 'B': *(d ++) = 'b'; break; | |||
| 763 | case 'C': *(d ++) = 'c'; break; | |||
| 764 | case 'D': *(d ++) = 'd'; break; | |||
| 765 | case 'E': *(d ++) = 'e'; break; | |||
| 766 | case 'F': *(d ++) = 'f'; break; | |||
| 767 | default: *(d ++) = *c; break; | |||
| 768 | } | |||
| 769 | c ++; | |||
| 770 | } | |||
| 771 | } else goto canon_error; | |||
| 772 | continue; | |||
| 773 | } | |||
| 774 | *(d ++) = *(c ++); | |||
| 775 | } | |||
| 776 | *d = 0; | |||
| 777 | return; | |||
| 778 | ||||
| 779 | canon_error: | |||
| 780 | for (; *c; *(d ++) = *(c ++)); | |||
| 781 | if (ls->flags & WARN_STANDARD0x000001UL) { | |||
| 782 | warningucpp_warning(ls->line, "malformed identifier with UCN: '%s'", id); | |||
| 783 | } | |||
| 784 | *d = 0; | |||
| 785 | } | |||
| 786 | ||||
| 787 | /* | |||
| 788 | * Run the automaton, in order to get the next token. | |||
| 789 | * This function should not be called, except by next_token() | |||
| 790 | * | |||
| 791 | * return value: 1 on error, 2 on end-of-file, 0 otherwise. | |||
| 792 | */ | |||
| 793 | static inline int read_token(struct lexer_state *ls) | |||
| 794 | { | |||
| 795 | int cstat = S_START, nstat; | |||
| 796 | size_t ltok = 0; | |||
| 797 | int c, outc = 0, ucn_in_id = 0; | |||
| 798 | int shift_state; | |||
| 799 | unsigned long utf8; | |||
| 800 | long l = ls->line; | |||
| 801 | ||||
| 802 | ls->ctok->line = l; | |||
| 803 | if (ls->pending_token) { | |||
| 804 | if ((ls->ctok->type = ls->pending_token) == BUNCH) { | |||
| 805 | ls->ctok->name[0] = '\\'; | |||
| 806 | ls->ctok->name[1] = 0; | |||
| 807 | } | |||
| 808 | ls->pending_token = 0; | |||
| 809 | return 0; | |||
| 810 | } | |||
| 811 | if (ls->flags & UTF8_SOURCE0x004000UL) { | |||
| 812 | utf8 = ls->utf8; | |||
| 813 | shift_state = 0; | |||
| 814 | } | |||
| 815 | if (!(ls->flags & LEXER0x010000UL) && (ls->flags & KEEP_OUTPUT0x020000UL)) | |||
| 816 | for (; ls->line > ls->oline;) put_charucpp_put_char(ls, '\n'); | |||
| 817 | do { | |||
| 818 | c = next_char(ls); | |||
| 819 | if (c < 0) { | |||
| 820 | if ((ls->flags & UTF8_SOURCE0x004000UL) && shift_state) { | |||
| 821 | if (ls->flags & WARN_STANDARD0x000001UL) | |||
| 822 | warningucpp_warning(ls->line, "truncated UTF-8 " | |||
| 823 | "character"); | |||
| 824 | shift_state = 0; | |||
| 825 | utf8 = 0; | |||
| 826 | } | |||
| 827 | if (cstat == S_START) return 2; | |||
| 828 | nstat = cppm_vch[cstat]; | |||
| 829 | } else { | |||
| 830 | if (ls->flags & UTF8_SOURCE0x004000UL) { | |||
| 831 | if (shift_state) { | |||
| 832 | if ((c & 0xc0) != 0x80) { | |||
| 833 | if (ls->flags & WARN_STANDARD0x000001UL) | |||
| 834 | warningucpp_warning(ls->line, | |||
| 835 | "truncated " | |||
| 836 | "UTF-8 " | |||
| 837 | "character"); | |||
| 838 | shift_state = 0; | |||
| 839 | utf8 = 0; | |||
| 840 | c = '_'; | |||
| 841 | } else { | |||
| 842 | utf8 = (utf8 << 8) | c; | |||
| 843 | if (-- shift_state) { | |||
| 844 | ls->discard = 1; | |||
| 845 | continue; | |||
| 846 | } | |||
| 847 | c = '_'; | |||
| 848 | } | |||
| 849 | } else if ((c & 0xc0) == 0xc0) { | |||
| 850 | if ((c & 0x30) == 0x30) { | |||
| 851 | shift_state = 3; | |||
| 852 | } else if (c & 0x20) { | |||
| 853 | shift_state = 2; | |||
| 854 | } else { | |||
| 855 | shift_state = 1; | |||
| 856 | } | |||
| 857 | utf8 = c; | |||
| 858 | ls->discard = 1; | |||
| 859 | continue; | |||
| 860 | } else utf8 = 0; | |||
| 861 | } | |||
| 862 | nstat = cppm[cstat][c < MAX_CHAR_VAL128 ? c : 0]; | |||
| 863 | } | |||
| 864 | #ifdef AUDIT | |||
| 865 | if (nstat == S_OUCH) { | |||
| 866 | ouchucpp_ouch("bad move..."); | |||
| 867 | } | |||
| 868 | #endif | |||
| 869 | /* | |||
| 870 | * disable C++-like comments | |||
| 871 | */ | |||
| 872 | if (nstat == S_COMMENT5 && !(ls->flags & CPLUSPLUS_COMMENTS0x000100UL)) | |||
| 873 | nstat = FRZ(STO(SLASH))((((SLASH) | 256)) | 512); | |||
| 874 | ||||
| 875 | if (noMOD(nstat)((nstat) & 255) >= MSTATE && !ttSTO(nstat)((nstat) & 256)) | |||
| 876 | switch (noMOD(nstat)((nstat) & 255)) { | |||
| 877 | case S_ILL: | |||
| 878 | if (ls->flags & CCHARSET0x000040UL) { | |||
| 879 | errorucpp_error(ls->line, "illegal character '%c'", c); | |||
| 880 | return 1; | |||
| 881 | } | |||
| 882 | nstat = PUT(STO(BUNCH))((((BUNCH) | 256)) | 1024); | |||
| 883 | break; | |||
| 884 | case S_BS: | |||
| 885 | ls->ctok->name[0] = '\\'; | |||
| 886 | ltok ++; | |||
| 887 | nstat = FRZ(STO(BUNCH))((((BUNCH) | 256)) | 512); | |||
| 888 | if (!(ls->flags & LEXER0x010000UL)) put_charucpp_put_char(ls, '\\'); | |||
| 889 | break; | |||
| 890 | case S_ROGUE_BS: | |||
| 891 | ls->pending_token = BUNCH; | |||
| 892 | nstat = FRZ(STO(NAME))((((NAME) | 256)) | 512); | |||
| 893 | break; | |||
| 894 | case S_DDOT: | |||
| 895 | ls->pending_token = DOT; | |||
| 896 | nstat = FRZ(STO(DOT))((((DOT) | 256)) | 512); | |||
| 897 | break; | |||
| 898 | case S_DDSHARP: | |||
| 899 | ls->pending_token = PCT; | |||
| 900 | nstat = FRZ(STO(DIG_SHARP))((((DIG_SHARP) | 256)) | 512); | |||
| 901 | break; | |||
| 902 | case S_BEHEAD: | |||
| 903 | errorucpp_error(l, "unfinished string at end of line"); | |||
| 904 | return 1; | |||
| 905 | case S_DECAY: | |||
| 906 | warningucpp_warning(l, "unterminated // comment"); | |||
| 907 | nstat = FRZ(STO(COMMENT))((((COMMENT) | 256)) | 512); | |||
| 908 | break; | |||
| 909 | case S_TRUNC: | |||
| 910 | errorucpp_error(l, "truncated token"); | |||
| 911 | return 1; | |||
| 912 | case S_TRUNCC: | |||
| 913 | errorucpp_error(l, "truncated comment"); | |||
| 914 | return 1; | |||
| 915 | #ifdef AUDIT | |||
| 916 | case S_OUCH: | |||
| 917 | ouchucpp_ouch("machine went out of control"); | |||
| 918 | break; | |||
| 919 | #endif | |||
| 920 | } | |||
| 921 | if (!ttFRZ(nstat)((nstat) & 512)) { | |||
| 922 | discard_charucpp_discard_char(ls); | |||
| 923 | if (!(ls->flags & LEXER0x010000UL) && ls->condcomp) { | |||
| 924 | int z = ttSTO(nstat)((nstat) & 256) ? S_ILL : noMOD(nstat)((nstat) & 255); | |||
| 925 | ||||
| 926 | if (cstat == S_NAME || z == S_NAME | |||
| 927 | || ((CMT(cstat)((cstat) >= S_COMMENT && (cstat) <= S_COMMENT5) || CMT(z)((z) >= S_COMMENT && (z) <= S_COMMENT5)) | |||
| 928 | && (ls->flags & DISCARD_COMMENTS0x000080UL))) { | |||
| 929 | outc = 0; | |||
| 930 | } else if (z == S_LCHAR || z == S_SLASH | |||
| 931 | || (z == S_SHARP && ls->ltwnl) | |||
| 932 | || (z == S_PCT && ls->ltwnl) | |||
| 933 | || (z == S_BACKSLASH)) { | |||
| 934 | outc = c; | |||
| 935 | } else if (z == S_PCT2 && ls->ltwnl) { | |||
| 936 | outc = -1; | |||
| 937 | } else if (z == S_PCT3 && ls->ltwnl) { | |||
| 938 | /* we have %:% but this still might | |||
| 939 | not be a %:%: */ | |||
| 940 | outc = -2; | |||
| 941 | } else { | |||
| 942 | if (outc < 0) { | |||
| 943 | put_charucpp_put_char(ls, '%'); | |||
| 944 | put_charucpp_put_char(ls, ':'); | |||
| 945 | if (outc == -2) | |||
| 946 | put_charucpp_put_char(ls, '%'); | |||
| 947 | outc = 0; | |||
| 948 | } else if (outc) { | |||
| 949 | put_charucpp_put_char(ls, outc); | |||
| 950 | outc = 0; | |||
| 951 | } | |||
| 952 | put_charucpp_put_char(ls, c); | |||
| 953 | } | |||
| 954 | } | |||
| 955 | } else if (outc == '/' && !(ls->flags & LEXER0x010000UL) | |||
| 956 | && ls->condcomp) { | |||
| 957 | /* this is a hack: we need to dump a pending slash */ | |||
| 958 | put_charucpp_put_char(ls, outc); | |||
| 959 | outc = 0; | |||
| 960 | } | |||
| 961 | if (ttPUT(nstat)((nstat) & 1024)) { | |||
| 962 | if (cstat == S_NAME_BS) { | |||
| 963 | ucn_in_id = 1; | |||
| 964 | wan(ls->ctok->name, ltok, '\\', ls->tknl)do { if ((ltok) == (ls->tknl)) { (ls->tknl) += (ls-> tknl); (ls->ctok->name) = incmem((ls->ctok->name) , (ltok) * sizeof('\\'), (ls->tknl) * sizeof('\\')); } (ls ->ctok->name)[(ltok) ++] = ('\\'); } while (0); | |||
| 965 | } | |||
| 966 | if ((ls->flags & UTF8_SOURCE0x004000UL) && utf8) { | |||
| ||||
| 967 | unsigned char buf[11]; | |||
| 968 | int i, j; | |||
| 969 | ||||
| 970 | for (i = 0, j = utf8_to_string(buf, utf8); | |||
| 971 | i < j; i ++) | |||
| 972 | wan(ls->ctok->name, ltok, buf[i],do { if ((ltok) == (ls->tknl)) { (ls->tknl) += (ls-> tknl); (ls->ctok->name) = incmem((ls->ctok->name) , (ltok) * sizeof(buf[i]), (ls->tknl) * sizeof(buf[i])); } (ls->ctok->name)[(ltok) ++] = (buf[i]); } while (0) | |||
| 973 | ls->tknl)do { if ((ltok) == (ls->tknl)) { (ls->tknl) += (ls-> tknl); (ls->ctok->name) = incmem((ls->ctok->name) , (ltok) * sizeof(buf[i]), (ls->tknl) * sizeof(buf[i])); } (ls->ctok->name)[(ltok) ++] = (buf[i]); } while (0); | |||
| 974 | /* if (j > 1) ucn_in_id = 1; */ | |||
| 975 | } else wan(ls->ctok->name, ltok,do { if ((ltok) == (ls->tknl)) { (ls->tknl) += (ls-> tknl); (ls->ctok->name) = incmem((ls->ctok->name) , (ltok) * sizeof((unsigned char)c), (ls->tknl) * sizeof(( unsigned char)c)); } (ls->ctok->name)[(ltok) ++] = ((unsigned char)c); } while (0) | |||
| 976 | (unsigned char)c, ls->tknl)do { if ((ltok) == (ls->tknl)) { (ls->tknl) += (ls-> tknl); (ls->ctok->name) = incmem((ls->ctok->name) , (ltok) * sizeof((unsigned char)c), (ls->tknl) * sizeof(( unsigned char)c)); } (ls->ctok->name)[(ltok) ++] = ((unsigned char)c); } while (0); | |||
| 977 | } | |||
| 978 | if (ttSTO(nstat)((nstat) & 256)) { | |||
| 979 | if (S_TOKEN(noMOD(nstat))((((nstat) & 255)) >= NUMBER && (((nstat) & 255)) <= CHAR)) { | |||
| 980 | wan(ls->ctok->name, ltok,do { if ((ltok) == (ls->tknl)) { (ls->tknl) += (ls-> tknl); (ls->ctok->name) = incmem((ls->ctok->name) , (ltok) * sizeof((unsigned char)0), (ls->tknl) * sizeof(( unsigned char)0)); } (ls->ctok->name)[(ltok) ++] = ((unsigned char)0); } while (0) | |||
| 981 | (unsigned char)0, ls->tknl)do { if ((ltok) == (ls->tknl)) { (ls->tknl) += (ls-> tknl); (ls->ctok->name) = incmem((ls->ctok->name) , (ltok) * sizeof((unsigned char)0), (ls->tknl) * sizeof(( unsigned char)0)); } (ls->ctok->name)[(ltok) ++] = ((unsigned char)0); } while (0); | |||
| 982 | } | |||
| 983 | ls->ctok->type = noMOD(nstat)((nstat) & 255); | |||
| 984 | break; | |||
| 985 | } | |||
| 986 | cstat = noMOD(nstat)((nstat) & 255); | |||
| 987 | } while (1); | |||
| 988 | if (!(ls->flags & LEXER0x010000UL) && (ls->flags & DISCARD_COMMENTS0x000080UL) | |||
| 989 | && ls->ctok->type == COMMENT) put_charucpp_put_char(ls, ' '); | |||
| 990 | if (ucn_in_id && ls->ctok->type == NAME) | |||
| 991 | canonize_id(ls, ls->ctok->name); | |||
| 992 | return 0; | |||
| 993 | } | |||
| 994 | ||||
| 995 | /* | |||
| 996 | * fills ls->ctok with the next token | |||
| 997 | */ | |||
| 998 | int next_tokenucpp_next_token(struct lexer_state *ls) | |||
| 999 | { | |||
| 1000 | if (ls->flags & READ_AGAIN0x080000UL) { | |||
| ||||
| 1001 | ls->flags &= ~READ_AGAIN0x080000UL; | |||
| 1002 | if (!(ls->flags & LEXER0x010000UL)) { | |||
| 1003 | char *c = S_TOKEN(ls->ctok->type)((ls->ctok->type) >= NUMBER && (ls->ctok-> type) <= CHAR) ? | |||
| 1004 | ls->ctok->name : token_nameucpp_token_name(ls->ctok); | |||
| 1005 | if (ls->ctok->type == OPT_NONE) { | |||
| 1006 | ls->ctok->type = NONE; | |||
| 1007 | #ifdef SEMPER_FIDELIS | |||
| 1008 | ls->ctok->name[0] = ' '; | |||
| 1009 | ls->ctok->name[1] = 0; | |||
| 1010 | #endif | |||
| 1011 | put_charucpp_put_char(ls, ' '); | |||
| 1012 | } else if (ls->ctok->type != NAME && | |||
| 1013 | !(ls->ltwnl && (ls->ctok->type == SHARP | |||
| 1014 | || ls->ctok->type == DIG_SHARP))) | |||
| 1015 | for (; *c; c ++) put_charucpp_put_char(ls, *c); | |||
| 1016 | } | |||
| 1017 | return 0; | |||
| 1018 | } | |||
| 1019 | return read_token(ls); | |||
| 1020 | } |