File: | workdir/unxlngi6.pro/CustomTarget/ucpp/source/lexer.c |
Location: | line 966, column 37 |
Description: | Branch condition evaluates to a garbage value |
1 | /* | |||
2 | * (c) Thomas Pornin 1999 - 2002 | |||
3 | * | |||
4 | * Redistribution and use in source and binary forms, with or without | |||
5 | * modification, are permitted provided that the following conditions | |||
6 | * are met: | |||
7 | * 1. Redistributions of source code must retain the above copyright | |||
8 | * notice, this list of conditions and the following disclaimer. | |||
9 | * 2. Redistributions in binary form must reproduce the above copyright | |||
10 | * notice, this list of conditions and the following disclaimer in the | |||
11 | * documentation and/or other materials provided with the distribution. | |||
12 | * 4. The name of the authors may not be used to endorse or promote | |||
13 | * products derived from this software without specific prior written | |||
14 | * permission. | |||
15 | * | |||
16 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR | |||
17 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |||
18 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE | |||
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |||
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT | |||
22 | * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR | |||
23 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, | |||
24 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE | |||
25 | * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, | |||
26 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
27 | * | |||
28 | */ | |||
29 | ||||
30 | #include "tune.h" | |||
31 | #include <stdio.h> | |||
32 | #include <string.h> | |||
33 | #include <stddef.h> | |||
34 | #include <limits.h> | |||
35 | #include "ucppi.h" | |||
36 | #include "mem.h" | |||
37 | #ifdef UCPP_MMAP | |||
38 | #include <unistd.h> | |||
39 | #include <sys/types.h> | |||
40 | #include <sys/mman.h> | |||
41 | #endif | |||
42 | ||||
43 | /* | |||
44 | * Character classes for description of the automaton; init_cppm() | |||
45 | * expands each class marker into the characters it stands for, so the | |||
46 | * marker characters should not appear explicitly in an automaton rule. | |||
47 | */ | |||
48 | #define SPC' ' ' ' /* whitespace characters: space, \t, \v, \f (see init_cppm()) */ | |||
49 | #define ALP'Z' 'Z' /* A-Z, a-z, _ */ | |||
50 | #define NUM'9' '9' /* 0-9 */ | |||
51 | #define ANY'Y' 'Y' /* any character, and also the end-of-input entry */ | |||
52 | #define VCH'F' 'F' /* void character (for end of input), stored in cppm_vch[] */ | |||
53 | ||||
54 | /* | |||
55 | * flags OR-ed into a transition value, and macros to test those flags | |||
56 | * STO: the currently read string is a complete token | |||
57 | * PUT: the currently read character must be added to the string | |||
58 | * FRZ: the currently read character must be kept and read again | |||
59 | */ | |||
60 | #define MOD_MK255 255 /* 255: the same mask noMOD() applies */ | |||
61 | #define noMOD(x)((x) & 255) ((x) & 255) /* strip the flags, keep the low 8 bits */ | |||
62 | #define STO(x)((x) | 256) ((x) | 256) /* set the STO flag */ | |||
63 | #define ttSTO(x)((x) & 256) ((x) & 256) /* non-zero if STO is set */ | |||
64 | #define FRZ(x)((x) | 512) ((x) | 512) /* set the FRZ flag */ | |||
65 | #define ttFRZ(x)((x) & 512) ((x) & 512) /* non-zero if FRZ is set */ | |||
66 | #define PUT(x)((x) | 1024) ((x) | 1024) /* set the PUT flag */ | |||
67 | #define ttPUT(x)((x) & 1024) ((x) & 1024) /* non-zero if PUT is set */ | |||
68 | ||||
69 | /* order is important: the states before MSTATE index cppm[] and cppm_vch[], and CMT() needs S_COMMENT..S_COMMENT5 to be contiguous */ | |||
70 | enum { | |||
71 | S_START, S_SPACE, S_BANG, S_STRING, S_STRING2, S_COLON, | |||
72 | S_SHARP, S_PCT, S_PCT2, S_PCT3, S_AMPER, S_CHAR, S_CHAR2, S_STAR, | |||
73 | S_PLUS, S_MINUS, S_DOT, S_DOT2, S_SLASH, S_NUMBER, S_NUMBER2, S_LT, | |||
74 | S_LT2, S_EQ, S_GT, S_GT2, S_CIRC, S_PIPE, S_BACKSLASH, | |||
75 | S_COMMENT, S_COMMENT2, S_COMMENT3, S_COMMENT4, S_COMMENT5, | |||
76 | S_NAME, S_NAME_BS, S_LCHAR, | |||
77 | MSTATE, | |||
78 | S_ILL, S_DDOT, S_DDSHARP, S_BS, S_ROGUE_BS, S_BEHEAD, S_DECAY, | |||
79 | S_TRUNC, S_TRUNCC, S_OUCH | |||
80 | }; | |||
81 | ||||
82 | #define CMT(x)((x) >= S_COMMENT && (x) <= S_COMMENT5) ((x) >= S_COMMENT && (x) <= S_COMMENT5) /* true for the comment states */ | |||
83 | ||||
84 | #define CMCR2 2 /* number of input slots (characters or class markers) per rule */ | |||
85 | ||||
86 | /* | |||
87 | * This is the description of the automaton. It is not used "as is": | |||
88 | * init_cppm() copies it at execution time into cppm[] and cppm_vch[]. | |||
89 | * Rules are applied in table order: each state lists its ANY rule first, then the specific rules that override it. | |||
90 | * To my utmost displeasure, there are a few hacks in read_token() | |||
91 | * (which uses the transformed automaton) about the special handling | |||
92 | * of slashes, sharps, and the letter L. | |||
93 | */ | |||
94 | static struct machine_state { | |||
95 | int state; /* state the rule applies to */ | |||
96 | unsigned char input[CMCR2]; /* characters or class markers; unused slots are 0 */ | |||
97 | int new_state; /* next state or token, possibly with STO/FRZ/PUT flags */ | |||
98 | } cppms[] = { | |||
99 | /* S_START is the generic beginning state */ | |||
100 | { S_START, { ANY'Y' }, S_ILL }, | |||
101 | #ifdef SEMPER_FIDELIS | |||
102 | { S_START, { SPC' ' }, PUT(S_SPACE)((S_SPACE) | 1024) }, | |||
103 | #else | |||
104 | { S_START, { SPC' ' }, S_SPACE }, | |||
105 | #endif | |||
106 | { S_START, { '\n' }, STO(NEWLINE)((NEWLINE) | 256) }, | |||
107 | { S_START, { '!' }, S_BANG }, | |||
108 | { S_START, { '"' }, PUT(S_STRING)((S_STRING) | 1024) }, | |||
109 | { S_START, { '#' }, S_SHARP }, | |||
110 | { S_START, { '%' }, S_PCT }, | |||
111 | { S_START, { '&' }, S_AMPER }, | |||
112 | { S_START, { '\'' }, PUT(S_CHAR)((S_CHAR) | 1024) }, | |||
113 | { S_START, { '(' }, STO(LPAR)((LPAR) | 256) }, | |||
114 | { S_START, { ')' }, STO(RPAR)((RPAR) | 256) }, | |||
115 | { S_START, { '*' }, S_STAR }, | |||
116 | { S_START, { '+' }, S_PLUS }, | |||
117 | { S_START, { ',' }, STO(COMMA)((COMMA) | 256) }, | |||
118 | { S_START, { '-' }, S_MINUS }, | |||
119 | { S_START, { '.' }, PUT(S_DOT)((S_DOT) | 1024) }, | |||
120 | #ifdef SEMPER_FIDELIS | |||
121 | { S_START, { '/' }, PUT(S_SLASH)((S_SLASH) | 1024) }, | |||
122 | #else | |||
123 | { S_START, { '/' }, S_SLASH }, | |||
124 | #endif | |||
125 | { S_START, { NUM'9' }, PUT(S_NUMBER)((S_NUMBER) | 1024) }, | |||
126 | { S_START, { ':' }, S_COLON }, | |||
127 | { S_START, { ';' }, STO(SEMIC)((SEMIC) | 256) }, | |||
128 | { S_START, { '<' }, S_LT }, | |||
129 | { S_START, { '=' }, S_EQ }, | |||
130 | { S_START, { '>' }, S_GT }, | |||
131 | { S_START, { '?' }, STO(QUEST)((QUEST) | 256) }, | |||
132 | { S_START, { ALP'Z' }, PUT(S_NAME)((S_NAME) | 1024) }, | |||
133 | { S_START, { 'L' }, PUT(S_LCHAR)((S_LCHAR) | 1024) }, | |||
134 | { S_START, { '[' }, STO(LBRK)((LBRK) | 256) }, | |||
135 | { S_START, { ']' }, STO(RBRK)((RBRK) | 256) }, | |||
136 | { S_START, { '^' }, S_CIRC }, | |||
137 | { S_START, { '{' }, STO(LBRA)((LBRA) | 256) }, | |||
138 | { S_START, { '|' }, S_PIPE }, | |||
139 | { S_START, { '}' }, STO(RBRA)((RBRA) | 256) }, | |||
140 | { S_START, { '~' }, STO(NOT)((NOT) | 256) }, | |||
141 | { S_START, { '\\' }, S_BACKSLASH }, | |||
142 | ||||
143 | /* after a space */ | |||
144 | { S_SPACE, { ANY'Y' }, FRZ(STO(NONE))((((NONE) | 256)) | 512) }, | |||
145 | #ifdef SEMPER_FIDELIS | |||
146 | { S_SPACE, { SPC' ' }, PUT(S_SPACE)((S_SPACE) | 1024) }, | |||
147 | #else | |||
148 | { S_SPACE, { SPC' ' }, S_SPACE }, | |||
149 | #endif | |||
150 | ||||
151 | /* after a ! */ | |||
152 | { S_BANG, { ANY'Y' }, FRZ(STO(LNOT))((((LNOT) | 256)) | 512) }, | |||
153 | { S_BANG, { '=' }, STO(NEQ)((NEQ) | 256) }, | |||
154 | ||||
155 | /* after a " */ | |||
156 | { S_STRING, { ANY'Y' }, PUT(S_STRING)((S_STRING) | 1024) }, | |||
157 | { S_STRING, { VCH'F' }, FRZ(S_TRUNC)((S_TRUNC) | 512) }, | |||
158 | { S_STRING, { '\n' }, FRZ(S_BEHEAD)((S_BEHEAD) | 512) }, | |||
159 | { S_STRING, { '\\' }, PUT(S_STRING2)((S_STRING2) | 1024) }, | |||
160 | { S_STRING, { '"' }, PUT(STO(STRING))((((STRING) | 256)) | 1024) }, | |||
161 | ||||
162 | { S_STRING2, { ANY'Y' }, PUT(S_STRING)((S_STRING) | 1024) }, | |||
163 | { S_STRING2, { VCH'F' }, FRZ(S_TRUNC)((S_TRUNC) | 512) }, | |||
164 | ||||
165 | /* after a # */ | |||
166 | { S_SHARP, { ANY'Y' }, FRZ(STO(SHARP))((((SHARP) | 256)) | 512) }, | |||
167 | { S_SHARP, { '#' }, STO(DSHARP)((DSHARP) | 256) }, | |||
168 | ||||
169 | /* after a : */ | |||
170 | { S_COLON, { ANY'Y' }, FRZ(STO(COLON))((((COLON) | 256)) | 512) }, | |||
171 | { S_COLON, { '>' }, STO(DIG_RBRK)((DIG_RBRK) | 256) }, | |||
172 | ||||
173 | /* after a % */ | |||
174 | { S_PCT, { ANY'Y' }, FRZ(STO(PCT))((((PCT) | 256)) | 512) }, | |||
175 | { S_PCT, { '=' }, STO(ASPCT)((ASPCT) | 256) }, | |||
176 | { S_PCT, { '>' }, STO(DIG_RBRA)((DIG_RBRA) | 256) }, | |||
177 | { S_PCT, { ':' }, S_PCT2 }, | |||
178 | ||||
179 | /* after a %: */ | |||
180 | { S_PCT2, { ANY'Y' }, FRZ(STO(DIG_SHARP))((((DIG_SHARP) | 256)) | 512) }, | |||
181 | { S_PCT2, { '%' }, S_PCT3 }, | |||
182 | ||||
183 | /* after a %:% */ | |||
184 | { S_PCT3, { ANY'Y' }, FRZ(S_DDSHARP)((S_DDSHARP) | 512) }, | |||
185 | { S_PCT3, { ':' }, STO(DIG_DSHARP)((DIG_DSHARP) | 256) }, | |||
186 | ||||
187 | /* after a & */ | |||
188 | { S_AMPER, { ANY'Y' }, FRZ(STO(AND))((((AND) | 256)) | 512) }, | |||
189 | { S_AMPER, { '=' }, STO(ASAND)((ASAND) | 256) }, | |||
190 | { S_AMPER, { '&' }, STO(LAND)((LAND) | 256) }, | |||
191 | ||||
192 | /* after a ' */ | |||
193 | { S_CHAR, { ANY'Y' }, PUT(S_CHAR)((S_CHAR) | 1024) }, | |||
194 | { S_CHAR, { VCH'F' }, FRZ(S_TRUNC)((S_TRUNC) | 512) }, | |||
195 | { S_CHAR, { '\'' }, PUT(STO(CHAR))((((CHAR) | 256)) | 1024) }, | |||
196 | { S_CHAR, { '\\' }, PUT(S_CHAR2)((S_CHAR2) | 1024) }, | |||
197 | ||||
198 | /* after a \ in a character constant | |||
199 | useful only for '\'' */ | |||
200 | { S_CHAR2, { ANY'Y' }, PUT(S_CHAR)((S_CHAR) | 1024) }, | |||
201 | { S_CHAR2, { VCH'F' }, FRZ(S_TRUNC)((S_TRUNC) | 512) }, | |||
202 | ||||
203 | /* after a * */ | |||
204 | { S_STAR, { ANY'Y' }, FRZ(STO(STAR))((((STAR) | 256)) | 512) }, | |||
205 | { S_STAR, { '=' }, STO(ASSTAR)((ASSTAR) | 256) }, | |||
206 | ||||
207 | /* after a + */ | |||
208 | { S_PLUS, { ANY'Y' }, FRZ(STO(PLUS))((((PLUS) | 256)) | 512) }, | |||
209 | { S_PLUS, { '+' }, STO(PPLUS)((PPLUS) | 256) }, | |||
210 | { S_PLUS, { '=' }, STO(ASPLUS)((ASPLUS) | 256) }, | |||
211 | ||||
212 | /* after a - */ | |||
213 | { S_MINUS, { ANY'Y' }, FRZ(STO(MINUS))((((MINUS) | 256)) | 512) }, | |||
214 | { S_MINUS, { '-' }, STO(MMINUS)((MMINUS) | 256) }, | |||
215 | { S_MINUS, { '=' }, STO(ASMINUS)((ASMINUS) | 256) }, | |||
216 | { S_MINUS, { '>' }, STO(ARROW)((ARROW) | 256) }, | |||
217 | ||||
218 | /* after a . */ | |||
219 | { S_DOT, { ANY'Y' }, FRZ(STO(DOT))((((DOT) | 256)) | 512) }, | |||
220 | { S_DOT, { NUM'9' }, PUT(S_NUMBER)((S_NUMBER) | 1024) }, | |||
221 | { S_DOT, { '.' }, S_DOT2 }, | |||
222 | ||||
223 | /* after .. */ | |||
224 | { S_DOT2, { ANY'Y' }, FRZ(S_DDOT)((S_DDOT) | 512) }, | |||
225 | { S_DOT2, { '.' }, STO(MDOTS)((MDOTS) | 256) }, | |||
226 | ||||
227 | /* after a / */ | |||
228 | { S_SLASH, { ANY'Y' }, FRZ(STO(SLASH))((((SLASH) | 256)) | 512) }, | |||
229 | { S_SLASH, { '=' }, STO(ASSLASH)((ASSLASH) | 256) }, | |||
230 | #ifdef SEMPER_FIDELIS | |||
231 | { S_SLASH, { '*' }, PUT(S_COMMENT)((S_COMMENT) | 1024) }, | |||
232 | { S_SLASH, { '/' }, PUT(S_COMMENT5)((S_COMMENT5) | 1024) }, | |||
233 | #else | |||
234 | { S_SLASH, { '*' }, S_COMMENT }, | |||
235 | { S_SLASH, { '/' }, S_COMMENT5 }, | |||
236 | #endif | |||
237 | /* | |||
238 | * There is a little hack in read_token() to disable | |||
239 | * this last rule, if C++ (C99) comments are not enabled. | |||
240 | */ | |||
241 | ||||
242 | /* after a number */ | |||
243 | { S_NUMBER, { ANY'Y' }, FRZ(STO(NUMBER))((((NUMBER) | 256)) | 512) }, | |||
244 | { S_NUMBER, { ALP'Z', NUM'9' }, PUT(S_NUMBER)((S_NUMBER) | 1024) }, | |||
245 | { S_NUMBER, { '.' }, PUT(S_NUMBER)((S_NUMBER) | 1024) }, | |||
246 | { S_NUMBER, { 'E', 'e' }, PUT(S_NUMBER2)((S_NUMBER2) | 1024) }, | |||
247 | { S_NUMBER, { 'P', 'p' }, PUT(S_NUMBER2)((S_NUMBER2) | 1024) }, | |||
248 | ||||
249 | { S_NUMBER2, { ANY'Y' }, FRZ(STO(NUMBER))((((NUMBER) | 256)) | 512) }, | |||
250 | { S_NUMBER2, { ALP'Z', NUM'9' }, PUT(S_NUMBER)((S_NUMBER) | 1024) }, | |||
251 | { S_NUMBER2, { '+', '-' }, PUT(S_NUMBER)((S_NUMBER) | 1024) }, | |||
252 | ||||
253 | /* after a < */ | |||
254 | { S_LT, { ANY'Y' }, FRZ(STO(LT))((((LT) | 256)) | 512) }, | |||
255 | { S_LT, { '=' }, STO(LEQ)((LEQ) | 256) }, | |||
256 | { S_LT, { '<' }, S_LT2 }, | |||
257 | { S_LT, { ':' }, STO(DIG_LBRK)((DIG_LBRK) | 256) }, | |||
258 | { S_LT, { '%' }, STO(DIG_LBRA)((DIG_LBRA) | 256) }, | |||
259 | ||||
260 | { S_LT2, { ANY'Y' }, FRZ(STO(LSH))((((LSH) | 256)) | 512) }, | |||
261 | { S_LT2, { '=' }, STO(ASLSH)((ASLSH) | 256) }, | |||
262 | ||||
263 | /* after a > */ | |||
264 | { S_GT, { ANY'Y' }, FRZ(STO(GT))((((GT) | 256)) | 512) }, | |||
265 | { S_GT, { '=' }, STO(GEQ)((GEQ) | 256) }, | |||
266 | { S_GT, { '>' }, S_GT2 }, | |||
267 | ||||
268 | { S_GT2, { ANY'Y' }, FRZ(STO(RSH))((((RSH) | 256)) | 512) }, | |||
269 | { S_GT2, { '=' }, STO(ASRSH)((ASRSH) | 256) }, | |||
270 | ||||
271 | /* after a = */ | |||
272 | { S_EQ, { ANY'Y' }, FRZ(STO(ASGN))((((ASGN) | 256)) | 512) }, | |||
273 | { S_EQ, { '=' }, STO(SAME)((SAME) | 256) }, | |||
274 | #ifdef CAST_OP | |||
275 | { S_EQ, { '>' }, STO(CAST)((CAST) | 256) }, | |||
276 | #endif | |||
277 | ||||
278 | /* after a \ */ | |||
279 | { S_BACKSLASH, { ANY'Y' }, FRZ(S_BS)((S_BS) | 512) }, | |||
280 | { S_BACKSLASH, { 'U', 'u' }, FRZ(S_NAME_BS)((S_NAME_BS) | 512) }, | |||
281 | ||||
282 | /* after a letter */ | |||
283 | { S_NAME, { ANY'Y' }, FRZ(STO(NAME))((((NAME) | 256)) | 512) }, | |||
284 | { S_NAME, { ALP'Z', NUM'9' }, PUT(S_NAME)((S_NAME) | 1024) }, | |||
285 | { S_NAME, { '\\' }, S_NAME_BS }, | |||
286 | ||||
287 | /* after a \ in an identifier */ | |||
288 | { S_NAME_BS, { ANY'Y' }, FRZ(S_ROGUE_BS)((S_ROGUE_BS) | 512) }, | |||
289 | { S_NAME_BS, { 'u', 'U' }, PUT(S_NAME)((S_NAME) | 1024) }, | |||
290 | ||||
291 | /* after a L */ | |||
292 | { S_LCHAR, { ANY'Y' }, FRZ(S_NAME)((S_NAME) | 512) }, | |||
293 | { S_LCHAR, { '"' }, PUT(S_STRING)((S_STRING) | 1024) }, | |||
294 | { S_LCHAR, { '\'' }, PUT(S_CHAR)((S_CHAR) | 1024) }, | |||
295 | ||||
296 | /* after a ^ */ | |||
297 | { S_CIRC, { ANY'Y' }, FRZ(STO(CIRC))((((CIRC) | 256)) | 512) }, | |||
298 | { S_CIRC, { '=' }, STO(ASCIRC)((ASCIRC) | 256) }, | |||
299 | ||||
300 | /* after a | */ | |||
301 | { S_PIPE, { ANY'Y' }, FRZ(STO(OR))((((OR) | 256)) | 512) }, | |||
302 | { S_PIPE, { '=' }, STO(ASOR)((ASOR) | 256) }, | |||
303 | { S_PIPE, { '|' }, STO(LOR)((LOR) | 256) }, | |||
304 | ||||
305 | /* after a / and * */ | |||
306 | #ifdef SEMPER_FIDELIS | |||
307 | { S_COMMENT, { ANY'Y' }, PUT(S_COMMENT)((S_COMMENT) | 1024) }, | |||
308 | { S_COMMENT, { VCH'F' }, FRZ(S_TRUNCC)((S_TRUNCC) | 512) }, | |||
309 | { S_COMMENT, { '*' }, PUT(S_COMMENT2)((S_COMMENT2) | 1024) }, | |||
310 | ||||
311 | { S_COMMENT2, { ANY'Y' }, FRZ(S_COMMENT)((S_COMMENT) | 512) }, | |||
312 | { S_COMMENT2, { VCH'F' }, FRZ(S_TRUNCC)((S_TRUNCC) | 512) }, | |||
313 | { S_COMMENT2, { '*' }, PUT(S_COMMENT2)((S_COMMENT2) | 1024) }, | |||
314 | { S_COMMENT2, { '/' }, STO(PUT(COMMENT))((((COMMENT) | 1024)) | 256) }, | |||
315 | ||||
316 | { S_COMMENT5, { ANY'Y' }, PUT(S_COMMENT5)((S_COMMENT5) | 1024) }, | |||
317 | { S_COMMENT5, { VCH'F' }, FRZ(S_DECAY)((S_DECAY) | 512) }, | |||
318 | { S_COMMENT5, { '\n' }, FRZ(STO(COMMENT))((((COMMENT) | 256)) | 512) }, | |||
319 | #else | |||
320 | { S_COMMENT, { ANY'Y' }, S_COMMENT }, | |||
321 | { S_COMMENT, { VCH'F' }, FRZ(S_TRUNCC)((S_TRUNCC) | 512) }, | |||
322 | { S_COMMENT, { '*' }, S_COMMENT2 }, | |||
323 | ||||
324 | { S_COMMENT2, { ANY'Y' }, FRZ(S_COMMENT)((S_COMMENT) | 512) }, | |||
325 | { S_COMMENT2, { VCH'F' }, FRZ(S_TRUNCC)((S_TRUNCC) | 512) }, | |||
326 | { S_COMMENT2, { '*' }, S_COMMENT2 }, | |||
327 | { S_COMMENT2, { '/' }, STO(COMMENT)((COMMENT) | 256) }, | |||
328 | ||||
329 | { S_COMMENT5, { ANY'Y' }, S_COMMENT5 }, | |||
330 | { S_COMMENT5, { VCH'F' }, FRZ(S_DECAY)((S_DECAY) | 512) }, | |||
331 | { S_COMMENT5, { '\n' }, FRZ(STO(COMMENT))((((COMMENT) | 256)) | 512) }, | |||
332 | #endif | |||
333 | ||||
334 | /* dummy end of machine description: init_cppm() stops at the first rule whose input[0] is 0 */ | |||
335 | { 0, { 0 }, 0 } | |||
336 | }; | |||
337 | ||||
338 | /* | |||
339 | * cppm is the table used to store the automaton: if we are in state s | |||
340 | * and we read character c, we apply the action cppm[s][c] (jumping to | |||
341 | * another state, or emitting a token). | |||
342 | * cppm_vch is the table for the special virtual character "end of input" | |||
343 | */ | |||
344 | static int cppm[MSTATE][MAX_CHAR_VAL128]; /* filled by init_cppm(); entries no rule covers hold S_OUCH */ | |||
345 | static int cppm_vch[MSTATE]; /* one entry per state, read by read_token() when next_char() returns a negative value */ | |||
346 | ||||
347 | /* | |||
348 | * init_cppm() fills cppm[][] with the information stored in cppms[]. | |||
349 | * It must be called before beginning the lexing process. | |||
350 | */ | |||
351 | void init_cppmucpp_init_cppm(void) | |||
352 | { | |||
353 | int i, j, k, c; | |||
354 | static unsigned char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; | |||
355 | static unsigned char lower[] = "abcdefghijklmnopqrstuvwxyz"; | |||
356 | unsigned char *cp; | |||
357 | ||||
358 | for (i = 0; i < MSTATE; i ++) { | |||
359 | for (j = 0; j < MAX_CHAR_VAL128; j ++) cppm[i][j] = S_OUCH; | |||
360 | cppm_vch[i] = S_OUCH; | |||
361 | } | |||
362 | for (i = 0; cppms[i].input[0]; i ++) for (k = 0; k < CMCR2; k ++) { | |||
363 | int s = cppms[i].state; | |||
364 | int ns = cppms[i].new_state; | |||
365 | ||||
366 | switch (c = cppms[i].input[k]) { | |||
367 | case 0: | |||
368 | break; | |||
369 | case SPC' ': | |||
370 | /* see space_char() also */ | |||
371 | cppm[s][' '] = ns; | |||
372 | cppm[s]['\t'] = ns; | |||
373 | cppm[s]['\v'] = ns; | |||
374 | cppm[s]['\f'] = ns; | |||
375 | #ifdef UNBREAKABLE_SPACE | |||
376 | if (MAX_CHAR_VAL128 > UNBREAKABLE_SPACE) | |||
377 | cppm[s][UNBREAKABLE_SPACE] = ns; | |||
378 | #endif | |||
379 | break; | |||
380 | case ALP'Z': | |||
381 | for (cp = upper; *cp; cp ++) cppm[s][(int)*cp] = ns; | |||
382 | for (cp = lower; *cp; cp ++) cppm[s][(int)*cp] = ns; | |||
383 | cppm[s]['_'] = ns; | |||
384 | break; | |||
385 | case NUM'9': | |||
386 | for (j = '0'; j <= '9'; j ++) cppm[s][j] = ns; | |||
387 | break; | |||
388 | case ANY'Y': | |||
389 | for (j = 0; j < MAX_CHAR_VAL128; j ++) cppm[s][j] = ns; | |||
390 | cppm_vch[s] = ns; | |||
391 | break; | |||
392 | case VCH'F': | |||
393 | cppm_vch[s] = ns; | |||
394 | break; | |||
395 | default: | |||
396 | cppm[s][c] = ns; | |||
397 | break; | |||
398 | } | |||
399 | } | |||
400 | } | |||
401 | ||||
402 | /* | |||
403 | * Make some character as equivalent to a letter for identifiers. | |||
404 | */ | |||
405 | void set_identifier_char(int c) | |||
406 | { | |||
407 | cppm[S_START][c] = PUT(S_NAME)((S_NAME) | 1024); | |||
408 | cppm[S_NAME][c] = PUT(S_NAME)((S_NAME) | 1024); | |||
409 | } | |||
410 | ||||
411 | /* | |||
412 | * Remove the "identifier" status from a character. | |||
413 | */ | |||
414 | void unset_identifier_char(int c) | |||
415 | { | |||
416 | cppm[S_START][c] = S_ILL; | |||
417 | cppm[S_NAME][c] = FRZ(STO(NAME))((((NAME) | 256)) | 512); | |||
418 | } | |||
419 | ||||
/*
 * Tell whether c is one of the whitespace characters of the SPC class:
 * space, \t, \v, \f and, when that macro is defined, UNBREAKABLE_SPACE.
 * Newline is not part of the class. Returns 1 for a match, 0 otherwise.
 */
int space_char(int c)
{
	return (c == ' ' || c == '\t' || c == '\v' || c == '\f'
#ifdef UNBREAKABLE_SPACE
		|| c == UNBREAKABLE_SPACE
#endif
		);
}
429 | ||||
430 | #ifndef NO_UCPP_BUF1 | |||
431 | /* | |||
432 | * our output buffer is full, flush it | |||
433 | */ | |||
434 | void flush_output(struct lexer_state *ls) | |||
435 | { | |||
436 | size_t x = ls->sbuf, y = 0, z; | |||
437 | ||||
438 | if (ls->sbuf == 0) return; | |||
439 | do { | |||
440 | z = fwrite(ls->output_buf + y, 1, x, ls->output); | |||
441 | x -= z; | |||
442 | y += z; | |||
443 | } while (z && x > 0); | |||
444 | if (!y) { | |||
445 | errorucpp_error(ls->line, "could not flush output (disk full ?)"); | |||
446 | die(); | |||
447 | } | |||
448 | ls->sbuf = 0; | |||
449 | } | |||
450 | #endif | |||
451 | ||||
452 | /* | |||
453 | * Output one character; flush the buffer if needed. | |||
454 | * This function should not be called, except by put_char(). | |||
455 | */ | |||
456 | static inline void write_char(struct lexer_state *ls, unsigned char c) | |||
457 | { | |||
458 | #ifndef NO_UCPP_BUF1 | |||
459 | ls->output_buf[ls->sbuf ++] = c; | |||
460 | if (ls->sbuf == OUTPUT_BUF_MEMG8192) flush_output(ls); | |||
461 | #else | |||
462 | if (putc((int)c, ls->output)_IO_putc ((int)c, ls->output) == EOF(-1)) { | |||
463 | errorucpp_error(ls->line, "output write error (disk full ?)"); | |||
464 | die(); | |||
465 | } | |||
466 | #endif | |||
467 | if (c == '\n') { | |||
468 | ls->oline ++; | |||
469 | } | |||
470 | } | |||
471 | ||||
472 | /* | |||
473 | * schedule a character for output | |||
474 | */ | |||
475 | void put_charucpp_put_char(struct lexer_state *ls, unsigned char c) | |||
476 | { | |||
477 | if (ls->flags & KEEP_OUTPUT0x020000UL) write_char(ls, c); | |||
478 | } | |||
479 | ||||
480 | /* | |||
481 | * get next raw input character | |||
482 | */ | |||
483 | static inline int read_char(struct lexer_state *ls) | |||
484 | { | |||
485 | unsigned char c; | |||
486 | ||||
487 | if (!ls->input) { | |||
488 | return ((ls->pbuf ++) < ls->ebuf) ? | |||
489 | ls->input_string[ls->pbuf - 1] : -1; | |||
490 | } | |||
491 | while (1) { | |||
492 | #ifndef NO_UCPP_BUF1 | |||
493 | if (ls->pbuf == ls->ebuf) { | |||
494 | #ifdef UCPP_MMAP | |||
495 | if (ls->from_mmap) { | |||
496 | munmap((void *)ls->input_buf, ls->ebuf); | |||
497 | ls->from_mmap = 0; | |||
498 | ls->input_buf = ls->input_buf_sav; | |||
499 | } | |||
500 | #endif | |||
501 | ls->ebuf = fread(ls->input_buf, 1, | |||
502 | INPUT_BUF_MEMG8192, ls->input); | |||
503 | ls->pbuf = 0; | |||
504 | } | |||
505 | if (ls->ebuf == 0) return -1; | |||
506 | c = ls->input_buf[ls->pbuf ++]; | |||
507 | #else | |||
508 | int x = getc(ls->input)_IO_getc (ls->input); | |||
509 | ||||
510 | if (x == EOF(-1)) return -1; | |||
511 | c = x; | |||
512 | #endif | |||
513 | if (ls->flags & COPY_LINE0x040000UL) { | |||
514 | if (c == '\n') { | |||
515 | ls->copy_line[ls->cli] = 0; | |||
516 | ls->cli = 0; | |||
517 | } else if (ls->cli < (COPY_LINE_LENGTH80 - 1)) { | |||
518 | ls->copy_line[ls->cli ++] = c; | |||
519 | } | |||
520 | } | |||
521 | if (ls->macfile && c == '\n') { | |||
522 | ls->macfile = 0; | |||
523 | continue; | |||
524 | } | |||
525 | ls->macfile = 0; | |||
526 | if (c == '\r') { | |||
527 | /* | |||
528 | * We found a '\r'; we handle it as a newline | |||
529 | * and ignore the next newline. This should work | |||
530 | * with all combinations of Msdos, MacIntosh and | |||
531 | * Unix files on these three platforms. On other | |||
532 | * platforms, native file formats are always | |||
533 | * supported. | |||
534 | */ | |||
535 | ls->macfile = 1; | |||
536 | c = '\n'; | |||
537 | } | |||
538 | break; | |||
539 | } | |||
540 | return c; | |||
541 | } | |||
542 | ||||
543 | /* | |||
544 | * next_fifo_char(), char_lka1() and char_lka2() give a two character | |||
545 | * look-ahead on the input stream; this is needed for trigraphs | |||
546 | */ | |||
547 | static inline int next_fifo_char(struct lexer_state *ls) | |||
548 | { | |||
549 | int c; | |||
550 | ||||
551 | if (ls->nlka != 0) { | |||
552 | c = ls->lka[0]; | |||
553 | ls->lka[0] = ls->lka[1]; | |||
554 | ls->nlka --; | |||
555 | } else c = read_char(ls); | |||
556 | return c; | |||
557 | } | |||
558 | ||||
559 | static inline int char_lka1(struct lexer_state *ls) | |||
560 | { | |||
561 | if (ls->nlka == 0) { | |||
562 | ls->lka[0] = read_char(ls); | |||
563 | ls->nlka ++; | |||
564 | } | |||
565 | return ls->lka[0]; | |||
566 | } | |||
567 | ||||
568 | static inline int char_lka2(struct lexer_state *ls) | |||
569 | { | |||
570 | #ifdef AUDIT | |||
571 | if (ls->nlka == 0) ouchucpp_ouch("always in motion future is"); | |||
572 | #endif | |||
573 | if (ls->nlka == 1) { | |||
574 | ls->lka[1] = read_char(ls); | |||
575 | ls->nlka ++; | |||
576 | } | |||
577 | return ls->lka[1]; | |||
578 | } | |||
579 | ||||
/*
 * Trigraph table used by next_char(): "old" is the third character of
 * a ??x sequence, "new" is the character the sequence is replaced with.
 */
static struct trigraph {
	int old, new;
} trig[9] = {
	{ .old = '=',  .new = '#'  },
	{ .old = '/',  .new = '\\' },
	{ .old = '\'', .new = '^'  },
	{ .old = '(',  .new = '['  },
	{ .old = ')',  .new = ']'  },
	{ .old = '!',  .new = '|'  },
	{ .old = '<',  .new = '{'  },
	{ .old = '>',  .new = '}'  },
	{ .old = '-',  .new = '~'  }
};
593 | ||||
594 | /* | |||
595 | * Returns the next character, after treatment of trigraphs and terminating | |||
596 | * backslashes. Return value is -1 if there is no more input. | |||
597 | */ | |||
598 | static inline int next_char(struct lexer_state *ls) | |||
599 | { | |||
600 | int c; | |||
601 | ||||
602 | if (!ls->discard) return ls->last; | |||
603 | ls->discard = 0; | |||
604 | do { | |||
605 | c = next_fifo_char(ls); | |||
606 | /* check trigraphs */ | |||
607 | if (c == '?' && char_lka1(ls) == '?' | |||
608 | && (ls->flags & HANDLE_TRIGRAPHS0x008000UL)) { | |||
609 | int i, d; | |||
610 | ||||
611 | d = char_lka2(ls); | |||
612 | for (i = 0; i < 9; i ++) if (d == trig[i].old) { | |||
613 | if (ls->flags & WARN_TRIGRAPHS0x000004UL) { | |||
614 | ls->count_trigraphs ++; | |||
615 | } | |||
616 | if (ls->flags & WARN_TRIGRAPHS_MORE0x000008UL) { | |||
617 | warningucpp_warning(ls->line, "trigraph ?""?%c " | |||
618 | "encountered", d); | |||
619 | } | |||
620 | next_fifo_char(ls); | |||
621 | next_fifo_char(ls); | |||
622 | c = trig[i].new; | |||
623 | break; | |||
624 | } | |||
625 | } | |||
626 | if (c == '\\' && char_lka1(ls) == '\n') { | |||
627 | ls->line ++; | |||
628 | next_fifo_char(ls); | |||
629 | } else if (c == '\r' && char_lka1(ls) == '\n') { | |||
630 | ls->line ++; | |||
631 | next_fifo_char(ls); | |||
632 | c = '\n'; | |||
633 | return c; | |||
634 | } else { | |||
635 | ls->last = c; | |||
636 | return c; | |||
637 | } | |||
638 | } while (1); | |||
639 | } | |||
640 | ||||
/*
 * wrapper for next_char(), to be called from outside
 * (used by #error, #include directives); the value returned is
 * exactly the one next_char() returns
 */
int grap_char(struct lexer_state *ls)
{
	const int ch = next_char(ls);

	return ch;
}
649 | ||||
650 | /* | |||
651 | * Discard the current character, so that the next call to next_char() | |||
652 | * will step into the input stream. | |||
653 | */ | |||
654 | void discard_charucpp_discard_char(struct lexer_state *ls) | |||
655 | { | |||
656 | #ifdef AUDIT | |||
657 | if (ls->discard) ouchucpp_ouch("overcollecting garbage"); | |||
658 | #endif | |||
659 | ls->discard = 1; | |||
660 | ls->utf8 = 0; | |||
661 | if (ls->last == '\n') ls->line ++; | |||
662 | } | |||
663 | ||||
/*
 * Convert an UTF-8 encoded character to a Universal Character Name
 * using \u (or \U when appropriate).
 *
 * utf8 holds the bytes of the sequence packed with the lead byte in
 * the most significant position (read_token() builds it with
 * utf8 = (utf8 << 8) | c). A value whose low byte has bit 7 clear is
 * taken as the code point itself.
 *
 * buf receives the NUL-terminated result and must have room for 11
 * bytes. The return value is the length written, NUL excluded:
 *   1  for a code point below 128 (the character itself)
 *   6  for \uxxxx   (code points up to 0xffff, inclusive)
 *   10 for \U00xxxxxx
 */
static int utf8_to_string(unsigned char buf[], unsigned long utf8)
{
	static const char hex[16] = "0123456789abcdef";
	unsigned long val;

	if (utf8 & 0x80UL) {
		unsigned long x1, x2, x3, x4;

		x1 = (utf8 >> 24) & 0x07UL;
		x2 = (utf8 >> 16) & 0x7fUL;
		x3 = (utf8 >> 8) & 0x7fUL;
		x4 = utf8 & 0x3fUL;
		/*
		 * x2 / x3 are lead bytes (bit 6 set) in 3- and 2-byte
		 * sequences and carry fewer payload bits there.
		 */
		if (x2 & 0x40UL) x2 &= 0x0fUL;
		if (x3 & 0x40UL) x3 &= 0x1fUL;
		/*
		 * Each continuation byte carries 6 bits, so the lead
		 * byte of a 4-byte sequence supplies bits 18..20.
		 */
		val = x4 | (x3 << 6) | (x2 << 12) | (x1 << 18);
	} else val = utf8;

	if (val < 128) {
		buf[0] = (unsigned char)val;
		buf[1] = 0;
		return 1;
	}
	if (val <= 0xffffUL) {
		buf[0] = '\\';
		buf[1] = 'u';
		buf[2] = hex[(size_t)((val >> 12) & 0xfU)];
		buf[3] = hex[(size_t)((val >> 8) & 0xfU)];
		buf[4] = hex[(size_t)((val >> 4) & 0xfU)];
		buf[5] = hex[(size_t)(val & 0xfU)];
		buf[6] = 0;
		return 6;
	}
	buf[0] = '\\';
	buf[1] = 'U';
	buf[2] = '0';
	buf[3] = '0';
	/* masked so that a malformed value cannot index past hex[] */
	buf[4] = hex[(size_t)((val >> 20) & 0xfU)];
	buf[5] = hex[(size_t)((val >> 16) & 0xfU)];
	buf[6] = hex[(size_t)((val >> 12) & 0xfU)];
	buf[7] = hex[(size_t)((val >> 8) & 0xfU)];
	buf[8] = hex[(size_t)((val >> 4) & 0xfU)];
	buf[9] = hex[(size_t)(val & 0xfU)];
	buf[10] = 0;
	return 10;
}
712 | ||||
713 | /* | |||
714 | * Scan the identifier and put it in canonical form: | |||
715 | * -- tranform \U0000xxxx into \uxxxx | |||
716 | * -- inside \u and \U, make letters low case | |||
717 | * -- report (some) incorrect use of UCN | |||
718 | */ | |||
719 | static void canonize_id(struct lexer_state *ls, char *id) | |||
720 | { | |||
721 | char *c, *d; | |||
722 | ||||
723 | for (c = d = id; *c;) { | |||
724 | if (*c == '\\') { | |||
725 | int i; | |||
726 | ||||
727 | if (!*(c + 1)) goto canon_error; | |||
728 | if (*(c + 1) == 'U') { | |||
729 | for (i = 0; i < 8 && *(c + i + 2); i ++); | |||
730 | if (i != 8) goto canon_error; | |||
731 | *(d ++) = '\\'; | |||
732 | c += 2; | |||
733 | for (i = 0; i < 4 && *(c + i) == '0'; i ++); | |||
734 | if (i == 4) { | |||
735 | *(d ++) = 'u'; | |||
736 | c += 4; | |||
737 | } else { | |||
738 | *(d ++) = 'U'; | |||
739 | i = 8; | |||
740 | } | |||
741 | for (; i > 0; i --) { | |||
742 | switch (*c) { | |||
743 | case 'A': *(d ++) = 'a'; break; | |||
744 | case 'B': *(d ++) = 'b'; break; | |||
745 | case 'C': *(d ++) = 'c'; break; | |||
746 | case 'D': *(d ++) = 'd'; break; | |||
747 | case 'E': *(d ++) = 'e'; break; | |||
748 | case 'F': *(d ++) = 'f'; break; | |||
749 | default: *(d ++) = *c; break; | |||
750 | } | |||
751 | c ++; | |||
752 | } | |||
753 | } else if (*(c + 1) == 'u') { | |||
754 | for (i = 0; i < 4 && *(c + i + 2); i ++); | |||
755 | if (i != 4) goto canon_error; | |||
756 | *(d ++) = '\\'; | |||
757 | *(d ++) = 'u'; | |||
758 | c += 2; | |||
759 | for (; i > 0; i --) { | |||
760 | switch (*c) { | |||
761 | case 'A': *(d ++) = 'a'; break; | |||
762 | case 'B': *(d ++) = 'b'; break; | |||
763 | case 'C': *(d ++) = 'c'; break; | |||
764 | case 'D': *(d ++) = 'd'; break; | |||
765 | case 'E': *(d ++) = 'e'; break; | |||
766 | case 'F': *(d ++) = 'f'; break; | |||
767 | default: *(d ++) = *c; break; | |||
768 | } | |||
769 | c ++; | |||
770 | } | |||
771 | } else goto canon_error; | |||
772 | continue; | |||
773 | } | |||
774 | *(d ++) = *(c ++); | |||
775 | } | |||
776 | *d = 0; | |||
777 | return; | |||
778 | ||||
779 | canon_error: | |||
780 | for (; *c; *(d ++) = *(c ++)); | |||
781 | if (ls->flags & WARN_STANDARD0x000001UL) { | |||
782 | warningucpp_warning(ls->line, "malformed identifier with UCN: '%s'", id); | |||
783 | } | |||
784 | *d = 0; | |||
785 | } | |||
786 | ||||
787 | /* | |||
788 | * Run the automaton, in order to get the next token. | |||
789 | * This function should not be called, except by next_token() | |||
790 | * | |||
791 | * return value: 1 on error, 2 on end-of-file, 0 otherwise. | |||
792 | */ | |||
793 | static inline int read_token(struct lexer_state *ls) | |||
794 | { | |||
795 | int cstat = S_START, nstat; | |||
796 | size_t ltok = 0; | |||
797 | int c, outc = 0, ucn_in_id = 0; | |||
798 | int shift_state; | |||
799 | unsigned long utf8; | |||
800 | long l = ls->line; | |||
801 | ||||
802 | ls->ctok->line = l; | |||
803 | if (ls->pending_token) { | |||
804 | if ((ls->ctok->type = ls->pending_token) == BUNCH) { | |||
805 | ls->ctok->name[0] = '\\'; | |||
806 | ls->ctok->name[1] = 0; | |||
807 | } | |||
808 | ls->pending_token = 0; | |||
809 | return 0; | |||
810 | } | |||
811 | if (ls->flags & UTF8_SOURCE0x004000UL) { | |||
812 | utf8 = ls->utf8; | |||
813 | shift_state = 0; | |||
814 | } | |||
815 | if (!(ls->flags & LEXER0x010000UL) && (ls->flags & KEEP_OUTPUT0x020000UL)) | |||
816 | for (; ls->line > ls->oline;) put_charucpp_put_char(ls, '\n'); | |||
817 | do { | |||
818 | c = next_char(ls); | |||
819 | if (c < 0) { | |||
820 | if ((ls->flags & UTF8_SOURCE0x004000UL) && shift_state) { | |||
821 | if (ls->flags & WARN_STANDARD0x000001UL) | |||
822 | warningucpp_warning(ls->line, "truncated UTF-8 " | |||
823 | "character"); | |||
824 | shift_state = 0; | |||
825 | utf8 = 0; | |||
826 | } | |||
827 | if (cstat == S_START) return 2; | |||
828 | nstat = cppm_vch[cstat]; | |||
829 | } else { | |||
830 | if (ls->flags & UTF8_SOURCE0x004000UL) { | |||
831 | if (shift_state) { | |||
832 | if ((c & 0xc0) != 0x80) { | |||
833 | if (ls->flags & WARN_STANDARD0x000001UL) | |||
834 | warningucpp_warning(ls->line, | |||
835 | "truncated " | |||
836 | "UTF-8 " | |||
837 | "character"); | |||
838 | shift_state = 0; | |||
839 | utf8 = 0; | |||
840 | c = '_'; | |||
841 | } else { | |||
842 | utf8 = (utf8 << 8) | c; | |||
843 | if (-- shift_state) { | |||
844 | ls->discard = 1; | |||
845 | continue; | |||
846 | } | |||
847 | c = '_'; | |||
848 | } | |||
849 | } else if ((c & 0xc0) == 0xc0) { | |||
850 | if ((c & 0x30) == 0x30) { | |||
851 | shift_state = 3; | |||
852 | } else if (c & 0x20) { | |||
853 | shift_state = 2; | |||
854 | } else { | |||
855 | shift_state = 1; | |||
856 | } | |||
857 | utf8 = c; | |||
858 | ls->discard = 1; | |||
859 | continue; | |||
860 | } else utf8 = 0; | |||
861 | } | |||
862 | nstat = cppm[cstat][c < MAX_CHAR_VAL128 ? c : 0]; | |||
863 | } | |||
864 | #ifdef AUDIT | |||
865 | if (nstat == S_OUCH) { | |||
866 | ouchucpp_ouch("bad move..."); | |||
867 | } | |||
868 | #endif | |||
869 | /* | |||
870 | * disable C++-like comments | |||
871 | */ | |||
872 | if (nstat == S_COMMENT5 && !(ls->flags & CPLUSPLUS_COMMENTS0x000100UL)) | |||
873 | nstat = FRZ(STO(SLASH))((((SLASH) | 256)) | 512); | |||
874 | ||||
875 | if (noMOD(nstat)((nstat) & 255) >= MSTATE && !ttSTO(nstat)((nstat) & 256)) | |||
876 | switch (noMOD(nstat)((nstat) & 255)) { | |||
877 | case S_ILL: | |||
878 | if (ls->flags & CCHARSET0x000040UL) { | |||
879 | errorucpp_error(ls->line, "illegal character '%c'", c); | |||
880 | return 1; | |||
881 | } | |||
882 | nstat = PUT(STO(BUNCH))((((BUNCH) | 256)) | 1024); | |||
883 | break; | |||
884 | case S_BS: | |||
885 | ls->ctok->name[0] = '\\'; | |||
886 | ltok ++; | |||
887 | nstat = FRZ(STO(BUNCH))((((BUNCH) | 256)) | 512); | |||
888 | if (!(ls->flags & LEXER0x010000UL)) put_charucpp_put_char(ls, '\\'); | |||
889 | break; | |||
890 | case S_ROGUE_BS: | |||
891 | ls->pending_token = BUNCH; | |||
892 | nstat = FRZ(STO(NAME))((((NAME) | 256)) | 512); | |||
893 | break; | |||
894 | case S_DDOT: | |||
895 | ls->pending_token = DOT; | |||
896 | nstat = FRZ(STO(DOT))((((DOT) | 256)) | 512); | |||
897 | break; | |||
898 | case S_DDSHARP: | |||
899 | ls->pending_token = PCT; | |||
900 | nstat = FRZ(STO(DIG_SHARP))((((DIG_SHARP) | 256)) | 512); | |||
901 | break; | |||
902 | case S_BEHEAD: | |||
903 | errorucpp_error(l, "unfinished string at end of line"); | |||
904 | return 1; | |||
905 | case S_DECAY: | |||
906 | warningucpp_warning(l, "unterminated // comment"); | |||
907 | nstat = FRZ(STO(COMMENT))((((COMMENT) | 256)) | 512); | |||
908 | break; | |||
909 | case S_TRUNC: | |||
910 | errorucpp_error(l, "truncated token"); | |||
911 | return 1; | |||
912 | case S_TRUNCC: | |||
913 | errorucpp_error(l, "truncated comment"); | |||
914 | return 1; | |||
915 | #ifdef AUDIT | |||
916 | case S_OUCH: | |||
917 | ouchucpp_ouch("machine went out of control"); | |||
918 | break; | |||
919 | #endif | |||
920 | } | |||
921 | if (!ttFRZ(nstat)((nstat) & 512)) { | |||
922 | discard_charucpp_discard_char(ls); | |||
923 | if (!(ls->flags & LEXER0x010000UL) && ls->condcomp) { | |||
924 | int z = ttSTO(nstat)((nstat) & 256) ? S_ILL : noMOD(nstat)((nstat) & 255); | |||
925 | ||||
926 | if (cstat == S_NAME || z == S_NAME | |||
927 | || ((CMT(cstat)((cstat) >= S_COMMENT && (cstat) <= S_COMMENT5) || CMT(z)((z) >= S_COMMENT && (z) <= S_COMMENT5)) | |||
928 | && (ls->flags & DISCARD_COMMENTS0x000080UL))) { | |||
929 | outc = 0; | |||
930 | } else if (z == S_LCHAR || z == S_SLASH | |||
931 | || (z == S_SHARP && ls->ltwnl) | |||
932 | || (z == S_PCT && ls->ltwnl) | |||
933 | || (z == S_BACKSLASH)) { | |||
934 | outc = c; | |||
935 | } else if (z == S_PCT2 && ls->ltwnl) { | |||
936 | outc = -1; | |||
937 | } else if (z == S_PCT3 && ls->ltwnl) { | |||
938 | /* we have %:% but this still might | |||
939 | not be a %:%: */ | |||
940 | outc = -2; | |||
941 | } else { | |||
942 | if (outc < 0) { | |||
943 | put_charucpp_put_char(ls, '%'); | |||
944 | put_charucpp_put_char(ls, ':'); | |||
945 | if (outc == -2) | |||
946 | put_charucpp_put_char(ls, '%'); | |||
947 | outc = 0; | |||
948 | } else if (outc) { | |||
949 | put_charucpp_put_char(ls, outc); | |||
950 | outc = 0; | |||
951 | } | |||
952 | put_charucpp_put_char(ls, c); | |||
953 | } | |||
954 | } | |||
955 | } else if (outc == '/' && !(ls->flags & LEXER0x010000UL) | |||
956 | && ls->condcomp) { | |||
957 | /* this is a hack: we need to dump a pending slash */ | |||
958 | put_charucpp_put_char(ls, outc); | |||
959 | outc = 0; | |||
960 | } | |||
961 | if (ttPUT(nstat)((nstat) & 1024)) { | |||
962 | if (cstat == S_NAME_BS) { | |||
963 | ucn_in_id = 1; | |||
964 | wan(ls->ctok->name, ltok, '\\', ls->tknl)do { if ((ltok) == (ls->tknl)) { (ls->tknl) += (ls-> tknl); (ls->ctok->name) = incmem((ls->ctok->name) , (ltok) * sizeof('\\'), (ls->tknl) * sizeof('\\')); } (ls ->ctok->name)[(ltok) ++] = ('\\'); } while (0); | |||
965 | } | |||
966 | if ((ls->flags & UTF8_SOURCE0x004000UL) && utf8) { | |||
| ||||
967 | unsigned char buf[11]; | |||
968 | int i, j; | |||
969 | ||||
970 | for (i = 0, j = utf8_to_string(buf, utf8); | |||
971 | i < j; i ++) | |||
972 | wan(ls->ctok->name, ltok, buf[i],do { if ((ltok) == (ls->tknl)) { (ls->tknl) += (ls-> tknl); (ls->ctok->name) = incmem((ls->ctok->name) , (ltok) * sizeof(buf[i]), (ls->tknl) * sizeof(buf[i])); } (ls->ctok->name)[(ltok) ++] = (buf[i]); } while (0) | |||
973 | ls->tknl)do { if ((ltok) == (ls->tknl)) { (ls->tknl) += (ls-> tknl); (ls->ctok->name) = incmem((ls->ctok->name) , (ltok) * sizeof(buf[i]), (ls->tknl) * sizeof(buf[i])); } (ls->ctok->name)[(ltok) ++] = (buf[i]); } while (0); | |||
974 | /* if (j > 1) ucn_in_id = 1; */ | |||
975 | } else wan(ls->ctok->name, ltok,do { if ((ltok) == (ls->tknl)) { (ls->tknl) += (ls-> tknl); (ls->ctok->name) = incmem((ls->ctok->name) , (ltok) * sizeof((unsigned char)c), (ls->tknl) * sizeof(( unsigned char)c)); } (ls->ctok->name)[(ltok) ++] = ((unsigned char)c); } while (0) | |||
976 | (unsigned char)c, ls->tknl)do { if ((ltok) == (ls->tknl)) { (ls->tknl) += (ls-> tknl); (ls->ctok->name) = incmem((ls->ctok->name) , (ltok) * sizeof((unsigned char)c), (ls->tknl) * sizeof(( unsigned char)c)); } (ls->ctok->name)[(ltok) ++] = ((unsigned char)c); } while (0); | |||
977 | } | |||
978 | if (ttSTO(nstat)((nstat) & 256)) { | |||
979 | if (S_TOKEN(noMOD(nstat))((((nstat) & 255)) >= NUMBER && (((nstat) & 255)) <= CHAR)) { | |||
980 | wan(ls->ctok->name, ltok,do { if ((ltok) == (ls->tknl)) { (ls->tknl) += (ls-> tknl); (ls->ctok->name) = incmem((ls->ctok->name) , (ltok) * sizeof((unsigned char)0), (ls->tknl) * sizeof(( unsigned char)0)); } (ls->ctok->name)[(ltok) ++] = ((unsigned char)0); } while (0) | |||
981 | (unsigned char)0, ls->tknl)do { if ((ltok) == (ls->tknl)) { (ls->tknl) += (ls-> tknl); (ls->ctok->name) = incmem((ls->ctok->name) , (ltok) * sizeof((unsigned char)0), (ls->tknl) * sizeof(( unsigned char)0)); } (ls->ctok->name)[(ltok) ++] = ((unsigned char)0); } while (0); | |||
982 | } | |||
983 | ls->ctok->type = noMOD(nstat)((nstat) & 255); | |||
984 | break; | |||
985 | } | |||
986 | cstat = noMOD(nstat)((nstat) & 255); | |||
987 | } while (1); | |||
988 | if (!(ls->flags & LEXER0x010000UL) && (ls->flags & DISCARD_COMMENTS0x000080UL) | |||
989 | && ls->ctok->type == COMMENT) put_charucpp_put_char(ls, ' '); | |||
990 | if (ucn_in_id && ls->ctok->type == NAME) | |||
991 | canonize_id(ls, ls->ctok->name); | |||
992 | return 0; | |||
993 | } | |||
994 | ||||
995 | /* | |||
996 | * fills ls->ctok with the next token | |||
997 | */ | |||
998 | int next_tokenucpp_next_token(struct lexer_state *ls) | |||
999 | { | |||
1000 | if (ls->flags & READ_AGAIN0x080000UL) { | |||
| ||||
1001 | ls->flags &= ~READ_AGAIN0x080000UL; | |||
1002 | if (!(ls->flags & LEXER0x010000UL)) { | |||
1003 | char *c = S_TOKEN(ls->ctok->type)((ls->ctok->type) >= NUMBER && (ls->ctok-> type) <= CHAR) ? | |||
1004 | ls->ctok->name : token_nameucpp_token_name(ls->ctok); | |||
1005 | if (ls->ctok->type == OPT_NONE) { | |||
1006 | ls->ctok->type = NONE; | |||
1007 | #ifdef SEMPER_FIDELIS | |||
1008 | ls->ctok->name[0] = ' '; | |||
1009 | ls->ctok->name[1] = 0; | |||
1010 | #endif | |||
1011 | put_charucpp_put_char(ls, ' '); | |||
1012 | } else if (ls->ctok->type != NAME && | |||
1013 | !(ls->ltwnl && (ls->ctok->type == SHARP | |||
1014 | || ls->ctok->type == DIG_SHARP))) | |||
1015 | for (; *c; c ++) put_charucpp_put_char(ls, *c); | |||
1016 | } | |||
1017 | return 0; | |||
1018 | } | |||
1019 | return read_token(ls); | |||
1020 | } |