Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include <sal/types.h>
21 : #include <stdio.h>
22 : #include <ctype.h>
23 : #include <string.h>
24 : #include "cppdef.h"
25 : #include "cpp.h"
26 :
27 : /*ER evaluate macros to pDefOut */
28 :
29 : /*
30 : * skipnl() skips over input text to the end of the line.
31 : * skipws() skips over "whitespace" (spaces or tabs), but
32 : * does not skip over the end of the line. It skips over
33 : * TOK_SEP, however (though that shouldn't happen).
34 : * scanid() reads the next token (C identifier) into token[].
35 : * The caller has already read the first character of
36 : * the identifier. Unlike macroid(), the token is
37 : * never expanded.
38 : * macroid() reads the next token (C identifier) into token[].
39 : * If it is a #defined macro, it is expanded, and
40 : * macroid() returns TRUE, otherwise, FALSE.
41 : * catenate() Does the dirty work of token concatenation, TRUE if it did.
42 : * scanstring() Reads a string from the input stream, calling
43 : * a user-supplied function for each character.
44 : * This function may be output() to write the
45 : * string to the output file, or save() to save
46 : * the string in the work buffer.
47 : * scannumber() Reads a C numeric constant from the input stream,
48 : * calling the user-supplied function for each
49 : * character. (output() or save() as noted above.)
50 : * save() Save one character in the work[] buffer.
51 : * savestring() Saves a string in malloc() memory.
52 : * getfile() Initialize a new FILEINFO structure, called when
53 : * #include opens a new file, or a macro is to be
54 : * expanded.
55 : * getmem() Get a specified number of bytes from malloc memory.
56 : * output() Write one character to stdout (calling PUTCHAR) --
57 : * implemented as a function so its address may be
58 : * passed to scanstring() and scannumber().
59 : * lookid() Scans the next token (identifier) from the input
60 : * stream. Looks for it in the #defined symbol table.
61 : * Returns a pointer to the definition, if found, or NULL
62 : * if not present. The identifier is stored in token[].
63 : * defendel() Define enter/delete subroutine. Updates the
64 : * symbol table.
65 : * get() Read the next byte from the current input stream,
66 : * handling end of (macro/file) input and embedded
67 : * comments appropriately. Note that the global
68 : * instring is -- essentially -- a parameter to get().
69 : * cget() Like get(), but skip over TOK_SEP.
70 : * unget() Push last gotten character back on the input stream.
71 : * cerror(), cwarn(), cfatal(), cierror(), ciwarn()
72 : * These routines format and print messages to the user.
73 : * cerror & cwarn take a format and a single string argument.
74 : * cierror & ciwarn take a format and a single int (char) argument.
75 : * cfatal takes a format and a single string argument.
76 : */
77 :
78 : /*
79 : * This table must be rewritten for a non-Ascii machine.
80 : *
81 : * Note that several "non-visible" characters have special meaning:
82 : * Hex 1D DEF_MAGIC -- a flag to prevent #define recursion.
83 : * Hex 1E TOK_SEP -- a delimiter for token concatenation
84 : * Hex 1F COM_SEP -- a zero-width whitespace for comment concatenation
85 : */
86 : #if TOK_SEP != 0x1E || COM_SEP != 0x1F || DEF_MAGIC != 0x1D
87 : << error type table is not correct >>
88 : #endif
89 :
90 : #if OK_DOLLAR
91 : #define DOL LET
92 : #else
93 : #define DOL 000
94 : #endif
95 :
96 : #ifdef EBCDIC
97 :
98 : char type[256] = { /* Character type codes Hex */
99 : END, 000, 000, 000, 000, SPA, 000, 000, /* 00 */
100 : 000, 000, 000, 000, 000, 000, 000, 000, /* 08 */
101 : 000, 000, 000, 000, 000, 000, 000, 000, /* 10 */
102 : 000, 000, 000, 000, 000, LET, 000, SPA, /* 18 */
103 : 000, 000, 000, 000, 000, 000, 000, 000, /* 20 */
104 : 000, 000, 000, 000, 000, 000, 000, 000, /* 28 */
105 : 000, 000, 000, 000, 000, 000, 000, 000, /* 30 */
106 : 000, 000, 000, 000, 000, 000, 000, 000, /* 38 */
107 : SPA, 000, 000, 000, 000, 000, 000, 000, /* 40 */
108 : 000, 000, 000, DOT, OP_LT,OP_LPA,OP_ADD, OP_OR, /* 48 .<(+| */
109 : OP_AND, 000, 000, 000, 000, 000, 000, 000, /* 50 & */
110 : 000, 000,OP_NOT, DOL,OP_MUL,OP_RPA, 000,OP_XOR, /* 58 !$*);^ */
111 : OP_SUB,OP_DIV, 000, 000, 000, 000, 000, 000, /* 60 -/ */
112 : 000, 000, 000, 000,OP_MOD, LET, OP_GT,OP_QUE, /* 68 ,%_>? */
113 : 000, 000, 000, 000, 000, 000, 000, 000, /* 70 */
114 : 000, 000,OP_COL, 000, 000, QUO, OP_EQ, QUO, /* 78 `:#@'=" */
115 : 000, LET, LET, LET, LET, LET, LET, LET, /* 80 abcdefg */
116 : LET, LET, 000, 000, 000, 000, 000, 000, /* 88 hi */
117 : 000, LET, LET, LET, LET, LET, LET, LET, /* 90 jklmnop */
118 : LET, LET, 000, 000, 000, 000, 000, 000, /* 98 qr */
119 : 000,OP_NOT, LET, LET, LET, LET, LET, LET, /* A0 ~stuvwx */
120 : LET, LET, 000, 000, 000, 000, 000, 000, /* A8 yz [ */
121 : 000, 000, 000, 000, 000, 000, 000, 000, /* B0 */
122 : 000, 000, 000, 000, 000, 000, 000, 000, /* B8 ] */
123 : 000, LET, LET, LET, LET, LET, LET, LET, /* C0 {ABCDEFG */
124 : LET, LET, 000, 000, 000, 000, 000, 000, /* C8 HI */
125 : 000, LET, LET, LET, LET, LET, LET, LET, /* D0 }JKLMNOP */
126 : LET, LET, 000, 000, 000, 000, 000, 000, /* D8 QR */
127 : BSH, 000, LET, LET, LET, LET, LET, LET, /* E0 \ STUVWX */
128 : LET, LET, 000, 000, 000, 000, 000, 000, /* E8 YZ */
129 : DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, /* F0 01234567 */
130 : DIG, DIG, 000, 000, 000, 000, 000, 000, /* F8 89 */
131 : };
132 :
133 : #else
134 :
135 : char type[256] = { /* Character type codes Hex */
136 : END, 000, 000, 000, 000, 000, 000, 000, /* 00 */
137 : 000, SPA, 000, 000, 000, 000, 000, 000, /* 08 */
138 : 000, 000, 000, 000, 000, 000, 000, 000, /* 10 */
139 : 000, 000, 000, 000, 000, LET, 000, SPA, /* 18 */
140 : SPA,OP_NOT, QUO, 000, DOL,OP_MOD,OP_AND, QUO, /* 20 !"#$%&' */
141 : OP_LPA,OP_RPA,OP_MUL,OP_ADD, 000,OP_SUB, DOT,OP_DIV, /* 28 ()*+,-./ */
142 : DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, /* 30 01234567 */
143 : DIG, DIG,OP_COL, 000, OP_LT, OP_EQ, OP_GT,OP_QUE, /* 38 89:;<=>? */
144 : 000, LET, LET, LET, LET, LET, LET, LET, /* 40 @ABCDEFG */
145 : LET, LET, LET, LET, LET, LET, LET, LET, /* 48 HIJKLMNO */
146 : LET, LET, LET, LET, LET, LET, LET, LET, /* 50 PQRSTUVW */
147 : LET, LET, LET, 000, BSH, 000,OP_XOR, LET, /* 58 XYZ[\]^_ */
148 : 000, LET, LET, LET, LET, LET, LET, LET, /* 60 `abcdefg */
149 : LET, LET, LET, LET, LET, LET, LET, LET, /* 68 hijklmno */
150 : LET, LET, LET, LET, LET, LET, LET, LET, /* 70 pqrstuvw */
151 : LET, LET, LET, 000, OP_OR, 000,OP_NOT, 000, /* 78 xyz{|}~ */
152 : 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */
153 : 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */
154 : 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */
155 : 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */
156 : 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */
157 : 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */
158 : 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */
159 : 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */
160 : };
161 :
162 : #endif
163 :
164 : /*
165 : * C P P S y m b o l T a b l e s
166 : */
167 :
168 : /*
169 : * SBSIZE defines the number of hash-table slots for the symbol table.
170 : * It must be a power of 2.
171 : */
172 : #ifndef SBSIZE
173 : #define SBSIZE 64
174 : #endif
175 : #define SBMASK (SBSIZE - 1)
176 : #if (SBSIZE ^ SBMASK) != ((SBSIZE * 2) - 1)
177 : << error, SBSIZE must be a power of 2 >>
178 : #endif
179 :
180 :
181 : static DEFBUF *symtab[SBSIZE]; /* Symbol table queue headers */
182 :
183 630 : void InitCpp6()
184 : {
185 : int i;
186 40950 : for( i = 0; i < SBSIZE; i++ )
187 40320 : symtab[ i ] = NULL;
188 630 : }
189 :
190 :
191 :
192 22529712 : void skipnl()
193 : /*
194 : * Skip to the end of the current input line.
195 : */
196 : {
197 : register int c;
198 :
199 : do { /* Skip to newline */
200 22529712 : c = get();
201 22529712 : } while (c != '\n' && c != EOF_CHAR);
202 404672 : }
203 :
204 : int
205 24120419 : skipws()
206 : /*
207 : * Skip over whitespace
208 : */
209 : {
210 : register int c;
211 :
212 : do { /* Skip whitespace */
213 24120419 : c = get();
214 : #if COMMENT_INVISIBLE
215 : } while (type[c] == SPA || c == COM_SEP);
216 : #else
217 24120419 : } while (type[c] == SPA);
218 : #endif
219 4692130 : return (c);
220 : }
221 :
222 2297347 : void scanid(int c)
223 : /*
224 : * Get the next token (an id) into the token buffer.
225 : * Note: this code is duplicated in lookid().
226 : * Change one, change both.
227 : */
228 : {
229 : register char *bp;
230 :
231 2297347 : if (c == DEF_MAGIC) /* Eat the magic token */
232 0 : c = get(); /* undefiner. */
233 2297347 : bp = token;
234 : do {
235 19935823 : if (bp < &token[IDMAX]) /* token dim is IDMAX+1 */
236 19935823 : *bp++ = (char)c;
237 19935823 : c = get();
238 19935823 : } while (type[c] == LET || type[c] == DIG);
239 2297347 : unget();
240 2297347 : *bp = EOS;
241 2297347 : }
242 :
243 : int
244 857439 : macroid(int c)
245 : /*
246 : * If c is a letter, scan the id. if it's #defined, expand it and scan
247 : * the next character and try again.
248 : *
249 : * Else, return the character. If type[c] is a LET, the token is in token.
250 : */
251 : {
252 : register DEFBUF *dp;
253 :
254 857439 : if (infile != NULL && infile->fp != NULL)
255 577093 : recursion = 0;
256 1830847 : while (type[c] == LET && (dp = lookid(c)) != NULL) {
257 115969 : expand(dp);
258 115969 : c = get();
259 : }
260 857439 : return (c);
261 : }
262 :
263 : int
264 173803 : catenate()
265 : /*
266 : * A token was just read (via macroid).
267 : * If the next character is TOK_SEP, concatenate the next token
268 : * return TRUE -- which should recall macroid after refreshing
269 : * macroid's argument. If it is not TOK_SEP, unget() the character
270 : * and return FALSE.
271 : */
272 : {
273 : register int c;
274 : register char *token1;
275 :
276 : #if OK_CONCAT
277 173803 : if (get() != TOK_SEP) { /* Token concatenation */
278 173707 : unget();
279 173707 : return (FALSE);
280 : }
281 : else {
282 96 : token1 = savestring(token); /* Save first token */
283 96 : c = macroid(get()); /* Scan next token */
284 96 : switch(type[c]) { /* What was it? */
285 : case LET: /* An identifier, ... */
286 92 : if (strlen(token1) + strlen(token) >= NWORK)
287 0 : cfatal("work buffer overflow doing %s #", token1);
288 92 : sprintf(work, "%s%s", token1, token);
289 92 : break;
290 :
291 : case DIG: /* A digit string */
292 4 : strcpy(work, token1);
293 4 : workp = work + strlen(work);
294 : do {
295 1148 : save(c);
296 1148 : } while ((c = get()) != TOK_SEP);
297 : /*
298 : * The trailing TOK_SEP is no longer needed.
299 : */
300 4 : save(EOS);
301 4 : break;
302 :
303 : default: /* An error, ... */
304 : #if ! COMMENT_INVISIBLE
305 0 : if (isprint(c))
306 0 : cierror("Strange character '%c' after #", c);
307 : else
308 0 : cierror("Strange character (%d.) after #", c);
309 : #endif
310 0 : strcpy(work, token1);
311 0 : unget();
312 0 : break;
313 : }
314 : /*
315 : * work has the concatenated token and token1 has
316 : * the first token (no longer needed). Unget the
317 : * new (concatenated) token after freeing token1.
318 : * Finally, setup to read the new token.
319 : */
320 96 : free(token1); /* Free up memory */
321 96 : ungetstring(work); /* Unget the new thing, */
322 96 : return (TRUE);
323 : }
324 : #else
325 : return (FALSE); /* Not supported */
326 : #endif
327 : }
328 :
329 : int
330 219701 : scanstring(int delim,
331 : #ifndef _NO_PROTO
332 : void (*outfun)( int ) /* BP */ /* Output function */
333 : #else
334 : void (*outfun)() /* BP */
335 : #endif
336 : )
337 : /*
338 : * Scan off a string. Warning if terminated by newline or EOF.
339 : * outfun() outputs the character -- to a buffer if in a macro.
340 : * TRUE if ok, FALSE if error.
341 : */
342 : {
343 : register int c;
344 :
345 219701 : instring = TRUE; /* Don't strip comments */
346 219701 : (*outfun)(delim);
347 5272256 : while ((c = get()) != delim
348 4832854 : && c != '\n'
349 4832854 : && c != EOF_CHAR) {
350 :
351 4832854 : if (c != DEF_MAGIC)
352 4832854 : (*outfun)(c);
353 4832854 : if (c == '\\')
354 1116 : (*outfun)(get());
355 : }
356 219701 : instring = FALSE;
357 219701 : if (c == delim) {
358 219701 : (*outfun)(c);
359 219701 : return (TRUE);
360 : }
361 : else {
362 0 : cerror("Unterminated string", NULLST);
363 0 : unget();
364 0 : return (FALSE);
365 : }
366 : }
367 :
368 955282 : void scannumber(int c,
369 : #ifndef _NO_PROTO
370 : register void (*outfun)( int ) /* BP */ /* Output/store func */
371 : #else
372 : register void (*outfun)() /* BP */
373 : #endif
374 : )
375 : /*
376 : * Process a number. We know that c is from 0 to 9 or dot.
377 : * Algorithm from Dave Conroy's Decus C.
378 : */
379 : {
380 : register int radix; /* 8, 10, or 16 */
381 : int expseen; /* 'e' seen in floater */
382 : int signseen; /* '+' or '-' seen */
383 : int octal89; /* For bad octal test */
384 : int dotflag; /* TRUE if '.' was seen */
385 :
386 955282 : expseen = FALSE; /* No exponent seen yet */
387 955282 : signseen = TRUE; /* No +/- allowed yet */
388 955282 : octal89 = FALSE; /* No bad octal yet */
389 955282 : radix = 10; /* Assume decimal */
390 955282 : if ((dotflag = (c == '.')) != FALSE) { /* . something? */
391 0 : (*outfun)('.'); /* Always out the dot */
392 0 : if (type[(c = get())] != DIG) { /* If not a float numb, */
393 0 : unget(); /* Rescan strange char */
394 955282 : return; /* All done for now */
395 : }
396 : } /* End of float test */
397 955282 : else if (c == '0') { /* Octal or hex? */
398 32236 : (*outfun)(c); /* Stuff initial zero */
399 32236 : radix = 8; /* Assume it's octal */
400 32236 : c = get(); /* Look for an 'x' */
401 32236 : if (c == 'x' || c == 'X') { /* Did we get one? */
402 22088 : radix = 16; /* Remember new radix */
403 22088 : (*outfun)(c); /* Stuff the 'x' */
404 22088 : c = get(); /* Get next character */
405 : }
406 : }
407 : for (;;) { /* Process curr. char. */
408 : /*
409 : * Note that this algorithm accepts "012e4" and "03.4"
410 : * as legitimate floating-point numbers.
411 : */
412 3486304 : if (radix != 16 && (c == 'e' || c == 'E')) {
413 0 : if (expseen) /* Already saw 'E'? */
414 0 : break; /* Exit loop, bad nbr. */
415 0 : expseen = TRUE; /* Set exponent seen */
416 0 : signseen = FALSE; /* We can read '+' now */
417 0 : radix = 10; /* Decimal exponent */
418 : }
419 3486304 : else if (radix != 16 && c == '.') {
420 0 : if (dotflag) /* Saw dot already? */
421 0 : break; /* Exit loop, two dots */
422 0 : dotflag = TRUE; /* Remember the dot */
423 0 : radix = 10; /* Decimal fraction */
424 : }
425 3486304 : else if (c == '+' || c == '-') { /* 1.0e+10 */
426 7518 : if (signseen) /* Sign in wrong place? */
427 7518 : break; /* Exit loop, not nbr. */
428 : /* signseen = TRUE; */ /* Remember we saw it */
429 : }
430 : else { /* Check the digit */
431 3478786 : switch (c) {
432 : case '8': case '9': /* Sometimes wrong */
433 346469 : octal89 = TRUE; /* Do check later */
434 : case '0': case '1': case '2': case '3':
435 : case '4': case '5': case '6': case '7':
436 2518630 : break; /* Always ok */
437 :
438 : case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
439 : case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
440 12392 : if (radix == 16) /* Alpha's are ok only */
441 12392 : break; /* if reading hex. */
442 : default: /* At number end */
443 947764 : goto done; /* Break from for loop */
444 : } /* End of switch */
445 : } /* End general case */
446 2531022 : (*outfun)(c); /* Accept the character */
447 2531022 : signseen = TRUE; /* Don't read sign now */
448 2531022 : c = get(); /* Read another char */
449 2531022 : } /* End of scan loop */
450 : /*
451 : * When we break out of the scan loop, c contains the first
452 : * character (maybe) not in the number. If the number is an
453 : * integer, allow a trailing 'L' for long and/or a trailing 'U'
454 : * for unsigned. If not those, push the trailing character back
455 : * on the input stream. Floating point numbers accept a trailing
456 : * 'L' for "long double".
457 : */
458 955282 : done: if (dotflag || expseen) { /* Floating point? */
459 0 : if (c == 'l' || c == 'L') {
460 0 : (*outfun)(c);
461 0 : get(); /* Ungotten later */
462 : }
463 : }
464 : else { /* Else it's an integer */
465 : /*
466 : * We know that dotflag and expseen are both zero, now:
467 : * dotflag signals "saw 'L'", and
468 : * expseen signals "saw 'U'".
469 : */
470 : for (;;) {
471 962608 : switch (c) {
472 : case 'l':
473 : case 'L':
474 3663 : if (dotflag)
475 0 : goto nomore;
476 3663 : dotflag = TRUE;
477 3663 : break;
478 :
479 : case 'u':
480 : case 'U':
481 3663 : if (expseen)
482 0 : goto nomore;
483 3663 : expseen = TRUE;
484 3663 : break;
485 :
486 : default:
487 955282 : goto nomore;
488 : }
489 7326 : (*outfun)(c); /* Got 'L' or 'U'. */
490 7326 : c = get(); /* Look at next, too. */
491 7326 : }
492 : }
493 955282 : nomore: unget(); /* Not part of a number */
494 955282 : if (octal89 && radix == 8)
495 0 : cwarn("Illegal digit in octal number", NULLST);
496 : }
497 :
498 22012616 : void save(int c)
499 : {
500 22012616 : if (workp >= &work[NWORK]) {
501 0 : work[NWORK-1] = '\0';
502 0 : cfatal("Work buffer overflow: %s", work);
503 : }
504 22012616 : else *workp++ = (char)c;
505 22012616 : }
506 :
char *
savestring(char* text)
/*
 * Return a copy of text (including its terminator) in memory
 * obtained from getmem().
 */
{
    size_t bytes = strlen(text) + 1;    /* Characters plus the EOS      */
    char *copy = getmem(bytes);

    memcpy(copy, text, bytes);
    return (copy);
}
519 :
520 : FILEINFO *
521 125474 : getfile(int bufsize, char* name)
522 : /*
523 : * Common FILEINFO buffer initialization for a new file or macro.
524 : */
525 : {
526 : register FILEINFO *file;
527 : register int size;
528 :
529 125474 : size = strlen(name); /* File/macro name */
530 125474 : file = (FILEINFO *) getmem(sizeof (FILEINFO) + bufsize + size);
531 125474 : file->parent = infile; /* Chain files together */
532 125474 : file->fp = NULL; /* No file yet */
533 125474 : file->filename = savestring(name); /* Save file/macro name */
534 125474 : file->progname = NULL; /* No #line seen yet */
535 125474 : file->unrecur = 0; /* No macro fixup */
536 125474 : file->bptr = file->buffer; /* Initialize line ptr */
537 125474 : file->buffer[0] = EOS; /* Force first read */
538 125474 : file->line = 0; /* (Not used just yet) */
539 125474 : if (infile != NULL) /* If #include file */
540 124844 : infile->line = line; /* Save current line */
541 125474 : infile = file; /* New current file */
542 125474 : line = 1; /* Note first line */
543 125474 : return (file); /* All done. */
544 : }
545 :
546 : char *
547 2363389 : getmem(int size)
548 : /*
549 : * Get a block of free memory.
550 : */
551 : {
552 : register char *result;
553 :
554 2363389 : if ((result = malloc((unsigned) size)) == NULL)
555 0 : cfatal("Out of memory", NULLST);
556 2363389 : return (result);
557 : }
558 :
559 : DEFBUF *
560 1349010 : lookid(int c)
561 : /*
562 : * Look for the next token in the symbol table. Returns token in "token".
563 : * If found, returns the table pointer; Else returns NULL.
564 : */
565 : {
566 : register int nhash;
567 : register DEFBUF *dp;
568 : register char *np;
569 1349010 : int temp = 0;
570 : int isrecurse; /* For #define foo foo */
571 :
572 1349010 : np = token;
573 1349010 : nhash = 0;
574 1349010 : if (0 != (isrecurse = (c == DEF_MAGIC))) /* If recursive macro */
575 0 : c = get(); /* hack, skip DEF_MAGIC */
576 : do {
577 23127286 : if (np < &token[IDMAX]) { /* token dim is IDMAX+1 */
578 23127286 : *np++ = (char)c; /* Store token byte */
579 23127286 : nhash += c; /* Update hash value */
580 : }
581 23127286 : c = get(); /* And get another byte */
582 23127286 : } while (type[c] == LET || type[c] == DIG);
583 1349010 : unget(); /* Rescan terminator */
584 1349010 : *np = EOS; /* Terminate token */
585 1349010 : if (isrecurse) /* Recursive definition */
586 0 : return (NULL); /* undefined just now */
587 1349010 : nhash += (np - token); /* Fix hash value */
588 1349010 : dp = symtab[nhash & SBMASK]; /* Starting bucket */
589 27629386 : while (dp != (DEFBUF *) NULL) { /* Search symbol table */
590 25426859 : if (dp->hash == nhash /* Fast precheck */
591 1003005 : && (temp = strcmp(dp->name, token)) >= 0)
592 495493 : break;
593 24931366 : dp = dp->link; /* Nope, try next one */
594 : }
595 1349010 : return ((temp == 0) ? dp : NULL);
596 : }
597 :
598 : DEFBUF *
599 1045172 : defendel(char* name, int delete)
600 : /*
601 : * Enter this name in the lookup table (delete = FALSE)
602 : * or delete this name (delete = TRUE).
603 : * Returns a pointer to the define block (delete = FALSE)
604 : * Returns NULL if the symbol wasn't defined (delete = TRUE).
605 : */
606 : {
607 : register DEFBUF *dp;
608 : register DEFBUF **prevp;
609 : register char *np;
610 : int nhash;
611 : int temp;
612 : int size;
613 :
614 22122551 : for (nhash = 0, np = name; *np != EOS;)
615 20032207 : nhash += *np++;
616 1045172 : size = (np - name);
617 1045172 : nhash += size;
618 1045172 : prevp = &symtab[nhash & SBMASK];
619 19613505 : while ((dp = *prevp) != (DEFBUF *) NULL) {
620 17869493 : if (dp->hash == nhash
621 774450 : && (temp = strcmp(dp->name, name)) >= 0) {
622 346332 : if (temp > 0)
623 346263 : dp = NULL; /* Not found */
624 : else {
625 69 : *prevp = dp->link; /* Found, unlink and */
626 69 : if (dp->repl != NULL) /* Free the replacement */
627 69 : free(dp->repl); /* if any, and then */
628 69 : free((char *) dp); /* Free the symbol */
629 : }
630 346332 : break;
631 : }
632 17523161 : prevp = &dp->link;
633 : }
634 1045172 : if (!delete) {
635 1045103 : dp = (DEFBUF *) getmem(sizeof (DEFBUF) + size);
636 1045103 : dp->link = *prevp;
637 1045103 : *prevp = dp;
638 1045103 : dp->hash = nhash;
639 1045103 : dp->repl = NULL;
640 1045103 : dp->nargs = 0;
641 1045103 : strcpy(dp->name, name);
642 : }
643 1045172 : return (dp);
644 : }
645 :
646 : #if OSL_DEBUG_LEVEL > 1
647 :
648 : void dumpdef(char *why)
649 : {
650 : register DEFBUF *dp;
651 : register DEFBUF **syp;
652 : FILE *pRememberOut = NULL;
653 :
654 : if ( bDumpDefs ) /*ER */
655 : {
656 : pRememberOut = pCppOut;
657 : pCppOut = pDefOut;
658 : }
659 : fprintf( pCppOut, "CPP symbol table dump %s\n", why);
660 : for (syp = symtab; syp < &symtab[SBSIZE]; syp++) {
661 : if ((dp = *syp) != (DEFBUF *) NULL) {
662 : fprintf( pCppOut, "symtab[%" SAL_PRI_PTRDIFFT "d]\n", (syp - symtab));
663 : do {
664 : dumpadef((char *) NULL, dp);
665 : } while ((dp = dp->link) != (DEFBUF *) NULL);
666 : }
667 : }
668 : if ( bDumpDefs )
669 : {
670 : fprintf( pCppOut, "\n");
671 : pCppOut = pRememberOut;
672 : }
673 : }
674 :
675 : void dumpadef(char *why, register DEFBUF *dp)
676 : {
677 : register char *cp;
678 : register int c;
679 : FILE *pRememberOut = NULL;
680 :
681 : /*ER dump #define's to pDefOut */
682 : if ( bDumpDefs )
683 : {
684 : pRememberOut = pCppOut;
685 : pCppOut = pDefOut;
686 : }
687 : fprintf( pCppOut, " \"%s\" [%d]", dp->name, dp->nargs);
688 : if (why != NULL)
689 : fprintf( pCppOut, " (%s)", why);
690 : if (dp->repl != NULL) {
691 : fprintf( pCppOut, " => ");
692 : for (cp = dp->repl; (c = *cp++ & 0xFF) != EOS;) {
693 : #ifdef SOLAR
694 : if (c == DEL) {
695 : c = *cp++ & 0xFF;
696 : if( c == EOS ) break;
697 : fprintf( pCppOut, "<%%%d>", c - MAC_PARM);
698 : }
699 : #else
700 : if (c >= MAC_PARM && c <= (MAC_PARM + PAR_MAC))
701 : fprintf( pCppOut, "<%%%d>", c - MAC_PARM);
702 : #endif
703 : else if (isprint(c) || c == '\n' || c == '\t')
704 : PUTCHAR(c);
705 : else if (c < ' ')
706 : fprintf( pCppOut, "<^%c>", c + '@');
707 : else
708 : fprintf( pCppOut, "<\\0%o>", c);
709 : }
710 : /*ER evaluate macros to pDefOut */
711 : #ifdef EVALDEFS
712 : if ( bDumpDefs && !bIsInEval && dp->nargs <= 0 )
713 : {
714 : FILEINFO *infileSave = infile;
715 : char *tokenSave = savestring( token );
716 : char *workSave = savestring( work );
717 : int lineSave = line;
718 : int wronglineSave = wrongline;
719 : int recursionSave = recursion;
720 : FILEINFO *file;
721 : EVALTYPE valEval;
722 :
723 : bIsInEval = 1;
724 : infile = NULL; /* start from scrap */
725 : line = 0;
726 : wrongline = 0;
727 : *token = EOS;
728 : *work = EOS;
729 : recursion = 0;
730 : file = getfile( strlen( dp->repl ), dp->name );
731 : strcpy( file->buffer, dp->repl );
732 : fprintf( pCppOut, " ===> ");
733 : nEvalOff = 0;
734 : cppmain(); /* get() frees also *file */
735 : valEval = 0;
736 : if ( 0 == evaluate( EvalBuf, &valEval ) )
737 : {
738 : #ifdef EVALFLOATS
739 : if ( valEval != (EVALTYPE)((long)valEval ) )
740 : fprintf( pCppOut, " ==eval=> %f", valEval );
741 : else
742 : #endif
743 : fprintf( pCppOut, " ==eval=> %ld", (long)valEval );
744 : }
745 : recursion = recursionSave;
746 : wrongline = wronglineSave;
747 : line = lineSave;
748 : strcpy( work, workSave );
749 : free( workSave );
750 : strcpy( token, tokenSave );
751 : free( tokenSave );
752 : infile = infileSave;
753 : bIsInEval = 0;
754 : }
755 : #endif
756 : }
757 : else {
758 : fprintf( pCppOut, ", no replacement.");
759 : }
760 : PUTCHAR('\n');
761 : if ( bDumpDefs )
762 : pCppOut = pRememberOut;
763 : }
764 : #endif
765 :
766 : /*
767 : * G E T
768 : */
769 :
770 : int
771 130224865 : get()
772 : /*
773 : * Return the next character from a macro or the current file.
774 : * Handle end of file from #include files.
775 : */
776 : {
777 : register int c;
778 : register FILEINFO *file;
779 : register int popped; /* Recursion fixup */
780 :
781 130224865 : popped = 0;
782 : get_from_file:
783 130349709 : if ((file = infile) == NULL)
784 1 : return (EOF_CHAR);
785 : newline:
786 :
787 : /*
788 : * Read a character from the current input line or macro.
789 : * At EOS, either finish the current macro (freeing temp.
790 : * storage) or read another line from the current input file.
791 : * At EOF, exit the current file (#include) or, at EOF from
792 : * the cpp input file, return EOF_CHAR to finish processing.
793 : */
794 132838341 : if ((c = *file->bptr++ & 0xFF) == EOS) {
795 : /*
796 : * Nothing in current line or macro. Get next line (if
797 : * input from a file), or do end of file/macro processing.
798 : * In the latter case, jump back to restart from the top.
799 : */
800 2561945 : if (file->fp == NULL) { /* NULL if macro */
801 116065 : popped++;
802 116065 : recursion -= file->unrecur;
803 116065 : if (recursion < 0)
804 0 : recursion = 0;
805 116065 : infile = file->parent; /* Unwind file chain */
806 : }
807 : else { /* Else get from a file */
808 2445880 : if ((file->bptr = fgets(file->buffer, NBUFF, file->fp))
809 : != NULL) {
810 : #if OSL_DEBUG_LEVEL > 1
811 : if (debug > 1) { /* Dump it to stdout */
812 : fprintf( pCppOut, "\n#line %d (%s), %s",
813 : line, file->filename, file->buffer);
814 : }
815 : #endif
816 2436471 : goto newline; /* process the line */
817 : }
818 : else {
819 9409 : if( file->fp != stdin )
820 9409 : fclose(file->fp); /* Close finished file */
821 9409 : if ((infile = file->parent) != NULL) {
822 : /*
823 : * There is an "ungotten" newline in the current
824 : * infile buffer (set there by doinclude() in
825 : * cpp1.c). Thus, we know that the mainline code
826 : * is skipping over blank lines and will do a
827 : * #line at its convenience.
828 : */
829 8779 : wrongline = TRUE; /* Need a #line now */
830 : }
831 : }
832 : }
833 : /*
834 : * Free up space used by the (finished) file or macro and
835 : * restart input from the parent file/macro, if any.
836 : */
837 125474 : free(file->filename); /* Free name and */
838 125474 : if (file->progname != NULL) /* if a #line was seen, */
839 0 : free(file->progname); /* free it, too. */
840 125474 : free((char *) file); /* Free file space */
841 125474 : if (infile == NULL) /* If at end of file */
842 630 : return (EOF_CHAR); /* Return end of file */
843 124844 : line = infile->line; /* Reset line number */
844 124844 : goto get_from_file; /* Get from the top. */
845 : }
846 : /*
847 : * Common processing for the new character.
848 : */
849 130276396 : if (c == DEF_MAGIC && file->fp != NULL) /* Don't allow delete */
850 0 : goto newline; /* from a file */
851 130276396 : if (file->parent != NULL) { /* Macro or #include */
852 125127247 : if (popped != 0)
853 81713 : file->parent->unrecur += popped;
854 : else {
855 125045534 : recursion -= file->parent->unrecur;
856 125045534 : if (recursion < 0)
857 943 : recursion = 0;
858 125045534 : file->parent->unrecur = 0;
859 : }
860 : }
861 : #if (HOST == SYS_UNIX)
862 130276396 : /*ER*/ if (c == '\r')
863 0 : /*ER*/ return get(); /* DOS fuck */
864 : #endif
865 130276396 : if (c == '\n') /* Maintain current */
866 3678722 : ++line; /* line counter */
867 130276396 : if (instring) /* Strings just return */
868 25558773 : return (c); /* the character. */
869 104717623 : else if (c == '/') { /* Comment? */
870 295862 : instring = TRUE; /* So get() won't loop */
871 : /*MM c++ comments */
872 295862 : /*MM*/ c = get();
873 295862 : /*MM*/ if ((c != '*') && (c != '/')) { /* Next byte '*'? */
874 2407 : instring = FALSE; /* Nope, no comment */
875 2407 : unget(); /* Push the char. back */
876 2407 : return ('/'); /* Return the slash */
877 : }
878 293455 : if (keepcomments) { /* If writing comments */
879 0 : PUTCHAR('/'); /* Write out the */
880 : /* initializer */
881 0 : /*MM*/ if( '*' == c )
882 0 : PUTCHAR('*');
883 : /*MM*/ else
884 0 : /*MM*/ PUTCHAR('/');
885 :
886 : }
887 293455 : /*MM*/ if( '*' == c ){
888 : for (;;) { /* Eat a comment */
889 9650739 : c = get();
890 9892780 : test: if (keepcomments && c != EOF_CHAR)
891 0 : cput(c);
892 9892780 : switch (c) {
893 : case EOF_CHAR:
894 0 : cerror("EOF in comment", NULLST);
895 0 : return (EOF_CHAR);
896 :
897 : case '/':
898 85480 : if ((c = get()) != '*') /* Don't let comments */
899 85480 : goto test; /* Nest. */
900 : #ifdef STRICT_COMMENTS
901 : cwarn("Nested comments", NULLST);
902 : #endif
903 : /* Fall into * stuff */
904 : case '*':
905 185641 : if ((c = get()) != '/') /* If comment doesn't */
906 156561 : goto test; /* end, look at next */
907 29080 : instring = FALSE; /* End of comment, */
908 29080 : if (keepcomments) { /* Put out the comment */
909 0 : cput(c); /* terminator, too */
910 : }
911 : /*
912 : * A comment is syntactically "whitespace" --
913 : * however, there are certain strange sequences
914 : * such as
915 : * #define foo(x) (something)
916 : * foo|* comment *|(123)
917 : * these are '/' ^ ^
918 : * where just returning space (or COM_SEP) will cause
919 : * problems. This can be "fixed" by overwriting the
920 : * '/' in the input line buffer with ' ' (or COM_SEP)
921 : * but that may mess up an error message.
922 : * So, we peek ahead -- if the next character is
923 : * "whitespace" we just get another character, if not,
924 : * we modify the buffer. All in the name of purity.
925 : */
926 29080 : if (*file->bptr == '\n'
927 111 : || type[*file->bptr & 0xFF] == SPA)
928 : goto newline;
929 : #if COMMENT_INVISIBLE
930 : /*
931 : * Return magic (old-fashioned) syntactic space.
932 : */
933 : return ((file->bptr[-1] = COM_SEP));
934 : #else
935 77 : return ((file->bptr[-1] = ' '));
936 : #endif
937 :
938 : case '\n': /* we'll need a #line */
939 172928 : if (!keepcomments)
940 172928 : wrongline = TRUE; /* later... */
941 : default: /* Anything else is */
942 9621659 : break; /* Just a character */
943 : } /* End switch */
944 9621659 : } /* End comment loop */
945 : }
946 : else{ /* c++ comment */
947 : /*MM c++ comment*/
948 : for (;;) { /* Eat a comment */
949 10153406 : c = get();
950 10153406 : if (keepcomments && c != EOF_CHAR)
951 0 : cput(c);
952 10153406 : if( EOF_CHAR == c )
953 0 : return (EOF_CHAR);
954 10153406 : else if( '\n' == c ){
955 264375 : instring = FALSE; /* End of comment, */
956 264375 : return( c );
957 : }
958 9889031 : }
959 : }
960 : } /* End if in comment */
961 104421761 : else if (!inmacro && c == '\\') { /* If backslash, peek */
962 23159 : if ((c = get()) == '\n') { /* for a <nl>. If so, */
963 23159 : wrongline = TRUE;
964 23159 : goto newline;
965 : }
966 : else { /* Backslash anything */
967 0 : unget(); /* Get it later */
968 0 : return ('\\'); /* Return the backslash */
969 : }
970 : }
971 104398602 : else if (c == '\f' || c == VT) /* Form Feed, Vertical */
972 0 : c = ' '; /* Tab are whitespace */
973 104398602 : else if (c == 0xef) /* eat up UTF-8 BOM */
974 : {
975 8 : if((c = get()) == 0xbb)
976 : {
977 8 : if((c = get()) == 0xbf)
978 : {
979 8 : c = get();
980 8 : return c;
981 : }
982 : else
983 : {
984 0 : unget();
985 0 : unget();
986 0 : return 0xef;
987 : }
988 : }
989 : else
990 : {
991 0 : unget();
992 0 : return 0xef;
993 : }
994 : }
995 104398594 : return (c); /* Just return the char */
996 : }
997 :
998 5972882 : void unget()
999 : /*
1000 : * Backup the pointer to reread the last character. Fatal error
1001 : * (code bug) if we backup too far. unget() may be called,
1002 : * without problems, at end of file. Only one character may
1003 : * be ungotten. If you need to unget more, call ungetstring().
1004 : */
1005 : {
1006 : register FILEINFO *file;
1007 :
1008 5972882 : if ((file = infile) == NULL)
1009 5972882 : return; /* Unget after EOF */
1010 5972882 : if (--file->bptr < file->buffer)
1011 0 : cfatal("Too much pushback", NULLST);
1012 5972882 : if (*file->bptr == '\n') /* Ungetting a newline? */
1013 1222965 : --line; /* Unget the line number, too */
1014 : }
1015 :
1016 96 : void ungetstring(char* text)
1017 : /*
1018 : * Push a string back on the input stream. This is done by treating
1019 : * the text as if it were a macro.
1020 : */
1021 : {
1022 : register FILEINFO *file;
1023 : extern FILEINFO *getfile();
1024 96 : file = getfile(strlen(text) + 1, "");
1025 96 : strcpy(file->buffer, text);
1026 96 : }
1027 :
1028 : int
1029 34883 : cget()
1030 : /*
1031 : * Get one character, absorb "funny space" after comments or
1032 : * token concatenation
1033 : */
1034 : {
1035 : register int c;
1036 :
1037 : do {
1038 34883 : c = get();
1039 : #if COMMENT_INVISIBLE
1040 : } while (c == TOK_SEP || c == COM_SEP);
1041 : #else
1042 34883 : } while (c == TOK_SEP);
1043 : #endif
1044 34883 : return (c);
1045 : }
1046 :
/*
 * Error messages and other hacks.  The first byte of severity
 * is 'S' for string arguments and 'I' for int arguments.  This
 * is needed for portability with machines that have ints that
 * are shorter than char pointers.
 */
1053 :
1054 0 : static void domsg(char* severity, char* format, void* arg)
1055 : /*
1056 : * Print filenames, macro names, and line numbers for error messages.
1057 : */
1058 : {
1059 : register char *tp;
1060 : register FILEINFO *file;
1061 :
1062 0 : fprintf(stderr, "%sline %d, %s: ", MSG_PREFIX, line, &severity[1]);
1063 0 : if (*severity == 'S')
1064 0 : fprintf(stderr, format, (char *)arg);
1065 : else
1066 0 : fprintf(stderr, format, *((int *)arg) );
1067 0 : putc('\n', stderr);
1068 0 : if ((file = infile) == NULL)
1069 0 : return; /* At end of file */
1070 0 : if (file->fp != NULL) {
1071 0 : tp = file->buffer; /* Print current file */
1072 0 : fprintf(stderr, "%s", tp); /* name, making sure */
1073 0 : if (tp[strlen(tp) - 1] != '\n') /* there's a newline */
1074 0 : putc('\n', stderr);
1075 : }
1076 0 : while ((file = file->parent) != NULL) { /* Print #includes, too */
1077 0 : if (file->fp == NULL)
1078 0 : fprintf(stderr, "from macro %s\n", file->filename);
1079 : else {
1080 0 : tp = file->buffer;
1081 0 : fprintf(stderr, "from file %s, line %d:\n%s",
1082 0 : (file->progname != NULL)
1083 : ? file->progname : file->filename,
1084 : file->line, tp);
1085 0 : if (tp[strlen(tp) - 1] != '\n')
1086 0 : putc('\n', stderr);
1087 : }
1088 : }
1089 : }
1090 :
1091 0 : void cerror(char* format, char* sarg)
1092 : /*
1093 : * Print a normal error message, string argument.
1094 : */
1095 : {
1096 0 : domsg("SError", format, sarg);
1097 0 : errors++;
1098 0 : }
1099 :
1100 0 : void cierror(char* format, int narg)
1101 : /*
1102 : * Print a normal error message, numeric argument.
1103 : */
1104 : {
1105 0 : domsg("IError", format, &narg);
1106 0 : errors++;
1107 0 : }
1108 :
1109 0 : void cfatal(char* format, char* sarg)
1110 : /*
1111 : * A real disaster
1112 : */
1113 : {
1114 0 : domsg("SFatal error", format, sarg);
1115 0 : exit(IO_ERROR);
1116 : }
1117 :
void cwarn(char* format, char* sarg)
/*
 * Report a warning whose format takes one string argument.  Unlike
 * cerror(), the errors counter is left untouched.
 */
{
    /* The leading 'S' tells domsg() that the argument is a string. */
    domsg("SWarning", format, (void *) sarg);
}
1125 :
void ciwarn(char* format, int narg)
/*
 * Report a warning whose format takes one int argument.  Unlike
 * cierror(), the errors counter is left untouched.
 */
{
    int value = narg;

    /* The leading 'I' tells domsg() that it is handed a pointer to int. */
    domsg("IWarning", format, (void *) &value);
}
1133 :
1134 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|