Line data Source code
1 : /* Parser generator */
2 :
3 : /* For a description, see the comments at end of this file */
4 :
5 : #include "Python.h"
6 : #include "pgenheaders.h"
7 : #include "token.h"
8 : #include "node.h"
9 : #include "grammar.h"
10 : #include "metagrammar.h"
11 : #include "pgen.h"
12 :
13 : extern int Py_DebugFlag;
14 : extern int Py_IgnoreEnvironmentFlag; /* needed by Py_GETENV */
15 :
16 :
17 : /* PART ONE -- CONSTRUCT NFA -- Cf. Algorithm 3.2 from [Aho&Ullman 77] */
18 :
19 : typedef struct _nfaarc {
20 : int ar_label;
21 : int ar_arrow;
22 : } nfaarc;
23 :
24 : typedef struct _nfastate {
25 : int st_narcs;
26 : nfaarc *st_arc;
27 : } nfastate;
28 :
29 : typedef struct _nfa {
30 : int nf_type;
31 : char *nf_name;
32 : int nf_nstates;
33 : nfastate *nf_state;
34 : int nf_start, nf_finish;
35 : } nfa;
36 :
37 : /* Forward */
38 : static void compile_rhs(labellist *ll,
39 : nfa *nf, node *n, int *pa, int *pb);
40 : static void compile_alt(labellist *ll,
41 : nfa *nf, node *n, int *pa, int *pb);
42 : static void compile_item(labellist *ll,
43 : nfa *nf, node *n, int *pa, int *pb);
44 : static void compile_atom(labellist *ll,
45 : nfa *nf, node *n, int *pa, int *pb);
46 :
47 : static int
48 0 : addnfastate(nfa *nf)
49 : {
50 : nfastate *st;
51 :
52 0 : nf->nf_state = (nfastate *)PyObject_REALLOC(nf->nf_state,
53 0 : sizeof(nfastate) * (nf->nf_nstates + 1));
54 0 : if (nf->nf_state == NULL)
55 0 : Py_FatalError("out of mem");
56 0 : st = &nf->nf_state[nf->nf_nstates++];
57 0 : st->st_narcs = 0;
58 0 : st->st_arc = NULL;
59 0 : return st - nf->nf_state;
60 : }
61 :
62 : static void
63 0 : addnfaarc(nfa *nf, int from, int to, int lbl)
64 : {
65 : nfastate *st;
66 : nfaarc *ar;
67 :
68 0 : st = &nf->nf_state[from];
69 0 : st->st_arc = (nfaarc *)PyObject_REALLOC(st->st_arc,
70 0 : sizeof(nfaarc) * (st->st_narcs + 1));
71 0 : if (st->st_arc == NULL)
72 0 : Py_FatalError("out of mem");
73 0 : ar = &st->st_arc[st->st_narcs++];
74 0 : ar->ar_label = lbl;
75 0 : ar->ar_arrow = to;
76 0 : }
77 :
78 : static nfa *
79 0 : newnfa(char *name)
80 : {
81 : nfa *nf;
82 : static int type = NT_OFFSET; /* All types will be disjunct */
83 :
84 0 : nf = (nfa *)PyObject_MALLOC(sizeof(nfa));
85 0 : if (nf == NULL)
86 0 : Py_FatalError("no mem for new nfa");
87 0 : nf->nf_type = type++;
88 0 : nf->nf_name = name; /* XXX strdup(name) ??? */
89 0 : nf->nf_nstates = 0;
90 0 : nf->nf_state = NULL;
91 0 : nf->nf_start = nf->nf_finish = -1;
92 0 : return nf;
93 : }
94 :
95 : typedef struct _nfagrammar {
96 : int gr_nnfas;
97 : nfa **gr_nfa;
98 : labellist gr_ll;
99 : } nfagrammar;
100 :
101 : /* Forward */
102 : static void compile_rule(nfagrammar *gr, node *n);
103 :
104 : static nfagrammar *
105 0 : newnfagrammar(void)
106 : {
107 : nfagrammar *gr;
108 :
109 0 : gr = (nfagrammar *)PyObject_MALLOC(sizeof(nfagrammar));
110 0 : if (gr == NULL)
111 0 : Py_FatalError("no mem for new nfa grammar");
112 0 : gr->gr_nnfas = 0;
113 0 : gr->gr_nfa = NULL;
114 0 : gr->gr_ll.ll_nlabels = 0;
115 0 : gr->gr_ll.ll_label = NULL;
116 0 : addlabel(&gr->gr_ll, ENDMARKER, "EMPTY");
117 0 : return gr;
118 : }
119 :
120 : static nfa *
121 0 : addnfa(nfagrammar *gr, char *name)
122 : {
123 : nfa *nf;
124 :
125 0 : nf = newnfa(name);
126 0 : gr->gr_nfa = (nfa **)PyObject_REALLOC(gr->gr_nfa,
127 0 : sizeof(nfa*) * (gr->gr_nnfas + 1));
128 0 : if (gr->gr_nfa == NULL)
129 0 : Py_FatalError("out of mem");
130 0 : gr->gr_nfa[gr->gr_nnfas++] = nf;
131 0 : addlabel(&gr->gr_ll, NAME, nf->nf_name);
132 0 : return nf;
133 : }
134 :
135 : #ifdef Py_DEBUG
136 :
137 : static char REQNFMT[] = "metacompile: less than %d children\n";
138 :
139 : #define REQN(i, count) \
140 : if (i < count) { \
141 : fprintf(stderr, REQNFMT, count); \
142 : Py_FatalError("REQN"); \
143 : } else
144 :
145 : #else
146 : #define REQN(i, count) /* empty */
147 : #endif
148 :
149 : static nfagrammar *
150 0 : metacompile(node *n)
151 : {
152 : nfagrammar *gr;
153 : int i;
154 :
155 0 : if (Py_DebugFlag)
156 0 : printf("Compiling (meta-) parse tree into NFA grammar\n");
157 0 : gr = newnfagrammar();
158 : REQ(n, MSTART);
159 0 : i = n->n_nchildren - 1; /* Last child is ENDMARKER */
160 0 : n = n->n_child;
161 0 : for (; --i >= 0; n++) {
162 0 : if (n->n_type != NEWLINE)
163 0 : compile_rule(gr, n);
164 : }
165 0 : return gr;
166 : }
167 :
168 : static void
169 0 : compile_rule(nfagrammar *gr, node *n)
170 : {
171 : nfa *nf;
172 :
173 : REQ(n, RULE);
174 : REQN(n->n_nchildren, 4);
175 0 : n = n->n_child;
176 : REQ(n, NAME);
177 0 : nf = addnfa(gr, n->n_str);
178 0 : n++;
179 : REQ(n, COLON);
180 0 : n++;
181 : REQ(n, RHS);
182 0 : compile_rhs(&gr->gr_ll, nf, n, &nf->nf_start, &nf->nf_finish);
183 0 : n++;
184 : REQ(n, NEWLINE);
185 0 : }
186 :
187 : static void
188 0 : compile_rhs(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
189 : {
190 : int i;
191 : int a, b;
192 :
193 : REQ(n, RHS);
194 0 : i = n->n_nchildren;
195 : REQN(i, 1);
196 0 : n = n->n_child;
197 : REQ(n, ALT);
198 0 : compile_alt(ll, nf, n, pa, pb);
199 0 : if (--i <= 0)
200 0 : return;
201 0 : n++;
202 0 : a = *pa;
203 0 : b = *pb;
204 0 : *pa = addnfastate(nf);
205 0 : *pb = addnfastate(nf);
206 0 : addnfaarc(nf, *pa, a, EMPTY);
207 0 : addnfaarc(nf, b, *pb, EMPTY);
208 0 : for (; --i >= 0; n++) {
209 : REQ(n, VBAR);
210 : REQN(i, 1);
211 0 : --i;
212 0 : n++;
213 : REQ(n, ALT);
214 0 : compile_alt(ll, nf, n, &a, &b);
215 0 : addnfaarc(nf, *pa, a, EMPTY);
216 0 : addnfaarc(nf, b, *pb, EMPTY);
217 : }
218 : }
219 :
220 : static void
221 0 : compile_alt(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
222 : {
223 : int i;
224 : int a, b;
225 :
226 : REQ(n, ALT);
227 0 : i = n->n_nchildren;
228 : REQN(i, 1);
229 0 : n = n->n_child;
230 : REQ(n, ITEM);
231 0 : compile_item(ll, nf, n, pa, pb);
232 0 : --i;
233 0 : n++;
234 0 : for (; --i >= 0; n++) {
235 : REQ(n, ITEM);
236 0 : compile_item(ll, nf, n, &a, &b);
237 0 : addnfaarc(nf, *pb, a, EMPTY);
238 0 : *pb = b;
239 : }
240 0 : }
241 :
242 : static void
243 0 : compile_item(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
244 : {
245 : int i;
246 : int a, b;
247 :
248 : REQ(n, ITEM);
249 0 : i = n->n_nchildren;
250 : REQN(i, 1);
251 0 : n = n->n_child;
252 0 : if (n->n_type == LSQB) {
253 : REQN(i, 3);
254 0 : n++;
255 : REQ(n, RHS);
256 0 : *pa = addnfastate(nf);
257 0 : *pb = addnfastate(nf);
258 0 : addnfaarc(nf, *pa, *pb, EMPTY);
259 0 : compile_rhs(ll, nf, n, &a, &b);
260 0 : addnfaarc(nf, *pa, a, EMPTY);
261 0 : addnfaarc(nf, b, *pb, EMPTY);
262 : REQN(i, 1);
263 0 : n++;
264 : REQ(n, RSQB);
265 : }
266 : else {
267 0 : compile_atom(ll, nf, n, pa, pb);
268 0 : if (--i <= 0)
269 0 : return;
270 0 : n++;
271 0 : addnfaarc(nf, *pb, *pa, EMPTY);
272 0 : if (n->n_type == STAR)
273 0 : *pb = *pa;
274 : else
275 : REQ(n, PLUS);
276 : }
277 : }
278 :
279 : static void
280 0 : compile_atom(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
281 : {
282 : int i;
283 :
284 : REQ(n, ATOM);
285 0 : i = n->n_nchildren;
286 : REQN(i, 1);
287 0 : n = n->n_child;
288 0 : if (n->n_type == LPAR) {
289 : REQN(i, 3);
290 0 : n++;
291 : REQ(n, RHS);
292 0 : compile_rhs(ll, nf, n, pa, pb);
293 0 : n++;
294 : REQ(n, RPAR);
295 : }
296 0 : else if (n->n_type == NAME || n->n_type == STRING) {
297 0 : *pa = addnfastate(nf);
298 0 : *pb = addnfastate(nf);
299 0 : addnfaarc(nf, *pa, *pb, addlabel(ll, n->n_type, n->n_str));
300 : }
301 : else
302 : REQ(n, NAME);
303 0 : }
304 :
305 : static void
306 0 : dumpstate(labellist *ll, nfa *nf, int istate)
307 : {
308 : nfastate *st;
309 : int i;
310 : nfaarc *ar;
311 :
312 0 : printf("%c%2d%c",
313 0 : istate == nf->nf_start ? '*' : ' ',
314 : istate,
315 0 : istate == nf->nf_finish ? '.' : ' ');
316 0 : st = &nf->nf_state[istate];
317 0 : ar = st->st_arc;
318 0 : for (i = 0; i < st->st_narcs; i++) {
319 0 : if (i > 0)
320 0 : printf("\n ");
321 0 : printf("-> %2d %s", ar->ar_arrow,
322 0 : PyGrammar_LabelRepr(&ll->ll_label[ar->ar_label]));
323 0 : ar++;
324 : }
325 0 : printf("\n");
326 0 : }
327 :
328 : static void
329 0 : dumpnfa(labellist *ll, nfa *nf)
330 : {
331 : int i;
332 :
333 0 : printf("NFA '%s' has %d states; start %d, finish %d\n",
334 : nf->nf_name, nf->nf_nstates, nf->nf_start, nf->nf_finish);
335 0 : for (i = 0; i < nf->nf_nstates; i++)
336 0 : dumpstate(ll, nf, i);
337 0 : }
338 :
339 :
340 : /* PART TWO -- CONSTRUCT DFA -- Algorithm 3.1 from [Aho&Ullman 77] */
341 :
342 : static void
343 0 : addclosure(bitset ss, nfa *nf, int istate)
344 : {
345 0 : if (addbit(ss, istate)) {
346 0 : nfastate *st = &nf->nf_state[istate];
347 0 : nfaarc *ar = st->st_arc;
348 : int i;
349 :
350 0 : for (i = st->st_narcs; --i >= 0; ) {
351 0 : if (ar->ar_label == EMPTY)
352 0 : addclosure(ss, nf, ar->ar_arrow);
353 0 : ar++;
354 : }
355 : }
356 0 : }
357 :
358 : typedef struct _ss_arc {
359 : bitset sa_bitset;
360 : int sa_arrow;
361 : int sa_label;
362 : } ss_arc;
363 :
364 : typedef struct _ss_state {
365 : bitset ss_ss;
366 : int ss_narcs;
367 : struct _ss_arc *ss_arc;
368 : int ss_deleted;
369 : int ss_finish;
370 : int ss_rename;
371 : } ss_state;
372 :
373 : typedef struct _ss_dfa {
374 : int sd_nstates;
375 : ss_state *sd_state;
376 : } ss_dfa;
377 :
378 : /* Forward */
379 : static void printssdfa(int xx_nstates, ss_state *xx_state, int nbits,
380 : labellist *ll, char *msg);
381 : static void simplify(int xx_nstates, ss_state *xx_state);
382 : static void convert(dfa *d, int xx_nstates, ss_state *xx_state);
383 :
384 : static void
385 0 : makedfa(nfagrammar *gr, nfa *nf, dfa *d)
386 : {
387 0 : int nbits = nf->nf_nstates;
388 : bitset ss;
389 : int xx_nstates;
390 : ss_state *xx_state, *yy;
391 : ss_arc *zz;
392 : int istate, jstate, iarc, jarc, ibit;
393 : nfastate *st;
394 : nfaarc *ar;
395 :
396 0 : ss = newbitset(nbits);
397 0 : addclosure(ss, nf, nf->nf_start);
398 0 : xx_state = (ss_state *)PyObject_MALLOC(sizeof(ss_state));
399 0 : if (xx_state == NULL)
400 0 : Py_FatalError("no mem for xx_state in makedfa");
401 0 : xx_nstates = 1;
402 0 : yy = &xx_state[0];
403 0 : yy->ss_ss = ss;
404 0 : yy->ss_narcs = 0;
405 0 : yy->ss_arc = NULL;
406 0 : yy->ss_deleted = 0;
407 0 : yy->ss_finish = testbit(ss, nf->nf_finish);
408 0 : if (yy->ss_finish)
409 0 : printf("Error: nonterminal '%s' may produce empty.\n",
410 : nf->nf_name);
411 :
412 : /* This algorithm is from a book written before
413 : the invention of structured programming... */
414 :
415 : /* For each unmarked state... */
416 0 : for (istate = 0; istate < xx_nstates; ++istate) {
417 : size_t size;
418 0 : yy = &xx_state[istate];
419 0 : ss = yy->ss_ss;
420 : /* For all its states... */
421 0 : for (ibit = 0; ibit < nf->nf_nstates; ++ibit) {
422 0 : if (!testbit(ss, ibit))
423 0 : continue;
424 0 : st = &nf->nf_state[ibit];
425 : /* For all non-empty arcs from this state... */
426 0 : for (iarc = 0; iarc < st->st_narcs; iarc++) {
427 0 : ar = &st->st_arc[iarc];
428 0 : if (ar->ar_label == EMPTY)
429 0 : continue;
430 : /* Look up in list of arcs from this state */
431 0 : for (jarc = 0; jarc < yy->ss_narcs; ++jarc) {
432 0 : zz = &yy->ss_arc[jarc];
433 0 : if (ar->ar_label == zz->sa_label)
434 0 : goto found;
435 : }
436 : /* Add new arc for this state */
437 0 : size = sizeof(ss_arc) * (yy->ss_narcs + 1);
438 0 : yy->ss_arc = (ss_arc *)PyObject_REALLOC(
439 0 : yy->ss_arc, size);
440 0 : if (yy->ss_arc == NULL)
441 0 : Py_FatalError("out of mem");
442 0 : zz = &yy->ss_arc[yy->ss_narcs++];
443 0 : zz->sa_label = ar->ar_label;
444 0 : zz->sa_bitset = newbitset(nbits);
445 0 : zz->sa_arrow = -1;
446 : found: ;
447 : /* Add destination */
448 0 : addclosure(zz->sa_bitset, nf, ar->ar_arrow);
449 : }
450 : }
451 : /* Now look up all the arrow states */
452 0 : for (jarc = 0; jarc < xx_state[istate].ss_narcs; jarc++) {
453 0 : zz = &xx_state[istate].ss_arc[jarc];
454 0 : for (jstate = 0; jstate < xx_nstates; jstate++) {
455 0 : if (samebitset(zz->sa_bitset,
456 0 : xx_state[jstate].ss_ss, nbits)) {
457 0 : zz->sa_arrow = jstate;
458 0 : goto done;
459 : }
460 : }
461 0 : size = sizeof(ss_state) * (xx_nstates + 1);
462 0 : xx_state = (ss_state *)PyObject_REALLOC(xx_state,
463 : size);
464 0 : if (xx_state == NULL)
465 0 : Py_FatalError("out of mem");
466 0 : zz->sa_arrow = xx_nstates;
467 0 : yy = &xx_state[xx_nstates++];
468 0 : yy->ss_ss = zz->sa_bitset;
469 0 : yy->ss_narcs = 0;
470 0 : yy->ss_arc = NULL;
471 0 : yy->ss_deleted = 0;
472 0 : yy->ss_finish = testbit(yy->ss_ss, nf->nf_finish);
473 : done: ;
474 : }
475 : }
476 :
477 0 : if (Py_DebugFlag)
478 0 : printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
479 : "before minimizing");
480 :
481 0 : simplify(xx_nstates, xx_state);
482 :
483 0 : if (Py_DebugFlag)
484 0 : printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
485 : "after minimizing");
486 :
487 0 : convert(d, xx_nstates, xx_state);
488 :
489 : /* XXX cleanup */
490 0 : PyObject_FREE(xx_state);
491 0 : }
492 :
493 : static void
494 0 : printssdfa(int xx_nstates, ss_state *xx_state, int nbits,
495 : labellist *ll, char *msg)
496 : {
497 : int i, ibit, iarc;
498 : ss_state *yy;
499 : ss_arc *zz;
500 :
501 0 : printf("Subset DFA %s\n", msg);
502 0 : for (i = 0; i < xx_nstates; i++) {
503 0 : yy = &xx_state[i];
504 0 : if (yy->ss_deleted)
505 0 : continue;
506 0 : printf(" Subset %d", i);
507 0 : if (yy->ss_finish)
508 0 : printf(" (finish)");
509 0 : printf(" { ");
510 0 : for (ibit = 0; ibit < nbits; ibit++) {
511 0 : if (testbit(yy->ss_ss, ibit))
512 0 : printf("%d ", ibit);
513 : }
514 0 : printf("}\n");
515 0 : for (iarc = 0; iarc < yy->ss_narcs; iarc++) {
516 0 : zz = &yy->ss_arc[iarc];
517 0 : printf(" Arc to state %d, label %s\n",
518 : zz->sa_arrow,
519 : PyGrammar_LabelRepr(
520 0 : &ll->ll_label[zz->sa_label]));
521 : }
522 : }
523 0 : }
524 :
525 :
526 : /* PART THREE -- SIMPLIFY DFA */
527 :
528 : /* Simplify the DFA by repeatedly eliminating states that are
529 : equivalent to another oner. This is NOT Algorithm 3.3 from
530 : [Aho&Ullman 77]. It does not always finds the minimal DFA,
531 : but it does usually make a much smaller one... (For an example
532 : of sub-optimal behavior, try S: x a b+ | y a b+.)
533 : */
534 :
535 : static int
536 0 : samestate(ss_state *s1, ss_state *s2)
537 : {
538 : int i;
539 :
540 0 : if (s1->ss_narcs != s2->ss_narcs || s1->ss_finish != s2->ss_finish)
541 0 : return 0;
542 0 : for (i = 0; i < s1->ss_narcs; i++) {
543 0 : if (s1->ss_arc[i].sa_arrow != s2->ss_arc[i].sa_arrow ||
544 0 : s1->ss_arc[i].sa_label != s2->ss_arc[i].sa_label)
545 0 : return 0;
546 : }
547 0 : return 1;
548 : }
549 :
550 : static void
551 0 : renamestates(int xx_nstates, ss_state *xx_state, int from, int to)
552 : {
553 : int i, j;
554 :
555 0 : if (Py_DebugFlag)
556 0 : printf("Rename state %d to %d.\n", from, to);
557 0 : for (i = 0; i < xx_nstates; i++) {
558 0 : if (xx_state[i].ss_deleted)
559 0 : continue;
560 0 : for (j = 0; j < xx_state[i].ss_narcs; j++) {
561 0 : if (xx_state[i].ss_arc[j].sa_arrow == from)
562 0 : xx_state[i].ss_arc[j].sa_arrow = to;
563 : }
564 : }
565 0 : }
566 :
567 : static void
568 0 : simplify(int xx_nstates, ss_state *xx_state)
569 : {
570 : int changes;
571 : int i, j;
572 :
573 : do {
574 0 : changes = 0;
575 0 : for (i = 1; i < xx_nstates; i++) {
576 0 : if (xx_state[i].ss_deleted)
577 0 : continue;
578 0 : for (j = 0; j < i; j++) {
579 0 : if (xx_state[j].ss_deleted)
580 0 : continue;
581 0 : if (samestate(&xx_state[i], &xx_state[j])) {
582 0 : xx_state[i].ss_deleted++;
583 0 : renamestates(xx_nstates, xx_state,
584 : i, j);
585 0 : changes++;
586 0 : break;
587 : }
588 : }
589 : }
590 0 : } while (changes);
591 0 : }
592 :
593 :
594 : /* PART FOUR -- GENERATE PARSING TABLES */
595 :
596 : /* Convert the DFA into a grammar that can be used by our parser */
597 :
598 : static void
599 0 : convert(dfa *d, int xx_nstates, ss_state *xx_state)
600 : {
601 : int i, j;
602 : ss_state *yy;
603 : ss_arc *zz;
604 :
605 0 : for (i = 0; i < xx_nstates; i++) {
606 0 : yy = &xx_state[i];
607 0 : if (yy->ss_deleted)
608 0 : continue;
609 0 : yy->ss_rename = addstate(d);
610 : }
611 :
612 0 : for (i = 0; i < xx_nstates; i++) {
613 0 : yy = &xx_state[i];
614 0 : if (yy->ss_deleted)
615 0 : continue;
616 0 : for (j = 0; j < yy->ss_narcs; j++) {
617 0 : zz = &yy->ss_arc[j];
618 0 : addarc(d, yy->ss_rename,
619 0 : xx_state[zz->sa_arrow].ss_rename,
620 : zz->sa_label);
621 : }
622 0 : if (yy->ss_finish)
623 0 : addarc(d, yy->ss_rename, yy->ss_rename, 0);
624 : }
625 :
626 0 : d->d_initial = 0;
627 0 : }
628 :
629 :
630 : /* PART FIVE -- GLUE IT ALL TOGETHER */
631 :
632 : static grammar *
633 0 : maketables(nfagrammar *gr)
634 : {
635 : int i;
636 : nfa *nf;
637 : dfa *d;
638 : grammar *g;
639 :
640 0 : if (gr->gr_nnfas == 0)
641 0 : return NULL;
642 0 : g = newgrammar(gr->gr_nfa[0]->nf_type);
643 : /* XXX first rule must be start rule */
644 0 : g->g_ll = gr->gr_ll;
645 :
646 0 : for (i = 0; i < gr->gr_nnfas; i++) {
647 0 : nf = gr->gr_nfa[i];
648 0 : if (Py_DebugFlag) {
649 0 : printf("Dump of NFA for '%s' ...\n", nf->nf_name);
650 0 : dumpnfa(&gr->gr_ll, nf);
651 0 : printf("Making DFA for '%s' ...\n", nf->nf_name);
652 : }
653 0 : d = adddfa(g, nf->nf_type, nf->nf_name);
654 0 : makedfa(gr, gr->gr_nfa[i], d);
655 : }
656 :
657 0 : return g;
658 : }
659 :
660 : grammar *
661 0 : pgen(node *n)
662 : {
663 : nfagrammar *gr;
664 : grammar *g;
665 :
666 0 : gr = metacompile(n);
667 0 : g = maketables(gr);
668 0 : translatelabels(g);
669 0 : addfirstsets(g);
670 0 : PyObject_FREE(gr);
671 0 : return g;
672 : }
673 :
674 : grammar *
675 0 : Py_pgen(node *n)
676 : {
677 0 : return pgen(n);
678 : }
679 :
680 : /*
681 :
682 : Description
683 : -----------
684 :
685 : Input is a grammar in extended BNF (using * for repetition, + for
686 : at-least-once repetition, [] for optional parts, | for alternatives and
687 : () for grouping). This has already been parsed and turned into a parse
688 : tree.
689 :
690 : Each rule is considered as a regular expression in its own right.
691 : It is turned into a Non-deterministic Finite Automaton (NFA), which
692 : is then turned into a Deterministic Finite Automaton (DFA), which is then
693 : optimized to reduce the number of states. See [Aho&Ullman 77] chapter 3,
694 : or similar compiler books (this technique is more often used for lexical
695 : analyzers).
696 :
697 : The DFA's are used by the parser as parsing tables in a special way
698 : that's probably unique. Before they are usable, the FIRST sets of all
699 : non-terminals are computed.
700 :
701 : Reference
702 : ---------
703 :
704 : [Aho&Ullman 77]
705 : Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977
706 : (first edition)
707 :
708 : */
|