Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include <assert.h>
21 : #include <stdio.h>
22 : #include <string.h>
23 : #include <ctype.h>
24 :
25 : // DVO: always use standard headers:
26 : #include <istream>
27 : #include <sstream>
28 : using namespace std;
29 :
30 : #include "mzstring.h"
31 : #include "hwpeq.h"
32 : #include <sal/types.h>
33 : #include <sal/macros.h>
34 :
35 : /* @Man: convert HWP equations to LaTeX */
36 : #ifdef WIN32
37 : # define ENDL "\r\n"
38 : #else /* !WIN32 */
39 : # define ENDL "\n"
40 : #endif
41 :
42 : #define WS " \t\r\n\v\f"
43 :
44 : #define EQ_CASE 0x01 // case sensitive cmd
45 : #define EQ_ENV 0x02 // equiv to latex environment
46 : #define EQ_ATOP 0x04 // must revert order
47 :
48 : #define IS_WS(ch) (strchr(WS, ch))
49 : #define IS_BINARY(ch) (strchr("+-<=>", ch))
50 :
51 : #ifdef WIN32
52 : #define STRICMP stricmp
53 : #else
54 : #define STRICMP strcasecmp
55 : #endif
56 :
57 : // subscript / superscript status
58 : enum { SCRIPT_NONE, SCRIPT_SUB, SCRIPT_SUP, SCRIPT_ALL};
59 :
60 : static int eq_word(MzString& outs, istream *strm, int script = SCRIPT_NONE);
61 : static bool eq_sentence(MzString& outs, istream *strm, const char *end = 0);
62 :
/**
 * One row of the HWP equation keyword table.
 */
struct hwpeq {
    const char    *key;     // HWP equation keyword
    const char    *latex;   // LaTeX replacement text; NULL when the table gives none
    int           nargs;    // number of arguments that follow the keyword
    unsigned char flag;     // bitwise OR of the EQ_CASE / EQ_ENV / EQ_ATOP bits
};
69 :
70 : static const hwpeq eq_tbl[] = {
71 : { "!=", "\\equiv ", 0, 0 },
72 : { "#", "\\\\", 0, 0 },
73 : { "+-", "\\pm ", 0, 0 },
74 : { "-+", "\\mp ", 0, 0 },
75 : { "<=", "\\leq ", 0, 0 },
76 : { "==", "\\equiv ", 0, 0 },
77 : { ">=", "\\geq ", 0, 0 },
78 : { "Pr", NULL, 0, 0 },
79 : { "^", "^", 1, 0 },
80 : { "_", "_", 1, 0 },
81 : { "`", "\\;", 0, 0 },
82 : { "acute", NULL, 1, 0 },
83 : { "aleph", NULL, 0, 0 },
84 : { "alpha", NULL, 0, EQ_CASE },
85 : { "amalg", NULL, 0, 0 },
86 : { "and", NULL, 0, 0 },
87 : { "angle", NULL, 0, 0 },
88 : { "angstrom", NULL, 0, 0 },
89 : { "approx", NULL, 0, 0 },
90 : { "arc", NULL, 0, 0 },
91 : { "arccos", NULL, 0, 0 },
92 : { "arch", NULL, 0, 0 },
93 : { "arcsin", NULL, 0, 0 },
94 : { "arctan", NULL, 0, 0 },
95 : { "arg", NULL, 0, 0 },
96 : { "assert", "\\vdash", 0, 0 },
97 : { "ast", NULL, 0, 0 },
98 : { "asymp", NULL, 0, 0 },
99 : { "atop", NULL, 1, EQ_ATOP },
100 : { "backslash", NULL, 0, 0 },
101 : { "bar", NULL, 1, 0 },
102 : { "because", NULL, 0, 0 },
103 : { "beta", NULL, 0, EQ_CASE },
104 : { "big", NULL, 0, EQ_CASE },
105 : { "bigcap", NULL, 0, 0 },
106 : { "bigcirc", NULL, 0, 0 },
107 : { "bigcup", NULL, 0, 0 },
108 : { "bigg", NULL, 0, EQ_CASE },
109 : { "bigodiv", NULL, 0, 0 },
110 : { "bigodot", NULL, 0, 0 },
111 : { "bigominus", NULL, 0, 0 },
112 : { "bigoplus", NULL, 0, 0 },
113 : { "bigotimes", NULL, 0, 0 },
114 : { "bigsqcap", NULL, 0, 0 },
115 : { "bigsqcup", NULL, 0, 0 },
116 : { "biguplus", NULL, 0, 0 },
117 : { "bigvee", NULL, 0, 0 },
118 : { "bigwedge", NULL, 0, 0 },
119 : { "binom", NULL, 2, 0 },
120 : { "bmatrix", NULL, 0, EQ_ENV },
121 : { "bold", NULL, 0, 0 },
122 : { "bot", NULL, 0, 0 },
123 : { "breve", NULL, 1, 0 },
124 : { "buildrel", NULL, 0, 0 }, // LATER
125 : { "bullet", NULL, 0, 0 },
126 : { "cap", NULL, 0, 0 },
127 : { "cases", NULL, 0, EQ_ENV },
128 : { "ccol", NULL, 0, 0 }, /* 세로로 가운데 */
129 : { "cdot", NULL, 0, 0 },
130 : { "cdots", NULL, 0, 0 },
131 : { "check", NULL, 1, 0 },
132 : { "chi", NULL, 0, EQ_CASE },
133 : { "choose", NULL, 0, EQ_ATOP },
134 : { "circ", NULL, 0, 0 },
135 : { "col", NULL, 0, 0 }, // LATER
136 : { "cong", NULL, 0, 0 },
137 : { "coprod", NULL, 0, 0 },
138 : { "cos", NULL, 0, 0 },
139 : { "cosec", NULL, 0, 0 },
140 : { "cosh", NULL, 0, 0 },
141 : { "cot", NULL, 0, 0 },
142 : { "coth", NULL, 0, 0 },
143 : { "cpile", NULL, 0, 0 }, // LATER
144 : { "csc", NULL, 0, 0 },
145 : { "cup", NULL, 0, 0 },
146 : { "dagger", NULL, 0, 0 },
147 : { "dashv", NULL, 0, 0 },
148 : { "ddagger", NULL, 0, 0 },
149 : { "ddot", NULL, 1, 0 },
150 : { "ddots", NULL, 0, 0 },
151 : { "def", NULL, 0, 0 },
152 : { "deg", NULL, 0, 0 },
153 : { "del", NULL, 0, 0 },
154 : { "delta", NULL, 0, EQ_CASE },
155 : { "diamond", NULL, 0, 0 },
156 : { "dim", NULL, 0, 0 },
157 : { "div", NULL, 0, 0 },
158 : { "divide", NULL, 0, 0 },
159 : { "dline", NULL, 0, 0 },
160 : { "dmatrix", NULL, 0, EQ_ENV },
161 : { "dot", NULL, 1, 0 },
162 : { "doteq", NULL, 0, 0 },
163 : { "dotsaxis", NULL, 0, 0 },
164 : { "dotsdiag", NULL, 0, 0 },
165 : { "dotslow", "\\ldots", 0, 0 },
166 : { "dotsvert", "\\vdots", 0, 0 },
167 : { "downarrow", NULL, 0, EQ_CASE },
168 : { "dsum", "+", 0, 0 },
169 : { "dyad", NULL, 0, 0 }, // LATER
170 : { "ell", NULL, 0, 0 },
171 : { "emptyset", NULL, 0, 0 },
172 : { "epsilon", NULL, 0, EQ_CASE },
173 : { "eqalign", NULL, 0, EQ_ENV },
174 : { "equiv", NULL, 0, 0 },
175 : { "eta", NULL, 0, EQ_CASE },
176 : { "exarrow", NULL, 0, 0 },
177 : { "exist", "\\exists", 0, 0 },
178 : { "exists", NULL, 0, 0 },
179 : { "exp", NULL, 0, EQ_CASE },
180 : { "for", NULL, 0, 0 },
181 : { "forall", NULL, 0, 0 },
182 : { "from", "_", 1, 0 },
183 : { "gamma", NULL, 0, EQ_CASE },
184 : { "gcd", NULL, 0, 0 },
185 : { "ge", "\\geq", 0, 0 },
186 : { "geq", NULL, 0, 0 },
187 : { "ggg", NULL, 0, 0 },
188 : { "grad", NULL, 0, 0 },
189 : { "grave", NULL, 1, 0 },
190 : { "hat", "\\widehat", 1, 0 },
191 : { "hbar", NULL, 0, 0 },
192 : { "hom", NULL, 0, 0 },
193 : { "hookleft", NULL, 0, 0 },
194 : { "hookright", NULL, 0, 0 },
195 : { "identical", NULL, 0, 0 }, // LATER
196 : { "if", NULL, 0, 0 },
197 : { "imag", NULL, 0, 0 },
198 : { "image", NULL, 0, 0 },
199 : { "imath", NULL, 0, 0 },
200 : { "in", NULL, 0, 0 },
201 : { "inf", "\\infty", 0, 0 },
202 : { "infinity", "\\infty", 0, 0 },
203 : { "infty", NULL, 0, 0 },
204 : { "int", NULL, 0, 0 },
205 : { "integral", "\\int", 0, 0 },
206 : { "inter", "\\bigcap", 0, 0 },
207 : { "iota", NULL, 0, EQ_CASE },
208 : { "iso", NULL, 0, 0 }, // ams
209 : { "it", NULL, 0, 0 },
210 : { "jmath", NULL, 0, 0 },
211 : { "kappa", NULL, 0, EQ_CASE },
212 : { "ker", NULL, 0, 0 },
213 : { "lambda", NULL, 0, EQ_CASE },
214 : { "land", NULL, 0, 0 }, // LATER
215 : { "langle", NULL, 0, 0 },
216 : { "larrow", "\\leftarrow", 0, EQ_CASE },
217 : { "lbrace", NULL, 0, 0 },
218 : { "lbrack", "[", 0, 0 },
219 : { "lceil", NULL, 0, 0 },
220 : { "lcol", NULL, 0, 0 }, // LATER
221 : { "ldots", NULL, 0, 0 },
222 : { "le", NULL, 0, 0 },
223 : { "left", NULL, 0, 0 },
224 : { "leftarrow", NULL, 0, EQ_CASE },
225 : { "leq", NULL, 0, 0 },
226 : { "lfloor", NULL, 0, 0 },
227 : { "lg", NULL, 0, 0 },
228 : { "lim", NULL, 0, EQ_CASE },
229 : { "line", "\\vert", 0, 0 },
230 : { "liter", "\\ell", 0, 0 },
231 : { "lll", NULL, 0, 0 }, // ams
232 : { "ln", NULL, 0, 0 },
233 : { "log", NULL, 0, 0 },
234 : { "lor", "\\vee", 0, 0 },
235 : { "lparen", "(", 0, 0 },
236 : { "lpile", NULL, 0, 0 }, // LATER
237 : { "lrarrow", "\\leftrightarrow", 0, EQ_CASE },
238 : { "lrharpoons", "\\leftrightharpoons",0, 0 },
239 : { "mapsto", NULL, 0, 0 },
240 : { "massert", "\\dashv", 0, 0 },
241 : { "matrix", NULL, 0, EQ_ENV },
242 : { "max", NULL, 0, 0 },
243 : { "mho", NULL, 0, 0 }, // ams
244 : { "min", NULL, 0, 0 },
245 : { "minusplus", NULL, 0, 0 },
246 : { "mit", "", 0, 0 }, // font
247 : { "mod", "\\bmod", 0, 0 },
248 : { "models", NULL, 0, 0 },
249 : { "msangle", NULL, 0, 0 }, // LATER
250 : { "mu", NULL, 0, EQ_CASE },
251 : { "nabla", NULL, 0, 0 },
252 : { "ne", NULL, 0, 0 },
253 : { "nearrow", NULL, 0, 0 },
254 : { "neg", NULL, 0, 0 },
255 : { "neq", NULL, 0, 0 },
256 : { "nequiv", NULL, 0, 0 },
257 : { "ni", NULL, 0, 0 },
258 : { "not", NULL, 0, 0 },
259 : { "notin", NULL, 0, 0 },
260 : { "nu", NULL, 0, EQ_CASE },
261 : { "nwarrow", NULL, 0, 0 },
262 : { "odiv", NULL, 0, 0 },
263 : { "odot", NULL, 0, 0 },
264 : { "oint", NULL, 0, 0 },
265 : { "omega", NULL, 0, EQ_CASE },
266 : { "omicron", NULL, 0, EQ_CASE },
267 : { "ominus", NULL, 0, 0 },
268 : { "oplus", NULL, 0, 0 },
269 : { "or ", NULL, 0, 0 },
270 : { "oslash", NULL, 0, 0 },
271 : { "otimes", NULL, 0, 0 },
272 : { "over", NULL, 1, EQ_ATOP },
273 : { "overline", NULL, 1, 0 },
274 : { "owns", "\\ni", 0, 0 },
275 : { "parallel", NULL, 0, 0 },
276 : { "partial", NULL, 0, 0 },
277 : { "phantom", NULL, 0, 0 },
278 : { "phi", NULL, 0, EQ_CASE },
279 : { "pi", NULL, 0, EQ_CASE },
280 : { "pile", NULL, 0, 0 }, // LATER
281 : { "plusminus", "\\pm", 0, 0 },
282 : { "pmatrix", NULL, 0, EQ_ENV },
283 : { "prec", NULL, 0, 0 },
284 : { "prep", NULL, 0, 0 },
285 : { "prime", NULL, 0, 0 },
286 : { "prod", NULL, 0, 0 },
287 : { "propto", NULL, 0, 0 },
288 : { "psi", NULL, 0, EQ_CASE },
289 : { "rangle", NULL, 0, 0 },
290 : { "rarrow", "\\rightarrow", 0, EQ_CASE },
291 : { "rbrace", "]", 0, 0 },
292 : { "rbrace", NULL, 0, 0 },
293 : { "rceil", NULL, 0, 0 },
294 : { "rcol", NULL, 0, 0 }, // LATER
295 : { "real", "\\Re", 0, 0 },
296 : { "reimage", NULL, 0, 0 },
297 : { "rel", NULL, 0, 0 },
298 : { "rfloor", NULL, 0, 0 },
299 : { "rho", NULL, 0, EQ_CASE },
300 : { "right", NULL, 0, 0 },
301 : { "rightarrow", NULL, 0, EQ_CASE },
302 : { "rlharpoons", NULL, 0, 0 },
303 : { "rm", NULL, 0, 0 },
304 : { "root", "\\sqrt", 1, 0 },
305 : { "rparen", ")", 0, 0 },
306 : { "rpile", NULL, 0, 0 }, // LATER
307 : { "rtangle", NULL, 0, 0 },
308 : { "sangle", NULL, 0, 0 },
309 : { "scale", NULL, 0, 0 },
310 : { "searrow", NULL, 0, 0 },
311 : { "sec", NULL, 0, 0 },
312 : { "sigma", NULL, 0, EQ_CASE },
313 : { "sim", NULL, 0, 0 },
314 : { "simeq", NULL, 0, 0 },
315 : { "sin", NULL, 0, 0 },
316 : { "sinh", NULL, 0, 0 },
317 : { "slash", NULL, 0, 0 },
318 : { "smallint", NULL, 0, 0 },
319 : { "smallinter", NULL, 0, 0 },
320 : { "smalloint", NULL, 0, 0 },
321 : { "smallprod", NULL, 0, 0 },
322 : { "smallsum", NULL, 0, 0 },
323 : { "smallunion", NULL, 0, 0 },
324 : { "smcoprod", NULL, 0, 0 },
325 : { "sqcap", NULL, 0, 0 },
326 : { "sqcup", NULL, 0, 0 },
327 : { "sqrt", NULL, 1, 0 },
328 : { "sqsubset", NULL, 0, 0 },
329 : { "sqsubseteq", NULL, 0, 0 },
330 : { "sqsupset", NULL, 0, 0 },
331 : { "sqsupseteq", NULL, 0, 0 },
332 : { "star", NULL, 0, 0 },
333 : { "sub", "_", 0, 0 },
334 : { "subset", NULL, 0, 0 },
335 : { "subseteq", NULL, 0, 0 },
336 : { "succ", NULL, 0, 0 },
337 : { "sum", NULL, 0, 0 },
338 : { "sup", "^", 0, 0 },
339 : { "superset", NULL, 0, 0 },
340 : { "supset", NULL, 0, 0 },
341 : { "supseteq", NULL, 0, 0 },
342 : { "swarrow", NULL, 0, 0 },
343 : { "tan", NULL, 0, 0 },
344 : { "tanh", NULL, 0, 0 },
345 : { "tau", NULL, 0, EQ_CASE },
346 : { "therefore", NULL, 0, 0 },
347 : { "theta", NULL, 0, EQ_CASE },
348 : { "tilde", "\\widetilde", 1, 0 },
349 : { "times", NULL, 0, 0 },
350 : { "to", "^", 1, 0 },
351 : { "top", NULL, 0, 0 },
352 : { "triangle", NULL, 0, 0 },
353 : { "triangled", NULL, 0, 0 },
354 : { "trianglel", NULL, 0, 0 },
355 : { "triangler", NULL, 0, 0 },
356 : { "triangleu", NULL, 0, 0 },
357 : { "udarrow", "\\updownarrow",0, EQ_CASE },
358 : { "under", "\\underline", 1, 0 },
359 : { "underline", "\\underline", 1, 0 },
360 : { "union", "\\bigcup", 0, 0 },
361 : { "uparrow", NULL, 0, EQ_CASE },
362 : { "uplus", NULL, 0, 0 },
363 : { "upsilon", NULL, 0, EQ_CASE },
364 : { "varepsilon", NULL, 0, 0 },
365 : { "varphi", NULL, 0, 0 },
366 : { "varpi", NULL, 0, 0 },
367 : { "varrho", NULL, 0, 0 },
368 : { "varsigma", NULL, 0, 0 },
369 : { "vartheta", NULL, 0, 0 },
370 : { "varupsilon", NULL, 0, 0 },
371 : { "vdash", NULL, 0, 0 },
372 : { "vdots", NULL, 0, 0 },
373 : { "vec", NULL, 1, 0 },
374 : { "vee", NULL, 0, 0 },
375 : { "vert", NULL, 0, 0 },
376 : { "wedge", NULL, 0, 0 },
377 : { "wp", NULL, 0, 0 },
378 : { "xi", NULL, 0, EQ_CASE },
379 : { "xor", NULL, 0, 0 },
380 : { "zeta", NULL, 0, EQ_CASE }
381 : };
382 :
383 0 : static const hwpeq *lookup_eqn(char *str)
384 : {
385 : static const int eqCount = SAL_N_ELEMENTS(eq_tbl);
386 0 : int l = 0, r = eqCount;
387 0 : const hwpeq *result = 0;
388 :
389 0 : while( l < r ) {
390 0 : const int m = (l + r) / 2;
391 0 : const int k = strcmp(eq_tbl[m].key, str);
392 0 : if( k == 0 ) {
393 0 : result = eq_tbl + m;
394 0 : break;
395 : }
396 0 : else if( k < 0 )
397 0 : l = m + 1;
398 : else
399 0 : r = m;
400 : }
401 0 : return result;
402 : }
403 :
/*
 * Copy `token` into `keyword` and normalise its case for table lookup.
 *
 * If only the first letter is a capital ("Alpha") or the whole word is in
 * capitals ("ALPHA"), the copy is converted to lower case.  Tokens that
 * start with a lower-case letter or a high-bit byte, are shorter than two
 * characters, mix cases, or contain a high-bit byte from the third
 * character on are copied unchanged.
 *
 * keyword: destination buffer; at most 255 characters plus the terminating
 *          NUL are written, so it must hold at least 256 bytes.
 * token:   NUL-terminated source string.
 */
void make_keyword( char *keyword, const char *token)
{
    assert(keyword);
    assert(token);

    size_t len = strlen(token);
    if( 255 < len )
        len = 255;
    memcpy(keyword, token, len);
    keyword[len] = 0;

    // <ctype.h> functions are only defined for unsigned char values (or
    // EOF), so every byte is converted before it is classified.
    const unsigned char first = static_cast<unsigned char>(token[0]);
    if( (first & 0x80) || islower(first) || len < 2 )
        return;

    // The second character decides which spelling is expected for the rest:
    // all capitals, or no capitals at all.
    const bool capital = isupper(static_cast<unsigned char>(keyword[1])) != 0;
    for( const char *p = keyword + 2; *p; ++p )
    {
        const unsigned char c = static_cast<unsigned char>(*p);
        if( (c & 0x80) || (capital ? islower(c) : isupper(c)) )
            return;             // non-ASCII or mixed case: keep as written
    }

    for( char *p = keyword; *p; ++p )
    {
        const unsigned char c = static_cast<unsigned char>(*p);
        // never touch high-bit bytes, they may be part of a multi-byte character
        if( !(c & 0x80) && isupper(c) )
            *p = static_cast<char>(tolower(c));
    }
}
445 :
446 : // token reading function
447 0 : struct eq_stack {
448 : MzString white;
449 : MzString token;
450 : istream *strm;
451 :
452 0 : eq_stack() { strm = 0; };
453 0 : bool state(istream *s) {
454 0 : if( strm != s) { white = 0; token = 0; }
455 0 : return token.length() != 0;
456 : }
457 : };
458 :
459 : static eq_stack *stk = 0;
460 :
461 0 : void push_token(MzString &white, MzString &token, istream *strm)
462 : {
463 : // one time stack
464 : assert(stk->token.length() == 0);
465 :
466 0 : stk->white = white;
467 0 : stk->token = token;
468 0 : stk->strm = strm;
469 0 : }
470 :
471 : /*
472 : 읽은 토큰의 길이를 반환한다.
473 : */
474 : /* control char, control sequence, binary sequence,
475 : alphabet string, sigle character */
476 0 : static int next_token(MzString &white, MzString &token, istream *strm)
477 : {
478 0 : int ch = 0;
479 :
480 0 : if( stk->state(strm) ) {
481 0 : white = stk->white;
482 0 : token = stk->token;
483 0 : stk->token = 0;
484 0 : stk->white = 0;
485 0 : return token.length();
486 : }
487 :
488 0 : token = 0;
489 0 : white = 0;
490 0 : if( !strm->good() || (ch = strm->get()) == EOF )
491 0 : return 0;
492 :
493 : // read preceding ws
494 0 : if( IS_WS(ch) ) {
495 0 : do white << (char) ch;
496 0 : while( IS_WS(ch = strm->get()) );
497 : }
498 :
499 0 : if( ch == '\\' || ch & 0x80 || isalpha(ch) ) {
500 0 : if( ch == '\\' ) {
501 0 : token << (char) ch;
502 0 : ch = strm->get();
503 : }
504 0 : do {
505 0 : token << (char) ch;
506 0 : ch = strm->get();
507 0 : } while( ch != EOF && (ch & 0x80 || isalpha(ch)) ) ;
508 0 : strm->putback(sal::static_int_cast<char>(ch));
509 : /* sub, sub, over, atop 특수 처리
510 : 그 이유는 next_state()에 영향을 미치기 때문이다.
511 : */
512 0 : if( !STRICMP("sub", token) || !STRICMP("from", token) ||
513 0 : !STRICMP("sup", token) || !STRICMP("to", token) ||
514 0 : !STRICMP("over", token) || !STRICMP("atop", token) ||
515 0 : !STRICMP("left", token) || !STRICMP("right", token) )
516 : {
517 : char buf[256];
518 0 : make_keyword(buf, token);
519 0 : token = buf;
520 : }
521 0 : if( !token.compare("sub") || !token.compare("from") )
522 0 : token = "_";
523 0 : if( !token.compare("sup") || !token.compare("to") )
524 0 : token = "^";
525 : }
526 0 : else if( IS_BINARY(ch) ) {
527 0 : do token << (char) ch;
528 0 : while( IS_BINARY(ch = strm->get()) );
529 0 : strm->putback(sal::static_int_cast<char>(ch));
530 : }
531 0 : else if( isdigit(ch) ) {
532 0 : do token << (char) ch;
533 0 : while( isdigit(ch = strm->get()) );
534 0 : strm->putback(sal::static_int_cast<char>(ch));
535 : }
536 : else
537 0 : token << (char) ch;
538 :
539 0 : return token.length();
540 : }
541 :
542 0 : static int read_white_space(MzString& outs, istream *strm)
543 : {
544 : int result;
545 :
546 0 : if( stk->state(strm) ) {
547 0 : outs << stk->white;
548 0 : stk->white = 0;
549 0 : result = stk->token[0];
550 : }
551 : else {
552 : int ch;
553 0 : while( IS_WS(ch = strm->get()) )
554 0 : outs << (char )ch;
555 0 : strm->putback(sal::static_int_cast<char>(ch));
556 0 : result = ch;
557 : }
558 0 : return result;
559 : }
560 :
561 : /* 인수가 필요하지 않은 경우 각 항목간의 구분은 space와 brace
562 : sqrt {ab}c = sqrt{ab} c
563 : (, }는 grouping
564 : ^, _ 는 앞뒤로 결합한다.
565 :
566 : sqrt 등과 같이 인수가 있는 형식 정리
567 : sqrt a -> sqrt{a}
568 : sqrt {a} -> sqrt{a}
569 : 1 이상의 인수가 있는 경우 인수들간의 역백은 없앤다.
570 : \frac a b -> frac{a}{b}
571 : over의 형식 정리
572 : a over b -> {a}over{b}
573 : */
574 :
575 0 : static int eq_word(MzString& outs, istream *strm, int status)
576 : {
577 0 : MzString token, white, state;
578 : int result;
579 : char keyword[256];
580 : const hwpeq *eq;
581 :
582 0 : next_token(white, token, strm);
583 0 : if (token.length() <= 0)
584 0 : return 0;
585 0 : result = token[0];
586 :
587 0 : if( token.compare("{") == 0 ) {
588 0 : state << white << token;
589 0 : eq_sentence(state, strm, "}");
590 : }
591 0 : else if( token.compare("left") == 0 ) {
592 0 : state << white << token;
593 0 : next_token(white, token, strm);
594 0 : state << white << token;
595 :
596 0 : eq_sentence(state, strm, "right");
597 :
598 0 : next_token(white, token, strm);
599 0 : state << white << token;
600 : }
601 : else {
602 : /* 정상적인 token */
603 0 : int script_status = SCRIPT_NONE;
604 : while( true ) {
605 0 : state << white << token;
606 0 : make_keyword(keyword, token);
607 0 : if( token[0] == '^' )
608 0 : script_status |= SCRIPT_SUP;
609 0 : else if( token[0] == '_' )
610 0 : script_status |= SCRIPT_SUB;
611 : else
612 0 : script_status = SCRIPT_NONE;
613 :
614 0 : if( 0 != (eq = lookup_eqn(keyword)) ) {
615 0 : int nargs = eq->nargs;
616 0 : while( nargs-- ) {
617 0 : const int ch = read_white_space(state, strm);
618 0 : if( ch != '{' ) state << '{';
619 0 : eq_word(state, strm, script_status);
620 0 : if( ch != '{' ) state << '}';
621 : }
622 : }
623 :
624 0 : if( !next_token(white, token, strm) )
625 0 : break;
626 : // end loop and restart with this
627 0 : if( (token[0] == '^' && status && !(status & SCRIPT_SUP)) ||
628 0 : (token[0] == '_' && status && !(status & SCRIPT_SUB)) ||
629 0 : strcmp("over", token) == 0 || strcmp("atop", token) == 0 ||
630 0 : strchr("{}#&`", token[0]) ||
631 0 : (!strchr("^_", token[0]) && white.length()) )
632 : {
633 0 : push_token(white, token, strm);
634 0 : break;
635 : }
636 0 : }
637 : }
638 0 : outs << state;
639 :
640 0 : return result;
641 : }
642 :
643 0 : static bool eq_sentence(MzString& outs, istream *strm, const char *end)
644 : {
645 0 : MzString state;
646 0 : MzString white, token;
647 0 : bool multiline = false;
648 :
649 0 : read_white_space(outs, strm);
650 0 : while( eq_word(state, strm) ) {
651 0 : if( !next_token(white, token, strm) ||
652 0 : (end && strcmp(token.c_str(), end) == 0) )
653 : {
654 0 : state << white << token;
655 0 : break;
656 : }
657 0 : push_token(white, token, strm);
658 0 : if( !token.compare("atop") || !token.compare("over") )
659 0 : outs << '{' << state << '}';
660 : else {
661 0 : if( !token.compare("#") )
662 0 : multiline = true;
663 0 : outs << state;
664 : }
665 0 : state = 0;
666 0 : read_white_space(outs, strm);
667 : }
668 0 : outs << state;
669 0 : return multiline;
670 : }
671 :
672 0 : static char eq2ltxconv(MzString& sstr, istream *strm, const char *sentinel)
673 : {
674 0 : MzString white, token;
675 : char key[256];
676 : int ch, result;
677 0 : const hwpeq *eq = 0;
678 :
679 0 : while( 0 != (result = next_token(white, token, strm)) ) {
680 0 : if( sentinel && (result == 1) && strchr(sentinel, token[0]) )
681 0 : break;
682 0 : make_keyword(key, token);
683 0 : if( (eq = lookup_eqn(key)) != 0 ) {
684 0 : if( eq->latex )
685 0 : strcpy(key, eq->latex);
686 : else {
687 0 : key[0] = '\\';
688 0 : strcpy(key + 1, eq->key);
689 : }
690 0 : if( (eq->flag & EQ_CASE) && isupper(token[0]) )
691 0 : key[1] = sal::static_int_cast<char>(toupper(key[1]));
692 0 : token = key;
693 : }
694 :
695 0 : if( token[0] == '{' ) { // grouping
696 0 : sstr << white << token;
697 0 : eq2ltxconv(sstr, strm, "}");
698 0 : sstr << '}';
699 : }
700 0 : else if( eq && (eq->flag & EQ_ENV) ) {
701 0 : next_token(white, token, strm);
702 0 : if( token[0] != '{' )
703 0 : return 0;
704 0 : sstr << "\\begin" << "{" << eq->key << "}" << ENDL ;
705 0 : eq2ltxconv(sstr, strm, "}");
706 0 : if( sstr[sstr.length() - 1] != '\n' )
707 0 : sstr << ENDL ;
708 0 : sstr << "\\end" << "{" << eq->key << "}" << ENDL ;
709 : }
710 0 : else if( eq && (eq->flag & EQ_ATOP) ) {
711 0 : if( sstr.length() == 0 )
712 0 : sstr << '{';
713 : else {
714 0 : int pos = sstr.rfind('}');
715 0 : if( 0 < pos)
716 0 : sstr.replace(pos, ' ');
717 : }
718 0 : sstr << token;
719 0 : while( (ch = strm->get()) != EOF && IS_WS(ch) )
720 0 : sstr << (char)ch;
721 0 : if( ch != '{' )
722 0 : sstr << "{}";
723 : else {
724 0 : eq2ltxconv(sstr, strm, "}");
725 0 : sstr << '}';
726 0 : }
727 : }
728 : else
729 0 : sstr << white << token;
730 : }
731 0 : return token[0];
732 : }
733 :
734 0 : void eq2latex(MzString& outs, char *s)
735 : {
736 : assert(s);
737 0 : if( stk == 0 )
738 0 : stk = new eq_stack;
739 :
740 0 : MzString tstr;
741 :
742 0 : istringstream tstrm(s);
743 0 : bool eqnarray = eq_sentence(tstr, &tstrm);
744 0 : istringstream strm(tstr.c_str());
745 :
746 0 : if( eqnarray )
747 0 : outs << "\\begin{array}{rllll}" << ENDL;
748 0 : eq2ltxconv(outs, &strm, 0);
749 0 : outs << ENDL;
750 0 : if( eqnarray )
751 0 : outs << "\\end{array}" << ENDL;
752 0 : delete stk;
753 0 : stk = 0;
754 0 : }
755 :
756 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|