Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include "stringutil.hxx"
21 : #include "global.hxx"
22 : #include <svl/zforlist.hxx>
23 :
24 : #include <rtl/ustrbuf.hxx>
25 : #include <rtl/strbuf.hxx>
26 : #include <rtl/math.hxx>
27 :
28 20224 : ScSetStringParam::ScSetStringParam() :
29 : mpNumFormatter(NULL),
30 : mbDetectNumberFormat(true),
31 : meSetTextNumFormat(Never),
32 20224 : mbHandleApostrophe(true)
33 : {
34 20224 : }
35 :
36 6400 : void ScSetStringParam::setTextInput()
37 : {
38 6400 : mbDetectNumberFormat = false;
39 6400 : mbHandleApostrophe = false;
40 6400 : meSetTextNumFormat = Always;
41 6400 : }
42 :
43 664 : void ScSetStringParam::setNumericInput()
44 : {
45 664 : mbDetectNumberFormat = true;
46 664 : mbHandleApostrophe = true;
47 664 : meSetTextNumFormat = Never;
48 664 : }
49 :
50 598 : bool ScStringUtil::parseSimpleNumber(
51 : const OUString& rStr, sal_Unicode dsep, sal_Unicode gsep, double& rVal)
52 : {
53 : // Actually almost the entire pre-check is unnecessary and we could call
54 : // rtl::math::stringToDouble() just after having exchanged ascii space with
55 : // non-breaking space, if it wasn't for check of grouped digits. The NaN
56 : // and Inf cases that are accepted by stringToDouble() could be detected
57 : // using rtl::math::isFinite() on the result.
58 :
59 : /* TODO: The grouped digits check isn't even valid for locales that do not
60 : * group in thousands ... e.g. Indian locales. But that's something also
61 : * the number scanner doesn't implement yet, only the formatter. */
62 :
63 598 : OUStringBuffer aBuf;
64 :
65 598 : sal_Int32 i = 0;
66 598 : sal_Int32 n = rStr.getLength();
67 598 : const sal_Unicode* p = rStr.getStr();
68 598 : const sal_Unicode* pLast = p + (n-1);
69 598 : sal_Int32 nPosDSep = -1, nPosGSep = -1;
70 598 : sal_uInt32 nDigitCount = 0;
71 598 : sal_Int32 nPosExponent = -1;
72 :
73 : // Skip preceding spaces.
74 618 : for (i = 0; i < n; ++i, ++p)
75 : {
76 618 : sal_Unicode c = *p;
77 618 : if (c != 0x0020 && c != 0x00A0)
78 : // first non-space character. Exit.
79 598 : break;
80 : }
81 :
82 598 : if (i == n)
83 : // the whole string is space. Fail.
84 0 : return false;
85 :
86 598 : n -= i; // Subtract the length of the preceding spaces.
87 :
88 : // Determine the last non-space character.
89 628 : for (; p != pLast; --pLast, --n)
90 : {
91 472 : sal_Unicode c = *pLast;
92 472 : if (c != 0x0020 && c != 0x00A0)
93 : // Non space character. Exit.
94 442 : break;
95 : }
96 :
97 1282 : for (i = 0; i < n; ++i, ++p)
98 : {
99 1044 : sal_Unicode c = *p;
100 1044 : if (c == 0x0020 && gsep == 0x00A0)
101 : // ascii space to unicode space if that is group separator
102 0 : c = 0x00A0;
103 :
104 1044 : if ('0' <= c && c <= '9')
105 : {
106 : // this is a digit.
107 616 : aBuf.append(c);
108 616 : ++nDigitCount;
109 : }
110 428 : else if (c == dsep)
111 : {
112 : // this is a decimal separator.
113 :
114 52 : if (nPosDSep >= 0)
115 : // a second decimal separator -> not a valid number.
116 0 : return false;
117 :
118 52 : if (nPosGSep >= 0 && i - nPosGSep != 4)
119 : // the number has a group separator and the decimal sep is not
120 : // positioned correctly.
121 0 : return false;
122 :
123 52 : nPosDSep = i;
124 52 : nPosGSep = -1;
125 52 : aBuf.append(c);
126 52 : nDigitCount = 0;
127 : }
128 376 : else if (c == gsep)
129 : {
130 : // this is a group (thousand) separator.
131 :
132 16 : if (i == 0)
133 : // not allowed as the first character.
134 0 : return false;
135 :
136 16 : if (nPosDSep >= 0)
137 : // not allowed after the decimal separator.
138 0 : return false;
139 :
140 16 : if (nPosGSep >= 0 && nDigitCount != 3)
141 : // must be exactly 3 digits since the last group separator.
142 0 : return false;
143 :
144 16 : if (nPosExponent >= 0)
145 : // not allowed in exponent.
146 0 : return false;
147 :
148 16 : nPosGSep = i;
149 16 : nDigitCount = 0;
150 : }
151 360 : else if (c == '-' || c == '+')
152 : {
153 : // A sign must be the first character if it's given, or immediately
154 : // follow the exponent character if present.
155 34 : if (i == 0 || (nPosExponent >= 0 && i == nPosExponent + 1))
156 0 : aBuf.append(c);
157 : else
158 34 : return false;
159 : }
160 326 : else if (c == 'E' || c == 'e')
161 : {
162 : // this is an exponent designator.
163 :
164 0 : if (nPosExponent >= 0)
165 : // Only one exponent allowed.
166 0 : return false;
167 :
168 0 : if (nPosGSep >= 0 && nDigitCount != 3)
169 : // must be exactly 3 digits since the last group separator.
170 0 : return false;
171 :
172 0 : aBuf.append(c);
173 0 : nPosExponent = i;
174 0 : nPosDSep = -1;
175 0 : nPosGSep = -1;
176 0 : nDigitCount = 0;
177 : }
178 : else
179 326 : return false;
180 : }
181 :
182 : // finished parsing the number.
183 :
184 238 : if (nPosGSep >= 0 && nDigitCount != 3)
185 : // must be exactly 3 digits since the last group separator.
186 8 : return false;
187 :
188 230 : rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
189 230 : sal_Int32 nParseEnd = 0;
190 460 : OUString aString( aBuf.makeStringAndClear());
191 230 : rVal = ::rtl::math::stringToDouble( aString, dsep, gsep, &eStatus, &nParseEnd);
192 230 : if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aString.getLength())
193 : // Not a valid number or not entire string consumed.
194 0 : return false;
195 :
196 828 : return true;
197 : }
198 :
199 0 : bool ScStringUtil::parseSimpleNumber(
200 : const char* p, size_t n, char dsep, char gsep, double& rVal)
201 : {
202 : // Actually almost the entire pre-check is unnecessary and we could call
203 : // rtl::math::stringToDouble() just after having exchanged ascii space with
204 : // non-breaking space, if it wasn't for check of grouped digits. The NaN
205 : // and Inf cases that are accepted by stringToDouble() could be detected
206 : // using rtl::math::isFinite() on the result.
207 :
208 : /* TODO: The grouped digits check isn't even valid for locales that do not
209 : * group in thousands ... e.g. Indian locales. But that's something also
210 : * the number scanner doesn't implement yet, only the formatter. */
211 :
212 0 : OStringBuffer aBuf;
213 :
214 0 : size_t i = 0;
215 0 : const char* pLast = p + (n-1);
216 0 : sal_Int32 nPosDSep = -1, nPosGSep = -1;
217 0 : sal_uInt32 nDigitCount = 0;
218 0 : sal_Int32 nPosExponent = -1;
219 :
220 : // Skip preceding spaces.
221 0 : for (i = 0; i < n; ++i, ++p)
222 : {
223 0 : char c = *p;
224 0 : if (c != ' ')
225 : // first non-space character. Exit.
226 0 : break;
227 : }
228 :
229 0 : if (i == n)
230 : // the whole string is space. Fail.
231 0 : return false;
232 :
233 0 : n -= i; // Subtract the length of the preceding spaces.
234 :
235 : // Determine the last non-space character.
236 0 : for (; p != pLast; --pLast, --n)
237 : {
238 0 : char c = *pLast;
239 0 : if (c != ' ')
240 : // Non space character. Exit.
241 0 : break;
242 : }
243 :
244 0 : for (i = 0; i < n; ++i, ++p)
245 : {
246 0 : char c = *p;
247 :
248 0 : if ('0' <= c && c <= '9')
249 : {
250 : // this is a digit.
251 0 : aBuf.append(c);
252 0 : ++nDigitCount;
253 : }
254 0 : else if (c == dsep)
255 : {
256 : // this is a decimal separator.
257 :
258 0 : if (nPosDSep >= 0)
259 : // a second decimal separator -> not a valid number.
260 0 : return false;
261 :
262 0 : if (nPosGSep >= 0 && i - nPosGSep != 4)
263 : // the number has a group separator and the decimal sep is not
264 : // positioned correctly.
265 0 : return false;
266 :
267 0 : nPosDSep = i;
268 0 : nPosGSep = -1;
269 0 : aBuf.append(c);
270 0 : nDigitCount = 0;
271 : }
272 0 : else if (c == gsep)
273 : {
274 : // this is a group (thousand) separator.
275 :
276 0 : if (i == 0)
277 : // not allowed as the first character.
278 0 : return false;
279 :
280 0 : if (nPosDSep >= 0)
281 : // not allowed after the decimal separator.
282 0 : return false;
283 :
284 0 : if (nPosGSep >= 0 && nDigitCount != 3)
285 : // must be exactly 3 digits since the last group separator.
286 0 : return false;
287 :
288 0 : if (nPosExponent >= 0)
289 : // not allowed in exponent.
290 0 : return false;
291 :
292 0 : nPosGSep = i;
293 0 : nDigitCount = 0;
294 : }
295 0 : else if (c == '-' || c == '+')
296 : {
297 : // A sign must be the first character if it's given, or immediately
298 : // follow the exponent character if present.
299 0 : if (i == 0 || (nPosExponent >= 0 && i == static_cast<size_t>(nPosExponent+1)))
300 0 : aBuf.append(c);
301 : else
302 0 : return false;
303 : }
304 0 : else if (c == 'E' || c == 'e')
305 : {
306 : // this is an exponent designator.
307 :
308 0 : if (nPosExponent >= 0)
309 : // Only one exponent allowed.
310 0 : return false;
311 :
312 0 : if (nPosGSep >= 0 && nDigitCount != 3)
313 : // must be exactly 3 digits since the last group separator.
314 0 : return false;
315 :
316 0 : aBuf.append(c);
317 0 : nPosExponent = i;
318 0 : nPosDSep = -1;
319 0 : nPosGSep = -1;
320 0 : nDigitCount = 0;
321 : }
322 : else
323 0 : return false;
324 : }
325 :
326 : // finished parsing the number.
327 :
328 0 : if (nPosGSep >= 0 && nDigitCount != 3)
329 : // must be exactly 3 digits since the last group separator.
330 0 : return false;
331 :
332 0 : rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
333 0 : sal_Int32 nParseEnd = 0;
334 0 : OString aString( aBuf.makeStringAndClear());
335 0 : rVal = ::rtl::math::stringToDouble( aString, dsep, gsep, &eStatus, &nParseEnd);
336 0 : if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aString.getLength())
337 : // Not a valid number or not entire string consumed.
338 0 : return false;
339 :
340 0 : return true;
341 : }
342 :
343 0 : sal_Int32 ScStringUtil::GetQuotedTokenCount(const OUString &rIn, const OUString& rQuotedPairs, sal_Unicode cTok )
344 : {
345 : assert( !(rQuotedPairs.getLength()%2) );
346 : assert( rQuotedPairs.indexOf(cTok) );
347 :
348 : // empty string: TokenCount is 0 per definition
349 0 : if ( rIn.isEmpty() )
350 0 : return 0;
351 :
352 0 : sal_Int32 nTokCount = 1;
353 0 : sal_Int32 nLen = rIn.getLength();
354 0 : sal_Int32 nQuotedLen = rQuotedPairs.getLength();
355 0 : sal_Unicode cQuotedEndChar = 0;
356 0 : const sal_Unicode* pQuotedStr = rQuotedPairs.getStr();
357 0 : const sal_Unicode* pStr = rIn.getStr();
358 0 : sal_Int32 nIndex = 0;
359 0 : while ( nIndex < nLen )
360 : {
361 0 : sal_Unicode c = *pStr;
362 0 : if ( cQuotedEndChar )
363 : {
364 : // reached end of the quote ?
365 0 : if ( c == cQuotedEndChar )
366 0 : cQuotedEndChar = 0;
367 : }
368 : else
369 : {
370 : // Is the char a quote-beginn char ?
371 0 : sal_Int32 nQuoteIndex = 0;
372 0 : while ( nQuoteIndex < nQuotedLen )
373 : {
374 0 : if ( pQuotedStr[nQuoteIndex] == c )
375 : {
376 0 : cQuotedEndChar = pQuotedStr[nQuoteIndex+1];
377 0 : break;
378 : }
379 : else
380 0 : nQuoteIndex += 2;
381 : }
382 :
383 : // If the token-char matches then increase TokCount
384 0 : if ( c == cTok )
385 0 : ++nTokCount;
386 : }
387 :
388 : ++pStr,
389 0 : ++nIndex;
390 : }
391 :
392 0 : return nTokCount;
393 : }
394 :
395 0 : OUString ScStringUtil::GetQuotedToken(const OUString &rIn, sal_Int32 nToken, const OUString& rQuotedPairs,
396 : sal_Unicode cTok, sal_Int32& rIndex )
397 : {
398 : assert( !(rQuotedPairs.getLength()%2) );
399 : assert( rQuotedPairs.indexOf(cTok) == -1 );
400 :
401 0 : const sal_Unicode* pStr = rIn.getStr();
402 0 : const sal_Unicode* pQuotedStr = rQuotedPairs.getStr();
403 0 : sal_Unicode cQuotedEndChar = 0;
404 0 : sal_Int32 nQuotedLen = rQuotedPairs.getLength();
405 0 : sal_Int32 nLen = rIn.getLength();
406 0 : sal_Int32 nTok = 0;
407 0 : sal_Int32 nFirstChar = rIndex;
408 0 : sal_Int32 i = nFirstChar;
409 :
410 : // detect token position and length
411 0 : pStr += i;
412 0 : while ( i < nLen )
413 : {
414 0 : sal_Unicode c = *pStr;
415 0 : if ( cQuotedEndChar )
416 : {
417 : // end of the quote reached ?
418 0 : if ( c == cQuotedEndChar )
419 0 : cQuotedEndChar = 0;
420 : }
421 : else
422 : {
423 : // Is the char a quote-begin char ?
424 0 : sal_Int32 nQuoteIndex = 0;
425 0 : while ( nQuoteIndex < nQuotedLen )
426 : {
427 0 : if ( pQuotedStr[nQuoteIndex] == c )
428 : {
429 0 : cQuotedEndChar = pQuotedStr[nQuoteIndex+1];
430 0 : break;
431 : }
432 : else
433 0 : nQuoteIndex += 2;
434 : }
435 :
436 : // If the token-char matches then increase TokCount
437 0 : if ( c == cTok )
438 : {
439 0 : ++nTok;
440 :
441 0 : if ( nTok == nToken )
442 0 : nFirstChar = i+1;
443 : else
444 : {
445 0 : if ( nTok > nToken )
446 0 : break;
447 : }
448 : }
449 : }
450 :
451 : ++pStr,
452 0 : ++i;
453 : }
454 :
455 0 : if ( nTok >= nToken )
456 : {
457 0 : if ( i < nLen )
458 0 : rIndex = i+1;
459 : else
460 0 : rIndex = -1;
461 0 : return rIn.copy( nFirstChar, i-nFirstChar );
462 : }
463 : else
464 : {
465 0 : rIndex = -1;
466 0 : return OUString();
467 : }
468 : }
469 :
470 4652 : bool ScStringUtil::isMultiline( const OUString& rStr )
471 : {
472 4652 : if (rStr.indexOf('\n') != -1)
473 6 : return true;
474 :
475 4646 : if (rStr.indexOf(CHAR_CR) != -1)
476 0 : return true;
477 :
478 4646 : return false;
479 : }
480 :
481 4870 : ScInputStringType ScStringUtil::parseInputString(
482 : SvNumberFormatter& rFormatter, const OUString& rStr, LanguageType eLang )
483 : {
484 4870 : ScInputStringType aRet;
485 4870 : aRet.mnFormatType = 0;
486 4870 : aRet.meType = ScInputStringType::Unknown;
487 4870 : aRet.maText = rStr;
488 4870 : aRet.mfValue = 0.0;
489 :
490 4870 : if (rStr.getLength() > 1 && rStr[0] == '=')
491 : {
492 228 : aRet.meType = ScInputStringType::Formula;
493 : }
494 4642 : else if (rStr.getLength() > 1 && rStr[0] == '\'')
495 : {
496 : // for bEnglish, "'" at the beginning is always interpreted as text
497 : // marker and stripped
498 0 : aRet.maText = rStr.copy(1);
499 0 : aRet.meType = ScInputStringType::Text;
500 : }
501 : else // (nur) auf englisches Zahlformat testen
502 : {
503 4642 : sal_uInt32 nNumFormat = rFormatter.GetStandardIndex(eLang);
504 :
505 4642 : if (rFormatter.IsNumberFormat(rStr, nNumFormat, aRet.mfValue))
506 : {
507 2 : aRet.meType = ScInputStringType::Number;
508 2 : aRet.mnFormatType = rFormatter.GetType(nNumFormat);
509 : }
510 4640 : else if (!rStr.isEmpty())
511 2270 : aRet.meType = ScInputStringType::Text;
512 :
513 : // das (englische) Zahlformat wird nicht gesetzt
514 : //! passendes lokales Format suchen und setzen???
515 : }
516 :
517 4870 : return aRet;
518 228 : }
519 :
520 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|