Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include "stringutil.hxx"
21 : #include "global.hxx"
22 : #include <svl/zforlist.hxx>
23 :
24 : #include <rtl/ustrbuf.hxx>
25 : #include <rtl/strbuf.hxx>
26 : #include <rtl/math.hxx>
27 :
28 11110 : ScSetStringParam::ScSetStringParam() :
29 : mpNumFormatter(NULL),
30 : mbDetectNumberFormat(true),
31 : meSetTextNumFormat(Never),
32 : mbHandleApostrophe(true),
33 11110 : meStartListening(sc::SingleCellListening)
34 : {
35 11110 : }
36 :
37 3215 : void ScSetStringParam::setTextInput()
38 : {
39 3215 : mbDetectNumberFormat = false;
40 3215 : mbHandleApostrophe = false;
41 3215 : meSetTextNumFormat = Always;
42 3215 : }
43 :
44 340 : void ScSetStringParam::setNumericInput()
45 : {
46 340 : mbDetectNumberFormat = true;
47 340 : mbHandleApostrophe = true;
48 340 : meSetTextNumFormat = Never;
49 340 : }
50 :
51 299 : bool ScStringUtil::parseSimpleNumber(
52 : const OUString& rStr, sal_Unicode dsep, sal_Unicode gsep, double& rVal)
53 : {
54 : // Actually almost the entire pre-check is unnecessary and we could call
55 : // rtl::math::stringToDouble() just after having exchanged ascii space with
56 : // non-breaking space, if it wasn't for check of grouped digits. The NaN
57 : // and Inf cases that are accepted by stringToDouble() could be detected
58 : // using rtl::math::isFinite() on the result.
59 :
60 : /* TODO: The grouped digits check isn't even valid for locales that do not
61 : * group in thousands ... e.g. Indian locales. But that's something also
62 : * the number scanner doesn't implement yet, only the formatter. */
63 :
64 299 : OUStringBuffer aBuf;
65 :
66 299 : sal_Int32 i = 0;
67 299 : sal_Int32 n = rStr.getLength();
68 299 : const sal_Unicode* p = rStr.getStr();
69 299 : const sal_Unicode* pLast = p + (n-1);
70 299 : sal_Int32 nPosDSep = -1, nPosGSep = -1;
71 299 : sal_uInt32 nDigitCount = 0;
72 299 : sal_Int32 nPosExponent = -1;
73 :
74 : // Skip preceding spaces.
75 309 : for (i = 0; i < n; ++i, ++p)
76 : {
77 309 : sal_Unicode c = *p;
78 309 : if (c != 0x0020 && c != 0x00A0)
79 : // first non-space character. Exit.
80 299 : break;
81 : }
82 :
83 299 : if (i == n)
84 : // the whole string is space. Fail.
85 0 : return false;
86 :
87 299 : n -= i; // Subtract the length of the preceding spaces.
88 :
89 : // Determine the last non-space character.
90 314 : for (; p != pLast; --pLast, --n)
91 : {
92 236 : sal_Unicode c = *pLast;
93 236 : if (c != 0x0020 && c != 0x00A0)
94 : // Non space character. Exit.
95 221 : break;
96 : }
97 :
98 641 : for (i = 0; i < n; ++i, ++p)
99 : {
100 522 : sal_Unicode c = *p;
101 522 : if (c == 0x0020 && gsep == 0x00A0)
102 : // ascii space to unicode space if that is group separator
103 0 : c = 0x00A0;
104 :
105 522 : if ('0' <= c && c <= '9')
106 : {
107 : // this is a digit.
108 308 : aBuf.append(c);
109 308 : ++nDigitCount;
110 : }
111 214 : else if (c == dsep)
112 : {
113 : // this is a decimal separator.
114 :
115 26 : if (nPosDSep >= 0)
116 : // a second decimal separator -> not a valid number.
117 0 : return false;
118 :
119 26 : if (nPosGSep >= 0 && i - nPosGSep != 4)
120 : // the number has a group separator and the decimal sep is not
121 : // positioned correctly.
122 0 : return false;
123 :
124 26 : nPosDSep = i;
125 26 : nPosGSep = -1;
126 26 : aBuf.append(c);
127 26 : nDigitCount = 0;
128 : }
129 188 : else if (c == gsep)
130 : {
131 : // this is a group (thousand) separator.
132 :
133 8 : if (i == 0)
134 : // not allowed as the first character.
135 0 : return false;
136 :
137 8 : if (nPosDSep >= 0)
138 : // not allowed after the decimal separator.
139 0 : return false;
140 :
141 8 : if (nPosGSep >= 0 && nDigitCount != 3)
142 : // must be exactly 3 digits since the last group separator.
143 0 : return false;
144 :
145 8 : if (nPosExponent >= 0)
146 : // not allowed in exponent.
147 0 : return false;
148 :
149 8 : nPosGSep = i;
150 8 : nDigitCount = 0;
151 : }
152 180 : else if (c == '-' || c == '+')
153 : {
154 : // A sign must be the first character if it's given, or immediately
155 : // follow the exponent character if present.
156 17 : if (i == 0 || (nPosExponent >= 0 && i == nPosExponent + 1))
157 0 : aBuf.append(c);
158 : else
159 17 : return false;
160 : }
161 163 : else if (c == 'E' || c == 'e')
162 : {
163 : // this is an exponent designator.
164 :
165 0 : if (nPosExponent >= 0)
166 : // Only one exponent allowed.
167 0 : return false;
168 :
169 0 : if (nPosGSep >= 0 && nDigitCount != 3)
170 : // must be exactly 3 digits since the last group separator.
171 0 : return false;
172 :
173 0 : aBuf.append(c);
174 0 : nPosExponent = i;
175 0 : nPosDSep = -1;
176 0 : nPosGSep = -1;
177 0 : nDigitCount = 0;
178 : }
179 : else
180 163 : return false;
181 : }
182 :
183 : // finished parsing the number.
184 :
185 119 : if (nPosGSep >= 0 && nDigitCount != 3)
186 : // must be exactly 3 digits since the last group separator.
187 4 : return false;
188 :
189 115 : rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
190 115 : sal_Int32 nParseEnd = 0;
191 230 : OUString aString( aBuf.makeStringAndClear());
192 115 : rVal = ::rtl::math::stringToDouble( aString, dsep, gsep, &eStatus, &nParseEnd);
193 115 : if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aString.getLength())
194 : // Not a valid number or not entire string consumed.
195 0 : return false;
196 :
197 414 : return true;
198 : }
199 :
200 0 : bool ScStringUtil::parseSimpleNumber(
201 : const char* p, size_t n, char dsep, char gsep, double& rVal)
202 : {
203 : // Actually almost the entire pre-check is unnecessary and we could call
204 : // rtl::math::stringToDouble() just after having exchanged ascii space with
205 : // non-breaking space, if it wasn't for check of grouped digits. The NaN
206 : // and Inf cases that are accepted by stringToDouble() could be detected
207 : // using rtl::math::isFinite() on the result.
208 :
209 : /* TODO: The grouped digits check isn't even valid for locales that do not
210 : * group in thousands ... e.g. Indian locales. But that's something also
211 : * the number scanner doesn't implement yet, only the formatter. */
212 :
213 0 : OStringBuffer aBuf;
214 :
215 0 : size_t i = 0;
216 0 : const char* pLast = p + (n-1);
217 0 : sal_Int32 nPosDSep = -1, nPosGSep = -1;
218 0 : sal_uInt32 nDigitCount = 0;
219 0 : sal_Int32 nPosExponent = -1;
220 :
221 : // Skip preceding spaces.
222 0 : for (i = 0; i < n; ++i, ++p)
223 : {
224 0 : char c = *p;
225 0 : if (c != ' ')
226 : // first non-space character. Exit.
227 0 : break;
228 : }
229 :
230 0 : if (i == n)
231 : // the whole string is space. Fail.
232 0 : return false;
233 :
234 0 : n -= i; // Subtract the length of the preceding spaces.
235 :
236 : // Determine the last non-space character.
237 0 : for (; p != pLast; --pLast, --n)
238 : {
239 0 : char c = *pLast;
240 0 : if (c != ' ')
241 : // Non space character. Exit.
242 0 : break;
243 : }
244 :
245 0 : for (i = 0; i < n; ++i, ++p)
246 : {
247 0 : char c = *p;
248 :
249 0 : if ('0' <= c && c <= '9')
250 : {
251 : // this is a digit.
252 0 : aBuf.append(c);
253 0 : ++nDigitCount;
254 : }
255 0 : else if (c == dsep)
256 : {
257 : // this is a decimal separator.
258 :
259 0 : if (nPosDSep >= 0)
260 : // a second decimal separator -> not a valid number.
261 0 : return false;
262 :
263 0 : if (nPosGSep >= 0 && i - nPosGSep != 4)
264 : // the number has a group separator and the decimal sep is not
265 : // positioned correctly.
266 0 : return false;
267 :
268 0 : nPosDSep = i;
269 0 : nPosGSep = -1;
270 0 : aBuf.append(c);
271 0 : nDigitCount = 0;
272 : }
273 0 : else if (c == gsep)
274 : {
275 : // this is a group (thousand) separator.
276 :
277 0 : if (i == 0)
278 : // not allowed as the first character.
279 0 : return false;
280 :
281 0 : if (nPosDSep >= 0)
282 : // not allowed after the decimal separator.
283 0 : return false;
284 :
285 0 : if (nPosGSep >= 0 && nDigitCount != 3)
286 : // must be exactly 3 digits since the last group separator.
287 0 : return false;
288 :
289 0 : if (nPosExponent >= 0)
290 : // not allowed in exponent.
291 0 : return false;
292 :
293 0 : nPosGSep = i;
294 0 : nDigitCount = 0;
295 : }
296 0 : else if (c == '-' || c == '+')
297 : {
298 : // A sign must be the first character if it's given, or immediately
299 : // follow the exponent character if present.
300 0 : if (i == 0 || (nPosExponent >= 0 && i == static_cast<size_t>(nPosExponent+1)))
301 0 : aBuf.append(c);
302 : else
303 0 : return false;
304 : }
305 0 : else if (c == 'E' || c == 'e')
306 : {
307 : // this is an exponent designator.
308 :
309 0 : if (nPosExponent >= 0)
310 : // Only one exponent allowed.
311 0 : return false;
312 :
313 0 : if (nPosGSep >= 0 && nDigitCount != 3)
314 : // must be exactly 3 digits since the last group separator.
315 0 : return false;
316 :
317 0 : aBuf.append(c);
318 0 : nPosExponent = i;
319 0 : nPosDSep = -1;
320 0 : nPosGSep = -1;
321 0 : nDigitCount = 0;
322 : }
323 : else
324 0 : return false;
325 : }
326 :
327 : // finished parsing the number.
328 :
329 0 : if (nPosGSep >= 0 && nDigitCount != 3)
330 : // must be exactly 3 digits since the last group separator.
331 0 : return false;
332 :
333 0 : rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
334 0 : sal_Int32 nParseEnd = 0;
335 0 : OString aString( aBuf.makeStringAndClear());
336 0 : rVal = ::rtl::math::stringToDouble( aString, dsep, gsep, &eStatus, &nParseEnd);
337 0 : if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aString.getLength())
338 : // Not a valid number or not entire string consumed.
339 0 : return false;
340 :
341 0 : return true;
342 : }
343 :
344 0 : sal_Int32 ScStringUtil::GetQuotedTokenCount(const OUString &rIn, const OUString& rQuotedPairs, sal_Unicode cTok )
345 : {
346 : assert( !(rQuotedPairs.getLength()%2) );
347 : assert( rQuotedPairs.indexOf(cTok) );
348 :
349 : // empty string: TokenCount is 0 per definition
350 0 : if ( rIn.isEmpty() )
351 0 : return 0;
352 :
353 0 : sal_Int32 nTokCount = 1;
354 0 : sal_Int32 nLen = rIn.getLength();
355 0 : sal_Int32 nQuotedLen = rQuotedPairs.getLength();
356 0 : sal_Unicode cQuotedEndChar = 0;
357 0 : const sal_Unicode* pQuotedStr = rQuotedPairs.getStr();
358 0 : const sal_Unicode* pStr = rIn.getStr();
359 0 : sal_Int32 nIndex = 0;
360 0 : while ( nIndex < nLen )
361 : {
362 0 : sal_Unicode c = *pStr;
363 0 : if ( cQuotedEndChar )
364 : {
365 : // reached end of the quote ?
366 0 : if ( c == cQuotedEndChar )
367 0 : cQuotedEndChar = 0;
368 : }
369 : else
370 : {
371 : // Is the char a quote-beginn char ?
372 0 : sal_Int32 nQuoteIndex = 0;
373 0 : while ( nQuoteIndex < nQuotedLen )
374 : {
375 0 : if ( pQuotedStr[nQuoteIndex] == c )
376 : {
377 0 : cQuotedEndChar = pQuotedStr[nQuoteIndex+1];
378 0 : break;
379 : }
380 : else
381 0 : nQuoteIndex += 2;
382 : }
383 :
384 : // If the token-char matches then increase TokCount
385 0 : if ( c == cTok )
386 0 : ++nTokCount;
387 : }
388 :
389 : ++pStr,
390 0 : ++nIndex;
391 : }
392 :
393 0 : return nTokCount;
394 : }
395 :
396 0 : OUString ScStringUtil::GetQuotedToken(const OUString &rIn, sal_Int32 nToken, const OUString& rQuotedPairs,
397 : sal_Unicode cTok, sal_Int32& rIndex )
398 : {
399 : assert( !(rQuotedPairs.getLength()%2) );
400 : assert( rQuotedPairs.indexOf(cTok) == -1 );
401 :
402 0 : const sal_Unicode* pStr = rIn.getStr();
403 0 : const sal_Unicode* pQuotedStr = rQuotedPairs.getStr();
404 0 : sal_Unicode cQuotedEndChar = 0;
405 0 : sal_Int32 nQuotedLen = rQuotedPairs.getLength();
406 0 : sal_Int32 nLen = rIn.getLength();
407 0 : sal_Int32 nTok = 0;
408 0 : sal_Int32 nFirstChar = rIndex;
409 0 : sal_Int32 i = nFirstChar;
410 :
411 : // detect token position and length
412 0 : pStr += i;
413 0 : while ( i < nLen )
414 : {
415 0 : sal_Unicode c = *pStr;
416 0 : if ( cQuotedEndChar )
417 : {
418 : // end of the quote reached ?
419 0 : if ( c == cQuotedEndChar )
420 0 : cQuotedEndChar = 0;
421 : }
422 : else
423 : {
424 : // Is the char a quote-begin char ?
425 0 : sal_Int32 nQuoteIndex = 0;
426 0 : while ( nQuoteIndex < nQuotedLen )
427 : {
428 0 : if ( pQuotedStr[nQuoteIndex] == c )
429 : {
430 0 : cQuotedEndChar = pQuotedStr[nQuoteIndex+1];
431 0 : break;
432 : }
433 : else
434 0 : nQuoteIndex += 2;
435 : }
436 :
437 : // If the token-char matches then increase TokCount
438 0 : if ( c == cTok )
439 : {
440 0 : ++nTok;
441 :
442 0 : if ( nTok == nToken )
443 0 : nFirstChar = i+1;
444 : else
445 : {
446 0 : if ( nTok > nToken )
447 0 : break;
448 : }
449 : }
450 : }
451 :
452 : ++pStr,
453 0 : ++i;
454 : }
455 :
456 0 : if ( nTok >= nToken )
457 : {
458 0 : if ( i < nLen )
459 0 : rIndex = i+1;
460 : else
461 0 : rIndex = -1;
462 0 : return rIn.copy( nFirstChar, i-nFirstChar );
463 : }
464 : else
465 : {
466 0 : rIndex = -1;
467 0 : return OUString();
468 : }
469 : }
470 :
471 2327 : bool ScStringUtil::isMultiline( const OUString& rStr )
472 : {
473 2327 : if (rStr.indexOf('\n') != -1)
474 3 : return true;
475 :
476 2324 : if (rStr.indexOf(CHAR_CR) != -1)
477 0 : return true;
478 :
479 2324 : return false;
480 : }
481 :
482 2436 : ScInputStringType ScStringUtil::parseInputString(
483 : SvNumberFormatter& rFormatter, const OUString& rStr, LanguageType eLang )
484 : {
485 2436 : ScInputStringType aRet;
486 2436 : aRet.mnFormatType = 0;
487 2436 : aRet.meType = ScInputStringType::Unknown;
488 2436 : aRet.maText = rStr;
489 2436 : aRet.mfValue = 0.0;
490 :
491 2436 : if (rStr.getLength() > 1 && rStr[0] == '=')
492 : {
493 114 : aRet.meType = ScInputStringType::Formula;
494 : }
495 2322 : else if (rStr.getLength() > 1 && rStr[0] == '\'')
496 : {
497 : // for bEnglish, "'" at the beginning is always interpreted as text
498 : // marker and stripped
499 0 : aRet.maText = rStr.copy(1);
500 0 : aRet.meType = ScInputStringType::Text;
501 : }
502 : else // (nur) auf englisches Zahlformat testen
503 : {
504 2322 : sal_uInt32 nNumFormat = rFormatter.GetStandardIndex(eLang);
505 :
506 2322 : if (rFormatter.IsNumberFormat(rStr, nNumFormat, aRet.mfValue))
507 : {
508 1 : aRet.meType = ScInputStringType::Number;
509 1 : aRet.mnFormatType = rFormatter.GetType(nNumFormat);
510 : }
511 2321 : else if (!rStr.isEmpty())
512 1136 : aRet.meType = ScInputStringType::Text;
513 :
514 : // the (English) number format is not set
515 : //TODO: find and replace with matching local format???
516 : }
517 :
518 2436 : return aRet;
519 156 : }
520 :
521 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|