Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include "stringutil.hxx"
21 : #include "rtl/ustrbuf.hxx"
22 : #include "rtl/math.hxx"
23 :
24 : using ::rtl::OUString;
25 : using ::rtl::OUStringBuffer;
26 :
27 1812 : ScSetStringParam::ScSetStringParam() :
28 : mpNumFormatter(NULL),
29 : mbDetectNumberFormat(true),
30 : meSetTextNumFormat(Never),
31 1812 : mbHandleApostrophe(true)
32 : {
33 1812 : }
34 :
35 : // ============================================================================-
36 :
37 229 : bool ScStringUtil::parseSimpleNumber(
38 : const OUString& rStr, sal_Unicode dsep, sal_Unicode gsep, double& rVal)
39 : {
40 : // Actually almost the entire pre-check is unnecessary and we could call
41 : // rtl::math::stringToDouble() just after having exchanged ascii space with
42 : // non-breaking space, if it wasn't for check of grouped digits. The NaN
43 : // and Inf cases that are accepted by stringToDouble() could be detected
44 : // using rtl::math::isFinite() on the result.
45 :
46 : /* TODO: The grouped digits check isn't even valid for locales that do not
47 : * group in thousands ... e.g. Indian locales. But that's something also
48 : * the number scanner doesn't implement yet, only the formatter. */
49 :
50 229 : OUStringBuffer aBuf;
51 :
52 229 : sal_Int32 i = 0;
53 229 : sal_Int32 n = rStr.getLength();
54 229 : const sal_Unicode* p = rStr.getStr();
55 229 : const sal_Unicode* pLast = p + (n-1);
56 229 : sal_Int32 nPosDSep = -1, nPosGSep = -1;
57 229 : sal_uInt32 nDigitCount = 0;
58 229 : sal_Int32 nPosExponent = -1;
59 :
60 : // Skip preceding spaces.
61 233 : for (i = 0; i < n; ++i, ++p)
62 : {
63 233 : sal_Unicode c = *p;
64 233 : if (c != 0x0020 && c != 0x00A0)
65 : // first non-space character. Exit.
66 229 : break;
67 : }
68 :
69 229 : if (i == n)
70 : // the whole string is space. Fail.
71 0 : return false;
72 :
73 229 : n -= i; // Subtract the length of the preceding spaces.
74 :
75 : // Determine the last non-space character.
76 235 : for (; p != pLast; --pLast, --n)
77 : {
78 169 : sal_Unicode c = *pLast;
79 169 : if (c != 0x0020 && c != 0x00A0)
80 : // Non space character. Exit.
81 163 : break;
82 : }
83 :
84 532 : for (i = 0; i < n; ++i, ++p)
85 : {
86 416 : sal_Unicode c = *p;
87 416 : if (c == 0x0020 && gsep == 0x00A0)
88 : // ascii space to unicode space if that is group separator
89 0 : c = 0x00A0;
90 :
91 416 : if (sal_Unicode('0') <= c && c <= sal_Unicode('9'))
92 : {
93 : // this is a digit.
94 269 : aBuf.append(c);
95 269 : ++nDigitCount;
96 : }
97 147 : else if (c == dsep)
98 : {
99 : // this is a decimal separator.
100 :
101 26 : if (nPosDSep >= 0)
102 : // a second decimal separator -> not a valid number.
103 0 : return false;
104 :
105 26 : if (nPosGSep >= 0 && i - nPosGSep != 4)
106 : // the number has a group separator and the decimal sep is not
107 : // positioned correctly.
108 0 : return false;
109 :
110 26 : nPosDSep = i;
111 26 : nPosGSep = -1;
112 26 : aBuf.append(c);
113 26 : nDigitCount = 0;
114 : }
115 121 : else if (c == gsep)
116 : {
117 : // this is a group (thousand) separator.
118 :
119 8 : if (i == 0)
120 : // not allowed as the first character.
121 0 : return false;
122 :
123 8 : if (nPosDSep >= 0)
124 : // not allowed after the decimal separator.
125 0 : return false;
126 :
127 8 : if (nPosGSep >= 0 && nDigitCount != 3)
128 : // must be exactly 3 digits since the last group separator.
129 0 : return false;
130 :
131 8 : if (nPosExponent >= 0)
132 : // not allowed in exponent.
133 0 : return false;
134 :
135 8 : nPosGSep = i;
136 8 : nDigitCount = 0;
137 : }
138 113 : else if (c == sal_Unicode('-') || c == sal_Unicode('+'))
139 : {
140 : // A sign must be the first character if it's given, or immediately
141 : // follow the exponent character if present.
142 8 : if (i == 0 || (nPosExponent >= 0 && i == nPosExponent + 1))
143 0 : aBuf.append(c);
144 : else
145 8 : return false;
146 : }
147 105 : else if (c == sal_Unicode('E') || c == sal_Unicode('e'))
148 : {
149 : // this is an exponent designator.
150 :
151 0 : if (nPosExponent >= 0)
152 : // Only one exponent allowed.
153 0 : return false;
154 :
155 0 : if (nPosGSep >= 0 && nDigitCount != 3)
156 : // must be exactly 3 digits since the last group separator.
157 0 : return false;
158 :
159 0 : aBuf.append(c);
160 0 : nPosExponent = i;
161 0 : nPosDSep = -1;
162 0 : nPosGSep = -1;
163 0 : nDigitCount = 0;
164 : }
165 : else
166 105 : return false;
167 : }
168 :
169 : // finished parsing the number.
170 :
171 116 : if (nPosGSep >= 0 && nDigitCount != 3)
172 : // must be exactly 3 digits since the last group separator.
173 4 : return false;
174 :
175 112 : rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
176 112 : sal_Int32 nParseEnd = 0;
177 112 : OUString aString( aBuf.makeStringAndClear());
178 112 : rVal = ::rtl::math::stringToDouble( aString, dsep, gsep, &eStatus, &nParseEnd);
179 112 : if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aString.getLength())
180 : // Not a valid number or not entire string consumed.
181 0 : return false;
182 :
183 112 : return true;
184 : }
185 :
186 0 : xub_StrLen ScStringUtil::GetQuotedTokenCount(const UniString &rIn, const UniString& rQuotedPairs, sal_Unicode cTok )
187 : {
188 : assert( !(rQuotedPairs.Len()%2) );
189 : assert( rQuotedPairs.Search(cTok) );
190 :
191 : // empty string: TokenCount is 0 per definition
192 0 : if ( !rIn.Len() )
193 0 : return 0;
194 :
195 0 : xub_StrLen nTokCount = 1;
196 0 : sal_Int32 nLen = rIn.Len();
197 0 : xub_StrLen nQuotedLen = rQuotedPairs.Len();
198 0 : sal_Unicode cQuotedEndChar = 0;
199 0 : const sal_Unicode* pQuotedStr = rQuotedPairs.GetBuffer();
200 0 : const sal_Unicode* pStr = rIn.GetBuffer();
201 0 : sal_Int32 nIndex = 0;
202 0 : while ( nIndex < nLen )
203 : {
204 0 : sal_Unicode c = *pStr;
205 0 : if ( cQuotedEndChar )
206 : {
207 : // reached end of the quote ?
208 0 : if ( c == cQuotedEndChar )
209 0 : cQuotedEndChar = 0;
210 : }
211 : else
212 : {
213 : // Is the char a quote-beginn char ?
214 0 : xub_StrLen nQuoteIndex = 0;
215 0 : while ( nQuoteIndex < nQuotedLen )
216 : {
217 0 : if ( pQuotedStr[nQuoteIndex] == c )
218 : {
219 0 : cQuotedEndChar = pQuotedStr[nQuoteIndex+1];
220 0 : break;
221 : }
222 : else
223 0 : nQuoteIndex += 2;
224 : }
225 :
226 : // If the token-char matches then increase TokCount
227 0 : if ( c == cTok )
228 0 : ++nTokCount;
229 : }
230 :
231 : ++pStr,
232 0 : ++nIndex;
233 : }
234 :
235 0 : return nTokCount;
236 : }
237 :
238 0 : UniString ScStringUtil::GetQuotedToken(const UniString &rIn, xub_StrLen nToken, const UniString& rQuotedPairs,
239 : sal_Unicode cTok, xub_StrLen& rIndex )
240 : {
241 : assert( !(rQuotedPairs.Len()%2) );
242 : assert( rQuotedPairs.Search(cTok) == STRING_NOTFOUND );
243 :
244 0 : const sal_Unicode* pStr = rIn.GetBuffer();
245 0 : const sal_Unicode* pQuotedStr = rQuotedPairs.GetBuffer();
246 0 : sal_Unicode cQuotedEndChar = 0;
247 0 : xub_StrLen nQuotedLen = rQuotedPairs.Len();
248 0 : xub_StrLen nLen = rIn.Len();
249 0 : xub_StrLen nTok = 0;
250 0 : xub_StrLen nFirstChar = rIndex;
251 0 : xub_StrLen i = nFirstChar;
252 :
253 : // detect token position and length
254 0 : pStr += i;
255 0 : while ( i < nLen )
256 : {
257 0 : sal_Unicode c = *pStr;
258 0 : if ( cQuotedEndChar )
259 : {
260 : // end of the quote reached ?
261 0 : if ( c == cQuotedEndChar )
262 0 : cQuotedEndChar = 0;
263 : }
264 : else
265 : {
266 : // Is the char a quote-begin char ?
267 0 : xub_StrLen nQuoteIndex = 0;
268 0 : while ( nQuoteIndex < nQuotedLen )
269 : {
270 0 : if ( pQuotedStr[nQuoteIndex] == c )
271 : {
272 0 : cQuotedEndChar = pQuotedStr[nQuoteIndex+1];
273 0 : break;
274 : }
275 : else
276 0 : nQuoteIndex += 2;
277 : }
278 :
279 : // If the token-char matches then increase TokCount
280 0 : if ( c == cTok )
281 : {
282 0 : ++nTok;
283 :
284 0 : if ( nTok == nToken )
285 0 : nFirstChar = i+1;
286 : else
287 : {
288 0 : if ( nTok > nToken )
289 0 : break;
290 : }
291 : }
292 : }
293 :
294 : ++pStr,
295 0 : ++i;
296 : }
297 :
298 0 : if ( nTok >= nToken )
299 : {
300 0 : if ( i < nLen )
301 0 : rIndex = i+1;
302 : else
303 0 : rIndex = STRING_NOTFOUND;
304 0 : return rIn.Copy( nFirstChar, i-nFirstChar );
305 : }
306 : else
307 : {
308 0 : rIndex = STRING_NOTFOUND;
309 0 : return UniString();
310 : }
311 : }
312 :
313 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|