Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include "sal/config.h"
21 :
22 : #ifdef _MSC_VER
23 : #pragma warning(disable:4738) // storing 32-bit float result in memory, possible loss of performance
24 : #endif
25 :
26 : #include <cassert>
27 : #include <cstdlib>
28 :
29 : #include <osl/interlck.h>
30 : #include <rtl/alloc.h>
31 : #include <osl/diagnose.h>
32 : #include <rtl/tencinfo.h>
33 :
34 : #include "strimp.hxx"
35 : #include <rtl/character.hxx>
36 : #include <rtl/string.h>
37 :
38 : #include "rtl/math.h"
39 :
40 : /* ======================================================================= */
41 :
42 : /* static data to be referenced by all empty strings
43 : * the refCount is predefined to 1 and must never become 0 !
44 : */
45 : static rtl_String const aImplEmpty_rtl_String =
46 : {
47 : SAL_STRING_STATIC_FLAG|1,
48 : /* sal_Int32 refCount; */
49 : 0, /* sal_Int32 length; */
50 : { 0 } /* sal_Char buffer[1]; */
51 : };
52 :
53 : /* ======================================================================= */
/* These macros configure the "poor-man templates" included from
 * strtmpl.cxx just below, which are used to share code between this
 * file and ustring.cxx.
 */

#define IMPL_RTL_IS_USTRING 0

#define IMPL_RTL_STRCODE sal_Char
/* The argument is parenthesized so that the cast applies to the whole
 * argument expression (e.g. "a + b") and not only to its first operand.
 */
#define IMPL_RTL_USTRCODE( c ) ((unsigned char)(c))
#define IMPL_RTL_STRNAME( n ) rtl_str_ ## n

#define IMPL_RTL_STRINGNAME( n ) rtl_string_ ## n
#define IMPL_RTL_STRINGDATA rtl_String
#define IMPL_RTL_EMPTYSTRING aImplEmpty_rtl_String

#if USE_SDT_PROBES
#define RTL_LOG_STRING_BITS 8
#endif
72 :
73 : /* ======================================================================= */
74 :
75 : /* Include String/UString template code */
76 :
77 : #include "strtmpl.cxx"
78 :
79 : #undef IMPL_RTL_EMPTYSTRING
80 : #undef IMPL_RTL_IS_USTRING
81 : #undef IMPL_RTL_STRCODE
82 : #undef IMPL_RTL_STRINGDATA
83 : #undef IMPL_RTL_STRINGNAME
84 : #undef IMPL_RTL_STRNAME
85 : #undef IMPL_RTL_USTRCODE
86 : #undef RTL_LOG_STRING_BITS
87 :
88 102 : sal_Int32 SAL_CALL rtl_str_valueOfFloat(sal_Char * pStr, float f)
89 : SAL_THROW_EXTERN_C()
90 : {
91 : assert(pStr);
92 102 : rtl_String * pResult = NULL;
93 : sal_Int32 nLen;
94 : rtl_math_doubleToString(
95 : &pResult, 0, 0, f, rtl_math_StringFormat_G,
96 : RTL_STR_MAX_VALUEOFFLOAT - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0, 0,
97 102 : sal_True);
98 102 : nLen = pResult->length;
99 : OSL_ASSERT(nLen < RTL_STR_MAX_VALUEOFFLOAT);
100 102 : memcpy(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Char));
101 102 : rtl_string_release(pResult);
102 102 : return nLen;
103 : }
104 :
105 5790 : sal_Int32 SAL_CALL rtl_str_valueOfDouble(sal_Char * pStr, double d)
106 : SAL_THROW_EXTERN_C()
107 : {
108 : assert(pStr);
109 5790 : rtl_String * pResult = NULL;
110 : sal_Int32 nLen;
111 : rtl_math_doubleToString(
112 : &pResult, 0, 0, d, rtl_math_StringFormat_G,
113 : RTL_STR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0,
114 5790 : 0, sal_True);
115 5790 : nLen = pResult->length;
116 : OSL_ASSERT(nLen < RTL_STR_MAX_VALUEOFDOUBLE);
117 5790 : memcpy(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Char));
118 5790 : rtl_string_release(pResult);
119 5790 : return nLen;
120 : }
121 :
122 1910 : float SAL_CALL rtl_str_toFloat(sal_Char const * pStr) SAL_THROW_EXTERN_C()
123 : {
124 : assert(pStr);
125 1910 : return (float) rtl_math_stringToDouble(pStr, pStr + rtl_str_getLength(pStr),
126 1910 : '.', 0, 0, 0);
127 : }
128 :
129 43055 : double SAL_CALL rtl_str_toDouble(sal_Char const * pStr) SAL_THROW_EXTERN_C()
130 : {
131 : assert(pStr);
132 43055 : return rtl_math_stringToDouble(pStr, pStr + rtl_str_getLength(pStr), '.', 0,
133 43055 : 0, 0);
134 : }
135 :
136 : /* ======================================================================= */
137 :
138 95027463 : static int rtl_ImplGetFastUTF8ByteLen( const sal_Unicode* pStr, sal_Int32 nLen )
139 : {
140 : int n;
141 : sal_Unicode c;
142 : sal_uInt32 nUCS4Char;
143 : const sal_Unicode* pEndStr;
144 :
145 95027463 : n = 0;
146 95027463 : pEndStr = pStr+nLen;
147 1677284587 : while ( pStr < pEndStr )
148 : {
149 1487229684 : c = *pStr;
150 :
151 1487229684 : if ( c < 0x80 )
152 1487161579 : n++;
153 68105 : else if ( c < 0x800 )
154 4865 : n += 2;
155 : else
156 : {
157 63240 : if ( !rtl::isHighSurrogate(c) )
158 63217 : n += 3;
159 : else
160 : {
161 0 : nUCS4Char = c;
162 :
163 0 : if ( pStr+1 < pEndStr )
164 : {
165 0 : c = *(pStr+1);
166 0 : if ( rtl::isLowSurrogate(c) )
167 : {
168 0 : nUCS4Char = rtl::combineSurrogates(nUCS4Char, c);
169 0 : pStr++;
170 : }
171 : }
172 :
173 0 : if ( nUCS4Char < 0x10000 )
174 0 : n += 3;
175 0 : else if ( nUCS4Char < 0x200000 )
176 0 : n += 4;
177 0 : else if ( nUCS4Char < 0x4000000 )
178 0 : n += 5;
179 : else
180 0 : n += 6;
181 : }
182 : }
183 :
184 1487229661 : pStr++;
185 : }
186 :
187 95027440 : return n;
188 : }
189 :
190 : /* ----------------------------------------------------------------------- */
191 :
192 167198416 : bool SAL_CALL rtl_impl_convertUStringToString(rtl_String ** pTarget,
193 : sal_Unicode const * pSource,
194 : sal_Int32 nLength,
195 : rtl_TextEncoding nEncoding,
196 : sal_uInt32 nFlags,
197 : bool bCheckErrors)
198 : {
199 : assert(pTarget != nullptr);
200 : assert(pSource != nullptr || nLength == 0);
201 : assert(nLength >= 0);
202 : OSL_ASSERT(nLength == 0 || rtl_isOctetTextEncoding(nEncoding));
203 :
204 167198416 : if ( !nLength )
205 240516 : rtl_string_new( pTarget );
206 : else
207 : {
208 : rtl_String* pTemp;
209 : rtl_UnicodeToTextConverter hConverter;
210 : sal_uInt32 nInfo;
211 : sal_Size nSrcChars;
212 : sal_Size nDestBytes;
213 : sal_Size nNewLen;
214 : sal_Size nNotConvertedChars;
215 : sal_Size nMaxCharLen;
216 :
217 : /* Optimization for UTF-8 - we try to calculate the exact length */
218 : /* For all other encoding we try an good estimation */
219 166957900 : if ( nEncoding == RTL_TEXTENCODING_UTF8 )
220 : {
221 95027440 : nNewLen = rtl_ImplGetFastUTF8ByteLen( pSource, nLength );
222 : /* Includes the string only ASCII, then we could copy
223 : the buffer faster */
224 95027438 : if ( nNewLen == (sal_Size)nLength )
225 : {
226 : sal_Char* pBuffer;
227 95004474 : if ( *pTarget )
228 86051514 : rtl_string_release( *pTarget );
229 95004472 : *pTarget = rtl_string_ImplAlloc( nLength );
230 : OSL_ASSERT(*pTarget != NULL);
231 95004472 : pBuffer = (*pTarget)->buffer;
232 1487146202 : do
233 : {
234 : /* Check ASCII range */
235 : OSL_ENSURE( *pSource <= 127,
236 : "rtl_uString2String() - UTF8 test is encoding is wrong" );
237 :
238 1487146202 : *pBuffer = (sal_Char)(unsigned char)*pSource;
239 1487146202 : pBuffer++;
240 1487146202 : pSource++;
241 1487146202 : nLength--;
242 : }
243 : while ( nLength );
244 190008966 : return true;
245 : }
246 :
247 22964 : nMaxCharLen = 4;
248 : }
249 : else
250 : {
251 : rtl_TextEncodingInfo aTextEncInfo;
252 71930460 : aTextEncInfo.StructSize = sizeof( aTextEncInfo );
253 71930460 : if ( !rtl_getTextEncodingInfo( nEncoding, &aTextEncInfo ) )
254 : {
255 18 : aTextEncInfo.AverageCharSize = 1;
256 18 : aTextEncInfo.MaximumCharSize = 8;
257 : }
258 :
259 71930460 : nNewLen = nLength * static_cast<sal_Size>(aTextEncInfo.AverageCharSize);
260 71930460 : nMaxCharLen = aTextEncInfo.MaximumCharSize;
261 : }
262 :
263 71953424 : nFlags |= RTL_UNICODETOTEXT_FLAGS_FLUSH;
264 71953424 : hConverter = rtl_createUnicodeToTextConverter( nEncoding );
265 :
266 : for (;;)
267 : {
268 71953427 : pTemp = rtl_string_ImplAlloc( nNewLen );
269 : OSL_ASSERT(pTemp != NULL);
270 : nDestBytes = rtl_convertUnicodeToText( hConverter, 0,
271 : pSource, nLength,
272 : pTemp->buffer, nNewLen,
273 : nFlags,
274 71953427 : &nInfo, &nSrcChars );
275 71953427 : if (bCheckErrors && (nInfo & RTL_UNICODETOTEXT_INFO_ERROR) != 0)
276 : {
277 22 : rtl_freeMemory(pTemp);
278 22 : rtl_destroyUnicodeToTextConverter(hConverter);
279 22 : return false;
280 : }
281 :
282 71953405 : if ((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0)
283 71953402 : break;
284 :
285 : /* Buffer not big enough, try again with enough space */
286 3 : rtl_freeMemory( pTemp );
287 :
288 : /* Try with the max. count of characters with
289 : additional overhead for replacing functionality */
290 3 : nNotConvertedChars = nLength-nSrcChars;
291 3 : nNewLen = nDestBytes+(nNotConvertedChars*nMaxCharLen)+nNotConvertedChars+4;
292 : }
293 :
294 : /* Set the buffer to the correct size or is there to
295 : much overhead, reallocate to the correct size */
296 71953402 : if ( nNewLen > nDestBytes+8 )
297 : {
298 92 : rtl_String* pTemp2 = rtl_string_ImplAlloc( nDestBytes );
299 : OSL_ASSERT(pTemp2 != NULL);
300 92 : rtl_str_ImplCopy( pTemp2->buffer, pTemp->buffer, nDestBytes );
301 92 : rtl_freeMemory( pTemp );
302 92 : pTemp = pTemp2;
303 : }
304 : else
305 : {
306 71953310 : pTemp->length = nDestBytes;
307 71953310 : pTemp->buffer[nDestBytes] = 0;
308 : }
309 :
310 71953402 : rtl_destroyUnicodeToTextConverter( hConverter );
311 71953402 : if ( *pTarget )
312 75668 : rtl_string_release( *pTarget );
313 71953402 : *pTarget = pTemp;
314 :
315 : /* Results the conversion in an empty buffer -
316 : create an empty string */
317 71953402 : if ( pTemp && !nDestBytes )
318 4 : rtl_string_new( pTarget );
319 : }
320 72193918 : return true;
321 : }
322 :
323 81059781 : void SAL_CALL rtl_uString2String( rtl_String** ppThis,
324 : const sal_Unicode* pUStr,
325 : sal_Int32 nULen,
326 : rtl_TextEncoding eTextEncoding,
327 : sal_uInt32 nCvtFlags )
328 : SAL_THROW_EXTERN_C()
329 : {
330 : rtl_impl_convertUStringToString(ppThis, pUStr, nULen, eTextEncoding,
331 81059781 : nCvtFlags, false);
332 81059780 : }
333 :
334 86138638 : sal_Bool SAL_CALL rtl_convertUStringToString(rtl_String ** pTarget,
335 : sal_Unicode const * pSource,
336 : sal_Int32 nLength,
337 : rtl_TextEncoding nEncoding,
338 : sal_uInt32 nFlags)
339 : SAL_THROW_EXTERN_C()
340 : {
341 : return rtl_impl_convertUStringToString(pTarget, pSource, nLength, nEncoding,
342 86138638 : nFlags, true);
343 : }
344 :
345 105198 : void rtl_string_newReplaceFirst(
346 : rtl_String ** newStr, rtl_String * str, char const * from,
347 : sal_Int32 fromLength, char const * to, sal_Int32 toLength,
348 : sal_Int32 * index) SAL_THROW_EXTERN_C()
349 : {
350 : assert(str != 0);
351 : assert(index != 0);
352 : assert(*index >= 0 && *index <= str->length);
353 : assert(fromLength >= 0);
354 : assert(toLength >= 0);
355 : sal_Int32 i = rtl_str_indexOfStr_WithLength(
356 105198 : str->buffer + *index, str->length - *index, from, fromLength);
357 105198 : if (i == -1) {
358 93880 : rtl_string_assign(newStr, str);
359 : } else {
360 : assert(i <= str->length - *index);
361 11318 : i += *index;
362 : assert(fromLength <= str->length);
363 11318 : if (str->length - fromLength > SAL_MAX_INT32 - toLength) {
364 0 : std::abort();
365 : }
366 11318 : sal_Int32 n = str->length - fromLength + toLength;
367 11318 : rtl_string_acquire(str); // in case *newStr == str
368 11318 : rtl_string_new_WithLength(newStr, n);
369 11318 : if (n != 0) {
370 11317 : (*newStr)->length = n;
371 : assert(i >= 0 && i < str->length);
372 11317 : memcpy((*newStr)->buffer, str->buffer, i);
373 11317 : memcpy((*newStr)->buffer + i, to, toLength);
374 : memcpy(
375 11317 : (*newStr)->buffer + i + toLength, str->buffer + i + fromLength,
376 22634 : str->length - i - fromLength);
377 : }
378 11318 : rtl_string_release(str);
379 : }
380 105198 : *index = i;
381 105198 : }
382 :
383 71401 : void rtl_string_newReplaceAll(
384 : rtl_String ** newStr, rtl_String * str, char const * from,
385 : sal_Int32 fromLength, char const * to, sal_Int32 toLength)
386 : SAL_THROW_EXTERN_C()
387 : {
388 71401 : rtl_string_assign(newStr, str);
389 82684 : for (sal_Int32 i = 0;; i += toLength) {
390 : rtl_string_newReplaceFirst(
391 82684 : newStr, *newStr, from, fromLength, to, toLength, &i);
392 82684 : if (i == -1) {
393 71401 : break;
394 : }
395 11283 : }
396 71401 : }
397 :
398 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|