Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include "sal/config.h"
21 :
22 : #ifdef _MSC_VER
23 : #pragma warning(disable:4738) // storing 32-bit float result in memory, possible loss of performance
24 : #endif
25 :
26 : #include <cassert>
27 : #include <cstdlib>
28 :
29 : #include <osl/interlck.h>
30 : #include <rtl/alloc.h>
31 : #include <osl/diagnose.h>
32 : #include <rtl/tencinfo.h>
33 :
34 : #include "strimp.hxx"
35 : #include "surrogates.hxx"
36 : #include <rtl/string.h>
37 :
38 : #include "rtl/math.h"
39 : #include "rtl/tencinfo.h"
40 :
41 : /* ======================================================================= */
42 :
43 : /* Shared static instance to be referenced by all empty strings.
44 : * Its refCount is preset to SAL_STRING_STATIC_FLAG|1 and must never become 0!
45 : */
46 : static rtl_String const aImplEmpty_rtl_String =
47 : {
48 : SAL_STRING_STATIC_FLAG|1,
49 : /* sal_Int32 refCount; */
50 : 0, /* sal_Int32 length; */
51 : { 0 } /* sal_Char buffer[1]; */
52 : };
53 :
54 : /* ======================================================================= */
55 :
/* Names and types plugged into the generic string code of this file
 * (presumably consumed by the strtmpl.cxx template included below -
 * TODO confirm):
 *   IMPL_RTL_STRCODE      character type of the string
 *   IMPL_RTL_USTRCODE(c)  character value converted to unsigned char
 *   IMPL_RTL_STRNAME(n)   builds the rtl_str_<n> function name
 *   IMPL_RTL_STRINGNAME(n) builds the rtl_string_<n> function name
 *   IMPL_RTL_STRINGDATA   string data structure
 *   IMPL_RTL_EMPTYSTRING  the shared static empty string instance
 */
#define IMPL_RTL_STRCODE sal_Char
/* The argument is parenthesized so that the cast applies to the whole
   expression, e.g. IMPL_RTL_USTRCODE( a + b ) */
#define IMPL_RTL_USTRCODE( c ) ((unsigned char)(c))
#define IMPL_RTL_STRNAME( n ) rtl_str_ ## n

#define IMPL_RTL_STRINGNAME( n ) rtl_string_ ## n
#define IMPL_RTL_STRINGDATA rtl_String
#define IMPL_RTL_EMPTYSTRING aImplEmpty_rtl_String

/* The string allocation logging hooks expand to nothing in this file */
#undef RTL_LOG_STRING_NEW
#define RTL_LOG_STRING_NEW(s)
#undef RTL_LOG_STRING_DELETE
#define RTL_LOG_STRING_DELETE(s)
68 :
69 : /* ======================================================================= */
70 :
71 : /* Include String/UString template code */
72 :
73 : #include "strtmpl.cxx"
74 :
75 101 : sal_Int32 SAL_CALL rtl_str_valueOfFloat(sal_Char * pStr, float f)
76 : SAL_THROW_EXTERN_C()
77 : {
78 101 : rtl_String * pResult = NULL;
79 : sal_Int32 nLen;
80 : rtl_math_doubleToString(
81 : &pResult, 0, 0, f, rtl_math_StringFormat_G,
82 : RTL_STR_MAX_VALUEOFFLOAT - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0, 0,
83 101 : sal_True);
84 101 : nLen = pResult->length;
85 : OSL_ASSERT(nLen < RTL_STR_MAX_VALUEOFFLOAT);
86 101 : memcpy(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Char));
87 101 : rtl_string_release(pResult);
88 101 : return nLen;
89 : }
90 :
91 715 : sal_Int32 SAL_CALL rtl_str_valueOfDouble(sal_Char * pStr, double d)
92 : SAL_THROW_EXTERN_C()
93 : {
94 715 : rtl_String * pResult = NULL;
95 : sal_Int32 nLen;
96 : rtl_math_doubleToString(
97 : &pResult, 0, 0, d, rtl_math_StringFormat_G,
98 : RTL_STR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0,
99 715 : 0, sal_True);
100 715 : nLen = pResult->length;
101 : OSL_ASSERT(nLen < RTL_STR_MAX_VALUEOFDOUBLE);
102 715 : memcpy(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Char));
103 715 : rtl_string_release(pResult);
104 715 : return nLen;
105 : }
106 :
107 0 : float SAL_CALL rtl_str_toFloat(sal_Char const * pStr) SAL_THROW_EXTERN_C()
108 : {
109 0 : return (float) rtl_math_stringToDouble(pStr, pStr + rtl_str_getLength(pStr),
110 0 : '.', 0, 0, 0);
111 : }
112 :
113 14775 : double SAL_CALL rtl_str_toDouble(sal_Char const * pStr) SAL_THROW_EXTERN_C()
114 : {
115 14775 : return rtl_math_stringToDouble(pStr, pStr + rtl_str_getLength(pStr), '.', 0,
116 14775 : 0, 0);
117 : }
118 :
119 : /* ======================================================================= */
120 :
121 5526583 : static int rtl_ImplGetFastUTF8ByteLen( const sal_Unicode* pStr, sal_Int32 nLen )
122 : {
123 : int n;
124 : sal_Unicode c;
125 : sal_uInt32 nUCS4Char;
126 : const sal_Unicode* pEndStr;
127 :
128 5526583 : n = 0;
129 5526583 : pEndStr = pStr+nLen;
130 137051920 : while ( pStr < pEndStr )
131 : {
132 125998754 : c = *pStr;
133 :
134 125998754 : if ( c < 0x80 )
135 125982529 : n++;
136 16225 : else if ( c < 0x800 )
137 1550 : n += 2;
138 : else
139 : {
140 14675 : if ( !SAL_RTL_IS_HIGH_SURROGATE(c) )
141 14675 : n += 3;
142 : else
143 : {
144 0 : nUCS4Char = c;
145 :
146 0 : if ( pStr+1 < pEndStr )
147 : {
148 0 : c = *(pStr+1);
149 0 : if ( SAL_RTL_IS_LOW_SURROGATE(c) )
150 : {
151 0 : nUCS4Char = SAL_RTL_COMBINE_SURROGATES(nUCS4Char, c);
152 0 : pStr++;
153 : }
154 : }
155 :
156 0 : if ( nUCS4Char < 0x10000 )
157 0 : n += 3;
158 0 : else if ( nUCS4Char < 0x200000 )
159 0 : n += 4;
160 0 : else if ( nUCS4Char < 0x4000000 )
161 0 : n += 5;
162 : else
163 0 : n += 6;
164 : }
165 : }
166 :
167 125998754 : pStr++;
168 : }
169 :
170 5526583 : return n;
171 : }
172 :
173 : /* ----------------------------------------------------------------------- */
174 :
/* Convert the nLength UTF-16 code units at pSource into a byte string in
 * nEncoding and store the result in *pTarget.
 *
 * - nLength == 0: rtl_string_new() is called on pTarget.
 * - RTL_TEXTENCODING_UTF8 and the computed byte length equals nLength (pure
 *   ASCII): the characters are copied one by one into a newly allocated
 *   string; no converter is created.
 * - Otherwise a Unicode-to-text converter is created and the conversion is
 *   repeated with a larger buffer as long as it reports
 *   RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL.
 *
 * A string already held in *pTarget is released before it is replaced.
 *
 * Returns sal_False only if bCheckErrors is set and the converter reports
 * RTL_UNICODETOTEXT_INFO_ERROR; *pTarget is not modified in that case.
 * Returns sal_True on every other path.
 */
175 6515089 : sal_Bool SAL_CALL rtl_impl_convertUStringToString(rtl_String ** pTarget,
176 : sal_Unicode const * pSource,
177 : sal_Int32 nLength,
178 : rtl_TextEncoding nEncoding,
179 : sal_uInt32 nFlags,
180 : sal_Bool bCheckErrors)
181 : {
182 : OSL_ASSERT(pTarget != NULL
183 : && (pSource != NULL || nLength == 0)
184 : && nLength >= 0
185 : && (nLength == 0 || rtl_isOctetTextEncoding(nEncoding)));
186 :
187 6515089 : if ( !nLength )
188 285338 : rtl_string_new( pTarget );
189 : else
190 : {
191 : rtl_String* pTemp;
192 : rtl_UnicodeToTextConverter hConverter;
193 : sal_uInt32 nInfo;
194 : sal_Size nSrcChars;
195 : sal_Size nDestBytes;
196 : sal_Size nNewLen;
197 : sal_Size nNotConvertedChars;
198 : sal_Size nMaxCharLen;
199 :
200 : /* Optimization for UTF-8 - we try to calculate the exact length */
201 : /* For all other encodings we try a good estimation */
202 6229751 : if ( nEncoding == RTL_TEXTENCODING_UTF8 )
203 : {
204 5526584 : nNewLen = rtl_ImplGetFastUTF8ByteLen( pSource, nLength );
205 : /* If the string contains only ASCII, we can copy
206 : the buffer faster */
207 5526583 : if ( nNewLen == (sal_Size)nLength )
208 : {
209 : IMPL_RTL_STRCODE* pBuffer;
210 5520949 : if ( *pTarget )
211 588906 : IMPL_RTL_STRINGNAME( release )( *pTarget );
212 5520949 : *pTarget = IMPL_RTL_STRINGNAME( ImplAlloc )( nLength );
213 : OSL_ASSERT(*pTarget != NULL);
214 5520950 : pBuffer = (*pTarget)->buffer;
/* nLength is non-zero here, so the do-while copies at least one character */
215 125979404 : do
216 : {
217 : /* Check ASCII range */
218 : OSL_ENSURE( *pSource <= 127,
219 : "rtl_uString2String() - UTF8 test is encoding is wrong" );
220 :
221 125979404 : *pBuffer = (IMPL_RTL_STRCODE)(unsigned char)*pSource;
222 125979404 : pBuffer++;
223 125979404 : pSource++;
224 125979404 : nLength--;
225 : }
226 : while ( nLength );
227 11041903 : return sal_True;
228 : }
229 :
230 5634 : nMaxCharLen = 4;
231 : }
232 : else
233 : {
234 : rtl_TextEncodingInfo aTextEncInfo;
235 703167 : aTextEncInfo.StructSize = sizeof( aTextEncInfo );
/* Fall back to fixed sizes when no encoding info is available */
236 703167 : if ( !rtl_getTextEncodingInfo( nEncoding, &aTextEncInfo ) )
237 : {
238 129 : aTextEncInfo.AverageCharSize = 1;
239 129 : aTextEncInfo.MaximumCharSize = 8;
240 : }
241 :
/* NOTE(review): the product is formed before the assignment to sal_Size;
   whether it can overflow for very long input depends on the type of
   AverageCharSize - TODO confirm */
242 703167 : nNewLen = nLength*aTextEncInfo.AverageCharSize;
243 703167 : nMaxCharLen = aTextEncInfo.MaximumCharSize;
244 : }
245 :
/* The flush flag is always added to the caller's flags */
246 708801 : nFlags |= RTL_UNICODETOTEXT_FLAGS_FLUSH;
247 708801 : hConverter = rtl_createUnicodeToTextConverter( nEncoding );
248 :
/* Convert; on "buffer too small" free the buffer and retry with a larger one */
249 : for (;;)
250 : {
251 708803 : pTemp = IMPL_RTL_STRINGNAME( ImplAlloc )( nNewLen );
252 : OSL_ASSERT(pTemp != NULL);
253 : nDestBytes = rtl_convertUnicodeToText( hConverter, 0,
254 : pSource, nLength,
255 : pTemp->buffer, nNewLen,
256 : nFlags,
257 708803 : &nInfo, &nSrcChars );
258 708803 : if (bCheckErrors && (nInfo & RTL_UNICODETOTEXT_INFO_ERROR) != 0)
259 : {
260 3 : rtl_freeMemory(pTemp);
261 3 : rtl_destroyUnicodeToTextConverter(hConverter);
262 3 : return sal_False;
263 : }
264 :
265 708800 : if ((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0)
266 708798 : break;
267 :
268 : /* Buffer not big enough, try again with enough space */
269 2 : rtl_freeMemory( pTemp );
270 :
271 : /* Try with the max. count of characters with
272 : additional overhead for replacing functionality */
273 2 : nNotConvertedChars = nLength-nSrcChars;
274 2 : nNewLen = nDestBytes+(nNotConvertedChars*nMaxCharLen)+nNotConvertedChars+4;
275 : }
276 :
277 : /* Set the buffer to the correct size or, if there is too
278 : much overhead, reallocate to the correct size */
279 708798 : if ( nNewLen > nDestBytes+8 )
280 : {
281 19 : rtl_String* pTemp2 = IMPL_RTL_STRINGNAME( ImplAlloc )( nDestBytes );
282 : OSL_ASSERT(pTemp2 != NULL);
283 19 : rtl_str_ImplCopy( pTemp2->buffer, pTemp->buffer, nDestBytes );
284 19 : rtl_freeMemory( pTemp );
285 19 : pTemp = pTemp2;
286 : }
287 : else
288 : {
289 708779 : pTemp->length = nDestBytes;
290 708779 : pTemp->buffer[nDestBytes] = 0;
291 : }
292 :
293 708798 : rtl_destroyUnicodeToTextConverter( hConverter );
294 708798 : if ( *pTarget )
295 68195 : IMPL_RTL_STRINGNAME( release )( *pTarget );
296 708798 : *pTarget = pTemp;
297 :
298 : /* If the conversion results in an empty buffer,
299 : create an empty string */
300 708798 : if ( pTemp && !nDestBytes )
301 3 : rtl_string_new( pTarget );
302 : }
303 994136 : return sal_True;
304 : }
305 :
306 5847790 : void SAL_CALL rtl_uString2String( rtl_String** ppThis,
307 : const sal_Unicode* pUStr,
308 : sal_Int32 nULen,
309 : rtl_TextEncoding eTextEncoding,
310 : sal_uInt32 nCvtFlags )
311 : SAL_THROW_EXTERN_C()
312 : {
313 : rtl_impl_convertUStringToString(ppThis, pUStr, nULen, eTextEncoding,
314 5847790 : nCvtFlags, sal_False);
315 5847790 : }
316 :
317 667299 : sal_Bool SAL_CALL rtl_convertUStringToString(rtl_String ** pTarget,
318 : sal_Unicode const * pSource,
319 : sal_Int32 nLength,
320 : rtl_TextEncoding nEncoding,
321 : sal_uInt32 nFlags)
322 : SAL_THROW_EXTERN_C()
323 : {
324 : return rtl_impl_convertUStringToString(pTarget, pSource, nLength, nEncoding,
325 667299 : nFlags, sal_True);
326 : }
327 :
328 96028 : void rtl_string_newReplaceFirst(
329 : rtl_String ** newStr, rtl_String * str, char const * from,
330 : sal_Int32 fromLength, char const * to, sal_Int32 toLength,
331 : sal_Int32 * index) SAL_THROW_EXTERN_C()
332 : {
333 : assert(str != 0);
334 : assert(index != 0);
335 : assert(*index >= 0 && *index <= str->length);
336 : assert(fromLength >= 0);
337 : assert(toLength >= 0);
338 : sal_Int32 i = rtl_str_indexOfStr_WithLength(
339 96028 : str->buffer + *index, str->length - *index, from, fromLength);
340 96028 : if (i == -1) {
341 82089 : rtl_string_assign(newStr, str);
342 : } else {
343 : assert(i <= str->length - *index);
344 13939 : i += *index;
345 : assert(fromLength <= str->length);
346 13939 : if (str->length - fromLength > SAL_MAX_INT32 - toLength) {
347 0 : std::abort();
348 : }
349 13939 : sal_Int32 n = str->length - fromLength + toLength;
350 13939 : rtl_string_acquire(str); // in case *newStr == str
351 13939 : rtl_string_new_WithLength(newStr, n);
352 13939 : if (n != 0) {
353 13938 : (*newStr)->length = n;
354 : assert(i >= 0 && i < str->length);
355 13938 : memcpy((*newStr)->buffer, str->buffer, i);
356 13938 : memcpy((*newStr)->buffer + i, to, toLength);
357 : memcpy(
358 13938 : (*newStr)->buffer + i + toLength, str->buffer + i + fromLength,
359 27876 : str->length - i - fromLength);
360 : }
361 13939 : rtl_string_release(str);
362 : }
363 96028 : *index = i;
364 96028 : }
365 :
366 49251 : void rtl_string_newReplaceAll(
367 : rtl_String ** newStr, rtl_String * str, char const * from,
368 : sal_Int32 fromLength, char const * to, sal_Int32 toLength)
369 : SAL_THROW_EXTERN_C()
370 : {
371 49251 : rtl_string_assign(newStr, str);
372 62758 : for (sal_Int32 i = 0;; i += toLength) {
373 : rtl_string_newReplaceFirst(
374 62758 : newStr, *newStr, from, fromLength, to, toLength, &i);
375 62758 : if (i == -1) {
376 49251 : break;
377 : }
378 13507 : }
379 49251 : }
380 :
381 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|