Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include "sal/config.h"
21 :
22 : #ifdef _MSC_VER
23 : #pragma warning(disable:4738) // storing 32-bit float result in memory, possible loss of performance
24 : #endif
25 :
26 : #include <cassert>
27 : #include <cstdlib>
28 :
29 : #include <osl/interlck.h>
30 : #include <rtl/alloc.h>
31 : #include <osl/diagnose.h>
32 : #include <rtl/tencinfo.h>
33 :
34 : #include "strimp.hxx"
35 : #include "surrogates.hxx"
36 : #include <rtl/string.h>
37 :
38 : #include "rtl/math.h"
39 :
40 : /* ======================================================================= */
41 :
42 : /* static data to be referenced by all empty strings
43 : * the refCount is predefined to 1 and must never become 0 !
44 : */
45 : static rtl_String const aImplEmpty_rtl_String =
46 : {
47 : SAL_STRING_STATIC_FLAG|1,
48 : /* sal_Int32 refCount; */
49 : 0, /* sal_Int32 length; */
50 : { 0 } /* sal_Char buffer[1]; */
51 : };
52 :
53 : /* ======================================================================= */
/* These macros parameterize the "poor-man templates" included from
 * strtmpl.cxx just below; they are used to share code between this
 * file and ustring.cxx.
 */
#define IMPL_RTL_STRCODE            sal_Char
/* The argument is parenthesized so that the cast applies to the whole
 * expression passed in (e.g. `a + b`), not only to its first operand.
 */
#define IMPL_RTL_USTRCODE( c )      ((unsigned char)(c))
#define IMPL_RTL_STRNAME( n )       rtl_str_ ## n

#define IMPL_RTL_STRINGNAME( n )    rtl_string_ ## n
#define IMPL_RTL_STRINGDATA         rtl_String
#define IMPL_RTL_EMPTYSTRING        aImplEmpty_rtl_String

#if USE_SDT_PROBES
#define RTL_LOG_STRING_BITS         8
#endif
69 :
70 : /* ======================================================================= */
71 :
72 : /* Include String/UString template code */
73 :
74 : #include "strtmpl.cxx"
75 :
76 0 : sal_Int32 SAL_CALL rtl_str_valueOfFloat(sal_Char * pStr, float f)
77 : SAL_THROW_EXTERN_C()
78 : {
79 0 : rtl_String * pResult = NULL;
80 : sal_Int32 nLen;
81 : rtl_math_doubleToString(
82 : &pResult, 0, 0, f, rtl_math_StringFormat_G,
83 : RTL_STR_MAX_VALUEOFFLOAT - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0, 0,
84 0 : sal_True);
85 0 : nLen = pResult->length;
86 : OSL_ASSERT(nLen < RTL_STR_MAX_VALUEOFFLOAT);
87 0 : memcpy(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Char));
88 0 : rtl_string_release(pResult);
89 0 : return nLen;
90 : }
91 :
92 0 : sal_Int32 SAL_CALL rtl_str_valueOfDouble(sal_Char * pStr, double d)
93 : SAL_THROW_EXTERN_C()
94 : {
95 0 : rtl_String * pResult = NULL;
96 : sal_Int32 nLen;
97 : rtl_math_doubleToString(
98 : &pResult, 0, 0, d, rtl_math_StringFormat_G,
99 : RTL_STR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0,
100 0 : 0, sal_True);
101 0 : nLen = pResult->length;
102 : OSL_ASSERT(nLen < RTL_STR_MAX_VALUEOFDOUBLE);
103 0 : memcpy(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Char));
104 0 : rtl_string_release(pResult);
105 0 : return nLen;
106 : }
107 :
108 0 : float SAL_CALL rtl_str_toFloat(sal_Char const * pStr) SAL_THROW_EXTERN_C()
109 : {
110 0 : return (float) rtl_math_stringToDouble(pStr, pStr + rtl_str_getLength(pStr),
111 0 : '.', 0, 0, 0);
112 : }
113 :
114 79 : double SAL_CALL rtl_str_toDouble(sal_Char const * pStr) SAL_THROW_EXTERN_C()
115 : {
116 79 : return rtl_math_stringToDouble(pStr, pStr + rtl_str_getLength(pStr), '.', 0,
117 79 : 0, 0);
118 : }
119 :
120 : /* ======================================================================= */
121 :
122 4252558 : static int rtl_ImplGetFastUTF8ByteLen( const sal_Unicode* pStr, sal_Int32 nLen )
123 : {
124 : int n;
125 : sal_Unicode c;
126 : sal_uInt32 nUCS4Char;
127 : const sal_Unicode* pEndStr;
128 :
129 4252558 : n = 0;
130 4252558 : pEndStr = pStr+nLen;
131 198083890 : while ( pStr < pEndStr )
132 : {
133 189578774 : c = *pStr;
134 :
135 189578774 : if ( c < 0x80 )
136 189578774 : n++;
137 0 : else if ( c < 0x800 )
138 0 : n += 2;
139 : else
140 : {
141 0 : if ( !SAL_RTL_IS_HIGH_SURROGATE(c) )
142 0 : n += 3;
143 : else
144 : {
145 0 : nUCS4Char = c;
146 :
147 0 : if ( pStr+1 < pEndStr )
148 : {
149 0 : c = *(pStr+1);
150 0 : if ( SAL_RTL_IS_LOW_SURROGATE(c) )
151 : {
152 0 : nUCS4Char = SAL_RTL_COMBINE_SURROGATES(nUCS4Char, c);
153 0 : pStr++;
154 : }
155 : }
156 :
157 0 : if ( nUCS4Char < 0x10000 )
158 0 : n += 3;
159 0 : else if ( nUCS4Char < 0x200000 )
160 0 : n += 4;
161 0 : else if ( nUCS4Char < 0x4000000 )
162 0 : n += 5;
163 : else
164 0 : n += 6;
165 : }
166 : }
167 :
168 189578774 : pStr++;
169 : }
170 :
171 4252558 : return n;
172 : }
173 :
174 : /* ----------------------------------------------------------------------- */
175 :
176 4507595 : bool SAL_CALL rtl_impl_convertUStringToString(rtl_String ** pTarget,
177 : sal_Unicode const * pSource,
178 : sal_Int32 nLength,
179 : rtl_TextEncoding nEncoding,
180 : sal_uInt32 nFlags,
181 : bool bCheckErrors)
182 : {
183 : OSL_ASSERT(pTarget != NULL
184 : && (pSource != NULL || nLength == 0)
185 : && nLength >= 0
186 : && (nLength == 0 || rtl_isOctetTextEncoding(nEncoding)));
187 :
188 4507595 : if ( !nLength )
189 0 : rtl_string_new( pTarget );
190 : else
191 : {
192 : rtl_String* pTemp;
193 : rtl_UnicodeToTextConverter hConverter;
194 : sal_uInt32 nInfo;
195 : sal_Size nSrcChars;
196 : sal_Size nDestBytes;
197 : sal_Size nNewLen;
198 : sal_Size nNotConvertedChars;
199 : sal_Size nMaxCharLen;
200 :
201 : /* Optimization for UTF-8 - we try to calculate the exact length */
202 : /* For all other encoding we try an good estimation */
203 4507595 : if ( nEncoding == RTL_TEXTENCODING_UTF8 )
204 : {
205 4252558 : nNewLen = rtl_ImplGetFastUTF8ByteLen( pSource, nLength );
206 : /* Includes the string only ASCII, then we could copy
207 : the buffer faster */
208 4252558 : if ( nNewLen == (sal_Size)nLength )
209 : {
210 : sal_Char* pBuffer;
211 4252558 : if ( *pTarget )
212 7 : rtl_string_release( *pTarget );
213 4252558 : *pTarget = rtl_string_ImplAlloc( nLength );
214 : OSL_ASSERT(*pTarget != NULL);
215 4252558 : pBuffer = (*pTarget)->buffer;
216 189578774 : do
217 : {
218 : /* Check ASCII range */
219 : OSL_ENSURE( *pSource <= 127,
220 : "rtl_uString2String() - UTF8 test is encoding is wrong" );
221 :
222 189578774 : *pBuffer = (sal_Char)(unsigned char)*pSource;
223 189578774 : pBuffer++;
224 189578774 : pSource++;
225 189578774 : nLength--;
226 : }
227 : while ( nLength );
228 8505116 : return true;
229 : }
230 :
231 0 : nMaxCharLen = 4;
232 : }
233 : else
234 : {
235 : rtl_TextEncodingInfo aTextEncInfo;
236 255037 : aTextEncInfo.StructSize = sizeof( aTextEncInfo );
237 255037 : if ( !rtl_getTextEncodingInfo( nEncoding, &aTextEncInfo ) )
238 : {
239 0 : aTextEncInfo.AverageCharSize = 1;
240 0 : aTextEncInfo.MaximumCharSize = 8;
241 : }
242 :
243 255037 : nNewLen = nLength*aTextEncInfo.AverageCharSize;
244 255037 : nMaxCharLen = aTextEncInfo.MaximumCharSize;
245 : }
246 :
247 255037 : nFlags |= RTL_UNICODETOTEXT_FLAGS_FLUSH;
248 255037 : hConverter = rtl_createUnicodeToTextConverter( nEncoding );
249 :
250 : for (;;)
251 : {
252 255037 : pTemp = rtl_string_ImplAlloc( nNewLen );
253 : OSL_ASSERT(pTemp != NULL);
254 : nDestBytes = rtl_convertUnicodeToText( hConverter, 0,
255 : pSource, nLength,
256 : pTemp->buffer, nNewLen,
257 : nFlags,
258 255037 : &nInfo, &nSrcChars );
259 255037 : if (bCheckErrors && (nInfo & RTL_UNICODETOTEXT_INFO_ERROR) != 0)
260 : {
261 0 : rtl_freeMemory(pTemp);
262 0 : rtl_destroyUnicodeToTextConverter(hConverter);
263 0 : return false;
264 : }
265 :
266 255037 : if ((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0)
267 255037 : break;
268 :
269 : /* Buffer not big enough, try again with enough space */
270 0 : rtl_freeMemory( pTemp );
271 :
272 : /* Try with the max. count of characters with
273 : additional overhead for replacing functionality */
274 0 : nNotConvertedChars = nLength-nSrcChars;
275 0 : nNewLen = nDestBytes+(nNotConvertedChars*nMaxCharLen)+nNotConvertedChars+4;
276 : }
277 :
278 : /* Set the buffer to the correct size or is there to
279 : much overhead, reallocate to the correct size */
280 255037 : if ( nNewLen > nDestBytes+8 )
281 : {
282 0 : rtl_String* pTemp2 = rtl_string_ImplAlloc( nDestBytes );
283 : OSL_ASSERT(pTemp2 != NULL);
284 0 : rtl_str_ImplCopy( pTemp2->buffer, pTemp->buffer, nDestBytes );
285 0 : rtl_freeMemory( pTemp );
286 0 : pTemp = pTemp2;
287 : }
288 : else
289 : {
290 255037 : pTemp->length = nDestBytes;
291 255037 : pTemp->buffer[nDestBytes] = 0;
292 : }
293 :
294 255037 : rtl_destroyUnicodeToTextConverter( hConverter );
295 255037 : if ( *pTarget )
296 0 : rtl_string_release( *pTarget );
297 255037 : *pTarget = pTemp;
298 :
299 : /* Results the conversion in an empty buffer -
300 : create an empty string */
301 255037 : if ( pTemp && !nDestBytes )
302 0 : rtl_string_new( pTarget );
303 : }
304 255037 : return true;
305 : }
306 :
307 4507588 : void SAL_CALL rtl_uString2String( rtl_String** ppThis,
308 : const sal_Unicode* pUStr,
309 : sal_Int32 nULen,
310 : rtl_TextEncoding eTextEncoding,
311 : sal_uInt32 nCvtFlags )
312 : SAL_THROW_EXTERN_C()
313 : {
314 : rtl_impl_convertUStringToString(ppThis, pUStr, nULen, eTextEncoding,
315 4507588 : nCvtFlags, false);
316 4507588 : }
317 :
318 7 : sal_Bool SAL_CALL rtl_convertUStringToString(rtl_String ** pTarget,
319 : sal_Unicode const * pSource,
320 : sal_Int32 nLength,
321 : rtl_TextEncoding nEncoding,
322 : sal_uInt32 nFlags)
323 : SAL_THROW_EXTERN_C()
324 : {
325 : return rtl_impl_convertUStringToString(pTarget, pSource, nLength, nEncoding,
326 7 : nFlags, true);
327 : }
328 :
329 0 : void rtl_string_newReplaceFirst(
330 : rtl_String ** newStr, rtl_String * str, char const * from,
331 : sal_Int32 fromLength, char const * to, sal_Int32 toLength,
332 : sal_Int32 * index) SAL_THROW_EXTERN_C()
333 : {
334 : assert(str != 0);
335 : assert(index != 0);
336 : assert(*index >= 0 && *index <= str->length);
337 : assert(fromLength >= 0);
338 : assert(toLength >= 0);
339 : sal_Int32 i = rtl_str_indexOfStr_WithLength(
340 0 : str->buffer + *index, str->length - *index, from, fromLength);
341 0 : if (i == -1) {
342 0 : rtl_string_assign(newStr, str);
343 : } else {
344 : assert(i <= str->length - *index);
345 0 : i += *index;
346 : assert(fromLength <= str->length);
347 0 : if (str->length - fromLength > SAL_MAX_INT32 - toLength) {
348 0 : std::abort();
349 : }
350 0 : sal_Int32 n = str->length - fromLength + toLength;
351 0 : rtl_string_acquire(str); // in case *newStr == str
352 0 : rtl_string_new_WithLength(newStr, n);
353 0 : if (n != 0) {
354 0 : (*newStr)->length = n;
355 : assert(i >= 0 && i < str->length);
356 0 : memcpy((*newStr)->buffer, str->buffer, i);
357 0 : memcpy((*newStr)->buffer + i, to, toLength);
358 : memcpy(
359 0 : (*newStr)->buffer + i + toLength, str->buffer + i + fromLength,
360 0 : str->length - i - fromLength);
361 : }
362 0 : rtl_string_release(str);
363 : }
364 0 : *index = i;
365 0 : }
366 :
367 0 : void rtl_string_newReplaceAll(
368 : rtl_String ** newStr, rtl_String * str, char const * from,
369 : sal_Int32 fromLength, char const * to, sal_Int32 toLength)
370 : SAL_THROW_EXTERN_C()
371 : {
372 0 : rtl_string_assign(newStr, str);
373 0 : for (sal_Int32 i = 0;; i += toLength) {
374 : rtl_string_newReplaceFirst(
375 0 : newStr, *newStr, from, fromLength, to, toLength, &i);
376 0 : if (i == -1) {
377 0 : break;
378 : }
379 0 : }
380 0 : }
381 :
382 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|