Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include "fastserializer.hxx"
21 :
22 : #include <com/sun/star/xml/sax/FastTokenHandler.hpp>
23 : #include <rtl/math.hxx>
24 : #include <sal/log.hxx>
25 : #include <comphelper/processfactory.hxx>
26 : #include <comphelper/sequence.hxx>
27 :
28 : #include <string.h>
29 :
30 : #if OSL_DEBUG_LEVEL > 0
31 : #include <iostream>
32 : #include <set>
33 : #endif
34 :
35 : using ::std::vector;
36 : using ::com::sun::star::uno::Reference;
37 : using ::com::sun::star::uno::Sequence;
38 : using ::com::sun::star::xml::Attribute;
39 : using ::com::sun::star::io::XOutputStream;
40 :
// Helpers for combined element/attribute ids: the upper 16 bits carry the
// namespace part, the lower 16 bits the token part. Every macro argument is
// parenthesized so that compound expressions (e.g. "a | b") are evaluated as
// a whole before the mask/shift is applied.
#define HAS_NAMESPACE(x) (((x) & 0xffff0000) != 0)
#define NAMESPACE(x) ((x) >> 16)
#define TOKEN(x) ((x) & 0xffff)
// number of characters without terminating 0
#define N_CHARS(string) (SAL_N_ELEMENTS(string) - 1)
46 :
// Fixed markup fragments emitted by the serializer. They are kept as arrays
// (not pointers) so that N_CHARS() can derive their length at compile time.

// tag delimiters
static char const sOpeningBracket[] = "<";
static char const sOpeningBracketAndSlash[] = "</";
static char const sClosingBracket[] = ">";
static char const sSlashAndClosingBracket[] = "/>";

// name and attribute punctuation
static char const sColon[] = ":";
static char const sSpace[] = " ";
static char const sEqualSignAndQuote[] = "=\"";
static char const sQuote[] = "\"";

// XML declaration written by startDocument()
static char const sXmlHeader[] = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n";
56 :
57 : namespace sax_fastparser {
58 4656 : FastSaxSerializer::FastSaxSerializer( const css::uno::Reference< css::io::XOutputStream >& xOutputStream )
59 : : maCachedOutputStream()
60 : , maMarkStack()
61 : , mbMarkStackEmpty(true)
62 : , mpDoubleStr(NULL)
63 4656 : , mnDoubleStrCapacity(RTL_STR_MAX_VALUEOFDOUBLE)
64 : {
65 4656 : rtl_string_new_WithLength(&mpDoubleStr, mnDoubleStrCapacity);
66 9312 : mxFastTokenHandler = css::xml::sax::FastTokenHandler::create(
67 4656 : ::comphelper::getProcessComponentContext());
68 : assert(xOutputStream.is()); // cannot do anything without that
69 4656 : maCachedOutputStream.setOutputStream( xOutputStream );
70 4656 : }
71 :
72 9312 : FastSaxSerializer::~FastSaxSerializer()
73 : {
74 4656 : rtl_string_release(mpDoubleStr);
75 4656 : }
76 :
77 4656 : void FastSaxSerializer::startDocument()
78 : {
79 4656 : writeBytes(sXmlHeader, N_CHARS(sXmlHeader));
80 4656 : }
81 :
82 1407 : void FastSaxSerializer::write( double value )
83 : {
84 : rtl_math_doubleToString(
85 : &mpDoubleStr, &mnDoubleStrCapacity, 0, value, rtl_math_StringFormat_G,
86 : RTL_STR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0,
87 1407 : 0, sal_True);
88 :
89 1407 : write(mpDoubleStr->buffer, mpDoubleStr->length);
90 : // and "clear" the string
91 1407 : mpDoubleStr->length = 0;
92 1407 : mnDoubleStrCapacity = RTL_STR_MAX_VALUEOFDOUBLE;
93 1407 : }
94 :
95 16350 : void FastSaxSerializer::write( const OUString& sOutput, bool bEscape )
96 : {
97 16350 : const sal_Int32 nLength = sOutput.getLength();
98 494829 : for (sal_Int32 i = 0; i < nLength; ++i)
99 : {
100 478479 : const sal_Unicode cUnicode = sOutput[ i ];
101 478479 : const char cChar = cUnicode;
102 478479 : if (cUnicode & 0xff80)
103 : {
104 1276 : write( OString(&cUnicode, 1, RTL_TEXTENCODING_UTF8) );
105 : }
106 477203 : else if(bEscape) switch( cChar )
107 : {
108 59 : case '<': writeBytes( "<", 4 ); break;
109 62 : case '>': writeBytes( ">", 4 ); break;
110 119 : case '&': writeBytes( "&", 5 ); break;
111 215 : case '\'': writeBytes( "'", 6 ); break;
112 483 : case '"': writeBytes( """, 6 ); break;
113 1 : case '\n': writeBytes( " ", 5 ); break;
114 0 : case '\r': writeBytes( " ", 5 ); break;
115 476264 : default: writeBytes( &cChar, 1 ); break;
116 : }
117 : else
118 0 : writeBytes( &cChar, 1 );
119 : }
120 16350 : }
121 :
122 6520 : void FastSaxSerializer::write( const OString& sOutput, bool bEscape )
123 : {
124 6520 : write( sOutput.getStr(), sOutput.getLength(), bEscape );
125 6520 : }
126 :
127 705321 : void FastSaxSerializer::write( const char* pStr, sal_Int32 nLen, bool bEscape )
128 : {
129 705321 : if (nLen == -1)
130 258567 : nLen = pStr ? strlen(pStr) : 0;
131 :
132 705321 : if (!bEscape)
133 : {
134 15026 : writeBytes( pStr, nLen );
135 720347 : return;
136 : }
137 :
138 5794582 : for (sal_Int32 i = 0; i < nLen; ++i)
139 : {
140 5104287 : char c = pStr[ i ];
141 5104287 : switch( c )
142 : {
143 3 : case '<': writeBytes( "<", 4 ); break;
144 3 : case '>': writeBytes( ">", 4 ); break;
145 2 : case '&': writeBytes( "&", 5 ); break;
146 100 : case '\'': writeBytes( "'", 6 ); break;
147 48 : case '"': writeBytes( """, 6 ); break;
148 10 : case '\n': writeBytes( " ", 5 ); break;
149 0 : case '\r': writeBytes( " ", 5 ); break;
150 5104121 : default: writeBytes( &c, 1 ); break;
151 : }
152 : }
153 : }
154 :
155 4656 : void FastSaxSerializer::endDocument()
156 : {
157 : assert(mbMarkStackEmpty && maMarkStack.empty());
158 4656 : maCachedOutputStream.flush();
159 4656 : }
160 :
161 1377039 : void FastSaxSerializer::writeId( ::sal_Int32 nElement )
162 : {
163 1377039 : if( HAS_NAMESPACE( nElement ) ) {
164 1235585 : writeBytes(mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement)));
165 1235585 : writeBytes(sColon, N_CHARS(sColon));
166 1235585 : writeBytes(mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement)));
167 : } else
168 141454 : writeBytes(mxFastTokenHandler->getUTF8Identifier(nElement));
169 1377039 : }
170 :
#ifdef DBG_UTIL
/** Debug helper: returns the identifier that writeId() would emit for
    nElement as an OString ("prefix:name", or just "name" when the upper
    16 bits of nElement are zero).
 */
OString FastSaxSerializer::getId( ::sal_Int32 nElement )
{
    if (!HAS_NAMESPACE(nElement))
    {
        Sequence<sal_Int8> const aName(
            mxFastTokenHandler->getUTF8Identifier(nElement));
        return OString(
            reinterpret_cast<sal_Char const*>(aName.getConstArray()),
            aName.getLength());
    }

    Sequence<sal_Int8> const aPrefix(
        mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement)));
    Sequence<sal_Int8> const aLocal(
        mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement)));

    OString const sPrefix(
        reinterpret_cast<sal_Char const*>(aPrefix.getConstArray()),
        aPrefix.getLength());
    OString const sLocal(
        reinterpret_cast<sal_Char const*>(aLocal.getConstArray()),
        aLocal.getLength());

    return sPrefix + OString(sColon, N_CHARS(sColon)) + sLocal;
}
#endif
189 :
190 183441 : void FastSaxSerializer::startFastElement( ::sal_Int32 Element, FastAttributeList* pAttrList )
191 : {
192 183441 : if ( !mbMarkStackEmpty )
193 : {
194 78589 : maCachedOutputStream.flush();
195 78589 : maMarkStack.top()->setCurrentElement( Element );
196 : }
197 :
198 : #ifdef DBG_UTIL
199 : m_DebugStartedElements.push(Element);
200 : #endif
201 :
202 183441 : writeBytes(sOpeningBracket, N_CHARS(sOpeningBracket));
203 :
204 183441 : writeId(Element);
205 183441 : if (pAttrList)
206 19933 : writeFastAttributeList(*pAttrList);
207 : else
208 163508 : writeTokenValueList();
209 :
210 183441 : writeBytes(sClosingBracket, N_CHARS(sClosingBracket));
211 183441 : }
212 :
213 183441 : void FastSaxSerializer::endFastElement( ::sal_Int32 Element )
214 : {
215 : #ifdef DBG_UTIL
216 : assert(!m_DebugStartedElements.empty());
217 : // Well-formedness constraint: Element Type Match
218 : assert(Element == m_DebugStartedElements.top());
219 : m_DebugStartedElements.pop();
220 : #endif
221 :
222 183441 : writeBytes(sOpeningBracketAndSlash, N_CHARS(sOpeningBracketAndSlash));
223 :
224 183441 : writeId(Element);
225 :
226 183441 : writeBytes(sClosingBracket, N_CHARS(sClosingBracket));
227 183441 : }
228 :
229 318066 : void FastSaxSerializer::singleFastElement( ::sal_Int32 Element, FastAttributeList* pAttrList )
230 : {
231 318066 : if ( !mbMarkStackEmpty )
232 : {
233 114858 : maCachedOutputStream.flush();
234 114858 : maMarkStack.top()->setCurrentElement( Element );
235 : }
236 :
237 318066 : writeBytes(sOpeningBracket, N_CHARS(sOpeningBracket));
238 :
239 318066 : writeId(Element);
240 318066 : if (pAttrList)
241 121543 : writeFastAttributeList(*pAttrList);
242 : else
243 196523 : writeTokenValueList();
244 :
245 318066 : writeBytes(sSlashAndClosingBracket, N_CHARS(sSlashAndClosingBracket));
246 318066 : }
247 :
248 4385 : ::com::sun::star::uno::Reference< ::com::sun::star::io::XOutputStream > FastSaxSerializer::getOutputStream()
249 : {
250 4385 : return maCachedOutputStream.getOutputStream();
251 : }
252 :
253 360031 : void FastSaxSerializer::writeTokenValueList()
254 : {
255 : #ifdef DBG_UTIL
256 : ::std::set<OString> DebugAttributes;
257 : #endif
258 611355 : for (size_t j = 0; j < maTokenValues.size(); j++)
259 : {
260 251324 : writeBytes(sSpace, N_CHARS(sSpace));
261 :
262 251324 : sal_Int32 nToken = maTokenValues[j].nToken;
263 251324 : writeId(nToken);
264 :
265 : #ifdef DBG_UTIL
266 : // Well-formedness constraint: Unique Att Spec
267 : OString const nameId(getId(nToken));
268 : assert(DebugAttributes.find(nameId) == DebugAttributes.end());
269 : DebugAttributes.insert(nameId);
270 : #endif
271 :
272 251324 : writeBytes(sEqualSignAndQuote, N_CHARS(sEqualSignAndQuote));
273 :
274 251324 : write(maTokenValues[j].pValue, -1, true);
275 :
276 251324 : writeBytes(sQuote, N_CHARS(sQuote));
277 : }
278 360031 : maTokenValues.clear();
279 360031 : }
280 :
281 141476 : void FastSaxSerializer::writeFastAttributeList(FastAttributeList& rAttrList)
282 : {
283 : #ifdef DBG_UTIL
284 : ::std::set<OString> DebugAttributes;
285 : #endif
286 141476 : const std::vector< sal_Int32 >& Tokens = rAttrList.getFastAttributeTokens();
287 580303 : for (size_t j = 0; j < Tokens.size(); j++)
288 : {
289 438827 : writeBytes(sSpace, N_CHARS(sSpace));
290 :
291 438827 : sal_Int32 nToken = Tokens[j];
292 438827 : writeId(nToken);
293 :
294 : #ifdef DBG_UTIL
295 : // Well-formedness constraint: Unique Att Spec
296 : OString const nameId(getId(nToken));
297 : SAL_WARN_IF(DebugAttributes.find(nameId) != DebugAttributes.end(), "sax", "Duplicate attribute: " << nameId );
298 : assert(DebugAttributes.find(nameId) == DebugAttributes.end());
299 : DebugAttributes.insert(nameId);
300 : #endif
301 :
302 438827 : writeBytes(sEqualSignAndQuote, N_CHARS(sEqualSignAndQuote));
303 :
304 438827 : write(rAttrList.getFastAttributeValue(j), rAttrList.AttributeValueLength(j), true);
305 :
306 438827 : writeBytes(sQuote, N_CHARS(sQuote));
307 : }
308 141476 : }
309 :
310 121899 : void FastSaxSerializer::mark( const Int32Sequence& aOrder )
311 : {
312 121899 : if ( aOrder.hasElements() )
313 : {
314 45176 : boost::shared_ptr< ForMerge > pSort( new ForSort( aOrder ) );
315 45176 : maMarkStack.push( pSort );
316 45176 : maCachedOutputStream.setOutput( pSort );
317 : }
318 : else
319 : {
320 76723 : boost::shared_ptr< ForMerge > pMerge( new ForMerge( ) );
321 76723 : maMarkStack.push( pMerge );
322 76723 : maCachedOutputStream.setOutput( pMerge );
323 : }
324 121899 : mbMarkStackEmpty = false;
325 121899 : }
326 :
327 121899 : void FastSaxSerializer::mergeTopMarks( sax_fastparser::MergeMarksEnum eMergeType )
328 : {
329 : SAL_WARN_IF(mbMarkStackEmpty, "sax", "Empty mark stack - nothing to merge");
330 121899 : if ( mbMarkStackEmpty )
331 26287 : return;
332 :
333 : // flush, so that we get everything in getData()
334 121899 : maCachedOutputStream.flush();
335 :
336 121899 : if ( maMarkStack.size() == 1 && eMergeType != MERGE_MARKS_IGNORE)
337 : {
338 26287 : Sequence<sal_Int8> aSeq( maMarkStack.top()->getData() );
339 26287 : maMarkStack.pop();
340 26287 : mbMarkStackEmpty = true;
341 26287 : maCachedOutputStream.resetOutputToStream();
342 26287 : maCachedOutputStream.writeBytes( aSeq.getConstArray(), aSeq.getLength() );
343 26287 : return;
344 : }
345 :
346 95612 : const Int8Sequence aMerge( maMarkStack.top()->getData() );
347 95612 : maMarkStack.pop();
348 95612 : if (maMarkStack.empty())
349 : {
350 1 : mbMarkStackEmpty = true;
351 1 : maCachedOutputStream.resetOutputToStream();
352 : }
353 : else
354 : {
355 95611 : maCachedOutputStream.setOutput( maMarkStack.top() );
356 : }
357 :
358 95612 : switch ( eMergeType )
359 : {
360 65813 : case MERGE_MARKS_APPEND: maMarkStack.top()->append( aMerge ); break;
361 28342 : case MERGE_MARKS_PREPEND: maMarkStack.top()->prepend( aMerge ); break;
362 1456 : case MERGE_MARKS_POSTPONE: maMarkStack.top()->postpone( aMerge ); break;
363 1 : case MERGE_MARKS_IGNORE : break;
364 :
365 95612 : }
366 : }
367 :
368 2612624 : void FastSaxSerializer::writeBytes( const Sequence< sal_Int8 >& rData )
369 : {
370 2612624 : maCachedOutputStream.writeBytes( rData.getConstArray(), rData.getLength() );
371 2612624 : }
372 :
373 10277106 : void FastSaxSerializer::writeBytes( const char* pStr, size_t nLen )
374 : {
375 10277106 : maCachedOutputStream.writeBytes( reinterpret_cast<const sal_Int8*>(pStr), nLen );
376 10277106 : }
377 :
378 121899 : FastSaxSerializer::Int8Sequence& FastSaxSerializer::ForMerge::getData()
379 : {
380 121899 : merge( maData, maPostponed, true );
381 121899 : maPostponed.realloc( 0 );
382 :
383 121899 : return maData;
384 : }
385 :
#if OSL_DEBUG_LEVEL > 0
/** Debug helper: dumps the data and the postponed buffer to std::cerr. */
void FastSaxSerializer::ForMerge::print( )
{
    std::cerr << "Data: ";
    const sal_Int32 nDataLen = maData.getLength();
    for ( sal_Int32 nPos = 0; nPos < nDataLen; ++nPos )
        std::cerr << maData[nPos];

    std::cerr << "\nPostponed: ";
    const sal_Int32 nPostponedLen = maPostponed.getLength();
    for ( sal_Int32 nPos = 0; nPos < nPostponedLen; ++nPos )
        std::cerr << maPostponed[nPos];

    std::cerr << "\n";
}
#endif
404 :
405 28334 : void FastSaxSerializer::ForMerge::prepend( const Int8Sequence &rWhat )
406 : {
407 28334 : merge( maData, rWhat, false );
408 28334 : }
409 :
410 500267 : void FastSaxSerializer::ForMerge::append( const Int8Sequence &rWhat )
411 : {
412 500267 : merge( maData, rWhat, true );
413 500267 : }
414 :
415 1456 : void FastSaxSerializer::ForMerge::postpone( const Int8Sequence &rWhat )
416 : {
417 1456 : merge( maPostponed, rWhat, true );
418 1456 : }
419 :
420 836737 : void FastSaxSerializer::ForMerge::merge( Int8Sequence &rTop, const Int8Sequence &rMerge, bool bAppend )
421 : {
422 836737 : sal_Int32 nMergeLen = rMerge.getLength();
423 836737 : if ( nMergeLen > 0 )
424 : {
425 405348 : sal_Int32 nTopLen = rTop.getLength();
426 :
427 405348 : rTop.realloc( nTopLen + nMergeLen );
428 405348 : if ( bAppend )
429 : {
430 : // append the rMerge to the rTop
431 377014 : memcpy( rTop.getArray() + nTopLen, rMerge.getConstArray(), nMergeLen );
432 : }
433 : else
434 : {
435 : // prepend the rMerge to the rTop
436 28334 : memmove( rTop.getArray() + nMergeLen, rTop.getConstArray(), nTopLen );
437 28334 : memcpy( rTop.getArray(), rMerge.getConstArray(), nMergeLen );
438 : }
439 : }
440 836737 : }
441 :
442 45176 : void FastSaxSerializer::ForMerge::resetData( )
443 : {
444 45176 : maData = Int8Sequence();
445 45176 : }
446 :
447 94136 : void FastSaxSerializer::ForSort::setCurrentElement( sal_Int32 nElement )
448 : {
449 94136 : vector< sal_Int32 > aOrder( comphelper::sequenceToContainer<vector<sal_Int32> >(maOrder) );
450 94136 : if( std::find( aOrder.begin(), aOrder.end(), nElement ) != aOrder.end() )
451 : {
452 88361 : mnCurrentElement = nElement;
453 88361 : if ( maData.find( nElement ) == maData.end() )
454 86371 : maData[ nElement ] = Int8Sequence();
455 94136 : }
456 94136 : }
457 :
458 8 : void FastSaxSerializer::ForSort::prepend( const Int8Sequence &rWhat )
459 : {
460 8 : append( rWhat );
461 8 : }
462 :
463 184781 : void FastSaxSerializer::ForSort::append( const Int8Sequence &rWhat )
464 : {
465 184781 : merge( maData[mnCurrentElement], rWhat, true );
466 184781 : }
467 :
468 45176 : void FastSaxSerializer::ForSort::sort()
469 : {
470 : // Clear the ForMerge data to avoid duplicate items
471 45176 : resetData();
472 :
473 : // Sort it all
474 45176 : std::map< sal_Int32, Int8Sequence >::iterator iter;
475 2125298 : for ( sal_Int32 i=0, len=maOrder.getLength(); i < len; i++ )
476 : {
477 2080122 : iter = maData.find( maOrder[i] );
478 2080122 : if ( iter != maData.end() )
479 86371 : ForMerge::append( iter->second );
480 : }
481 45176 : }
482 :
483 45176 : FastSaxSerializer::Int8Sequence& FastSaxSerializer::ForSort::getData()
484 : {
485 45176 : sort( );
486 45176 : return ForMerge::getData();
487 : }
488 :
#if OSL_DEBUG_LEVEL > 0
/** Debug helper: dumps each (element, bytes) pair to std::cerr, then sorts
    and prints the merged data via ForMerge::print().
 */
void FastSaxSerializer::ForSort::print( )
{
    for ( std::map< sal_Int32, Int8Sequence >::iterator aPair = maData.begin();
          aPair != maData.end( ); ++aPair )
    {
        std::cerr << "pair: " << aPair->first;
        const sal_Int32 nBytes = aPair->second.getLength();
        for ( sal_Int32 nPos = 0; nPos < nBytes; ++nPos )
            std::cerr << aPair->second[nPos];
        std::cerr << "\n";
    }

    sort( );
    ForMerge::print();
}
#endif
506 :
507 2841 : } // namespace sax_fastparser
508 :
509 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|