Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include "fastserializer.hxx"
21 :
22 : #include <com/sun/star/xml/sax/FastTokenHandler.hpp>
23 : #include <rtl/math.hxx>
24 : #include <comphelper/processfactory.hxx>
25 : #include <comphelper/sequenceasvector.hxx>
26 :
27 : #include <string.h>
28 :
29 : #if OSL_DEBUG_LEVEL > 0
30 : #include <iostream>
31 : #include <set>
32 : #endif
33 :
34 : using ::comphelper::SequenceAsVector;
35 : using ::com::sun::star::uno::Reference;
36 : using ::com::sun::star::uno::Sequence;
37 : using ::com::sun::star::xml::Attribute;
38 : using ::com::sun::star::io::XOutputStream;
39 :
// A token is split by these macros into an upper 16-bit part (tested and
// extracted as the "namespace") and a lower 16-bit part (the plain token).
// The arguments are parenthesised so that compound expressions such as
// TOKEN(a | b) expand with the intended precedence.
#define HAS_NAMESPACE(x) (((x) & 0xffff0000) != 0)
#define NAMESPACE(x) ((x) >> 16)
#define TOKEN(x) ((x) & 0xffff)
// number of characters without terminating 0
#define N_CHARS(string) (SAL_N_ELEMENTS(string) - 1)
45 :
// Fixed pieces of XML markup emitted by the serializer. They are kept as
// character arrays (not pointers) so that N_CHARS() can take their length
// at compile time.
namespace
{
// document prolog
const char sXmlHeader[] = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n";

// tag delimiters
const char sOpeningBracket[] = "<";
const char sOpeningBracketAndSlash[] = "</";
const char sClosingBracket[] = ">";
const char sSlashAndClosingBracket[] = "/>";

// name and attribute punctuation
const char sColon[] = ":";
const char sSpace[] = " ";
const char sEqualSignAndQuote[] = "=\"";
const char sQuote[] = "\"";
}
55 :
56 : namespace sax_fastparser {
57 7446 : FastSaxSerializer::FastSaxSerializer( const css::uno::Reference< css::io::XOutputStream >& xOutputStream )
58 : : maCachedOutputStream()
59 : , maMarkStack()
60 : , mbMarkStackEmpty(true)
61 : , mpDoubleStr(NULL)
62 7446 : , mnDoubleStrCapacity(RTL_STR_MAX_VALUEOFDOUBLE)
63 : {
64 7446 : rtl_string_new_WithLength(&mpDoubleStr, mnDoubleStrCapacity);
65 14892 : mxFastTokenHandler = css::xml::sax::FastTokenHandler::create(
66 7446 : ::comphelper::getProcessComponentContext());
67 : assert(xOutputStream.is()); // cannot do anything without that
68 7446 : maCachedOutputStream.setOutputStream( xOutputStream );
69 7446 : }
70 :
71 14892 : FastSaxSerializer::~FastSaxSerializer()
72 : {
73 7446 : rtl_string_release(mpDoubleStr);
74 7446 : }
75 :
76 7446 : void FastSaxSerializer::startDocument()
77 : {
78 7446 : writeBytes(sXmlHeader, N_CHARS(sXmlHeader));
79 7446 : }
80 :
81 2352 : void FastSaxSerializer::write( double value )
82 : {
83 : rtl_math_doubleToString(
84 : &mpDoubleStr, &mnDoubleStrCapacity, 0, value, rtl_math_StringFormat_G,
85 : RTL_STR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0,
86 2352 : 0, sal_True);
87 :
88 2352 : write(mpDoubleStr->buffer, mpDoubleStr->length);
89 : // and "clear" the string
90 2352 : mpDoubleStr->length = 0;
91 2352 : mnDoubleStrCapacity = RTL_STR_MAX_VALUEOFDOUBLE;
92 2352 : }
93 :
94 27000 : void FastSaxSerializer::write( const OUString& sOutput, bool bEscape )
95 : {
96 27000 : const sal_Int32 nLength = sOutput.getLength();
97 880740 : for (sal_Int32 i = 0; i < nLength; ++i)
98 : {
99 853740 : const sal_Unicode cUnicode = sOutput[ i ];
100 853740 : const char cChar = cUnicode;
101 853740 : if (cUnicode & 0xff80)
102 : {
103 2346 : write( OString(&cUnicode, 1, RTL_TEXTENCODING_UTF8) );
104 : }
105 851394 : else if(bEscape) switch( cChar )
106 : {
107 60 : case '<': writeBytes( "<", 4 ); break;
108 56 : case '>': writeBytes( ">", 4 ); break;
109 168 : case '&': writeBytes( "&", 5 ); break;
110 284 : case '\'': writeBytes( "'", 6 ); break;
111 876 : case '"': writeBytes( """, 6 ); break;
112 2 : case '\n': writeBytes( " ", 5 ); break;
113 0 : case '\r': writeBytes( " ", 5 ); break;
114 849948 : default: writeBytes( &cChar, 1 ); break;
115 : }
116 : else
117 0 : writeBytes( &cChar, 1 );
118 : }
119 27000 : }
120 :
121 10912 : void FastSaxSerializer::write( const OString& sOutput, bool bEscape )
122 : {
123 10912 : write( sOutput.getStr(), sOutput.getLength(), bEscape );
124 10912 : }
125 :
126 1210492 : void FastSaxSerializer::write( const char* pStr, sal_Int32 nLen, bool bEscape )
127 : {
128 1210492 : if (nLen == -1)
129 389256 : nLen = pStr ? strlen(pStr) : 0;
130 :
131 1210492 : if (!bEscape)
132 : {
133 19854 : writeBytes( pStr, nLen );
134 1230346 : return;
135 : }
136 :
137 10092172 : for (sal_Int32 i = 0; i < nLen; ++i)
138 : {
139 8901534 : char c = pStr[ i ];
140 8901534 : switch( c )
141 : {
142 2 : case '<': writeBytes( "<", 4 ); break;
143 2 : case '>': writeBytes( ">", 4 ); break;
144 4 : case '&': writeBytes( "&", 5 ); break;
145 200 : case '\'': writeBytes( "'", 6 ); break;
146 88 : case '"': writeBytes( """, 6 ); break;
147 20 : case '\n': writeBytes( " ", 5 ); break;
148 0 : case '\r': writeBytes( " ", 5 ); break;
149 8901218 : default: writeBytes( &c, 1 ); break;
150 : }
151 : }
152 : }
153 :
154 7446 : void FastSaxSerializer::endDocument()
155 : {
156 : assert(mbMarkStackEmpty && maMarkStack.empty());
157 7446 : maCachedOutputStream.flush();
158 7446 : }
159 :
160 2331158 : void FastSaxSerializer::writeId( ::sal_Int32 nElement )
161 : {
162 2331158 : if( HAS_NAMESPACE( nElement ) ) {
163 2147536 : writeBytes(mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement)));
164 2147536 : writeBytes(sColon, N_CHARS(sColon));
165 2147536 : writeBytes(mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement)));
166 : } else
167 183622 : writeBytes(mxFastTokenHandler->getUTF8Identifier(nElement));
168 2331158 : }
169 :
#ifdef DBG_UTIL
/** Debug helper: returns the name that writeId() would emit for nElement,
    as a string ("prefix:name" when the upper 16 bits are set, else "name").
 */
OString FastSaxSerializer::getId( ::sal_Int32 nElement )
{
    if (!HAS_NAMESPACE(nElement))
    {
        Sequence<sal_Int8> const aPlain(
            mxFastTokenHandler->getUTF8Identifier(nElement));
        return OString(reinterpret_cast<sal_Char const*>(aPlain.getConstArray()), aPlain.getLength());
    }

    Sequence<sal_Int8> const aPrefix(
        mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement)));
    Sequence<sal_Int8> const aLocal(
        mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement)));

    return OString(reinterpret_cast<sal_Char const*>(aPrefix.getConstArray()), aPrefix.getLength())
        + OString(sColon, N_CHARS(sColon))
        + OString(reinterpret_cast<sal_Char const*>(aLocal.getConstArray()), aLocal.getLength());
}
#endif
188 :
189 295900 : void FastSaxSerializer::startFastElement( ::sal_Int32 Element, FastAttributeList* pAttrList )
190 : {
191 295900 : if ( !mbMarkStackEmpty )
192 : {
193 134948 : maCachedOutputStream.flush();
194 134948 : maMarkStack.top()->setCurrentElement( Element );
195 : }
196 :
197 : #ifdef DBG_UTIL
198 : m_DebugStartedElements.push(Element);
199 : #endif
200 :
201 295900 : writeBytes(sOpeningBracket, N_CHARS(sOpeningBracket));
202 :
203 295900 : writeId(Element);
204 295900 : if (pAttrList)
205 35956 : writeFastAttributeList(*pAttrList);
206 : else
207 259944 : writeTokenValueList();
208 :
209 295900 : writeBytes(sClosingBracket, N_CHARS(sClosingBracket));
210 295900 : }
211 :
212 295900 : void FastSaxSerializer::endFastElement( ::sal_Int32 Element )
213 : {
214 : #ifdef DBG_UTIL
215 : assert(!m_DebugStartedElements.empty());
216 : // Well-formedness constraint: Element Type Match
217 : assert(Element == m_DebugStartedElements.top());
218 : m_DebugStartedElements.pop();
219 : #endif
220 :
221 295900 : writeBytes(sOpeningBracketAndSlash, N_CHARS(sOpeningBracketAndSlash));
222 :
223 295900 : writeId(Element);
224 :
225 295900 : writeBytes(sClosingBracket, N_CHARS(sClosingBracket));
226 295900 : }
227 :
228 547426 : void FastSaxSerializer::singleFastElement( ::sal_Int32 Element, FastAttributeList* pAttrList )
229 : {
230 547426 : if ( !mbMarkStackEmpty )
231 : {
232 193910 : maCachedOutputStream.flush();
233 193910 : maMarkStack.top()->setCurrentElement( Element );
234 : }
235 :
236 547426 : writeBytes(sOpeningBracket, N_CHARS(sOpeningBracket));
237 :
238 547426 : writeId(Element);
239 547426 : if (pAttrList)
240 221966 : writeFastAttributeList(*pAttrList);
241 : else
242 325460 : writeTokenValueList();
243 :
244 547426 : writeBytes(sSlashAndClosingBracket, N_CHARS(sSlashAndClosingBracket));
245 547426 : }
246 :
247 6812 : ::com::sun::star::uno::Reference< ::com::sun::star::io::XOutputStream > FastSaxSerializer::getOutputStream()
248 : {
249 6812 : return maCachedOutputStream.getOutputStream();
250 : }
251 :
252 585404 : void FastSaxSerializer::writeTokenValueList()
253 : {
254 : #ifdef DBG_UTIL
255 : ::std::set<OString> DebugAttributes;
256 : #endif
257 967866 : for (size_t j = 0; j < maTokenValues.size(); j++)
258 : {
259 382462 : writeBytes(sSpace, N_CHARS(sSpace));
260 :
261 382462 : sal_Int32 nToken = maTokenValues[j].nToken;
262 382462 : writeId(nToken);
263 :
264 : #ifdef DBG_UTIL
265 : // Well-formedness constraint: Unique Att Spec
266 : OString const nameId(getId(nToken));
267 : assert(DebugAttributes.find(nameId) == DebugAttributes.end());
268 : DebugAttributes.insert(nameId);
269 : #endif
270 :
271 382462 : writeBytes(sEqualSignAndQuote, N_CHARS(sEqualSignAndQuote));
272 :
273 382462 : write(maTokenValues[j].pValue, -1, true);
274 :
275 382462 : writeBytes(sQuote, N_CHARS(sQuote));
276 : }
277 585404 : maTokenValues.clear();
278 585404 : }
279 :
280 257922 : void FastSaxSerializer::writeFastAttributeList(FastAttributeList& rAttrList)
281 : {
282 : #ifdef DBG_UTIL
283 : ::std::set<OString> DebugAttributes;
284 : #endif
285 257922 : const std::vector< sal_Int32 >& Tokens = rAttrList.getFastAttributeTokens();
286 1065894 : for (size_t j = 0; j < Tokens.size(); j++)
287 : {
288 807972 : writeBytes(sSpace, N_CHARS(sSpace));
289 :
290 807972 : sal_Int32 nToken = Tokens[j];
291 807972 : writeId(nToken);
292 :
293 : #ifdef DBG_UTIL
294 : // Well-formedness constraint: Unique Att Spec
295 : OString const nameId(getId(nToken));
296 : assert(DebugAttributes.find(nameId) == DebugAttributes.end());
297 : DebugAttributes.insert(nameId);
298 : #endif
299 :
300 807972 : writeBytes(sEqualSignAndQuote, N_CHARS(sEqualSignAndQuote));
301 :
302 807972 : write(rAttrList.getFastAttributeValue(j), rAttrList.AttributeValueLength(j), true);
303 :
304 807972 : writeBytes(sQuote, N_CHARS(sQuote));
305 : }
306 257922 : }
307 :
308 213072 : void FastSaxSerializer::mark( const Int32Sequence& aOrder )
309 : {
310 213072 : if ( aOrder.hasElements() )
311 : {
312 79022 : boost::shared_ptr< ForMerge > pSort( new ForSort( aOrder ) );
313 79022 : maMarkStack.push( pSort );
314 79022 : maCachedOutputStream.setOutput( pSort );
315 : }
316 : else
317 : {
318 134050 : boost::shared_ptr< ForMerge > pMerge( new ForMerge( ) );
319 134050 : maMarkStack.push( pMerge );
320 134050 : maCachedOutputStream.setOutput( pMerge );
321 : }
322 213072 : mbMarkStackEmpty = false;
323 213072 : }
324 :
325 213072 : void FastSaxSerializer::mergeTopMarks( sax_fastparser::MergeMarksEnum eMergeType )
326 : {
327 : SAL_WARN_IF(mbMarkStackEmpty, "sax", "Empty mark stack - nothing to merge");
328 213072 : if ( mbMarkStackEmpty )
329 46008 : return;
330 :
331 : // flush, so that we get everything in getData()
332 213072 : maCachedOutputStream.flush();
333 :
334 213072 : if ( maMarkStack.size() == 1 && eMergeType != MERGE_MARKS_IGNORE)
335 : {
336 46008 : Sequence<sal_Int8> aSeq( maMarkStack.top()->getData() );
337 46008 : maMarkStack.pop();
338 46008 : mbMarkStackEmpty = true;
339 46008 : maCachedOutputStream.resetOutputToStream();
340 46008 : maCachedOutputStream.writeBytes( aSeq.getConstArray(), aSeq.getLength() );
341 46008 : return;
342 : }
343 :
344 167064 : const Int8Sequence aMerge( maMarkStack.top()->getData() );
345 167064 : maMarkStack.pop();
346 167064 : if (maMarkStack.empty())
347 : {
348 2 : mbMarkStackEmpty = true;
349 2 : maCachedOutputStream.resetOutputToStream();
350 : }
351 : else
352 : {
353 167062 : maCachedOutputStream.setOutput( maMarkStack.top() );
354 : }
355 :
356 167064 : switch ( eMergeType )
357 : {
358 114956 : case MERGE_MARKS_APPEND: maMarkStack.top()->append( aMerge ); break;
359 49550 : case MERGE_MARKS_PREPEND: maMarkStack.top()->prepend( aMerge ); break;
360 2556 : case MERGE_MARKS_POSTPONE: maMarkStack.top()->postpone( aMerge ); break;
361 2 : case MERGE_MARKS_IGNORE : break;
362 :
363 167064 : }
364 : }
365 :
366 4478694 : void FastSaxSerializer::writeBytes( const Sequence< sal_Int8 >& rData )
367 : {
368 4478694 : maCachedOutputStream.writeBytes( rData.getConstArray(), rData.getLength() );
369 4478694 : }
370 :
371 17777518 : void FastSaxSerializer::writeBytes( const char* pStr, size_t nLen )
372 : {
373 17777518 : maCachedOutputStream.writeBytes( reinterpret_cast<const sal_Int8*>(pStr), nLen );
374 17777518 : }
375 :
376 213072 : FastSaxSerializer::Int8Sequence& FastSaxSerializer::ForMerge::getData()
377 : {
378 213072 : merge( maData, maPostponed, true );
379 213072 : maPostponed.realloc( 0 );
380 :
381 213072 : return maData;
382 : }
383 :
#if OSL_DEBUG_LEVEL > 0
/** Debug helper: dumps the main and the postponed buffer to std::cerr. */
void FastSaxSerializer::ForMerge::print( )
{
    std::cerr << "Data: ";
    const sal_Int32 nDataLen = maData.getLength();
    for ( sal_Int32 nPos = 0; nPos < nDataLen; ++nPos )
        std::cerr << maData[nPos];

    std::cerr << "\nPostponed: ";
    const sal_Int32 nPostponedLen = maPostponed.getLength();
    for ( sal_Int32 nPos = 0; nPos < nPostponedLen; ++nPos )
        std::cerr << maPostponed[nPos];

    std::cerr << "\n";
}
#endif
402 :
403 49534 : void FastSaxSerializer::ForMerge::prepend( const Int8Sequence &rWhat )
404 : {
405 49534 : merge( maData, rWhat, false );
406 49534 : }
407 :
408 866158 : void FastSaxSerializer::ForMerge::append( const Int8Sequence &rWhat )
409 : {
410 866158 : merge( maData, rWhat, true );
411 866158 : }
412 :
413 2556 : void FastSaxSerializer::ForMerge::postpone( const Int8Sequence &rWhat )
414 : {
415 2556 : merge( maPostponed, rWhat, true );
416 2556 : }
417 :
418 1450268 : void FastSaxSerializer::ForMerge::merge( Int8Sequence &rTop, const Int8Sequence &rMerge, bool bAppend )
419 : {
420 1450268 : sal_Int32 nMergeLen = rMerge.getLength();
421 1450268 : if ( nMergeLen > 0 )
422 : {
423 696660 : sal_Int32 nTopLen = rTop.getLength();
424 :
425 696660 : rTop.realloc( nTopLen + nMergeLen );
426 696660 : if ( bAppend )
427 : {
428 : // append the rMerge to the rTop
429 647126 : memcpy( rTop.getArray() + nTopLen, rMerge.getConstArray(), nMergeLen );
430 : }
431 : else
432 : {
433 : // prepend the rMerge to the rTop
434 49534 : memmove( rTop.getArray() + nMergeLen, rTop.getConstArray(), nTopLen );
435 49534 : memcpy( rTop.getArray(), rMerge.getConstArray(), nMergeLen );
436 : }
437 : }
438 1450268 : }
439 :
440 79022 : void FastSaxSerializer::ForMerge::resetData( )
441 : {
442 79022 : maData = Int8Sequence();
443 79022 : }
444 :
445 160390 : void FastSaxSerializer::ForSort::setCurrentElement( sal_Int32 nElement )
446 : {
447 160390 : SequenceAsVector< sal_Int32 > aOrder( maOrder );
448 160390 : if( std::find( aOrder.begin(), aOrder.end(), nElement ) != aOrder.end() )
449 : {
450 151356 : mnCurrentElement = nElement;
451 151356 : if ( maData.find( nElement ) == maData.end() )
452 148070 : maData[ nElement ] = Int8Sequence();
453 160390 : }
454 160390 : }
455 :
456 16 : void FastSaxSerializer::ForSort::prepend( const Int8Sequence &rWhat )
457 : {
458 16 : append( rWhat );
459 16 : }
460 :
461 318948 : void FastSaxSerializer::ForSort::append( const Int8Sequence &rWhat )
462 : {
463 318948 : merge( maData[mnCurrentElement], rWhat, true );
464 318948 : }
465 :
466 79022 : void FastSaxSerializer::ForSort::sort()
467 : {
468 : // Clear the ForMerge data to avoid duplicate items
469 79022 : resetData();
470 :
471 : // Sort it all
472 79022 : std::map< sal_Int32, Int8Sequence >::iterator iter;
473 3720010 : for ( sal_Int32 i=0, len=maOrder.getLength(); i < len; i++ )
474 : {
475 3640988 : iter = maData.find( maOrder[i] );
476 3640988 : if ( iter != maData.end() )
477 148070 : ForMerge::append( iter->second );
478 : }
479 79022 : }
480 :
481 79022 : FastSaxSerializer::Int8Sequence& FastSaxSerializer::ForSort::getData()
482 : {
483 79022 : sort( );
484 79022 : return ForMerge::getData();
485 : }
486 :
#if OSL_DEBUG_LEVEL > 0
/** Debug helper: dumps every bucket to std::cerr, then sorts and prints the
    merged data via ForMerge::print().
 */
void FastSaxSerializer::ForSort::print( )
{
    for ( std::map< sal_Int32, Int8Sequence >::iterator aIt = maData.begin();
          aIt != maData.end( ); ++aIt )
    {
        std::cerr << "pair: " << aIt->first;
        const sal_Int32 nLen = aIt->second.getLength();
        for ( sal_Int32 nPos = 0; nPos < nLen; ++nPos )
            std::cerr << aIt->second[nPos];
        std::cerr << "\n";
    }

    sort( );
    ForMerge::print();
}
#endif
504 :
505 3291 : } // namespace sax_fastparser
506 :
507 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|