Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include "fastserializer.hxx"
21 : #include <rtl/ustrbuf.hxx>
22 :
23 : #include <comphelper/sequenceasvector.hxx>
24 :
25 : #include <com/sun/star/xml/Attribute.hpp>
26 : #include <com/sun/star/xml/FastAttribute.hpp>
27 : #include <com/sun/star/xml/sax/XFastAttributeList.hpp>
28 :
29 : #include <string.h>
30 :
31 : #if OSL_DEBUG_LEVEL > 0
32 : #include <iostream>
33 : #include <set>
34 : #endif
35 :
36 : using ::comphelper::SequenceAsVector;
37 : using ::com::sun::star::uno::Reference;
38 : using ::com::sun::star::uno::RuntimeException;
39 : using ::com::sun::star::uno::Sequence;
40 : using ::com::sun::star::uno::toUnoSequence;
41 : using ::com::sun::star::xml::FastAttribute;
42 : using ::com::sun::star::xml::Attribute;
43 : using ::com::sun::star::xml::sax::SAXException;
44 : using ::com::sun::star::xml::sax::XFastAttributeList;
45 : using ::com::sun::star::io::XOutputStream;
46 : using ::com::sun::star::io::NotConnectedException;
47 : using ::com::sun::star::io::IOException;
48 : using ::com::sun::star::io::BufferSizeExceededException;
49 :
// A combined id carries the namespace token in its upper 16 bits and the
// local-name token in its lower 16 bits (see writeId, which emits
// NAMESPACE(x) ':' TOKEN(x) when HAS_NAMESPACE(x) holds).
// The argument is parenthesised in every expansion so that compound
// expressions such as `a | b` are evaluated as a whole before the mask/shift.
#define HAS_NAMESPACE(x) (((x) & 0xffff0000) != 0)
#define NAMESPACE(x) ((x) >> 16)
#define TOKEN(x) ((x) & 0xffff)
53 :
54 : namespace sax_fastparser {
55 2014 : FastSaxSerializer::FastSaxSerializer( )
56 : : mxOutputStream()
57 : , mxFastTokenHandler()
58 : , maMarkStack()
59 : , maClosingBracket((const sal_Int8 *)">", 1)
60 : , maSlashAndClosingBracket((const sal_Int8 *)"/>", 2)
61 : , maColon((const sal_Int8 *)":", 1)
62 : , maOpeningBracket((const sal_Int8 *)"<", 1)
63 : , maOpeningBracketAndSlash((const sal_Int8 *)"</", 2)
64 : , maQuote((const sal_Int8 *)"\"", 1)
65 : , maEqualSignAndQuote((const sal_Int8 *)"=\"", 2)
66 2014 : , maSpace((const sal_Int8 *)" ", 1)
67 : {
68 2014 : }
69 2014 : FastSaxSerializer::~FastSaxSerializer() {}
70 :
71 2014 : void SAL_CALL FastSaxSerializer::startDocument( ) throw (SAXException, RuntimeException)
72 : {
73 : assert(mxOutputStream.is()); // cannot do anything without that
74 2014 : if (!mxOutputStream.is())
75 2014 : return;
76 2014 : rtl::ByteSequence aXmlHeader((const sal_Int8*) "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n", 56);
77 2014 : writeBytes(toUnoSequence(aXmlHeader));
78 : }
79 :
80 391682 : OUString FastSaxSerializer::escapeXml( const OUString& s )
81 : {
82 391682 : OUStringBuffer sBuf( s.getLength() );
83 391682 : const sal_Unicode* pStr = s.getStr();
84 391682 : sal_Int32 nLen = s.getLength();
85 3402549 : for( sal_Int32 i = 0; i < nLen; ++i)
86 : {
87 3010867 : sal_Unicode c = pStr[ i ];
88 3010867 : switch( c )
89 : {
90 17 : case '<': sBuf.appendAscii( "<" ); break;
91 16 : case '>': sBuf.appendAscii( ">" ); break;
92 79 : case '&': sBuf.appendAscii( "&" ); break;
93 165 : case '\'': sBuf.appendAscii( "'" ); break;
94 399 : case '"': sBuf.appendAscii( """ ); break;
95 3 : case '\n': sBuf.appendAscii( " " ); break;
96 0 : case '\r': sBuf.appendAscii( " " ); break;
97 3010188 : default: sBuf.append( c ); break;
98 : }
99 : }
100 391682 : return sBuf.makeStringAndClear();
101 : }
102 :
103 395423 : void FastSaxSerializer::write( const OUString& s )
104 : {
105 395423 : OString sOutput( OUStringToOString( s, RTL_TEXTENCODING_UTF8 ) );
106 : writeBytes( Sequence< sal_Int8 >(
107 395423 : reinterpret_cast< const sal_Int8*>( sOutput.getStr() ),
108 790846 : sOutput.getLength() ) );
109 395423 : }
110 :
111 2014 : void SAL_CALL FastSaxSerializer::endDocument( ) throw (SAXException, RuntimeException)
112 : {
113 2014 : if (!mxOutputStream.is())
114 0 : return;
115 : }
116 :
117 759613 : void SAL_CALL FastSaxSerializer::writeId( ::sal_Int32 nElement )
118 : {
119 759613 : if( HAS_NAMESPACE( nElement ) ) {
120 717286 : writeBytes(mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement)));
121 717286 : writeBytes(toUnoSequence(maColon));
122 717286 : writeBytes(mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement)));
123 : } else
124 42327 : writeBytes(mxFastTokenHandler->getUTF8Identifier(nElement));
125 759613 : }
126 :
#ifdef DBG_UTIL
/// Debug-only counterpart of writeId(): returns the name of @p nElement
/// ("prefix:local" or just the identifier) as an OString instead of
/// writing it.
OString SAL_CALL FastSaxSerializer::getId( ::sal_Int32 nElement )
{
    if (!HAS_NAMESPACE(nElement))
    {
        Sequence<sal_Int8> const aName(
            mxFastTokenHandler->getUTF8Identifier(nElement));
        return OString(reinterpret_cast<sal_Char const*>(aName.getConstArray()), aName.getLength());
    }

    Sequence<sal_Int8> const aPrefix(
        mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement)));
    Sequence<sal_Int8> const aLocal(
        mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement)));

    OString const aPrefixStr(reinterpret_cast<sal_Char const*>(aPrefix.getConstArray()), aPrefix.getLength());
    OString const aColonStr(reinterpret_cast<sal_Char const*>(maColon.getConstArray()), maColon.getLength());
    OString const aLocalStr(reinterpret_cast<sal_Char const*>(aLocal.getConstArray()), aLocal.getLength());
    return aPrefixStr + aColonStr + aLocalStr;
}
#endif
145 :
146 94565 : void SAL_CALL FastSaxSerializer::startFastElement( ::sal_Int32 Element, const Reference< XFastAttributeList >& Attribs )
147 : throw (SAXException, RuntimeException)
148 : {
149 94565 : if (!mxOutputStream.is())
150 94565 : return;
151 :
152 94565 : if ( !maMarkStack.empty() )
153 49759 : maMarkStack.top()->setCurrentElement( Element );
154 :
155 : #ifdef DBG_UTIL
156 : m_DebugStartedElements.push(Element);
157 : #endif
158 :
159 94565 : writeBytes(toUnoSequence(maOpeningBracket));
160 :
161 94565 : writeId(Element);
162 94565 : writeFastAttributeList(Attribs);
163 :
164 94565 : writeBytes(toUnoSequence(maClosingBracket));
165 : }
166 :
167 94565 : void SAL_CALL FastSaxSerializer::endFastElement( ::sal_Int32 Element )
168 : throw (SAXException, RuntimeException)
169 : {
170 94565 : if (!mxOutputStream.is())
171 94565 : return;
172 :
173 : #ifdef DBG_UTIL
174 : assert(!m_DebugStartedElements.empty());
175 : // Well-formedness constraint: Element Type Match
176 : assert(Element == m_DebugStartedElements.top());
177 : m_DebugStartedElements.pop();
178 : #endif
179 :
180 94565 : writeBytes(toUnoSequence(maOpeningBracketAndSlash));
181 :
182 94565 : writeId(Element);
183 :
184 94565 : writeBytes(toUnoSequence(maClosingBracket));
185 : }
186 :
187 186891 : void SAL_CALL FastSaxSerializer::singleFastElement( ::sal_Int32 Element, const Reference< XFastAttributeList >& Attribs )
188 : throw (SAXException, RuntimeException)
189 : {
190 186891 : if (!mxOutputStream.is())
191 186891 : return;
192 :
193 186891 : if ( !maMarkStack.empty() )
194 73145 : maMarkStack.top()->setCurrentElement( Element );
195 :
196 186891 : writeBytes(toUnoSequence(maOpeningBracket));
197 :
198 186891 : writeId(Element);
199 186891 : writeFastAttributeList(Attribs);
200 :
201 186891 : writeBytes(toUnoSequence(maSlashAndClosingBracket));
202 : }
203 :
204 12304 : void SAL_CALL FastSaxSerializer::characters( const OUString& aChars )
205 : throw (SAXException, RuntimeException)
206 : {
207 12304 : if (!mxOutputStream.is())
208 12304 : return;
209 :
210 12304 : write( aChars );
211 : }
212 :
213 2014 : void SAL_CALL FastSaxSerializer::setOutputStream( const ::com::sun::star::uno::Reference< ::com::sun::star::io::XOutputStream >& xOutputStream )
214 : throw (::com::sun::star::uno::RuntimeException)
215 : {
216 2014 : mxOutputStream = xOutputStream;
217 : assert(mxOutputStream.is()); // cannot do anything without that
218 2014 : }
219 :
220 2014 : void SAL_CALL FastSaxSerializer::setFastTokenHandler( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler >& xFastTokenHandler )
221 : throw (::com::sun::star::uno::RuntimeException)
222 : {
223 2014 : mxFastTokenHandler = xFastTokenHandler;
224 2014 : }
225 281456 : void FastSaxSerializer::writeFastAttributeList( const Reference< XFastAttributeList >& Attribs )
226 : {
227 : #ifdef DBG_UTIL
228 : ::std::set<OUString> DebugAttributes;
229 : #endif
230 281456 : Sequence< Attribute > aAttrSeq = Attribs->getUnknownAttributes();
231 281456 : const Attribute *pAttr = aAttrSeq.getConstArray();
232 281456 : sal_Int32 nAttrLength = aAttrSeq.getLength();
233 281456 : for (sal_Int32 i = 0; i < nAttrLength; i++)
234 : {
235 0 : writeBytes(toUnoSequence(maSpace));
236 :
237 0 : OUString const& rAttrName(pAttr[i].Name);
238 : #ifdef DBG_UTIL
239 : // Well-formedness constraint: Unique Att Spec
240 : assert(DebugAttributes.find(rAttrName) == DebugAttributes.end());
241 : DebugAttributes.insert(rAttrName);
242 : #endif
243 0 : write(rAttrName);
244 0 : writeBytes(toUnoSequence(maEqualSignAndQuote));
245 0 : write(escapeXml(pAttr[i].Value));
246 0 : writeBytes(toUnoSequence(maQuote));
247 : }
248 :
249 562912 : Sequence< FastAttribute > aFastAttrSeq = Attribs->getFastAttributes();
250 281456 : const FastAttribute *pFastAttr = aFastAttrSeq.getConstArray();
251 281456 : sal_Int32 nFastAttrLength = aFastAttrSeq.getLength();
252 664575 : for (sal_Int32 j = 0; j < nFastAttrLength; j++)
253 : {
254 383119 : writeBytes(toUnoSequence(maSpace));
255 :
256 383119 : sal_Int32 nToken = pFastAttr[j].Token;
257 383119 : writeId(nToken);
258 :
259 : #ifdef DBG_UTIL
260 : // Well-formedness constraint: Unique Att Spec
261 : OUString const name(OStringToOUString(getId(nToken),
262 : RTL_TEXTENCODING_UTF8));
263 : assert(DebugAttributes.find(name) == DebugAttributes.end());
264 : DebugAttributes.insert(name);
265 : #endif
266 :
267 383119 : writeBytes(toUnoSequence(maEqualSignAndQuote));
268 :
269 383119 : write(escapeXml(Attribs->getValue(pFastAttr[j].Token)));
270 :
271 383119 : writeBytes(toUnoSequence(maQuote));
272 281456 : }
273 281456 : }
274 :
275 70700 : void FastSaxSerializer::mark( Int32Sequence aOrder )
276 : {
277 70700 : if ( aOrder.hasElements() )
278 : {
279 20911 : boost::shared_ptr< ForMerge > pSort( new ForSort( aOrder ) );
280 20911 : maMarkStack.push( pSort );
281 : }
282 : else
283 : {
284 49789 : boost::shared_ptr< ForMerge > pMerge( new ForMerge( ) );
285 49789 : maMarkStack.push( pMerge );
286 : }
287 70700 : }
288 :
289 70700 : void FastSaxSerializer::mergeTopMarks( sax_fastparser::MergeMarksEnum eMergeType )
290 : {
291 70700 : if ( maMarkStack.empty() )
292 13634 : return;
293 :
294 70700 : if ( maMarkStack.size() == 1 )
295 : {
296 13634 : mxOutputStream->writeBytes( maMarkStack.top()->getData() );
297 13634 : maMarkStack.pop();
298 13634 : return;
299 : }
300 :
301 57066 : const Int8Sequence aMerge( maMarkStack.top()->getData() );
302 57066 : maMarkStack.pop();
303 :
304 57066 : switch ( eMergeType )
305 : {
306 37912 : case MERGE_MARKS_APPEND: maMarkStack.top()->append( aMerge ); break;
307 18431 : case MERGE_MARKS_PREPEND: maMarkStack.top()->prepend( aMerge ); break;
308 723 : case MERGE_MARKS_POSTPONE: maMarkStack.top()->postpone( aMerge ); break;
309 57066 : }
310 : }
311 :
312 4493021 : void FastSaxSerializer::writeBytes( const Sequence< ::sal_Int8 >& aData ) throw ( NotConnectedException, BufferSizeExceededException, IOException, RuntimeException )
313 : {
314 4493021 : if ( maMarkStack.empty() )
315 2854091 : mxOutputStream->writeBytes( aData );
316 : else
317 1638930 : maMarkStack.top()->append( aData );
318 4493021 : }
319 :
320 70700 : FastSaxSerializer::Int8Sequence& FastSaxSerializer::ForMerge::getData()
321 : {
322 70700 : merge( maData, maPostponed, true );
323 70700 : maPostponed.realloc( 0 );
324 :
325 70700 : return maData;
326 : }
327 :
#if OSL_DEBUG_LEVEL > 0
/// Debug helper: dumps the collected and the postponed bytes to std::cerr.
void FastSaxSerializer::ForMerge::print( )
{
    std::cerr << "Data: ";
    const sal_Int32 nDataLen = maData.getLength();
    for ( sal_Int32 nPos = 0; nPos < nDataLen; ++nPos )
        std::cerr << maData[nPos];

    std::cerr << "\nPostponed: ";
    const sal_Int32 nPostponedLen = maPostponed.getLength();
    for ( sal_Int32 nPos = 0; nPos < nPostponedLen; ++nPos )
        std::cerr << maPostponed[nPos];

    std::cerr << "\n";
}
#endif
346 :
347 18431 : void FastSaxSerializer::ForMerge::prepend( const Int8Sequence &rWhat )
348 : {
349 18431 : merge( maData, rWhat, false );
350 18431 : }
351 :
352 986060 : void FastSaxSerializer::ForMerge::append( const Int8Sequence &rWhat )
353 : {
354 986060 : merge( maData, rWhat, true );
355 986060 : }
356 :
357 723 : void FastSaxSerializer::ForMerge::postpone( const Int8Sequence &rWhat )
358 : {
359 723 : merge( maPostponed, rWhat, true );
360 723 : }
361 :
362 1811452 : void FastSaxSerializer::ForMerge::merge( Int8Sequence &rTop, const Int8Sequence &rMerge, bool bAppend )
363 : {
364 1811452 : sal_Int32 nMergeLen = rMerge.getLength();
365 1811452 : if ( nMergeLen > 0 )
366 : {
367 1737245 : sal_Int32 nTopLen = rTop.getLength();
368 :
369 1737245 : rTop.realloc( nTopLen + nMergeLen );
370 1737245 : if ( bAppend )
371 : {
372 : // append the rMerge to the rTop
373 1718814 : memcpy( rTop.getArray() + nTopLen, rMerge.getConstArray(), nMergeLen );
374 : }
375 : else
376 : {
377 : // prepend the rMerge to the rTop
378 18431 : memmove( rTop.getArray() + nMergeLen, rTop.getConstArray(), nTopLen );
379 18431 : memcpy( rTop.getArray(), rMerge.getConstArray(), nMergeLen );
380 : }
381 : }
382 1811452 : }
383 :
384 20911 : void FastSaxSerializer::ForMerge::resetData( )
385 : {
386 20911 : maData = Int8Sequence();
387 20911 : }
388 :
389 47673 : void FastSaxSerializer::ForSort::setCurrentElement( sal_Int32 nElement )
390 : {
391 47673 : SequenceAsVector< sal_Int32 > aOrder( maOrder );
392 47673 : if( std::find( aOrder.begin(), aOrder.end(), nElement ) != aOrder.end() )
393 : {
394 44820 : mnCurrentElement = nElement;
395 44820 : if ( maData.find( nElement ) == maData.end() )
396 44756 : maData[ nElement ] = Int8Sequence();
397 47673 : }
398 47673 : }
399 :
400 0 : void FastSaxSerializer::ForSort::prepend( const Int8Sequence &rWhat )
401 : {
402 0 : append( rWhat );
403 0 : }
404 :
405 735538 : void FastSaxSerializer::ForSort::append( const Int8Sequence &rWhat )
406 : {
407 735538 : merge( maData[mnCurrentElement], rWhat, true );
408 735538 : }
409 :
410 20911 : void FastSaxSerializer::ForSort::sort()
411 : {
412 : // Clear the ForMerge data to avoid duplicate items
413 20911 : resetData();
414 :
415 : // Sort it all
416 20911 : std::map< sal_Int32, Int8Sequence >::iterator iter;
417 951511 : for ( sal_Int32 i=0, len=maOrder.getLength(); i < len; i++ )
418 : {
419 930600 : iter = maData.find( maOrder[i] );
420 930600 : if ( iter != maData.end() )
421 44756 : ForMerge::append( iter->second );
422 : }
423 20911 : }
424 :
425 20911 : FastSaxSerializer::Int8Sequence& FastSaxSerializer::ForSort::getData()
426 : {
427 20911 : sort( );
428 20911 : return ForMerge::getData();
429 : }
430 :
#if OSL_DEBUG_LEVEL > 0
/// Debug helper: dumps every bucket (element id followed by its bytes) to
/// std::cerr, then sorts and prints the merged data via ForMerge::print().
void FastSaxSerializer::ForSort::print( )
{
    for ( std::map< sal_Int32, Int8Sequence >::iterator aBucket = maData.begin();
          aBucket != maData.end( ); ++aBucket )
    {
        std::cerr << "pair: " << aBucket->first;
        const sal_Int32 nBucketLen = aBucket->second.getLength();
        for ( sal_Int32 nPos = 0; nPos < nBucketLen; ++nPos )
            std::cerr << aBucket->second[nPos];
        std::cerr << "\n";
    }

    sort( );
    ForMerge::print();
}
#endif
448 :
449 1167 : } // namespace sax_fastparser
450 :
451 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|