Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include "fastserializer.hxx"
21 : #include <rtl/ustrbuf.hxx>
22 :
23 : #include <comphelper/sequenceasvector.hxx>
24 :
25 : #include <com/sun/star/xml/Attribute.hpp>
26 : #include <com/sun/star/xml/FastAttribute.hpp>
27 : #include <com/sun/star/xml/sax/XFastAttributeList.hpp>
28 :
29 : #include <string.h>
30 :
31 : #if OSL_DEBUG_LEVEL > 0
32 : #include <iostream>
33 : #include <set>
34 : #endif
35 :
36 : using ::comphelper::SequenceAsVector;
37 : using ::com::sun::star::uno::Reference;
38 : using ::com::sun::star::uno::RuntimeException;
39 : using ::com::sun::star::uno::Sequence;
40 : using ::com::sun::star::uno::toUnoSequence;
41 : using ::com::sun::star::xml::FastAttribute;
42 : using ::com::sun::star::xml::Attribute;
43 : using ::com::sun::star::xml::sax::SAXException;
44 : using ::com::sun::star::xml::sax::XFastAttributeList;
45 : using ::com::sun::star::io::XOutputStream;
46 : using ::com::sun::star::io::NotConnectedException;
47 : using ::com::sun::star::io::IOException;
48 : using ::com::sun::star::io::BufferSizeExceededException;
49 :
// A fast token packs a namespace id into the upper 16 bits and a local token
// id into the lower 16 bits. The macro argument is parenthesized so that
// compound expressions (e.g. `a | b`) are evaluated before the mask/shift.
#define HAS_NAMESPACE(x) (((x) & 0xffff0000) != 0)
#define NAMESPACE(x) ((x) >> 16)
#define TOKEN(x) ((x) & 0xffff)
53 :
54 : namespace sax_fastparser {
55 0 : FastSaxSerializer::FastSaxSerializer( )
56 : : mxOutputStream()
57 : , mxFastTokenHandler()
58 : , maMarkStack()
59 : , maClosingBracket((const sal_Int8 *)">", 1)
60 : , maSlashAndClosingBracket((const sal_Int8 *)"/>", 2)
61 : , maColon((const sal_Int8 *)":", 1)
62 : , maOpeningBracket((const sal_Int8 *)"<", 1)
63 : , maOpeningBracketAndSlash((const sal_Int8 *)"</", 2)
64 : , maQuote((const sal_Int8 *)"\"", 1)
65 : , maEqualSignAndQuote((const sal_Int8 *)"=\"", 2)
66 0 : , maSpace((const sal_Int8 *)" ", 1)
67 : {
68 0 : }
69 0 : FastSaxSerializer::~FastSaxSerializer() {}
70 :
71 0 : void SAL_CALL FastSaxSerializer::startDocument( ) throw (SAXException, RuntimeException)
72 : {
73 : assert(mxOutputStream.is()); // cannot do anything without that
74 0 : if (!mxOutputStream.is())
75 0 : return;
76 0 : rtl::ByteSequence aXmlHeader((const sal_Int8*) "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n", 56);
77 0 : writeBytes(toUnoSequence(aXmlHeader));
78 : }
79 :
80 0 : OUString FastSaxSerializer::escapeXml( const OUString& s )
81 : {
82 0 : OUStringBuffer sBuf( s.getLength() );
83 0 : const sal_Unicode* pStr = s.getStr();
84 0 : sal_Int32 nLen = s.getLength();
85 0 : for( sal_Int32 i = 0; i < nLen; ++i)
86 : {
87 0 : sal_Unicode c = pStr[ i ];
88 0 : switch( c )
89 : {
90 0 : case '<': sBuf.appendAscii( "<" ); break;
91 0 : case '>': sBuf.appendAscii( ">" ); break;
92 0 : case '&': sBuf.appendAscii( "&" ); break;
93 0 : case '\'': sBuf.appendAscii( "'" ); break;
94 0 : case '"': sBuf.appendAscii( """ ); break;
95 0 : case '\n': sBuf.appendAscii( " " ); break;
96 0 : case '\r': sBuf.appendAscii( " " ); break;
97 0 : default: sBuf.append( c ); break;
98 : }
99 : }
100 0 : return sBuf.makeStringAndClear();
101 : }
102 :
103 0 : void FastSaxSerializer::write( const OUString& s )
104 : {
105 0 : OString sOutput( OUStringToOString( s, RTL_TEXTENCODING_UTF8 ) );
106 : writeBytes( Sequence< sal_Int8 >(
107 0 : reinterpret_cast< const sal_Int8*>( sOutput.getStr() ),
108 0 : sOutput.getLength() ) );
109 0 : }
110 :
111 0 : void SAL_CALL FastSaxSerializer::endDocument( ) throw (SAXException, RuntimeException)
112 : {
113 0 : if (!mxOutputStream.is())
114 0 : return;
115 : }
116 :
117 0 : void SAL_CALL FastSaxSerializer::writeId( ::sal_Int32 nElement )
118 : {
119 0 : if( HAS_NAMESPACE( nElement ) ) {
120 0 : writeBytes(mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement)));
121 0 : writeBytes(toUnoSequence(maColon));
122 0 : writeBytes(mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement)));
123 : } else
124 0 : writeBytes(mxFastTokenHandler->getUTF8Identifier(nElement));
125 0 : }
126 :
#ifdef DBG_UTIL
/// Debug-only counterpart of writeId(): returns the token name as a string
/// ("prefix:name" for namespaced tokens, otherwise the bare identifier)
/// instead of writing it to the output.
OString SAL_CALL FastSaxSerializer::getId( ::sal_Int32 nElement )
{
    if (!HAS_NAMESPACE(nElement))
    {
        Sequence<sal_Int8> const aBare(
            mxFastTokenHandler->getUTF8Identifier(nElement));
        return OString(reinterpret_cast<sal_Char const*>(aBare.getConstArray()), aBare.getLength());
    }

    Sequence<sal_Int8> const aPrefix(
        mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement)));
    Sequence<sal_Int8> const aLocal(
        mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement)));

    OString const sPrefix(reinterpret_cast<sal_Char const*>(aPrefix.getConstArray()), aPrefix.getLength());
    OString const sColon(reinterpret_cast<sal_Char const*>(maColon.getConstArray()), maColon.getLength());
    OString const sLocal(reinterpret_cast<sal_Char const*>(aLocal.getConstArray()), aLocal.getLength());
    return sPrefix + sColon + sLocal;
}
#endif
145 :
146 0 : void SAL_CALL FastSaxSerializer::startFastElement( ::sal_Int32 Element, const Reference< XFastAttributeList >& Attribs )
147 : throw (SAXException, RuntimeException)
148 : {
149 0 : if (!mxOutputStream.is())
150 0 : return;
151 :
152 0 : if ( !maMarkStack.empty() )
153 0 : maMarkStack.top()->setCurrentElement( Element );
154 :
155 : #ifdef DBG_UTIL
156 : m_DebugStartedElements.push(Element);
157 : #endif
158 :
159 0 : writeBytes(toUnoSequence(maOpeningBracket));
160 :
161 0 : writeId(Element);
162 0 : writeFastAttributeList(Attribs);
163 :
164 0 : writeBytes(toUnoSequence(maClosingBracket));
165 : }
166 :
167 0 : void SAL_CALL FastSaxSerializer::endFastElement( ::sal_Int32 Element )
168 : throw (SAXException, RuntimeException)
169 : {
170 0 : if (!mxOutputStream.is())
171 0 : return;
172 :
173 : #ifdef DBG_UTIL
174 : assert(!m_DebugStartedElements.empty());
175 : // Well-formedness constraint: Element Type Match
176 : assert(Element == m_DebugStartedElements.top());
177 : m_DebugStartedElements.pop();
178 : #endif
179 :
180 0 : writeBytes(toUnoSequence(maOpeningBracketAndSlash));
181 :
182 0 : writeId(Element);
183 :
184 0 : writeBytes(toUnoSequence(maClosingBracket));
185 : }
186 :
187 0 : void SAL_CALL FastSaxSerializer::singleFastElement( ::sal_Int32 Element, const Reference< XFastAttributeList >& Attribs )
188 : throw (SAXException, RuntimeException)
189 : {
190 0 : if (!mxOutputStream.is())
191 0 : return;
192 :
193 0 : if ( !maMarkStack.empty() )
194 0 : maMarkStack.top()->setCurrentElement( Element );
195 :
196 0 : writeBytes(toUnoSequence(maOpeningBracket));
197 :
198 0 : writeId(Element);
199 0 : writeFastAttributeList(Attribs);
200 :
201 0 : writeBytes(toUnoSequence(maSlashAndClosingBracket));
202 : }
203 :
204 0 : void SAL_CALL FastSaxSerializer::characters( const OUString& aChars )
205 : throw (SAXException, RuntimeException)
206 : {
207 0 : if (!mxOutputStream.is())
208 0 : return;
209 :
210 0 : write( aChars );
211 : }
212 :
213 0 : void SAL_CALL FastSaxSerializer::setOutputStream( const ::com::sun::star::uno::Reference< ::com::sun::star::io::XOutputStream >& xOutputStream )
214 : throw (::com::sun::star::uno::RuntimeException)
215 : {
216 0 : mxOutputStream = xOutputStream;
217 : assert(mxOutputStream.is()); // cannot do anything without that
218 0 : }
219 :
220 0 : void SAL_CALL FastSaxSerializer::setFastTokenHandler( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler >& xFastTokenHandler )
221 : throw (::com::sun::star::uno::RuntimeException)
222 : {
223 0 : mxFastTokenHandler = xFastTokenHandler;
224 0 : }
225 0 : void FastSaxSerializer::writeFastAttributeList( const Reference< XFastAttributeList >& Attribs )
226 : {
227 : #ifdef DBG_UTIL
228 : ::std::set<OUString> DebugAttributes;
229 : #endif
230 0 : Sequence< Attribute > aAttrSeq = Attribs->getUnknownAttributes();
231 0 : const Attribute *pAttr = aAttrSeq.getConstArray();
232 0 : sal_Int32 nAttrLength = aAttrSeq.getLength();
233 0 : for (sal_Int32 i = 0; i < nAttrLength; i++)
234 : {
235 0 : writeBytes(toUnoSequence(maSpace));
236 :
237 0 : OUString const& rAttrName(pAttr[i].Name);
238 : #ifdef DBG_UTIL
239 : // Well-formedness constraint: Unique Att Spec
240 : assert(DebugAttributes.find(rAttrName) == DebugAttributes.end());
241 : DebugAttributes.insert(rAttrName);
242 : #endif
243 0 : write(rAttrName);
244 0 : writeBytes(toUnoSequence(maEqualSignAndQuote));
245 0 : write(escapeXml(pAttr[i].Value));
246 0 : writeBytes(toUnoSequence(maQuote));
247 : }
248 :
249 0 : Sequence< FastAttribute > aFastAttrSeq = Attribs->getFastAttributes();
250 0 : const FastAttribute *pFastAttr = aFastAttrSeq.getConstArray();
251 0 : sal_Int32 nFastAttrLength = aFastAttrSeq.getLength();
252 0 : for (sal_Int32 j = 0; j < nFastAttrLength; j++)
253 : {
254 0 : writeBytes(toUnoSequence(maSpace));
255 :
256 0 : sal_Int32 nToken = pFastAttr[j].Token;
257 0 : writeId(nToken);
258 :
259 : #ifdef DBG_UTIL
260 : // Well-formedness constraint: Unique Att Spec
261 : OUString const name(OStringToOUString(getId(nToken),
262 : RTL_TEXTENCODING_UTF8));
263 : assert(DebugAttributes.find(name) == DebugAttributes.end());
264 : DebugAttributes.insert(name);
265 : #endif
266 :
267 0 : writeBytes(toUnoSequence(maEqualSignAndQuote));
268 :
269 0 : write(escapeXml(Attribs->getValue(pFastAttr[j].Token)));
270 :
271 0 : writeBytes(toUnoSequence(maQuote));
272 0 : }
273 0 : }
274 :
275 0 : void FastSaxSerializer::mark( Int32Sequence aOrder )
276 : {
277 0 : if ( aOrder.hasElements() )
278 : {
279 0 : boost::shared_ptr< ForMerge > pSort( new ForSort( aOrder ) );
280 0 : maMarkStack.push( pSort );
281 : }
282 : else
283 : {
284 0 : boost::shared_ptr< ForMerge > pMerge( new ForMerge( ) );
285 0 : maMarkStack.push( pMerge );
286 : }
287 0 : }
288 :
289 0 : void FastSaxSerializer::mergeTopMarks( sax_fastparser::MergeMarksEnum eMergeType )
290 : {
291 0 : if ( maMarkStack.empty() )
292 0 : return;
293 :
294 0 : if ( maMarkStack.size() == 1 )
295 : {
296 0 : mxOutputStream->writeBytes( maMarkStack.top()->getData() );
297 0 : maMarkStack.pop();
298 0 : return;
299 : }
300 :
301 0 : const Int8Sequence aMerge( maMarkStack.top()->getData() );
302 0 : maMarkStack.pop();
303 :
304 0 : switch ( eMergeType )
305 : {
306 0 : case MERGE_MARKS_APPEND: maMarkStack.top()->append( aMerge ); break;
307 0 : case MERGE_MARKS_PREPEND: maMarkStack.top()->prepend( aMerge ); break;
308 0 : case MERGE_MARKS_POSTPONE: maMarkStack.top()->postpone( aMerge ); break;
309 0 : }
310 : }
311 :
312 0 : void FastSaxSerializer::writeBytes( const Sequence< ::sal_Int8 >& aData ) throw ( NotConnectedException, BufferSizeExceededException, IOException, RuntimeException )
313 : {
314 0 : if ( maMarkStack.empty() )
315 0 : mxOutputStream->writeBytes( aData );
316 : else
317 0 : maMarkStack.top()->append( aData );
318 0 : }
319 :
320 0 : FastSaxSerializer::Int8Sequence& FastSaxSerializer::ForMerge::getData()
321 : {
322 0 : merge( maData, maPostponed, true );
323 0 : maPostponed.realloc( 0 );
324 :
325 0 : return maData;
326 : }
327 :
#if OSL_DEBUG_LEVEL > 0
/// Debug helper: dumps the data and postponed buffers to std::cerr.
void FastSaxSerializer::ForMerge::print( )
{
    std::cerr << "Data: ";
    const sal_Int32 nDataLen = maData.getLength();
    for ( sal_Int32 nPos = 0; nPos < nDataLen; ++nPos )
        std::cerr << maData[nPos];

    std::cerr << "\nPostponed: ";
    const sal_Int32 nPostponedLen = maPostponed.getLength();
    for ( sal_Int32 nPos = 0; nPos < nPostponedLen; ++nPos )
        std::cerr << maPostponed[nPos];

    std::cerr << "\n";
}
#endif
346 :
347 0 : void FastSaxSerializer::ForMerge::prepend( const Int8Sequence &rWhat )
348 : {
349 0 : merge( maData, rWhat, false );
350 0 : }
351 :
352 0 : void FastSaxSerializer::ForMerge::append( const Int8Sequence &rWhat )
353 : {
354 0 : merge( maData, rWhat, true );
355 0 : }
356 :
357 0 : void FastSaxSerializer::ForMerge::postpone( const Int8Sequence &rWhat )
358 : {
359 0 : merge( maPostponed, rWhat, true );
360 0 : }
361 :
362 0 : void FastSaxSerializer::ForMerge::merge( Int8Sequence &rTop, const Int8Sequence &rMerge, bool bAppend )
363 : {
364 0 : sal_Int32 nMergeLen = rMerge.getLength();
365 0 : if ( nMergeLen > 0 )
366 : {
367 0 : sal_Int32 nTopLen = rTop.getLength();
368 :
369 0 : rTop.realloc( nTopLen + nMergeLen );
370 0 : if ( bAppend )
371 : {
372 : // append the rMerge to the rTop
373 0 : memcpy( rTop.getArray() + nTopLen, rMerge.getConstArray(), nMergeLen );
374 : }
375 : else
376 : {
377 : // prepend the rMerge to the rTop
378 0 : memmove( rTop.getArray() + nMergeLen, rTop.getConstArray(), nTopLen );
379 0 : memcpy( rTop.getArray(), rMerge.getConstArray(), nMergeLen );
380 : }
381 : }
382 0 : }
383 :
384 0 : void FastSaxSerializer::ForMerge::resetData( )
385 : {
386 0 : maData = Int8Sequence();
387 0 : }
388 :
389 0 : void FastSaxSerializer::ForSort::setCurrentElement( sal_Int32 nElement )
390 : {
391 0 : SequenceAsVector< sal_Int32 > aOrder( maOrder );
392 0 : if( std::find( aOrder.begin(), aOrder.end(), nElement ) != aOrder.end() )
393 : {
394 0 : mnCurrentElement = nElement;
395 0 : if ( maData.find( nElement ) == maData.end() )
396 0 : maData[ nElement ] = Int8Sequence();
397 0 : }
398 0 : }
399 :
400 0 : void FastSaxSerializer::ForSort::prepend( const Int8Sequence &rWhat )
401 : {
402 0 : append( rWhat );
403 0 : }
404 :
405 0 : void FastSaxSerializer::ForSort::append( const Int8Sequence &rWhat )
406 : {
407 0 : merge( maData[mnCurrentElement], rWhat, true );
408 0 : }
409 :
410 0 : void FastSaxSerializer::ForSort::sort()
411 : {
412 : // Clear the ForMerge data to avoid duplicate items
413 0 : resetData();
414 :
415 : // Sort it all
416 0 : std::map< sal_Int32, Int8Sequence >::iterator iter;
417 0 : for ( sal_Int32 i=0, len=maOrder.getLength(); i < len; i++ )
418 : {
419 0 : iter = maData.find( maOrder[i] );
420 0 : if ( iter != maData.end() )
421 0 : ForMerge::append( iter->second );
422 : }
423 0 : }
424 :
425 0 : FastSaxSerializer::Int8Sequence& FastSaxSerializer::ForSort::getData()
426 : {
427 0 : sort( );
428 0 : return ForMerge::getData();
429 : }
430 :
#if OSL_DEBUG_LEVEL > 0
/// Debug helper: dumps every bucket (token id followed by its bytes) to
/// std::cerr, then sorts and prints the merged result via ForMerge::print().
void FastSaxSerializer::ForSort::print( )
{
    for ( std::map< sal_Int32, Int8Sequence >::iterator aBucket = maData.begin();
          aBucket != maData.end( ); ++aBucket )
    {
        std::cerr << "pair: " << aBucket->first;
        const sal_Int32 nBucketLen = aBucket->second.getLength();
        for ( sal_Int32 nPos = 0; nPos < nBucketLen; ++nPos )
            std::cerr << aBucket->second[nPos];
        std::cerr << "\n";
    }

    sort( );
    ForMerge::print();
}
#endif
448 :
449 3 : } // namespace sax_fastparser
450 :
451 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|