Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 : #include <stdlib.h>
20 : #include <string.h>
21 : #include <sal/alloca.h>
22 : #include <cassert>
23 : #include <vector>
24 :
25 : #include <osl/diagnose.h>
26 :
27 : #include <com/sun/star/lang/XServiceInfo.hpp>
28 : #include <com/sun/star/lang/XInitialization.hpp>
29 : #include <com/sun/star/uno/XComponentContext.hpp>
30 : #include <com/sun/star/xml/sax/XExtendedDocumentHandler.hpp>
31 : #include <com/sun/star/xml/sax/XParser.hpp>
32 : #include <com/sun/star/xml/sax/SAXParseException.hpp>
33 : #include <com/sun/star/io/XSeekable.hpp>
34 :
35 : #include <cppuhelper/weak.hxx>
36 : #include <cppuhelper/implbase3.hxx>
37 : #include <cppuhelper/supportsservice.hxx>
38 : #include <rtl/ref.hxx>
39 :
40 : #include <expat.h>
41 :
42 : using namespace ::std;
43 : using namespace ::osl;
44 : using namespace ::cppu;
45 : using namespace ::com::sun::star::lang;
46 : using namespace ::com::sun::star::xml::sax;
47 : using namespace ::com::sun::star::io;
48 :
49 : #include "attrlistimpl.hxx"
50 : #include "xml2utf.hxx"
51 :
52 : namespace {
53 :
54 : // Useful macros for correct String conversion depending on the chosen expat-mode
55 : #ifdef XML_UNICODE
56 : OUString XmlNChar2OUString( const XML_Char *p , int nLen )
57 : {
58 : if( p ) {
59 : if( sizeof( sal_Unicode ) == sizeof( XML_Char ) )
60 : {
61 : return OUString( (sal_Unicode*)p,nLen);
62 : }
63 : else
64 : {
65 : sal_Unicode *pWchar = (sal_Unicode *)alloca( sizeof( sal_Unicode ) * nLen );
66 : for( int n = 0 ; n < nLen ; n++ ) {
67 : pWchar[n] = (sal_Unicode) p[n];
68 : }
69 : return OUString( pWchar , nLen );
70 : }
71 : }
72 : else {
73 : return OUString();
74 : }
75 : }
76 :
77 : OUString XmlChar2OUString( const XML_Char *p )
78 : {
79 : if( p ) {
80 : int nLen;
81 : for( nLen = 0 ; p[nLen] ; nLen ++ )
82 : ;
83 : return XmlNChar2OUString( p , nLen );
84 : }
85 : else return OUString();
86 : }
87 :
88 :
89 : #define XML_CHAR_TO_OUSTRING(x) XmlChar2OUString(x)
90 : #define XML_CHAR_N_TO_USTRING(x,n) XmlNChar2OUString(x,n)
91 : #else
92 : #define XML_CHAR_TO_OUSTRING(x) OUString(x , strlen( x ), RTL_TEXTENCODING_UTF8)
93 : #define XML_CHAR_N_TO_USTRING(x,n) OUString(x,n, RTL_TEXTENCODING_UTF8 )
94 : #endif
95 :
96 :
97 : /*
98 : * The following macro encapsulates any call to an event handler.
99 : * It ensures, that exceptions thrown by the event handler are
100 : * treated properly.
101 : */
// pThis : pointer to the SaxExpatParser_Impl whose handler is invoked.
// call  : member expression evaluated as (pThis)->call.
// Nothing is called once an exception has been recorded. SAX exceptions are
// routed to callErrorHandler(); a RuntimeException is stored in the Impl
// object so that it can be rethrown after expat has returned.
// The macro only refers to its own parameter and is wrapped in
// do { } while( false ), so it expands to exactly one statement.
#define CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS(pThis,call) \
    do { \
        if( ! (pThis)->bExceptionWasThrown ) { \
            try {\
                (pThis)->call;\
            }\
            catch( const SAXParseException &e ) {\
                callErrorHandler( (pThis) , e );\
            }\
            catch( const SAXException &e ) {\
                callErrorHandler( (pThis) , SAXParseException(\
                                                e.Message, \
                                                e.Context, \
                                                e.WrappedException,\
                                                (pThis)->rDocumentLocator->getPublicId(),\
                                                (pThis)->rDocumentLocator->getSystemId(),\
                                                (pThis)->rDocumentLocator->getLineNumber(),\
                                                (pThis)->rDocumentLocator->getColumnNumber()\
                                            ) );\
            }\
            catch( const com::sun::star::uno::RuntimeException &e ) {\
                (pThis)->bExceptionWasThrown = true; \
                (pThis)->bRTExceptionWasThrown = true; \
                (pThis)->rtexception = e; \
            }\
        }\
    } while( false )
128 :
129 :
130 : class SaxExpatParser_Impl;
131 :
132 : // This class implements the external Parser interface
133 : class SaxExpatParser
134 : : public WeakImplHelper3< XInitialization
135 : , XServiceInfo
136 : , XParser >
137 : {
138 :
139 : public:
140 : SaxExpatParser();
141 : virtual ~SaxExpatParser();
142 :
143 : // ::com::sun::star::lang::XInitialization:
144 : virtual void SAL_CALL initialize(css::uno::Sequence<css::uno::Any> const& rArguments)
145 : throw (css::uno::RuntimeException, css::uno::Exception, std::exception) SAL_OVERRIDE;
146 :
147 : // The SAX-Parser-Interface
148 : virtual void SAL_CALL parseStream( const InputSource& structSource)
149 : throw ( SAXException,
150 : IOException,
151 : css::uno::RuntimeException, std::exception) SAL_OVERRIDE;
152 : virtual void SAL_CALL setDocumentHandler(const css::uno::Reference< XDocumentHandler > & xHandler)
153 : throw (css::uno::RuntimeException, std::exception) SAL_OVERRIDE;
154 :
155 : virtual void SAL_CALL setErrorHandler(const css::uno::Reference< XErrorHandler > & xHandler)
156 : throw (css::uno::RuntimeException, std::exception) SAL_OVERRIDE;
157 : virtual void SAL_CALL setDTDHandler(const css::uno::Reference < XDTDHandler > & xHandler)
158 : throw (css::uno::RuntimeException, std::exception) SAL_OVERRIDE;
159 : virtual void SAL_CALL setEntityResolver(const css::uno::Reference< XEntityResolver >& xResolver)
160 : throw (css::uno::RuntimeException, std::exception) SAL_OVERRIDE;
161 :
162 : virtual void SAL_CALL setLocale( const Locale &locale ) throw (css::uno::RuntimeException, std::exception) SAL_OVERRIDE;
163 :
164 : public: // XServiceInfo
165 : OUString SAL_CALL getImplementationName() throw (std::exception) SAL_OVERRIDE;
166 : css::uno::Sequence< OUString > SAL_CALL getSupportedServiceNames(void) throw (std::exception) SAL_OVERRIDE;
167 : sal_Bool SAL_CALL supportsService(const OUString& ServiceName) throw (std::exception) SAL_OVERRIDE;
168 :
169 : private:
170 :
171 : SaxExpatParser_Impl *m_pImpl;
172 :
173 : };
174 :
175 :
176 : // Entity binds all information needed for a single file
177 142760 : struct Entity
178 : {
179 : InputSource structSource;
180 : XML_Parser pParser;
181 : sax_expatwrap::XMLFile2UTFConverter converter;
182 : };
183 :
184 :
185 44703 : class SaxExpatParser_Impl
186 : {
187 : public: // module scope
188 : Mutex aMutex;
189 : OUString sCDATA;
190 : bool m_bEnableDoS; // fdo#60471 thank you Adobe Illustrator
191 :
192 : css::uno::Reference< XDocumentHandler > rDocumentHandler;
193 : css::uno::Reference< XExtendedDocumentHandler > rExtendedDocumentHandler;
194 :
195 : css::uno::Reference< XErrorHandler > rErrorHandler;
196 : css::uno::Reference< XDTDHandler > rDTDHandler;
197 : css::uno::Reference< XEntityResolver > rEntityResolver;
198 : css::uno::Reference < XLocator > rDocumentLocator;
199 :
200 :
201 : rtl::Reference < sax_expatwrap::AttributeList > rAttrList;
202 :
203 : // External entity stack
204 : vector<struct Entity> vecEntity;
205 35493 : void pushEntity( const struct Entity &entity )
206 35493 : { vecEntity.push_back( entity ); }
207 35493 : void popEntity()
208 35493 : { vecEntity.pop_back( ); }
209 142426 : struct Entity &getEntity()
210 142426 : { return vecEntity.back(); }
211 :
212 :
213 : // Exception cannot be thrown through the C-XmlParser (possible resource leaks),
214 : // therefore the exception must be saved somewhere.
215 : SAXParseException exception;
216 : css::uno::RuntimeException rtexception;
217 : bool bExceptionWasThrown;
218 : bool bRTExceptionWasThrown;
219 :
220 : Locale locale;
221 :
222 : public:
223 44703 : SaxExpatParser_Impl()
224 : : sCDATA("CDATA")
225 : , m_bEnableDoS(false)
226 : , bExceptionWasThrown(false)
227 44703 : , bRTExceptionWasThrown(false)
228 : {
229 44703 : }
230 :
231 : // the C-Callbacks for the expat parser
232 : void static callbackStartElement(void *userData, const XML_Char *name , const XML_Char **atts);
233 : void static callbackEndElement(void *userData, const XML_Char *name);
234 : void static callbackCharacters( void *userData , const XML_Char *s , int nLen );
235 : void static callbackProcessingInstruction( void *userData ,
236 : const XML_Char *sTarget ,
237 : const XML_Char *sData );
238 :
239 : void static callbackEntityDecl( void *userData ,
240 : const XML_Char *entityName,
241 : int is_parameter_entity,
242 : const XML_Char *value,
243 : int value_length,
244 : const XML_Char *base,
245 : const XML_Char *systemId,
246 : const XML_Char *publicId,
247 : const XML_Char *notationName);
248 :
249 : void static callbackNotationDecl( void *userData,
250 : const XML_Char *notationName,
251 : const XML_Char *base,
252 : const XML_Char *systemId,
253 : const XML_Char *publicId);
254 :
255 : bool static callbackExternalEntityRef( XML_Parser parser,
256 : const XML_Char *openEntityNames,
257 : const XML_Char *base,
258 : const XML_Char *systemId,
259 : const XML_Char *publicId);
260 :
261 : int static callbackUnknownEncoding(void *encodingHandlerData,
262 : const XML_Char *name,
263 : XML_Encoding *info);
264 :
265 : void static callbackDefault( void *userData, const XML_Char *s, int len);
266 :
267 : void static callbackStartCDATA( void *userData );
268 : void static callbackEndCDATA( void *userData );
269 : void static callbackComment( void *userData , const XML_Char *s );
270 : void static callErrorHandler( SaxExpatParser_Impl *pImpl , const SAXParseException &e );
271 :
272 : public:
273 : void parse();
274 : };
275 :
276 : extern "C"
277 : {
278 635550 : static void call_callbackStartElement(void *userData, const XML_Char *name , const XML_Char **atts)
279 : {
280 635550 : SaxExpatParser_Impl::callbackStartElement(userData,name,atts);
281 635550 : }
282 635550 : static void call_callbackEndElement(void *userData, const XML_Char *name)
283 : {
284 635550 : SaxExpatParser_Impl::callbackEndElement(userData,name);
285 635550 : }
286 554116 : static void call_callbackCharacters( void *userData , const XML_Char *s , int nLen )
287 : {
288 554116 : SaxExpatParser_Impl::callbackCharacters(userData,s,nLen);
289 554116 : }
290 0 : static void call_callbackProcessingInstruction(void *userData,const XML_Char *sTarget,const XML_Char *sData )
291 : {
292 0 : SaxExpatParser_Impl::callbackProcessingInstruction(userData,sTarget,sData );
293 0 : }
294 0 : static void call_callbackEntityDecl(void *userData ,
295 : const XML_Char *entityName,
296 : int is_parameter_entity,
297 : const XML_Char *value,
298 : int value_length,
299 : const XML_Char *base,
300 : const XML_Char *systemId,
301 : const XML_Char *publicId,
302 : const XML_Char *notationName)
303 : {
304 : SaxExpatParser_Impl::callbackEntityDecl(userData, entityName,
305 : is_parameter_entity, value, value_length,
306 0 : base, systemId, publicId, notationName);
307 0 : }
308 0 : static void call_callbackNotationDecl(void *userData,
309 : const XML_Char *notationName,
310 : const XML_Char *base,
311 : const XML_Char *systemId,
312 : const XML_Char *publicId)
313 : {
314 0 : SaxExpatParser_Impl::callbackNotationDecl(userData,notationName,base,systemId,publicId);
315 0 : }
316 0 : static int call_callbackExternalEntityRef(XML_Parser parser,
317 : const XML_Char *openEntityNames,
318 : const XML_Char *base,
319 : const XML_Char *systemId,
320 : const XML_Char *publicId)
321 : {
322 0 : return SaxExpatParser_Impl::callbackExternalEntityRef(parser,openEntityNames,base,systemId,publicId);
323 : }
324 0 : static int call_callbackUnknownEncoding(void *encodingHandlerData,
325 : const XML_Char *name,
326 : XML_Encoding *info)
327 : {
328 0 : return SaxExpatParser_Impl::callbackUnknownEncoding(encodingHandlerData,name,info);
329 : }
330 10889 : static void call_callbackDefault( void *userData, const XML_Char *s, int len)
331 : {
332 10889 : SaxExpatParser_Impl::callbackDefault(userData,s,len);
333 10889 : }
334 0 : static void call_callbackStartCDATA( void *userData )
335 : {
336 0 : SaxExpatParser_Impl::callbackStartCDATA(userData);
337 0 : }
338 0 : static void call_callbackEndCDATA( void *userData )
339 : {
340 0 : SaxExpatParser_Impl::callbackEndCDATA(userData);
341 0 : }
342 1545 : static void call_callbackComment( void *userData , const XML_Char *s )
343 : {
344 1545 : SaxExpatParser_Impl::callbackComment(userData,s);
345 1545 : }
346 : }
347 :
348 :
349 :
350 : // LocatorImpl
351 :
352 89406 : class LocatorImpl :
353 : public WeakImplHelper2< XLocator, com::sun::star::io::XSeekable >
354 : // should use a different interface for stream positions!
355 : {
356 : public:
357 44703 : LocatorImpl( SaxExpatParser_Impl *p )
358 44703 : {
359 44703 : m_pParser = p;
360 44703 : }
361 :
362 : public: //XLocator
363 164 : virtual sal_Int32 SAL_CALL getColumnNumber(void) throw (std::exception) SAL_OVERRIDE
364 : {
365 164 : return XML_GetCurrentColumnNumber( m_pParser->getEntity().pParser );
366 : }
367 172 : virtual sal_Int32 SAL_CALL getLineNumber(void) throw (std::exception) SAL_OVERRIDE
368 : {
369 172 : return XML_GetCurrentLineNumber( m_pParser->getEntity().pParser );
370 : }
371 164 : virtual OUString SAL_CALL getPublicId(void) throw (std::exception) SAL_OVERRIDE
372 : {
373 164 : return m_pParser->getEntity().structSource.sPublicId;
374 : }
375 172 : virtual OUString SAL_CALL getSystemId(void) throw (std::exception) SAL_OVERRIDE
376 : {
377 172 : return m_pParser->getEntity().structSource.sSystemId;
378 : }
379 :
380 : // XSeekable (only for getPosition)
381 :
382 0 : virtual void SAL_CALL seek( sal_Int64 ) throw(std::exception) SAL_OVERRIDE
383 : {
384 0 : }
385 988 : virtual sal_Int64 SAL_CALL getPosition() throw(std::exception) SAL_OVERRIDE
386 : {
387 988 : return XML_GetCurrentByteIndex( m_pParser->getEntity().pParser );
388 : }
389 0 : virtual ::sal_Int64 SAL_CALL getLength() throw(std::exception) SAL_OVERRIDE
390 : {
391 0 : return 0;
392 : }
393 :
394 : private:
395 :
396 : SaxExpatParser_Impl *m_pParser;
397 : };
398 :
399 :
400 :
401 :
402 44703 : SaxExpatParser::SaxExpatParser( )
403 : {
404 44703 : m_pImpl = new SaxExpatParser_Impl;
405 :
406 44703 : LocatorImpl *pLoc = new LocatorImpl( m_pImpl );
407 44703 : m_pImpl->rDocumentLocator = css::uno::Reference< XLocator > ( pLoc );
408 :
409 : // Performance-improvement; handing out the same object with every call of
410 : // the startElement callback is allowed (see sax-specification):
411 44703 : m_pImpl->rAttrList = new sax_expatwrap::AttributeList;
412 :
413 44703 : m_pImpl->bExceptionWasThrown = false;
414 44703 : m_pImpl->bRTExceptionWasThrown = false;
415 44703 : }
416 :
417 134109 : SaxExpatParser::~SaxExpatParser()
418 : {
419 44703 : delete m_pImpl;
420 89406 : }
421 :
422 : // ::com::sun::star::lang::XInitialization:
423 : void SAL_CALL
424 300 : SaxExpatParser::initialize(css::uno::Sequence< css::uno::Any > const& rArguments)
425 : throw (css::uno::RuntimeException, css::uno::Exception, std::exception)
426 : {
427 : // possible arguments: a string "DoSmeplease"
428 300 : if (rArguments.getLength())
429 : {
430 300 : OUString str;
431 300 : if ((rArguments[0] >>= str) && "DoSmeplease" == str)
432 : {
433 300 : MutexGuard guard( m_pImpl->aMutex );
434 300 : m_pImpl->m_bEnableDoS = true;
435 300 : }
436 : }
437 300 : }
438 :
439 : /***************
440 : *
441 : * parseStream does Parser-startup initializations. The SaxExpatParser_Impl::parse() method does
442 : * the file-specific initialization work. (During a parser run, external files may be opened)
443 : *
444 : ****************/
445 35887 : void SaxExpatParser::parseStream( const InputSource& structSource)
446 : throw (SAXException,
447 : IOException,
448 : css::uno::RuntimeException, std::exception)
449 : {
450 : // Only one text at one time
451 35887 : MutexGuard guard( m_pImpl->aMutex );
452 :
453 :
454 71774 : struct Entity entity;
455 35887 : entity.structSource = structSource;
456 :
457 35887 : if( ! entity.structSource.aInputStream.is() )
458 : {
459 : throw SAXException("No input source",
460 394 : css::uno::Reference< css::uno::XInterface > () , css::uno::Any() );
461 : }
462 :
463 35493 : entity.converter.setInputStream( entity.structSource.aInputStream );
464 35493 : if( !entity.structSource.sEncoding.isEmpty() )
465 : {
466 : entity.converter.setEncoding(
467 0 : OUStringToOString( entity.structSource.sEncoding , RTL_TEXTENCODING_ASCII_US ) );
468 : }
469 :
470 : // create parser with proper encoding
471 35493 : entity.pParser = XML_ParserCreate( 0 );
472 35493 : if( ! entity.pParser )
473 : {
474 : throw SAXException("Couldn't create parser",
475 0 : css::uno::Reference< css::uno::XInterface > (), css::uno::Any() );
476 : }
477 :
478 : // set all necessary C-Callbacks
479 35493 : XML_SetUserData( entity.pParser , m_pImpl );
480 : XML_SetElementHandler( entity.pParser ,
481 : call_callbackStartElement ,
482 35493 : call_callbackEndElement );
483 35493 : XML_SetCharacterDataHandler( entity.pParser , call_callbackCharacters );
484 : XML_SetProcessingInstructionHandler(entity.pParser ,
485 35493 : call_callbackProcessingInstruction );
486 35493 : if (!m_pImpl->m_bEnableDoS)
487 : {
488 35193 : XML_SetEntityDeclHandler(entity.pParser, call_callbackEntityDecl);
489 : }
490 35493 : XML_SetNotationDeclHandler( entity.pParser, call_callbackNotationDecl );
491 : XML_SetExternalEntityRefHandler( entity.pParser,
492 35493 : call_callbackExternalEntityRef);
493 35493 : XML_SetUnknownEncodingHandler( entity.pParser, call_callbackUnknownEncoding ,0);
494 :
495 35493 : if( m_pImpl->rExtendedDocumentHandler.is() ) {
496 :
497 : // These handlers just delegate calls to the ExtendedHandler. If no extended handler is
498 : // given, these callbacks can be ignored
499 3805 : XML_SetDefaultHandlerExpand( entity.pParser, call_callbackDefault );
500 3805 : XML_SetCommentHandler( entity.pParser, call_callbackComment );
501 : XML_SetCdataSectionHandler( entity.pParser ,
502 : call_callbackStartCDATA ,
503 3805 : call_callbackEndCDATA );
504 : }
505 :
506 :
507 35493 : m_pImpl->exception = SAXParseException();
508 35493 : m_pImpl->pushEntity( entity );
509 : try
510 : {
511 : // start the document
512 35493 : if( m_pImpl->rDocumentHandler.is() ) {
513 35485 : m_pImpl->rDocumentHandler->setDocumentLocator( m_pImpl->rDocumentLocator );
514 35485 : m_pImpl->rDocumentHandler->startDocument();
515 : }
516 :
517 35493 : m_pImpl->parse();
518 :
519 : // finish document
520 35485 : if( m_pImpl->rDocumentHandler.is() ) {
521 35477 : m_pImpl->rDocumentHandler->endDocument();
522 : }
523 : }
524 : // catch( SAXParseException &e )
525 : // {
526 : // m_pImpl->popEntity();
527 : // XML_ParserFree( entity.pParser );
528 : // css::uno::Any aAny;
529 : // aAny <<= e;
530 : // throw SAXException( e.Message, e.Context, aAny );
531 : // }
532 16 : catch( SAXException & )
533 : {
534 8 : m_pImpl->popEntity();
535 8 : XML_ParserFree( entity.pParser );
536 8 : throw;
537 : }
538 0 : catch( IOException & )
539 : {
540 0 : m_pImpl->popEntity();
541 0 : XML_ParserFree( entity.pParser );
542 0 : throw;
543 : }
544 0 : catch( css::uno::RuntimeException & )
545 : {
546 0 : m_pImpl->popEntity();
547 0 : XML_ParserFree( entity.pParser );
548 0 : throw;
549 : }
550 :
551 35485 : m_pImpl->popEntity();
552 71372 : XML_ParserFree( entity.pParser );
553 35485 : }
554 :
555 62713 : void SaxExpatParser::setDocumentHandler(const css::uno::Reference< XDocumentHandler > & xHandler)
556 : throw (css::uno::RuntimeException, std::exception)
557 : {
558 62713 : m_pImpl->rDocumentHandler = xHandler;
559 125426 : m_pImpl->rExtendedDocumentHandler =
560 62713 : css::uno::Reference< XExtendedDocumentHandler >( xHandler , css::uno::UNO_QUERY );
561 62713 : }
562 :
563 0 : void SaxExpatParser::setErrorHandler(const css::uno::Reference< XErrorHandler > & xHandler)
564 : throw (css::uno::RuntimeException, std::exception)
565 : {
566 0 : m_pImpl->rErrorHandler = xHandler;
567 0 : }
568 :
569 0 : void SaxExpatParser::setDTDHandler(const css::uno::Reference< XDTDHandler > & xHandler)
570 : throw (css::uno::RuntimeException, std::exception)
571 : {
572 0 : m_pImpl->rDTDHandler = xHandler;
573 0 : }
574 :
575 241 : void SaxExpatParser::setEntityResolver(const css::uno::Reference < XEntityResolver > & xResolver)
576 : throw (css::uno::RuntimeException, std::exception)
577 : {
578 241 : m_pImpl->rEntityResolver = xResolver;
579 241 : }
580 :
581 :
582 0 : void SaxExpatParser::setLocale( const Locale & locale ) throw (css::uno::RuntimeException, std::exception)
583 : {
584 0 : m_pImpl->locale = locale;
585 0 : }
586 :
587 : // XServiceInfo
588 0 : OUString SaxExpatParser::getImplementationName() throw (std::exception)
589 : {
590 0 : return OUString("com.sun.star.comp.extensions.xml.sax.ParserExpat");
591 : }
592 :
593 : // XServiceInfo
594 0 : sal_Bool SaxExpatParser::supportsService(const OUString& ServiceName) throw (std::exception)
595 : {
596 0 : return cppu::supportsService(this, ServiceName);
597 : }
598 :
599 : // XServiceInfo
600 0 : css::uno::Sequence< OUString > SaxExpatParser::getSupportedServiceNames(void) throw (std::exception)
601 : {
602 0 : css::uno::Sequence<OUString> seq(1);
603 0 : seq[0] = "com.sun.star.xml.sax.Parser";
604 0 : return seq;
605 : }
606 :
607 :
608 : /*---------------------------------------
609 : *
610 : * Helper functions and classes
611 : *
612 : *
613 : *-------------------------------------------*/
614 8 : OUString getErrorMessage( XML_Error xmlE, const OUString& sSystemId , sal_Int32 nLine )
615 : {
616 8 : OUString Message;
617 8 : if( XML_ERROR_NONE == xmlE ) {
618 0 : Message = "No";
619 : }
620 8 : else if( XML_ERROR_NO_MEMORY == xmlE ) {
621 0 : Message = "no memory";
622 : }
623 8 : else if( XML_ERROR_SYNTAX == xmlE ) {
624 0 : Message = "syntax";
625 : }
626 8 : else if( XML_ERROR_NO_ELEMENTS == xmlE ) {
627 0 : Message = "no elements";
628 : }
629 8 : else if( XML_ERROR_INVALID_TOKEN == xmlE ) {
630 8 : Message = "invalid token";
631 : }
632 0 : else if( XML_ERROR_UNCLOSED_TOKEN == xmlE ) {
633 0 : Message = "unclosed token";
634 : }
635 0 : else if( XML_ERROR_PARTIAL_CHAR == xmlE ) {
636 0 : Message = "partial char";
637 : }
638 0 : else if( XML_ERROR_TAG_MISMATCH == xmlE ) {
639 0 : Message = "tag mismatch";
640 : }
641 0 : else if( XML_ERROR_DUPLICATE_ATTRIBUTE == xmlE ) {
642 0 : Message = "duplicate attribute";
643 : }
644 0 : else if( XML_ERROR_JUNK_AFTER_DOC_ELEMENT == xmlE ) {
645 0 : Message = "junk after doc element";
646 : }
647 0 : else if( XML_ERROR_PARAM_ENTITY_REF == xmlE ) {
648 0 : Message = "parameter entity reference";
649 : }
650 0 : else if( XML_ERROR_UNDEFINED_ENTITY == xmlE ) {
651 0 : Message = "undefined entity";
652 : }
653 0 : else if( XML_ERROR_RECURSIVE_ENTITY_REF == xmlE ) {
654 0 : Message = "recursive entity reference";
655 : }
656 0 : else if( XML_ERROR_ASYNC_ENTITY == xmlE ) {
657 0 : Message = "async entity";
658 : }
659 0 : else if( XML_ERROR_BAD_CHAR_REF == xmlE ) {
660 0 : Message = "bad char reference";
661 : }
662 0 : else if( XML_ERROR_BINARY_ENTITY_REF == xmlE ) {
663 0 : Message = "binary entity reference";
664 : }
665 0 : else if( XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF == xmlE ) {
666 0 : Message = "attribute external entity reference";
667 : }
668 0 : else if( XML_ERROR_MISPLACED_XML_PI == xmlE ) {
669 0 : Message = "misplaced xml processing instruction";
670 : }
671 0 : else if( XML_ERROR_UNKNOWN_ENCODING == xmlE ) {
672 0 : Message = "unknown encoding";
673 : }
674 0 : else if( XML_ERROR_INCORRECT_ENCODING == xmlE ) {
675 0 : Message = "incorrect encoding";
676 : }
677 0 : else if( XML_ERROR_UNCLOSED_CDATA_SECTION == xmlE ) {
678 0 : Message = "unclosed cdata section";
679 : }
680 0 : else if( XML_ERROR_EXTERNAL_ENTITY_HANDLING == xmlE ) {
681 0 : Message = "external entity reference";
682 : }
683 0 : else if( XML_ERROR_NOT_STANDALONE == xmlE ) {
684 0 : Message = "not standalone";
685 : }
686 :
687 8 : OUString str("[");
688 8 : str += sSystemId;
689 8 : str += " line ";
690 8 : str += OUString::number( nLine );
691 8 : str += "]: ";
692 8 : str += Message;
693 8 : str += "error";
694 :
695 8 : return str;
696 : }
697 :
698 :
699 : // starts parsing with actual parser !
700 35493 : void SaxExpatParser_Impl::parse( )
701 : {
702 35493 : const int nBufSize = 16*1024;
703 :
704 35493 : int nRead = nBufSize;
705 35493 : css::uno::Sequence< sal_Int8 > seqOut(nBufSize);
706 :
707 105872 : while( nRead ) {
708 70379 : nRead = getEntity().converter.readAndConvert( seqOut , nBufSize );
709 :
710 70379 : if( ! nRead ) {
711 35485 : XML_Parse( getEntity().pParser ,
712 35485 : ( const char * ) seqOut.getArray() ,
713 : 0 ,
714 35485 : 1 );
715 35485 : break;
716 : }
717 :
718 34894 : bool bContinue = ( XML_Parse( getEntity().pParser ,
719 34894 : (const char *) seqOut.getArray(),
720 : nRead,
721 34894 : 0 ) != XML_STATUS_ERROR );
722 :
723 34894 : if( ! bContinue || this->bExceptionWasThrown ) {
724 :
725 8 : if ( this->bRTExceptionWasThrown )
726 0 : throw rtexception;
727 :
728 : // Error during parsing !
729 8 : XML_Error xmlE = XML_GetErrorCode( getEntity().pParser );
730 8 : OUString sSystemId = rDocumentLocator->getSystemId();
731 8 : sal_Int32 nLine = rDocumentLocator->getLineNumber();
732 :
733 : SAXParseException aExcept(
734 : getErrorMessage(xmlE , sSystemId, nLine) ,
735 : css::uno::Reference< css::uno::XInterface >(),
736 8 : css::uno::Any( &exception , getCppuType( &exception) ),
737 8 : rDocumentLocator->getPublicId(),
738 8 : rDocumentLocator->getSystemId(),
739 8 : rDocumentLocator->getLineNumber(),
740 8 : rDocumentLocator->getColumnNumber()
741 48 : );
742 :
743 8 : if( rErrorHandler.is() ) {
744 :
745 : // error handler is set, so the handler may throw the exception
746 0 : css::uno::Any a;
747 0 : a <<= aExcept;
748 0 : rErrorHandler->fatalError( a );
749 : }
750 :
751 : // Error handler has not thrown an exception, but parsing cannot go on,
752 : // so an exception MUST be thrown.
753 16 : throw aExcept;
754 : } // if( ! bContinue )
755 35493 : } // while
756 35485 : }
757 :
758 :
759 :
760 : // The C-Callbacks
761 :
762 :
763 635550 : void SaxExpatParser_Impl::callbackStartElement( void *pvThis ,
764 : const XML_Char *pwName ,
765 : const XML_Char **awAttributes )
766 : {
767 635550 : SaxExpatParser_Impl *pImpl = ((SaxExpatParser_Impl*)pvThis);
768 :
769 635550 : if( pImpl->rDocumentHandler.is() ) {
770 :
771 635208 : int i = 0;
772 635208 : pImpl->rAttrList->clear();
773 :
774 2566882 : while( awAttributes[i] ) {
775 : assert(awAttributes[i+1]);
776 : pImpl->rAttrList->addAttribute(
777 2592932 : XML_CHAR_TO_OUSTRING( awAttributes[i] ) ,
778 : pImpl->sCDATA, // expat doesn't know types
779 3889398 : XML_CHAR_TO_OUSTRING( awAttributes[i+1] ) );
780 1296466 : i +=2;
781 : }
782 :
783 635208 : CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS(
784 : pImpl ,
785 : rDocumentHandler->startElement( XML_CHAR_TO_OUSTRING( pwName ) ,
786 : pImpl->rAttrList.get() ) );
787 : }
788 635550 : }
789 :
790 635550 : void SaxExpatParser_Impl::callbackEndElement( void *pvThis , const XML_Char *pwName )
791 : {
792 635550 : SaxExpatParser_Impl *pImpl = ((SaxExpatParser_Impl*)pvThis);
793 :
794 635550 : if( pImpl->rDocumentHandler.is() ) {
795 635208 : CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS( pImpl,
796 : rDocumentHandler->endElement( XML_CHAR_TO_OUSTRING( pwName ) ) );
797 : }
798 635550 : }
799 :
800 :
801 554116 : void SaxExpatParser_Impl::callbackCharacters( void *pvThis , const XML_Char *s , int nLen )
802 : {
803 554116 : SaxExpatParser_Impl *pImpl = ((SaxExpatParser_Impl*)pvThis);
804 :
805 554116 : if( pImpl->rDocumentHandler.is() ) {
806 553946 : CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS( pImpl ,
807 : rDocumentHandler->characters( XML_CHAR_N_TO_USTRING(s,nLen) ) );
808 : }
809 554116 : }
810 :
811 0 : void SaxExpatParser_Impl::callbackProcessingInstruction( void *pvThis,
812 : const XML_Char *sTarget ,
813 : const XML_Char *sData )
814 : {
815 0 : SaxExpatParser_Impl *pImpl = ((SaxExpatParser_Impl*)pvThis);
816 0 : if( pImpl->rDocumentHandler.is() ) {
817 0 : CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS(
818 : pImpl ,
819 : rDocumentHandler->processingInstruction( XML_CHAR_TO_OUSTRING( sTarget ),
820 : XML_CHAR_TO_OUSTRING( sData ) ) );
821 : }
822 0 : }
823 :
824 :
825 0 : void SaxExpatParser_Impl::callbackEntityDecl(
826 : void *pvThis, const XML_Char *entityName,
827 : SAL_UNUSED_PARAMETER int /*is_parameter_entity*/,
828 : const XML_Char *value, SAL_UNUSED_PARAMETER int /*value_length*/,
829 : SAL_UNUSED_PARAMETER const XML_Char * /*base*/, const XML_Char *systemId,
830 : const XML_Char *publicId, const XML_Char *notationName)
831 : {
832 0 : SaxExpatParser_Impl *pImpl = ((SaxExpatParser_Impl*)pvThis);
833 0 : if (value) { // value != 0 means internal entity
834 : SAL_INFO("sax","SaxExpatParser: internal entity declaration, stopping");
835 0 : XML_StopParser(pImpl->getEntity().pParser, XML_FALSE);
836 0 : pImpl->exception = SAXParseException(
837 : "SaxExpatParser: internal entity declaration, stopping",
838 : 0, css::uno::Any(),
839 0 : pImpl->rDocumentLocator->getPublicId(),
840 0 : pImpl->rDocumentLocator->getSystemId(),
841 0 : pImpl->rDocumentLocator->getLineNumber(),
842 0 : pImpl->rDocumentLocator->getColumnNumber() );
843 0 : pImpl->bExceptionWasThrown = true;
844 : } else {
845 0 : if( pImpl->rDTDHandler.is() ) {
846 0 : CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS(
847 : pImpl ,
848 : rDTDHandler->unparsedEntityDecl(
849 : XML_CHAR_TO_OUSTRING( entityName ),
850 : XML_CHAR_TO_OUSTRING( publicId ) ,
851 : XML_CHAR_TO_OUSTRING( systemId ) ,
852 : XML_CHAR_TO_OUSTRING( notationName ) ) );
853 : }
854 : }
855 0 : }
856 :
857 0 : void SaxExpatParser_Impl::callbackNotationDecl(
858 : void *pvThis, const XML_Char *notationName,
859 : SAL_UNUSED_PARAMETER const XML_Char * /*base*/, const XML_Char *systemId,
860 : const XML_Char *publicId)
861 : {
862 0 : SaxExpatParser_Impl *pImpl = ((SaxExpatParser_Impl*)pvThis);
863 0 : if( pImpl->rDTDHandler.is() ) {
864 0 : CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS( pImpl,
865 : rDTDHandler->notationDecl( XML_CHAR_TO_OUSTRING( notationName ) ,
866 : XML_CHAR_TO_OUSTRING( publicId ) ,
867 : XML_CHAR_TO_OUSTRING( systemId ) ) );
868 : }
869 :
870 0 : }
871 :
872 :
873 :
874 0 : bool SaxExpatParser_Impl::callbackExternalEntityRef(
875 : XML_Parser parser, const XML_Char *context,
876 : SAL_UNUSED_PARAMETER const XML_Char * /*base*/, const XML_Char *systemId,
877 : const XML_Char *publicId)
878 : {
879 0 : bool bOK = true;
880 0 : InputSource source;
881 0 : SaxExpatParser_Impl *pImpl = ((SaxExpatParser_Impl*)XML_GetUserData( parser ));
882 :
883 0 : struct Entity entity;
884 :
885 0 : if( pImpl->rEntityResolver.is() ) {
886 : try
887 : {
888 0 : entity.structSource = pImpl->rEntityResolver->resolveEntity(
889 0 : XML_CHAR_TO_OUSTRING( publicId ) ,
890 0 : XML_CHAR_TO_OUSTRING( systemId ) );
891 : }
892 0 : catch( const SAXParseException & e )
893 : {
894 0 : pImpl->exception = e;
895 0 : bOK = false;
896 : }
897 0 : catch( const SAXException & e )
898 : {
899 0 : pImpl->exception = SAXParseException(
900 : e.Message , e.Context , e.WrappedException ,
901 0 : pImpl->rDocumentLocator->getPublicId(),
902 0 : pImpl->rDocumentLocator->getSystemId(),
903 0 : pImpl->rDocumentLocator->getLineNumber(),
904 0 : pImpl->rDocumentLocator->getColumnNumber() );
905 0 : bOK = false;
906 : }
907 : }
908 :
909 0 : if( entity.structSource.aInputStream.is() ) {
910 0 : entity.pParser = XML_ExternalEntityParserCreate( parser , context, 0 );
911 0 : if( ! entity.pParser )
912 : {
913 0 : return false;
914 : }
915 :
916 0 : entity.converter.setInputStream( entity.structSource.aInputStream );
917 0 : pImpl->pushEntity( entity );
918 : try
919 : {
920 0 : pImpl->parse();
921 : }
922 0 : catch( const SAXParseException & e )
923 : {
924 0 : pImpl->exception = e;
925 0 : bOK = false;
926 : }
927 0 : catch( const IOException &e )
928 : {
929 0 : pImpl->exception.WrappedException <<= e;
930 0 : bOK = false;
931 : }
932 0 : catch( const css::uno::RuntimeException &e )
933 : {
934 0 : pImpl->exception.WrappedException <<=e;
935 0 : bOK = false;
936 : }
937 :
938 0 : pImpl->popEntity();
939 :
940 0 : XML_ParserFree( entity.pParser );
941 : }
942 :
943 0 : return bOK;
944 : }
945 :
946 0 : int SaxExpatParser_Impl::callbackUnknownEncoding(
947 : SAL_UNUSED_PARAMETER void * /*encodingHandlerData*/,
948 : SAL_UNUSED_PARAMETER const XML_Char * /*name*/,
949 : SAL_UNUSED_PARAMETER XML_Encoding * /*info*/)
950 : {
951 0 : return 0;
952 : }
953 :
954 10889 : void SaxExpatParser_Impl::callbackDefault( void *pvThis, const XML_Char *s, int len)
955 : {
956 10889 : SaxExpatParser_Impl *pImpl = ((SaxExpatParser_Impl*)pvThis);
957 :
958 10889 : CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS( pImpl,
959 : rExtendedDocumentHandler->unknown( XML_CHAR_N_TO_USTRING( s ,len) ) );
960 10889 : }
961 :
962 1545 : void SaxExpatParser_Impl::callbackComment( void *pvThis , const XML_Char *s )
963 : {
964 1545 : SaxExpatParser_Impl *pImpl = ((SaxExpatParser_Impl*)pvThis);
965 1545 : CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS( pImpl,
966 : rExtendedDocumentHandler->comment( XML_CHAR_TO_OUSTRING( s ) ) );
967 1545 : }
968 :
969 0 : void SaxExpatParser_Impl::callbackStartCDATA( void *pvThis )
970 : {
971 0 : SaxExpatParser_Impl *pImpl = ((SaxExpatParser_Impl*)pvThis);
972 :
973 0 : CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS( pImpl, rExtendedDocumentHandler->startCDATA() );
974 0 : }
975 :
976 :
977 0 : void SaxExpatParser_Impl::callErrorHandler( SaxExpatParser_Impl *pImpl ,
978 : const SAXParseException & e )
979 : {
980 : try
981 : {
982 0 : if( pImpl->rErrorHandler.is() ) {
983 0 : css::uno::Any a;
984 0 : a <<= e;
985 0 : pImpl->rErrorHandler->error( a );
986 : }
987 : else {
988 0 : pImpl->exception = e;
989 0 : pImpl->bExceptionWasThrown = true;
990 : }
991 : }
992 0 : catch( const SAXParseException & ex ) {
993 0 : pImpl->exception = ex;
994 0 : pImpl->bExceptionWasThrown = true;
995 : }
996 0 : catch( const SAXException & ex ) {
997 0 : pImpl->exception = SAXParseException(
998 : ex.Message,
999 : ex.Context,
1000 : ex.WrappedException,
1001 0 : pImpl->rDocumentLocator->getPublicId(),
1002 0 : pImpl->rDocumentLocator->getSystemId(),
1003 0 : pImpl->rDocumentLocator->getLineNumber(),
1004 0 : pImpl->rDocumentLocator->getColumnNumber()
1005 0 : );
1006 0 : pImpl->bExceptionWasThrown = true;
1007 : }
1008 0 : }
1009 :
1010 0 : void SaxExpatParser_Impl::callbackEndCDATA( void *pvThis )
1011 : {
1012 0 : SaxExpatParser_Impl *pImpl = ((SaxExpatParser_Impl*)pvThis);
1013 :
1014 0 : CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS(pImpl,rExtendedDocumentHandler->endCDATA() );
1015 0 : }
1016 :
1017 : } // namespace
1018 :
1019 : extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface * SAL_CALL
1020 44703 : com_sun_star_comp_extensions_xml_sax_ParserExpat_get_implementation(
1021 : css::uno::XComponentContext *,
1022 : css::uno::Sequence<css::uno::Any> const &)
1023 : {
1024 44703 : return cppu::acquire(new SaxExpatParser);
1025 : }
1026 :
1027 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|