Branch data Line data Source code
1 : : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : : /*
3 : : * This file is part of the LibreOffice project.
4 : : *
5 : : * This Source Code Form is subject to the terms of the Mozilla Public
6 : : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : : *
9 : : * This file incorporates work covered by the following license notice:
10 : : *
11 : : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : : * contributor license agreements. See the NOTICE file distributed
13 : : * with this work for additional information regarding copyright
14 : : * ownership. The ASF licenses this file to you under the Apache
15 : : * License, Version 2.0 (the "License"); you may not use this file
16 : : * except in compliance with the License. You may obtain a copy of
17 : : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : : */
19 : :
20 : : #include "oox/vml/vmlinputstream.hxx"
21 : :
22 : : #include <com/sun/star/io/XTextInputStream.hpp>
23 : : #include <map>
24 : : #include <string.h>
25 : : #include <rtl/strbuf.hxx>
26 : : #include "oox/helper/helper.hxx"
27 : : #include "oox/helper/textinputstream.hxx"
28 : :
29 : : namespace oox {
30 : : namespace vml {
31 : :
32 : : // ============================================================================
33 : :
34 : : using namespace ::com::sun::star::io;
35 : : using namespace ::com::sun::star::uno;
36 : :
37 : : using ::rtl::OString;
38 : : using ::rtl::OStringBuffer;
39 : :
40 : : // ============================================================================
41 : :
42 : : namespace {
43 : :
44 : 24 : inline const sal_Char* lclFindCharacter( const sal_Char* pcBeg, const sal_Char* pcEnd, sal_Char cChar )
45 : : {
46 : 24 : sal_Int32 nIndex = rtl_str_indexOfChar_WithLength( pcBeg, static_cast< sal_Int32 >( pcEnd - pcBeg ), cChar );
47 [ + - ]: 24 : return (nIndex < 0) ? pcEnd : (pcBeg + nIndex);
48 : : }
49 : :
50 : 5517 : inline bool lclIsWhiteSpace( sal_Char cChar )
51 : : {
52 : 5517 : return cChar < 32;
53 : : }
54 : :
55 : 225 : const sal_Char* lclFindWhiteSpace( const sal_Char* pcBeg, const sal_Char* pcEnd )
56 : : {
57 [ + + ]: 5424 : for( ; pcBeg < pcEnd; ++pcBeg )
58 [ + + ]: 5211 : if( lclIsWhiteSpace( *pcBeg ) )
59 : 12 : return pcBeg;
60 : 225 : return pcEnd;
61 : : }
62 : :
63 : 435 : const sal_Char* lclFindNonWhiteSpace( const sal_Char* pcBeg, const sal_Char* pcEnd )
64 : : {
65 [ + + ]: 600 : for( ; pcBeg < pcEnd; ++pcBeg )
66 [ + + ]: 288 : if( !lclIsWhiteSpace( *pcBeg ) )
67 : 123 : return pcBeg;
68 : 435 : return pcEnd;
69 : : }
70 : :
71 : 12 : const sal_Char* lclTrimWhiteSpaceFromEnd( const sal_Char* pcBeg, const sal_Char* pcEnd )
72 : : {
73 [ + - ][ - + ]: 12 : while( (pcBeg < pcEnd) && lclIsWhiteSpace( pcEnd[ -1 ] ) )
[ - + ]
74 : 0 : --pcEnd;
75 : 12 : return pcEnd;
76 : : }
77 : :
78 : 228 : inline void lclAppendToBuffer( OStringBuffer& rBuffer, const sal_Char* pcBeg, const sal_Char* pcEnd )
79 : : {
80 : 228 : rBuffer.append( pcBeg, static_cast< sal_Int32 >( pcEnd - pcBeg ) );
81 : 228 : }
82 : :
83 : : // ----------------------------------------------------------------------------
84 : :
85 : 9 : void lclProcessAttribs( OStringBuffer& rBuffer, const sal_Char* pcBeg, const sal_Char* pcEnd )
86 : : {
87 : : /* Map attribute names to char-pointer of all attributes. This map is used
88 : : to find multiple occurrences of attributes with the same name. The
89 : : mapped pointers are used as map key in the next map below. */
90 : : typedef ::std::map< OString, const sal_Char* > AttributeNameMap;
91 [ + - ]: 9 : AttributeNameMap aAttributeNames;
92 : :
93 : : /* Map the char-pointers of all attributes to the full attribute definition
94 : : string. This preserves the original order of the used attributes. */
95 : : typedef ::std::map< const sal_Char*, OString > AttributeDataMap;
96 [ + - ]: 9 : AttributeDataMap aAttributes;
97 : :
98 : 9 : bool bOk = true;
99 : 9 : const sal_Char* pcNameBeg = pcBeg;
100 [ + + ][ + + ]: 21 : while( bOk && (pcNameBeg < pcEnd) )
[ + + ]
101 : : {
102 : : // pcNameBeg points to begin of attribute name, find equality sign
103 : 12 : const sal_Char* pcEqualSign = lclFindCharacter( pcNameBeg, pcEnd, '=' );
104 [ + - ]: 12 : if( (bOk = pcEqualSign < pcEnd) == true )
105 : : {
106 : : // find end of attribute name (ignore whitespace between name and equality sign)
107 : 12 : const sal_Char* pcNameEnd = lclTrimWhiteSpaceFromEnd( pcNameBeg, pcEqualSign );
108 [ + - ]: 12 : if( (bOk = pcNameBeg < pcNameEnd) == true )
109 : : {
110 : : // find begin of attribute value (must be single or double quote)
111 : 12 : const sal_Char* pcValueBeg = lclFindNonWhiteSpace( pcEqualSign + 1, pcEnd );
112 [ + - ][ + - ]: 12 : if( (bOk = (pcValueBeg < pcEnd) && ((*pcValueBeg == '\'') || (*pcValueBeg == '"'))) == true )
[ + - ][ + - ]
113 : : {
114 : : // find end of attribute value (matching quote character)
115 : 12 : const sal_Char* pcValueEnd = lclFindCharacter( pcValueBeg + 1, pcEnd, *pcValueBeg );
116 [ + - ]: 12 : if( (bOk = pcValueEnd < pcEnd) == true )
117 : : {
118 : 12 : ++pcValueEnd;
119 : 12 : OString aAttribName( pcNameBeg, static_cast< sal_Int32 >( pcNameEnd - pcNameBeg ) );
120 : 12 : OString aAttribData( pcNameBeg, static_cast< sal_Int32 >( pcValueEnd - pcNameBeg ) );
121 : : // search for an existing attribute with the same name
122 [ + - ]: 12 : AttributeNameMap::iterator aIt = aAttributeNames.find( aAttribName );
123 : : // remove its definition from the data map
124 [ - + ]: 12 : if( aIt != aAttributeNames.end() )
125 [ # # ]: 0 : aAttributes.erase( aIt->second );
126 : : // insert the attribute into both maps
127 [ + - ]: 12 : aAttributeNames[ aAttribName ] = pcNameBeg;
128 [ + - ]: 12 : aAttributes[ pcNameBeg ] = aAttribData;
129 : : // continue with next attribute (skip whitespace after this attribute)
130 : 12 : pcNameBeg = pcValueEnd;
131 [ + + ][ + + ]: 12 : if( (pcNameBeg < pcEnd) && ((bOk = lclIsWhiteSpace( *pcNameBeg )) == true) )
[ + + ]
132 : 12 : pcNameBeg = lclFindNonWhiteSpace( pcNameBeg + 1, pcEnd );
133 : : }
134 : : }
135 : : }
136 : : }
137 : : }
138 : :
139 : : // if no error has occurred, build the resulting attribute list
140 [ + + ]: 9 : if( bOk )
141 [ + + ]: 15 : for( AttributeDataMap::iterator aIt = aAttributes.begin(), aEnd = aAttributes.end(); aIt != aEnd; ++aIt )
142 [ + - ][ + - ]: 9 : rBuffer.append( ' ' ).append( aIt->second );
143 : : // on error, just append the complete passed string
144 : : else
145 [ + - ]: 9 : lclAppendToBuffer( rBuffer, pcBeg, pcEnd );
146 : 9 : }
147 : :
148 : 195 : void lclProcessElement( OStringBuffer& rBuffer, const OString& rElement )
149 : : {
150 : : // check that passed string starts and ends with the brackets of an XML element
151 : 195 : sal_Int32 nElementLen = rElement.getLength();
152 [ - + ]: 195 : if( nElementLen == 0 )
153 : 195 : return;
154 : :
155 : 195 : const sal_Char* pcOpen = rElement.getStr();
156 : 195 : const sal_Char* pcClose = pcOpen + nElementLen - 1;
157 : :
158 : : // no complete element found
159 [ + - ][ - + ]: 195 : if( (pcOpen >= pcClose) || (*pcOpen != '<') || (*pcClose != '>') )
[ + - ]
160 : : {
161 : : // just append all passed characters
162 : 0 : rBuffer.append( rElement );
163 : : }
164 : :
165 : : // skip parser instructions: '<![...]>'
166 [ + - ][ - + ]: 195 : else if( (nElementLen >= 5) && (pcOpen[ 1 ] == '!') && (pcOpen[ 2 ] == '[') && (pcClose[ -1 ] == ']') )
[ # # ][ # # ]
167 : : {
168 : : // do nothing
169 : : }
170 : :
171 : : // replace '<br>' element with newline
172 [ + - ][ - + ]: 195 : else if( (nElementLen >= 4) && (pcOpen[ 1 ] == 'b') && (pcOpen[ 2 ] == 'r') && (lclFindNonWhiteSpace( pcOpen + 3, pcClose ) == pcClose) )
[ # # ][ # # ]
[ - + ]
173 : : {
174 : 0 : rBuffer.append( '\n' );
175 : : }
176 : :
177 : : // check start elements and simple elements for repeated attributes
178 [ + + ]: 195 : else if( pcOpen[ 1 ] != '/' )
179 : : {
180 : : // find positions of text content inside brackets, exclude '/' in '<simpleelement/>'
181 : 126 : const sal_Char* pcContentBeg = pcOpen + 1;
182 : 126 : bool bIsEmptyElement = pcClose[ -1 ] == '/';
183 [ + + ]: 126 : const sal_Char* pcContentEnd = bIsEmptyElement ? (pcClose - 1) : pcClose;
184 : : // append opening bracket and element name to buffer
185 : 126 : const sal_Char* pcWhiteSpace = lclFindWhiteSpace( pcContentBeg, pcContentEnd );
186 : 126 : lclAppendToBuffer( rBuffer, pcOpen, pcWhiteSpace );
187 : : // find begin of attributes, and process all attributes
188 : 126 : const sal_Char* pcAttribBeg = lclFindNonWhiteSpace( pcWhiteSpace, pcContentEnd );
189 [ + + ]: 126 : if( pcAttribBeg < pcContentEnd )
190 : 9 : lclProcessAttribs( rBuffer, pcAttribBeg, pcContentEnd );
191 : : // close the element
192 [ + + ]: 126 : if( bIsEmptyElement )
193 : 57 : rBuffer.append( '/' );
194 : 126 : rBuffer.append( '>' );
195 : : }
196 : :
197 : : // append end elements without further processing
198 : : else
199 : : {
200 : 69 : rBuffer.append( rElement );
201 : : }
202 : : }
203 : :
204 : 195 : bool lclProcessCharacters( OStringBuffer& rBuffer, const OString& rChars )
205 : : {
206 : : /* MSO has a very weird way to store and handle whitespaces. The stream
207 : : may contain lots of spaces, tabs, and newlines which have to be handled
208 : : as single space character. This will be done in this function.
209 : :
210 : : If the element text contains a literal line break, it will be stored as
211 : : <br> tag (without matching </br> element). This input stream wrapper
212 : : will replace this element with a literal LF character (see below).
213 : :
214 : : A single space character for its own is stored as is. Example: The
215 : : element
216 : : <font> </font>
217 : : represents a single space character. The XML parser will ignore this
218 : : space character completely without issuing a 'characters' event. The
219 : : VML import filter implementation has to react on this case manually.
220 : :
221 : : A single space character following another character is stored
222 : : literally and must not be stipped away here. Example: The element
223 : : <font>abc </font>
224 : : contains the three letters a, b, and c, followed by a space character.
225 : :
226 : : Consecutive space characters, or a leading single space character, are
227 : : stored in a <span> element. If there are N space characters (N > 1),
228 : : then the <span> element contains exactly (N-1) NBSP (non-breaking
229 : : space) characters, followed by a regular space character. Examples:
230 : : The element
231 : : <font><span style='mso-spacerun:yes'>\xA0\xA0\xA0 </span></font>
232 : : represents 4 consecutive space characters. Has to be handled by the
233 : : implementation. The element
234 : : <font><span style='mso-spacerun:yes'> abc</span></font>
235 : : represents a space characters followed by the letters a, b, c. These
236 : : strings have to be handled by the VML import filter implementation.
237 : : */
238 : :
239 : : // passed string ends with the leading opening bracket of an XML element
240 : 195 : const sal_Char* pcBeg = rChars.getStr();
241 : 195 : const sal_Char* pcEnd = pcBeg + rChars.getLength();
242 [ + - ][ + - ]: 195 : bool bHasBracket = (pcBeg < pcEnd) && (pcEnd[ -1 ] == '<');
243 [ + - ]: 195 : if( bHasBracket ) --pcEnd;
244 : :
245 : : // skip leading whitespace
246 : 195 : const sal_Char* pcContentsBeg = lclFindNonWhiteSpace( pcBeg, pcEnd );
247 [ + + ]: 294 : while( pcContentsBeg < pcEnd )
248 : : {
249 : 99 : const sal_Char* pcWhitespaceBeg = lclFindWhiteSpace( pcContentsBeg + 1, pcEnd );
250 : 99 : lclAppendToBuffer( rBuffer, pcContentsBeg, pcWhitespaceBeg );
251 [ + + ]: 99 : if( pcWhitespaceBeg < pcEnd )
252 : 3 : rBuffer.append( ' ' );
253 : 99 : pcContentsBeg = lclFindNonWhiteSpace( pcWhitespaceBeg, pcEnd );
254 : : }
255 : :
256 : 195 : return bHasBracket;
257 : : }
258 : :
259 : : } // namespace
260 : :
261 : : // ============================================================================
262 : :
263 : 6 : InputStream::InputStream( const Reference< XComponentContext >& rxContext, const Reference< XInputStream >& rxInStrm ) :
264 : : // use single-byte ISO-8859-1 encoding which maps all byte characters to the first 256 Unicode characters
265 : : mxTextStrm( TextInputStream::createXTextInputStream( rxContext, rxInStrm, RTL_TEXTENCODING_ISO_8859_1 ) ),
266 : : maOpeningBracket( 1 ),
267 : : maClosingBracket( 1 ),
268 : : maOpeningCData( CREATE_OSTRING( "<![CDATA[" ) ),
269 : : maClosingCData( CREATE_OSTRING( "]]>" ) ),
270 [ + - ][ + - ]: 6 : mnBufferPos( 0 )
[ + - ]
271 : : {
272 [ + - ]: 6 : maOpeningBracket[ 0 ] = '<';
273 [ + - ]: 6 : maClosingBracket[ 0 ] = '>';
274 : 6 : }
275 : :
276 [ + - ][ + - ]: 6 : InputStream::~InputStream()
277 : : {
278 [ - + ]: 12 : }
279 : :
280 : 12 : sal_Int32 SAL_CALL InputStream::readBytes( Sequence< sal_Int8 >& rData, sal_Int32 nBytesToRead )
281 : : throw (NotConnectedException, BufferSizeExceededException, IOException, RuntimeException)
282 : : {
283 [ - + ]: 12 : if( nBytesToRead < 0 )
284 [ # # ]: 0 : throw IOException();
285 : :
286 : 12 : rData.realloc( nBytesToRead );
287 : 12 : sal_Int8* pcDest = rData.getArray();
288 : 12 : sal_Int32 nRet = 0;
289 [ + - ][ + + ]: 207 : while( (nBytesToRead > 0) && !mxTextStrm->isEOF() )
[ + + ]
290 : : {
291 : 195 : updateBuffer();
292 [ + - ]: 195 : sal_Int32 nReadSize = ::std::min( nBytesToRead, maBuffer.getLength() - mnBufferPos );
293 [ + - ]: 195 : if( nReadSize > 0 )
294 : : {
295 : 195 : memcpy( pcDest + nRet, maBuffer.getStr() + mnBufferPos, static_cast< size_t >( nReadSize ) );
296 : 195 : mnBufferPos += nReadSize;
297 : 195 : nBytesToRead -= nReadSize;
298 : 195 : nRet += nReadSize;
299 : : }
300 : : }
301 [ + - ]: 12 : if( nRet < rData.getLength() )
302 : 12 : rData.realloc( nRet );
303 : 12 : return nRet;
304 : : }
305 : :
306 : 12 : sal_Int32 SAL_CALL InputStream::readSomeBytes( Sequence< sal_Int8 >& rData, sal_Int32 nMaxBytesToRead )
307 : : throw (NotConnectedException, BufferSizeExceededException, IOException, RuntimeException)
308 : : {
309 : 12 : return readBytes( rData, nMaxBytesToRead );
310 : : }
311 : :
312 : 0 : void SAL_CALL InputStream::skipBytes( sal_Int32 nBytesToSkip )
313 : : throw (NotConnectedException, BufferSizeExceededException, IOException, RuntimeException)
314 : : {
315 [ # # ]: 0 : if( nBytesToSkip < 0 )
316 [ # # ]: 0 : throw IOException();
317 : :
318 [ # # ][ # # ]: 0 : while( (nBytesToSkip > 0) && !mxTextStrm->isEOF() )
[ # # ]
319 : : {
320 : 0 : updateBuffer();
321 [ # # ]: 0 : sal_Int32 nSkipSize = ::std::min( nBytesToSkip, maBuffer.getLength() - mnBufferPos );
322 : 0 : mnBufferPos += nSkipSize;
323 : 0 : nBytesToSkip -= nSkipSize;
324 : : }
325 : 0 : }
326 : :
327 : 0 : sal_Int32 SAL_CALL InputStream::available() throw (NotConnectedException, IOException, RuntimeException)
328 : : {
329 : 0 : updateBuffer();
330 : 0 : return maBuffer.getLength() - mnBufferPos;
331 : : }
332 : :
333 : 0 : void SAL_CALL InputStream::closeInput() throw (NotConnectedException, IOException, RuntimeException)
334 : : {
335 : 0 : mxTextStrm->closeInput();
336 : 0 : }
337 : :
338 : : // private --------------------------------------------------------------------
339 : :
340 : 195 : void InputStream::updateBuffer() throw (IOException, RuntimeException)
341 : : {
342 [ + + ][ + - ]: 390 : while( (mnBufferPos >= maBuffer.getLength()) && !mxTextStrm->isEOF() )
[ + + ]
343 : : {
344 : : // collect new contents in a string buffer
345 : 195 : OStringBuffer aBuffer;
346 : :
347 : : // read and process characters until the opening bracket of the next XML element
348 [ + - ]: 195 : OString aChars = readToElementBegin();
349 [ + - ]: 195 : bool bHasOpeningBracket = lclProcessCharacters( aBuffer, aChars );
350 : :
351 : : // read and process characters until (and including) closing bracket (an XML element)
352 : : OSL_ENSURE( bHasOpeningBracket || mxTextStrm->isEOF(), "InputStream::updateBuffer - missing opening bracket of XML element" );
353 [ + - ][ + - ]: 195 : if( bHasOpeningBracket && !mxTextStrm->isEOF() )
[ + - ][ + - ]
[ + - ]
354 : : {
355 : : // read the element text (add the leading opening bracket manually)
356 [ + - ]: 195 : OString aElement = OString( '<' ) + readToElementEnd();
357 : : // check for CDATA part, starting with '<![CDATA['
358 [ - + ]: 195 : if( aElement.match( maOpeningCData ) )
359 : : {
360 : : // search the end tag ']]>'
361 [ # # ][ # # ]: 0 : while( ((aElement.getLength() < maClosingCData.getLength()) || !aElement.match( maClosingCData, aElement.getLength() - maClosingCData.getLength() )) && !mxTextStrm->isEOF() )
[ # # ][ # # ]
[ # # ][ # # ]
362 [ # # ]: 0 : aElement += readToElementEnd();
363 : : // copy the entire CDATA part
364 [ # # ]: 0 : aBuffer.append( aElement );
365 : : }
366 : : else
367 : : {
368 : : // no CDATA part - process the contents of the element
369 [ + - ]: 195 : lclProcessElement( aBuffer, aElement );
370 : 195 : }
371 : : }
372 : :
373 : 195 : maBuffer = aBuffer.makeStringAndClear();
374 : 195 : mnBufferPos = 0;
375 : 195 : }
376 : 195 : }
377 : :
378 : 195 : OString InputStream::readToElementBegin() throw (IOException, RuntimeException)
379 : : {
380 [ + - ]: 195 : return OUStringToOString( mxTextStrm->readString( maOpeningBracket, sal_False ), RTL_TEXTENCODING_ISO_8859_1 );
381 : : }
382 : :
383 : 195 : OString InputStream::readToElementEnd() throw (IOException, RuntimeException)
384 : : {
385 [ + - ]: 195 : OString aText = OUStringToOString( mxTextStrm->readString( maClosingBracket, sal_False ), RTL_TEXTENCODING_ISO_8859_1 );
386 : : OSL_ENSURE( !aText.isEmpty() && (aText[ aText.getLength() - 1 ] == '>'), "InputStream::readToElementEnd - missing closing bracket of XML element" );
387 : 195 : return aText;
388 : : }
389 : :
390 : : // ============================================================================
391 : :
392 : : } // namespace vml
393 : : } // namespace oox
394 : :
395 : : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|