Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include <com/sun/star/io/XInputStream.hpp>
21 : #include <com/sun/star/lang/XMultiServiceFactory.hpp>
22 : #include <com/sun/star/xml/sax/XAttributeList.hpp>
23 : #include <com/sun/star/xml/sax/XDocumentHandler.hpp>
24 : #include <com/sun/star/xml/sax/XParser.hpp>
25 : #include <com/sun/star/ucb/XCommandEnvironment.hpp>
26 : #include <com/sun/star/uno/Reference.hxx>
27 : #include <comphelper/processfactory.hxx>
28 : #include <cppuhelper/supportsservice.hxx>
29 : #include <osl/diagnose.h>
30 : #include <rtl/tencinfo.h>
31 : #include <sal/macros.h>
32 : #include <tools/stream.hxx>
33 : #include <vector>
34 : #include <ucbhelper/content.hxx>
35 : #include <xmloff/attrlist.hxx>
36 :
37 : #include "LotusWordProImportFilter.hxx"
38 : #include "lwpfilter.hxx"
39 :
40 : using namespace com::sun::star;
41 : using com::sun::star::uno::Sequence;
42 : using com::sun::star::lang::XComponent;
43 : using com::sun::star::uno::Any;
44 : using com::sun::star::uno::UNO_QUERY;
45 : using com::sun::star::uno::XInterface;
46 : using com::sun::star::uno::Exception;
47 : using com::sun::star::uno::RuntimeException;
48 : using com::sun::star::io::XInputStream;
49 : using com::sun::star::lang::XMultiServiceFactory;
50 : using com::sun::star::beans::PropertyValue;
51 : using com::sun::star::document::XFilter;
52 : using com::sun::star::document::XExtendedFilterDetection;
53 : using com::sun::star::ucb::XCommandEnvironment;
54 :
55 : using com::sun::star::document::XImporter;
56 : using com::sun::star::xml::sax::XAttributeList;
57 : using com::sun::star::xml::sax::XDocumentHandler;
58 : using com::sun::star::xml::sax::XParser;
59 :
60 : // W o r d P r o
61 : static const sal_Int8 header[] = { 0x57, 0x6f, 0x72, 0x64, 0x50, 0x72, 0x6f };
62 :
63 : const sal_Int32 MAXCHARS = 65534;
64 :
65 : // Simple xml importer, currently the importer is very very simple
66 : // it only extracts pure text from the wordpro file. Absolutely no formatting
67 : // information is currently imported.
68 : // To reflect the current state of this importer the sax events sent
69 : // to the document handler are also the simplest possible. In addition to
70 : // the basic attributes set up for the 'office:document' element
71 : // all the imported text is inserted into 'text:p' elements.
72 : // The parser extracts the pure text and creates simple a simple 'text:p'
73 : // element to contain that text. In the event of the text exceeding
74 : // MAXCHARS new 'text:p' elements are created as needed
75 : class SimpleXMLImporter
76 : {
77 : private:
78 :
79 : uno::Reference< XDocumentHandler > m_xDocHandler;
80 : std::vector< OUString > m_vStringChunks;
81 : SvStream& m_InputStream;
82 :
83 : bool CheckValidData( sal_Int8 nChar ) const
84 : {
85 : if( ( nChar >= 0x20 && nChar <= 0x7E ) && ( nChar != 0X40 ) )
86 : return true;
87 : return false;
88 : }
89 :
90 : void addAttribute( SvXMLAttributeList* pAttrList, const char* key, const char* val )
91 : {
92 : pAttrList->AddAttribute( OUString::createFromAscii( key ), OUString::createFromAscii( val ) );
93 : }
94 :
95 : void writeTextChunk( const OUString& sChunk )
96 : {
97 : SvXMLAttributeList *pAttrList = new SvXMLAttributeList();
98 : uno::Reference < XAttributeList > xAttrList(pAttrList);
99 :
100 : pAttrList->AddAttribute( "text:style-name", "Standard" );
101 :
102 : m_xDocHandler->startElement( "text:p", xAttrList );
103 : m_xDocHandler->characters( sChunk );
104 : m_xDocHandler->endElement( "text:p" );
105 : }
106 :
107 : void writeDocContentPreamble()
108 : {
109 : SvXMLAttributeList *pDocContentPropList = new SvXMLAttributeList();
110 : uno::Reference < XAttributeList > xDocContentList(pDocContentPropList);
111 : addAttribute( pDocContentPropList, "xmlns:office", "urn:oasis:names:tc:opendocument:xmlns:office:1.0" );
112 : addAttribute( pDocContentPropList, "xmlns:style", "urn:oasis:names:tc:opendocument:xmlns:style:1.0");
113 : addAttribute( pDocContentPropList, "xmlns:text", "urn:oasis:names:tc:opendocument:xmlns:text:1.0" );
114 : addAttribute( pDocContentPropList, "xmlns:table", "urn:oasis:names:tc:opendocument:xmlns:table:1.0" );
115 : addAttribute( pDocContentPropList, "xmlns:draw", "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" );
116 : addAttribute( pDocContentPropList, "xmlns:fo", "urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" );
117 : addAttribute( pDocContentPropList, "xmlns:xlink", "http://www.w3.org/1999/xlink" );
118 : addAttribute( pDocContentPropList, "xmlns:dc", "http://purl.org/dc/elements/1.1/" );
119 : addAttribute( pDocContentPropList, "xmlns:meta", "urn:oasis:names:tc:opendocument:xmlns:meta:1.0" );
120 : addAttribute( pDocContentPropList, "xmlns:number", "urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" );
121 : addAttribute( pDocContentPropList, "xmlns:svg", "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" );
122 : addAttribute( pDocContentPropList, "xmlns:chart", "urn:oasis:names:tc:opendocument:xmlns:chart:1.0" );
123 : addAttribute( pDocContentPropList, "xmlns:dr3d", "urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" );
124 : addAttribute( pDocContentPropList, "xmlns:math", "http://www.w3.org/1998/Math/MathML" );
125 : addAttribute( pDocContentPropList, "xmlns:form", "urn:oasis:names:tc:opendocument:xmlns:form:1.0" );
126 : addAttribute( pDocContentPropList, "xmlns:script", "urn:oasis:names:tc:opendocument:xmlns:script:1.0" );
127 : addAttribute( pDocContentPropList, "xmlns:ooo", "http://openoffice.org/2004/office" );
128 : addAttribute( pDocContentPropList, "xmlns:ooow", "http://openoffice.org/2004/writer" );
129 : addAttribute( pDocContentPropList, "xmlns:oooc", "http://openoffice.org/2004/calc" );
130 : addAttribute( pDocContentPropList, "xmlns:dom", "http://www.w3.org/2001/xml-events" );
131 : addAttribute( pDocContentPropList, "xmlns:xforms", "http://www.w3.org/2002/xforms" );
132 : addAttribute( pDocContentPropList, "xmlns:xsd", "http://www.w3.org/2001/XMLSchema");
133 : addAttribute( pDocContentPropList, "xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance" );
134 : addAttribute( pDocContentPropList, "office:version", "1.0");
135 : m_xDocHandler->startElement("office:document-content" , xDocContentList );
136 : }
137 :
138 : void parseDoc()
139 : {
140 : sal_uInt8 nDelim, nDummy, nLen, nData;
141 : sal_uInt16 nOpcode;
142 : OUStringBuffer sBuf( MAXCHARS );
143 : sal_Int32 nChars = 0;
144 :
145 : while( !m_InputStream.IsEof())
146 : {
147 : m_InputStream.ReadUChar( nDelim );
148 : if( nDelim == 0x40 )
149 : {
150 : m_InputStream.ReadUChar( nDummy ).ReadUInt16( nOpcode );
151 : switch( nOpcode )
152 : {
153 : case 0xC00B: // Dictionary Word
154 : m_InputStream.ReadUChar( nLen ).ReadUChar( nDummy );
155 : while( nLen > 0 && !m_InputStream.IsEof() )
156 : {
157 : sal_uInt8 nChar;
158 : m_InputStream.ReadUChar( nChar );
159 : if( CheckValidData( nChar ) )
160 : {
161 : sBuf.appendAscii( (sal_Char*)(&nChar),1 );
162 : if ( ++nChars >= MAXCHARS )
163 : {
164 : m_vStringChunks.push_back( sBuf.makeStringAndClear() );
165 : nChars = 0;
166 : }
167 : }
168 : nLen--;
169 : }
170 : break;
171 :
172 : case 0x0242: // Non Dictionary word
173 : m_InputStream.ReadUChar( nData );
174 : if( nData == 0x02 )
175 : {
176 : m_InputStream.ReadUChar( nLen ).ReadUChar( nDummy );
177 : while( nLen > 0 && !m_InputStream.IsEof() )
178 : {
179 : m_InputStream.ReadUChar( nData );
180 : if( CheckValidData( nData ) )
181 : {
182 : sBuf.appendAscii( (sal_Char*)(&nData),1 );
183 : if ( ++nChars >= MAXCHARS )
184 : {
185 : m_vStringChunks.push_back( sBuf.makeStringAndClear() );
186 : nChars = 0;
187 : }
188 : }
189 : nLen--;
190 : }
191 : }
192 : break;
193 : }
194 : }
195 : }
196 : if ( nChars )
197 : m_vStringChunks.push_back( sBuf.makeStringAndClear() );
198 : }
199 :
200 : void writeXML()
201 : {
202 : if ( !m_vStringChunks.empty() )
203 : {
204 : m_xDocHandler->startDocument();
205 : SvXMLAttributeList *pAttrList = new SvXMLAttributeList();
206 : writeDocContentPreamble(); // writes "office:document-content" elem
207 : uno::Reference < XAttributeList > xAttrList(pAttrList);
208 :
209 : m_xDocHandler->startElement( "office:body", xAttrList );
210 :
211 : // process strings imported
212 : std::vector< OUString >::const_iterator it = m_vStringChunks.begin();
213 : std::vector< OUString >::const_iterator it_end = m_vStringChunks.end();
214 : for ( ; it!=it_end; ++it )
215 : writeTextChunk( *it );
216 :
217 : m_xDocHandler->endElement( "office:body" );
218 : m_xDocHandler->endElement( "office:document-content" );
219 : m_xDocHandler->endDocument();
220 : }
221 : }
222 : public:
223 :
224 : SimpleXMLImporter( const uno::Reference< XDocumentHandler >& xDocHandler, SvStream& rStream ) : m_xDocHandler( xDocHandler ), m_InputStream( rStream ) {}
225 :
226 : void import()
227 : {
228 : parseDoc();
229 : writeXML();
230 : }
231 : };
232 :
233 0 : sal_Bool SAL_CALL LotusWordProImportFilter::importImpl( const Sequence< ::com::sun::star::beans::PropertyValue >& aDescriptor )
234 : throw (RuntimeException)
235 : {
236 :
237 0 : sal_Int32 nLength = aDescriptor.getLength();
238 0 : const PropertyValue * pValue = aDescriptor.getConstArray();
239 0 : OUString sURL;
240 0 : for ( sal_Int32 i = 0 ; i < nLength; i++)
241 : {
242 : //Note, we should attempt to use InputStream here first!
243 0 : if ( pValue[i].Name == "URL" )
244 0 : pValue[i].Value >>= sURL;
245 : }
246 :
247 0 : SvFileStream inputStream( sURL, STREAM_READ );
248 0 : if ( inputStream.IsEof() || ( inputStream.GetError() != SVSTREAM_OK ) )
249 0 : return sal_False;
250 :
251 : // An XML import service: what we push sax messages to..
252 0 : OUString sXMLImportService ( "com.sun.star.comp.Writer.XMLImporter" );
253 :
254 0 : uno::Reference< XDocumentHandler > xInternalHandler( mxContext->getServiceManager()->createInstanceWithContext( sXMLImportService, mxContext ), UNO_QUERY );
255 0 : uno::Reference < XImporter > xImporter(xInternalHandler, UNO_QUERY);
256 0 : if (xImporter.is())
257 0 : xImporter->setTargetDocument(mxDoc);
258 :
259 0 : return ( ReadWordproFile( inputStream, xInternalHandler) == 0 );
260 :
261 : }
262 :
263 0 : sal_Bool SAL_CALL LotusWordProImportFilter::filter( const Sequence< ::com::sun::star::beans::PropertyValue >& aDescriptor )
264 : throw (RuntimeException, std::exception)
265 : {
266 0 : return importImpl ( aDescriptor );
267 : }
268 0 : void SAL_CALL LotusWordProImportFilter::cancel( )
269 : throw (RuntimeException, std::exception)
270 : {
271 0 : }
272 :
273 : // XImporter
274 0 : void SAL_CALL LotusWordProImportFilter::setTargetDocument( const uno::Reference< ::com::sun::star::lang::XComponent >& xDoc )
275 : throw (::com::sun::star::lang::IllegalArgumentException, RuntimeException, std::exception)
276 : {
277 0 : mxDoc = xDoc;
278 0 : }
279 :
280 : // XExtendedFilterDetection
281 0 : OUString SAL_CALL LotusWordProImportFilter::detect( com::sun::star::uno::Sequence< PropertyValue >& Descriptor )
282 : throw( com::sun::star::uno::RuntimeException, std::exception )
283 : {
284 :
285 0 : OUString sTypeName( "writer_LotusWordPro_Document" );
286 0 : sal_Int32 nLength = Descriptor.getLength();
287 0 : OUString sURL;
288 0 : const PropertyValue * pValue = Descriptor.getConstArray();
289 0 : uno::Reference < XInputStream > xInputStream;
290 0 : for ( sal_Int32 i = 0 ; i < nLength; i++)
291 : {
292 0 : if ( pValue[i].Name == "TypeName" )
293 0 : pValue[i].Value >>= sTypeName;
294 0 : else if ( pValue[i].Name == "InputStream" )
295 0 : pValue[i].Value >>= xInputStream;
296 0 : else if ( pValue[i].Name == "URL" )
297 0 : pValue[i].Value >>= sURL;
298 : }
299 :
300 0 : uno::Reference< com::sun::star::ucb::XCommandEnvironment > xEnv;
301 0 : if (!xInputStream.is())
302 : {
303 : try
304 : {
305 0 : ::ucbhelper::Content aContent(sURL, xEnv, mxContext);
306 0 : xInputStream = aContent.openStream();
307 : }
308 0 : catch ( Exception& )
309 : {
310 0 : return OUString();
311 : }
312 :
313 0 : if (!xInputStream.is())
314 0 : return OUString();
315 : }
316 :
317 0 : Sequence< ::sal_Int8 > aData;
318 0 : sal_Int32 nLen = SAL_N_ELEMENTS( header );
319 0 : if ( !( ( nLen == xInputStream->readBytes( aData, nLen ) )
320 0 : && ( memcmp( ( void* )header, (void*) aData.getConstArray(), nLen ) == 0 ) ) )
321 0 : sTypeName = OUString();
322 :
323 0 : return sTypeName;
324 : }
325 :
326 : // XInitialization
327 0 : void SAL_CALL LotusWordProImportFilter::initialize( const Sequence< Any >& aArguments )
328 : throw (Exception, RuntimeException, std::exception)
329 : {
330 0 : Sequence < PropertyValue > aAnySeq;
331 0 : sal_Int32 nLength = aArguments.getLength();
332 0 : if ( nLength && ( aArguments[0] >>= aAnySeq ) )
333 : {
334 0 : const PropertyValue * pValue = aAnySeq.getConstArray();
335 0 : nLength = aAnySeq.getLength();
336 0 : for ( sal_Int32 i = 0 ; i < nLength; i++)
337 : {
338 0 : if ( pValue[i].Name == "Type" )
339 : {
340 0 : pValue[i].Value >>= msFilterName;
341 0 : break;
342 : }
343 : }
344 0 : }
345 0 : }
346 :
347 0 : OUString LotusWordProImportFilter_getImplementationName ()
348 : throw (RuntimeException)
349 : {
350 0 : return OUString ( "com.sun.star.comp.Writer.LotusWordProImportFilter" );
351 : }
352 :
353 : #define SERVICE_NAME1 "com.sun.star.document.ImportFilter"
354 : #define SERVICE_NAME2 "com.sun.star.document.ExtendedTypeDetection"
355 :
356 0 : Sequence< OUString > SAL_CALL LotusWordProImportFilter_getSupportedServiceNames( )
357 : throw (RuntimeException)
358 : {
359 0 : Sequence < OUString > aRet(2);
360 0 : OUString* pArray = aRet.getArray();
361 0 : pArray[0] = OUString ( SERVICE_NAME1 );
362 0 : pArray[1] = OUString ( SERVICE_NAME2 );
363 0 : return aRet;
364 : }
365 :
366 : #undef SERVICE_NAME2
367 : #undef SERVICE_NAME1
368 :
369 0 : uno::Reference< XInterface > SAL_CALL LotusWordProImportFilter_createInstance( const uno::Reference< XMultiServiceFactory > & rSMgr)
370 : throw( Exception )
371 : {
372 0 : return (cppu::OWeakObject*) new LotusWordProImportFilter( comphelper::getComponentContext(rSMgr) );
373 : }
374 :
375 : // XServiceInfo
376 0 : OUString SAL_CALL LotusWordProImportFilter::getImplementationName( )
377 : throw (RuntimeException, std::exception)
378 : {
379 0 : return LotusWordProImportFilter_getImplementationName();
380 : }
381 0 : sal_Bool SAL_CALL LotusWordProImportFilter::supportsService( const OUString& rServiceName )
382 : throw (RuntimeException, std::exception)
383 : {
384 0 : return cppu::supportsService(this, rServiceName);
385 : }
386 0 : Sequence< OUString > SAL_CALL LotusWordProImportFilter::getSupportedServiceNames( )
387 : throw (RuntimeException, std::exception)
388 : {
389 0 : return LotusWordProImportFilter_getSupportedServiceNames();
390 : }
391 :
392 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|