Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * Version: MPL 1.1 / GPLv3+ / LGPLv3+
4 : *
5 : * The contents of this file are subject to the Mozilla Public License Version
6 : * 1.1 (the "License"); you may not use this file except in compliance with
7 : * the License or as specified alternatively below. You may obtain a copy of
8 : * the License at http://www.mozilla.org/MPL/
9 : *
10 : * Software distributed under the License is distributed on an "AS IS" basis,
11 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : * for the specific language governing rights and limitations under the
13 : * License.
14 : *
15 : * The Initial Developer of the Original Code is
16 : * Fong Lin <pflin@novell.com>
17 : * Portions created by the Initial Developer are Copyright (C) 2010 the
18 : * Initial Developer. All Rights Reserved.
19 : *
20 : * Contributor(s):
21 : * Fong Lin <pflin@novell.com>
22 : * Noel Power <noel.power@novell.com>
23 : *
24 : * For minor contributions see the git repository.
25 : *
26 : * Alternatively, the contents of this file may be used under the terms of
27 : * either the GNU General Public License Version 3 or later (the "GPLv3+"), or
28 : * the GNU Lesser General Public License Version 3 or later (the "LGPLv3+"),
29 : * in which case the provisions of the GPLv3+ or the LGPLv3+ are applicable
30 : * instead of those above.
31 : */
32 : #include <osl/diagnose.h>
33 : #include <sal/macros.h>
34 : #include <rtl/tencinfo.h>
35 : #include <com/sun/star/lang/XMultiServiceFactory.hpp>
36 : #include <com/sun/star/io/XInputStream.hpp>
37 : #include <com/sun/star/xml/sax/XAttributeList.hpp>
38 : #include <com/sun/star/xml/sax/XDocumentHandler.hpp>
39 : #include <com/sun/star/xml/sax/XParser.hpp>
40 : #include <com/sun/star/ucb/XCommandEnvironment.hpp>
41 : #include <com/sun/star/io/XInputStream.hpp>
42 : #include <com/sun/star/uno/Reference.hxx>
43 : #include <comphelper/processfactory.hxx>
44 : #include <xmloff/attrlist.hxx>
45 :
46 : #include <ucbhelper/content.hxx>
47 :
48 : #include <tools/stream.hxx>
49 :
50 : #include "LotusWordProImportFilter.hxx"
51 :
52 : #include <vector>
53 :
54 : #include "lwpfilter.hxx"
55 :
56 : using namespace com::sun::star;
57 : using rtl::OString;
58 : using rtl::OUStringBuffer;
59 : using com::sun::star::uno::Sequence;
60 : using com::sun::star::lang::XComponent;
61 : using com::sun::star::uno::Any;
62 : using com::sun::star::uno::UNO_QUERY;
63 : using com::sun::star::uno::XInterface;
64 : using com::sun::star::uno::Exception;
65 : using com::sun::star::uno::RuntimeException;
66 : using com::sun::star::io::XInputStream;
67 : using com::sun::star::lang::XMultiServiceFactory;
68 : using com::sun::star::beans::PropertyValue;
69 : using com::sun::star::document::XFilter;
70 : using com::sun::star::document::XExtendedFilterDetection;
71 : using com::sun::star::ucb::XCommandEnvironment;
72 :
73 : using com::sun::star::document::XImporter;
74 : using com::sun::star::xml::sax::XAttributeList;
75 : using com::sun::star::xml::sax::XDocumentHandler;
76 : using com::sun::star::xml::sax::XParser;
77 :
78 : // W o r d P r o
79 : static const sal_Int8 header[] = { 0x57, 0x6f, 0x72, 0x64, 0x50, 0x72, 0x6f };
80 :
81 : const sal_Int32 MAXCHARS = 65534;
82 :
83 : // Simple xml importer, currently the importer is very very simple
84 : // it only extracts pure text from the wordpro file. Absolutely no formating
85 : // information is currently imported.
86 : // To reflect the current state of this importer the sax events sent
87 : // to the document handler are also the simplest possible. In addition to
88 : // the the basic attributes set up for the 'office:document' element
89 : // all the imported text is inserted into 'text:p' elements.
90 : // The parser extracts the pure text and creates simple a simple 'text:p'
91 : // element to contain that text. In the event of the text exceeding
92 : // MAXCHARS new 'text:p' elements are created as needed
93 : class SimpleXMLImporter
94 : {
95 : private:
96 :
97 : uno::Reference< XDocumentHandler > m_xDocHandler;
98 : std::vector< OUString > m_vStringChunks;
99 : SvStream& m_InputStream;
100 :
101 : bool CheckValidData( sal_Int8 nChar ) const
102 : {
103 : if( ( nChar >= 0x20 && nChar <= 0x7E ) && ( nChar != 0X40 ) )
104 : return true;
105 : return false;
106 : }
107 :
108 : void addAttribute( SvXMLAttributeList* pAttrList, const char* key, const char* val )
109 : {
110 : pAttrList->AddAttribute( OUString::createFromAscii( key ), OUString::createFromAscii( val ) );
111 : }
112 :
113 : void writeTextChunk( const OUString& sChunk )
114 : {
115 : SvXMLAttributeList *pAttrList = new SvXMLAttributeList();
116 : uno::Reference < XAttributeList > xAttrList(pAttrList);
117 :
118 : pAttrList->AddAttribute( "text:style-name", "Standard" );
119 :
120 : m_xDocHandler->startElement( "text:p", xAttrList );
121 : m_xDocHandler->characters( sChunk );
122 : m_xDocHandler->endElement( "text:p" );
123 : }
124 :
125 : void writeDocContentPreamble()
126 : {
127 : SvXMLAttributeList *pDocContentPropList = new SvXMLAttributeList();
128 : uno::Reference < XAttributeList > xDocContentList(pDocContentPropList);
129 : addAttribute( pDocContentPropList, "xmlns:office", "urn:oasis:names:tc:opendocument:xmlns:office:1.0" );
130 : addAttribute( pDocContentPropList, "xmlns:style", "urn:oasis:names:tc:opendocument:xmlns:style:1.0");
131 : addAttribute( pDocContentPropList, "xmlns:text", "urn:oasis:names:tc:opendocument:xmlns:text:1.0" );
132 : addAttribute( pDocContentPropList, "xmlns:table", "urn:oasis:names:tc:opendocument:xmlns:table:1.0" );
133 : addAttribute( pDocContentPropList, "xmlns:draw", "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" );
134 : addAttribute( pDocContentPropList, "xmlns:fo", "urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" );
135 : addAttribute( pDocContentPropList, "xmlns:xlink", "http://www.w3.org/1999/xlink" );
136 : addAttribute( pDocContentPropList, "xmlns:dc", "http://purl.org/dc/elements/1.1/" );
137 : addAttribute( pDocContentPropList, "xmlns:meta", "urn:oasis:names:tc:opendocument:xmlns:meta:1.0" );
138 : addAttribute( pDocContentPropList, "xmlns:number", "urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" );
139 : addAttribute( pDocContentPropList, "xmlns:svg", "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" );
140 : addAttribute( pDocContentPropList, "xmlns:chart", "urn:oasis:names:tc:opendocument:xmlns:chart:1.0" );
141 : addAttribute( pDocContentPropList, "xmlns:dr3d", "urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" );
142 : addAttribute( pDocContentPropList, "xmlns:math", "http://www.w3.org/1998/Math/MathML" );
143 : addAttribute( pDocContentPropList, "xmlns:form", "urn:oasis:names:tc:opendocument:xmlns:form:1.0" );
144 : addAttribute( pDocContentPropList, "xmlns:script", "urn:oasis:names:tc:opendocument:xmlns:script:1.0" );
145 : addAttribute( pDocContentPropList, "xmlns:ooo", "http://openoffice.org/2004/office" );
146 : addAttribute( pDocContentPropList, "xmlns:ooow", "http://openoffice.org/2004/writer" );
147 : addAttribute( pDocContentPropList, "xmlns:oooc", "http://openoffice.org/2004/calc" );
148 : addAttribute( pDocContentPropList, "xmlns:dom", "http://www.w3.org/2001/xml-events" );
149 : addAttribute( pDocContentPropList, "xmlns:xforms", "http://www.w3.org/2002/xforms" );
150 : addAttribute( pDocContentPropList, "xmlns:xsd", "http://www.w3.org/2001/XMLSchema");
151 : addAttribute( pDocContentPropList, "xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance" );
152 : addAttribute( pDocContentPropList, "office:version", "1.0");
153 : m_xDocHandler->startElement("office:document-content" , xDocContentList );
154 : }
155 :
156 : void parseDoc()
157 : {
158 : sal_uInt8 nDelim, nDummy, nLen, nData;
159 : sal_uInt16 nOpcode;
160 : OUStringBuffer sBuf( MAXCHARS );
161 : sal_Int32 nChars = 0;
162 :
163 : while( !m_InputStream.IsEof())
164 : {
165 : m_InputStream >> nDelim;
166 : if( nDelim == 0x40 )
167 : {
168 : m_InputStream >> nDummy >> nOpcode;
169 : switch( nOpcode )
170 : {
171 : case 0xC00B: // Dictionary Word
172 : m_InputStream >> nLen >> nDummy;
173 : while( nLen > 0 && !m_InputStream.IsEof() )
174 : {
175 : sal_uInt8 nChar;
176 : m_InputStream >> nChar;
177 : if( CheckValidData( nChar ) )
178 : {
179 : sBuf.appendAscii( (sal_Char*)(&nChar),1 );
180 : if ( ++nChars >= MAXCHARS )
181 : {
182 : m_vStringChunks.push_back( sBuf.makeStringAndClear() );
183 : nChars = 0;
184 : }
185 : }
186 : nLen--;
187 : }
188 : break;
189 :
190 : case 0x0242: // Non Dictionary word
191 : m_InputStream >> nData;
192 : if( nData == 0x02 )
193 : {
194 : m_InputStream >> nLen >> nDummy;
195 : while( nLen > 0 && !m_InputStream.IsEof() )
196 : {
197 : m_InputStream >> nData;
198 : if( CheckValidData( nData ) )
199 : {
200 : sBuf.appendAscii( (sal_Char*)(&nData),1 );
201 : if ( ++nChars >= MAXCHARS )
202 : {
203 : m_vStringChunks.push_back( sBuf.makeStringAndClear() );
204 : nChars = 0;
205 : }
206 : }
207 : nLen--;
208 : }
209 : }
210 : break;
211 : }
212 : }
213 : }
214 : if ( nChars )
215 : m_vStringChunks.push_back( sBuf.makeStringAndClear() );
216 : }
217 :
218 : void writeXML()
219 : {
220 : if ( !m_vStringChunks.empty() )
221 : {
222 : m_xDocHandler->startDocument();
223 : SvXMLAttributeList *pAttrList = new SvXMLAttributeList();
224 : writeDocContentPreamble(); // writes "office:document-content" elem
225 : uno::Reference < XAttributeList > xAttrList(pAttrList);
226 :
227 : m_xDocHandler->startElement( "office:body", xAttrList );
228 :
229 : // process strings imported
230 : std::vector< OUString >::const_iterator it = m_vStringChunks.begin();
231 : std::vector< OUString >::const_iterator it_end = m_vStringChunks.end();
232 : for ( ; it!=it_end; ++it )
233 : writeTextChunk( *it );
234 :
235 : m_xDocHandler->endElement( "office:body" );
236 : m_xDocHandler->endElement( "office:document-content" );
237 : m_xDocHandler->endDocument();
238 : }
239 : }
240 : public:
241 :
242 : SimpleXMLImporter( const uno::Reference< XDocumentHandler >& xDocHandler, SvStream& rStream ) : m_xDocHandler( xDocHandler ), m_InputStream( rStream ) {}
243 :
244 : void import()
245 : {
246 : parseDoc();
247 : writeXML();
248 : }
249 : };
250 :
251 6 : sal_Bool SAL_CALL LotusWordProImportFilter::importImpl( const Sequence< ::com::sun::star::beans::PropertyValue >& aDescriptor )
252 : throw (RuntimeException)
253 : {
254 :
255 6 : sal_Int32 nLength = aDescriptor.getLength();
256 6 : const PropertyValue * pValue = aDescriptor.getConstArray();
257 6 : OUString sURL;
258 12 : for ( sal_Int32 i = 0 ; i < nLength; i++)
259 : {
260 : //Note, we should attempt to use InputStream here first!
261 6 : if ( pValue[i].Name == "URL" )
262 6 : pValue[i].Value >>= sURL;
263 : }
264 :
265 6 : SvFileStream inputStream( sURL, STREAM_READ );
266 6 : if ( inputStream.IsEof() || ( inputStream.GetError() != SVSTREAM_OK ) )
267 0 : return sal_False;
268 :
269 : // An XML import service: what we push sax messages to..
270 6 : OUString sXMLImportService ( "com.sun.star.comp.Writer.XMLImporter" );
271 :
272 6 : uno::Reference< XDocumentHandler > xInternalHandler( mxMSF->createInstance( sXMLImportService ), UNO_QUERY );
273 6 : uno::Reference < XImporter > xImporter(xInternalHandler, UNO_QUERY);
274 6 : if (xImporter.is())
275 0 : xImporter->setTargetDocument(mxDoc);
276 :
277 6 : return ( ReadWordproFile( inputStream, xInternalHandler) == 0 );
278 :
279 : }
280 :
281 6 : sal_Bool SAL_CALL LotusWordProImportFilter::filter( const Sequence< ::com::sun::star::beans::PropertyValue >& aDescriptor )
282 : throw (RuntimeException)
283 : {
284 6 : return importImpl ( aDescriptor );
285 : }
286 0 : void SAL_CALL LotusWordProImportFilter::cancel( )
287 : throw (RuntimeException)
288 : {
289 0 : }
290 :
291 : // XImporter
292 0 : void SAL_CALL LotusWordProImportFilter::setTargetDocument( const uno::Reference< ::com::sun::star::lang::XComponent >& xDoc )
293 : throw (::com::sun::star::lang::IllegalArgumentException, RuntimeException)
294 : {
295 0 : meType = FILTER_IMPORT;
296 0 : mxDoc = xDoc;
297 0 : }
298 :
299 : // XExtendedFilterDetection
300 0 : OUString SAL_CALL LotusWordProImportFilter::detect( com::sun::star::uno::Sequence< PropertyValue >& Descriptor )
301 : throw( com::sun::star::uno::RuntimeException )
302 : {
303 :
304 0 : OUString sTypeName( "writer_LotusWordPro_Document" );
305 0 : sal_Int32 nLength = Descriptor.getLength();
306 0 : OUString sURL;
307 0 : const PropertyValue * pValue = Descriptor.getConstArray();
308 0 : uno::Reference < XInputStream > xInputStream;
309 0 : for ( sal_Int32 i = 0 ; i < nLength; i++)
310 : {
311 0 : if ( pValue[i].Name == "TypeName" )
312 0 : pValue[i].Value >>= sTypeName;
313 0 : else if ( pValue[i].Name == "InputStream" )
314 0 : pValue[i].Value >>= xInputStream;
315 0 : else if ( pValue[i].Name == "URL" )
316 0 : pValue[i].Value >>= sURL;
317 : }
318 :
319 0 : uno::Reference< com::sun::star::ucb::XCommandEnvironment > xEnv;
320 0 : if (!xInputStream.is())
321 : {
322 : try
323 : {
324 0 : ::ucbhelper::Content aContent(sURL, xEnv, comphelper::getComponentContext(mxMSF));
325 0 : xInputStream = aContent.openStream();
326 : }
327 0 : catch ( Exception& )
328 : {
329 0 : return ::rtl::OUString();
330 : }
331 :
332 0 : if (!xInputStream.is())
333 0 : return ::rtl::OUString();
334 : }
335 :
336 0 : Sequence< ::sal_Int8 > aData;
337 0 : sal_Int32 nLen = SAL_N_ELEMENTS( header );
338 0 : if ( !( ( nLen == xInputStream->readBytes( aData, nLen ) )
339 0 : && ( memcmp( ( void* )header, (void*) aData.getConstArray(), nLen ) == 0 ) ) )
340 0 : sTypeName = ::rtl::OUString();
341 :
342 0 : return sTypeName;
343 : }
344 :
345 :
346 : // XInitialization
347 0 : void SAL_CALL LotusWordProImportFilter::initialize( const Sequence< Any >& aArguments )
348 : throw (Exception, RuntimeException)
349 : {
350 0 : Sequence < PropertyValue > aAnySeq;
351 0 : sal_Int32 nLength = aArguments.getLength();
352 0 : if ( nLength && ( aArguments[0] >>= aAnySeq ) )
353 : {
354 0 : const PropertyValue * pValue = aAnySeq.getConstArray();
355 0 : nLength = aAnySeq.getLength();
356 0 : for ( sal_Int32 i = 0 ; i < nLength; i++)
357 : {
358 0 : if ( pValue[i].Name == "Type" )
359 : {
360 0 : pValue[i].Value >>= msFilterName;
361 0 : break;
362 : }
363 : }
364 0 : }
365 0 : }
366 1 : OUString LotusWordProImportFilter_getImplementationName ()
367 : throw (RuntimeException)
368 : {
369 1 : return OUString ( "com.sun.star.comp.Writer.LotusWordProImportFilter" );
370 : }
371 :
372 : #define SERVICE_NAME1 "com.sun.star.document.ImportFilter"
373 : #define SERVICE_NAME2 "com.sun.star.document.ExtendedTypeDetection"
374 0 : sal_Bool SAL_CALL LotusWordProImportFilter_supportsService( const OUString& ServiceName )
375 : throw (RuntimeException)
376 : {
377 0 : return ServiceName == SERVICE_NAME1 || ServiceName == SERVICE_NAME2;
378 : }
379 1 : Sequence< OUString > SAL_CALL LotusWordProImportFilter_getSupportedServiceNames( )
380 : throw (RuntimeException)
381 : {
382 1 : Sequence < OUString > aRet(2);
383 1 : OUString* pArray = aRet.getArray();
384 1 : pArray[0] = OUString ( SERVICE_NAME1 );
385 1 : pArray[1] = OUString ( SERVICE_NAME2 );
386 1 : return aRet;
387 : }
388 : #undef SERVICE_NAME2
389 : #undef SERVICE_NAME1
390 :
391 1 : uno::Reference< XInterface > SAL_CALL LotusWordProImportFilter_createInstance( const uno::Reference< XMultiServiceFactory > & rSMgr)
392 : throw( Exception )
393 : {
394 1 : return (cppu::OWeakObject*) new LotusWordProImportFilter( rSMgr );
395 : }
396 :
397 : // XServiceInfo
398 0 : OUString SAL_CALL LotusWordProImportFilter::getImplementationName( )
399 : throw (RuntimeException)
400 : {
401 0 : return LotusWordProImportFilter_getImplementationName();
402 : }
403 0 : sal_Bool SAL_CALL LotusWordProImportFilter::supportsService( const OUString& rServiceName )
404 : throw (RuntimeException)
405 : {
406 0 : return LotusWordProImportFilter_supportsService( rServiceName );
407 : }
408 0 : Sequence< OUString > SAL_CALL LotusWordProImportFilter::getSupportedServiceNames( )
409 : throw (RuntimeException)
410 : {
411 0 : return LotusWordProImportFilter_getSupportedServiceNames();
412 : }
413 :
414 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|