LCOV - code coverage report
Current view: top level - filter/source/textfilterdetect - filterdetect.cxx (source / functions) Hit Total Coverage
Test: commit 0e63ca4fde4e446f346e35849c756a30ca294aab Lines: 55 94 58.5 %
Date: 2014-04-11 Functions: 8 12 66.7 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  */
       9             : 
      10             : #include "filterdetect.hxx"
      11             : 
      12             : #include <svtools/htmltokn.h>
      13             : #include <tools/urlobj.hxx>
      14             : #include <ucbhelper/content.hxx>
      15             : #include <unotools/mediadescriptor.hxx>
      16             : #include <unotools/ucbstreamhelper.hxx>
      17             : 
      18             : #include <com/sun/star/lang/XMultiServiceFactory.hpp>
      19             : #include <com/sun/star/io/XInputStream.hpp>
      20             : #include <cppuhelper/supportsservice.hxx>
      21             : #include <boost/scoped_ptr.hpp>
      22             : 
      23             : #define WRITER_TEXT_FILTER "Text"
      24             : #define CALC_TEXT_FILTER   "Text - txt - csv (StarCalc)"
                       // The two names above are the filter names PlainTextFilterDetect::detect() assigns for type "generic_Text".
      25             : 
      26             : #define WEB_HTML_FILTER    "HTML"
      27             : #define WRITER_HTML_FILTER "HTML (StarWriter)"
      28             : #define CALC_HTML_FILTER   "calc_HTML_WebQuery"
                       // The three names above are the filter names PlainTextFilterDetect::detect() assigns for type "generic_HTML".
      29             : 
      30             : #define WRITER_DOCSERVICE  "com.sun.star.text.TextDocument"
      31             : #define CALC_DOCSERVICE    "com.sun.star.sheet.SpreadsheetDocument"
                       // Document service names that detect() compares against the descriptor's document-service value.
      32             : 
      33             : using namespace ::com::sun::star;
      34             : using utl::MediaDescriptor;
      35             : 
      36             : namespace {
      37             : 
      38           3 : bool IsHTMLStream( const uno::Reference<io::XInputStream>& xInStream )
      39             : {
                           // Returns true when the beginning of xInStream looks like HTML: after optional
                           // leading whitespace the content must start with "<!" or with '<' followed by a
                           // tag name for which GetHTMLToken() returns a non-zero value.
                           // Returns false when no SvStream could be created, when the stream reports an
                           // error, or when the header does not match the pattern above.
                           // At most nSize (4096) units are read from the stream for this check.
      40           3 :     boost::scoped_ptr<SvStream> pInStream( utl::UcbStreamHelper::CreateStream( xInStream ) );
      41           3 :     if ( !pInStream || pInStream->GetError() )
      42             :         // No stream
      43           0 :         return false;
      44             : 
      45             :     // Read the stream header
                           // NOTE(review): presumably StartReadingUnicodeText() skips a byte-order mark so
                           // that Tell() afterwards yields the mark's length (0, 2 or 3) - confirm against SvStream.
      46           3 :     pInStream->StartReadingUnicodeText( RTL_TEXTENCODING_DONTKNOW );
      47           3 :     const sal_Size nUniPos = pInStream->Tell();
      48           3 :     const sal_uInt16 nSize = 4096;
      49             : 
      50           6 :     OString sHeader;
      51           3 :     if ( nUniPos == 3 || nUniPos == 0 ) // UTF-8 or non-Unicode
      52           3 :         sHeader = read_uInt8s_ToOString( *pInStream, nSize );
      53             :     else // UTF-16 (nUniPos = 2)
                               // The 16-bit text is converted to ASCII so the byte-wise scan below can be reused.
      54           0 :         sHeader = OUStringToOString( read_uInt16s_ToOUString( *pInStream, nSize ), RTL_TEXTENCODING_ASCII_US );
      55             : 
      56             :     // Now check whether the stream begins with a known HTML tag.
                           // Scanner states:
                           //   BeforeTag - no '<' has been seen yet
                           //   TagOpened - '<' was the immediately preceding character
                           //   InTagName - inside the tag name that starts at nStartOfTagIndex
      57             :     enum DetectPhase { BeforeTag, TagOpened, InTagName };
      58           3 :     DetectPhase dp = BeforeTag;
      59             : 
      60           3 :     const char* pHeader = sHeader.getStr();
      61           3 :     const int   nLength = sHeader.getLength();
      62           3 :     int i = 0, nStartOfTagIndex = 0;
      63             : 
      64           6 :     for ( i = 0; i < nLength; ++i, ++pHeader )
      65             :     {
      66           6 :         char c = *pHeader;
      67           6 :         if ( c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\f' )
      68             :         {
                                   // Whitespace in state BeforeTag matches neither case below and is skipped.
      69           0 :             if ( dp == TagOpened )
      70           0 :                 return false; // Invalid: Should start with a tag name
      71           0 :             else if ( dp == InTagName )
      72           0 :                 break; // End of tag name reached
      73             :         }
      74           6 :         else if ( c == '<' )
      75             :         {
      76           3 :             if ( dp == BeforeTag )
      77           3 :                 dp = TagOpened;
      78             :             else
      79           0 :                 return false; // Invalid: Nested '<'
      80             :         }
      81           3 :         else if ( c == '>' )
      82             :         {
      83           0 :             if ( dp == InTagName )
      84           0 :                 break; // End of tag name reached
      85             :             else
      86           0 :                 return false; // Invalid: Empty tag or before '<'
      87             :         }
      88           3 :         else if ( c == '!' )
      89             :         {
      90           3 :             if ( dp == TagOpened )
      91           3 :                 return true; // "<!" - DOCTYPE or comments block
      92             :             else
      93           0 :                 return false; // Invalid: '!' before '<' or inside tag name
      94             :         }
      95             :         else
      96             :         {
                                   // Any other character: in state InTagName it belongs to the tag name and
                                   // the scan simply continues (no branch below applies).
      97           0 :             if ( dp == BeforeTag )
      98           0 :                 return false; // Invalid: Should start with a tag
      99           0 :             else if ( dp == TagOpened )
     100             :             {
                                       // First character of the tag name; remember where the name begins.
     101           0 :                 nStartOfTagIndex = i;
     102           0 :                 dp = InTagName;
     103             :             }
     104             :         }
     105             :     }
     106             : 
     107             :     // The string following '<' has to be a known HTML token.
                           // The candidate name is header[nStartOfTagIndex, i), lower-cased before the lookup.
                           // NOTE(review): if the loop ends without ever reaching InTagName (empty or
                           // whitespace-only header, or a lone '<'), nStartOfTagIndex is still 0 and
                           // i == nLength, so the whole header is handed to GetHTMLToken().
     108           0 :     OString aToken = sHeader.copy( nStartOfTagIndex, i - nStartOfTagIndex );
     109           0 :     if ( GetHTMLToken( OStringToOUString( aToken.toAsciiLowerCase(), RTL_TEXTENCODING_ASCII_US ) ) != 0 )
     110           0 :         return true;
     111             : 
     112           3 :     return false;
     113             : }
     114             : 
     115             : }
     116             : 
     117           3 : PlainTextFilterDetect::PlainTextFilterDetect(const uno::Reference<uno::XComponentContext>& xCxt) :
                           // Stores the given component context in mxCxt; the constructor body is empty.
     118           3 :     mxCxt(xCxt) {}
     119             : 
     120           6 : PlainTextFilterDetect::~PlainTextFilterDetect() {} // Empty body: no explicit cleanup is performed here.
     121             : 
     122           3 : OUString SAL_CALL PlainTextFilterDetect::detect(uno::Sequence<beans::PropertyValue>& lDescriptor) throw (uno::RuntimeException, std::exception)
     123             : {
                           // Chooses an import filter for descriptors whose type name is "generic_HTML"
                           // or "generic_Text".
                           // On success the chosen filter name is stored under PROP_FILTERNAME(), the
                           // descriptor is exported back into lDescriptor and the incoming type name is
                           // returned unchanged.
                           // An empty string is returned (before lDescriptor is written back) when the type
                           // is neither of the two, or when a "generic_HTML" descriptor has no input stream
                           // or its content is rejected by IsHTMLStream().
     124           3 :     MediaDescriptor aMediaDesc(lDescriptor);
     125             : 
                           // Each of the three values defaults to an empty string when the property is absent.
     126           6 :     OUString aType = aMediaDesc.getUnpackedValueOrDefault(MediaDescriptor::PROP_TYPENAME(), OUString() );
     127           6 :     OUString aDocService = aMediaDesc.getUnpackedValueOrDefault(MediaDescriptor::PROP_DOCUMENTSERVICE(), OUString() );
     128           6 :     OUString aUrl = aMediaDesc.getUnpackedValueOrDefault(MediaDescriptor::PROP_URL(), OUString() );
     129             : 
     130             :     // Get the file name extension.
                           // The extension is lower-cased so the comparisons below are case-insensitive for ASCII.
     131           6 :     INetURLObject aParser(aUrl);
     132           6 :     OUString aExt = aParser.getExtension(INetURLObject::LAST_SEGMENT, true, INetURLObject::DECODE_WITH_CHARSET);
     133           3 :     aExt = aExt.toAsciiLowerCase();
     134             : 
     135           3 :     if (aType == "generic_HTML")
     136             :     {
                               // The content itself must look like HTML; the type name alone is not trusted.
     137           3 :         uno::Reference<io::XInputStream> xInStream(aMediaDesc[MediaDescriptor::PROP_INPUTSTREAM()], uno::UNO_QUERY);
     138           3 :         if (!xInStream.is() || !IsHTMLStream(xInStream))
     139           0 :             return OUString();
     140             : 
     141             :         // Decide which filter to use based on the document service first,
     142             :         // then on extension if that's not available.
     143             : 
     144           3 :         if (aDocService == CALC_DOCSERVICE)
     145           0 :             aMediaDesc[MediaDescriptor::PROP_FILTERNAME()] <<= OUString(CALC_HTML_FILTER);
     146           3 :         else if (aDocService == WRITER_DOCSERVICE)
     147           2 :             aMediaDesc[MediaDescriptor::PROP_FILTERNAME()] <<= OUString(WRITER_HTML_FILTER);
     148           1 :         else if (aExt == "xls")
     149           0 :             aMediaDesc[MediaDescriptor::PROP_FILTERNAME()] <<= OUString(CALC_HTML_FILTER);
     150             :         else
                                   // Neither document service matched and the extension is not "xls".
     151           1 :             aMediaDesc[MediaDescriptor::PROP_FILTERNAME()] <<= OUString(WEB_HTML_FILTER);
     152             :     }
     153             : 
     154           0 :     else if (aType == "generic_Text")
     155             :     {
                               // Same precedence as for HTML: document service first, then the file
                               // extension, with WRITER_TEXT_FILTER as the fallback. No content check is done here.
     156           0 :         if (aDocService == CALC_DOCSERVICE)
     157           0 :             aMediaDesc[MediaDescriptor::PROP_FILTERNAME()] <<= OUString(CALC_TEXT_FILTER);
     158           0 :         else if (aDocService == WRITER_DOCSERVICE)
     159           0 :             aMediaDesc[MediaDescriptor::PROP_FILTERNAME()] <<= OUString(WRITER_TEXT_FILTER);
     160           0 :         else if (aExt == "csv" || aExt == "tsv" || aExt == "tab" || aExt == "xls")
     161           0 :             aMediaDesc[MediaDescriptor::PROP_FILTERNAME()] <<= OUString(CALC_TEXT_FILTER);
     162             :         else
     163           0 :             aMediaDesc[MediaDescriptor::PROP_FILTERNAME()] <<= OUString(WRITER_TEXT_FILTER);
     164             :     }
     165             : 
     166             :     else
     167             :         // Nothing to detect.
     168           0 :         return OUString();
     169             : 
                           // Export the descriptor, now carrying the filter name, back to the caller's sequence.
     170           3 :     aMediaDesc >> lDescriptor;
     171           6 :     return aType;
     172             : }
     173             : 
     174             : // XInitialization
     175             : 
     176           0 : void SAL_CALL PlainTextFilterDetect::initialize(const uno::Sequence<uno::Any>& /*aArguments*/)
     177             :     throw (uno::Exception, uno::RuntimeException, std::exception)
     178             : {
                           // No-op: the argument sequence is unnamed and not used.
     179           0 : }
     180             : 
     181           2 : OUString PlainTextFilterDetect_getImplementationName()
     182             : {
                           // Returns the implementation name of this component. The same string is also
                           // listed by PlainTextFilterDetect_getSupportedServiceNames().
     183           2 :     return OUString("com.sun.star.comp.filters.PlainTextFilterDetect");
     184             : }
     185             : 
     186           2 : uno::Sequence<OUString> PlainTextFilterDetect_getSupportedServiceNames()
     187             : {
                           // Returns a two-element sequence: the ExtendedTypeDetection service name
                           // followed by this component's own implementation name.
     188           2 :     uno::Sequence<OUString> aRet(2);
     189           2 :     OUString* pArray = aRet.getArray();
     190           2 :     pArray[0] = "com.sun.star.document.ExtendedTypeDetection";
     191           2 :     pArray[1] = "com.sun.star.comp.filters.PlainTextFilterDetect";
     192           2 :     return aRet;
     193             : }
     194             : 
     195           3 : uno::Reference<uno::XInterface> PlainTextFilterDetect_createInstance(
     196             :     const uno::Reference<uno::XComponentContext> & rCxt)
     197             : {
                           // Allocates a new PlainTextFilterDetect constructed with rCxt. The raw pointer is
                           // cast (C-style) to cppu::OWeakObject* and converted to the returned reference.
                           // NOTE(review): presumably the returned uno::Reference is what keeps the object
                           // alive via UNO reference counting - confirm.
     198           3 :     return (cppu::OWeakObject*) new PlainTextFilterDetect(rCxt);
     199             : }
     200             : 
     201             : // XServiceInfo
     202           0 : OUString SAL_CALL PlainTextFilterDetect::getImplementationName()
     203             :     throw (uno::RuntimeException, std::exception)
     204             : {
                           // Delegates to the free function so both entry points return the same name.
     205           0 :     return PlainTextFilterDetect_getImplementationName();
     206             : }
     207             : 
     208           0 : sal_Bool SAL_CALL PlainTextFilterDetect::supportsService(const OUString& rServiceName)
     209             :     throw (uno::RuntimeException, std::exception)
     210             : {
                           // Delegates the lookup of rServiceName to the cppu::supportsService() helper,
                           // passing this object.
     211           0 :     return cppu::supportsService(this, rServiceName);
     212             : }
     213             : 
     214           0 : uno::Sequence<OUString> SAL_CALL PlainTextFilterDetect::getSupportedServiceNames()
     215             :     throw (uno::RuntimeException, std::exception)
     216             : {
                           // Delegates to the free function that builds the service-name sequence.
     217           0 :     return PlainTextFilterDetect_getSupportedServiceNames();
     218             : }
     219             : 
     220             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10