LCOV - code coverage report
Current view: top level - filter/source/textfilterdetect - filterdetect.cxx (source / functions) Hit Total Coverage
Test: commit 10e77ab3ff6f4314137acd6e2702a6e5c1ce1fae Lines: 50 91 54.9 %
Date: 2014-11-03 Functions: 8 12 66.7 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  */
       9             : 
      10             : #include "filterdetect.hxx"
      11             : 
      12             : #include <svtools/htmltokn.h>
      13             : #include <tools/urlobj.hxx>
      14             : #include <ucbhelper/content.hxx>
      15             : #include <unotools/mediadescriptor.hxx>
      16             : #include <unotools/ucbstreamhelper.hxx>
      17             : 
      18             : #include <com/sun/star/lang/XMultiServiceFactory.hpp>
      19             : #include <com/sun/star/io/XInputStream.hpp>
      20             : #include <cppuhelper/supportsservice.hxx>
      21             : #include <boost/scoped_ptr.hpp>
      22             : 
      23             : #define WRITER_TEXT_FILTER "Text"
      24             : #define CALC_TEXT_FILTER   "Text - txt - csv (StarCalc)"
      25             : 
      26             : #define WEB_HTML_FILTER    "HTML"
      27             : #define WRITER_HTML_FILTER "HTML (StarWriter)"
      28             : #define CALC_HTML_FILTER   "calc_HTML_WebQuery"
      29             : 
      30             : #define WRITER_DOCSERVICE  "com.sun.star.text.TextDocument"
      31             : #define CALC_DOCSERVICE    "com.sun.star.sheet.SpreadsheetDocument"
      32             : 
      33             : using namespace ::com::sun::star;
      34             : using utl::MediaDescriptor;
      35             : 
      36             : namespace {
      37             : 
                       // Heuristic check for whether an input stream begins with HTML markup.
                       //
                       // The stream is wrapped in an SvStream and a header of at most nSize (4096)
                       // units is read (presumably bytes or UTF-16 code units depending on the
                       // branch taken -- confirm against read_uInt8s_ToOString /
                       // read_uInt16s_ToOUString). The header is then scanned character by
                       // character with a three-state machine (BeforeTag, TagOpened, InTagName):
                       //   * whitespace in front of the first '<' is skipped;
                       //   * "<!" returns true immediately (DOCTYPE or comment);
                       //   * otherwise the text between '<' and the next whitespace or '>' is
                       //     lower-cased and must be recognized by GetHTMLToken() (non-zero result).
                       //
                       // @param xInStream  stream to inspect.
                       // @return true if the header looks like HTML; false if no usable stream
                       //         could be created, the first non-whitespace character is not '<',
                       //         the tag is malformed, or the tag name is not a known HTML token.
      38          16 : bool IsHTMLStream( const uno::Reference<io::XInputStream>& xInStream )
      39             : {
      40          16 :     boost::scoped_ptr<SvStream> pInStream( utl::UcbStreamHelper::CreateStream( xInStream ) );
      41          16 :     if ( !pInStream || pInStream->GetError() )
      42             :         // No stream
      43           0 :         return false;
      44             : 
      45             :     // Read the stream header
      46          16 :     pInStream->StartReadingUnicodeText( RTL_TEXTENCODING_DONTKNOW );
                           // The stream position after the call above is used to tell the encodings
                           // apart (presumably a byte-order mark was skipped -- confirm in SvStream).
      47          16 :     const sal_Size nUniPos = pInStream->Tell();
      48          16 :     const sal_uInt16 nSize = 4096;
      49             : 
      50          32 :     OString sHeader;
      51          16 :     if ( nUniPos == 3 || nUniPos == 0 ) // UTF-8 or non-Unicode
      52          16 :         sHeader = read_uInt8s_ToOString( *pInStream, nSize );
      53             :     else // UTF-16 (nUniPos = 2)
      54           0 :         sHeader = OUStringToOString( read_uInt16s_ToOUString( *pInStream, nSize ), RTL_TEXTENCODING_ASCII_US );
      55             : 
      56             :     // Now check whether the stream begins with a known HTML tag.
      57             :     enum DetectPhase { BeforeTag, TagOpened, InTagName };
      58          16 :     DetectPhase dp = BeforeTag;
      59             : 
      60          16 :     const char* pHeader = sHeader.getStr();
      61          16 :     const int   nLength = sHeader.getLength();
      62          16 :     int i = 0, nStartOfTagIndex = 0;
      63             : 
                           // i and pHeader advance in lockstep; i is declared outside the loop
                           // because it delimits the end of the tag name afterwards.
      64          32 :     for ( i = 0; i < nLength; ++i, ++pHeader )
      65             :     {
      66          32 :         char c = *pHeader;
      67          32 :         if ( c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\f' )
      68             :         {
                                   // Whitespace while still in BeforeTag matches neither branch
                                   // below and is simply skipped.
      69           0 :             if ( dp == TagOpened )
      70           0 :                 return false; // Invalid: Should start with a tag name
      71           0 :             else if ( dp == InTagName )
      72           0 :                 break; // End of tag name reached
      73             :         }
      74          32 :         else if ( c == '<' )
      75             :         {
      76          16 :             if ( dp == BeforeTag )
      77          16 :                 dp = TagOpened;
      78             :             else
      79           0 :                 return false; // Invalid: Nested '<'
      80             :         }
      81          16 :         else if ( c == '>' )
      82             :         {
      83           0 :             if ( dp == InTagName )
      84           0 :                 break; // End of tag name reached
      85             :             else
      86           0 :                 return false; // Invalid: Empty tag or before '<'
      87             :         }
      88          16 :         else if ( c == '!' )
      89             :         {
      90          16 :             if ( dp == TagOpened )
      91          16 :                 return true; // "<!" - DOCTYPE or comments block
      92             :             else
      93           0 :                 return false; // Invalid: '!' before '<' or inside tag name
      94             :         }
      95             :         else
      96             :         {
      97           0 :             if ( dp == BeforeTag )
      98           0 :                 return false; // Invalid: Should start with a tag
      99           0 :             else if ( dp == TagOpened )
     100             :             {
                                       // First character of the tag name: remember where it starts.
     101           0 :                 nStartOfTagIndex = i;
     102           0 :                 dp = InTagName;
     103             :             }
                                   // Further characters while in InTagName need no action.
     104             :         }
     105             :     }
     106             : 
                           // NOTE(review): if the loop ran to the end of the header without ever
                           // reaching InTagName (e.g. an empty or whitespace-only header),
                           // nStartOfTagIndex is still 0 and i == nLength, so the whole header is
                           // looked up as a token. A tag name cut off by the end of the header is
                           // likewise looked up as-is.
     107             :     // The string following '<' has to be a known HTML token.
     108           0 :     OString aToken = sHeader.copy( nStartOfTagIndex, i - nStartOfTagIndex );
     109           0 :     if ( GetHTMLToken( OStringToOUString( aToken.toAsciiLowerCase(), RTL_TEXTENCODING_ASCII_US ) ) != 0 )
     110           0 :         return true;
     111             : 
     112          16 :     return false;
     113             : }
     114             : 
     115             : }
     116             : 
                       // Constructs the detector and stores the given component context in mxCxt.
                       // Nothing else in this listing reads mxCxt.
     117          16 : PlainTextFilterDetect::PlainTextFilterDetect(const uno::Reference<uno::XComponentContext>& xCxt) :
     118          16 :     mxCxt(xCxt) {}
     119             : 
                       // Destructor with an empty body; no explicit cleanup is performed here.
     120          32 : PlainTextFilterDetect::~PlainTextFilterDetect() {}
     121             : 
                       // Decides which import filter should handle an HTML or plain-text document.
                       //
                       // Reads the type name and document service from lDescriptor and, depending
                       // on the type:
                       //   * "generic_HTML" / "calc_HTML": requires an input stream that
                       //     IsHTMLStream() accepts, then selects CALC_HTML_FILTER,
                       //     WRITER_HTML_FILTER or WEB_HTML_FILTER;
                       //   * "generic_Text": selects CALC_TEXT_FILTER or WRITER_TEXT_FILTER from
                       //     the document service, falling back to the URL's file extension;
                       //   * anything else: nothing is detected.
                       //
                       // @param lDescriptor  media descriptor; on success the chosen filter name
                       //                     is stored in it (see the write-back at the end).
                       // @return the incoming type name on success, or an empty string when the
                       //         type is not handled here or the HTML check fails.
     122          16 : OUString SAL_CALL PlainTextFilterDetect::detect(uno::Sequence<beans::PropertyValue>& lDescriptor) throw (uno::RuntimeException, std::exception)
     123             : {
     124          16 :     MediaDescriptor aMediaDesc(lDescriptor);
     125             : 
     126          32 :     OUString aType = aMediaDesc.getUnpackedValueOrDefault(MediaDescriptor::PROP_TYPENAME(), OUString() );
     127          32 :     OUString aDocService = aMediaDesc.getUnpackedValueOrDefault(MediaDescriptor::PROP_DOCUMENTSERVICE(), OUString() );
     128             : 
     129          16 :     if ((aType == "generic_HTML") || (aType == "calc_HTML"))
     130             :     {
     131          16 :         uno::Reference<io::XInputStream> xInStream(aMediaDesc[MediaDescriptor::PROP_INPUTSTREAM()], uno::UNO_QUERY);
                               // Give up (empty result) when there is no stream or its header does
                               // not look like HTML; this return happens before the write-back below.
     132          16 :         if (!xInStream.is() || !IsHTMLStream(xInStream))
     133           0 :             return OUString();
     134             : 
                               // The calc_HTML type selects the Calc filter even when the document
                               // service is not the Calc one.
     135          16 :         if ((aDocService == CALC_DOCSERVICE) || (aType == "calc_HTML"))
     136           0 :             aMediaDesc[MediaDescriptor::PROP_FILTERNAME()] <<= OUString(CALC_HTML_FILTER);
     137          16 :         else if (aDocService == WRITER_DOCSERVICE)
     138          14 :             aMediaDesc[MediaDescriptor::PROP_FILTERNAME()] <<= OUString(WRITER_HTML_FILTER);
     139             :         else
     140           2 :             aMediaDesc[MediaDescriptor::PROP_FILTERNAME()] <<= OUString(WEB_HTML_FILTER);
     141             :     }
     142             : 
     143           0 :     else if (aType == "generic_Text")
     144             :     {
     145             :         // Get the file name extension.
     146           0 :         INetURLObject aParser(aMediaDesc.getUnpackedValueOrDefault(MediaDescriptor::PROP_URL(), OUString() ) );
     147           0 :         OUString aExt = aParser.getExtension(INetURLObject::LAST_SEGMENT, true, INetURLObject::DECODE_WITH_CHARSET);
                               // Lower-cased so the extension comparisons below are case-insensitive.
     148           0 :         aExt = aExt.toAsciiLowerCase();
     149             : 
     150             :         // Decide which filter to use based on the document service first,
     151             :         // then on extension if that's not available.
     152           0 :         if (aDocService == CALC_DOCSERVICE)
     153           0 :             aMediaDesc[MediaDescriptor::PROP_FILTERNAME()] <<= OUString(CALC_TEXT_FILTER);
     154           0 :         else if (aDocService == WRITER_DOCSERVICE)
     155           0 :             aMediaDesc[MediaDescriptor::PROP_FILTERNAME()] <<= OUString(WRITER_TEXT_FILTER);
     156           0 :         else if (aExt == "csv" || aExt == "tsv" || aExt == "tab" || aExt == "xls")
     157           0 :             aMediaDesc[MediaDescriptor::PROP_FILTERNAME()] <<= OUString(CALC_TEXT_FILTER);
     158             :         else
     159           0 :             aMediaDesc[MediaDescriptor::PROP_FILTERNAME()] <<= OUString(WRITER_TEXT_FILTER);
     160             :     }
     161             : 
     162             :     else
     163             :         // Nothing to detect.
     164           0 :         return OUString();
     165             : 
                           // Success path only: copy the updated media descriptor back into the
                           // caller's sequence and report the type that was passed in.
     166          16 :     aMediaDesc >> lDescriptor;
     167          32 :     return aType;
     168             : }
     169             : 
     170             : // XInitialization
     171             : 
                       // No-op: the argument sequence is ignored (its parameter name is commented
                       // out) and the body is empty.
     172           0 : void SAL_CALL PlainTextFilterDetect::initialize(const uno::Sequence<uno::Any>& /*aArguments*/)
     173             :     throw (uno::Exception, uno::RuntimeException, std::exception)
     174             : {
     175           0 : }
     176             : 
                       // Returns the fixed implementation name of this component. The member
                       // function PlainTextFilterDetect::getImplementationName() forwards here.
     177           6 : OUString PlainTextFilterDetect_getImplementationName()
     178             : {
     179           6 :     return OUString("com.sun.star.comp.filters.PlainTextFilterDetect");
     180             : }
     181             : 
                       // Returns a two-element sequence with the service names of this component:
                       // the ExtendedTypeDetection service name followed by the component's own
                       // implementation-specific name.
     182           6 : uno::Sequence<OUString> PlainTextFilterDetect_getSupportedServiceNames()
     183             : {
     184           6 :     uno::Sequence<OUString> aRet(2);
                           // Fill the two slots in place through the sequence's element array.
     185           6 :     OUString* pArray = aRet.getArray();
     186           6 :     pArray[0] = "com.sun.star.document.ExtendedTypeDetection";
     187           6 :     pArray[1] = "com.sun.star.comp.filters.PlainTextFilterDetect";
     188           6 :     return aRet;
     189             : }
     190             : 
                       // Factory function: allocates a new PlainTextFilterDetect constructed with
                       // rCxt and returns it as an XInterface reference.
                       //
                       // @param rCxt  component context passed to the constructor.
                       // @return reference to the newly created object.
     191          16 : uno::Reference<uno::XInterface> PlainTextFilterDetect_createInstance(
     192             :     const uno::Reference<uno::XComponentContext> & rCxt)
     193             : {
                           // The pointer is converted to cppu::OWeakObject* before being wrapped in
                           // the reference (presumably to pick an unambiguous XInterface base --
                           // confirm against filterdetect.hxx). A named static_cast replaces the
                           // former C-style cast so that an invalid conversion is a compile error
                           // instead of silently degrading to a reinterpreting cast.
     194          16 :     return static_cast<cppu::OWeakObject*>(new PlainTextFilterDetect(rCxt));
     195             : }
     196             : 
     197             : // XServiceInfo
                       // Returns the implementation name by forwarding to the free function
                       // PlainTextFilterDetect_getImplementationName().
     198           0 : OUString SAL_CALL PlainTextFilterDetect::getImplementationName()
     199             :     throw (uno::RuntimeException, std::exception)
     200             : {
     201           0 :     return PlainTextFilterDetect_getImplementationName();
     202             : }
     203             : 
                       // Reports whether this object supports the service rServiceName. The check
                       // is delegated to the cppu::supportsService() helper (presumably it matches
                       // the name against getSupportedServiceNames() -- confirm in cppuhelper).
     204           0 : sal_Bool SAL_CALL PlainTextFilterDetect::supportsService(const OUString& rServiceName)
     205             :     throw (uno::RuntimeException, std::exception)
     206             : {
     207           0 :     return cppu::supportsService(this, rServiceName);
     208             : }
     209             : 
                       // Returns the supported service names by forwarding to the free function
                       // PlainTextFilterDetect_getSupportedServiceNames().
     210           0 : uno::Sequence<OUString> SAL_CALL PlainTextFilterDetect::getSupportedServiceNames()
     211             :     throw (uno::RuntimeException, std::exception)
     212             : {
     213           0 :     return PlainTextFilterDetect_getSupportedServiceNames();
     214             : }
     215             : 
     216             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10