LCOV - code coverage report
Current view: top level - include/svtools - parhtml.hxx (source / functions) Hit Total Coverage
Test: commit c8344322a7af75b84dd3ca8f78b05543a976dfd5 Lines: 13 33 39.4 %
Date: 2015-06-13 12:38:46 Functions: 13 21 61.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : #ifndef INCLUDED_SVTOOLS_PARHTML_HXX
      21             : #define INCLUDED_SVTOOLS_PARHTML_HXX
      22             : 
      23             : #include <svtools/svtdllapi.h>
      24             : #include <svtools/svparser.hxx>
      25             : 
      26             : #include <boost/ptr_container/ptr_vector.hpp>
      27             : 
      28             : namespace com { namespace sun { namespace star {
      29             :     namespace document {
      30             :         class XDocumentProperties;
      31             :     }
      32             : } } }
      33             : 
      34             : class Color;
      35             : class SvNumberFormatter;
      36             : class SvKeyValueIterator;
      37             : 
      38             : #define HTMLFONTSZ1_DFLT 7   // presumably the defaults for HTML font sizes 1..7;
      39             : #define HTMLFONTSZ2_DFLT 10  // the unit is not stated in this header - TODO confirm
      40             : #define HTMLFONTSZ3_DFLT 12
      41             : #define HTMLFONTSZ4_DFLT 14
      42             : #define HTMLFONTSZ5_DFLT 18
      43             : #define HTMLFONTSZ6_DFLT 24
      44             : #define HTMLFONTSZ7_DFLT 36
      45             : 
      46             : enum HTMLTableFrame { HTML_TF_VOID, HTML_TF_ABOVE, HTML_TF_BELOW, // <TABLE FRAME=...>
      47             :     HTML_TF_HSIDES, HTML_TF_LHS, HTML_TF_RHS, HTML_TF_VSIDES, HTML_TF_BOX }; // returned by HTMLOption::GetTableFrame()
      48             : 
      49             : enum HTMLTableRules { HTML_TR_NONE, HTML_TR_GROUPS, HTML_TR_ROWS, // <TABLE RULES=...>
      50             :     HTML_TR_COLS, HTML_TR_ALL }; // returned by HTMLOption::GetTableRules()
      51             : 
      52             : enum HTMLInputType // <INPUT TYPE=...>; returned by HTMLOption::GetInputType()
      53             : {
      54             :     HTML_IT_TEXT =      0x01,
      55             :     HTML_IT_PASSWORD =  0x02,
      56             :     HTML_IT_CHECKBOX =  0x03,
      57             :     HTML_IT_RADIO =     0x04,
      58             :     HTML_IT_RANGE =     0x05,
      59             :     HTML_IT_SCRIBBLE =  0x06,
      60             :     HTML_IT_FILE =      0x07,
      61             :     HTML_IT_HIDDEN =    0x08,
      62             :     HTML_IT_SUBMIT =    0x09,
      63             :     HTML_IT_IMAGE =     0x0a,
      64             :     HTML_IT_RESET =     0x0b,
      65             :     HTML_IT_BUTTON =    0x0c
      66             : };
      67             : 
      68             : enum HTMLScriptLanguage // passed by reference (rLang) to HTMLParser::ParseScriptOptions()
      69             : {
      70             :     HTML_SL_STARBASIC,
      71             :     HTML_SL_JAVASCRIPT,
      72             :     HTML_SL_UNKNOWN
      73             : };
      74             : 
      75             : struct HTMLOptionEnum   // element type of the arrays passed to HTMLOption::GetEnum()
      76             : {
      77             :     const sal_Char *pName;  // string value of an HTML option
      78             :     sal_uInt16 nValue;      // enum value that corresponds to pName
      79             : };
      80             : 
      81             : /** Representation of an HTML option (= attribute in a start tag).
      82             :  * The value of an option is always stored as a string.
      83             :  * The typed accessors (GetNumber, GetColor, GetEnum, ...) may only be
      84             :  * called if the value of the option actually has the matching form.
      85             :  */
      86        1455 : class SVT_DLLPUBLIC HTMLOption
      87             : {
      88             :     OUString aValue;          // value of the option (always as string)
      89             :     OUString aToken;          // name of the option as string
      90             :     sal_uInt16 nToken;        // name of the option as token
      91             : 
      92             : public:
      93             : 
      94             :     HTMLOption( sal_uInt16 nTyp, const OUString& rToken, const OUString& rValue );
      95             : 
      96             :     // name of the option...
      97        1520 :     sal_uInt16 GetToken() const { return nToken; }  // ... as token (nToken)
      98           7 :     const OUString& GetTokenString() const { return aToken; } // ... as string
      99             : 
     100             :     // value of the option ...
     101        1424 :     const OUString& GetString() const { return aValue; }  // ... as string
     102             : 
     103             :     sal_uInt32 GetNumber() const;                           // ... as unsigned number
     104             :     sal_Int32 GetSNumber() const;                           // ... as signed number
     105             :     void GetNumbers( std::vector<sal_uInt32> &rNumbers,                  // ... as numbers (output in rNumbers)
     106             :                      bool bSpaceDelim=false ) const;
     107             :     void GetColor( Color& ) const;                      // ... as color (output in the passed Color)
     108             : 
     109             :     // ... as enum; pOptEnums is an HTMLOptionEnum array
     110             :     sal_uInt16 GetEnum( const HTMLOptionEnum *pOptEnums,
     111             :                         sal_uInt16 nDflt=0 ) const; // NOTE(review): nDflt is presumably the result if nothing matches - confirm
     112             :     bool GetEnum( sal_uInt16 &rEnum, const HTMLOptionEnum *pOptEnums ) const; // NOTE(review): presumably false if nothing matches - confirm
     113             : 
     114             :     // ... and as a few special enums
     115             :     HTMLInputType GetInputType() const;                 // <INPUT TYPE=...>
     116             :     HTMLTableFrame GetTableFrame() const;               // <TABLE FRAME=...>
     117             :     HTMLTableRules GetTableRules() const;               // <TABLE RULES=...>
     118             :     //SvxAdjust GetAdjust() const;                      // <P,TH,TD ALIGN=>
     119             : };
     120             : 
     121             : typedef ::boost::ptr_vector<HTMLOption> HTMLOptions; // list of the options of one start tag (see HTMLParser::maOptions)
     122             : 
     123             : class SVT_DLLPUBLIC HTMLParser : public SvParser // HTML parser derived from SvParser
     124             : {
     125             : private:
     126             :     mutable HTMLOptions maOptions; // options of the start tag
     127             : 
     128             :     bool bNewDoc        : 1;        // read new Doc?
     129             :     bool bIsInHeader    : 1;        // scan header section
     130             :     bool bIsInBody      : 1;        // scan body section
     131             :     bool bReadListing   : 1;        // read listings
     132             :     bool bReadXMP       : 1;        // read XMP
     133             :     bool bReadPRE       : 1;        // read preformatted text
     134             :     bool bReadTextArea  : 1;        // read TEXTAREA
     135             :     bool bReadScript    : 1;        // read <SCRIPT>
     136             :     bool bReadStyle     : 1;        // read <STYLE>
     137             :     bool bEndTokenFound : 1;        // found </SCRIPT> or </STYLE>
     138             : 
     139             :     bool bPre_IgnoreNewPara : 1;    // flags for reading of PRE paragraphs
     140             :     bool bReadNextChar : 1;         // true: read NextChar again (JavaScript!)
     141             :     bool bReadComment : 1;          // NOTE(review): presumably "currently reading a comment"; the old text duplicated bReadNextChar's comment - confirm
     142             : 
     143             :     sal_uInt32 nPre_LinePos;            // Pos in the line in the PRE-Tag
     144             : 
     145             :     int mnPendingOffToken;          ///< pending OFF token for an ON/OFF token that was read in the form <XX.../>
     146             : 
     147             :     OUString aEndToken;              // set by ReadRawData()
     148             : 
     149             : protected:
     150             :     OUString sSaveToken;             // the read tag as string
     151             : 
     152             :     int ScanText( const sal_Unicode cBreak = 0U );
     153             : 
     154             :     int _GetNextRawToken();
     155             : 
     156             :     // scan next token
     157             :     virtual int _GetNextToken() SAL_OVERRIDE;
     158             : 
     159             :     virtual ~HTMLParser();
     160             : 
     161          16 :     void FinishHeader( bool bBody ) { bIsInHeader = false; bIsInBody = bBody; }
     162             : 
     163             : public:
     164             :     HTMLParser( SvStream& rIn, bool bReadNewDoc = true );
     165             : 
     166             :     virtual SvParserState CallParser() SAL_OVERRIDE;
     167             : 
     168         272 :     bool IsNewDoc() const       { return bNewDoc; }
     169          43 :     bool IsInHeader() const     { return bIsInHeader; }
     170             :     bool IsInBody() const       { return bIsInBody; }
     171         292 :     bool IsReadListing() const  { return bReadListing; }
     172         292 :     bool IsReadXMP() const      { return bReadXMP; }
     173        1373 :     bool IsReadPRE() const      { return bReadPRE; }
     174         106 :     bool IsReadScript() const   { return bReadScript; }
     175         106 :     bool IsReadStyle() const    { return bReadStyle; }
     176             : 
     177             :     void SetReadNextChar()      { bReadNextChar = true; }
     178             : 
     179             :     // start PRE-/LISTING or XMP mode or filter tags respectively
     180             :     inline void StartPRE( bool bRestart=false );
     181           0 :     void FinishPRE() { bReadPRE = false; }
     182             :     int FilterPRE( int nToken );
     183             : 
     184             :     inline void StartListing( bool bRestart=false );
     185           0 :     void FinishListing() { bReadListing = false; }
     186             :     int FilterListing( int nToken );
     187             : 
     188             :     inline void StartXMP( bool bRestart=false );
     189           0 :     void FinishXMP() { bReadXMP = false; }
     190             :     int FilterXMP( int nToken );
     191             : 
     192           0 :     void FinishTextArea() { bReadTextArea = false; }
     193             : 
     194             :     // finish PRE-/LISTING- and XMP mode
     195         584 :     void FinishPREListingXMP() { bReadPRE = bReadListing = bReadXMP = false; }
     196             : 
     197             :     // Filter the current token according to the current mode
     198             :     // (PRE, XMP, ...) and set the flags. Is called by Continue before
     199             :     // NextToken is called. If you implement your own loops or call
     200             :     // NextToken yourself, you should call this method beforehand.
     201             :     int FilterToken( int nToken );
     202             : 
     203             :     // end scanning of a script (should only be called right after
     204             :     // reading of a <SCRIPT>)
     205             :     void EndScanScript() { bReadScript = false; }
     206             : 
     207           0 :     void ReadRawData( const OUString &rEndToken ) { aEndToken = rEndToken; }
     208             : 
     209             :     // Token without \-sequences
     210             :     void UnescapeToken();
     211             : 
     212             :     // Determine the options. pNoConvertToken is the optional token
     213             :     // of an option, for which the CR/LFs are not deleted from the value
     214             :     // of the option.
     215             :     const HTMLOptions& GetOptions( sal_uInt16 *pNoConvertToken=0 );
     216             : 
     217             :     // for asynchronous reading from the SvStream
     218             :     virtual void Continue( int nToken ) SAL_OVERRIDE;
     219             : 
     220             : 
     221             : protected:
     222             : 
     223             :     static rtl_TextEncoding GetEncodingByMIME( const OUString& rMime );
     224             : 
     225             :     /// template method: called when ParseMetaOptions adds a user-defined meta
     226             :     virtual void AddMetaUserDefined( OUString const & i_rMetaName );
     227             : 
     228             : private:
     229             :     /// parse meta options into XDocumentProperties and encoding
     230             :     bool ParseMetaOptionsImpl( const ::com::sun::star::uno::Reference<
     231             :                 ::com::sun::star::document::XDocumentProperties>&,
     232             :             SvKeyValueIterator*,
     233             :             const HTMLOptions&,
     234             :             rtl_TextEncoding& rEnc );
     235             : 
     236             : public:
     237             :     /// overriding method must call this implementation!
     238             :     virtual bool ParseMetaOptions( const ::com::sun::star::uno::Reference<
     239             :                 ::com::sun::star::document::XDocumentProperties>&,
     240             :             SvKeyValueIterator* );
     241             : 
     242             :     bool ParseScriptOptions( OUString& rLangString, const OUString&, HTMLScriptLanguage& rLang,
     243             :                              OUString& rSrc, OUString& rLibrary, OUString& rModule );
     244             : 
     245             :     // remove a comment around the content of <SCRIPT> or <STYLE>
     246             :     // In case of 'bFull', the whole line behind a "<!--" might
     247             :     // be deleted (for JavaScript)
     248             :     static void RemoveSGMLComment( OUString &rString, bool bFull );
     249             : 
     250             :     static bool InternalImgToPrivateURL( OUString& rURL );
     251             :     static rtl_TextEncoding GetEncodingByHttpHeader( SvKeyValueIterator *pHTTPHeader );
     252             :     bool SetEncodingByHTTPHeader( SvKeyValueIterator *pHTTPHeader );
     253             : };
     254             : 
     255           0 : inline void HTMLParser::StartPRE( bool bRestart ) // switch on PRE mode
     256             : {
     257           0 :     bReadPRE = true;
     258           0 :     bPre_IgnoreNewPara = !bRestart; // flag is set unless this is a restart
     259           0 :     nPre_LinePos = 0UL;             // line position starts at 0 again
     260           0 : }
     261             : 
     262           0 : inline void HTMLParser::StartListing( bool bRestart ) // switch on LISTING mode
     263             : {
     264           0 :     bReadListing = true;
     265           0 :     bPre_IgnoreNewPara = !bRestart; // flag is set unless this is a restart
     266           0 :     nPre_LinePos = 0UL;             // line position starts at 0 again
     267           0 : }
     268             : 
     269           0 : inline void HTMLParser::StartXMP( bool bRestart ) // switch on XMP mode
     270             : {
     271           0 :     bReadXMP = true;
     272           0 :     bPre_IgnoreNewPara = !bRestart; // flag is set unless this is a restart
     273           0 :     nPre_LinePos = 0UL;             // line position starts at 0 again
     274           0 : }
     275             : 
     276             : #endif
     277             : 
     278             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.11