LCOV - code coverage report
Current view: top level - libreoffice/sc/source/filter/inc - htmlpars.hxx (source / functions) Hit Total Coverage
Test: libreoffice_filtered.info Lines: 18 35 51.4 %
Date: 2012-12-27 Functions: 13 23 56.5 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : #ifndef SC_HTMLPARS_HXX
      21             : #define SC_HTMLPARS_HXX
      22             : 
      23             : #include <memory>
      24             : #include <stack>
      25             : #include <vector>
      26             : #include <list>
      27             : #include <map>
      28             : #include <o3tl/sorted_vector.hxx>
      29             : #include <boost/ptr_container/ptr_map.hpp>
      30             : #include <boost/unordered_map.hpp>
      31             : 
      32             : #include "rangelst.hxx"
      33             : #include "eeparser.hxx"
      34             : 
      35             : const sal_uInt32 SC_HTML_FONTSIZES = 7;        // same as export, HTML options
      36             : 
      37             : // Pixel tolerance for SeekOffset and related.
      38             : const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_SMALL = 1;    // single table
      39             : const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_LARGE = 10;   // nested
      40             : 
      41             : // ============================================================================
      42             : // BASE class for HTML parser classes
      43             : // ============================================================================
      44             : 
      45             : class ScHTMLTable;
      46             : 
      47             : /**
      48             :  * Collection of HTML style data parsed from the content of <style>
      49             :  * elements.
      50             :  */
      51           1 : class ScHTMLStyles
      52             : {
      53             :     typedef ::boost::unordered_map<rtl::OUString, rtl::OUString, rtl::OUStringHash> PropsType; /// string-to-string map (property name to value)
      54             :     typedef ::boost::ptr_map<rtl::OUString, PropsType> NamePropsType; /// name to owned property map
      55             :     typedef ::boost::ptr_map<rtl::OUString, NamePropsType> ElemsType; /// name to owned NamePropsType map
      56             : 
      57             :     NamePropsType maGlobalProps;     /// global properties (for a given class for all elements)
      58             :     NamePropsType maElemGlobalProps; /// element global properties (no class specified)
      59             :     ElemsType maElemProps;           /// element to class to properties (both element and class are given)
      60             :     const rtl::OUString maEmpty;     /// just a persistent empty string.
      61             : public:
      62             :     ScHTMLStyles();
      63             : 
      64             :     void add(const char* pElemName, size_t nElemName, const char* pClassName, size_t nClassName,
      65             :              const rtl::OUString& aProp, const rtl::OUString& aValue); /// element and class names are passed as pointer/length pairs
      66             : 
      67             :     /**
      68             :      * Find best-matching property value for given element and class names.
      69             :      */
      70             :     const rtl::OUString& getPropertyValue(
      71             :         const rtl::OUString& rElem, const rtl::OUString& rClass, const rtl::OUString& rPropName) const;
      72             : 
      73             : private:
      74             :     static void insertProp(
      75             :         NamePropsType& rProps, const rtl::OUString& aName,
      76             :         const rtl::OUString& aProp, const rtl::OUString& aValue); /// static helper operating on the passed rProps map
      77             : };
      78             : 
      79             : /** Base class for HTML parser classes. */
      80             : class ScHTMLParser : public ScEEParser
      81             : {
      82             :     ScHTMLStyles                maStyles;       /// Style data (private member).
      83             : protected:
      84             :     sal_uInt32                  maFontHeights[ SC_HTML_FONTSIZES ]; /// One entry per HTML font size (SC_HTML_FONTSIZES entries).
      85             :     ScDocument*                 mpDoc;          /// The destination document.
      86             : 
      87             : public:
      88             :     explicit                    ScHTMLParser( EditEngine* pEditEngine, ScDocument* pDoc );
      89             :     virtual                     ~ScHTMLParser();
      90             : 
      91             :     virtual sal_uLong               Read( SvStream& rStrm, const String& rBaseURL  ) = 0; /// Pure virtual, implemented by derived parsers.
      92             : 
      93             :     ScHTMLStyles&               GetStyles();
      94             :     ScDocument&                 GetDoc();
      95             : 
      96             :     /** Returns the "global table" which contains the entire HTML document. */
      97             :     virtual const ScHTMLTable*  GetGlobalTable() const = 0;
      98             : };
      99             : 
     100             : 
     101             : // ============================================================================
     102             : 
     103             : typedef o3tl::sorted_vector<sal_uLong> ScHTMLColOffset; // sorted vector of sal_uLong values
     104             : 
     105             : struct ScHTMLTableStackEntry
     106             : {
     107             :     ScRangeListRef      xLockedList;
     108             :     ScEEParseEntry*     pCellEntry;
     109             :     ScHTMLColOffset*    pLocalColOffset;
     110             :     sal_uLong               nFirstTableCell;
     111             :     SCCOL               nColCnt;
     112             :     SCROW               nRowCnt;
     113             :     SCCOL               nColCntStart;
     114             :     SCCOL               nMaxCol;
     115             :     sal_uInt16              nTable;
     116             :     sal_uInt16              nTableWidth;
     117             :     sal_uInt16              nColOffset;
     118             :     sal_uInt16              nColOffsetStart;
     119             :     bool                bFirstRow;
     120           0 :                         ScHTMLTableStackEntry( ScEEParseEntry* pE, // copies every argument into the matching member
     121             :                                 const ScRangeListRef& rL, ScHTMLColOffset* pTO,
     122             :                                 sal_uLong nFTC,
     123             :                                 SCCOL nCol, SCROW nRow,
     124             :                                 SCCOL nStart, SCCOL nMax, sal_uInt16 nTab,
     125             :                                 sal_uInt16 nTW, sal_uInt16 nCO, sal_uInt16 nCOS,
     126             :                                 bool bFR )
     127             :                             : xLockedList( rL ), pCellEntry( pE ),
     128             :                             pLocalColOffset( pTO ),
     129             :                             nFirstTableCell( nFTC ),
     130             :                             nColCnt( nCol ), nRowCnt( nRow ),
     131             :                             nColCntStart( nStart ), nMaxCol( nMax ),
     132             :                             nTable( nTab ), nTableWidth( nTW ),
     133             :                             nColOffset( nCO ), nColOffsetStart( nCOS ),
     134           0 :                             bFirstRow( bFR )
     135           0 :                             {}
     136           0 :                         ~ScHTMLTableStackEntry() {} // empty body: the raw pointer members are not deleted here
     137             : };
     138             : typedef ::std::stack< ScHTMLTableStackEntry* > ScHTMLTableStack; // stack of raw pointers to entries
     139             : 
     140             : struct ScHTMLAdjustStackEntry
     141             : {
     142             :     SCCOL               nLastCol;
     143             :     SCROW               nNextRow;
     144             :     SCROW               nCurRow;
     145           0 :                         ScHTMLAdjustStackEntry( SCCOL nLCol, SCROW nNRow, // copies the three arguments into the members
     146             :                                 SCROW nCRow )
     147             :                             : nLastCol( nLCol ), nNextRow( nNRow ),
     148           0 :                             nCurRow( nCRow )
     149           0 :                             {}
     150             : };
     151             : typedef ::std::stack< ScHTMLAdjustStackEntry* > ScHTMLAdjustStack; // stack of raw pointers to entries
     152             : 
     153             : 
     154             : // ============================================================================
     155             : 
     156             : class EditEngine;
     157             : class ScDocument;
     158             : class HTMLOption;
     159             : 
     160             : // TODO these need better names
     161             : typedef ::std::map<SCROW, SCROW> InnerMap; // maps a SCROW to a SCROW
     162             : typedef ::std::map<sal_uInt16, InnerMap*> OuterMap; // maps a sal_uInt16 key to a raw InnerMap pointer
     163             : 
     164             : class ScHTMLLayoutParser : public ScHTMLParser
     165             : {
     166             : private:
     167             :     Size                aPageSize;
     168             :     rtl::OUString       aBaseURL;
     169             :     ScHTMLTableStack    aTableStack;
     170             :     rtl::OUString       aString;
     171             :     ScRangeListRef      xLockedList;        // per table
     172             :     OuterMap*           pTables;
     173             :     ScHTMLColOffset*    pColOffset;
     174             :     ScHTMLColOffset*    pLocalColOffset;    // per table
     175             :     sal_uLong               nFirstTableCell;    // per table
     176             :     short               nTableLevel;
     177             :     sal_uInt16              nTable;
     178             :     sal_uInt16              nMaxTable;
     179             :     SCCOL               nColCntStart;       // first column, per table
     180             :     SCCOL               nMaxCol;            // per table
     181             :     sal_uInt16              nTableWidth;        // per table
     182             :     sal_uInt16              nColOffset;         // current offset, in pixels
     183             :     sal_uInt16              nColOffsetStart;    // start value per table, in pixels
     184             :     sal_uInt16              nOffsetTolerance;   // for use with SeekOffset and related
     185             :     bool                bTabInTabCell:1;
     186             :     bool                bFirstRow:1;          // per table, whether in the first row
     187             :     bool                bInCell:1;
     188             :     bool                bInTitle:1;
     189             : 
     190             :     DECL_LINK( HTMLImportHdl, ImportInfo* );
     191             :     void                NewActEntry( ScEEParseEntry* );
     192             :     void                EntryEnd( ScEEParseEntry*, const ESelection& );
     193             :     void                ProcToken( ImportInfo* );
     194             :     void                CloseEntry( ImportInfo* );
     195             :     void                NextRow(  ImportInfo*  );
     196             :     void                SkipLocked( ScEEParseEntry*, bool bJoin = true );
     197             :     static bool         SeekOffset( ScHTMLColOffset*, sal_uInt16 nOffset,
     198             :                                     SCCOL* pCol, sal_uInt16 nOffsetTol );
     199             :     static void         MakeCol( ScHTMLColOffset*, sal_uInt16& nOffset,
     200             :                                 sal_uInt16& nWidth, sal_uInt16 nOffsetTol,
     201             :                                 sal_uInt16 nWidthTol );
     202             :     static void         MakeColNoRef( ScHTMLColOffset*, sal_uInt16 nOffset, // like MakeCol, but takes offset and width by value
     203             :                                 sal_uInt16 nWidth, sal_uInt16 nOffsetTol,
     204             :                                 sal_uInt16 nWidthTol );
     205             :     static void         ModifyOffset( ScHTMLColOffset*, sal_uInt16& nOldOffset,
     206             :                                     sal_uInt16& nNewOffset, sal_uInt16 nOffsetTol );
     207             :     void                Colonize( ScEEParseEntry* );
     208             :     sal_uInt16              GetWidth( ScEEParseEntry* );
     209             :     void                SetWidths();
     210             :     void                Adjust();
     211             : 
     212             :     sal_uInt16              GetWidthPixel( const HTMLOption& );
     213             :     bool                IsAtBeginningOfText( ImportInfo* );
     214             : 
     215             :     void                TableOn( ImportInfo* );
     216             :     void                ColOn( ImportInfo* );
     217             :     void                TableRowOn( ImportInfo* );
     218             :     void                TableRowOff( ImportInfo* );
     219             :     void                TableDataOn( ImportInfo* );
     220             :     void                TableDataOff( ImportInfo* );
     221             :     void                TableOff( ImportInfo* );
     222             :     void                Image( ImportInfo* );
     223             :     void                AnchorOn( ImportInfo* );
     224             :     void                FontOn( ImportInfo* );
     225             : 
     226             : public:
     227             :                         ScHTMLLayoutParser( EditEngine*, const String& rBaseURL, const Size& aPageSize, ScDocument* );
     228             :     virtual             ~ScHTMLLayoutParser();
     229             :     virtual sal_uLong       Read( SvStream&, const String& rBaseURL  );
     230             :     virtual const ScHTMLTable*  GetGlobalTable() const;
     231             : };
     232             : 
     233             : 
     234             : 
     235             : // ============================================================================
     236             : // HTML DATA QUERY PARSER
     237             : // ============================================================================
     238             : 
     239             : /** Declares the orientation in or for a table: column or row. */
     240             : enum ScHTMLOrient { tdCol = 0 , tdRow = 1 };
     241             : 
     242             : /** Type for a unique identifier for each table. */
     243             : typedef sal_uInt16 ScHTMLTableId;
     244             : /** Identifier of the "global table" (the entire HTML document). */
     245             : const ScHTMLTableId SC_HTML_GLOBAL_TABLE = 0;
     246             : /** Used as table index for normal (non-table) entries in ScHTMLEntry structs. */
     247             : const ScHTMLTableId SC_HTML_NO_TABLE = 0; // note: same value as SC_HTML_GLOBAL_TABLE
     248             : 
     249             : // ============================================================================
     250             : 
     251             : /** A 2D cell position in an HTML table. */
     252             : struct ScHTMLPos
     253             : {
     254             :     SCCOL               mnCol;
     255             :     SCROW               mnRow;
     256             : 
     257           4 :     inline explicit     ScHTMLPos() : mnCol( 0 ), mnRow( 0 ) {} // position (0, 0)
     258           7 :     inline explicit     ScHTMLPos( SCCOL nCol, SCROW nRow ) :
     259           7 :                             mnCol( nCol ), mnRow( nRow ) {}
     260           0 :     inline explicit     ScHTMLPos( const ScAddress& rAddr ) { Set( rAddr ); } // takes column and row from rAddr
     261             : 
     262          14 :     inline SCCOLROW     Get( ScHTMLOrient eOrient ) const // column for tdCol, row otherwise
     263          14 :                             { return (eOrient == tdCol) ? mnCol : mnRow; }
     264           0 :     inline void         Set( SCCOL nCol, SCROW nRow )
     265           0 :                             { mnCol = nCol; mnRow = nRow; }
     266           0 :     inline void         Set( const ScAddress& rAddr ) // uses rAddr.Col() and rAddr.Row() only
     267           0 :                             { Set( rAddr.Col(), rAddr.Row() ); }
     268             :     inline void         Move( SCsCOL nColDiff, SCsROW nRowDiff ) // adds the signed differences to column and row
     269             :                             { mnCol = mnCol + nColDiff; mnRow = mnRow + nRowDiff; }
     270          71 :     inline ScAddress    MakeAddr() const // third ScAddress argument is always 0
     271          71 :                             { return ScAddress( mnCol, mnRow, 0 ); }
     272             : };
     273             : 
     274             : inline bool operator==( const ScHTMLPos& rPos1, const ScHTMLPos& rPos2 ) // true if both row and column match
     275             : {
     276             :     return (rPos1.mnRow == rPos2.mnRow) && (rPos1.mnCol == rPos2.mnCol);
     277             : }
     278             : 
     279          21 : inline bool operator<( const ScHTMLPos& rPos1, const ScHTMLPos& rPos2 ) // orders by row first, then by column
     280             : {
     281          21 :     return (rPos1.mnRow < rPos2.mnRow) || ((rPos1.mnRow == rPos2.mnRow) && (rPos1.mnCol < rPos2.mnCol));
     282             : }
     283             : 
     284             : // ----------------------------------------------------------------------------
     285             : 
     286             : /** A 2D cell size in an HTML table. */
     287             : struct ScHTMLSize
     288             : {
     289             :     SCCOL               mnCols;
     290             :     SCROW               mnRows;
     291             : 
     292             :     inline explicit     ScHTMLSize() : mnCols( 0 ), mnRows( 0 ) {} // size 0 x 0
     293          44 :     inline explicit     ScHTMLSize( SCCOL nCols, SCROW nRows ) :
     294          44 :                             mnCols( nCols ), mnRows( nRows ) {}
     295             : 
     296             :     inline SCCOLROW     Get( ScHTMLOrient eOrient ) const // column count for tdCol, row count otherwise
     297             :                             { return (eOrient == tdCol) ? mnCols : mnRows; }
     298           0 :     inline void         Set( SCCOL nCols, SCROW nRows )
     299           0 :                             { mnCols = nCols; mnRows = nRows; }
     300             :     inline void         Expand( SCsCOL nColDiff, SCsROW nRowDiff ) // adds the signed differences to both counts
     301             :                             { mnCols = mnCols + nColDiff; mnRows = mnRows + nRowDiff; }
     302             : };
     303             : 
     304             : inline bool operator==( const ScHTMLSize& rSize1, const ScHTMLSize& rSize2 ) // true if both row and column counts match
     305             : {
     306             :     return (rSize1.mnRows == rSize2.mnRows) && (rSize1.mnCols == rSize2.mnCols);
     307             : }
     308             : 
     309             : // ============================================================================
     310             : 
     311             : /** A single entry containing a line of text or representing a table. */
     312          21 : struct ScHTMLEntry : public ScEEParseEntry
     313             : {
     314             : public:
     315             :     explicit            ScHTMLEntry(
     316             :                             const SfxItemSet& rItemSet,
     317             :                             ScHTMLTableId nTableId = SC_HTML_NO_TABLE );
     318             : 
     319             :     /** Returns true, if the selection of the entry is empty. */
     320           0 :     inline bool         IsEmpty() const { return !aSel.HasRange(); }
     321             :     /** Returns true, if the entry has any content to be imported. */
     322             :     bool                HasContents() const;
     323             :     /** Returns true, if the entry represents a table (nTab differs from SC_HTML_NO_TABLE). */
     324          34 :     inline bool         IsTable() const { return nTab != SC_HTML_NO_TABLE; }
     325             :     /** Returns the table identifier stored in this entry (nTab). */
     326          21 :     inline ScHTMLTableId GetTableId() const { return nTab; }
     327             : 
     328             :     /** Sets or clears the import always state. */
     329           0 :     inline void         SetImportAlways( bool bSet = true ) { mbImportAlways = bSet; }
     330             :     /** Sets start point of the entry selection to the start of the import info object. */
     331             :     void                AdjustStart( const ImportInfo& rInfo );
     332             :     /** Sets end point of the entry selection to the end of the import info object. */
     333             :     void                AdjustEnd( const ImportInfo& rInfo );
     334             :     /** Deletes leading and trailing empty paragraphs from the entry. */
     335             :     void                Strip( const EditEngine& rEditEngine );
     336             : 
     337             :     /** Returns read/write access to the item set of this entry. */
     338           0 :     inline SfxItemSet&  GetItemSet() { return aItemSet; }
     339             :     /** Returns read-only access to the item set of this entry. */
     340             :     inline const SfxItemSet& GetItemSet() const { return aItemSet; }
     341             : 
     342             : private:
     343             :     bool                mbImportAlways;     /// true = Always import this entry.
     344             : };
     345             : 
     346             : // ============================================================================
     347             : 
     348             : /** This struct handles creation of unique table identifiers. */
     349             : struct ScHTMLTableAutoId // has a const member and a reference member, so it cannot be copy-assigned
     350             : {
     351             :     const ScHTMLTableId mnTableId;          /// The created unique table identifier.
     352             :     ScHTMLTableId&      mrnUnusedId;        /// Reference to global unused identifier variable.
     353             : 
     354             :     /** The constructor assigns an unused identifier to member mnTableId. */
     355             :     explicit            ScHTMLTableAutoId( ScHTMLTableId& rnUnusedId );
     356             : };
     357             : 
     358             : // ----------------------------------------------------------------------------
     359             : 
     360             : class ScHTMLTableMap;
     361             : 
     362             : /** Stores data for one table in an HTML document.
     363             : 
     364             :     This class does the main work for importing an HTML document. It manages
     365             :     the correct insertion of parse entries into the correct cells and the
     366             :     creation of nested tables. Recalculation of resulting document size and
     367             :     position is done recursively in all nested tables.
     368             :  */
     369             : class ScHTMLTable
     370             : {
     371             : public:
     372             :     /** Creates a new HTML table without content.
     373             :         @descr  Internally handles a current cell position. This position is
     374             :             invalid until first calls of RowOn() and DataOn().
     375             :         @param rParentTable  Reference to the parent table that owns this table.
     376             :         @param bPreFormText  true = Table is based on preformatted text (<pre> tag). */
     377             :     explicit            ScHTMLTable(
     378             :                             ScHTMLTable& rParentTable,
     379             :                             const ImportInfo& rInfo,
     380             :                             bool bPreFormText );
     381             : 
     382             :     virtual             ~ScHTMLTable();
     383             : 
     384             :     /** Returns the name of the table, specified in the TABLE tag. */
     385           1 :     inline const rtl::OUString& GetTableName() const { return maTableName; }
     386             :     /** Returns the unique identifier of the table. */
     387           7 :     inline ScHTMLTableId GetTableId() const { return maTableId.mnTableId; }
     388             :     /** Returns the table size. */
     389             :     inline const ScHTMLSize& GetSize() const { return maSize; }
     390             :     /** Returns the cell spanning of the specified cell. */
     391             :     ScHTMLSize          GetSpan( const ScHTMLPos& rCellPos ) const;
     392             : 
     393             :     /** Searches in all nested tables for the specified table.
     394             :         @param nTableId  Unique identifier of the table. */
     395             :     ScHTMLTable*        FindNestedTable( ScHTMLTableId nTableId ) const;
     396             : 
     397             :     /** Puts the item into the item set of the current entry. */
     398             :     void                PutItem( const SfxPoolItem& rItem );
     399             :     /** Inserts a text portion into current entry. */
     400             :     void                PutText( const ImportInfo& rInfo );
     401             :     /** Inserts a new line, if in preformatted text, else does nothing. */
     402             :     void                InsertPara( const ImportInfo& rInfo );
     403             : 
     404             :     /** Inserts a line break (<br> tag).
     405             :         @descr  Inserts the current entry regardless if it is empty. */
     406             :     void                BreakOn();
     407             :     /** Inserts a heading line (<p> and <h*> tags). */
     408             :     void                HeadingOn();
     409             :     /** Processes a hyperlink (<a> tag). */
     410             :     void                AnchorOn();
     411             : 
     412             :     /** Starts a *new* table nested in this table (<table> tag).
     413             :         @return  Pointer to the new table. */
     414             :     ScHTMLTable*        TableOn( const ImportInfo& rInfo );
     415             :     /** Closes *this* table (</table> tag).
     416             :         @return  Pointer to the parent table. */
     417             :     ScHTMLTable*        TableOff( const ImportInfo& rInfo );
     418             :     /** Starts a *new* table based on preformatted text (<pre> tag).
     419             :         @return  Pointer to the new table. */
     420             :     ScHTMLTable*        PreOn( const ImportInfo& rInfo );
     421             :     /** Closes *this* table based on preformatted text (</pre> tag).
     422             :         @return  Pointer to the parent table. */
     423             :     ScHTMLTable*        PreOff( const ImportInfo& rInfo );
     424             : 
     425             :     /** Starts next row (<tr> tag).
     426             :         @descr  Cell address is invalid until first call of DataOn(). */
     427             :     void                RowOn( const ImportInfo& rInfo );
     428             :     /** Closes the current row (<tr> tag).
     429             :         @descr  Cell address is invalid until call of RowOn() and DataOn(). */
     430             :     void                RowOff( const ImportInfo& rInfo );
     431             :     /** Starts the next cell (<td> or <th> tag). */
     432             :     void                DataOn( const ImportInfo& rInfo );
     433             :     /** Closes the current cell (</td> or </th> tag).
     434             :         @descr  Cell address is invalid until next call of DataOn(). */
     435             :     void                DataOff( const ImportInfo& rInfo );
     436             : 
     437             :     /** Starts the body of the HTML document (<body> tag). */
     438             :     void                BodyOn( const ImportInfo& rInfo );
     439             :     /** Closes the body of the HTML document (</body> tag). */
     440             :     void                BodyOff( const ImportInfo& rInfo );
     441             : 
     442             :     /** Closes *this* table (</table> tag) or preformatted text (</pre> tag).
     443             :         @descr  Used to close this table object regardless on opening tag type.
     444             :         @return  Pointer to the parent table, or this, if no parent found. */
     445             :     ScHTMLTable*        CloseTable( const ImportInfo& rInfo );
     446             : 
     447             :     /** Returns the resulting document row/column count of the specified HTML row/column. */
     448             :     SCCOLROW            GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos ) const;
     449             :     /** Returns the resulting document row/column count in the half-open range [nCellBegin, nCellEnd). */
     450             :     SCCOLROW            GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellBegin, SCCOLROW nCellEnd ) const;
     451             :     /** Returns the total document row/column count in the specified direction. */
     452             :     SCCOLROW            GetDocSize( ScHTMLOrient eOrient ) const;
     453             :     /** Returns the total document row/column count of the specified HTML cell. */
     454             :     ScHTMLSize          GetDocSize( const ScHTMLPos& rCellPos ) const;
     455             : 
     456             :     /** Returns the resulting Calc position of the top left edge of the table. */
     457           1 :     inline const ScHTMLPos& GetDocPos() const { return maDocBasePos; }
     458             :     /** Calculates the resulting Calc position of the specified HTML column/row. */
     459             :     SCCOLROW            GetDocPos( ScHTMLOrient eOrient, SCCOLROW nCellPos = 0 ) const;
     460             :     /** Calculates the resulting Calc position of the specified HTML cell. */
     461             :     ScHTMLPos           GetDocPos( const ScHTMLPos& rCellPos ) const;
     462             : 
     463             :     /** Calculates the current Calc document area of this table. */
     464             :     void                GetDocRange( ScRange& rRange ) const;
     465             : 
     466             :     /** Applies border formatting to the passed document. */
     467             :     void                ApplyCellBorders( ScDocument* pDoc, const ScAddress& rFirstPos ) const;
     468             : 
     469             :     SvNumberFormatter* GetFormatTable();
     470             : 
     471             : protected:
     472             :     /** Creates a new HTML table without parent.
     473             :         @descr  This constructor is used to create the "global table". */
     474             :     explicit            ScHTMLTable(
     475             :                             SfxItemPool& rPool,
     476             :                             EditEngine& rEditEngine,
     477             :                             ::std::vector< ScEEParseEntry* >& rEEParseList,
     478             :                             ScHTMLTableId& rnUnusedId, ScHTMLParser* pParser );
     479             : 
     480             :     /** Fills all empty cells in this and nested tables with dummy parse entries. */
     481             :     void                FillEmptyCells();
     482             :     /** Recalculates the size of all columns/rows in the table, regarding nested tables. */
     483             :     void                RecalcDocSize();
     484             :     /** Recalculates the position of all cell entries and nested tables.
     485             :         @param rBasePos  The origin of the table in the Calc document. */
     486             :     void                RecalcDocPos( const ScHTMLPos& rBasePos );
     487             : 
     488             : private:
     489             :     typedef ::std::auto_ptr< ScHTMLTableMap >           ScHTMLTableMapPtr;  /// Owning pointer to a nested-table container (see mxNestedTables).
     490             :     typedef ::std::auto_ptr< SfxItemSet >               SfxItemSetPtr;      /// Owning pointer to an item set (see mxRowItemSet, mxDataItemSet).
     491             :     typedef ::std::vector< SCCOLROW >                   ScSizeVec;          /// Vector of column/row counts (see maCumSizes).
     492             :     typedef ::std::list< ScHTMLEntry* >                 ScHTMLEntryList;    /// List of entry pointers belonging to one cell.
     493             :     typedef ::std::map< ScHTMLPos, ScHTMLEntryList >    ScHTMLEntryMap;     /// Maps a cell position to its entry list.
     494             :     typedef ::std::auto_ptr< ScHTMLEntry >              ScHTMLEntryPtr;     /// Owning pointer to one entry. NOTE(review): std::auto_ptr is deprecated since C++11 and removed in C++17.
     495             : 
     496             :     /** Returns true, if the current cell does not contain an entry yet. */
     497             :     bool                IsEmptyCell() const;
     498             :     /** Returns the item set from cell, row, or table, depending on current state. */
     499             :     const SfxItemSet&   GetCurrItemSet() const;
     500             : 
     501             :     /** Returns true, if import info represents a space character. */
     502             :     static bool         IsSpaceCharInfo( const ImportInfo& rInfo );
     503             : 
     504             :     /** Creates and returns a new empty flying entry at position (0,0); the returned auto_ptr owns it. */
     505             :     ScHTMLEntryPtr      CreateEntry() const;
     506             :     /** Creates a new flying entry.
     507             :         @param rInfo  Contains the initial edit engine selection for the entry. */
     508             :     void                CreateNewEntry( const ImportInfo& rInfo );
     509             : 
     510             :     /** Inserts an empty line in front of the next entry. */
     511             :     void                InsertLeadingEmptyLine();
     512             : 
     513             :     /** Pushes the passed entry into the list of the current cell. */
     514             :     void                ImplPushEntryToList( ScHTMLEntryList& rEntryList, ScHTMLEntryPtr& rxEntry );
     515             :     /** Tries to insert the entry into the current cell.
     516             :         @descr  If insertion is not possible (i.e., currently no cell open), the
     517             :         entry will be inserted into the parent table.
     518             :         @return  true = Entry has been pushed into the current cell; false = Entry dropped. */
     519             :     bool                PushEntry( ScHTMLEntryPtr& rxEntry );
     520             :     /** Puts the current entry into the entry list, if it is not empty.
     521             :         @param rInfo  The import info struct containing the end position of the current entry.
     522             :         @param bLastInCell  true = If cell is still empty, put this entry always.
     523             :         @return  true = Entry has been pushed into the current cell; false = Entry dropped. */
     524             :     bool                PushEntry( const ImportInfo& rInfo, bool bLastInCell = false );
     525             :     /** Pushes a new entry into current cell which references a nested table.
     526             :         @return  true = Entry has been pushed into the current cell; false = Entry dropped. */
     527             :     bool                PushTableEntry( ScHTMLTableId nTableId );
     528             : 
     529             :     /** Tries to find a table from the table container.
     530             :         @descr  Assumes that the table is located in the current container or
     531             :         that the passed table identifier is 0.
     532             :         @param nTableId  Unique identifier of the table or 0. */
     533             :     ScHTMLTable*        GetExistingTable( ScHTMLTableId nTableId ) const;
     534             :     /** Inserts a nested table in the current cell at the specified position.
     535             :         @param bPreFormText  true = New table is based on preformatted text (<pre> tag). */
     536             :     ScHTMLTable*        InsertNestedTable( const ImportInfo& rInfo, bool bPreFormText );
     537             : 
     538             :     /** Inserts a new cell in an unused position, starting from current cell position. */
     539             :     void                InsertNewCell( const ScHTMLSize& rSpanSize );
     540             : 
     541             :     /** Set internal states for a new table row. */
     542             :     void                ImplRowOn();
     543             :     /** Set internal states for leaving a table row. */
     544             :     void                ImplRowOff();
     545             :     /** Set internal states for entering a new table cell. */
     546             :     void                ImplDataOn( const ScHTMLSize& rSpanSize );
     547             :     /** Set internal states for leaving a table cell. */
     548             :     void                ImplDataOff();
     549             : 
     550             :     /** Inserts additional formatting options from import info into the item set. */
     551             :     void                ProcessFormatOptions( SfxItemSet& rItemSet, const ImportInfo& rInfo );
     552             : 
     553             :     /** Updates the document column/row size of the specified column or row.
     554             :         @descr  Only increases the present count, never decreases. */
     555             :     void                SetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos, SCCOLROW nSize );
     556             :     /** Calculates and sets the resulting size the cell needs in the document.
     557             :         @descr  Reduces the needed size in merged cells.
     558             :         @param nCellPos  The first column/row position of the (merged) cell.
     559             :         @param nCellSpan  The cell spanning in the specified orientation.
     560             :         @param nRealDocSize  The raw document size of all entries of the cell. */
     561             :     void                CalcNeededDocSize(
     562             :                             ScHTMLOrient eOrient, SCCOLROW nCellPos,
     563             :                             SCCOLROW nCellSpan, SCCOLROW nRealDocSize );
     564             : 
     565             : private:
     566             :     ScHTMLTable*        mpParentTable;      /// Pointer to parent table (the "global table" is created without a parent).
     567             :     ScHTMLTableMapPtr   mxNestedTables;     /// Table of nested HTML tables.
     568             :     rtl::OUString       maTableName;        /// Table name from <table id> option.
     569             :     ScHTMLTableAutoId   maTableId;          /// Unique identifier of this table.
     570             :     SfxItemSet          maTableItemSet;     /// Items for the entire table.
     571             :     SfxItemSetPtr       mxRowItemSet;       /// Items for the current table row.
     572             :     SfxItemSetPtr       mxDataItemSet;      /// Items for the current cell.
     573             :     ScRangeList         maHMergedCells;     /// List of all horizontally merged cells.
     574             :     ScRangeList         maVMergedCells;     /// List of all vertically merged cells.
     575             :     ScRangeList         maUsedCells;        /// List of all used cells.
     576             :     EditEngine&         mrEditEngine;       /// Edit engine (from ScEEParser).
     577             :     ::std::vector< ScEEParseEntry* >& mrEEParseList;      /// List that owns the parse entries (from ScEEParser).
     578             :     ScHTMLEntryMap      maEntryMap;         /// Map from cell position to the list of entries of that cell.
     579             :     ScHTMLEntryList*    mpCurrEntryList;    /// Current entry list from map for faster access.
     580             :     ScHTMLEntryPtr      mxCurrEntry;        /// Working entry, not yet inserted in a list.
     581             :     ScSizeVec           maCumSizes[ 2 ];    /// Cumulative cell counts for each HTML table column/row (two vectors; presumably indexed by ScHTMLOrient - confirm).
     582             :     ScHTMLSize          maSize;             /// Size of the table.
     583             :     ScHTMLPos           maCurrCell;         /// Address of current cell to fill.
     584             :     ScHTMLPos           maDocBasePos;       /// Resulting base address in a Calc document.
     585             :     ScHTMLParser*       mpParser;           /// Associated HTML parser; presumably the pParser constructor argument (confirm).
     586             :     bool                mbBorderOn:1;       /// true = Table borders on.
     587             :     bool                mbPreFormText:1;    /// true = Table from preformatted text (<pre> tag).
     588             :     bool                mbRowOn:1;          /// true = Inside of <tr> </tr>.
     589             :     bool                mbDataOn:1;         /// true = Inside of <td> </td> or <th> </th>.
     590             :     bool                mbPushEmptyLine:1;  /// true = Insert empty line before current entry.
     591             : };
     592             : 
     593             : // ----------------------------------------------------------------------------
     594             : 
     595             : /** The "global table" representing the entire HTML document (an ScHTMLTable created without a parent table). */
     596             : class ScHTMLGlobalTable : public ScHTMLTable
     597             : {
     598             : public:
     599             :     explicit            ScHTMLGlobalTable(
     600             :                             SfxItemPool& rPool,
     601             :                             EditEngine& rEditEngine,
     602             :                             ::std::vector< ScEEParseEntry* >& rEEParseList,
     603             :                             ScHTMLTableId& rnUnusedId, ScHTMLParser* pParser ); /// Same argument list as the protected parent-less ScHTMLTable constructor.
     604             : 
     605             :     virtual             ~ScHTMLGlobalTable();
     606             : 
     607             :     /** Recalculates sizes and resulting positions of all document entries. */
     608             :     void                Recalc();
     609             : };
     610             : 
     611             : // ============================================================================
     612             : 
     613             : /** The HTML parser for data queries. Focuses on data import, not on layout.
     614             : 
     615             :     Builds the table structure correctly, ignores extended formatting like
     616             :     pictures or column widths.
     617             :  */
     618             : class ScHTMLQueryParser : public ScHTMLParser
     619             : {
     620             : public:
     621             :     explicit            ScHTMLQueryParser( EditEngine* pEditEngine, ScDocument* pDoc );
     622             :     virtual             ~ScHTMLQueryParser();
     623             : 
     624             :     virtual sal_uLong       Read( SvStream& rStrm, const String& rBaseURL  );
     625             : 
     626             :     /** Returns the "global table" which contains the entire HTML document. */
     627             :     virtual const ScHTMLTable* GetGlobalTable() const;
     628             : 
     629             : private:
     630             :     /** Handles all possible tags in the HTML document. */
     631             :     void                ProcessToken( const ImportInfo& rInfo );
     632             :     /** Inserts a text portion into current entry. */
     633             :     void                InsertText( const ImportInfo& rInfo );
     634             :     /** Processes the <font> tag. */
     635             :     void                FontOn( const ImportInfo& rInfo );
     636             : 
     637             :     /** Processes the <meta> tag. */
     638             :     void                MetaOn( const ImportInfo& rInfo );
     639             :     /** Opens the title of the HTML document (<title> tag). */
     640             :     void                TitleOn( const ImportInfo& rInfo );
     641             :     /** Closes the title of the HTML document (</title> tag). */
     642             :     void                TitleOff( const ImportInfo& rInfo );
     643             : 
     644             :     /** Opens a new table at the current position. */
     645             :     void                TableOn( const ImportInfo& rInfo );
     646             :     /** Closes the current table. */
     647             :     void                TableOff( const ImportInfo& rInfo );
     648             :     /** Opens a new table based on preformatted text. */
     649             :     void                PreOn( const ImportInfo& rInfo );
     650             :     /** Closes the current preformatted text table. */
     651             :     void                PreOff( const ImportInfo& rInfo );
     652             : 
     653             :     /** Closes the current table, regardless of the opening tag (<table> or <pre>). */
     654             :     void                CloseTable( const ImportInfo& rInfo );
     655             : 
     656             :     void                ParseStyle(const rtl::OUString& rStrm); /// NOTE(review): undocumented; presumably parses the text of a <style> element - confirm.
     657             : 
     658             :     DECL_LINK( HTMLImportHdl, const ImportInfo* ); /// Link handler taking an ImportInfo pointer; NOTE(review): presumably the import callback that feeds ProcessToken() - confirm.
     659             : 
     660             : private:
     661             :     typedef ::std::auto_ptr< ScHTMLGlobalTable >    ScHTMLGlobalTablePtr;
     662             : 
     663             :     rtl::OUStringBuffer maTitle;            /// The title of the document.
     664             :     ScHTMLGlobalTablePtr mxGlobTable;       /// Contains the entire imported document.
     665             :     ScHTMLTable*        mpCurrTable;        /// Pointer to current table (performance).
     666             :     ScHTMLTableId       mnUnusedId;         /// First unused table identifier.
     667             :     bool                mbTitleOn;          /// true = Inside of <title> </title>.
     668             : };
     669             : 
     670             : 
     671             : // ============================================================================
     672             : 
     673             : #endif
     674             : 
     675             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10