Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #ifndef SC_HTMLPARS_HXX
21 : #define SC_HTMLPARS_HXX
22 :
23 : #include <memory>
24 : #include <stack>
25 : #include <vector>
26 : #include <list>
27 : #include <map>
28 : #include <o3tl/sorted_vector.hxx>
29 : #include <boost/ptr_container/ptr_map.hpp>
30 : #include <boost/unordered_map.hpp>
31 :
32 : #include "rangelst.hxx"
33 : #include "eeparser.hxx"
34 :
35 : const sal_uInt32 SC_HTML_FONTSIZES = 7; // number of HTML font sizes; same as in export (HTML options)
36 :
37 : // Pixel tolerance for SeekOffset and related.
38 : const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_SMALL = 1; // single table
39 : const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_LARGE = 10; // nested
40 :
41 : // ============================================================================
42 : // BASE class for HTML parser classes
43 : // ============================================================================
44 :
45 : class ScHTMLTable;
46 :
47 : /**
48 : * Collection of HTML style data parsed from the content of <style>
49 : * elements. Properties are stored per class, per element, and per element/class pair.
50 : */
51 1 : class ScHTMLStyles
52 : {
53 : typedef ::boost::unordered_map<rtl::OUString, rtl::OUString, rtl::OUStringHash> PropsType; // property name -> property value
54 : typedef ::boost::ptr_map<rtl::OUString, PropsType> NamePropsType; // element or class name -> properties
55 : typedef ::boost::ptr_map<rtl::OUString, NamePropsType> ElemsType; // element name -> class name -> properties
56 :
57 : NamePropsType maGlobalProps; /// global properties (for a given class for all elements)
58 : NamePropsType maElemGlobalProps; /// element global properties (no class specified)
59 : ElemsType maElemProps; /// element to class to properties (both element and class are given)
60 : const rtl::OUString maEmpty; /// A persistent empty string; presumably what getPropertyValue() returns by reference when nothing matches (confirm in the implementation).
61 : public:
62 : ScHTMLStyles();
63 :
64 : void add(const char* pElemName, size_t nElemName, const char* pClassName, size_t nClassName,
65 : const rtl::OUString& aProp, const rtl::OUString& aValue); /// Adds a property/value pair; element and class names are each given as a char pointer plus a size_t (presumably the length).
66 :
67 : /**
68 : * Find best-matching property value for given element and class names.
69 : */
70 : const rtl::OUString& getPropertyValue(
71 : const rtl::OUString& rElem, const rtl::OUString& rClass, const rtl::OUString& rPropName) const;
72 :
73 : private:
74 : static void insertProp(
75 : NamePropsType& rProps, const rtl::OUString& aName,
76 : const rtl::OUString& aProp, const rtl::OUString& aValue); /// Static helper working on the passed map; presumably stores aProp/aValue under aName (implementation not visible here).
77 : };
78 :
79 : /** Base class for HTML parser classes. */
80 : class ScHTMLParser : public ScEEParser
81 : {
82 : ScHTMLStyles maStyles; /// HTML style data (private to this class).
83 : protected:
84 : sal_uInt32 maFontHeights[ SC_HTML_FONTSIZES ]; /// One entry per HTML font size.
85 : ScDocument* mpDoc; /// The destination document.
86 :
87 : public:
88 : explicit ScHTMLParser( EditEngine* pEditEngine, ScDocument* pDoc );
89 : virtual ~ScHTMLParser();
90 :
91 : virtual sal_uLong Read( SvStream& rStrm, const String& rBaseURL ) = 0; /// Pure virtual; each concrete parser declares its own implementation.
92 :
93 : ScHTMLStyles& GetStyles(); /// Read/write access to style data.
94 : ScDocument& GetDoc(); /// Returns a reference, although mpDoc is stored as a pointer.
95 :
96 : /** Returns the "global table" which contains the entire HTML document. */
97 : virtual const ScHTMLTable* GetGlobalTable() const = 0;
98 : };
99 :
100 :
101 : // ============================================================================
102 :
103 : typedef o3tl::sorted_vector<sal_uLong> ScHTMLColOffset; // sorted vector of sal_uLong values (column offsets, going by the name)
104 :
105 : struct ScHTMLTableStackEntry // plain value holder; several fields carry the same names as members of ScHTMLLayoutParser
106 : {
107 : ScRangeListRef xLockedList;
108 : ScEEParseEntry* pCellEntry; // raw pointer, not deleted by this struct
109 : ScHTMLColOffset* pLocalColOffset; // raw pointer, not deleted by this struct
110 : sal_uLong nFirstTableCell;
111 : SCCOL nColCnt;
112 : SCROW nRowCnt;
113 : SCCOL nColCntStart;
114 : SCCOL nMaxCol;
115 : sal_uInt16 nTable;
116 : sal_uInt16 nTableWidth;
117 : sal_uInt16 nColOffset;
118 : sal_uInt16 nColOffsetStart;
119 : bool bFirstRow;
120 0 : ScHTMLTableStackEntry( ScEEParseEntry* pE, // the constructor only copies its arguments into the members
121 : const ScRangeListRef& rL, ScHTMLColOffset* pTO,
122 : sal_uLong nFTC,
123 : SCCOL nCol, SCROW nRow,
124 : SCCOL nStart, SCCOL nMax, sal_uInt16 nTab,
125 : sal_uInt16 nTW, sal_uInt16 nCO, sal_uInt16 nCOS,
126 : bool bFR )
127 : : xLockedList( rL ), pCellEntry( pE ),
128 : pLocalColOffset( pTO ),
129 : nFirstTableCell( nFTC ),
130 : nColCnt( nCol ), nRowCnt( nRow ),
131 : nColCntStart( nStart ), nMaxCol( nMax ),
132 : nTable( nTab ), nTableWidth( nTW ),
133 : nColOffset( nCO ), nColOffsetStart( nCOS ),
134 0 : bFirstRow( bFR )
135 0 : {}
136 0 : ~ScHTMLTableStackEntry() {} // empty: pCellEntry and pLocalColOffset are left untouched
137 : };
138 : typedef ::std::stack< ScHTMLTableStackEntry* > ScHTMLTableStack; // stores raw pointers; popping does not delete the entry
139 :
140 : struct ScHTMLAdjustStackEntry // plain value holder for one column and two row values
141 : {
142 : SCCOL nLastCol;
143 : SCROW nNextRow;
144 : SCROW nCurRow;
145 0 : ScHTMLAdjustStackEntry( SCCOL nLCol, SCROW nNRow, // the constructor only copies its arguments into the members
146 : SCROW nCRow )
147 : : nLastCol( nLCol ), nNextRow( nNRow ),
148 0 : nCurRow( nCRow )
149 0 : {}
150 : };
151 : typedef ::std::stack< ScHTMLAdjustStackEntry* > ScHTMLAdjustStack; // stores raw pointers; popping does not delete the entry
152 :
153 :
154 : // ============================================================================
155 :
156 : class EditEngine;
157 : class ScDocument;
158 : class HTMLOption;
159 :
160 : // TODO these need better names
161 : typedef ::std::map<SCROW, SCROW> InnerMap; // row -> row
162 : typedef ::std::map<sal_uInt16, InnerMap*> OuterMap; // sal_uInt16 key -> raw pointer to an InnerMap; type of ScHTMLLayoutParser::pTables
163 :
164 : class ScHTMLLayoutParser : public ScHTMLParser
165 : {
166 : private:
167 : Size aPageSize;
168 : rtl::OUString aBaseURL;
169 : ScHTMLTableStack aTableStack;
170 : rtl::OUString aString;
171 : ScRangeListRef xLockedList; // per table
172 : OuterMap* pTables;
173 : ScHTMLColOffset* pColOffset;
174 : ScHTMLColOffset* pLocalColOffset; // per table
175 : sal_uLong nFirstTableCell; // per table
176 : short nTableLevel;
177 : sal_uInt16 nTable;
178 : sal_uInt16 nMaxTable;
179 : SCCOL nColCntStart; // first column per table
180 : SCCOL nMaxCol; // per table
181 : sal_uInt16 nTableWidth; // per table
182 : sal_uInt16 nColOffset; // current, in pixels
183 : sal_uInt16 nColOffsetStart; // start value per table, in pixels
184 : sal_uInt16 nOffsetTolerance; // for use with SeekOffset and related
185 : bool bTabInTabCell:1;
186 : bool bFirstRow:1; // per table, whether in the first row
187 : bool bInCell:1;
188 : bool bInTitle:1;
189 :
190 : DECL_LINK( HTMLImportHdl, ImportInfo* );
191 : void NewActEntry( ScEEParseEntry* );
192 : void EntryEnd( ScEEParseEntry*, const ESelection& );
193 : void ProcToken( ImportInfo* );
194 : void CloseEntry( ImportInfo* );
195 : void NextRow( ImportInfo* );
196 : void SkipLocked( ScEEParseEntry*, bool bJoin = true );
197 : static bool SeekOffset( ScHTMLColOffset*, sal_uInt16 nOffset,
198 : SCCOL* pCol, sal_uInt16 nOffsetTol );
199 : static void MakeCol( ScHTMLColOffset*, sal_uInt16& nOffset, // takes offset and width by non-const reference
200 : sal_uInt16& nWidth, sal_uInt16 nOffsetTol,
201 : sal_uInt16 nWidthTol );
202 : static void MakeColNoRef( ScHTMLColOffset*, sal_uInt16 nOffset, // same parameter list as MakeCol, but offset and width by value
203 : sal_uInt16 nWidth, sal_uInt16 nOffsetTol,
204 : sal_uInt16 nWidthTol );
205 : static void ModifyOffset( ScHTMLColOffset*, sal_uInt16& nOldOffset,
206 : sal_uInt16& nNewOffset, sal_uInt16 nOffsetTol );
207 : void Colonize( ScEEParseEntry* );
208 : sal_uInt16 GetWidth( ScEEParseEntry* );
209 : void SetWidths();
210 : void Adjust();
211 :
212 : sal_uInt16 GetWidthPixel( const HTMLOption& );
213 : bool IsAtBeginningOfText( ImportInfo* );
214 :
215 : void TableOn( ImportInfo* );
216 : void ColOn( ImportInfo* );
217 : void TableRowOn( ImportInfo* );
218 : void TableRowOff( ImportInfo* );
219 : void TableDataOn( ImportInfo* );
220 : void TableDataOff( ImportInfo* );
221 : void TableOff( ImportInfo* );
222 : void Image( ImportInfo* );
223 : void AnchorOn( ImportInfo* );
224 : void FontOn( ImportInfo* );
225 :
226 : public:
227 : ScHTMLLayoutParser( EditEngine*, const String& rBaseURL, const Size& aPageSize, ScDocument* );
228 : virtual ~ScHTMLLayoutParser();
229 : virtual sal_uLong Read( SvStream&, const String& rBaseURL );
230 : virtual const ScHTMLTable* GetGlobalTable() const;
231 : };
232 :
233 :
234 :
235 : // ============================================================================
236 : // HTML DATA QUERY PARSER
237 : // ============================================================================
238 :
239 : /** Declares the orientation in or for a table: column or row. */
240 : enum ScHTMLOrient { tdCol = 0 , tdRow = 1 };
241 :
242 : /** Type for a unique identifier for each table. */
243 : typedef sal_uInt16 ScHTMLTableId;
244 : /** Identifier of the "global table" (the entire HTML document). */
245 : const ScHTMLTableId SC_HTML_GLOBAL_TABLE = 0;
246 : /** Used as table index for normal (non-table) entries in ScHTMLEntry structs. Has the same value (0) as SC_HTML_GLOBAL_TABLE. */
247 : const ScHTMLTableId SC_HTML_NO_TABLE = 0;
248 :
249 : // ============================================================================
250 :
251 : /** A 2D cell position in an HTML table. */
252 : struct ScHTMLPos
253 : {
254 : SCCOL mnCol;
255 : SCROW mnRow;
256 :
257 4 : inline explicit ScHTMLPos() : mnCol( 0 ), mnRow( 0 ) {} // position (0,0)
258 7 : inline explicit ScHTMLPos( SCCOL nCol, SCROW nRow ) :
259 7 : mnCol( nCol ), mnRow( nRow ) {}
260 0 : inline explicit ScHTMLPos( const ScAddress& rAddr ) { Set( rAddr ); } // takes column and row from the address
261 :
262 14 : inline SCCOLROW Get( ScHTMLOrient eOrient ) const // column for tdCol, row for any other value
263 14 : { return (eOrient == tdCol) ? mnCol : mnRow; }
264 0 : inline void Set( SCCOL nCol, SCROW nRow )
265 0 : { mnCol = nCol; mnRow = nRow; }
266 0 : inline void Set( const ScAddress& rAddr )
267 0 : { Set( rAddr.Col(), rAddr.Row() ); }
268 : inline void Move( SCsCOL nColDiff, SCsROW nRowDiff ) // adds the signed differences to column and row; no range check
269 : { mnCol = mnCol + nColDiff; mnRow = mnRow + nRowDiff; }
270 71 : inline ScAddress MakeAddr() const // third ScAddress argument (presumably the sheet index) is always 0
271 71 : { return ScAddress( mnCol, mnRow, 0 ); }
272 : };
273 :
274 : inline bool operator==( const ScHTMLPos& rPos1, const ScHTMLPos& rPos2 ) // equal if both row and column match
275 : {
276 : return (rPos1.mnRow == rPos2.mnRow) && (rPos1.mnCol == rPos2.mnCol);
277 : }
278 :
279 21 : inline bool operator<( const ScHTMLPos& rPos1, const ScHTMLPos& rPos2 ) // orders by row first, then by column
280 : {
281 21 : return (rPos1.mnRow < rPos2.mnRow) || ((rPos1.mnRow == rPos2.mnRow) && (rPos1.mnCol < rPos2.mnCol));
282 : }
283 :
284 : // ----------------------------------------------------------------------------
285 :
286 : /** A 2D cell size in an HTML table. */
287 : struct ScHTMLSize
288 : {
289 : SCCOL mnCols;
290 : SCROW mnRows;
291 :
292 : inline explicit ScHTMLSize() : mnCols( 0 ), mnRows( 0 ) {} // size 0 x 0
293 44 : inline explicit ScHTMLSize( SCCOL nCols, SCROW nRows ) :
294 44 : mnCols( nCols ), mnRows( nRows ) {}
295 :
296 : inline SCCOLROW Get( ScHTMLOrient eOrient ) const // column count for tdCol, row count for any other value
297 : { return (eOrient == tdCol) ? mnCols : mnRows; }
298 0 : inline void Set( SCCOL nCols, SCROW nRows )
299 0 : { mnCols = nCols; mnRows = nRows; }
300 : inline void Expand( SCsCOL nColDiff, SCsROW nRowDiff ) // adds the signed differences to both counts; no range check
301 : { mnCols = mnCols + nColDiff; mnRows = mnRows + nRowDiff; }
302 : };
303 :
304 : inline bool operator==( const ScHTMLSize& rSize1, const ScHTMLSize& rSize2 ) // equal if both row and column counts match
305 : {
306 : return (rSize1.mnRows == rSize2.mnRows) && (rSize1.mnCols == rSize2.mnCols);
307 : }
308 :
309 : // ============================================================================
310 :
311 : /** A single entry containing a line of text or representing a table. */
312 21 : struct ScHTMLEntry : public ScEEParseEntry
313 : {
314 : public:
315 : explicit ScHTMLEntry(
316 : const SfxItemSet& rItemSet,
317 : ScHTMLTableId nTableId = SC_HTML_NO_TABLE );
318 :
319 : /** Returns true, if the selection of the entry is empty. */
320 0 : inline bool IsEmpty() const { return !aSel.HasRange(); }
321 : /** Returns true, if the entry has any content to be imported. */
322 : bool HasContents() const;
323 : /** Returns true, if the entry represents a table (nTab differs from SC_HTML_NO_TABLE). */
324 34 : inline bool IsTable() const { return nTab != SC_HTML_NO_TABLE; }
325 : /** Returns the table identifier stored in this entry (nTab). */
326 21 : inline ScHTMLTableId GetTableId() const { return nTab; }
327 :
328 : /** Sets or clears the import always state. */
329 0 : inline void SetImportAlways( bool bSet = true ) { mbImportAlways = bSet; }
330 : /** Sets start point of the entry selection to the start of the import info object. */
331 : void AdjustStart( const ImportInfo& rInfo );
332 : /** Sets end point of the entry selection to the end of the import info object. */
333 : void AdjustEnd( const ImportInfo& rInfo );
334 : /** Deletes leading and trailing empty paragraphs from the entry. */
335 : void Strip( const EditEngine& rEditEngine );
336 :
337 : /** Returns read/write access to the item set of this entry. */
338 0 : inline SfxItemSet& GetItemSet() { return aItemSet; }
339 : /** Returns read-only access to the item set of this entry. */
340 : inline const SfxItemSet& GetItemSet() const { return aItemSet; }
341 :
342 : private:
343 : bool mbImportAlways; /// true = Always import this entry.
344 : };
345 :
346 : // ============================================================================
347 :
348 : /** This struct handles creation of unique table identifiers. */
349 : struct ScHTMLTableAutoId
350 : {
351 : const ScHTMLTableId mnTableId; /// The created unique table identifier (const, fixed at construction).
352 : ScHTMLTableId& mrnUnusedId; /// Reference to global unused identifier variable.
353 :
354 : /** The constructor assigns an unused identifier to member mnTableId. */
355 : explicit ScHTMLTableAutoId( ScHTMLTableId& rnUnusedId );
356 : };
357 :
358 : // ----------------------------------------------------------------------------
359 :
360 : class ScHTMLTableMap;
361 :
362 : /** Stores data for one table in an HTML document.
363 :
364 : This class does the main work for importing an HTML document. It manages
365 : the correct insertion of parse entries into the correct cells and the
366 : creation of nested tables. Recalculation of resulting document size and
367 : position is done recursively in all nested tables.
368 : */
369 : class ScHTMLTable
370 : {
371 : public:
372 : /** Creates a new HTML table without content.
373 : @descr Internally handles a current cell position. This position is
374 : invalid until first calls of RowOn() and DataOn().
375 : @param rParentTable Reference to the parent table that owns this table.
376 : @param bPreFormText true = Table is based on preformatted text (<pre> tag). */
377 : explicit ScHTMLTable(
378 : ScHTMLTable& rParentTable,
379 : const ImportInfo& rInfo,
380 : bool bPreFormText );
381 :
382 : virtual ~ScHTMLTable();
383 :
384 : /** Returns the name of the table, specified in the TABLE tag. */
385 1 : inline const rtl::OUString& GetTableName() const { return maTableName; }
386 : /** Returns the unique identifier of the table. */
387 7 : inline ScHTMLTableId GetTableId() const { return maTableId.mnTableId; }
388 : /** Returns the table size. */
389 : inline const ScHTMLSize& GetSize() const { return maSize; }
390 : /** Returns the cell spanning of the specified cell. */
391 : ScHTMLSize GetSpan( const ScHTMLPos& rCellPos ) const;
392 :
393 : /** Searches in all nested tables for the specified table.
394 : @param nTableId Unique identifier of the table. */
395 : ScHTMLTable* FindNestedTable( ScHTMLTableId nTableId ) const;
396 :
397 : /** Puts the item into the item set of the current entry. */
398 : void PutItem( const SfxPoolItem& rItem );
399 : /** Inserts a text portion into current entry. */
400 : void PutText( const ImportInfo& rInfo );
401 : /** Inserts a new line, if in preformatted text, else does nothing. */
402 : void InsertPara( const ImportInfo& rInfo );
403 :
404 : /** Inserts a line break (<br> tag).
405 : @descr Inserts the current entry regardless if it is empty. */
406 : void BreakOn();
407 : /** Inserts a heading line (<p> and <h*> tags). */
408 : void HeadingOn();
409 : /** Processes a hyperlink (<a> tag). */
410 : void AnchorOn();
411 :
412 : /** Starts a *new* table nested in this table (<table> tag).
413 : @return Pointer to the new table. */
414 : ScHTMLTable* TableOn( const ImportInfo& rInfo );
415 : /** Closes *this* table (</table> tag).
416 : @return Pointer to the parent table. */
417 : ScHTMLTable* TableOff( const ImportInfo& rInfo );
418 : /** Starts a *new* table based on preformatted text (<pre> tag).
419 : @return Pointer to the new table. */
420 : ScHTMLTable* PreOn( const ImportInfo& rInfo );
421 : /** Closes *this* table based on preformatted text (</pre> tag).
422 : @return Pointer to the parent table. */
423 : ScHTMLTable* PreOff( const ImportInfo& rInfo );
424 :
425 : /** Starts next row (<tr> tag).
426 : @descr Cell address is invalid until first call of DataOn(). */
427 : void RowOn( const ImportInfo& rInfo );
428 : /** Closes the current row (</tr> tag).
429 : @descr Cell address is invalid until call of RowOn() and DataOn(). */
430 : void RowOff( const ImportInfo& rInfo );
431 : /** Starts the next cell (<td> or <th> tag). */
432 : void DataOn( const ImportInfo& rInfo );
433 : /** Closes the current cell (</td> or </th> tag).
434 : @descr Cell address is invalid until next call of DataOn(). */
435 : void DataOff( const ImportInfo& rInfo );
436 :
437 : /** Starts the body of the HTML document (<body> tag). */
438 : void BodyOn( const ImportInfo& rInfo );
439 : /** Closes the body of the HTML document (</body> tag). */
440 : void BodyOff( const ImportInfo& rInfo );
441 :
442 : /** Closes *this* table (</table> tag) or preformatted text (</pre> tag).
443 : @descr Used to close this table object regardless of the opening tag type.
444 : @return Pointer to the parent table, or this, if no parent found. */
445 : ScHTMLTable* CloseTable( const ImportInfo& rInfo );
446 :
447 : /** Returns the resulting document row/column count of the specified HTML row/column. */
448 : SCCOLROW GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos ) const;
449 : /** Returns the resulting document row/column count in the half-open range [nCellBegin, nCellEnd). */
450 : SCCOLROW GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellBegin, SCCOLROW nCellEnd ) const;
451 : /** Returns the total document row/column count in the specified direction. */
452 : SCCOLROW GetDocSize( ScHTMLOrient eOrient ) const;
453 : /** Returns the total document row/column count of the specified HTML cell. */
454 : ScHTMLSize GetDocSize( const ScHTMLPos& rCellPos ) const;
455 :
456 : /** Returns the resulting Calc position of the top left edge of the table. */
457 1 : inline const ScHTMLPos& GetDocPos() const { return maDocBasePos; }
458 : /** Calculates the resulting Calc position of the specified HTML column/row. */
459 : SCCOLROW GetDocPos( ScHTMLOrient eOrient, SCCOLROW nCellPos = 0 ) const;
460 : /** Calculates the resulting Calc position of the specified HTML cell. */
461 : ScHTMLPos GetDocPos( const ScHTMLPos& rCellPos ) const;
462 :
463 : /** Calculates the current Calc document area of this table. */
464 : void GetDocRange( ScRange& rRange ) const;
465 :
466 : /** Applies border formatting to the passed document. */
467 : void ApplyCellBorders( ScDocument* pDoc, const ScAddress& rFirstPos ) const;
468 :
469 : SvNumberFormatter* GetFormatTable(); /// NOTE(review): undocumented; where the formatter comes from is not visible in this header.
470 :
471 : protected:
472 : /** Creates a new HTML table without parent.
473 : @descr This constructor is used to create the "global table". */
474 : explicit ScHTMLTable(
475 : SfxItemPool& rPool,
476 : EditEngine& rEditEngine,
477 : ::std::vector< ScEEParseEntry* >& rEEParseList,
478 : ScHTMLTableId& rnUnusedId, ScHTMLParser* pParser );
479 :
480 : /** Fills all empty cells in this and nested tables with dummy parse entries. */
481 : void FillEmptyCells();
482 : /** Recalculates the size of all columns/rows in the table, regarding nested tables. */
483 : void RecalcDocSize();
484 : /** Recalculates the position of all cell entries and nested tables.
485 : @param rBasePos The origin of the table in the Calc document. */
486 : void RecalcDocPos( const ScHTMLPos& rBasePos );
487 :
488 : private:
489 : typedef ::std::auto_ptr< ScHTMLTableMap > ScHTMLTableMapPtr; // NOTE: std::auto_ptr is deprecated since C++11 and removed in C++17
490 : typedef ::std::auto_ptr< SfxItemSet > SfxItemSetPtr;
491 : typedef ::std::vector< SCCOLROW > ScSizeVec;
492 : typedef ::std::list< ScHTMLEntry* > ScHTMLEntryList; // list of raw pointers
493 : typedef ::std::map< ScHTMLPos, ScHTMLEntryList > ScHTMLEntryMap; // keyed by cell position (row first, then column, per operator<)
494 : typedef ::std::auto_ptr< ScHTMLEntry > ScHTMLEntryPtr;
495 :
496 : /** Returns true, if the current cell does not contain an entry yet. */
497 : bool IsEmptyCell() const;
498 : /** Returns the item set from cell, row, or table, depending on current state. */
499 : const SfxItemSet& GetCurrItemSet() const;
500 :
501 : /** Returns true, if import info represents a space character. */
502 : static bool IsSpaceCharInfo( const ImportInfo& rInfo );
503 :
504 : /** Creates and returns a new empty flying entry at position (0,0). */
505 : ScHTMLEntryPtr CreateEntry() const;
506 : /** Creates a new flying entry.
507 : @param rInfo Contains the initial edit engine selection for the entry. */
508 : void CreateNewEntry( const ImportInfo& rInfo );
509 :
510 : /** Inserts an empty line in front of the next entry. */
511 : void InsertLeadingEmptyLine();
512 :
513 : /** Pushes the passed entry into the list of the current cell. */
514 : void ImplPushEntryToList( ScHTMLEntryList& rEntryList, ScHTMLEntryPtr& rxEntry );
515 : /** Tries to insert the entry into the current cell.
516 : @descr If insertion is not possible (i.e., currently no cell open), the
517 : entry will be inserted into the parent table.
518 : @return true = Entry has been pushed into the current cell; false = Entry dropped. */
519 : bool PushEntry( ScHTMLEntryPtr& rxEntry );
520 : /** Puts the current entry into the entry list, if it is not empty.
521 : @param rInfo The import info struct containing the end position of the current entry.
522 : @param bLastInCell true = If cell is still empty, put this entry always.
523 : @return true = Entry has been pushed into the current cell; false = Entry dropped. */
524 : bool PushEntry( const ImportInfo& rInfo, bool bLastInCell = false );
525 : /** Pushes a new entry into current cell which references a nested table.
526 : @return true = Entry has been pushed into the current cell; false = Entry dropped. */
527 : bool PushTableEntry( ScHTMLTableId nTableId );
528 :
529 : /** Tries to find a table from the table container.
530 : @descr Assumes that the table is located in the current container or
531 : that the passed table identifier is 0.
532 : @param nTableId Unique identifier of the table or 0. */
533 : ScHTMLTable* GetExistingTable( ScHTMLTableId nTableId ) const;
534 : /** Inserts a nested table in the current cell at the specified position.
535 : @param bPreFormText true = New table is based on preformatted text (<pre> tag). */
536 : ScHTMLTable* InsertNestedTable( const ImportInfo& rInfo, bool bPreFormText );
537 :
538 : /** Inserts a new cell in an unused position, starting from current cell position. */
539 : void InsertNewCell( const ScHTMLSize& rSpanSize );
540 :
541 : /** Set internal states for a new table row. */
542 : void ImplRowOn();
543 : /** Set internal states for leaving a table row. */
544 : void ImplRowOff();
545 : /** Set internal states for entering a new table cell. */
546 : void ImplDataOn( const ScHTMLSize& rSpanSize );
547 : /** Set internal states for leaving a table cell. */
548 : void ImplDataOff();
549 :
550 : /** Inserts additional formatting options from import info into the item set. */
551 : void ProcessFormatOptions( SfxItemSet& rItemSet, const ImportInfo& rInfo );
552 :
553 : /** Updates the document column/row size of the specified column or row.
554 : @descr Only increases the present count, never decreases. */
555 : void SetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos, SCCOLROW nSize );
556 : /** Calculates and sets the resulting size the cell needs in the document.
557 : @descr Reduces the needed size in merged cells.
558 : @param nCellPos The first column/row position of the (merged) cell.
559 : @param nCellSpan The cell spanning in the specified orientation.
560 : @param nRealDocSize The raw document size of all entries of the cell. */
561 : void CalcNeededDocSize(
562 : ScHTMLOrient eOrient, SCCOLROW nCellPos,
563 : SCCOLROW nCellSpan, SCCOLROW nRealDocSize );
564 :
565 : private:
566 : ScHTMLTable* mpParentTable; /// Pointer to parent table.
567 : ScHTMLTableMapPtr mxNestedTables; /// Table of nested HTML tables.
568 : rtl::OUString maTableName; /// Table name from <table id> option.
569 : ScHTMLTableAutoId maTableId; /// Unique identifier of this table.
570 : SfxItemSet maTableItemSet; /// Items for the entire table.
571 : SfxItemSetPtr mxRowItemSet; /// Items for the current table row.
572 : SfxItemSetPtr mxDataItemSet; /// Items for the current cell.
573 : ScRangeList maHMergedCells; /// List of all horizontally merged cells.
574 : ScRangeList maVMergedCells; /// List of all vertically merged cells.
575 : ScRangeList maUsedCells; /// List of all used cells.
576 : EditEngine& mrEditEngine; /// Edit engine (from ScEEParser).
577 : ::std::vector< ScEEParseEntry* >& mrEEParseList; /// List that owns the parse entries (from ScEEParser).
578 : ScHTMLEntryMap maEntryMap; /// List of entries for each cell.
579 : ScHTMLEntryList* mpCurrEntryList; /// Current entry list from map for faster access.
580 : ScHTMLEntryPtr mxCurrEntry; /// Working entry, not yet inserted in a list.
581 : ScSizeVec maCumSizes[ 2 ]; /// Cumulated cell counts for each HTML table column/row.
582 : ScHTMLSize maSize; /// Size of the table.
583 : ScHTMLPos maCurrCell; /// Address of current cell to fill.
584 : ScHTMLPos maDocBasePos; /// Resulting base address in a Calc document.
585 : ScHTMLParser* mpParser; /// The parser (the global-table constructor takes it as pParser).
586 : bool mbBorderOn:1; /// true = Table borders on.
587 : bool mbPreFormText:1; /// true = Table from preformatted text (<pre> tag).
588 : bool mbRowOn:1; /// true = Inside of <tr> </tr>.
589 : bool mbDataOn:1; /// true = Inside of <td> </td> or <th> </th>.
590 : bool mbPushEmptyLine:1; /// true = Insert empty line before current entry.
591 : };
592 :
593 : // ----------------------------------------------------------------------------
594 :
595 : /** The "global table" representing the entire HTML document. */
596 : class ScHTMLGlobalTable : public ScHTMLTable
597 : {
598 : public:
599 : explicit ScHTMLGlobalTable( // same parameter list as the protected ScHTMLTable constructor
600 : SfxItemPool& rPool,
601 : EditEngine& rEditEngine,
602 : ::std::vector< ScEEParseEntry* >& rEEParseList,
603 : ScHTMLTableId& rnUnusedId, ScHTMLParser* pParser );
604 :
605 : virtual ~ScHTMLGlobalTable();
606 :
607 : /** Recalculates sizes and resulting positions of all document entries. */
608 : void Recalc();
609 : };
610 :
611 : // ============================================================================
612 :
613 : /** The HTML parser for data queries. Focuses on data import, not on layout.
614 :
615 : Builds the table structure correctly, ignores extended formatting like
616 : pictures or column widths.
617 : */
618 : class ScHTMLQueryParser : public ScHTMLParser
619 : {
620 : public:
621 : explicit ScHTMLQueryParser( EditEngine* pEditEngine, ScDocument* pDoc );
622 : virtual ~ScHTMLQueryParser();
623 :
624 : virtual sal_uLong Read( SvStream& rStrm, const String& rBaseURL );
625 :
626 : /** Returns the "global table" which contains the entire HTML document. */
627 : virtual const ScHTMLTable* GetGlobalTable() const;
628 :
629 : private:
630 : /** Handles all possible tags in the HTML document. */
631 : void ProcessToken( const ImportInfo& rInfo );
632 : /** Inserts a text portion into current entry. */
633 : void InsertText( const ImportInfo& rInfo );
634 : /** Processes the <font> tag. */
635 : void FontOn( const ImportInfo& rInfo );
636 :
637 : /** Processes the <meta> tag. */
638 : void MetaOn( const ImportInfo& rInfo );
639 : /** Opens the title of the HTML document (<title> tag). */
640 : void TitleOn( const ImportInfo& rInfo );
641 : /** Closes the title of the HTML document (</title> tag). */
642 : void TitleOff( const ImportInfo& rInfo );
643 :
644 : /** Opens a new table at the current position. */
645 : void TableOn( const ImportInfo& rInfo );
646 : /** Closes the current table. */
647 : void TableOff( const ImportInfo& rInfo );
648 : /** Opens a new table based on preformatted text. */
649 : void PreOn( const ImportInfo& rInfo );
650 : /** Closes the current preformatted text table. */
651 : void PreOff( const ImportInfo& rInfo );
652 :
653 : /** Closes the current table, regardless of the opening tag. */
654 : void CloseTable( const ImportInfo& rInfo );
655 :
656 : void ParseStyle(const rtl::OUString& rStrm); /// NOTE(review): undocumented; presumably parses <style> content into the style data - confirm in the implementation.
657 :
658 : DECL_LINK( HTMLImportHdl, const ImportInfo* );
659 :
660 : private:
661 : typedef ::std::auto_ptr< ScHTMLGlobalTable > ScHTMLGlobalTablePtr; // NOTE: std::auto_ptr is deprecated since C++11 and removed in C++17
662 :
663 : rtl::OUStringBuffer maTitle; /// The title of the document.
664 : ScHTMLGlobalTablePtr mxGlobTable; /// Contains the entire imported document.
665 : ScHTMLTable* mpCurrTable; /// Pointer to current table (performance).
666 : ScHTMLTableId mnUnusedId; /// First unused table identifier.
667 : bool mbTitleOn; /// true = Inside of <title> </title>.
668 : };
669 :
670 :
671 : // ============================================================================
672 :
673 : #endif
674 :
675 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|