Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #ifndef SC_HTMLPARS_HXX
21 : #define SC_HTMLPARS_HXX
22 :
23 : #include <memory>
24 : #include <stack>
25 : #include <vector>
26 : #include <list>
27 : #include <map>
28 : #include <o3tl/sorted_vector.hxx>
29 : #include <boost/ptr_container/ptr_map.hpp>
30 : #include <boost/unordered_map.hpp>
31 :
32 : #include "rangelst.hxx"
33 : #include "eeparser.hxx"
34 :
35 : const sal_uInt32 SC_HTML_FONTSIZES = 7; // same as export, HTML options; also the length of ScHTMLParser::maFontHeights
36 :
37 : // Pixel tolerance for SeekOffset and related.
38 : const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_SMALL = 1; // single table
39 : const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_LARGE = 10; // nested tables
40 :
41 :
42 : // BASE class for HTML parser classes
43 :
44 :
45 : class ScHTMLTable; // forward declaration; needed by ScHTMLParser::GetGlobalTable() below
46 :
47 : /**
48 : * Collection of HTML style data parsed from the content of <style>
49 : * elements.
50 : */
51 0 : class ScHTMLStyles
52 : {
53 : typedef ::boost::unordered_map<OUString, OUString, OUStringHash> PropsType; /// string -> string map; presumably property name -> property value
54 : typedef ::boost::ptr_map<OUString, PropsType> NamePropsType; /// name (class or element, see members below) -> property map
55 : typedef ::boost::ptr_map<OUString, NamePropsType> ElemsType; /// element name -> (class name -> property map)
56 :
57 : NamePropsType maGlobalProps; /// global properties (for a given class for all elements)
58 : NamePropsType maElemGlobalProps; /// element global properties (no class specified)
59 : ElemsType maElemProps; /// element to class to properties (both element and class are given)
60 : const OUString maEmpty; /// just a persistent empty string.
61 : public:
62 : ScHTMLStyles();
63 : /** Records value aValue for property aProp; element and class names are passed as (pointer, length) pairs. NOTE(review): how empty names are handled is decided in the implementation - confirm there. */
64 : void add(const char* pElemName, size_t nElemName, const char* pClassName, size_t nClassName,
65 : const OUString& aProp, const OUString& aValue);
66 :
67 : /**
68 : * Find best-matching property value for given element and class names. Returns a reference, not a copy; presumably maEmpty when nothing matches (TODO confirm).
69 : */
70 : const OUString& getPropertyValue(
71 : const OUString& rElem, const OUString& rClass, const OUString& rPropName) const;
72 : /** Static helper working on one NamePropsType map; presumably stores aProp/aValue under aName in rProps (TODO confirm in the implementation). */
73 : private:
74 : static void insertProp(
75 : NamePropsType& rProps, const OUString& aName,
76 : const OUString& aProp, const OUString& aValue);
77 : };
78 :
79 : /** Base class for HTML parser classes. */
80 : class ScHTMLParser : public ScEEParser
81 : {
82 : ScHTMLStyles maStyles; /// Style data; presumably what GetStyles() hands out.
83 : protected:
84 : sal_uInt32 maFontHeights[ SC_HTML_FONTSIZES ]; /// One entry per HTML font size; the values are not set in this header.
85 : ScDocument* mpDoc; /// The destination document.
86 :
87 : public:
88 : explicit ScHTMLParser( EditEngine* pEditEngine, ScDocument* pDoc );
89 : virtual ~ScHTMLParser();
90 : /** Import entry point; pure virtual here and overridden by ScHTMLLayoutParser and ScHTMLQueryParser. */
91 : virtual sal_uLong Read( SvStream& rStrm, const OUString& rBaseURL ) SAL_OVERRIDE = 0;
92 : /** Accessors returning references; presumably to maStyles and *mpDoc - TODO confirm in the implementation. */
93 : ScHTMLStyles& GetStyles();
94 : ScDocument& GetDoc();
95 :
96 : /** Returns the "global table" which contains the entire HTML document. */
97 : virtual const ScHTMLTable* GetGlobalTable() const = 0;
98 : };
99 :
100 :
101 : typedef o3tl::sorted_vector<sal_uLong> ScHTMLColOffset; // sorted container of column offsets, used by ScHTMLLayoutParser and ScHTMLTableStackEntry
102 : /** Plain record of per-table parser state; most fields have a same-named member in ScHTMLLayoutParser, which keeps a ScHTMLTableStack of these. */
103 : struct ScHTMLTableStackEntry
104 : {
105 : ScRangeListRef xLockedList;
106 : ScEEParseEntry* pCellEntry; // raw pointer; not deleted by this struct
107 : ScHTMLColOffset* pLocalColOffset; // raw pointer; not deleted by this struct
108 : sal_uLong nFirstTableCell;
109 : SCCOL nColCnt;
110 : SCROW nRowCnt;
111 : SCCOL nColCntStart;
112 : SCCOL nMaxCol;
113 : sal_uInt16 nTable;
114 : sal_uInt16 nTableWidth;
115 : sal_uInt16 nColOffset;
116 : sal_uInt16 nColOffsetStart;
117 : bool bFirstRow;
118 0 : ScHTMLTableStackEntry( ScEEParseEntry* pE, // the constructor only copies each argument into the matching member
119 : const ScRangeListRef& rL, ScHTMLColOffset* pTO,
120 : sal_uLong nFTC,
121 : SCCOL nCol, SCROW nRow,
122 : SCCOL nStart, SCCOL nMax, sal_uInt16 nTab,
123 : sal_uInt16 nTW, sal_uInt16 nCO, sal_uInt16 nCOS,
124 : bool bFR )
125 : : xLockedList( rL ), pCellEntry( pE ), // initializers follow the member declaration order
126 : pLocalColOffset( pTO ),
127 : nFirstTableCell( nFTC ),
128 : nColCnt( nCol ), nRowCnt( nRow ),
129 : nColCntStart( nStart ), nMaxCol( nMax ),
130 : nTable( nTab ), nTableWidth( nTW ),
131 : nColOffset( nCO ), nColOffsetStart( nCOS ),
132 0 : bFirstRow( bFR )
133 0 : {}
134 0 : ~ScHTMLTableStackEntry() {} // empty body: the pointed-to objects are left alone
135 : };
136 : typedef ::std::stack< ScHTMLTableStackEntry* > ScHTMLTableStack; // stack of raw pointers; std::stack does not delete the entries it holds
137 : /** Plain record of three positions (last column, next row, current row); kept on a ScHTMLAdjustStack. NOTE(review): presumably used by ScHTMLLayoutParser::Adjust() - confirm. */
138 : struct ScHTMLAdjustStackEntry
139 : {
140 : SCCOL nLastCol;
141 : SCROW nNextRow;
142 : SCROW nCurRow;
143 0 : ScHTMLAdjustStackEntry( SCCOL nLCol, SCROW nNRow, // copies the three arguments into the members
144 : SCROW nCRow )
145 : : nLastCol( nLCol ), nNextRow( nNRow ),
146 0 : nCurRow( nCRow )
147 0 : {}
148 : };
149 : typedef ::std::stack< ScHTMLAdjustStackEntry* > ScHTMLAdjustStack; // stack of raw pointers; std::stack does not delete the entries it holds
150 :
151 :
152 : class EditEngine; // forward declarations of types used only by pointer or reference in this header
153 : class ScDocument;
154 : class HTMLOption;
155 :
156 : // TODO these need better names
157 : typedef ::std::map<SCROW, SCROW> InnerMap; // row -> row; meaning of key and value is not visible in this header
158 : typedef ::std::map<sal_uInt16, InnerMap*> OuterMap; // sal_uInt16 key -> InnerMap held by raw pointer (std::map does not delete it); used for ScHTMLLayoutParser::pTables
159 : /** HTML parser derived from ScHTMLParser that tracks column offsets and table widths in pixels per table (see members). NOTE(review): described from the declarations only - confirm details in the implementation. */
160 : class ScHTMLLayoutParser : public ScHTMLParser
161 : {
162 : private:
163 : Size aPageSize;
164 : OUString aBaseURL;
165 : ScHTMLTableStack aTableStack; // stack of ScHTMLTableStackEntry pointers
166 : OUString aString;
167 : ScRangeListRef xLockedList; // per table
168 : OuterMap* pTables;
169 : ScHTMLColOffset* pColOffset;
170 : ScHTMLColOffset* pLocalColOffset; // per table
171 : sal_uLong nFirstTableCell; // per table
172 : short nTableLevel;
173 : sal_uInt16 nTable;
174 : sal_uInt16 nMaxTable;
175 : SCCOL nColCntStart; // first column, per table
176 : SCCOL nMaxCol; // per table
177 : sal_uInt16 nTableWidth; // per table
178 : sal_uInt16 nColOffset; // current value, in pixels
179 : sal_uInt16 nColOffsetStart; // start value per table, in pixels
180 : sal_uInt16 nOffsetTolerance; // for use with SeekOffset and related
181 : bool bTabInTabCell:1;
182 : bool bFirstRow:1; // per table: whether in the first row
183 : bool bInCell:1;
184 : bool bInTitle:1;
185 :
186 : DECL_LINK( HTMLImportHdl, ImportInfo* ); // link handler declared via the DECL_LINK macro; receives an ImportInfo*
187 : void NewActEntry( ScEEParseEntry* );
188 : void EntryEnd( ScEEParseEntry*, const ESelection& );
189 : void ProcToken( ImportInfo* );
190 : void CloseEntry( ImportInfo* );
191 : void NextRow( ImportInfo* );
192 : void SkipLocked( ScEEParseEntry*, bool bJoin = true );
193 : static bool SeekOffset( ScHTMLColOffset*, sal_uInt16 nOffset, // static helpers below operate on a ScHTMLColOffset with explicit tolerances
194 : SCCOL* pCol, sal_uInt16 nOffsetTol );
195 : static void MakeCol( ScHTMLColOffset*, sal_uInt16& nOffset, // takes offset and width by reference
196 : sal_uInt16& nWidth, sal_uInt16 nOffsetTol,
197 : sal_uInt16 nWidthTol );
198 : static void MakeColNoRef( ScHTMLColOffset*, sal_uInt16 nOffset, // same parameter list as MakeCol, but offset and width by value
199 : sal_uInt16 nWidth, sal_uInt16 nOffsetTol,
200 : sal_uInt16 nWidthTol );
201 : static void ModifyOffset( ScHTMLColOffset*, sal_uInt16& nOldOffset,
202 : sal_uInt16& nNewOffset, sal_uInt16 nOffsetTol );
203 : void Colonize( ScEEParseEntry* );
204 : sal_uInt16 GetWidth( ScEEParseEntry* );
205 : void SetWidths();
206 : void Adjust();
207 :
208 : sal_uInt16 GetWidthPixel( const HTMLOption& );
209 : bool IsAtBeginningOfText( ImportInfo* );
210 : // Handlers named after HTML elements; presumably called for the corresponding tokens - TODO confirm in the implementation.
211 : void TableOn( ImportInfo* );
212 : void ColOn( ImportInfo* );
213 : void TableRowOn( ImportInfo* );
214 : void TableRowOff( ImportInfo* );
215 : void TableDataOn( ImportInfo* );
216 : void TableDataOff( ImportInfo* );
217 : void TableOff( ImportInfo* );
218 : void Image( ImportInfo* );
219 : void AnchorOn( ImportInfo* );
220 : void FontOn( ImportInfo* );
221 :
222 : public:
223 : ScHTMLLayoutParser( EditEngine*, const OUString& rBaseURL, const Size& aPageSize, ScDocument* );
224 : virtual ~ScHTMLLayoutParser();
225 : virtual sal_uLong Read( SvStream&, const OUString& rBaseURL ) SAL_OVERRIDE; // overrides ScHTMLParser::Read
226 : virtual const ScHTMLTable* GetGlobalTable() const SAL_OVERRIDE; // overrides ScHTMLParser::GetGlobalTable
227 : };
228 :
229 :
230 : // HTML DATA QUERY PARSER
231 :
232 :
233 : /** Declares the orientation in or for a table: column or row. */
234 : enum ScHTMLOrient { tdCol = 0 , tdRow = 1 }; // explicit values 0 and 1
235 :
236 : /** Type for a unique identifier for each table. */
237 : typedef sal_uInt16 ScHTMLTableId;
238 : /** Identifier of the "global table" (the entire HTML document). */
239 : const ScHTMLTableId SC_HTML_GLOBAL_TABLE = 0;
240 : /** Used as table index for normal (non-table) entries in ScHTMLEntry structs. */
241 : const ScHTMLTableId SC_HTML_NO_TABLE = 0; // same value as SC_HTML_GLOBAL_TABLE, so ScHTMLEntry::IsTable() is false for id 0
242 :
243 : /** A 2D cell position in an HTML table. */
244 : struct ScHTMLPos
245 : {
246 : SCCOL mnCol; // column index
247 : SCROW mnRow; // row index
248 : // Constructors: position (0,0), explicit column/row, or column/row taken from an ScAddress.
249 0 : inline explicit ScHTMLPos() : mnCol( 0 ), mnRow( 0 ) {}
250 0 : inline explicit ScHTMLPos( SCCOL nCol, SCROW nRow ) :
251 0 : mnCol( nCol ), mnRow( nRow ) {}
252 0 : inline explicit ScHTMLPos( const ScAddress& rAddr ) { Set( rAddr ); }
253 :
254 0 : inline SCCOLROW Get( ScHTMLOrient eOrient ) const // returns the column for tdCol, the row for any other value
255 0 : { return (eOrient == tdCol) ? mnCol : mnRow; }
256 0 : inline void Set( SCCOL nCol, SCROW nRow ) // overwrites both coordinates
257 0 : { mnCol = nCol; mnRow = nRow; }
258 0 : inline void Set( const ScAddress& rAddr ) // uses only Col() and Row() of the address
259 0 : { Set( rAddr.Col(), rAddr.Row() ); }
260 : inline void Move( SCsCOL nColDiff, SCsROW nRowDiff ) // adds the signed differences; no range check
261 : { mnCol = mnCol + nColDiff; mnRow = mnRow + nRowDiff; }
262 0 : inline ScAddress MakeAddr() const // third ScAddress argument is always 0 (presumably the sheet index - TODO confirm)
263 0 : { return ScAddress( mnCol, mnRow, 0 ); }
264 : };
265 :
266 : inline bool operator==( const ScHTMLPos& rPos1, const ScHTMLPos& rPos2 ) // equal when both row and column match
267 : {
268 : return (rPos1.mnRow == rPos2.mnRow) && (rPos1.mnCol == rPos2.mnCol);
269 : }
270 :
271 0 : inline bool operator<( const ScHTMLPos& rPos1, const ScHTMLPos& rPos2 ) // row-major order: rows compare first, columns break ties; ScHTMLPos is used as a std::map key in ScHTMLTable
272 : {
273 0 : return (rPos1.mnRow < rPos2.mnRow) || ((rPos1.mnRow == rPos2.mnRow) && (rPos1.mnCol < rPos2.mnCol));
274 : }
275 :
276 : /** A 2D cell size in an HTML table. */
277 : struct ScHTMLSize
278 : {
279 : SCCOL mnCols; // number of columns
280 : SCROW mnRows; // number of rows
281 : // Constructors: size (0,0) or explicit column/row counts.
282 : inline explicit ScHTMLSize() : mnCols( 0 ), mnRows( 0 ) {}
283 0 : inline explicit ScHTMLSize( SCCOL nCols, SCROW nRows ) :
284 0 : mnCols( nCols ), mnRows( nRows ) {}
285 :
286 : inline SCCOLROW Get( ScHTMLOrient eOrient ) const // returns the column count for tdCol, the row count for any other value
287 : { return (eOrient == tdCol) ? mnCols : mnRows; }
288 0 : inline void Set( SCCOL nCols, SCROW nRows ) // overwrites both counts
289 0 : { mnCols = nCols; mnRows = nRows; }
290 : inline void Expand( SCsCOL nColDiff, SCsROW nRowDiff ) // adds the signed differences; no range check
291 : { mnCols = mnCols + nColDiff; mnRows = mnRows + nRowDiff; }
292 : };
293 :
294 : inline bool operator==( const ScHTMLSize& rSize1, const ScHTMLSize& rSize2 ) // equal when both row and column counts match
295 : {
296 : return (rSize1.mnRows == rSize2.mnRows) && (rSize1.mnCols == rSize2.mnCols);
297 : }
298 :
299 : /** A single entry containing a line of text or representing a table. */
300 0 : struct ScHTMLEntry : public ScEEParseEntry
301 : {
302 : public:
303 : explicit ScHTMLEntry( // nTableId defaults to SC_HTML_NO_TABLE, i.e. a normal (non-table) entry
304 : const SfxItemSet& rItemSet,
305 : ScHTMLTableId nTableId = SC_HTML_NO_TABLE );
306 :
307 : /** Returns true, if the selection of the entry is empty. */
308 0 : inline bool IsEmpty() const { return !aSel.HasRange(); }
309 : /** Returns true, if the entry has any content to be imported. */
310 : bool HasContents() const;
311 : /** Returns true, if the entry represents a table. */
312 0 : inline bool IsTable() const { return nTab != SC_HTML_NO_TABLE; }
313 : /** Returns the table identifier stored in this entry (the inherited nTab member). */
314 0 : inline ScHTMLTableId GetTableId() const { return nTab; }
315 :
316 : /** Sets or clears the import always state. */
317 0 : inline void SetImportAlways( bool bSet = true ) { mbImportAlways = bSet; }
318 : /** Sets start point of the entry selection to the start of the import info object. */
319 : void AdjustStart( const ImportInfo& rInfo );
320 : /** Sets end point of the entry selection to the end of the import info object. */
321 : void AdjustEnd( const ImportInfo& rInfo );
322 : /** Deletes leading and trailing empty paragraphs from the entry. */
323 : void Strip( const EditEngine& rEditEngine );
324 :
325 : /** Returns read/write access to the item set of this entry. */
326 0 : inline SfxItemSet& GetItemSet() { return aItemSet; }
327 : /** Returns read-only access to the item set of this entry. */
328 : inline const SfxItemSet& GetItemSet() const { return aItemSet; }
329 :
330 : private:
331 : bool mbImportAlways; /// true = Always import this entry.
332 : };
333 :
334 : /** This struct handles creation of unique table identifiers. */
335 : struct ScHTMLTableAutoId
336 : {
337 : const ScHTMLTableId mnTableId; /// The created unique table identifier.
338 : ScHTMLTableId& mrnUnusedId; /// Reference to global unused identifier variable.
339 : // The const member and the reference member make this struct non-assignable.
340 : /** The constructor assigns an unused identifier to member mnTableId. */
341 : explicit ScHTMLTableAutoId( ScHTMLTableId& rnUnusedId );
342 : };
343 :
344 : class ScHTMLTableMap; // forward declaration only; ScHTMLTable holds it through its ScHTMLTableMapPtr member
345 :
346 : /** Stores data for one table in an HTML document.
347 :
348 : This class does the main work for importing an HTML document. It manages
349 : the correct insertion of parse entries into the correct cells and the
350 : creation of nested tables. Recalculation of resulting document size and
351 : position is done recursively in all nested tables.
352 : */
353 : class ScHTMLTable
354 : {
355 : public:
356 : /** Creates a new HTML table without content.
357 : @descr Internally handles a current cell position. This position is
358 : invalid until first calls of RowOn() and DataOn().
359 : @param rParentTable Reference to the parent table that owns this table. @param rInfo Import info passed on table creation (NOTE(review): exact use is not visible in this header).
360 : @param bPreFormText true = Table is based on preformatted text (<pre> tag). */
361 : explicit ScHTMLTable(
362 : ScHTMLTable& rParentTable,
363 : const ImportInfo& rInfo,
364 : bool bPreFormText );
365 : /** Virtual destructor; ScHTMLGlobalTable derives from this class. */
366 : virtual ~ScHTMLTable();
367 :
368 : /** Returns the name of the table, specified in the TABLE tag. */
369 0 : inline const OUString& GetTableName() const { return maTableName; }
370 : /** Returns the unique identifier of the table. */
371 0 : inline ScHTMLTableId GetTableId() const { return maTableId.mnTableId; }
372 : /** Returns the table size. */
373 : inline const ScHTMLSize& GetSize() const { return maSize; }
374 : /** Returns the cell spanning of the specified cell. */
375 : ScHTMLSize GetSpan( const ScHTMLPos& rCellPos ) const;
376 :
377 : /** Searches in all nested tables for the specified table.
378 : @param nTableId Unique identifier of the table. */
379 : ScHTMLTable* FindNestedTable( ScHTMLTableId nTableId ) const;
380 :
381 : /** Puts the item into the item set of the current entry. */
382 : void PutItem( const SfxPoolItem& rItem );
383 : /** Inserts a text portion into current entry. */
384 : void PutText( const ImportInfo& rInfo );
385 : /** Inserts a new line, if in preformatted text, else does nothing. */
386 : void InsertPara( const ImportInfo& rInfo );
387 :
388 : /** Inserts a line break (<br> tag).
389 : @descr Inserts the current entry regardless if it is empty. */
390 : void BreakOn();
391 : /** Inserts a heading line (<p> and <h*> tags). */
392 : void HeadingOn();
393 : /** Processes a hyperlink (<a> tag). */
394 : void AnchorOn();
395 :
396 : /** Starts a *new* table nested in this table (<table> tag).
397 : @return Pointer to the new table. */
398 : ScHTMLTable* TableOn( const ImportInfo& rInfo );
399 : /** Closes *this* table (</table> tag).
400 : @return Pointer to the parent table. */
401 : ScHTMLTable* TableOff( const ImportInfo& rInfo );
402 : /** Starts a *new* table based on preformatted text (<pre> tag).
403 : @return Pointer to the new table. */
404 : ScHTMLTable* PreOn( const ImportInfo& rInfo );
405 : /** Closes *this* table based on preformatted text (</pre> tag).
406 : @return Pointer to the parent table. */
407 : ScHTMLTable* PreOff( const ImportInfo& rInfo );
408 :
409 : /** Starts next row (<tr> tag).
410 : @descr Cell address is invalid until first call of DataOn(). */
411 : void RowOn( const ImportInfo& rInfo );
412 : /** Closes the current row (</tr> tag).
413 : @descr Cell address is invalid until call of RowOn() and DataOn(). */
414 : void RowOff( const ImportInfo& rInfo );
415 : /** Starts the next cell (<td> or <th> tag). */
416 : void DataOn( const ImportInfo& rInfo );
417 : /** Closes the current cell (</td> or </th> tag).
418 : @descr Cell address is invalid until next call of DataOn(). */
419 : void DataOff( const ImportInfo& rInfo );
420 :
421 : /** Starts the body of the HTML document (<body> tag). */
422 : void BodyOn( const ImportInfo& rInfo );
423 : /** Closes the body of the HTML document (</body> tag). */
424 : void BodyOff( const ImportInfo& rInfo );
425 :
426 : /** Closes *this* table (</table> tag) or preformatted text (</pre> tag).
427 : @descr Used to close this table object regardless of the opening tag type.
428 : @return Pointer to the parent table, or this, if no parent found. */
429 : ScHTMLTable* CloseTable( const ImportInfo& rInfo );
430 :
431 : /** Returns the resulting document row/column count of the specified HTML row/column. */
432 : SCCOLROW GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos ) const;
433 : /** Returns the resulting document row/column count in the half-open range [nCellBegin, nCellEnd). */
434 : SCCOLROW GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellBegin, SCCOLROW nCellEnd ) const;
435 : /** Returns the total document row/column count in the specified direction. */
436 : SCCOLROW GetDocSize( ScHTMLOrient eOrient ) const;
437 : /** Returns the total document row/column count of the specified HTML cell. */
438 : ScHTMLSize GetDocSize( const ScHTMLPos& rCellPos ) const;
439 :
440 : /** Returns the resulting Calc position of the top left edge of the table. */
441 0 : inline const ScHTMLPos& GetDocPos() const { return maDocBasePos; }
442 : /** Calculates the resulting Calc position of the specified HTML column/row. */
443 : SCCOLROW GetDocPos( ScHTMLOrient eOrient, SCCOLROW nCellPos = 0 ) const;
444 : /** Calculates the resulting Calc position of the specified HTML cell. */
445 : ScHTMLPos GetDocPos( const ScHTMLPos& rCellPos ) const;
446 :
447 : /** Calculates the current Calc document area of this table. */
448 : void GetDocRange( ScRange& rRange ) const;
449 :
450 : /** Applies border formatting to the passed document. */
451 : void ApplyCellBorders( ScDocument* pDoc, const ScAddress& rFirstPos ) const;
452 : /** NOTE(review): undocumented in the original; presumably returns the number formatter reached through mpParser - confirm in the implementation. */
453 : SvNumberFormatter* GetFormatTable();
454 :
455 : protected:
456 : /** Creates a new HTML table without parent.
457 : @descr This constructor is used to create the "global table". */
458 : explicit ScHTMLTable(
459 : SfxItemPool& rPool,
460 : EditEngine& rEditEngine,
461 : ::std::vector< ScEEParseEntry* >& rEEParseList,
462 : ScHTMLTableId& rnUnusedId, ScHTMLParser* pParser );
463 :
464 : /** Fills all empty cells in this and nested tables with dummy parse entries. */
465 : void FillEmptyCells();
466 : /** Recalculates the size of all columns/rows in the table, regarding nested tables. */
467 : void RecalcDocSize();
468 : /** Recalculates the position of all cell entries and nested tables.
469 : @param rBasePos The origin of the table in the Calc document. */
470 : void RecalcDocPos( const ScHTMLPos& rBasePos );
471 :
472 : private:
473 : typedef ::std::auto_ptr< ScHTMLTableMap > ScHTMLTableMapPtr; // NOTE: std::auto_ptr transfers ownership when copied
474 : typedef ::std::auto_ptr< SfxItemSet > SfxItemSetPtr;
475 : typedef ::std::vector< SCCOLROW > ScSizeVec;
476 : typedef ::std::list< ScHTMLEntry* > ScHTMLEntryList; // list of raw entry pointers
477 : typedef ::std::map< ScHTMLPos, ScHTMLEntryList > ScHTMLEntryMap; // keyed by cell position; ordering comes from operator<( ScHTMLPos, ScHTMLPos )
478 : typedef ::std::auto_ptr< ScHTMLEntry > ScHTMLEntryPtr;
479 :
480 : /** Returns true, if the current cell does not contain an entry yet. */
481 : bool IsEmptyCell() const;
482 : /** Returns the item set from cell, row, or table, depending on current state. */
483 : const SfxItemSet& GetCurrItemSet() const;
484 :
485 : /** Returns true, if import info represents a space character. */
486 : static bool IsSpaceCharInfo( const ImportInfo& rInfo );
487 :
488 : /** Creates and returns a new empty flying entry at position (0,0). */
489 : ScHTMLEntryPtr CreateEntry() const;
490 : /** Creates a new flying entry.
491 : @param rInfo Contains the initial edit engine selection for the entry. */
492 : void CreateNewEntry( const ImportInfo& rInfo );
493 :
494 : /** Inserts an empty line in front of the next entry. */
495 : void InsertLeadingEmptyLine();
496 :
497 : /** Pushes the passed entry into the list of the current cell. */
498 : void ImplPushEntryToList( ScHTMLEntryList& rEntryList, ScHTMLEntryPtr& rxEntry );
499 : /** Tries to insert the entry into the current cell.
500 : @descr If insertion is not possible (i.e., currently no cell open), the
501 : entry will be inserted into the parent table.
502 : @return true = Entry has been pushed into the current cell; false = Entry dropped. */
503 : bool PushEntry( ScHTMLEntryPtr& rxEntry );
504 : /** Puts the current entry into the entry list, if it is not empty.
505 : @param rInfo The import info struct containing the end position of the current entry.
506 : @param bLastInCell true = If cell is still empty, put this entry always.
507 : @return true = Entry has been pushed into the current cell; false = Entry dropped. */
508 : bool PushEntry( const ImportInfo& rInfo, bool bLastInCell = false );
509 : /** Pushes a new entry into current cell which references a nested table.
510 : @return true = Entry has been pushed into the current cell; false = Entry dropped. */
511 : bool PushTableEntry( ScHTMLTableId nTableId );
512 :
513 : /** Tries to find a table from the table container.
514 : @descr Assumes that the table is located in the current container or
515 : that the passed table identifier is 0.
516 : @param nTableId Unique identifier of the table or 0. */
517 : ScHTMLTable* GetExistingTable( ScHTMLTableId nTableId ) const;
518 : /** Inserts a nested table in the current cell at the specified position.
519 : @param bPreFormText true = New table is based on preformatted text (<pre> tag). */
520 : ScHTMLTable* InsertNestedTable( const ImportInfo& rInfo, bool bPreFormText );
521 :
522 : /** Inserts a new cell in an unused position, starting from current cell position. */
523 : void InsertNewCell( const ScHTMLSize& rSpanSize );
524 :
525 : /** Set internal states for a new table row. */
526 : void ImplRowOn();
527 : /** Set internal states for leaving a table row. */
528 : void ImplRowOff();
529 : /** Set internal states for entering a new table cell. */
530 : void ImplDataOn( const ScHTMLSize& rSpanSize );
531 : /** Set internal states for leaving a table cell. */
532 : void ImplDataOff();
533 :
534 : /** Inserts additional formatting options from import info into the item set. */
535 : void ProcessFormatOptions( SfxItemSet& rItemSet, const ImportInfo& rInfo );
536 :
537 : /** Updates the document column/row size of the specified column or row.
538 : @descr Only increases the present count, never decreases. */
539 : void SetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos, SCCOLROW nSize );
540 : /** Calculates and sets the resulting size the cell needs in the document.
541 : @descr Reduces the needed size in merged cells.
542 : @param nCellPos The first column/row position of the (merged) cell.
543 : @param nCellSpan The cell spanning in the specified orientation.
544 : @param nRealDocSize The raw document size of all entries of the cell. */
545 : void CalcNeededDocSize(
546 : ScHTMLOrient eOrient, SCCOLROW nCellPos,
547 : SCCOLROW nCellSpan, SCCOLROW nRealDocSize );
548 :
549 : private:
550 : ScHTMLTable* mpParentTable; /// Pointer to parent table.
551 : ScHTMLTableMapPtr mxNestedTables; /// Table of nested HTML tables.
552 : OUString maTableName; /// Table name from <table id> option.
553 : ScHTMLTableAutoId maTableId; /// Unique identifier of this table.
554 : SfxItemSet maTableItemSet; /// Items for the entire table.
555 : SfxItemSetPtr mxRowItemSet; /// Items for the current table row.
556 : SfxItemSetPtr mxDataItemSet; /// Items for the current cell.
557 : ScRangeList maHMergedCells; /// List of all horizontally merged cells.
558 : ScRangeList maVMergedCells; /// List of all vertically merged cells.
559 : ScRangeList maUsedCells; /// List of all used cells.
560 : EditEngine& mrEditEngine; /// Edit engine (from ScEEParser).
561 : ::std::vector< ScEEParseEntry* >& mrEEParseList; /// List that owns the parse entries (from ScEEParser).
562 : ScHTMLEntryMap maEntryMap; /// List of entries for each cell.
563 : ScHTMLEntryList* mpCurrEntryList; /// Current entry list from map for faster access.
564 : ScHTMLEntryPtr mxCurrEntry; /// Working entry, not yet inserted in a list.
565 : ScSizeVec maCumSizes[ 2 ]; /// Cumulated cell counts for each HTML table column/row.
566 : ScHTMLSize maSize; /// Size of the table.
567 : ScHTMLPos maCurrCell; /// Address of current cell to fill.
568 : ScHTMLPos maDocBasePos; /// Resulting base address in a Calc document.
569 : ScHTMLParser* mpParser; /// Parser pointer (the protected constructor takes one); NOTE(review): how nested tables obtain it is not visible here.
570 : bool mbBorderOn:1; /// true = Table borders on.
571 : bool mbPreFormText:1; /// true = Table from preformatted text (<pre> tag).
572 : bool mbRowOn:1; /// true = Inside of <tr> </tr>.
573 : bool mbDataOn:1; /// true = Inside of <td> </td> or <th> </th>.
574 : bool mbPushEmptyLine:1; /// true = Insert empty line before current entry.
575 : };
576 :
577 : /** The "global table" representing the entire HTML document. */
578 : class ScHTMLGlobalTable : public ScHTMLTable
579 : {
580 : public:
581 : explicit ScHTMLGlobalTable( // same parameter list as the protected, parent-less ScHTMLTable constructor
582 : SfxItemPool& rPool,
583 : EditEngine& rEditEngine,
584 : ::std::vector< ScEEParseEntry* >& rEEParseList,
585 : ScHTMLTableId& rnUnusedId, ScHTMLParser* pParser );
586 :
587 : virtual ~ScHTMLGlobalTable();
588 :
589 : /** Recalculates sizes and resulting positions of all document entries. */
590 : void Recalc();
591 : };
592 :
593 : /** The HTML parser for data queries. Focuses on data import, not on layout.
594 :
595 : Builds the table structure correctly, ignores extended formatting like
596 : pictures or column widths.
597 : */
598 : class ScHTMLQueryParser : public ScHTMLParser
599 : {
600 : public:
601 : explicit ScHTMLQueryParser( EditEngine* pEditEngine, ScDocument* pDoc );
602 : virtual ~ScHTMLQueryParser();
603 : /** Overrides the pure virtual ScHTMLParser::Read(). */
604 : virtual sal_uLong Read( SvStream& rStrm, const OUString& rBaseURL ) SAL_OVERRIDE;
605 :
606 : /** Returns the "global table" which contains the entire HTML document. */
607 : virtual const ScHTMLTable* GetGlobalTable() const SAL_OVERRIDE;
608 :
609 : private:
610 : /** Handles all possible tags in the HTML document. */
611 : void ProcessToken( const ImportInfo& rInfo );
612 : /** Inserts a text portion into current entry. */
613 : void InsertText( const ImportInfo& rInfo );
614 : /** Processes the <font> tag. */
615 : void FontOn( const ImportInfo& rInfo );
616 :
617 : /** Processes the <meta> tag. */
618 : void MetaOn( const ImportInfo& rInfo );
619 : /** Opens the title of the HTML document (<title> tag). */
620 : void TitleOn( const ImportInfo& rInfo );
621 : /** Closes the title of the HTML document (</title> tag). */
622 : void TitleOff( const ImportInfo& rInfo );
623 :
624 : /** Opens a new table at the current position. */
625 : void TableOn( const ImportInfo& rInfo );
626 : /** Closes the current table. */
627 : void TableOff( const ImportInfo& rInfo );
628 : /** Opens a new table based on preformatted text. */
629 : void PreOn( const ImportInfo& rInfo );
630 : /** Closes the current preformatted text table. */
631 : void PreOff( const ImportInfo& rInfo );
632 :
633 : /** Closes the current table, regardless of the opening tag. */
634 : void CloseTable( const ImportInfo& rInfo );
635 : /** NOTE(review): undocumented in the original; presumably parses <style> content into the ScHTMLStyles collection - confirm in the implementation. */
636 : void ParseStyle(const OUString& rStrm);
637 : /** Link handler declared via the DECL_LINK macro; receives a const ImportInfo*. */
638 : DECL_LINK( HTMLImportHdl, const ImportInfo* );
639 :
640 : private:
641 : typedef ::std::auto_ptr< ScHTMLGlobalTable > ScHTMLGlobalTablePtr; // NOTE: std::auto_ptr transfers ownership when copied
642 :
643 : OUStringBuffer maTitle; /// The title of the document.
644 : ScHTMLGlobalTablePtr mxGlobTable; /// Contains the entire imported document.
645 : ScHTMLTable* mpCurrTable; /// Pointer to current table (performance).
646 : ScHTMLTableId mnUnusedId; /// First unused table identifier.
647 : bool mbTitleOn; /// true = Inside of <title> </title>.
648 : };
649 :
650 :
651 : #endif
652 :
653 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|