Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #ifndef INCLUDED_SC_SOURCE_FILTER_INC_HTMLPARS_HXX
21 : #define INCLUDED_SC_SOURCE_FILTER_INC_HTMLPARS_HXX
22 :
23 : #include <list>
24 : #include <memory>
25 : #include <map>
26 : #include <stack>
27 : #include <unordered_map>
28 : #include <vector>
29 : #include <o3tl/sorted_vector.hxx>
30 : #include <boost/ptr_container/ptr_map.hpp>
31 :
32 : #include "rangelst.hxx"
33 : #include "eeparser.hxx"
34 :
35 : const sal_uInt32 SC_HTML_FONTSIZES = 7; // same as in export, HTML options
36 :
37 : // Pixel tolerance for SeekOffset and related.
38 : const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_SMALL = 1; // single table
39 : const sal_uInt16 SC_HTML_OFFSET_TOLERANCE_LARGE = 10; // nested tables
40 :
41 : // BASE class for HTML parser classes
42 :
43 : class ScHTMLTable;
44 :
45 : /**
46 : * Collection of HTML style data parsed from the content of <style>
47 : * elements.
48 : */
49 1 : class ScHTMLStyles
50 : {
51 : typedef std::unordered_map<OUString, OUString, OUStringHash> PropsType; /// string to string map (presumably property name to property value - confirm)
52 : typedef ::boost::ptr_map<OUString, PropsType> NamePropsType; /// name to property map
53 : typedef ::boost::ptr_map<OUString, NamePropsType> ElemsType; /// name to (name to property map)
54 :
55 : NamePropsType maGlobalProps; /// global properties (for a given class for all elements)
56 : NamePropsType maElemGlobalProps; /// element global properties (no class specified)
57 : ElemsType maElemProps; /// element to class to properties (both element and class are given)
58 : const OUString maEmpty; /// just a persistent empty string (presumably what getPropertyValue() hands out by reference on a miss - confirm).
59 : public:
60 : ScHTMLStyles();
61 : /** Adds a property/value pair; element and class names are each passed as a (char pointer, length) pair. NOTE(review): which member map receives the pair is decided in the implementation, not visible in this header. */
62 : void add(const char* pElemName, size_t nElemName, const char* pClassName, size_t nClassName,
63 : const OUString& aProp, const OUString& aValue);
64 :
65 : /**
66 : * Find best-matching property value for given element and class names.
67 : */
68 : const OUString& getPropertyValue(
69 : const OUString& rElem, const OUString& rClass, const OUString& rPropName) const;
70 :
71 : private:
72 : static void insertProp(
73 : NamePropsType& rProps, const OUString& aName,
74 : const OUString& aProp, const OUString& aValue);
75 : };
76 :
77 : /** Base class for HTML parser classes. */
78 : class ScHTMLParser : public ScEEParser
79 : {
80 : ScHTMLStyles maStyles; /// Style data, handed out by GetStyles().
81 : protected:
82 : sal_uInt32 maFontHeights[ SC_HTML_FONTSIZES ]; /// One value per HTML font size (SC_HTML_FONTSIZES entries).
83 : ScDocument* mpDoc; /// The destination document.
84 :
85 : public:
86 : explicit ScHTMLParser( EditEngine* pEditEngine, ScDocument* pDoc );
87 : virtual ~ScHTMLParser();
88 : /** Pure virtual here; ScHTMLLayoutParser and ScHTMLQueryParser both declare an override. */
89 : virtual sal_uLong Read( SvStream& rStrm, const OUString& rBaseURL ) SAL_OVERRIDE = 0;
90 : /** Simple accessors. GetDoc() dereferences mpDoc without a null check, so mpDoc must be non-null when it is called. */
91 0 : ScHTMLStyles& GetStyles() { return maStyles;}
92 0 : ScDocument& GetDoc() { return *mpDoc;}
93 :
94 : /** Returns the "global table" which contains the entire HTML document. */
95 : virtual const ScHTMLTable* GetGlobalTable() const = 0;
96 : };
97 :
98 : typedef o3tl::sorted_vector<sal_uLong> ScHTMLColOffset; /// Sorted collection of column offsets (presumably in pixels - TODO confirm).
99 : /** Holds one set of per-table parser values; the members mirror the per-table members of ScHTMLLayoutParser. NOTE(review): presumably pushed when a nested table opens and popped when it closes - confirm in the implementation. */
100 : struct ScHTMLTableStackEntry
101 : {
102 : ScRangeListRef xLockedList;
103 : ScEEParseEntry* pCellEntry;
104 : ScHTMLColOffset* pLocalColOffset;
105 : sal_uLong nFirstTableCell;
106 : SCCOL nColCnt;
107 : SCROW nRowCnt;
108 : SCCOL nColCntStart;
109 : SCCOL nMaxCol;
110 : sal_uInt16 nTable;
111 : sal_uInt16 nTableWidth;
112 : sal_uInt16 nColOffset;
113 : sal_uInt16 nColOffsetStart;
114 : bool bFirstRow;
115 0 : ScHTMLTableStackEntry( ScEEParseEntry* pE, /// Member-wise initialisation; the pointer arguments are stored as given.
116 : const ScRangeListRef& rL, ScHTMLColOffset* pTO,
117 : sal_uLong nFTC,
118 : SCCOL nCol, SCROW nRow,
119 : SCCOL nStart, SCCOL nMax, sal_uInt16 nTab,
120 : sal_uInt16 nTW, sal_uInt16 nCO, sal_uInt16 nCOS,
121 : bool bFR )
122 : : xLockedList( rL ), pCellEntry( pE ),
123 : pLocalColOffset( pTO ),
124 : nFirstTableCell( nFTC ),
125 : nColCnt( nCol ), nRowCnt( nRow ),
126 : nColCntStart( nStart ), nMaxCol( nMax ),
127 : nTable( nTab ), nTableWidth( nTW ),
128 : nColOffset( nCO ), nColOffsetStart( nCOS ),
129 0 : bFirstRow( bFR )
130 0 : {}
131 0 : ~ScHTMLTableStackEntry() {} /// Empty body: pCellEntry and pLocalColOffset are not deleted here.
132 : };
133 : typedef ::std::stack< ScHTMLTableStackEntry* > ScHTMLTableStack; /// Stack of raw pointers; the stack itself does not delete the entries.
134 : /** Triple of last column, next row and current row, kept on a ScHTMLAdjustStack. NOTE(review): presumably used by ScHTMLLayoutParser::Adjust() - confirm in the implementation. */
135 : struct ScHTMLAdjustStackEntry
136 : {
137 : SCCOL nLastCol;
138 : SCROW nNextRow;
139 : SCROW nCurRow;
140 0 : ScHTMLAdjustStackEntry( SCCOL nLCol, SCROW nNRow, /// Member-wise initialisation from the three arguments.
141 : SCROW nCRow )
142 : : nLastCol( nLCol ), nNextRow( nNRow ),
143 0 : nCurRow( nCRow )
144 0 : {}
145 : };
146 : typedef ::std::stack< ScHTMLAdjustStackEntry* > ScHTMLAdjustStack; /// Stack of raw pointers; the stack itself does not delete the entries.
147 :
148 : class EditEngine;
149 : class ScDocument;
150 : class HTMLOption;
151 :
152 : // TODO these need better names
153 : typedef ::std::map<SCROW, SCROW> InnerMap; /// Maps a row value to another row value (meaning not visible in this header).
154 : typedef ::std::map<sal_uInt16, InnerMap*> OuterMap; /// Maps a sal_uInt16 key (presumably a table index - TODO confirm) to a raw InnerMap pointer.
155 : /** HTML parser derived from ScHTMLParser that keeps per-table column offsets and table widths (in pixels, according to the member comments). */
156 : class ScHTMLLayoutParser : public ScHTMLParser
157 : {
158 : private:
159 : Size aPageSize;
160 : OUString aBaseURL;
161 : ScHTMLTableStack aTableStack;
162 : OUString aString;
163 : ScRangeListRef xLockedList; // per table
164 : OuterMap* pTables;
165 : ScHTMLColOffset* pColOffset;
166 : ScHTMLColOffset* pLocalColOffset; // per table
167 : sal_uLong nFirstTableCell; // per table
168 : short nTableLevel;
169 : sal_uInt16 nTable;
170 : sal_uInt16 nMaxTable;
171 : SCCOL nColCntStart; // first column per table
172 : SCCOL nMaxCol; // per table
173 : sal_uInt16 nTableWidth; // per table
174 : sal_uInt16 nColOffset; // current value, in pixels
175 : sal_uInt16 nColOffsetStart; // start value per table, in pixels
176 : sal_uInt16 nOffsetTolerance; // for use with SeekOffset and related
177 : bool bTabInTabCell:1;
178 : bool bFirstRow:1; // per table, whether in the first row
179 : bool bInCell:1;
180 : bool bInTitle:1;
181 :
182 : DECL_LINK( HTMLImportHdl, ImportInfo* );
183 : void NewActEntry( ScEEParseEntry* );
184 : static void EntryEnd( ScEEParseEntry*, const ESelection& );
185 : void ProcToken( ImportInfo* );
186 : void CloseEntry( ImportInfo* );
187 : void NextRow( ImportInfo* );
188 : void SkipLocked( ScEEParseEntry*, bool bJoin = true );
189 : static bool SeekOffset( ScHTMLColOffset*, sal_uInt16 nOffset,
190 : SCCOL* pCol, sal_uInt16 nOffsetTol );
191 : static void MakeCol( ScHTMLColOffset*, sal_uInt16& nOffset,
192 : sal_uInt16& nWidth, sal_uInt16 nOffsetTol,
193 : sal_uInt16 nWidthTol );
194 : static void MakeColNoRef( ScHTMLColOffset*, sal_uInt16 nOffset,
195 : sal_uInt16 nWidth, sal_uInt16 nOffsetTol,
196 : sal_uInt16 nWidthTol );
197 : static void ModifyOffset( ScHTMLColOffset*, sal_uInt16& nOldOffset,
198 : sal_uInt16& nNewOffset, sal_uInt16 nOffsetTol );
199 : void Colonize( ScEEParseEntry* );
200 : sal_uInt16 GetWidth( ScEEParseEntry* );
201 : void SetWidths();
202 : void Adjust();
203 :
204 : sal_uInt16 GetWidthPixel( const HTMLOption& );
205 : bool IsAtBeginningOfText( ImportInfo* );
206 :
207 : void TableOn( ImportInfo* );
208 : void ColOn( ImportInfo* );
209 : void TableRowOn( ImportInfo* );
210 : void TableRowOff( ImportInfo* );
211 : void TableDataOn( ImportInfo* );
212 : void TableDataOff( ImportInfo* );
213 : void TableOff( ImportInfo* );
214 : void Image( ImportInfo* );
215 : void AnchorOn( ImportInfo* );
216 : void FontOn( ImportInfo* );
217 :
218 : public:
219 : ScHTMLLayoutParser( EditEngine*, const OUString& rBaseURL, const Size& aPageSize, ScDocument* );
220 : virtual ~ScHTMLLayoutParser();
221 : virtual sal_uLong Read( SvStream&, const OUString& rBaseURL ) SAL_OVERRIDE;
222 : virtual const ScHTMLTable* GetGlobalTable() const SAL_OVERRIDE;
223 : };
224 :
225 : // HTML DATA QUERY PARSER
226 :
227 : /** Declares the orientation in or for a table: column (tdCol) or row (tdRow). */
228 : enum ScHTMLOrient { tdCol = 0 , tdRow = 1 };
229 :
230 : /** Type for a unique identifier for each table. */
231 : typedef sal_uInt16 ScHTMLTableId;
232 : /** Identifier of the "global table" (the entire HTML document). */
233 : const ScHTMLTableId SC_HTML_GLOBAL_TABLE = 0;
234 : /** Used as table index for normal (non-table) entries in ScHTMLEntry structs. Note that it has the same value (0) as SC_HTML_GLOBAL_TABLE. */
235 : const ScHTMLTableId SC_HTML_NO_TABLE = 0;
236 :
237 : /** A 2D cell position in an HTML table. */
238 : struct ScHTMLPos
239 : {
240 : SCCOL mnCol; /// Column part of the position.
241 : SCROW mnRow; /// Row part of the position.
242 : /** The default constructor yields position (0,0); the ScAddress constructor copies Col() and Row() of the address. */
243 4 : inline explicit ScHTMLPos() : mnCol( 0 ), mnRow( 0 ) {}
244 7 : inline explicit ScHTMLPos( SCCOL nCol, SCROW nRow ) :
245 7 : mnCol( nCol ), mnRow( nRow ) {}
246 0 : inline explicit ScHTMLPos( const ScAddress& rAddr ) { Set( rAddr ); }
247 : /** Get() returns the column for tdCol and the row for any other orientation value. */
248 14 : inline SCCOLROW Get( ScHTMLOrient eOrient ) const
249 14 : { return (eOrient == tdCol) ? mnCol : mnRow; }
250 0 : inline void Set( SCCOL nCol, SCROW nRow )
251 0 : { mnCol = nCol; mnRow = nRow; }
252 0 : inline void Set( const ScAddress& rAddr )
253 0 : { Set( rAddr.Col(), rAddr.Row() ); }
254 : inline void Move( SCsCOL nColDiff, SCsROW nRowDiff ) /// Adds the signed differences to column and row; no range check is done here.
255 : { mnCol = mnCol + nColDiff; mnRow = mnRow + nRowDiff; }
256 71 : inline ScAddress MakeAddr() const /// Builds an ScAddress with 0 as third argument (presumably the sheet index - confirm).
257 71 : { return ScAddress( mnCol, mnRow, 0 ); }
258 : };
259 : /** Two positions are equal if both row and column are equal. */
260 : inline bool operator==( const ScHTMLPos& rPos1, const ScHTMLPos& rPos2 )
261 : {
262 : return (rPos1.mnRow == rPos2.mnRow) && (rPos1.mnCol == rPos2.mnCol);
263 : }
264 : /** Orders positions by row first and by column second. */
265 21 : inline bool operator<( const ScHTMLPos& rPos1, const ScHTMLPos& rPos2 )
266 : {
267 21 : return (rPos1.mnRow < rPos2.mnRow) || ((rPos1.mnRow == rPos2.mnRow) && (rPos1.mnCol < rPos2.mnCol));
268 : }
269 :
270 : /** A 2D cell size in an HTML table. */
271 : struct ScHTMLSize
272 : {
273 : SCCOL mnCols; /// Number of columns.
274 : SCROW mnRows; /// Number of rows.
275 : /** The default constructor yields the size (0,0). */
276 : inline explicit ScHTMLSize() : mnCols( 0 ), mnRows( 0 ) {}
277 44 : inline explicit ScHTMLSize( SCCOL nCols, SCROW nRows ) :
278 44 : mnCols( nCols ), mnRows( nRows ) {}
279 : /** Get() returns the column count for tdCol and the row count for any other orientation value. */
280 : inline SCCOLROW Get( ScHTMLOrient eOrient ) const
281 : { return (eOrient == tdCol) ? mnCols : mnRows; }
282 0 : inline void Set( SCCOL nCols, SCROW nRows )
283 0 : { mnCols = nCols; mnRows = nRows; }
284 : inline void Expand( SCsCOL nColDiff, SCsROW nRowDiff ) /// Adds the signed differences to both counts; no range check is done here.
285 : { mnCols = mnCols + nColDiff; mnRows = mnRows + nRowDiff; }
286 : };
287 : /** Two sizes are equal if both row count and column count are equal. */
288 : inline bool operator==( const ScHTMLSize& rSize1, const ScHTMLSize& rSize2 )
289 : {
290 : return (rSize1.mnRows == rSize2.mnRows) && (rSize1.mnCols == rSize2.mnCols);
291 : }
292 :
293 : /** A single entry containing a line of text or representing a table. */
294 21 : struct ScHTMLEntry : public ScEEParseEntry
295 : {
296 : public:
297 : explicit ScHTMLEntry(
298 : const SfxItemSet& rItemSet,
299 : ScHTMLTableId nTableId = SC_HTML_NO_TABLE );
300 :
301 : /** Returns true, if the selection of the entry is empty. */
302 0 : inline bool IsEmpty() const { return !aSel.HasRange(); }
303 : /** Returns true, if the entry has any content to be imported. */
304 : bool HasContents() const;
305 : /** Returns true, if the entry represents a table, i.e. nTab differs from SC_HTML_NO_TABLE. */
306 34 : inline bool IsTable() const { return nTab != SC_HTML_NO_TABLE; }
307 : /** Returns the table identifier stored in this entry (nTab). */
308 21 : inline ScHTMLTableId GetTableId() const { return nTab; }
309 :
310 : /** Sets or clears the import always state. */
311 0 : inline void SetImportAlways( bool bSet = true ) { mbImportAlways = bSet; }
312 : /** Sets start point of the entry selection to the start of the import info object. */
313 : void AdjustStart( const ImportInfo& rInfo );
314 : /** Sets end point of the entry selection to the end of the import info object. */
315 : void AdjustEnd( const ImportInfo& rInfo );
316 : /** Deletes leading and trailing empty paragraphs from the entry. */
317 : void Strip( const EditEngine& rEditEngine );
318 :
319 : /** Returns read/write access to the item set of this entry. */
320 0 : inline SfxItemSet& GetItemSet() { return aItemSet; }
321 : /** Returns read-only access to the item set of this entry. */
322 : inline const SfxItemSet& GetItemSet() const { return aItemSet; }
323 :
324 : private:
325 : bool mbImportAlways; /// true = Always import this entry.
326 : };
327 :
328 : /** This struct handles creation of unique table identifiers. */
329 : struct ScHTMLTableAutoId
330 : {
331 : const ScHTMLTableId mnTableId; /// The created unique table identifier.
332 : ScHTMLTableId& mrnUnusedId; /// Reference to global unused identifier variable.
333 : // The const member and the reference member make objects of this struct non-assignable.
334 : /** The constructor assigns an unused identifier to member mnTableId. */
335 : explicit ScHTMLTableAutoId( ScHTMLTableId& rnUnusedId );
336 : };
337 :
338 : class ScHTMLTableMap;
339 :
340 : /** Stores data for one table in an HTML document.
341 :
342 : This class does the main work for importing an HTML document. It manages
343 : the correct insertion of parse entries into the correct cells and the
344 : creation of nested tables. Recalculation of resulting document size and
345 : position is done recursively in all nested tables.
346 : */
347 : class ScHTMLTable
348 : {
349 : public:
350 : /** Creates a new HTML table without content.
351 : @descr Internally handles a current cell position. This position is
352 : invalid until first calls of RowOn() and DataOn().
353 : @param rParentTable Reference to the parent table that owns this table.
354 : @param bPreFormText true = Table is based on preformatted text (<pre> tag). */
355 : explicit ScHTMLTable(
356 : ScHTMLTable& rParentTable,
357 : const ImportInfo& rInfo,
358 : bool bPreFormText );
359 :
360 : virtual ~ScHTMLTable();
361 :
362 : /** Returns the name of the table, specified in the TABLE tag. */
363 1 : inline const OUString& GetTableName() const { return maTableName; }
364 : /** Returns the unique identifier of the table. */
365 7 : inline ScHTMLTableId GetTableId() const { return maTableId.mnTableId; }
366 : /** Returns the table size. */
367 : inline const ScHTMLSize& GetSize() const { return maSize; }
368 : /** Returns the cell spanning of the specified cell. */
369 : ScHTMLSize GetSpan( const ScHTMLPos& rCellPos ) const;
370 :
371 : /** Searches in all nested tables for the specified table.
372 : @param nTableId Unique identifier of the table. */
373 : ScHTMLTable* FindNestedTable( ScHTMLTableId nTableId ) const;
374 :
375 : /** Puts the item into the item set of the current entry. */
376 : void PutItem( const SfxPoolItem& rItem );
377 : /** Inserts a text portion into current entry. */
378 : void PutText( const ImportInfo& rInfo );
379 : /** Inserts a new line, if in preformatted text, else does nothing. */
380 : void InsertPara( const ImportInfo& rInfo );
381 :
382 : /** Inserts a line break (<br> tag).
383 : @descr Inserts the current entry regardless if it is empty. */
384 : void BreakOn();
385 : /** Inserts a heading line (<p> and <h*> tags). */
386 : void HeadingOn();
387 : /** Processes a hyperlink (<a> tag). */
388 : void AnchorOn();
389 :
390 : /** Starts a *new* table nested in this table (<table> tag).
391 : @return Pointer to the new table. */
392 : ScHTMLTable* TableOn( const ImportInfo& rInfo );
393 : /** Closes *this* table (</table> tag).
394 : @return Pointer to the parent table. */
395 : ScHTMLTable* TableOff( const ImportInfo& rInfo );
396 : /** Starts a *new* table based on preformatted text (<pre> tag).
397 : @return Pointer to the new table. */
398 : ScHTMLTable* PreOn( const ImportInfo& rInfo );
399 : /** Closes *this* table based on preformatted text (</pre> tag).
400 : @return Pointer to the parent table. */
401 : ScHTMLTable* PreOff( const ImportInfo& rInfo );
402 :
403 : /** Starts next row (<tr> tag).
404 : @descr Cell address is invalid until first call of DataOn(). */
405 : void RowOn( const ImportInfo& rInfo );
406 : /** Closes the current row (</tr> tag).
407 : @descr Cell address is invalid until call of RowOn() and DataOn(). */
408 : void RowOff( const ImportInfo& rInfo );
409 : /** Starts the next cell (<td> or <th> tag). */
410 : void DataOn( const ImportInfo& rInfo );
411 : /** Closes the current cell (</td> or </th> tag).
412 : @descr Cell address is invalid until next call of DataOn(). */
413 : void DataOff( const ImportInfo& rInfo );
414 :
415 : /** Starts the body of the HTML document (<body> tag). */
416 : void BodyOn( const ImportInfo& rInfo );
417 : /** Closes the body of the HTML document (</body> tag). */
418 : void BodyOff( const ImportInfo& rInfo );
419 :
420 : /** Closes *this* table (</table> tag) or preformatted text (</pre> tag).
421 : @descr Used to close this table object regardless of the opening tag type.
422 : @return Pointer to the parent table, or this, if no parent found. */
423 : ScHTMLTable* CloseTable( const ImportInfo& rInfo );
424 :
425 : /** Returns the resulting document row/column count of the specified HTML row/column. */
426 : SCCOLROW GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos ) const;
427 : /** Returns the resulting document row/column count in the half-open range [nCellBegin, nCellEnd). */
428 : SCCOLROW GetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellBegin, SCCOLROW nCellEnd ) const;
429 : /** Returns the total document row/column count in the specified direction. */
430 : SCCOLROW GetDocSize( ScHTMLOrient eOrient ) const;
431 : /** Returns the total document row/column count of the specified HTML cell. */
432 : ScHTMLSize GetDocSize( const ScHTMLPos& rCellPos ) const;
433 :
434 : /** Returns the resulting Calc position of the top left edge of the table. */
435 1 : inline const ScHTMLPos& GetDocPos() const { return maDocBasePos; }
436 : /** Calculates the resulting Calc position of the specified HTML column/row. */
437 : SCCOLROW GetDocPos( ScHTMLOrient eOrient, SCCOLROW nCellPos = 0 ) const;
438 : /** Calculates the resulting Calc position of the specified HTML cell. */
439 : ScHTMLPos GetDocPos( const ScHTMLPos& rCellPos ) const;
440 :
441 : /** Calculates the current Calc document area of this table. */
442 : void GetDocRange( ScRange& rRange ) const;
443 :
444 : /** Applies border formatting to the passed document. */
445 : void ApplyCellBorders( ScDocument* pDoc, const ScAddress& rFirstPos ) const;
446 : /** Returns a pointer to a number formatter. NOTE(review): where the formatter comes from (presumably the document of mpParser) is not visible in this header - confirm. */
447 : SvNumberFormatter* GetFormatTable();
448 :
449 : protected:
450 : /** Creates a new HTML table without parent.
451 : @descr This constructor is used to create the "global table". */
452 : explicit ScHTMLTable(
453 : SfxItemPool& rPool,
454 : EditEngine& rEditEngine,
455 : ::std::vector< ScEEParseEntry* >& rEEParseList,
456 : ScHTMLTableId& rnUnusedId, ScHTMLParser* pParser );
457 :
458 : /** Fills all empty cells in this and nested tables with dummy parse entries. */
459 : void FillEmptyCells();
460 : /** Recalculates the size of all columns/rows in the table, regarding nested tables. */
461 : void RecalcDocSize();
462 : /** Recalculates the position of all cell entries and nested tables.
463 : @param rBasePos The origin of the table in the Calc document. */
464 : void RecalcDocPos( const ScHTMLPos& rBasePos );
465 :
466 : private:
467 : typedef ::std::unique_ptr< ScHTMLTableMap > ScHTMLTableMapPtr;
468 : typedef ::std::unique_ptr< SfxItemSet > SfxItemSetPtr;
469 : typedef ::std::vector< SCCOLROW > ScSizeVec;
470 : typedef ::std::list< ScHTMLEntry* > ScHTMLEntryList;
471 : typedef ::std::map< ScHTMLPos, ScHTMLEntryList > ScHTMLEntryMap;
472 : typedef ::std::unique_ptr< ScHTMLEntry > ScHTMLEntryPtr;
473 :
474 : /** Returns true, if the current cell does not contain an entry yet. */
475 : bool IsEmptyCell() const;
476 : /** Returns the item set from cell, row, or table, depending on current state. */
477 : const SfxItemSet& GetCurrItemSet() const;
478 :
479 : /** Returns true, if import info represents a space character. */
480 : static bool IsSpaceCharInfo( const ImportInfo& rInfo );
481 :
482 : /** Creates and returns a new empty flying entry at position (0,0). */
483 : ScHTMLEntryPtr CreateEntry() const;
484 : /** Creates a new flying entry.
485 : @param rInfo Contains the initial edit engine selection for the entry. */
486 : void CreateNewEntry( const ImportInfo& rInfo );
487 :
488 : /** Inserts an empty line in front of the next entry. */
489 : void InsertLeadingEmptyLine();
490 :
491 : /** Pushes the passed entry into the list of the current cell. */
492 : void ImplPushEntryToList( ScHTMLEntryList& rEntryList, ScHTMLEntryPtr& rxEntry );
493 : /** Tries to insert the entry into the current cell.
494 : @descr If insertion is not possible (i.e., currently no cell open), the
495 : entry will be inserted into the parent table.
496 : @return true = Entry has been pushed into the current cell; false = Entry dropped. */
497 : bool PushEntry( ScHTMLEntryPtr& rxEntry );
498 : /** Puts the current entry into the entry list, if it is not empty.
499 : @param rInfo The import info struct containing the end position of the current entry.
500 : @param bLastInCell true = If cell is still empty, put this entry always.
501 : @return true = Entry has been pushed into the current cell; false = Entry dropped. */
502 : bool PushEntry( const ImportInfo& rInfo, bool bLastInCell = false );
503 : /** Pushes a new entry into current cell which references a nested table.
504 : @return true = Entry has been pushed into the current cell; false = Entry dropped. */
505 : bool PushTableEntry( ScHTMLTableId nTableId );
506 :
507 : /** Tries to find a table from the table container.
508 : @descr Assumes that the table is located in the current container or
509 : that the passed table identifier is 0.
510 : @param nTableId Unique identifier of the table or 0. */
511 : ScHTMLTable* GetExistingTable( ScHTMLTableId nTableId ) const;
512 : /** Inserts a nested table in the current cell at the specified position.
513 : @param bPreFormText true = New table is based on preformatted text (<pre> tag). */
514 : ScHTMLTable* InsertNestedTable( const ImportInfo& rInfo, bool bPreFormText );
515 :
516 : /** Inserts a new cell in an unused position, starting from current cell position. */
517 : void InsertNewCell( const ScHTMLSize& rSpanSize );
518 :
519 : /** Set internal states for a new table row. */
520 : void ImplRowOn();
521 : /** Set internal states for leaving a table row. */
522 : void ImplRowOff();
523 : /** Set internal states for entering a new table cell. */
524 : void ImplDataOn( const ScHTMLSize& rSpanSize );
525 : /** Set internal states for leaving a table cell. */
526 : void ImplDataOff();
527 :
528 : /** Inserts additional formatting options from import info into the item set. */
529 : static void ProcessFormatOptions( SfxItemSet& rItemSet, const ImportInfo& rInfo );
530 :
531 : /** Updates the document column/row size of the specified column or row.
532 : @descr Only increases the present count, never decreases. */
533 : void SetDocSize( ScHTMLOrient eOrient, SCCOLROW nCellPos, SCCOLROW nSize );
534 : /** Calculates and sets the resulting size the cell needs in the document.
535 : @descr Reduces the needed size in merged cells.
536 : @param nCellPos The first column/row position of the (merged) cell.
537 : @param nCellSpan The cell spanning in the specified orientation.
538 : @param nRealDocSize The raw document size of all entries of the cell. */
539 : void CalcNeededDocSize(
540 : ScHTMLOrient eOrient, SCCOLROW nCellPos,
541 : SCCOLROW nCellSpan, SCCOLROW nRealDocSize );
542 :
543 : private:
544 : ScHTMLTable* mpParentTable; /// Pointer to parent table.
545 : ScHTMLTableMapPtr mxNestedTables; /// Table of nested HTML tables.
546 : OUString maTableName; /// Table name from <table id> option.
547 : ScHTMLTableAutoId maTableId; /// Unique identifier of this table.
548 : SfxItemSet maTableItemSet; /// Items for the entire table.
549 : SfxItemSetPtr mxRowItemSet; /// Items for the current table row.
550 : SfxItemSetPtr mxDataItemSet; /// Items for the current cell.
551 : ScRangeList maHMergedCells; /// List of all horizontally merged cells.
552 : ScRangeList maVMergedCells; /// List of all vertically merged cells.
553 : ScRangeList maUsedCells; /// List of all used cells.
554 : EditEngine& mrEditEngine; /// Edit engine (from ScEEParser).
555 : ::std::vector< ScEEParseEntry* >& mrEEParseList; /// List that owns the parse entries (from ScEEParser).
556 : ScHTMLEntryMap maEntryMap; /// List of entries for each cell.
557 : ScHTMLEntryList* mpCurrEntryList; /// Current entry list from map for faster access.
558 : ScHTMLEntryPtr mxCurrEntry; /// Working entry, not yet inserted in a list.
559 : ScSizeVec maCumSizes[ 2 ]; /// Cumulated cell counts for each HTML table column/row (two vectors, presumably indexed by ScHTMLOrient - confirm).
560 : ScHTMLSize maSize; /// Size of the table.
561 : ScHTMLPos maCurrCell; /// Address of current cell to fill.
562 : ScHTMLPos maDocBasePos; /// Resulting base address in a Calc document.
563 : ScHTMLParser* mpParser; /// Parser pointer (passed to the parent-less constructor; presumably taken from the parent table for nested tables - confirm).
564 : bool mbBorderOn:1; /// true = Table borders on.
565 : bool mbPreFormText:1; /// true = Table from preformatted text (<pre> tag).
566 : bool mbRowOn:1; /// true = Inside of <tr> </tr>.
567 : bool mbDataOn:1; /// true = Inside of <td> </td> or <th> </th>.
568 : bool mbPushEmptyLine:1; /// true = Insert empty line before current entry.
569 : };
570 :
571 : /** The "global table" representing the entire HTML document. */
572 : class ScHTMLGlobalTable : public ScHTMLTable
573 : {
574 : public:
575 : explicit ScHTMLGlobalTable( /// Takes the same parameter list as the protected parent-less ScHTMLTable constructor.
576 : SfxItemPool& rPool,
577 : EditEngine& rEditEngine,
578 : ::std::vector< ScEEParseEntry* >& rEEParseList,
579 : ScHTMLTableId& rnUnusedId, ScHTMLParser* pParser );
580 :
581 : virtual ~ScHTMLGlobalTable();
582 :
583 : /** Recalculates sizes and resulting positions of all document entries. NOTE(review): presumably drives the protected ScHTMLTable helpers FillEmptyCells(), RecalcDocSize() and RecalcDocPos() - confirm in the implementation. */
584 : void Recalc();
585 : };
586 :
587 : /** The HTML parser for data queries. Focuses on data import, not on layout.
588 :
589 : Builds the table structure correctly, ignores extended formatting like
590 : pictures or column widths.
591 : */
592 : class ScHTMLQueryParser : public ScHTMLParser
593 : {
594 : public:
595 : explicit ScHTMLQueryParser( EditEngine* pEditEngine, ScDocument* pDoc );
596 : virtual ~ScHTMLQueryParser();
597 :
598 : virtual sal_uLong Read( SvStream& rStrm, const OUString& rBaseURL ) SAL_OVERRIDE;
599 :
600 : /** Returns the "global table" which contains the entire HTML document. */
601 : virtual const ScHTMLTable* GetGlobalTable() const SAL_OVERRIDE;
602 :
603 : private:
604 : /** Handles all possible tags in the HTML document. */
605 : void ProcessToken( const ImportInfo& rInfo );
606 : /** Inserts a text portion into current entry. */
607 : void InsertText( const ImportInfo& rInfo );
608 : /** Processes the <font> tag. */
609 : void FontOn( const ImportInfo& rInfo );
610 :
611 : /** Processes the <meta> tag. */
612 : void MetaOn( const ImportInfo& rInfo );
613 : /** Opens the title of the HTML document (<title> tag). */
614 : void TitleOn( const ImportInfo& rInfo );
615 : /** Closes the title of the HTML document (</title> tag). */
616 : void TitleOff( const ImportInfo& rInfo );
617 :
618 : /** Opens a new table at the current position. */
619 : void TableOn( const ImportInfo& rInfo );
620 : /** Closes the current table. */
621 : void TableOff( const ImportInfo& rInfo );
622 : /** Opens a new table based on preformatted text. */
623 : void PreOn( const ImportInfo& rInfo );
624 : /** Closes the current preformatted text table. */
625 : void PreOff( const ImportInfo& rInfo );
626 :
627 : /** Closes the current table, regardless of the opening tag. */
628 : void CloseTable( const ImportInfo& rInfo );
629 : /** Processes the style text passed in rStrm. NOTE(review): presumably fills the ScHTMLStyles object returned by GetStyles() - confirm in the implementation. */
630 : void ParseStyle(const OUString& rStrm);
631 : /** Link handler declaration taking a const ImportInfo pointer. NOTE(review): presumably the import callback that forwards to ProcessToken() - confirm. */
632 : DECL_LINK( HTMLImportHdl, const ImportInfo* );
633 :
634 : private:
635 : typedef ::std::unique_ptr< ScHTMLGlobalTable > ScHTMLGlobalTablePtr;
636 :
637 : OUStringBuffer maTitle; /// The title of the document.
638 : ScHTMLGlobalTablePtr mxGlobTable; /// Contains the entire imported document.
639 : ScHTMLTable* mpCurrTable; /// Pointer to current table (performance).
640 : ScHTMLTableId mnUnusedId; /// First unused table identifier.
641 : bool mbTitleOn; /// true = Inside of <title> </title>.
642 : };
643 :
644 : #endif
645 :
646 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|