Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 :
21 : #include <vcl/wrkwin.hxx>
22 : #include <vcl/dialog.hxx>
23 : #include <vcl/msgbox.hxx>
24 : #include <vcl/svapp.hxx>
25 : #include <eehtml.hxx>
26 : #include <editeng/adjustitem.hxx>
27 : #include <editeng/flditem.hxx>
28 : #include <tools/urlobj.hxx>
29 : #include <editeng/fhgtitem.hxx>
30 : #include <editeng/fontitem.hxx>
31 : #include <editeng/ulspitem.hxx>
32 : #include <editeng/wghtitem.hxx>
33 : #include <svtools/htmltokn.h>
34 : #include <svtools/htmlkywd.hxx>
35 : #include <tools/tenccvt.hxx>
36 :
37 : #include "editeng/editeng.hxx"
38 :
39 : #define STYLE_PRE 101
40 :
41 1 : EditHTMLParser::EditHTMLParser( SvStream& rIn, const String& rBaseURL, SvKeyValueIterator* pHTTPHeaderAttrs )
42 : : HTMLParser( rIn, true ),
43 : aBaseURL( rBaseURL ),
44 : mpEditEngine(NULL),
45 : pCurAnchor(NULL),
46 : bInPara(false),
47 : bWasInPara(false),
48 : bFieldsInserted(false),
49 : bInTitle(false),
50 : nInTable(0),
51 : nInCell(0),
52 1 : nDefListLevel(0)
53 : {
54 : DBG_ASSERT( RTL_TEXTENCODING_DONTKNOW == GetSrcEncoding( ), "EditHTMLParser::EditHTMLParser: Where does the encoding come from?" );
55 : DBG_ASSERT( !IsSwitchToUCS2(), "EditHTMLParser::::EditHTMLParser: Switch to UCS2?" );
56 :
57 : // Altough the real default encoding is ISO8859-1, we use MS-1252
58 : // als default encoding.
59 1 : SetSrcEncoding( GetExtendedCompatibilityTextEncoding( RTL_TEXTENCODING_ISO_8859_1 ) );
60 :
61 : // If the file starts with a BOM, switch to UCS2.
62 1 : SetSwitchToUCS2( true );
63 :
64 1 : if ( pHTTPHeaderAttrs )
65 1 : SetEncodingByHTTPHeader( pHTTPHeaderAttrs );
66 1 : }
67 :
68 3 : EditHTMLParser::~EditHTMLParser()
69 : {
70 1 : delete pCurAnchor;
71 2 : }
72 :
73 1 : SvParserState EditHTMLParser::CallParser(EditEngine* pEE, const EditPaM& rPaM)
74 : {
75 : DBG_ASSERT(pEE, "CallParser: ImpEditEngine ?!");
76 1 : mpEditEngine = pEE;
77 1 : SvParserState _eState = SVPAR_NOTSTARTED;
78 1 : if ( mpEditEngine )
79 : {
80 : // Build in wrap mimic in RTF import?
81 1 : aCurSel = EditSelection( rPaM, rPaM );
82 :
83 1 : if (mpEditEngine->IsImportHandlerSet())
84 : {
85 1 : ImportInfo aImportInfo(HTMLIMP_START, this, mpEditEngine->CreateESelection(aCurSel));
86 1 : mpEditEngine->CallImportHandler(aImportInfo);
87 : }
88 :
89 1 : ImpSetStyleSheet( 0 );
90 1 : _eState = HTMLParser::CallParser();
91 :
92 1 : if (mpEditEngine->IsImportHandlerSet())
93 : {
94 1 : ImportInfo aImportInfo(HTMLIMP_END, this, mpEditEngine->CreateESelection(aCurSel));
95 1 : mpEditEngine->CallImportHandler(aImportInfo);
96 : }
97 :
98 1 : if ( bFieldsInserted )
99 0 : mpEditEngine->UpdateFieldsOnly();
100 : }
101 1 : return _eState;
102 : }
103 :
104 34 : void EditHTMLParser::NextToken( int nToken )
105 : {
106 : #ifdef DBG_UTIL
107 : HTML_TOKEN_IDS xID = (HTML_TOKEN_IDS)nToken;
108 : (void)xID;
109 : #endif
110 :
111 34 : switch( nToken )
112 : {
113 : case HTML_META:
114 : {
115 0 : const HTMLOptions& aOptions = GetOptions();
116 0 : size_t nArrLen = aOptions.size();
117 0 : bool bEquiv = false;
118 0 : for ( size_t i = 0; i < nArrLen; i++ )
119 : {
120 0 : const HTMLOption& aOption = aOptions[i];
121 0 : switch( aOption.GetToken() )
122 : {
123 : case HTML_O_HTTPEQUIV:
124 : {
125 0 : bEquiv = true;
126 : }
127 0 : break;
128 : case HTML_O_CONTENT:
129 : {
130 0 : if ( bEquiv )
131 : {
132 0 : rtl_TextEncoding eEnc = GetEncodingByMIME( aOption.GetString() );
133 0 : if ( eEnc != RTL_TEXTENCODING_DONTKNOW )
134 0 : SetSrcEncoding( eEnc );
135 : }
136 : }
137 0 : break;
138 : }
139 : }
140 :
141 : }
142 0 : break;
143 : case HTML_PLAINTEXT_ON:
144 : case HTML_PLAINTEXT2_ON:
145 0 : bInPara = true;
146 0 : break;
147 : case HTML_PLAINTEXT_OFF:
148 : case HTML_PLAINTEXT2_OFF:
149 0 : bInPara = false;
150 0 : break;
151 :
152 : case HTML_LINEBREAK:
153 : case HTML_NEWPARA:
154 : {
155 0 : if ( ( bInPara || nInTable ) &&
156 0 : ( ( nToken == HTML_LINEBREAK ) || HasTextInCurrentPara() ) )
157 : {
158 0 : ImpInsertParaBreak();
159 : }
160 : }
161 0 : break;
162 : case HTML_HORZRULE:
163 : {
164 0 : if ( HasTextInCurrentPara() )
165 0 : ImpInsertParaBreak();
166 0 : ImpInsertParaBreak();
167 : }
168 : case HTML_NONBREAKSPACE:
169 : {
170 0 : if ( bInPara )
171 : {
172 0 : ImpInsertText( String( RTL_CONSTASCII_USTRINGPARAM( " " ) ) );
173 : }
174 : }
175 0 : break;
176 : case HTML_RAWDATA:
177 0 : if (IsReadStyle() && aToken.Len())
178 : {
179 : // Each token represents a single line.
180 0 : maStyleSource.append(aToken);
181 0 : maStyleSource.append(sal_Unicode('\n'));
182 : }
183 0 : break;
184 : case HTML_TEXTTOKEN:
185 : {
186 : // #i110937# for <title> content, call aImportHdl (no SkipGroup), but don't insert the text into the EditEngine
187 13 : if (!bInTitle)
188 : {
189 13 : if ( !bInPara )
190 3 : StartPara( false );
191 :
192 13 : String aText = aToken;
193 39 : if ( aText.Len() && ( aText.GetChar( 0 ) == ' ' )
194 20 : && ThrowAwayBlank() && !IsReadPRE() )
195 7 : aText.Erase( 0, 1 );
196 :
197 13 : if ( pCurAnchor )
198 : {
199 0 : pCurAnchor->aText += aText;
200 : }
201 : else
202 : {
203 : // Only written until HTML with 319?
204 13 : if ( IsReadPRE() )
205 : {
206 0 : sal_uInt16 nTabPos = aText.Search( '\t', 0 );
207 0 : while ( nTabPos != STRING_NOTFOUND )
208 : {
209 0 : aText.Erase( nTabPos, 1 );
210 0 : aText.Insert( String( RTL_CONSTASCII_USTRINGPARAM( " " ) ), nTabPos );
211 0 : nTabPos = aText.Search( '\t', nTabPos+8 );
212 : }
213 : }
214 13 : ImpInsertText( aText );
215 13 : }
216 : }
217 : }
218 13 : break;
219 :
220 : case HTML_CENTER_ON:
221 : case HTML_CENTER_OFF:
222 : {
223 0 : sal_Int32 nNode = mpEditEngine->GetEditDoc().GetPos( aCurSel.Max().GetNode() );
224 0 : SfxItemSet aItems( aCurSel.Max().GetNode()->GetContentAttribs().GetItems() );
225 0 : aItems.ClearItem( EE_PARA_JUST );
226 0 : if ( nToken == HTML_CENTER_ON )
227 0 : aItems.Put( SvxAdjustItem( SVX_ADJUST_CENTER, EE_PARA_JUST ) );
228 0 : mpEditEngine->SetParaAttribsOnly(nNode, aItems);
229 : }
230 0 : break;
231 :
232 0 : case HTML_ANCHOR_ON: AnchorStart();
233 0 : break;
234 0 : case HTML_ANCHOR_OFF: AnchorEnd();
235 0 : break;
236 :
237 : case HTML_PARABREAK_ON:
238 0 : if( bInPara && HasTextInCurrentPara() )
239 0 : EndPara( true );
240 0 : StartPara( true );
241 0 : break;
242 :
243 : case HTML_PARABREAK_OFF:
244 0 : if( bInPara )
245 0 : EndPara( true );
246 0 : break;
247 :
248 : case HTML_HEAD1_ON:
249 : case HTML_HEAD2_ON:
250 : case HTML_HEAD3_ON:
251 : case HTML_HEAD4_ON:
252 : case HTML_HEAD5_ON:
253 : case HTML_HEAD6_ON:
254 : {
255 0 : HeadingStart( nToken );
256 : }
257 0 : break;
258 :
259 : case HTML_HEAD1_OFF:
260 : case HTML_HEAD2_OFF:
261 : case HTML_HEAD3_OFF:
262 : case HTML_HEAD4_OFF:
263 : case HTML_HEAD5_OFF:
264 : case HTML_HEAD6_OFF:
265 : {
266 0 : HeadingEnd( nToken );
267 : }
268 0 : break;
269 :
270 : case HTML_PREFORMTXT_ON:
271 : case HTML_XMP_ON:
272 : case HTML_LISTING_ON:
273 : {
274 0 : StartPara( true );
275 0 : ImpSetStyleSheet( STYLE_PRE );
276 : }
277 0 : break;
278 :
279 : case HTML_DEFLIST_ON:
280 : {
281 0 : nDefListLevel++;
282 : }
283 0 : break;
284 :
285 : case HTML_DEFLIST_OFF:
286 : {
287 0 : if( nDefListLevel )
288 0 : nDefListLevel--;
289 : }
290 0 : break;
291 :
292 1 : case HTML_TABLE_ON: nInTable++;
293 1 : break;
294 : case HTML_TABLE_OFF: DBG_ASSERT( nInTable, "Not in Table, but TABLE_OFF?" );
295 1 : nInTable--;
296 1 : break;
297 :
298 : case HTML_TABLEHEADER_ON:
299 : case HTML_TABLEDATA_ON:
300 6 : nInCell++;
301 : // fall through
302 : case HTML_BLOCKQUOTE_ON:
303 : case HTML_BLOCKQUOTE_OFF:
304 : case HTML_BLOCKQUOTE30_ON:
305 : case HTML_BLOCKQUOTE30_OFF:
306 : case HTML_LISTHEADER_ON:
307 : case HTML_LI_ON:
308 : case HTML_DD_ON:
309 : case HTML_DT_ON:
310 : case HTML_ORDERLIST_ON:
311 : case HTML_UNORDERLIST_ON:
312 : {
313 6 : bool bHasText = HasTextInCurrentPara();
314 6 : if ( bHasText )
315 0 : ImpInsertParaBreak();
316 6 : StartPara( false );
317 : }
318 6 : break;
319 :
320 : case HTML_TABLEHEADER_OFF:
321 : case HTML_TABLEDATA_OFF:
322 : {
323 6 : if ( nInCell )
324 6 : nInCell--;
325 : }
326 : // fall through
327 : case HTML_LISTHEADER_OFF:
328 : case HTML_LI_OFF:
329 : case HTML_DD_OFF:
330 : case HTML_DT_OFF:
331 : case HTML_ORDERLIST_OFF:
332 6 : case HTML_UNORDERLIST_OFF: EndPara( false );
333 6 : break;
334 :
335 : case HTML_TABLEROW_ON:
336 : case HTML_TABLEROW_OFF: // A RETURN only after a CELL, for Calc
337 :
338 : case HTML_COL_ON:
339 : case HTML_COLGROUP_ON:
340 4 : case HTML_COLGROUP_OFF: break;
341 :
342 : case HTML_FONT_ON: // ...
343 0 : break;
344 : case HTML_FONT_OFF: // ...
345 0 : break;
346 :
347 : case HTML_TITLE_ON:
348 0 : bInTitle = true;
349 0 : break;
350 : case HTML_TITLE_OFF:
351 0 : bInTitle = false;
352 0 : break;
353 :
354 : // globals
355 : case HTML_HTML_ON:
356 : case HTML_HTML_OFF:
357 : case HTML_STYLE_ON:
358 : case HTML_STYLE_OFF:
359 : case HTML_BODY_ON:
360 : case HTML_BODY_OFF:
361 : case HTML_HEAD_ON:
362 : case HTML_HEAD_OFF:
363 : case HTML_FORM_ON:
364 : case HTML_FORM_OFF:
365 : case HTML_THEAD_ON:
366 : case HTML_THEAD_OFF:
367 : case HTML_TBODY_ON:
368 : case HTML_TBODY_OFF:
369 : // inline elements, structural markup
370 : // HTML 3.0
371 : case HTML_BANNER_ON:
372 : case HTML_BANNER_OFF:
373 : case HTML_DIVISION_ON:
374 : case HTML_DIVISION_OFF:
375 : // case HTML_LISTHEADER_ON: //! special handling
376 : // case HTML_LISTHEADER_OFF:
377 : case HTML_NOTE_ON:
378 : case HTML_NOTE_OFF:
379 : // inline elements, logical markup
380 : // HTML 2.0
381 : case HTML_ADDRESS_ON:
382 : case HTML_ADDRESS_OFF:
383 : // case HTML_BLOCKQUOTE_ON: //! special handling
384 : // case HTML_BLOCKQUOTE_OFF:
385 : case HTML_CITIATION_ON:
386 : case HTML_CITIATION_OFF:
387 : case HTML_CODE_ON:
388 : case HTML_CODE_OFF:
389 : case HTML_DEFINSTANCE_ON:
390 : case HTML_DEFINSTANCE_OFF:
391 : case HTML_EMPHASIS_ON:
392 : case HTML_EMPHASIS_OFF:
393 : case HTML_KEYBOARD_ON:
394 : case HTML_KEYBOARD_OFF:
395 : case HTML_SAMPLE_ON:
396 : case HTML_SAMPLE_OFF:
397 : case HTML_STRIKE_ON:
398 : case HTML_STRIKE_OFF:
399 : case HTML_STRONG_ON:
400 : case HTML_STRONG_OFF:
401 : case HTML_VARIABLE_ON:
402 : case HTML_VARIABLE_OFF:
403 : // HTML 3.0
404 : case HTML_ABBREVIATION_ON:
405 : case HTML_ABBREVIATION_OFF:
406 : case HTML_ACRONYM_ON:
407 : case HTML_ACRONYM_OFF:
408 : case HTML_AUTHOR_ON:
409 : case HTML_AUTHOR_OFF:
410 : // case HTML_BLOCKQUOTE30_ON: //! special handling
411 : // case HTML_BLOCKQUOTE30_OFF:
412 : case HTML_DELETEDTEXT_ON:
413 : case HTML_DELETEDTEXT_OFF:
414 : case HTML_INSERTEDTEXT_ON:
415 : case HTML_INSERTEDTEXT_OFF:
416 : case HTML_LANGUAGE_ON:
417 : case HTML_LANGUAGE_OFF:
418 : case HTML_PERSON_ON:
419 : case HTML_PERSON_OFF:
420 : case HTML_SHORTQUOTE_ON:
421 : case HTML_SHORTQUOTE_OFF:
422 : case HTML_SUBSCRIPT_ON:
423 : case HTML_SUBSCRIPT_OFF:
424 : case HTML_SUPERSCRIPT_ON:
425 : case HTML_SUPERSCRIPT_OFF:
426 : // inline elements, visual markup
427 : // HTML 2.0
428 : case HTML_BOLD_ON:
429 : case HTML_BOLD_OFF:
430 : case HTML_ITALIC_ON:
431 : case HTML_ITALIC_OFF:
432 : case HTML_TELETYPE_ON:
433 : case HTML_TELETYPE_OFF:
434 : case HTML_UNDERLINE_ON:
435 : case HTML_UNDERLINE_OFF:
436 : // HTML 3.0
437 : case HTML_BIGPRINT_ON:
438 : case HTML_BIGPRINT_OFF:
439 : case HTML_STRIKETHROUGH_ON:
440 : case HTML_STRIKETHROUGH_OFF:
441 : case HTML_SMALLPRINT_ON:
442 : case HTML_SMALLPRINT_OFF:
443 : // figures
444 : case HTML_FIGURE_ON:
445 : case HTML_FIGURE_OFF:
446 : case HTML_CAPTION_ON:
447 : case HTML_CAPTION_OFF:
448 : case HTML_CREDIT_ON:
449 : case HTML_CREDIT_OFF:
450 : // misc
451 : case HTML_DIRLIST_ON:
452 : case HTML_DIRLIST_OFF:
453 : case HTML_FOOTNOTE_ON: //! land so im Text
454 : case HTML_FOOTNOTE_OFF:
455 : case HTML_MENULIST_ON:
456 : case HTML_MENULIST_OFF:
457 : // case HTML_PLAINTEXT_ON: //! special handling
458 : // case HTML_PLAINTEXT_OFF:
459 : // case HTML_PREFORMTXT_ON: //! special handling
460 : // case HTML_PREFORMTXT_OFF:
461 : case HTML_SPAN_ON:
462 : case HTML_SPAN_OFF:
463 : // obsolete
464 : // case HTML_XMP_ON: //! special handling
465 : // case HTML_XMP_OFF:
466 : // case HTML_LISTING_ON: //! special handling
467 : // case HTML_LISTING_OFF:
468 : // Netscape
469 : case HTML_BLINK_ON:
470 : case HTML_BLINK_OFF:
471 : case HTML_NOBR_ON:
472 : case HTML_NOBR_OFF:
473 : case HTML_NOEMBED_ON:
474 : case HTML_NOEMBED_OFF:
475 : case HTML_NOFRAMES_ON:
476 : case HTML_NOFRAMES_OFF:
477 : // Internet Explorer
478 : case HTML_MARQUEE_ON:
479 : case HTML_MARQUEE_OFF:
480 : // case HTML_PLAINTEXT2_ON: //! special handling
481 : // case HTML_PLAINTEXT2_OFF:
482 3 : break;
483 :
484 : default:
485 : {
486 0 : if ( nToken & HTML_TOKEN_ONOFF )
487 : {
488 0 : if ( ( nToken == HTML_UNKNOWNCONTROL_ON ) || ( nToken == HTML_UNKNOWNCONTROL_OFF ) )
489 : {
490 : ;
491 : }
492 0 : else if ( !(nToken & 1) )
493 : {
494 : DBG_ASSERT( !( nToken & 1 ), "No Start-Token ?!" );
495 0 : SkipGroup( nToken + 1 );
496 : }
497 : }
498 : }
499 : } // SWITCH
500 :
501 34 : if (mpEditEngine->IsImportHandlerSet())
502 : {
503 34 : ImportInfo aImportInfo(HTMLIMP_NEXTTOKEN, this, mpEditEngine->CreateESelection(aCurSel));
504 34 : aImportInfo.nToken = nToken;
505 34 : aImportInfo.nTokenValue = (short)nTokenValue;
506 34 : if ( nToken == HTML_TEXTTOKEN )
507 13 : aImportInfo.aText = aToken;
508 21 : else if (nToken == HTML_STYLE_OFF)
509 0 : aImportInfo.aText = maStyleSource.makeStringAndClear();
510 34 : mpEditEngine->CallImportHandler(aImportInfo);
511 : }
512 :
513 34 : }
514 :
515 6 : void EditHTMLParser::ImpInsertParaBreak()
516 : {
517 6 : if (mpEditEngine->IsImportHandlerSet())
518 : {
519 6 : ImportInfo aImportInfo(HTMLIMP_INSERTPARA, this, mpEditEngine->CreateESelection(aCurSel));
520 6 : mpEditEngine->CallImportHandler(aImportInfo);
521 : }
522 6 : aCurSel = mpEditEngine->InsertParaBreak(aCurSel);
523 6 : }
524 :
525 0 : void EditHTMLParser::ImpSetAttribs( const SfxItemSet& rItems, EditSelection* pSel )
526 : {
527 : // pSel, when character attributes, otherwise paragraph attributes for
528 : // the current paragraph.
529 : DBG_ASSERT( pSel || ( aCurSel.Min().GetNode() == aCurSel.Max().GetNode() ), "ImpInsertAttribs: Selection?" );
530 :
531 0 : EditPaM aStartPaM( pSel ? pSel->Min() : aCurSel.Min() );
532 0 : EditPaM aEndPaM( pSel ? pSel->Max() : aCurSel.Max() );
533 :
534 0 : if ( !pSel )
535 : {
536 0 : aStartPaM.SetIndex( 0 );
537 0 : aEndPaM.SetIndex( aEndPaM.GetNode()->Len() );
538 : }
539 :
540 0 : if (mpEditEngine->IsImportHandlerSet())
541 : {
542 0 : EditSelection aSel( aStartPaM, aEndPaM );
543 0 : ImportInfo aImportInfo(HTMLIMP_SETATTR, this, mpEditEngine->CreateESelection(aSel));
544 0 : aImportInfo.pAttrs = (void*)&rItems;
545 0 : mpEditEngine->CallImportHandler(aImportInfo);
546 : }
547 :
548 0 : ContentNode* pSN = aStartPaM.GetNode();
549 0 : sal_Int32 nStartNode = mpEditEngine->GetEditDoc().GetPos( pSN );
550 :
551 : // If an attribute goes from 0 to current Paragraph length,
552 : // then it should be a paragraph attribute!
553 :
554 : // Note: Selection can reach over several Paragraphs.
555 : // All complete paragraphs are paragraph attributes ...
556 :
557 : // not really HTML:
558 : #ifdef DBG_UTIL
559 : ContentNode* pEN = aEndPaM.GetNode();
560 : sal_Int32 nEndNode = mpEditEngine->GetEditDoc().GetPos( pEN );
561 : DBG_ASSERT( nStartNode == nEndNode, "ImpSetAttribs: Several paragraphs?" );
562 : #endif
563 :
564 0 : if ( ( aStartPaM.GetIndex() == 0 ) && ( aEndPaM.GetIndex() == aEndPaM.GetNode()->Len() ) )
565 : {
566 : // Has to be merged:
567 0 : SfxItemSet aItems = mpEditEngine->GetBaseParaAttribs(nStartNode);
568 0 : aItems.Put( rItems );
569 0 : mpEditEngine->SetParaAttribsOnly(nStartNode, aItems);
570 : }
571 : else
572 0 : mpEditEngine->SetAttribs( EditSelection( aStartPaM, aEndPaM ), rItems );
573 0 : }
574 :
575 1 : void EditHTMLParser::ImpSetStyleSheet( sal_uInt16 nHLevel )
576 : {
577 : /*
578 : nHLevel: 0: Turn off
579 : 1-6: Heading
580 : STYLE_PRE: Preformatted
581 : */
582 : // Create hard attributes ...
583 : // Enough for Calc, would have to be clarified with StyleSheets
584 : // that they should also be in the app so that when they are feed
585 : // in a different engine still are here ...
586 1 : sal_Int32 nNode = mpEditEngine->GetEditDoc().GetPos( aCurSel.Max().GetNode() );
587 :
588 1 : SfxItemSet aItems( aCurSel.Max().GetNode()->GetContentAttribs().GetItems() );
589 :
590 1 : aItems.ClearItem( EE_PARA_ULSPACE );
591 :
592 1 : aItems.ClearItem( EE_CHAR_FONTHEIGHT );
593 1 : aItems.ClearItem( EE_CHAR_FONTINFO );
594 1 : aItems.ClearItem( EE_CHAR_WEIGHT );
595 :
596 1 : aItems.ClearItem( EE_CHAR_FONTHEIGHT_CJK );
597 1 : aItems.ClearItem( EE_CHAR_FONTINFO_CJK );
598 1 : aItems.ClearItem( EE_CHAR_WEIGHT_CJK );
599 :
600 1 : aItems.ClearItem( EE_CHAR_FONTHEIGHT_CTL );
601 1 : aItems.ClearItem( EE_CHAR_FONTINFO_CTL );
602 1 : aItems.ClearItem( EE_CHAR_WEIGHT_CTL );
603 :
604 : // Bold in the first 3 Headings
605 1 : if ( ( nHLevel >= 1 ) && ( nHLevel <= 3 ) )
606 : {
607 0 : SvxWeightItem aWeightItem( WEIGHT_BOLD, EE_CHAR_WEIGHT );
608 0 : aItems.Put( aWeightItem );
609 :
610 0 : SvxWeightItem aWeightItemCJK( WEIGHT_BOLD, EE_CHAR_WEIGHT_CJK );
611 0 : aItems.Put( aWeightItem );
612 :
613 0 : SvxWeightItem aWeightItemCTL( WEIGHT_BOLD, EE_CHAR_WEIGHT_CTL );
614 0 : aItems.Put( aWeightItem );
615 : }
616 :
617 : // Font hight and margins, when LogicToLogic is possible:
618 1 : MapUnit eUnit = mpEditEngine->GetRefMapMode().GetMapUnit();
619 1 : if ( ( eUnit != MAP_PIXEL ) && ( eUnit != MAP_SYSFONT ) &&
620 1 : ( eUnit != MAP_APPFONT ) && ( eUnit != MAP_RELATIVE ) )
621 : {
622 1 : long nPoints = 10;
623 1 : if ( nHLevel == 1 )
624 0 : nPoints = 22;
625 1 : else if ( nHLevel == 2 )
626 0 : nPoints = 16;
627 1 : else if ( nHLevel == 3 )
628 0 : nPoints = 12;
629 1 : else if ( nHLevel == 4 )
630 0 : nPoints = 11;
631 :
632 1 : nPoints = OutputDevice::LogicToLogic( nPoints, MAP_POINT, eUnit );
633 :
634 1 : SvxFontHeightItem aHeightItem( nPoints, 100, EE_CHAR_FONTHEIGHT );
635 1 : aItems.Put( aHeightItem );
636 :
637 2 : SvxFontHeightItem aHeightItemCJK( nPoints, 100, EE_CHAR_FONTHEIGHT_CJK );
638 1 : aItems.Put( aHeightItemCJK );
639 :
640 2 : SvxFontHeightItem aHeightItemCTL( nPoints, 100, EE_CHAR_FONTHEIGHT_CTL );
641 1 : aItems.Put( aHeightItemCTL );
642 :
643 : // Paragraph margins, when Heading:
644 1 : if ( !nHLevel || ((nHLevel >= 1) && (nHLevel <= 6)) )
645 : {
646 1 : SvxULSpaceItem aULSpaceItem( EE_PARA_ULSPACE );
647 1 : aULSpaceItem.SetUpper( (sal_uInt16)OutputDevice::LogicToLogic( 42, MAP_10TH_MM, eUnit ) );
648 1 : aULSpaceItem.SetLower( (sal_uInt16)OutputDevice::LogicToLogic( 35, MAP_10TH_MM, eUnit ) );
649 1 : aItems.Put( aULSpaceItem );
650 1 : }
651 : }
652 :
653 : // Choose a proportional Font for Pre
654 1 : if ( nHLevel == STYLE_PRE )
655 : {
656 0 : Font aFont = OutputDevice::GetDefaultFont( DEFAULTFONT_FIXED, LANGUAGE_SYSTEM, 0 );
657 0 : SvxFontItem aFontItem( aFont.GetFamily(), aFont.GetName(), XubString(), aFont.GetPitch(), aFont.GetCharSet(), EE_CHAR_FONTINFO );
658 0 : aItems.Put( aFontItem );
659 :
660 0 : SvxFontItem aFontItemCJK( aFont.GetFamily(), aFont.GetName(), XubString(), aFont.GetPitch(), aFont.GetCharSet(), EE_CHAR_FONTINFO_CJK );
661 0 : aItems.Put( aFontItemCJK );
662 :
663 0 : SvxFontItem aFontItemCTL( aFont.GetFamily(), aFont.GetName(), XubString(), aFont.GetPitch(), aFont.GetCharSet(), EE_CHAR_FONTINFO_CTL );
664 0 : aItems.Put( aFontItemCTL );
665 : }
666 :
667 1 : mpEditEngine->SetParaAttribsOnly(nNode, aItems);
668 1 : }
669 :
670 13 : void EditHTMLParser::ImpInsertText( const String& rText )
671 : {
672 13 : String aText( rText );
673 13 : if (mpEditEngine->IsImportHandlerSet())
674 : {
675 13 : ImportInfo aImportInfo(HTMLIMP_INSERTTEXT, this, mpEditEngine->CreateESelection(aCurSel));
676 13 : aImportInfo.aText = aText;
677 13 : mpEditEngine->CallImportHandler(aImportInfo);
678 : }
679 :
680 13 : aCurSel = mpEditEngine->InsertText(aCurSel, aText);
681 13 : }
682 :
683 0 : void EditHTMLParser::SkipGroup( int nEndToken )
684 : {
685 : // groups in cells are closed upon leaving the cell, because those
686 : // ******* web authors don't know their job
687 : // for example: <td><form></td> lacks a closing </form>
688 0 : sal_uInt8 nCellLevel = nInCell;
689 : int nToken;
690 0 : while( nCellLevel <= nInCell && ( (nToken = GetNextToken() ) != nEndToken ) && nToken )
691 : {
692 0 : switch ( nToken )
693 : {
694 : case HTML_TABLEHEADER_ON:
695 : case HTML_TABLEDATA_ON:
696 0 : nInCell++;
697 0 : break;
698 : case HTML_TABLEHEADER_OFF:
699 : case HTML_TABLEDATA_OFF:
700 0 : if ( nInCell )
701 0 : nInCell--;
702 0 : break;
703 : }
704 : }
705 0 : }
706 :
707 9 : void EditHTMLParser::StartPara( bool bReal )
708 : {
709 9 : if ( bReal )
710 : {
711 0 : const HTMLOptions& aOptions = GetOptions();
712 0 : SvxAdjust eAdjust = SVX_ADJUST_LEFT;
713 0 : for ( size_t i = 0, n = aOptions.size(); i < n; ++i )
714 : {
715 0 : const HTMLOption& aOption = aOptions[i];
716 0 : switch( aOption.GetToken() )
717 : {
718 : case HTML_O_ALIGN:
719 : {
720 0 : if ( aOption.GetString().CompareIgnoreCaseToAscii( OOO_STRING_SVTOOLS_HTML_AL_right ) == COMPARE_EQUAL )
721 0 : eAdjust = SVX_ADJUST_RIGHT;
722 0 : else if ( aOption.GetString().CompareIgnoreCaseToAscii( OOO_STRING_SVTOOLS_HTML_AL_middle ) == COMPARE_EQUAL )
723 0 : eAdjust = SVX_ADJUST_CENTER;
724 0 : else if ( aOption.GetString().CompareIgnoreCaseToAscii( OOO_STRING_SVTOOLS_HTML_AL_center ) == COMPARE_EQUAL )
725 0 : eAdjust = SVX_ADJUST_CENTER;
726 : else
727 0 : eAdjust = SVX_ADJUST_LEFT;
728 : }
729 0 : break;
730 : }
731 : }
732 0 : SfxItemSet aItemSet = mpEditEngine->GetEmptyItemSet();
733 0 : aItemSet.Put( SvxAdjustItem( eAdjust, EE_PARA_JUST ) );
734 0 : ImpSetAttribs( aItemSet );
735 : }
736 9 : bInPara = true;
737 9 : }
738 :
739 6 : void EditHTMLParser::EndPara( bool )
740 : {
741 6 : if ( bInPara )
742 : {
743 6 : bool bHasText = HasTextInCurrentPara();
744 6 : if ( bHasText )
745 6 : ImpInsertParaBreak();
746 : }
747 6 : bInPara = false;
748 6 : }
749 :
750 7 : bool EditHTMLParser::ThrowAwayBlank()
751 : {
752 : // A blank must be thrown away if the new text begins with a Blank and
753 : // if the current paragraph is empty or ends with a Blank...
754 7 : ContentNode* pNode = aCurSel.Max().GetNode();
755 7 : if ( pNode->Len() && ( pNode->GetChar( pNode->Len()-1 ) != ' ' ) )
756 0 : return false;
757 7 : return true;
758 : }
759 :
760 12 : bool EditHTMLParser::HasTextInCurrentPara()
761 : {
762 12 : return aCurSel.Max().GetNode()->Len() ? true : false;
763 : }
764 :
765 0 : void EditHTMLParser::AnchorStart()
766 : {
767 : // ignore anchor in anchor
768 0 : if ( !pCurAnchor )
769 : {
770 0 : const HTMLOptions& aOptions = GetOptions();
771 0 : String aRef;
772 :
773 0 : for ( size_t i = 0, n = aOptions.size(); i < n; ++i )
774 : {
775 0 : const HTMLOption& aOption = aOptions[i];
776 0 : switch( aOption.GetToken() )
777 : {
778 : case HTML_O_HREF:
779 0 : aRef = aOption.GetString();
780 0 : break;
781 : }
782 : }
783 :
784 0 : if ( aRef.Len() )
785 : {
786 0 : String aURL = aRef;
787 0 : if ( aURL.Len() && ( aURL.GetChar( 0 ) != '#' ) )
788 : {
789 0 : INetURLObject aTargetURL;
790 0 : INetURLObject aRootURL( aBaseURL );
791 0 : aRootURL.GetNewAbsURL( aRef, &aTargetURL );
792 0 : aURL = aTargetURL.GetMainURL( INetURLObject::DECODE_TO_IURI );
793 : }
794 0 : pCurAnchor = new AnchorInfo;
795 0 : pCurAnchor->aHRef = aURL;
796 0 : }
797 : }
798 0 : }
799 :
800 0 : void EditHTMLParser::AnchorEnd()
801 : {
802 0 : if ( pCurAnchor )
803 : {
804 : // Insert as URL-Field...
805 0 : SvxFieldItem aFld( SvxURLField( pCurAnchor->aHRef, pCurAnchor->aText, SVXURLFORMAT_REPR ), EE_FEATURE_FIELD );
806 0 : aCurSel = mpEditEngine->InsertField(aCurSel, aFld);
807 0 : bFieldsInserted = true;
808 0 : delete pCurAnchor;
809 0 : pCurAnchor = NULL;
810 :
811 0 : if (mpEditEngine->IsImportHandlerSet())
812 : {
813 0 : ImportInfo aImportInfo(HTMLIMP_INSERTFIELD, this, mpEditEngine->CreateESelection(aCurSel));
814 0 : mpEditEngine->CallImportHandler(aImportInfo);
815 0 : }
816 : }
817 0 : }
818 :
819 0 : void EditHTMLParser::HeadingStart( int nToken )
820 : {
821 0 : bWasInPara = bInPara;
822 0 : StartPara( false );
823 :
824 0 : if ( bWasInPara && HasTextInCurrentPara() )
825 0 : ImpInsertParaBreak();
826 :
827 : sal_uInt16 nId = sal::static_int_cast< sal_uInt16 >(
828 0 : 1 + ( ( nToken - HTML_HEAD1_ON ) / 2 ) );
829 : DBG_ASSERT( (nId >= 1) && (nId <= 9), "HeadingStart: ID can not be correct!" );
830 0 : ImpSetStyleSheet( nId );
831 0 : }
832 :
833 0 : void EditHTMLParser::HeadingEnd( int )
834 : {
835 0 : EndPara( false );
836 0 : ImpSetStyleSheet( 0 );
837 :
838 0 : if ( bWasInPara )
839 : {
840 0 : bInPara = true;
841 0 : bWasInPara = false;
842 : }
843 267 : }
844 :
845 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|