Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 :
21 : #include <vcl/wrkwin.hxx>
22 : #include <vcl/dialog.hxx>
23 : #include <vcl/msgbox.hxx>
24 : #include <vcl/svapp.hxx>
25 : #include <eehtml.hxx>
26 : #include <editeng/adjitem.hxx>
27 : #include <editeng/flditem.hxx>
28 : #include <tools/urlobj.hxx>
29 : #include <editeng/fhgtitem.hxx>
30 : #include <editeng/fontitem.hxx>
31 : #include <editeng/ulspitem.hxx>
32 : #include <editeng/wghtitem.hxx>
33 : #include <svtools/htmltokn.h>
34 : #include <svtools/htmlkywd.hxx>
35 : #include <tools/tenccvt.hxx>
36 :
37 : #include "editeng/editeng.hxx"
38 :
39 : #define ACTION_INSERTTEXT 1
40 : #define ACTION_INSERTPARABRK 2
41 :
42 : #define STYLE_PRE 101
43 :
44 1 : EditHTMLParser::EditHTMLParser( SvStream& rIn, const String& rBaseURL, SvKeyValueIterator* pHTTPHeaderAttrs )
45 : : HTMLParser( rIn, true ),
46 : aBaseURL( rBaseURL ),
47 : mpEditEngine(NULL),
48 : pCurAnchor(NULL),
49 : bInPara(false),
50 : bWasInPara(false),
51 : bFieldsInserted(false),
52 : bInTitle(false),
53 : nInTable(0),
54 : nInCell(0),
55 1 : nDefListLevel(0)
56 : {
57 : DBG_ASSERT( RTL_TEXTENCODING_DONTKNOW == GetSrcEncoding( ), "EditHTMLParser::EditHTMLParser: Where does the encoding come from?" );
58 : DBG_ASSERT( !IsSwitchToUCS2(), "EditHTMLParser::::EditHTMLParser: Switch to UCS2?" );
59 :
60 : // Altough the real default encoding is ISO8859-1, we use MS-1252
61 : // als default encoding.
62 1 : SetSrcEncoding( GetExtendedCompatibilityTextEncoding( RTL_TEXTENCODING_ISO_8859_1 ) );
63 :
64 : // If the file starts with a BOM, switch to UCS2.
65 1 : SetSwitchToUCS2( true );
66 :
67 1 : if ( pHTTPHeaderAttrs )
68 1 : SetEncodingByHTTPHeader( pHTTPHeaderAttrs );
69 1 : }
70 :
71 3 : EditHTMLParser::~EditHTMLParser()
72 : {
73 1 : delete pCurAnchor;
74 2 : }
75 :
76 1 : SvParserState EditHTMLParser::CallParser(EditEngine* pEE, const EditPaM& rPaM)
77 : {
78 : DBG_ASSERT(pEE, "CallParser: ImpEditEngine ?!");
79 1 : mpEditEngine = pEE;
80 1 : SvParserState _eState = SVPAR_NOTSTARTED;
81 1 : if ( mpEditEngine )
82 : {
83 : // Build in wrap mimic in RTF import?
84 1 : aCurSel = EditSelection( rPaM, rPaM );
85 :
86 1 : if (mpEditEngine->IsImportHandlerSet())
87 : {
88 1 : ImportInfo aImportInfo(HTMLIMP_START, this, mpEditEngine->CreateESelection(aCurSel));
89 1 : mpEditEngine->CallImportHandler(aImportInfo);
90 : }
91 :
92 1 : ImpSetStyleSheet( 0 );
93 1 : _eState = HTMLParser::CallParser();
94 :
95 1 : if (mpEditEngine->IsImportHandlerSet())
96 : {
97 1 : ImportInfo aImportInfo(HTMLIMP_END, this, mpEditEngine->CreateESelection(aCurSel));
98 1 : mpEditEngine->CallImportHandler(aImportInfo);
99 : }
100 :
101 1 : if ( bFieldsInserted )
102 0 : mpEditEngine->UpdateFieldsOnly();
103 : }
104 1 : return _eState;
105 : }
106 :
107 34 : void EditHTMLParser::NextToken( int nToken )
108 : {
109 : #ifdef DBG_UTIL
110 : HTML_TOKEN_IDS xID = (HTML_TOKEN_IDS)nToken;
111 : (void)xID;
112 : #endif
113 :
114 34 : switch( nToken )
115 : {
116 : case HTML_META:
117 : {
118 0 : const HTMLOptions& aOptions = GetOptions();
119 0 : size_t nArrLen = aOptions.size();
120 0 : bool bEquiv = false;
121 0 : for ( size_t i = 0; i < nArrLen; i++ )
122 : {
123 0 : const HTMLOption& aOption = aOptions[i];
124 0 : switch( aOption.GetToken() )
125 : {
126 : case HTML_O_HTTPEQUIV:
127 : {
128 0 : bEquiv = true;
129 : }
130 0 : break;
131 : case HTML_O_CONTENT:
132 : {
133 0 : if ( bEquiv )
134 : {
135 0 : rtl_TextEncoding eEnc = GetEncodingByMIME( aOption.GetString() );
136 0 : if ( eEnc != RTL_TEXTENCODING_DONTKNOW )
137 0 : SetSrcEncoding( eEnc );
138 : }
139 : }
140 0 : break;
141 : }
142 : }
143 :
144 : }
145 0 : break;
146 : case HTML_PLAINTEXT_ON:
147 : case HTML_PLAINTEXT2_ON:
148 0 : bInPara = true;
149 0 : break;
150 : case HTML_PLAINTEXT_OFF:
151 : case HTML_PLAINTEXT2_OFF:
152 0 : bInPara = false;
153 0 : break;
154 :
155 : case HTML_LINEBREAK:
156 : case HTML_NEWPARA:
157 : {
158 0 : if ( ( bInPara || nInTable ) &&
159 0 : ( ( nToken == HTML_LINEBREAK ) || HasTextInCurrentPara() ) )
160 : {
161 0 : ImpInsertParaBreak();
162 : }
163 : }
164 0 : break;
165 : case HTML_HORZRULE:
166 : {
167 0 : if ( HasTextInCurrentPara() )
168 0 : ImpInsertParaBreak();
169 0 : ImpInsertParaBreak();
170 : }
171 : case HTML_NONBREAKSPACE:
172 : {
173 0 : if ( bInPara )
174 : {
175 0 : ImpInsertText( String( RTL_CONSTASCII_USTRINGPARAM( " " ) ) );
176 : }
177 : }
178 0 : break;
179 : case HTML_RAWDATA:
180 0 : if (IsReadStyle() && aToken.Len())
181 : {
182 : // Each token represents a single line.
183 0 : maStyleSource.append(aToken);
184 0 : maStyleSource.append(sal_Unicode('\n'));
185 : }
186 0 : break;
187 : case HTML_TEXTTOKEN:
188 : {
189 : // #i110937# for <title> content, call aImportHdl (no SkipGroup), but don't insert the text into the EditEngine
190 13 : if (!bInTitle)
191 : {
192 13 : if ( !bInPara )
193 3 : StartPara( false );
194 :
195 13 : String aText = aToken;
196 27 : if ( aText.Len() && ( aText.GetChar( 0 ) == ' ' )
197 14 : && ThrowAwayBlank() && !IsReadPRE() )
198 7 : aText.Erase( 0, 1 );
199 :
200 13 : if ( pCurAnchor )
201 : {
202 0 : pCurAnchor->aText += aText;
203 : }
204 : else
205 : {
206 : // Only written until HTML with 319?
207 13 : if ( IsReadPRE() )
208 : {
209 0 : sal_uInt16 nTabPos = aText.Search( '\t', 0 );
210 0 : while ( nTabPos != STRING_NOTFOUND )
211 : {
212 0 : aText.Erase( nTabPos, 1 );
213 0 : aText.Insert( String( RTL_CONSTASCII_USTRINGPARAM( " " ) ), nTabPos );
214 0 : nTabPos = aText.Search( '\t', nTabPos+8 );
215 : }
216 : }
217 13 : ImpInsertText( aText );
218 13 : }
219 : }
220 : }
221 13 : break;
222 :
223 : case HTML_CENTER_ON:
224 : case HTML_CENTER_OFF:
225 : {
226 0 : sal_uInt16 nNode = mpEditEngine->GetEditDoc().GetPos( aCurSel.Max().GetNode() );
227 0 : SfxItemSet aItems( aCurSel.Max().GetNode()->GetContentAttribs().GetItems() );
228 0 : aItems.ClearItem( EE_PARA_JUST );
229 0 : if ( nToken == HTML_CENTER_ON )
230 0 : aItems.Put( SvxAdjustItem( SVX_ADJUST_CENTER, EE_PARA_JUST ) );
231 0 : mpEditEngine->SetParaAttribsOnly(nNode, aItems);
232 : }
233 0 : break;
234 :
235 0 : case HTML_ANCHOR_ON: AnchorStart();
236 0 : break;
237 0 : case HTML_ANCHOR_OFF: AnchorEnd();
238 0 : break;
239 :
240 : case HTML_PARABREAK_ON:
241 0 : if( bInPara && HasTextInCurrentPara() )
242 0 : EndPara( true );
243 0 : StartPara( true );
244 0 : break;
245 :
246 : case HTML_PARABREAK_OFF:
247 0 : if( bInPara )
248 0 : EndPara( true );
249 0 : break;
250 :
251 : case HTML_HEAD1_ON:
252 : case HTML_HEAD2_ON:
253 : case HTML_HEAD3_ON:
254 : case HTML_HEAD4_ON:
255 : case HTML_HEAD5_ON:
256 : case HTML_HEAD6_ON:
257 : {
258 0 : HeadingStart( nToken );
259 : }
260 0 : break;
261 :
262 : case HTML_HEAD1_OFF:
263 : case HTML_HEAD2_OFF:
264 : case HTML_HEAD3_OFF:
265 : case HTML_HEAD4_OFF:
266 : case HTML_HEAD5_OFF:
267 : case HTML_HEAD6_OFF:
268 : {
269 0 : HeadingEnd( nToken );
270 : }
271 0 : break;
272 :
273 : case HTML_PREFORMTXT_ON:
274 : case HTML_XMP_ON:
275 : case HTML_LISTING_ON:
276 : {
277 0 : StartPara( true );
278 0 : ImpSetStyleSheet( STYLE_PRE );
279 : }
280 0 : break;
281 :
282 : case HTML_DEFLIST_ON:
283 : {
284 0 : nDefListLevel++;
285 : }
286 0 : break;
287 :
288 : case HTML_DEFLIST_OFF:
289 : {
290 0 : if( nDefListLevel )
291 0 : nDefListLevel--;
292 : }
293 0 : break;
294 :
295 1 : case HTML_TABLE_ON: nInTable++;
296 1 : break;
297 : case HTML_TABLE_OFF: DBG_ASSERT( nInTable, "Not in Table, but TABLE_OFF?" );
298 1 : nInTable--;
299 1 : break;
300 :
301 : case HTML_TABLEHEADER_ON:
302 : case HTML_TABLEDATA_ON:
303 6 : nInCell++;
304 : // fall through
305 : case HTML_BLOCKQUOTE_ON:
306 : case HTML_BLOCKQUOTE_OFF:
307 : case HTML_BLOCKQUOTE30_ON:
308 : case HTML_BLOCKQUOTE30_OFF:
309 : case HTML_LISTHEADER_ON:
310 : case HTML_LI_ON:
311 : case HTML_DD_ON:
312 : case HTML_DT_ON:
313 : case HTML_ORDERLIST_ON:
314 : case HTML_UNORDERLIST_ON:
315 : {
316 6 : bool bHasText = HasTextInCurrentPara();
317 6 : if ( bHasText )
318 0 : ImpInsertParaBreak();
319 6 : StartPara( false );
320 : }
321 6 : break;
322 :
323 : case HTML_TABLEHEADER_OFF:
324 : case HTML_TABLEDATA_OFF:
325 : {
326 6 : if ( nInCell )
327 6 : nInCell--;
328 : }
329 : // fall through
330 : case HTML_LISTHEADER_OFF:
331 : case HTML_LI_OFF:
332 : case HTML_DD_OFF:
333 : case HTML_DT_OFF:
334 : case HTML_ORDERLIST_OFF:
335 6 : case HTML_UNORDERLIST_OFF: EndPara( false );
336 6 : break;
337 :
338 : case HTML_TABLEROW_ON:
339 : case HTML_TABLEROW_OFF: // A RETURN only after a CELL, for Calc
340 :
341 : case HTML_COL_ON:
342 : case HTML_COLGROUP_ON:
343 4 : case HTML_COLGROUP_OFF: break;
344 :
345 : case HTML_FONT_ON: // ...
346 0 : break;
347 : case HTML_FONT_OFF: // ...
348 0 : break;
349 :
350 : case HTML_TITLE_ON:
351 0 : bInTitle = true;
352 0 : break;
353 : case HTML_TITLE_OFF:
354 0 : bInTitle = false;
355 0 : break;
356 :
357 : // globals
358 : case HTML_HTML_ON:
359 : case HTML_HTML_OFF:
360 : case HTML_STYLE_ON:
361 : case HTML_STYLE_OFF:
362 : case HTML_BODY_ON:
363 : case HTML_BODY_OFF:
364 : case HTML_HEAD_ON:
365 : case HTML_HEAD_OFF:
366 : case HTML_FORM_ON:
367 : case HTML_FORM_OFF:
368 : case HTML_THEAD_ON:
369 : case HTML_THEAD_OFF:
370 : case HTML_TBODY_ON:
371 : case HTML_TBODY_OFF:
372 : // inline elements, structural markup
373 : // HTML 3.0
374 : case HTML_BANNER_ON:
375 : case HTML_BANNER_OFF:
376 : case HTML_DIVISION_ON:
377 : case HTML_DIVISION_OFF:
378 : // case HTML_LISTHEADER_ON: //! special handling
379 : // case HTML_LISTHEADER_OFF:
380 : case HTML_NOTE_ON:
381 : case HTML_NOTE_OFF:
382 : // inline elements, logical markup
383 : // HTML 2.0
384 : case HTML_ADDRESS_ON:
385 : case HTML_ADDRESS_OFF:
386 : // case HTML_BLOCKQUOTE_ON: //! special handling
387 : // case HTML_BLOCKQUOTE_OFF:
388 : case HTML_CITIATION_ON:
389 : case HTML_CITIATION_OFF:
390 : case HTML_CODE_ON:
391 : case HTML_CODE_OFF:
392 : case HTML_DEFINSTANCE_ON:
393 : case HTML_DEFINSTANCE_OFF:
394 : case HTML_EMPHASIS_ON:
395 : case HTML_EMPHASIS_OFF:
396 : case HTML_KEYBOARD_ON:
397 : case HTML_KEYBOARD_OFF:
398 : case HTML_SAMPLE_ON:
399 : case HTML_SAMPLE_OFF:
400 : case HTML_STRIKE_ON:
401 : case HTML_STRIKE_OFF:
402 : case HTML_STRONG_ON:
403 : case HTML_STRONG_OFF:
404 : case HTML_VARIABLE_ON:
405 : case HTML_VARIABLE_OFF:
406 : // HTML 3.0
407 : case HTML_ABBREVIATION_ON:
408 : case HTML_ABBREVIATION_OFF:
409 : case HTML_ACRONYM_ON:
410 : case HTML_ACRONYM_OFF:
411 : case HTML_AUTHOR_ON:
412 : case HTML_AUTHOR_OFF:
413 : // case HTML_BLOCKQUOTE30_ON: //! special handling
414 : // case HTML_BLOCKQUOTE30_OFF:
415 : case HTML_DELETEDTEXT_ON:
416 : case HTML_DELETEDTEXT_OFF:
417 : case HTML_INSERTEDTEXT_ON:
418 : case HTML_INSERTEDTEXT_OFF:
419 : case HTML_LANGUAGE_ON:
420 : case HTML_LANGUAGE_OFF:
421 : case HTML_PERSON_ON:
422 : case HTML_PERSON_OFF:
423 : case HTML_SHORTQUOTE_ON:
424 : case HTML_SHORTQUOTE_OFF:
425 : case HTML_SUBSCRIPT_ON:
426 : case HTML_SUBSCRIPT_OFF:
427 : case HTML_SUPERSCRIPT_ON:
428 : case HTML_SUPERSCRIPT_OFF:
429 : // inline elements, visual markup
430 : // HTML 2.0
431 : case HTML_BOLD_ON:
432 : case HTML_BOLD_OFF:
433 : case HTML_ITALIC_ON:
434 : case HTML_ITALIC_OFF:
435 : case HTML_TELETYPE_ON:
436 : case HTML_TELETYPE_OFF:
437 : case HTML_UNDERLINE_ON:
438 : case HTML_UNDERLINE_OFF:
439 : // HTML 3.0
440 : case HTML_BIGPRINT_ON:
441 : case HTML_BIGPRINT_OFF:
442 : case HTML_STRIKETHROUGH_ON:
443 : case HTML_STRIKETHROUGH_OFF:
444 : case HTML_SMALLPRINT_ON:
445 : case HTML_SMALLPRINT_OFF:
446 : // figures
447 : case HTML_FIGURE_ON:
448 : case HTML_FIGURE_OFF:
449 : case HTML_CAPTION_ON:
450 : case HTML_CAPTION_OFF:
451 : case HTML_CREDIT_ON:
452 : case HTML_CREDIT_OFF:
453 : // misc
454 : case HTML_DIRLIST_ON:
455 : case HTML_DIRLIST_OFF:
456 : case HTML_FOOTNOTE_ON: //! land so im Text
457 : case HTML_FOOTNOTE_OFF:
458 : case HTML_MENULIST_ON:
459 : case HTML_MENULIST_OFF:
460 : // case HTML_PLAINTEXT_ON: //! special handling
461 : // case HTML_PLAINTEXT_OFF:
462 : // case HTML_PREFORMTXT_ON: //! special handling
463 : // case HTML_PREFORMTXT_OFF:
464 : case HTML_SPAN_ON:
465 : case HTML_SPAN_OFF:
466 : // obsolete
467 : // case HTML_XMP_ON: //! special handling
468 : // case HTML_XMP_OFF:
469 : // case HTML_LISTING_ON: //! special handling
470 : // case HTML_LISTING_OFF:
471 : // Netscape
472 : case HTML_BLINK_ON:
473 : case HTML_BLINK_OFF:
474 : case HTML_NOBR_ON:
475 : case HTML_NOBR_OFF:
476 : case HTML_NOEMBED_ON:
477 : case HTML_NOEMBED_OFF:
478 : case HTML_NOFRAMES_ON:
479 : case HTML_NOFRAMES_OFF:
480 : // Internet Explorer
481 : case HTML_MARQUEE_ON:
482 : case HTML_MARQUEE_OFF:
483 : // case HTML_PLAINTEXT2_ON: //! special handling
484 : // case HTML_PLAINTEXT2_OFF:
485 3 : break;
486 :
487 : default:
488 : {
489 0 : if ( nToken & HTML_TOKEN_ONOFF )
490 : {
491 0 : if ( ( nToken == HTML_UNKNOWNCONTROL_ON ) || ( nToken == HTML_UNKNOWNCONTROL_OFF ) )
492 : {
493 : ;
494 : }
495 0 : else if ( !(nToken & 1) )
496 : {
497 : DBG_ASSERT( !( nToken & 1 ), "No Start-Token ?!" );
498 0 : SkipGroup( nToken + 1 );
499 : }
500 : }
501 : }
502 : } // SWITCH
503 :
504 34 : if (mpEditEngine->IsImportHandlerSet())
505 : {
506 34 : ImportInfo aImportInfo(HTMLIMP_NEXTTOKEN, this, mpEditEngine->CreateESelection(aCurSel));
507 34 : aImportInfo.nToken = nToken;
508 34 : aImportInfo.nTokenValue = (short)nTokenValue;
509 34 : if ( nToken == HTML_TEXTTOKEN )
510 13 : aImportInfo.aText = aToken;
511 21 : else if (nToken == HTML_STYLE_OFF)
512 0 : aImportInfo.aText = maStyleSource.makeStringAndClear();
513 34 : mpEditEngine->CallImportHandler(aImportInfo);
514 : }
515 :
516 34 : }
517 :
518 6 : void EditHTMLParser::ImpInsertParaBreak()
519 : {
520 6 : if (mpEditEngine->IsImportHandlerSet())
521 : {
522 6 : ImportInfo aImportInfo(HTMLIMP_INSERTPARA, this, mpEditEngine->CreateESelection(aCurSel));
523 6 : mpEditEngine->CallImportHandler(aImportInfo);
524 : }
525 6 : aCurSel = mpEditEngine->InsertParaBreak(aCurSel);
526 6 : }
527 :
528 0 : void EditHTMLParser::ImpSetAttribs( const SfxItemSet& rItems, EditSelection* pSel )
529 : {
530 : // pSel, when character attributes, otherwise paragraph attributes for
531 : // the current paragraph.
532 : DBG_ASSERT( pSel || ( aCurSel.Min().GetNode() == aCurSel.Max().GetNode() ), "ImpInsertAttribs: Selection?" );
533 :
534 0 : EditPaM aStartPaM( pSel ? pSel->Min() : aCurSel.Min() );
535 0 : EditPaM aEndPaM( pSel ? pSel->Max() : aCurSel.Max() );
536 :
537 0 : if ( !pSel )
538 : {
539 0 : aStartPaM.SetIndex( 0 );
540 0 : aEndPaM.SetIndex( aEndPaM.GetNode()->Len() );
541 : }
542 :
543 0 : if (mpEditEngine->IsImportHandlerSet())
544 : {
545 0 : EditSelection aSel( aStartPaM, aEndPaM );
546 0 : ImportInfo aImportInfo(HTMLIMP_SETATTR, this, mpEditEngine->CreateESelection(aSel));
547 0 : aImportInfo.pAttrs = (void*)&rItems;
548 0 : mpEditEngine->CallImportHandler(aImportInfo);
549 : }
550 :
551 0 : ContentNode* pSN = aStartPaM.GetNode();
552 0 : sal_uInt16 nStartNode = mpEditEngine->GetEditDoc().GetPos( pSN );
553 :
554 : // If an attribute goes from 0 to current Paragraph length,
555 : // then it should be a paragraph attribute!
556 :
557 : // Note: Selection can reach over several Paragraphs.
558 : // All complete paragraphs are paragraph attributes ...
559 :
560 : // not really HTML:
561 : #ifdef DBG_UTIL
562 : ContentNode* pEN = aEndPaM.GetNode();
563 : sal_uInt16 nEndNode = mpEditEngine->GetEditDoc().GetPos( pEN );
564 : DBG_ASSERT( nStartNode == nEndNode, "ImpSetAttribs: Several paragraphs?" );
565 : #endif
566 :
567 0 : if ( ( aStartPaM.GetIndex() == 0 ) && ( aEndPaM.GetIndex() == aEndPaM.GetNode()->Len() ) )
568 : {
569 : // Has to be merged:
570 0 : SfxItemSet aItems = mpEditEngine->GetBaseParaAttribs(nStartNode);
571 0 : aItems.Put( rItems );
572 0 : mpEditEngine->SetParaAttribsOnly(nStartNode, aItems);
573 : }
574 : else
575 0 : mpEditEngine->SetAttribs( EditSelection( aStartPaM, aEndPaM ), rItems );
576 0 : }
577 :
578 1 : void EditHTMLParser::ImpSetStyleSheet( sal_uInt16 nHLevel )
579 : {
580 : /*
581 : nHLevel: 0: Turn off
582 : 1-6: Heading
583 : STYLE_PRE: Preformatted
584 : */
585 : // Create hard attributes ...
586 : // Enough for Calc, would have to be clarified with StyleSheets
587 : // that they should also be in the app so that when they are feed
588 : // in a different engine still are here ...
589 1 : sal_uInt16 nNode = mpEditEngine->GetEditDoc().GetPos( aCurSel.Max().GetNode() );
590 :
591 1 : SfxItemSet aItems( aCurSel.Max().GetNode()->GetContentAttribs().GetItems() );
592 :
593 1 : aItems.ClearItem( EE_PARA_ULSPACE );
594 :
595 1 : aItems.ClearItem( EE_CHAR_FONTHEIGHT );
596 1 : aItems.ClearItem( EE_CHAR_FONTINFO );
597 1 : aItems.ClearItem( EE_CHAR_WEIGHT );
598 :
599 1 : aItems.ClearItem( EE_CHAR_FONTHEIGHT_CJK );
600 1 : aItems.ClearItem( EE_CHAR_FONTINFO_CJK );
601 1 : aItems.ClearItem( EE_CHAR_WEIGHT_CJK );
602 :
603 1 : aItems.ClearItem( EE_CHAR_FONTHEIGHT_CTL );
604 1 : aItems.ClearItem( EE_CHAR_FONTINFO_CTL );
605 1 : aItems.ClearItem( EE_CHAR_WEIGHT_CTL );
606 :
607 : // Bold in the first 3 Headings
608 1 : if ( ( nHLevel >= 1 ) && ( nHLevel <= 3 ) )
609 : {
610 0 : SvxWeightItem aWeightItem( WEIGHT_BOLD, EE_CHAR_WEIGHT );
611 0 : aItems.Put( aWeightItem );
612 :
613 0 : SvxWeightItem aWeightItemCJK( WEIGHT_BOLD, EE_CHAR_WEIGHT_CJK );
614 0 : aItems.Put( aWeightItem );
615 :
616 0 : SvxWeightItem aWeightItemCTL( WEIGHT_BOLD, EE_CHAR_WEIGHT_CTL );
617 0 : aItems.Put( aWeightItem );
618 : }
619 :
620 : // Font hight and margins, when LogicToLogic is possible:
621 1 : MapUnit eUnit = mpEditEngine->GetRefMapMode().GetMapUnit();
622 1 : if ( ( eUnit != MAP_PIXEL ) && ( eUnit != MAP_SYSFONT ) &&
623 : ( eUnit != MAP_APPFONT ) && ( eUnit != MAP_RELATIVE ) )
624 : {
625 1 : long nPoints = 10;
626 1 : if ( nHLevel == 1 )
627 0 : nPoints = 22;
628 1 : else if ( nHLevel == 2 )
629 0 : nPoints = 16;
630 1 : else if ( nHLevel == 3 )
631 0 : nPoints = 12;
632 1 : else if ( nHLevel == 4 )
633 0 : nPoints = 11;
634 :
635 1 : nPoints = OutputDevice::LogicToLogic( nPoints, MAP_POINT, eUnit );
636 :
637 1 : SvxFontHeightItem aHeightItem( nPoints, 100, EE_CHAR_FONTHEIGHT );
638 1 : aItems.Put( aHeightItem );
639 :
640 1 : SvxFontHeightItem aHeightItemCJK( nPoints, 100, EE_CHAR_FONTHEIGHT_CJK );
641 1 : aItems.Put( aHeightItemCJK );
642 :
643 1 : SvxFontHeightItem aHeightItemCTL( nPoints, 100, EE_CHAR_FONTHEIGHT_CTL );
644 1 : aItems.Put( aHeightItemCTL );
645 :
646 : // Paragraph margins, when Heading:
647 1 : if ( !nHLevel || ((nHLevel >= 1) && (nHLevel <= 6)) )
648 : {
649 1 : SvxULSpaceItem aULSpaceItem( EE_PARA_ULSPACE );
650 1 : aULSpaceItem.SetUpper( (sal_uInt16)OutputDevice::LogicToLogic( 42, MAP_10TH_MM, eUnit ) );
651 1 : aULSpaceItem.SetLower( (sal_uInt16)OutputDevice::LogicToLogic( 35, MAP_10TH_MM, eUnit ) );
652 1 : aItems.Put( aULSpaceItem );
653 1 : }
654 : }
655 :
656 : // Choose a proportional Font for Pre
657 1 : if ( nHLevel == STYLE_PRE )
658 : {
659 0 : Font aFont = OutputDevice::GetDefaultFont( DEFAULTFONT_FIXED, LANGUAGE_SYSTEM, 0 );
660 0 : SvxFontItem aFontItem( aFont.GetFamily(), aFont.GetName(), XubString(), aFont.GetPitch(), aFont.GetCharSet(), EE_CHAR_FONTINFO );
661 0 : aItems.Put( aFontItem );
662 :
663 0 : SvxFontItem aFontItemCJK( aFont.GetFamily(), aFont.GetName(), XubString(), aFont.GetPitch(), aFont.GetCharSet(), EE_CHAR_FONTINFO_CJK );
664 0 : aItems.Put( aFontItemCJK );
665 :
666 0 : SvxFontItem aFontItemCTL( aFont.GetFamily(), aFont.GetName(), XubString(), aFont.GetPitch(), aFont.GetCharSet(), EE_CHAR_FONTINFO_CTL );
667 0 : aItems.Put( aFontItemCTL );
668 : }
669 :
670 1 : mpEditEngine->SetParaAttribsOnly(nNode, aItems);
671 1 : }
672 :
673 13 : void EditHTMLParser::ImpInsertText( const String& rText )
674 : {
675 13 : String aText( rText );
676 13 : if (mpEditEngine->IsImportHandlerSet())
677 : {
678 13 : ImportInfo aImportInfo(HTMLIMP_INSERTTEXT, this, mpEditEngine->CreateESelection(aCurSel));
679 13 : aImportInfo.aText = aText;
680 13 : mpEditEngine->CallImportHandler(aImportInfo);
681 : }
682 :
683 13 : aCurSel = mpEditEngine->InsertText(aCurSel, aText);
684 13 : }
685 :
686 0 : void EditHTMLParser::SkipGroup( int nEndToken )
687 : {
688 : // groups in cells are closed upon leaving the cell, because those
689 : // ******* web authors don't know their job
690 : // for example: <td><form></td> lacks a closing </form>
691 0 : sal_uInt8 nCellLevel = nInCell;
692 : int nToken;
693 0 : while( nCellLevel <= nInCell && ( (nToken = GetNextToken() ) != nEndToken ) && nToken )
694 : {
695 0 : switch ( nToken )
696 : {
697 : case HTML_TABLEHEADER_ON:
698 : case HTML_TABLEDATA_ON:
699 0 : nInCell++;
700 0 : break;
701 : case HTML_TABLEHEADER_OFF:
702 : case HTML_TABLEDATA_OFF:
703 0 : if ( nInCell )
704 0 : nInCell--;
705 0 : break;
706 : }
707 : }
708 0 : }
709 :
710 9 : void EditHTMLParser::StartPara( bool bReal )
711 : {
712 9 : if ( bReal )
713 : {
714 0 : const HTMLOptions& aOptions = GetOptions();
715 0 : SvxAdjust eAdjust = SVX_ADJUST_LEFT;
716 0 : for ( size_t i = 0, n = aOptions.size(); i < n; ++i )
717 : {
718 0 : const HTMLOption& aOption = aOptions[i];
719 0 : switch( aOption.GetToken() )
720 : {
721 : case HTML_O_ALIGN:
722 : {
723 0 : if ( aOption.GetString().CompareIgnoreCaseToAscii( OOO_STRING_SVTOOLS_HTML_AL_right ) == COMPARE_EQUAL )
724 0 : eAdjust = SVX_ADJUST_RIGHT;
725 0 : else if ( aOption.GetString().CompareIgnoreCaseToAscii( OOO_STRING_SVTOOLS_HTML_AL_middle ) == COMPARE_EQUAL )
726 0 : eAdjust = SVX_ADJUST_CENTER;
727 0 : else if ( aOption.GetString().CompareIgnoreCaseToAscii( OOO_STRING_SVTOOLS_HTML_AL_center ) == COMPARE_EQUAL )
728 0 : eAdjust = SVX_ADJUST_CENTER;
729 : else
730 0 : eAdjust = SVX_ADJUST_LEFT;
731 : }
732 0 : break;
733 : }
734 : }
735 0 : SfxItemSet aItemSet = mpEditEngine->GetEmptyItemSet();
736 0 : aItemSet.Put( SvxAdjustItem( eAdjust, EE_PARA_JUST ) );
737 0 : ImpSetAttribs( aItemSet );
738 : }
739 9 : bInPara = true;
740 9 : }
741 :
742 6 : void EditHTMLParser::EndPara( bool )
743 : {
744 6 : if ( bInPara )
745 : {
746 6 : bool bHasText = HasTextInCurrentPara();
747 6 : if ( bHasText )
748 6 : ImpInsertParaBreak();
749 : }
750 6 : bInPara = false;
751 6 : }
752 :
753 7 : bool EditHTMLParser::ThrowAwayBlank()
754 : {
755 : // A blank must be thrown away if the new text begins with a Blank and
756 : // if the current paragraph is empty or ends with a Blank...
757 7 : ContentNode* pNode = aCurSel.Max().GetNode();
758 7 : if ( pNode->Len() && ( pNode->GetChar( pNode->Len()-1 ) != ' ' ) )
759 0 : return false;
760 7 : return true;
761 : }
762 :
763 12 : bool EditHTMLParser::HasTextInCurrentPara()
764 : {
765 12 : return aCurSel.Max().GetNode()->Len() ? true : false;
766 : }
767 :
768 0 : void EditHTMLParser::AnchorStart()
769 : {
770 : // ignore anchor in anchor
771 0 : if ( !pCurAnchor )
772 : {
773 0 : const HTMLOptions& aOptions = GetOptions();
774 0 : String aRef;
775 :
776 0 : for ( size_t i = 0, n = aOptions.size(); i < n; ++i )
777 : {
778 0 : const HTMLOption& aOption = aOptions[i];
779 0 : switch( aOption.GetToken() )
780 : {
781 : case HTML_O_HREF:
782 0 : aRef = aOption.GetString();
783 0 : break;
784 : }
785 : }
786 :
787 0 : if ( aRef.Len() )
788 : {
789 0 : String aURL = aRef;
790 0 : if ( aURL.Len() && ( aURL.GetChar( 0 ) != '#' ) )
791 : {
792 0 : INetURLObject aTargetURL;
793 0 : INetURLObject aRootURL( aBaseURL );
794 0 : aRootURL.GetNewAbsURL( aRef, &aTargetURL );
795 0 : aURL = aTargetURL.GetMainURL( INetURLObject::DECODE_TO_IURI );
796 : }
797 0 : pCurAnchor = new AnchorInfo;
798 0 : pCurAnchor->aHRef = aURL;
799 0 : }
800 : }
801 0 : }
802 :
803 0 : void EditHTMLParser::AnchorEnd()
804 : {
805 0 : if ( pCurAnchor )
806 : {
807 : // Insert as URL-Field...
808 0 : SvxFieldItem aFld( SvxURLField( pCurAnchor->aHRef, pCurAnchor->aText, SVXURLFORMAT_REPR ), EE_FEATURE_FIELD );
809 0 : aCurSel = mpEditEngine->InsertField(aCurSel, aFld);
810 0 : bFieldsInserted = true;
811 0 : delete pCurAnchor;
812 0 : pCurAnchor = NULL;
813 :
814 0 : if (mpEditEngine->IsImportHandlerSet())
815 : {
816 0 : ImportInfo aImportInfo(HTMLIMP_INSERTFIELD, this, mpEditEngine->CreateESelection(aCurSel));
817 0 : mpEditEngine->CallImportHandler(aImportInfo);
818 0 : }
819 : }
820 0 : }
821 :
822 0 : void EditHTMLParser::HeadingStart( int nToken )
823 : {
824 0 : bWasInPara = bInPara;
825 0 : StartPara( false );
826 :
827 0 : if ( bWasInPara && HasTextInCurrentPara() )
828 0 : ImpInsertParaBreak();
829 :
830 : sal_uInt16 nId = sal::static_int_cast< sal_uInt16 >(
831 0 : 1 + ( ( nToken - HTML_HEAD1_ON ) / 2 ) );
832 : DBG_ASSERT( (nId >= 1) && (nId <= 9), "HeadingStart: ID can not be correct!" );
833 0 : ImpSetStyleSheet( nId );
834 0 : }
835 :
836 0 : void EditHTMLParser::HeadingEnd( int )
837 : {
838 0 : EndPara( false );
839 0 : ImpSetStyleSheet( 0 );
840 :
841 0 : if ( bWasInPara )
842 : {
843 0 : bInPara = true;
844 0 : bWasInPara = false;
845 : }
846 0 : }
847 :
848 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|