LCOV - code coverage report
Current view: top level - sw/source/filter/ascii - parasc.cxx (source / functions) Hit Total Coverage
Test: commit e02a6cb2c3e2b23b203b422e4e0680877f232636 Lines: 0 229 0.0 %
Date: 2014-04-14 Functions: 0 6 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : #include <boost/scoped_array.hpp>
      21             : #include <tools/stream.hxx>
      22             : #include <hintids.hxx>
      23             : #include <rtl/tencinfo.h>
      24             : #include <sfx2/printer.hxx>
      25             : #include <editeng/fontitem.hxx>
      26             : #include <editeng/langitem.hxx>
      27             : #include <editeng/formatbreakitem.hxx>
      28             : #include <editeng/scripttypeitem.hxx>
      29             : #include <shellio.hxx>
      30             : #include <doc.hxx>
      31             : #include <swtypes.hxx>
      32             : #include <ndtxt.hxx>
      33             : #include <pam.hxx>
      34             : #include <frmatr.hxx>
      35             : #include <fltini.hxx>
      36             : #include <pagedesc.hxx>
      37             : #include <breakit.hxx>
      38             : #include <swerror.h>
      39             : #include <statstr.hrc>
      40             : #include <mdiexp.hxx>
      41             : #include <poolfmt.hxx>
      42             : 
      43             : #include "vcl/metric.hxx"
      44             : 
      45             : #define ASC_BUFFLEN 4096 // max. bytes held in the read buffer per block; the buffer itself is allocated with 2 spare bytes
      46             : 
      47             : class SwASCIIParser
      48             : {
                           // Reads a text stream block by block and inserts its content into a
                           // document. One instance is created per import by AsciiReader::Read.
      49             :     SwDoc* pDoc;                 // document that receives the text (not deleted here)
      50             :     SwPaM* pPam;                 // insert position; heap copy made in the constructor, owned
      51             :     SvStream& rInput;            // stream the text is read from
      52             :     sal_Char* pArr;              // raw read buffer of ASC_BUFFLEN + 2 bytes, owned
      53             :     const SwAsciiOptions& rOpt;  // import options supplied by the caller
      54             :     SfxItemSet* pItemSet;        // font/language attributes to apply; freed and nulled at the end of CallParser
      55             :     long nFileSize;              // stream size, determined in CallParser
      56             :     sal_uInt16 nScript;          // OR of the script-type flags reported for the inserted text
      57             :     bool bNewDoc;                // true when reading into a new document (not insert mode)
      58             : 
                           // Reads and inserts the whole stream; used by CallParser.
      59             :     sal_uLong ReadChars();
                           // Inserts one piece of text at pPam and records its script types.
      60             :     void InsertText( const OUString& rStr );
      61             : 
      62             : public:
      63             :     SwASCIIParser( SwDoc* pD, const SwPaM& rCrsr, SvStream& rIn,
      64             :                             bool bReadNewDoc, const SwAsciiOptions& rOpts );
      65             :     ~SwASCIIParser();
      66             : 
                           // Runs the complete import and returns the result of ReadChars.
      67             :     sal_uLong CallParser();
      68             : };
      69             : 
      70             : // Entry point for the general reader interface: imports the text stream
                       // pStrm into rDoc at rPam using the reader's ASCII options.
                       // The two OUString parameters are unnamed and unused.
                       // Returns ERR_SWG_READ_ERROR if no stream is set, otherwise the result of
                       // SwASCIIParser::CallParser. The ASCII options are reset after a read
                       // that reached the parser (not on the early error return).
      71           0 : sal_uLong AsciiReader::Read( SwDoc &rDoc, const OUString&, SwPaM &rPam, const OUString & )
      72             : {
      73           0 :     if( !pStrm )
      74             :     {
                               // NOTE(review): "!this" is used as an always-failing assertion
                               // condition; presumably equivalent to asserting false - confirm.
      75             :         OSL_ENSURE( !this, "ASCII read without a stream" );
      76           0 :         return ERR_SWG_READ_ERROR;
      77             :     }
      78             : 
                           // A new document is assumed whenever the reader is not in insert mode.
                           // NOTE(review): the parser is allocated with new and released manually
                           // below; it would leak if CallParser can throw - confirm.
      79             :     SwASCIIParser* pParser = new SwASCIIParser( &rDoc, rPam, *pStrm,
      80           0 :                                         !bInsertMode, aOpt.GetASCIIOpts() );
      81           0 :     sal_uLong nRet = pParser->CallParser();
      82             : 
      83           0 :     delete pParser;
      84             :     // after Read reset the options
      85           0 :     aOpt.ResetASCIIOpts();
      86           0 :     return nRet;
      87             : }
      88             : 
      89           0 : SwASCIIParser::SwASCIIParser(SwDoc* pD, const SwPaM& rCrsr, SvStream& rIn,
      90             :     bool bReadNewDoc, const SwAsciiOptions& rOpts)
      91             :     : pDoc(pD), rInput(rIn), rOpt(rOpts), nFileSize(0), nScript(0)
      92           0 :     , bNewDoc(bReadNewDoc)
      93             : {
                           // Sets up the parser: copies the cursor position, allocates the read
                           // buffer and prepares the attribute set from the import options.

                           // Private PaM at the point of the caller's cursor; deleted in the destructor.
      94           0 :     pPam = new SwPaM( *rCrsr.GetPoint() );
                           // Two bytes more than are ever read, leaving room for the terminator
                           // that ReadChars writes behind the data.
      95           0 :     pArr = new sal_Char [ ASC_BUFFLEN + 2 ];
      96             : 
                           // Item set restricted to the font..language ranges of the Latin, CJK
                           // and CTL character attributes.
      97           0 :     pItemSet = new SfxItemSet( pDoc->GetAttrPool(),
      98             :                 RES_CHRATR_FONT,        RES_CHRATR_LANGUAGE,
      99             :                 RES_CHRATR_CJK_FONT,    RES_CHRATR_CJK_LANGUAGE,
     100             :                 RES_CHRATR_CTL_FONT,    RES_CHRATR_CTL_LANGUAGE,
     101           0 :                 0 );
     102             : 
     103             :     // set defaults from the options
     104           0 :     if( rOpt.GetLanguage() )
     105             :     {
                               // The same language is stored for all three script types.
     106           0 :         SvxLanguageItem aLang( (LanguageType)rOpt.GetLanguage(),
     107           0 :                                  RES_CHRATR_LANGUAGE );
     108           0 :         pItemSet->Put( aLang );
     109           0 :         pItemSet->Put( aLang, RES_CHRATR_CJK_LANGUAGE );
     110           0 :         pItemSet->Put( aLang, RES_CHRATR_CTL_LANGUAGE );
     111             :     }
     112           0 :     if( !rOpt.GetFontName().isEmpty() )
     113             :     {
     114           0 :         Font aTextFont( rOpt.GetFontName(), Size( 0, 10 ) );
                               // If the document has a printer, take the font data from the
                               // printer's metric for the requested font.
     115           0 :         if( pDoc->getPrinter( false ) )
     116           0 :             aTextFont = pDoc->getPrinter( false )->GetFontMetric( aTextFont );
     117           0 :         SvxFontItem aFont( aTextFont.GetFamily(), aTextFont.GetName(),
     118           0 :                            OUString(), aTextFont.GetPitch(), aTextFont.GetCharSet(), RES_CHRATR_FONT );
                               // The same font is stored for all three script types.
     119           0 :         pItemSet->Put( aFont );
     120           0 :         pItemSet->Put( aFont, RES_CHRATR_CJK_FONT );
     121           0 :         pItemSet->Put( aFont, RES_CHRATR_CTL_FONT );
     122             :     }
     123           0 : }
     124             : 
     125           0 : SwASCIIParser::~SwASCIIParser()
     126             : {
                           // Releases everything the constructor allocated. pItemSet is already
                           // null here if CallParser ran, because CallParser deletes and nulls it.
     127           0 :     delete pPam;
     128           0 :     delete [] pArr;
     129           0 :     delete pItemSet;
     130           0 : }
     131             : 
     132             : // Runs the import: measures the stream, reads and inserts the text via
                       // ReadChars, then applies the font/language attributes collected in
                       // pItemSet. Returns the value returned by ReadChars.
     133           0 : sal_uLong SwASCIIParser::CallParser()
     134             : {
                           // Determine the stream size by seeking to the end, then rewind.
                           // The error state is reset after each seek.
     135           0 :     rInput.Seek(STREAM_SEEK_TO_END);
     136           0 :     rInput.ResetError();
     137             : 
     138           0 :     nFileSize = rInput.Tell();
     139           0 :     rInput.Seek(STREAM_SEEK_TO_BEGIN);
     140           0 :     rInput.ResetError();
     141             : 
     142           0 :     ::StartProgress( STR_STATSTR_W4WREAD, 0, nFileSize, pDoc->GetDocShell() );
     143             : 
                           // When inserting into an existing document, remember where the insert
                           // starts so the attributes can later be applied to the inserted range.
                           // NOTE(review): the (0, -1) arguments presumably place this PaM one node
                           // before the insert position; it is moved forward by one node again
                           // further down - confirm against the SwPaM constructor.
     144           0 :     SwPaM* pInsPam = 0;
     145           0 :     sal_Int32 nSttCntnt = 0;
     146           0 :     if (!bNewDoc)
     147             :     {
     148           0 :         const SwNodeIndex& rTmp = pPam->GetPoint()->nNode;
     149           0 :         pInsPam = new SwPaM( rTmp, rTmp, 0, -1 );
     150           0 :         nSttCntnt = pPam->GetPoint()->nContent.GetIndex();
     151             :     }
     152             : 
     153           0 :     SwTxtFmtColl *pColl = 0;
     154             : 
                           // New document: use the RES_POOLCOLL_HTML_PRE text collection, falling
                           // back to RES_POOLCOLL_STANDARD, and set it at the insert position.
     155           0 :     if (bNewDoc)
     156             :     {
     157           0 :         pColl = pDoc->GetTxtCollFromPool(RES_POOLCOLL_HTML_PRE, false);
     158           0 :         if (!pColl)
     159           0 :             pColl = pDoc->GetTxtCollFromPool(RES_POOLCOLL_STANDARD,false);
     160           0 :         if (pColl)
     161           0 :             pDoc->SetTxtFmtColl(*pPam, pColl);
     162             :     }
     163             : 
     164           0 :     sal_uLong nError = ReadChars();
     165             : 
     166           0 :     if( pItemSet )
     167             :     {
     168             :         // set only the attribute, for scanned scripts.
                               // nScript holds the script types found in the inserted text (see
                               // InsertText); attributes of script types not found are dropped.
     169           0 :         if( !( SCRIPTTYPE_LATIN & nScript ))
     170             :         {
     171           0 :             pItemSet->ClearItem( RES_CHRATR_FONT );
     172           0 :             pItemSet->ClearItem( RES_CHRATR_LANGUAGE );
     173             :         }
     174           0 :         if( !( SCRIPTTYPE_ASIAN & nScript ))
     175             :         {
     176           0 :             pItemSet->ClearItem( RES_CHRATR_CJK_FONT );
     177           0 :             pItemSet->ClearItem( RES_CHRATR_CJK_LANGUAGE );
     178             :         }
     179           0 :         if( !( SCRIPTTYPE_COMPLEX & nScript ))
     180             :         {
     181           0 :             pItemSet->ClearItem( RES_CHRATR_CTL_FONT );
     182           0 :             pItemSet->ClearItem( RES_CHRATR_CTL_LANGUAGE );
     183             :         }
     184           0 :         if( pItemSet->Count() )
     185             :         {
     186           0 :             if( bNewDoc )
     187             :             {
     188           0 :                 if (pColl)
     189             :                 {
     190             :                     // Using the pool defaults for the font causes significant
     191             :                     // trouble for the HTML filter, because it is not able
     192             :                     // to export the pool defaults (or to be more precise:
     193             :                     // the HTML filter is not able to detect whether a pool
     194             :                     // default has changed or not. Even a comparison with the
     195             :                     // HTMLi template does not work, because the defaults are
     196             :                     // not copied when a new doc is created. The result of
     197             :                     // comparing pool defaults therefore would be that the
     198             :                     // defaults are exported always if they have changed for
     199             :                     // text documents in general. That's not sensible, as well
     200             :                     // as it is not sensible to export them always.
                                           // Hence the three font items are moved from the item
                                           // set into the text collection; the list is 0-terminated.
     201             :                     sal_uInt16 aWhichIds[4] =
     202             :                     {
     203             :                         RES_CHRATR_FONT, RES_CHRATR_CJK_FONT,
     204             :                         RES_CHRATR_CTL_FONT, 0
     205           0 :                     };
     206           0 :                     sal_uInt16 *pWhichIds = aWhichIds;
     207           0 :                     while (*pWhichIds)
     208             :                     {
     209             :                         const SfxPoolItem *pItem;
     210           0 :                         if (SFX_ITEM_SET == pItemSet->GetItemState(*pWhichIds,
     211           0 :                             false, &pItem))
     212             :                         {
     213           0 :                             pColl->SetFmtAttr( *pItem );
     214           0 :                             pItemSet->ClearItem( *pWhichIds );
     215             :                         }
     216           0 :                         ++pWhichIds;
     217             :                     }
     218             :                 }
                                   // Whatever is still in the set becomes a document default.
     219           0 :                 if (pItemSet->Count())
     220           0 :                     pDoc->SetDefault(*pItemSet);
     221             :             }
     222           0 :             else if( pInsPam )
     223             :             {
     224             :                 // then set over the insert range the defined attributes
                                   // Mark = current end of the insert; point = remembered start
                                   // (one node forward, at the original content index).
     225           0 :                 *pInsPam->GetMark() = *pPam->GetPoint();
     226           0 :                 pInsPam->GetPoint()->nNode++;
     227           0 :                 pInsPam->GetPoint()->nContent.Assign(
     228           0 :                                     pInsPam->GetCntntNode(), nSttCntnt );
     229             : 
     230             :                 // !!!!!
                                   // NOTE(review): "!this" makes this an always-failing assertion,
                                   // i.e. a reminder that this branch still applies hard attributes.
     231             :                 OSL_ENSURE( !this, "Have to change - hard attr. to para. style" );
     232           0 :                 pDoc->InsertItemSet( *pInsPam, *pItemSet, 0 );
     233             :             }
     234             :         }
                               // Comma operator: delete the set, then null the pointer so that the
                               // destructor's delete is harmless.
     235           0 :         delete pItemSet, pItemSet = 0;
     236             :     }
     237             : 
     238           0 :     delete pInsPam;
     239             : 
     240           0 :     ::EndProgress( pDoc->GetDocShell() );
     241           0 :     return nError;
     242             : }
     243             : 
     244           0 : sal_uLong SwASCIIParser::ReadChars()
     245             : {
                           // Reads the stream in blocks of at most ASC_BUFFLEN bytes, converts each
                           // block to UTF-16 and inserts the text into the document, splitting
                           // paragraphs at line ends, form feeds and over-long lines.
                           // Returns ERROR_SW_READ_BASE if no text converter could be created and
                           // 0 otherwise.
                           // NOTE(review): a stream error only ends the loop; it is not reflected
                           // in the return value - confirm this is intended.

                           // pStt: current scan position, pEnd: end of the converted block,
                           // pLastStt: start of the text that has not been inserted yet.
     246           0 :     sal_Unicode *pStt = 0, *pEnd = 0, *pLastStt = 0;
     247           0 :     long nReadCnt = 0, nLineLen = 0;
                           // Set when a block ended right after a CR in CRLF mode, so that the LF
                           // can be looked for at the start of the next block.
     248           0 :     sal_Unicode cLastCR = 0;
     249           0 :     bool bSwapUnicode = false;
     250             : 
                           // If the caller's options equal default-constructed options (font,
                           // charset, language and paragraph flags) and the stream has at least
                           // two bytes, try to detect the encoding from the first block and use
                           // the local aEmpty options instead of rOpt.
     251           0 :     const SwAsciiOptions *pUseMe=&rOpt;
     252           0 :     SwAsciiOptions aEmpty;
     253           0 :     if (nFileSize >= 2 &&
     254           0 :         aEmpty.GetFontName() == rOpt.GetFontName() &&
     255           0 :         aEmpty.GetCharSet() == rOpt.GetCharSet() &&
     256           0 :         aEmpty.GetLanguage() == rOpt.GetLanguage() &&
     257           0 :         aEmpty.GetParaFlags() == rOpt.GetParaFlags())
     258             :     {
     259             :         sal_uLong nLen, nOrig;
     260           0 :         nOrig = nLen = rInput.Read(pArr, ASC_BUFFLEN);
     261             :         rtl_TextEncoding eCharSet;
     262           0 :         bool bRet = SwIoSystem::IsDetectableText(pArr, nLen, &eCharSet, &bSwapUnicode);
     263             :         OSL_ENSURE(bRet, "Autodetect of text import without nag dialog must "
     264             :             "have failed");
                               // Rewind what was read for detection.
                               // NOTE(review): on success the seek uses nLen rather than nOrig,
                               // presumably because IsDetectableText may adjust nLen (e.g. to skip
                               // a byte order mark) - confirm against its signature.
     265           0 :         if (bRet && eCharSet != RTL_TEXTENCODING_DONTKNOW)
     266             :         {
     267           0 :             aEmpty.SetCharSet(eCharSet);
     268           0 :             rInput.SeekRel(-(long(nLen)));
     269             :         }
     270             :         else
     271           0 :             rInput.SeekRel(-(long(nOrig)));
     272           0 :         pUseMe=&aEmpty;
     273             :     }
     274             : 
                           // Every charset except UCS2 goes through a text-to-unicode converter;
                           // an unknown charset is treated as US-ASCII. UCS2 input is used as is
                           // (byte-swapped if required).
     275           0 :     rtl_TextToUnicodeConverter hConverter=0;
     276           0 :     rtl_TextToUnicodeContext hContext=0;
     277           0 :     rtl_TextEncoding currentCharSet = pUseMe->GetCharSet();
     278           0 :     if (RTL_TEXTENCODING_UCS2 != currentCharSet)
     279             :     {
     280           0 :         if( currentCharSet == RTL_TEXTENCODING_DONTKNOW )
     281           0 :                 currentCharSet = RTL_TEXTENCODING_ASCII_US;
     282           0 :         hConverter = rtl_createTextToUnicodeConverter( currentCharSet );
     283             :         OSL_ENSURE( hConverter, "no string convert available" );
     284           0 :         if (!hConverter)
     285           0 :             return ERROR_SW_READ_BASE;
     286           0 :         bSwapUnicode = false;
     287           0 :         hContext = rtl_createTextToUnicodeContext( hConverter );
     288             :     }
     289           0 :     else if (pUseMe != &aEmpty)  //Already successfully figured out type
     290             :     {
                               // UCS2 was requested through the caller's options (no autodetection
                               // ran): let the stream prepare for unicode text and report whether
                               // the bytes need swapping.
     291           0 :         rInput.StartReadingUnicodeText( currentCharSet );
     292           0 :         bSwapUnicode = rInput.IsEndianSwap();
     293             :     }
     294             : 
                           // aWork receives the converted characters of one block; nArrOffset is
                           // the number of unconverted bytes carried over to the next block.
     295           0 :     boost::scoped_array<sal_Unicode> aWork;
     296           0 :     sal_uLong nArrOffset = 0;
     297             : 
     298             :     do {
     299           0 :         if( pStt >= pEnd )
     300             :         {
                                   // Current block is used up: insert the pending text first.
     301           0 :             if( pLastStt != pStt )
     302           0 :                 InsertText( OUString( pLastStt ));
     303             : 
     304             :             // Read a new block
     305             :             sal_uLong lGCount;
     306           0 :             if( SVSTREAM_OK != rInput.GetError() || 0 == (lGCount =
     307           0 :                         rInput.Read( pArr + nArrOffset,
     308           0 :                                      ASC_BUFFLEN - nArrOffset )))
     309           0 :                 break;      // break from the while loop
     310             : 
     311             :             /*
     312             :             If there was some unconverted bytes on the last cycle then they
     313             :             were put at the beginning of the array, so total bytes available
     314             :             to convert this cycle includes them. If we found 0 following bytes
     315             :             then we ignore the previous partial character.
     316             :             */
     317           0 :             lGCount+=nArrOffset;
     318             : 
     319           0 :             if( hConverter )
     320             :             {
     321             :                 sal_uInt32 nInfo;
     322           0 :                 sal_Size nNewLen = lGCount, nCntBytes;
     323           0 :                 aWork.reset(new sal_Unicode[nNewLen + 1]); // add 1 for '\0'
     324           0 :                 sal_Unicode* pBuf = aWork.get();
     325             : 
     326             :                 nNewLen = rtl_convertTextToUnicode( hConverter, hContext,
     327             :                                 pArr, lGCount, pBuf, nNewLen,
     328             :                                 (
     329             :                                 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT |
     330             :                                 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
     331             :                                 RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT |
     332             :                                 RTL_TEXTTOUNICODE_FLAGS_GLOBAL_SIGNATURE
     333             :                                 ),
     334             :                                 &nInfo,
     335           0 :                                 &nCntBytes );
                                       // Keep the bytes the converter did not consume at the front
                                       // of pArr; the next read appends behind them.
     336           0 :                 if( 0 != ( nArrOffset = lGCount - nCntBytes ) )
     337           0 :                     memmove( pArr, pArr + nCntBytes, nArrOffset );
     338             : 
     339           0 :                 pStt = pLastStt = aWork.get();
     340           0 :                 pEnd = pStt + nNewLen;
     341             :             }
     342             :             else
     343             :             {
                                       // No converter: the bytes in pArr are scanned in place as
                                       // sal_Unicode characters.
     344           0 :                 pStt = pLastStt = (sal_Unicode*)pArr;
     345           0 :                 pEnd = (sal_Unicode*)(pArr + lGCount);
     346             : 
     347           0 :                 if( bSwapUnicode )
     348             :                 {
                                           // Swap the two bytes of every character.
     349           0 :                     sal_Char* pF = pArr, *pN = pArr + 1;
     350           0 :                     for( sal_uLong n = 0; n < lGCount; n += 2, pF += 2, pN += 2 )
     351             :                     {
     352           0 :                         sal_Char c = *pF;
     353           0 :                         *pF = *pN;
     354           0 :                         *pN = c;
     355             :                     }
     356             :                 }
     357             :             }
     358             : 
                                   // Terminate the block; this relies on the spare element in aWork
                                   // and the two spare bytes in pArr.
     359           0 :             *pEnd = 0;
     360           0 :             nReadCnt += lGCount;
     361             : 
     362           0 :             ::SetProgressState( nReadCnt, pDoc->GetDocShell() );
     363             : 
     364           0 :             if( cLastCR )
     365             :             {
                                       // The previous block ended with a CR: skip a directly
                                       // following LF, then finish the paragraph.
     366           0 :                 if( 0x0a == *pStt && 0x0d == cLastCR )
     367           0 :                     pLastStt = ++pStt;
     368           0 :                 cLastCR = 0;
     369           0 :                 nLineLen = 0;
     370             :                 // We skip the last one at the end
     371           0 :                 if( !rInput.IsEof() || !(pEnd == pStt ||
     372           0 :                     ( !*pEnd && pEnd == pStt+1 ) ) )
     373           0 :                     pDoc->SplitNode( *pPam->GetPoint(), false );
     374             :             }
                                   // NOTE(review): if the block produced no characters at all
                                   // (pStt == pEnd), the switch below still examines *pStt, which is
                                   // the terminator just written; the default branch would replace
                                   // it by '#' - confirm whether that case can occur.
     375             :         }
     376             : 
                               // bIns: the current character stays part of the text.
                               // bSplitNode: the pending text is inserted and the paragraph split.
     377           0 :         bool bIns = true, bSplitNode = false;
     378           0 :         switch( *pStt )
     379             :         {
     380             : 
                               // LF ends a paragraph only if the paragraph flags are LINEEND_LF;
                               // otherwise it is kept as an ordinary character.
     381           0 :         case 0x0a:  if( LINEEND_LF == pUseMe->GetParaFlags() )
     382             :                     {
     383           0 :                         bIns = false;
     384           0 :                         *pStt = 0;
     385           0 :                         ++pStt;
     386             : 
     387             :                         // We skip the last one at the end
     388           0 :                         if( !rInput.IsEof() || pEnd != pStt )
     389           0 :                             bSplitNode = true;
     390             :                     }
     391           0 :                     break;
     392             : 
                               // CR is handled for every paragraph flag except LINEEND_LF. With
                               // LINEEND_CRLF a following LF is consumed as well; if the CR is the
                               // last character of the block it is remembered in cLastCR.
                               // NOTE(review): with LINEEND_CRLF a CR that is followed by another
                               // character is zeroed without inserting or splitting; the text
                               // between it and the next insert then appears to be dropped -
                               // confirm.
     393           0 :         case 0x0d:  if( LINEEND_LF != pUseMe->GetParaFlags() )
     394             :                     {
     395           0 :                         bIns = false;
     396           0 :                         *pStt = 0;
     397           0 :                         ++pStt;
     398             : 
     399           0 :                         bool bChkSplit = false;
     400           0 :                         if( LINEEND_CRLF == pUseMe->GetParaFlags() )
     401             :                         {
     402           0 :                             if( pStt == pEnd )
     403           0 :                                 cLastCR = 0x0d;
     404           0 :                             else if( 0x0a == *pStt )
     405             :                             {
     406           0 :                                 ++pStt;
     407           0 :                                 bChkSplit = true;
     408             :                             }
     409             :                         }
     410             :                         else
     411           0 :                             bChkSplit = true;
     412             : 
     413             :                         // We skip the last one at the end
     414           0 :                         if( bChkSplit && ( !rInput.IsEof() || pEnd != pStt ))
     415           0 :                             bSplitNode = true;
     416             :                     }
     417           0 :                     break;
     418             : 
     419             :         case 0x0c:
     420             :                     {
     421             :                         // Insert a hard page break
                                               // The form feed is replaced by a terminator, pending
                                               // text is inserted, and the new paragraph gets a
                                               // page-break-before item.
     422           0 :                         *pStt++ = 0;
     423           0 :                         if( nLineLen )
     424             :                         {
     425           0 :                             InsertText( OUString( pLastStt ));
     426             :                         }
     427           0 :                         pDoc->SplitNode( *pPam->GetPoint(), false );
     428             :                         pDoc->InsertPoolItem(
     429           0 :                             *pPam, SvxFmtBreakItem( SVX_BREAK_PAGE_BEFORE, RES_BREAK ), 0);
     430           0 :                         pLastStt = pStt;
     431           0 :                         nLineLen = 0;
     432           0 :                         bIns = false;
     433             :                     }
     434           0 :                     break;
     435             : 
                               // 0x1a is dropped if it is the very last character of the stream,
                               // otherwise it is shown as '#'.
     436             :         case 0x1a:
     437           0 :                     if( nReadCnt == nFileSize && pStt+1 == pEnd )
     438           0 :                         *pStt = 0;
     439             :                     else
     440           0 :                         *pStt = '#';        // Replacement visualisation
     441           0 :                     break;
     442             : 
                               // Tabs are kept unchanged.
     443           0 :         case '\t':  break;
     444             : 
     445             :         default:
     446           0 :             if( ' ' > *pStt )
     447             :             // Found control char, replace with '#'
     448           0 :                 *pStt = '#';
     449           0 :             break;
     450             :         }
     451             : 
     452           0 :         if( bIns )
     453             :         {
                                   // Break over-long lines: at a space once the line is within 100
                                   // characters of MAX_ASCII_PARA, or unconditionally at
                                   // MAX_ASCII_PARA - 1. The current character is temporarily
                                   // replaced by a terminator so the text before it can be inserted.
     454           0 :             if( ( nLineLen >= MAX_ASCII_PARA - 100 ) &&
     455           0 :                 ( ( *pStt == ' ' ) || ( nLineLen >= MAX_ASCII_PARA - 1 ) ) )
     456             :             {
     457           0 :                 sal_Unicode c = *pStt;
     458           0 :                 *pStt = 0;
     459           0 :                 InsertText( OUString( pLastStt ));
     460           0 :                 pDoc->SplitNode( *pPam->GetPoint(), false );
     461           0 :                 pLastStt = pStt;
     462           0 :                 nLineLen = 0;
     463           0 :                 *pStt = c;
     464             :             }
     465           0 :             ++pStt;
     466           0 :             ++nLineLen;
     467             :         }
     468           0 :         else if( bSplitNode )
     469             :         {
     470             :             // We found a CR/LF, thus save the text
     471           0 :             InsertText( OUString( pLastStt ));
     472           0 :             pDoc->SplitNode( *pPam->GetPoint(), false );
     473           0 :             pLastStt = pStt;
     474           0 :             nLineLen = 0;
     475             :         }
     476             :     } while(true);
     477             : 
                           // Release the converter and its context, if they were created.
     478           0 :     if( hConverter )
     479             :     {
     480           0 :         rtl_destroyTextToUnicodeContext( hConverter, hContext );
     481           0 :         rtl_destroyTextToUnicodeConverter( hConverter );
     482             :     }
     483           0 :     return 0;
     484             : }
     485             : 
     486           0 : void SwASCIIParser::InsertText( const OUString& rStr )
     487             : {
                           // Inserts rStr at pPam, updates the rsid information for the inserted
                           // text and its paragraph, and records which script types the text uses.
     488           0 :     pDoc->InsertString( *pPam, rStr );
     489           0 :     pDoc->UpdateRsid( *pPam, rStr.getLength() );
     490           0 :     pDoc->UpdateParRsid( pPam->GetPoint()->nNode.GetNode().GetTxtNode() );
     491             : 
                           // The script types are only collected while the attribute set still
                           // exists and not all three types have been seen yet.
     492           0 :     if( pItemSet && g_pBreakIt && nScript != ( SCRIPTTYPE_LATIN |
     493             :                                              SCRIPTTYPE_ASIAN |
     494             :                                              SCRIPTTYPE_COMPLEX ) )
     495           0 :         nScript |= g_pBreakIt->GetAllScriptsOfText( rStr );
     496           0 : }
     497             : 
     498             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10