|           Line data    Source code 
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : #include "stringutil.hxx"
      21             : #include "global.hxx"
      22             : #include "svl/zforlist.hxx"
      23             : 
      24             : #include <rtl/ustrbuf.hxx>
      25             : #include <rtl/strbuf.hxx>
      26             : #include <rtl/math.hxx>
      27             : 
      28           0 : ScSetStringParam::ScSetStringParam() :
      29             :     mpNumFormatter(NULL),
      30             :     mbDetectNumberFormat(true),
      31             :     meSetTextNumFormat(Never),
      32           0 :     mbHandleApostrophe(true)
      33             : {
      34           0 : }
      35             : 
      36           0 : void ScSetStringParam::setTextInput()
      37             : {
      38           0 :     mbDetectNumberFormat = false;
      39           0 :     mbHandleApostrophe = false;
      40           0 :     meSetTextNumFormat = Always;
      41           0 : }
      42             : 
      43           0 : void ScSetStringParam::setNumericInput()
      44             : {
      45           0 :     mbDetectNumberFormat = true;
      46           0 :     mbHandleApostrophe = true;
      47           0 :     meSetTextNumFormat = Never;
      48           0 : }
      49             : 
      50           0 : bool ScStringUtil::parseSimpleNumber(
      51             :     const OUString& rStr, sal_Unicode dsep, sal_Unicode gsep, double& rVal)
      52             : {
      53             :     // Actually almost the entire pre-check is unnecessary and we could call
      54             :     // rtl::math::stringToDouble() just after having exchanged ascii space with
      55             :     // non-breaking space, if it wasn't for check of grouped digits. The NaN
      56             :     // and Inf cases that are accepted by stringToDouble() could be detected
      57             :     // using rtl::math::isFinite() on the result.
      58             : 
      59             :     /* TODO: The grouped digits check isn't even valid for locales that do not
      60             :      * group in thousands ... e.g. Indian locales. But that's something also
      61             :      * the number scanner doesn't implement yet, only the formatter. */
      62             : 
      63           0 :     OUStringBuffer aBuf;
      64             : 
      65           0 :     sal_Int32 i = 0;
      66           0 :     sal_Int32 n = rStr.getLength();
      67           0 :     const sal_Unicode* p = rStr.getStr();
      68           0 :     const sal_Unicode* pLast = p + (n-1);
      69           0 :     sal_Int32 nPosDSep = -1, nPosGSep = -1;
      70           0 :     sal_uInt32 nDigitCount = 0;
      71           0 :     sal_Int32 nPosExponent = -1;
      72             : 
      73             :     // Skip preceding spaces.
      74           0 :     for (i = 0; i < n; ++i, ++p)
      75             :     {
      76           0 :         sal_Unicode c = *p;
      77           0 :         if (c != 0x0020 && c != 0x00A0)
      78             :             // first non-space character.  Exit.
      79           0 :             break;
      80             :     }
      81             : 
      82           0 :     if (i == n)
      83             :         // the whole string is space.  Fail.
      84           0 :         return false;
      85             : 
      86           0 :     n -= i; // Subtract the length of the preceding spaces.
      87             : 
      88             :     // Determine the last non-space character.
      89           0 :     for (; p != pLast; --pLast, --n)
      90             :     {
      91           0 :         sal_Unicode c = *pLast;
      92           0 :         if (c != 0x0020 && c != 0x00A0)
      93             :             // Non space character. Exit.
      94           0 :             break;
      95             :     }
      96             : 
      97           0 :     for (i = 0; i < n; ++i, ++p)
      98             :     {
      99           0 :         sal_Unicode c = *p;
     100           0 :         if (c == 0x0020 && gsep == 0x00A0)
     101             :             // ascii space to unicode space if that is group separator
     102           0 :             c = 0x00A0;
     103             : 
     104           0 :         if ('0' <= c && c <= '9')
     105             :         {
     106             :             // this is a digit.
     107           0 :             aBuf.append(c);
     108           0 :             ++nDigitCount;
     109             :         }
     110           0 :         else if (c == dsep)
     111             :         {
     112             :             // this is a decimal separator.
     113             : 
     114           0 :             if (nPosDSep >= 0)
     115             :                 // a second decimal separator -> not a valid number.
     116           0 :                 return false;
     117             : 
     118           0 :             if (nPosGSep >= 0 && i - nPosGSep != 4)
     119             :                 // the number has a group separator and the decimal sep is not
     120             :                 // positioned correctly.
     121           0 :                 return false;
     122             : 
     123           0 :             nPosDSep = i;
     124           0 :             nPosGSep = -1;
     125           0 :             aBuf.append(c);
     126           0 :             nDigitCount = 0;
     127             :         }
     128           0 :         else if (c == gsep)
     129             :         {
     130             :             // this is a group (thousand) separator.
     131             : 
     132           0 :             if (i == 0)
     133             :                 // not allowed as the first character.
     134           0 :                 return false;
     135             : 
     136           0 :             if (nPosDSep >= 0)
     137             :                 // not allowed after the decimal separator.
     138           0 :                 return false;
     139             : 
     140           0 :             if (nPosGSep >= 0 && nDigitCount != 3)
     141             :                 // must be exactly 3 digits since the last group separator.
     142           0 :                 return false;
     143             : 
     144           0 :             if (nPosExponent >= 0)
     145             :                 // not allowed in exponent.
     146           0 :                 return false;
     147             : 
     148           0 :             nPosGSep = i;
     149           0 :             nDigitCount = 0;
     150             :         }
     151           0 :         else if (c == '-' || c == '+')
     152             :         {
     153             :             // A sign must be the first character if it's given, or immediately
     154             :             // follow the exponent character if present.
     155           0 :             if (i == 0 || (nPosExponent >= 0 && i == nPosExponent + 1))
     156           0 :                 aBuf.append(c);
     157             :             else
     158           0 :                 return false;
     159             :         }
     160           0 :         else if (c == 'E' || c == 'e')
     161             :         {
     162             :             // this is an exponent designator.
     163             : 
     164           0 :             if (nPosExponent >= 0)
     165             :                 // Only one exponent allowed.
     166           0 :                 return false;
     167             : 
     168           0 :             if (nPosGSep >= 0 && nDigitCount != 3)
     169             :                 // must be exactly 3 digits since the last group separator.
     170           0 :                 return false;
     171             : 
     172           0 :             aBuf.append(c);
     173           0 :             nPosExponent = i;
     174           0 :             nPosDSep = -1;
     175           0 :             nPosGSep = -1;
     176           0 :             nDigitCount = 0;
     177             :         }
     178             :         else
     179           0 :             return false;
     180             :     }
     181             : 
     182             :     // finished parsing the number.
     183             : 
     184           0 :     if (nPosGSep >= 0 && nDigitCount != 3)
     185             :         // must be exactly 3 digits since the last group separator.
     186           0 :         return false;
     187             : 
     188           0 :     rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
     189           0 :     sal_Int32 nParseEnd = 0;
     190           0 :     OUString aString( aBuf.makeStringAndClear());
     191           0 :     rVal = ::rtl::math::stringToDouble( aString, dsep, gsep, &eStatus, &nParseEnd);
     192           0 :     if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aString.getLength())
     193             :         // Not a valid number or not entire string consumed.
     194           0 :         return false;
     195             : 
     196           0 :     return true;
     197             : }
     198             : 
     199           0 : bool ScStringUtil::parseSimpleNumber(
     200             :     const char* p, size_t n, char dsep, char gsep, double& rVal)
     201             : {
     202             :     // Actually almost the entire pre-check is unnecessary and we could call
     203             :     // rtl::math::stringToDouble() just after having exchanged ascii space with
     204             :     // non-breaking space, if it wasn't for check of grouped digits. The NaN
     205             :     // and Inf cases that are accepted by stringToDouble() could be detected
     206             :     // using rtl::math::isFinite() on the result.
     207             : 
     208             :     /* TODO: The grouped digits check isn't even valid for locales that do not
     209             :      * group in thousands ... e.g. Indian locales. But that's something also
     210             :      * the number scanner doesn't implement yet, only the formatter. */
     211             : 
     212           0 :     OStringBuffer aBuf;
     213             : 
     214           0 :     size_t i = 0;
     215           0 :     const char* pLast = p + (n-1);
     216           0 :     sal_Int32 nPosDSep = -1, nPosGSep = -1;
     217           0 :     sal_uInt32 nDigitCount = 0;
     218           0 :     sal_Int32 nPosExponent = -1;
     219             : 
     220             :     // Skip preceding spaces.
     221           0 :     for (i = 0; i < n; ++i, ++p)
     222             :     {
     223           0 :         char c = *p;
     224           0 :         if (c != ' ')
     225             :             // first non-space character.  Exit.
     226           0 :             break;
     227             :     }
     228             : 
     229           0 :     if (i == n)
     230             :         // the whole string is space.  Fail.
     231           0 :         return false;
     232             : 
     233           0 :     n -= i; // Subtract the length of the preceding spaces.
     234             : 
     235             :     // Determine the last non-space character.
     236           0 :     for (; p != pLast; --pLast, --n)
     237             :     {
     238           0 :         char c = *pLast;
     239           0 :         if (c != ' ')
     240             :             // Non space character. Exit.
     241           0 :             break;
     242             :     }
     243             : 
     244           0 :     for (i = 0; i < n; ++i, ++p)
     245             :     {
     246           0 :         char c = *p;
     247             : 
     248           0 :         if ('0' <= c && c <= '9')
     249             :         {
     250             :             // this is a digit.
     251           0 :             aBuf.append(c);
     252           0 :             ++nDigitCount;
     253             :         }
     254           0 :         else if (c == dsep)
     255             :         {
     256             :             // this is a decimal separator.
     257             : 
     258           0 :             if (nPosDSep >= 0)
     259             :                 // a second decimal separator -> not a valid number.
     260           0 :                 return false;
     261             : 
     262           0 :             if (nPosGSep >= 0 && i - nPosGSep != 4)
     263             :                 // the number has a group separator and the decimal sep is not
     264             :                 // positioned correctly.
     265           0 :                 return false;
     266             : 
     267           0 :             nPosDSep = i;
     268           0 :             nPosGSep = -1;
     269           0 :             aBuf.append(c);
     270           0 :             nDigitCount = 0;
     271             :         }
     272           0 :         else if (c == gsep)
     273             :         {
     274             :             // this is a group (thousand) separator.
     275             : 
     276           0 :             if (i == 0)
     277             :                 // not allowed as the first character.
     278           0 :                 return false;
     279             : 
     280           0 :             if (nPosDSep >= 0)
     281             :                 // not allowed after the decimal separator.
     282           0 :                 return false;
     283             : 
     284           0 :             if (nPosGSep >= 0 && nDigitCount != 3)
     285             :                 // must be exactly 3 digits since the last group separator.
     286           0 :                 return false;
     287             : 
     288           0 :             if (nPosExponent >= 0)
     289             :                 // not allowed in exponent.
     290           0 :                 return false;
     291             : 
     292           0 :             nPosGSep = i;
     293           0 :             nDigitCount = 0;
     294             :         }
     295           0 :         else if (c == '-' || c == '+')
     296             :         {
     297             :             // A sign must be the first character if it's given, or immediately
     298             :             // follow the exponent character if present.
     299           0 :             if (i == 0 || (nPosExponent >= 0 && i == static_cast<size_t>(nPosExponent+1)))
     300           0 :                 aBuf.append(c);
     301             :             else
     302           0 :                 return false;
     303             :         }
     304           0 :         else if (c == 'E' || c == 'e')
     305             :         {
     306             :             // this is an exponent designator.
     307             : 
     308           0 :             if (nPosExponent >= 0)
     309             :                 // Only one exponent allowed.
     310           0 :                 return false;
     311             : 
     312           0 :             if (nPosGSep >= 0 && nDigitCount != 3)
     313             :                 // must be exactly 3 digits since the last group separator.
     314           0 :                 return false;
     315             : 
     316           0 :             aBuf.append(c);
     317           0 :             nPosExponent = i;
     318           0 :             nPosDSep = -1;
     319           0 :             nPosGSep = -1;
     320           0 :             nDigitCount = 0;
     321             :         }
     322             :         else
     323           0 :             return false;
     324             :     }
     325             : 
     326             :     // finished parsing the number.
     327             : 
     328           0 :     if (nPosGSep >= 0 && nDigitCount != 3)
     329             :         // must be exactly 3 digits since the last group separator.
     330           0 :         return false;
     331             : 
     332           0 :     rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
     333           0 :     sal_Int32 nParseEnd = 0;
     334           0 :     OString aString( aBuf.makeStringAndClear());
     335           0 :     rVal = ::rtl::math::stringToDouble( aString, dsep, gsep, &eStatus, &nParseEnd);
     336           0 :     if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aString.getLength())
     337             :         // Not a valid number or not entire string consumed.
     338           0 :         return false;
     339             : 
     340           0 :     return true;
     341             : }
     342             : 
     343           0 : sal_Int32 ScStringUtil::GetQuotedTokenCount(const OUString &rIn, const OUString& rQuotedPairs, sal_Unicode cTok )
     344             : {
     345             :     assert( !(rQuotedPairs.getLength()%2) );
     346             :     assert( rQuotedPairs.indexOf(cTok) );
     347             : 
     348             :     // empty string: TokenCount is 0 per definition
     349           0 :     if ( rIn.isEmpty() )
     350           0 :         return 0;
     351             : 
     352           0 :     sal_Int32      nTokCount       = 1;
     353           0 :     sal_Int32      nLen            = rIn.getLength();
     354           0 :     sal_Int32      nQuotedLen      = rQuotedPairs.getLength();
     355           0 :     sal_Unicode         cQuotedEndChar  = 0;
     356           0 :     const sal_Unicode*  pQuotedStr      = rQuotedPairs.getStr();
     357           0 :     const sal_Unicode*  pStr            = rIn.getStr();
     358           0 :     sal_Int32       nIndex         = 0;
     359           0 :     while ( nIndex < nLen )
     360             :     {
     361           0 :         sal_Unicode c = *pStr;
     362           0 :         if ( cQuotedEndChar )
     363             :         {
     364             :             // reached end of the quote ?
     365           0 :             if ( c == cQuotedEndChar )
     366           0 :                 cQuotedEndChar = 0;
     367             :         }
     368             :         else
     369             :         {
     370             :             // Is the char a quote-beginn char ?
     371           0 :             sal_Int32 nQuoteIndex = 0;
     372           0 :             while ( nQuoteIndex < nQuotedLen )
     373             :             {
     374           0 :                 if ( pQuotedStr[nQuoteIndex] == c )
     375             :                 {
     376           0 :                     cQuotedEndChar = pQuotedStr[nQuoteIndex+1];
     377           0 :                     break;
     378             :                 }
     379             :                 else
     380           0 :                     nQuoteIndex += 2;
     381             :             }
     382             : 
     383             :             // If the token-char matches then increase TokCount
     384           0 :             if ( c == cTok )
     385           0 :                 ++nTokCount;
     386             :         }
     387             : 
     388             :         ++pStr,
     389           0 :         ++nIndex;
     390             :     }
     391             : 
     392           0 :     return nTokCount;
     393             : }
     394             : 
     395           0 : OUString ScStringUtil::GetQuotedToken(const OUString &rIn, sal_Int32 nToken, const OUString& rQuotedPairs,
     396             :                                sal_Unicode cTok, sal_Int32& rIndex )
     397             : {
     398             :     assert( !(rQuotedPairs.getLength()%2) );
     399             :     assert( rQuotedPairs.indexOf(cTok) == -1 );
     400             : 
     401           0 :     const sal_Unicode*  pStr            = rIn.getStr();
     402           0 :     const sal_Unicode*  pQuotedStr      = rQuotedPairs.getStr();
     403           0 :     sal_Unicode         cQuotedEndChar  = 0;
     404           0 :     sal_Int32      nQuotedLen      = rQuotedPairs.getLength();
     405           0 :     sal_Int32      nLen            = rIn.getLength();
     406           0 :     sal_Int32      nTok            = 0;
     407           0 :     sal_Int32      nFirstChar      = rIndex;
     408           0 :     sal_Int32      i               = nFirstChar;
     409             : 
     410             :     // detect token position and length
     411           0 :     pStr += i;
     412           0 :     while ( i < nLen )
     413             :     {
     414           0 :         sal_Unicode c = *pStr;
     415           0 :         if ( cQuotedEndChar )
     416             :         {
     417             :             // end of the quote reached ?
     418           0 :             if ( c == cQuotedEndChar )
     419           0 :                 cQuotedEndChar = 0;
     420             :         }
     421             :         else
     422             :         {
     423             :             // Is the char a quote-begin char ?
     424           0 :             sal_Int32 nQuoteIndex = 0;
     425           0 :             while ( nQuoteIndex < nQuotedLen )
     426             :             {
     427           0 :                 if ( pQuotedStr[nQuoteIndex] == c )
     428             :                 {
     429           0 :                     cQuotedEndChar = pQuotedStr[nQuoteIndex+1];
     430           0 :                     break;
     431             :                 }
     432             :                 else
     433           0 :                     nQuoteIndex += 2;
     434             :             }
     435             : 
     436             :             // If the token-char matches then increase TokCount
     437           0 :             if ( c == cTok )
     438             :             {
     439           0 :                 ++nTok;
     440             : 
     441           0 :                 if ( nTok == nToken )
     442           0 :                     nFirstChar = i+1;
     443             :                 else
     444             :                 {
     445           0 :                     if ( nTok > nToken )
     446           0 :                         break;
     447             :                 }
     448             :             }
     449             :         }
     450             : 
     451             :         ++pStr,
     452           0 :         ++i;
     453             :     }
     454             : 
     455           0 :     if ( nTok >= nToken )
     456             :     {
     457           0 :         if ( i < nLen )
     458           0 :             rIndex = i+1;
     459             :         else
     460           0 :             rIndex = -1;
     461           0 :         return rIn.copy( nFirstChar, i-nFirstChar );
     462             :     }
     463             :     else
     464             :     {
     465           0 :         rIndex = -1;
     466           0 :         return OUString();
     467             :     }
     468             : }
     469             : 
     470           0 : bool ScStringUtil::isMultiline( const OUString& rStr )
     471             : {
     472           0 :     if (rStr.indexOf('\n') != -1)
     473           0 :         return true;
     474             : 
     475           0 :     if (rStr.indexOf(CHAR_CR) != -1)
     476           0 :         return true;
     477             : 
     478           0 :     return false;
     479             : }
     480             : 
     481           0 : ScInputStringType ScStringUtil::parseInputString(
     482             :     SvNumberFormatter& rFormatter, const OUString& rStr, LanguageType eLang )
     483             : {
     484           0 :     ScInputStringType aRet;
     485           0 :     aRet.mnFormatType = 0;
     486           0 :     aRet.meType = ScInputStringType::Unknown;
     487           0 :     aRet.maText = rStr;
     488           0 :     aRet.mfValue = 0.0;
     489             : 
     490           0 :     if (rStr.getLength() > 1 && rStr[0] == '=')
     491             :     {
     492           0 :         aRet.meType = ScInputStringType::Formula;
     493             :     }
     494           0 :     else if (rStr.getLength() > 1 && rStr[0] == '\'')
     495             :     {
     496             :         //  for bEnglish, "'" at the beginning is always interpreted as text
     497             :         //  marker and stripped
     498           0 :         aRet.maText = rStr.copy(1);
     499           0 :         aRet.meType = ScInputStringType::Text;
     500             :     }
     501             :     else        // (nur) auf englisches Zahlformat testen
     502             :     {
     503           0 :         sal_uInt32 nNumFormat = rFormatter.GetStandardIndex(eLang);
     504             : 
     505           0 :         if (rFormatter.IsNumberFormat(rStr, nNumFormat, aRet.mfValue))
     506             :         {
     507           0 :             aRet.meType = ScInputStringType::Number;
     508           0 :             aRet.mnFormatType = rFormatter.GetType(nNumFormat);
     509             :         }
     510           0 :         else if (!rStr.isEmpty())
     511           0 :             aRet.meType = ScInputStringType::Text;
     512             : 
     513             :         //  das (englische) Zahlformat wird nicht gesetzt
     514             :         //! passendes lokales Format suchen und setzen???
     515             :     }
     516             : 
     517           0 :     return aRet;
     518             : }
     519             : 
     520             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
 |