LCOV - code coverage report
Current view: top level - i18npool/qa/cppunit - test_breakiterator.cxx (source / functions) Hit Total Coverage
Test: commit 0e63ca4fde4e446f346e35849c756a30ca294aab Lines: 494 511 96.7 %
Date: 2014-04-11 Functions: 20 22 90.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  */
       9             : 
      10             : #include <cppuhelper/compbase1.hxx>
      11             : #include <cppuhelper/bootstrap.hxx>
      12             : #include <cppuhelper/basemutex.hxx>
      13             : #include <com/sun/star/i18n/XBreakIterator.hpp>
      14             : #include <com/sun/star/i18n/CharacterIteratorMode.hpp>
      15             : #include <com/sun/star/i18n/ScriptType.hpp>
      16             : #include <com/sun/star/i18n/WordType.hpp>
      17             : #include <unotest/bootstrapfixturebase.hxx>
      18             : 
      19             : #include <unicode/uversion.h>
      20             : 
      21             : #include <rtl/strbuf.hxx>
      22             : #include <rtl/ustrbuf.hxx>
      23             : 
      24             : #include <string.h>
      25             : 
      26             : #include <stack>
      27             : 
      28             : using namespace ::com::sun::star;
      29             : 
      30          27 : class TestBreakIterator : public test::BootstrapFixtureBase // CppUnit fixture whose tests drive the i18n::XBreakIterator held in m_xBreak
      31             : {
      32             : public:
      33             :     virtual void setUp() SAL_OVERRIDE;
      34             :     virtual void tearDown() SAL_OVERRIDE;
      35             : 
      36             :     void testLineBreaking();
      37             :     void testWordBoundaries();
      38             :     void testGraphemeIteration();
      39             :     void testWeak();
      40             :     void testAsian();
      41             :     void testThai();
      42             :     void testLao();
      43             : #ifdef TODO // testNorthernThai is declared only when TODO is defined
      44             :     void testNorthernThai();
      45             : #endif
      46             :     void testKhmer();
      47             :     void testJapanese();
      48             :     void testChinese();
      49           2 :     CPPUNIT_TEST_SUITE(TestBreakIterator); // suite registration; the order below differs from the declaration order above
      50           1 :     CPPUNIT_TEST(testLineBreaking);
      51           1 :     CPPUNIT_TEST(testGraphemeIteration);
      52           1 :     CPPUNIT_TEST(testWeak);
      53           1 :     CPPUNIT_TEST(testAsian);
      54           1 :     CPPUNIT_TEST(testThai);
      55             : #ifdef TODO // testNorthernThai is registered only when TODO is defined
      56             :     CPPUNIT_TEST(testNorthernThai);
      57             : #endif
      58             : 
      59           1 :     CPPUNIT_TEST(testWordBoundaries);
      60             : #if (U_ICU_VERSION_MAJOR_NUM > 4) // testKhmer is registered only when the ICU major version is above 4
      61           1 :     CPPUNIT_TEST(testKhmer);
      62             : #endif
      63             : #if (U_ICU_VERSION_MAJOR_NUM > 51) // testLao is registered only when the ICU major version is above 51
      64             :     CPPUNIT_TEST(testLao);
      65             : #endif
      66           1 :     CPPUNIT_TEST(testJapanese);
      67           1 :     CPPUNIT_TEST(testChinese);
      68           2 :     CPPUNIT_TEST_SUITE_END();
      69             : private:
      70             :     uno::Reference<i18n::XBreakIterator> m_xBreak; // break iterator under test; NOTE(review): presumably acquired in setUp() and released in tearDown() - confirm, their bodies are not in this view
      71             : };
      72             : 
      73           1 : void TestBreakIterator::testLineBreaking() // checks the breakIndex returned by m_xBreak->getLineBreak() for four reported bug cases
      74             : {
      75           1 :     i18n::LineBreakHyphenationOptions aHyphOptions; // default-constructed and never modified in this test
      76           2 :     i18n::LineBreakUserOptions aUserOptions; // default-constructed and never modified in this test
      77           2 :     lang::Locale aLocale; // Language/Country are reassigned at the start of every case below
      78             : 
      79             :     //Case 1 (en-US), see https://bugs.libreoffice.org/show_bug.cgi?id=31271
      80             :     {
      81           1 :         OUString aTest("(some text here)");
      82             : 
      83           1 :         aLocale.Language = "en";
      84           1 :         aLocale.Country = "US";
      85             : 
      86             :         {
      87             :             //Here we want the line break to leave "text here)" on the next line: with 9 (the length of "(some tex") as second argument the expected breakIndex is 6, the index of the 't' of "text"
      88           1 :             i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, strlen("(some tex"), aLocale, 0, aHyphOptions, aUserOptions);
      89           1 :             CPPUNIT_ASSERT_MESSAGE("Expected a break at the start of the word", aResult.breakIndex == 6);
      90             :         }
      91             : 
      92             :         {
      93             :             //Here we want the line break to leave "here)" on the next line: with 15 (the length of "(some text here") as second argument the expected breakIndex is 11, the index of the 'h' of "here"
      94           1 :             i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, strlen("(some text here"), aLocale, 0, aHyphOptions, aUserOptions);
      95           1 :             CPPUNIT_ASSERT_MESSAGE("Expected a break at the start of the word", aResult.breakIndex == 11);
      96           1 :         }
      97             :     }
      98             : 
      99             :     //Case 2 (he-IL), see https://bugs.libreoffice.org/show_bug.cgi?id=49849
     100             :     {
     101           1 :         const sal_Unicode HEBREW1[] = { 0x05DE, 0x05D9, 0x05DC, 0x05D9, 0x5DD }; // five code units in the U+05D9..U+05DE range; 0x5DD is 0x05DD written without the leading zero
     102           1 :         OUString aWord(HEBREW1, SAL_N_ELEMENTS(HEBREW1));
     103           2 :         OUString aTest(OUStringBuffer(aWord).append(' ').append(aWord).makeStringAndClear()); // the same word twice, separated by a single space
     104             : 
     105           1 :         aLocale.Language = "he";
     106           1 :         aLocale.Country = "IL";
     107             : 
     108             :         {
     109             :             //Here we want the line break to happen at the whitespace: the expected breakIndex is aWord.getLength()+1, the first index after the space, so the second word moves to the next line
     110           1 :             i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, aTest.getLength()-1, aLocale, 0, aHyphOptions, aUserOptions);
     111           1 :             CPPUNIT_ASSERT_MESSAGE("Expected a break at the start of the word", aResult.breakIndex == aWord.getLength()+1);
     112           1 :         }
     113             :     }
     114             : 
     115             :     //Case 3 (en-US), see https://issues.apache.org/ooo/show_bug.cgi?id=17155
     116             :     {
     117           1 :         OUString aTest("foo /bar/baz");
     118             : 
     119           1 :         aLocale.Language = "en";
     120           1 :         aLocale.Country = "US";
     121             : 
     122             :         {
     123             :             //Here we want the line break to leave "/bar/ba" clumped together on the next line: the expected breakIndex is 4, the index of the first '/', rather than a position at the second '/'
     124           1 :             i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, strlen("foo /bar/ba"), aLocale, 0,
     125           1 :                 aHyphOptions, aUserOptions);
     126           1 :             CPPUNIT_ASSERT_MESSAGE("Expected a break at the first slash", aResult.breakIndex == 4);
     127           1 :         }
     128             :     }
     129             : 
     130             :     //Case 4 (en-US), see https://issues.apache.org/ooo/show_bug.cgi?id=19716
     131             :     {
     132           1 :         OUString aTest("aaa]aaa");
     133             : 
     134           1 :         aLocale.Language = "en";
     135           1 :         aLocale.Country = "US";
     136             : 
     137             :         {
     138             :             //Here we want the line break to move the whole lot to the next line: the expected breakIndex is 0, i.e. no break is taken at the ']' in the middle of the text
     139           2 :             i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, aTest.getLength()-2, aLocale, 0,
     140           2 :                 aHyphOptions, aUserOptions);
     141           1 :             CPPUNIT_ASSERT_MESSAGE("Expected a break at the start of the line, not at ]", aResult.breakIndex == 0);
     142           1 :         }
     143           1 :     }
     144           1 : }
     145             : 
     146             : //See https://bugs.libreoffice.org/show_bug.cgi?id=49629
     147           1 : void TestBreakIterator::testWordBoundaries()
     148             : {
     149           1 :     lang::Locale aLocale;
     150           1 :     aLocale.Language = "en";
     151           1 :     aLocale.Country = "US";
     152             : 
     153           1 :     i18n::Boundary aBounds;
     154             : 
     155             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=11993
     156             :     {
     157           1 :         OUString aTest("abcd ef  ghi??? KLM");
     158             : 
     159           1 :         CPPUNIT_ASSERT(!m_xBreak->isBeginWord(aTest, 4, aLocale, i18n::WordType::DICTIONARY_WORD));
     160           1 :         CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 4, aLocale, i18n::WordType::DICTIONARY_WORD));
     161           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 4, aLocale, i18n::WordType::DICTIONARY_WORD, true);
     162           1 :         CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 4);
     163             : 
     164           1 :         CPPUNIT_ASSERT(!m_xBreak->isBeginWord(aTest, 8, aLocale, i18n::WordType::DICTIONARY_WORD));
     165           1 :         CPPUNIT_ASSERT(!m_xBreak->isEndWord(aTest, 8, aLocale, i18n::WordType::DICTIONARY_WORD));
     166             : 
     167             :         //next word
     168           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 8, aLocale, i18n::WordType::DICTIONARY_WORD, true);
     169           1 :         CPPUNIT_ASSERT(aBounds.startPos == 9 && aBounds.endPos == 12);
     170             : 
     171             :         //previous word
     172           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 8, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     173           1 :         CPPUNIT_ASSERT(aBounds.startPos == 5 && aBounds.endPos == 7);
     174             : 
     175           1 :         CPPUNIT_ASSERT(!m_xBreak->isBeginWord(aTest, 12, aLocale, i18n::WordType::DICTIONARY_WORD));
     176           1 :         CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 12, aLocale, i18n::WordType::DICTIONARY_WORD));
     177           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 12, aLocale, i18n::WordType::DICTIONARY_WORD, true);
     178           1 :         CPPUNIT_ASSERT(aBounds.startPos == 9 && aBounds.endPos == 12);
     179             : 
     180           1 :         CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 16, aLocale, i18n::WordType::DICTIONARY_WORD));
     181           1 :         CPPUNIT_ASSERT(!m_xBreak->isEndWord(aTest, 16, aLocale, i18n::WordType::DICTIONARY_WORD));
     182           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 16, aLocale, i18n::WordType::DICTIONARY_WORD, true);
     183           1 :         CPPUNIT_ASSERT(aBounds.startPos == 16 && aBounds.endPos == 19);
     184             :     }
     185             : 
     186             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=21907
     187             :     {
     188           1 :         OUString aTest("b a?");
     189             : 
     190           1 :         CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 1, aLocale, i18n::WordType::ANY_WORD));
     191           1 :         CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 2, aLocale, i18n::WordType::ANY_WORD));
     192           1 :         CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 3, aLocale, i18n::WordType::ANY_WORD));
     193             : 
     194           1 :         CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 3, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES));
     195             : 
     196           1 :         CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 1, aLocale, i18n::WordType::ANY_WORD));
     197           1 :         CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 2, aLocale, i18n::WordType::ANY_WORD));
     198           1 :         CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 3, aLocale, i18n::WordType::ANY_WORD));
     199             : 
     200           1 :         CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 3, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES));
     201             :     }
     202             : 
     203             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=14904
     204             :     {
     205             :         const sal_Unicode TEST[] =
     206             :         {
     207             :             'W', 'o', 'r', 'k', 'i', 'n', 'g', ' ', 0x201C, 'W', 'o', 'r', 'd', 's',
     208             :             ' ', 's', 't', 'a', 'r', 't', 'i', 'n', 'g', ' ', 'w', 'i', 't',
     209             :             'h', ' ', 'q', 'u', 'o', 't', 'e', 's', 0x201D, ' ', 'W', 'o', 'r', 'k',
     210             :             'i', 'n', 'g', ' ', 0x2018, 'B', 'r', 'o', 'k', 'e', 'n', 0x2019, ' ',
     211             :             '?', 'S', 'p', 'a', 'n', 'i', 's', 'h', '?', ' ', 'd', 'o', 'e',
     212             :             's', 'n', 0x2019, 't', ' ', 'w', 'o', 'r', 'k', '.', ' ', 'N', 'o',
     213             :             't', ' ', 'e', 'v', 'e', 'n', ' ' , 0x00BF, 'r', 'e', 'a', 'l', '?', ' ',
     214             :             'S', 'p', 'a', 'n', 'i', 's', 'h'
     215           1 :         };
     216           1 :         OUString aTest(TEST, SAL_N_ELEMENTS(TEST));
     217             : 
     218           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 4, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     219           1 :         CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 7);
     220             : 
     221           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 12, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     222           1 :         CPPUNIT_ASSERT(aBounds.startPos == 9 && aBounds.endPos == 14);
     223             : 
     224           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 40, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     225           1 :         CPPUNIT_ASSERT(aBounds.startPos == 37 && aBounds.endPos == 44);
     226             : 
     227           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 49, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     228           1 :         CPPUNIT_ASSERT(aBounds.startPos == 46 && aBounds.endPos == 52);
     229             : 
     230           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 58, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     231           1 :         CPPUNIT_ASSERT(aBounds.startPos == 55 && aBounds.endPos == 62);
     232             : 
     233           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 67, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     234           1 :         CPPUNIT_ASSERT(aBounds.startPos == 64 && aBounds.endPos == 71);
     235             : 
     236           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 90, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     237           1 :         CPPUNIT_ASSERT(aBounds.startPos == 88 && aBounds.endPos == 92);
     238             :     }
     239             : 
     240             :     //See https://bugs.libreoffice.org/show_bug.cgi?id=49629
     241           1 :     sal_Unicode aBreakTests[] = { ' ', 1, 2, 3, 4, 5, 6, 7, 0x91, 0x92, 0x200B, 0xE8FF, 0xF8FF };
     242           5 :     for (int mode = i18n::WordType::ANY_WORD; mode <= i18n::WordType::WORD_COUNT; ++mode)
     243             :     {
     244             :         //make sure that in all cases isBeginWord and isEndWord matches getWordBoundary
     245          56 :         for (size_t i = 0; i < SAL_N_ELEMENTS(aBreakTests); ++i)
     246             :         {
     247             : #if (U_ICU_VERSION_MAJOR_NUM == 4) && (U_ICU_VERSION_MINOR_NUM <= 2)
     248             :             //Note the breakiterator test is known to fail on older icu
     249             :             //versions (4.2.1) for the 200B (ZWSP) Zero Width Space testcase.
     250             :             if (aBreakTests[i] == 0x200B)
     251             :                 continue;
     252             : #endif
     253          52 :             OUString aTest = "Word" + OUString(aBreakTests[i]) + "Word";
     254          52 :             aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, mode, true);
     255          52 :             switch (mode)
     256             :             {
     257             :                 case i18n::WordType::ANY_WORD:
     258          13 :                     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 4);
     259          13 :                     break;
     260             :                 case i18n::WordType::ANYWORD_IGNOREWHITESPACES:
     261          13 :                     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 4);
     262          13 :                     break;
     263             :                 case i18n::WordType::DICTIONARY_WORD:
     264          13 :                     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 4);
     265          13 :                     break;
     266             :                 case i18n::WordType::WORD_COUNT:
     267          13 :                     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 4);
     268          13 :                     break;
     269             :             }
     270             : 
     271          52 :             CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, aBounds.startPos, aLocale, mode));
     272          52 :             CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, aBounds.endPos, aLocale, mode));
     273          52 :         }
     274             :     }
     275             : 
     276           1 :     sal_Unicode aJoinTests[] = { 'X', 0x200C, 0x200D, 0x2060, 0xFEFF, 0xFFF9, 0xFFFA, 0xFFFB };
     277           5 :     for (int mode = i18n::WordType::ANY_WORD; mode <= i18n::WordType::WORD_COUNT; ++mode)
     278             :     {
     279             :         //make sure that in all cases isBeginWord and isEndWord matches getWordBoundary
     280          36 :         for (size_t i = 0; i < SAL_N_ELEMENTS(aJoinTests); ++i)
     281             :         {
     282          32 :             OUString aTest = "Word" + OUString(aJoinTests[i]) + "Word";
     283          32 :             aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, mode, true);
     284          32 :             switch (mode)
     285             :             {
     286             :                 case i18n::WordType::ANY_WORD:
     287           8 :                     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 9);
     288           8 :                     break;
     289             :                 case i18n::WordType::ANYWORD_IGNOREWHITESPACES:
     290           8 :                     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 9);
     291           8 :                     break;
     292             :                 case i18n::WordType::DICTIONARY_WORD:
     293           8 :                     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 9);
     294           8 :                     break;
     295             :                 case i18n::WordType::WORD_COUNT:
     296           8 :                     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 9);
     297           8 :                     break;
     298             :             }
     299             : 
     300          32 :             CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, aBounds.startPos, aLocale, mode));
     301          32 :             CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, aBounds.endPos, aLocale, mode));
     302          32 :         }
     303             :     }
     304             : 
     305             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=13494
     306             :     {
     307           1 :         const OUString aBase("xxAAxxBBxxCCxx");
     308             :         const sal_Unicode aTests[] =
     309             :         {
     310             :             '\'', ';', ',', '.', '!', '@', '#', '%', '&', '*',
     311             :             '(', ')', '_', '-', '{', '}', '[', ']', '\"', '/',
     312             :             '\\', '?', '~', '$', '+', '^', '=', '<', '>', '|'
     313           1 :         };
     314             : 
     315           1 :         const sal_Int32 aDoublePositions[] = {0, 2, 4, 6, 8, 10, 12, 14};
     316          31 :         for (size_t j = 0; j < SAL_N_ELEMENTS(aTests); ++j)
     317             :         {
     318          30 :             OUString aTest = aBase.replace('x', aTests[j]);
     319          30 :             sal_Int32 nPos = -1;
     320          30 :             size_t i = 0;
     321         240 :             do
     322             :             {
     323         240 :                 CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aDoublePositions));
     324         240 :                 nPos = m_xBreak->nextWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
     325         240 :                 CPPUNIT_ASSERT(nPos == aDoublePositions[i++]);
     326             :             }
     327         240 :             while (nPos < aTest.getLength());
     328          30 :             nPos = aTest.getLength();
     329          30 :             i = SAL_N_ELEMENTS(aDoublePositions)-1;
     330         210 :             do
     331             :             {
     332         210 :                 nPos = m_xBreak->previousWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
     333         210 :                 CPPUNIT_ASSERT(nPos == aDoublePositions[--i]);
     334             :             }
     335             :             while (nPos > 0);
     336          30 :         }
     337             : 
     338           1 :         const sal_Int32 aSinglePositions[] = {0, 1, 3, 4, 6, 7, 9, 10};
     339          30 :         for (size_t j = 1; j < SAL_N_ELEMENTS(aTests); ++j)
     340             :         {
     341          29 :             OUString aTest = aBase.replaceAll(OUString("xx"), OUString(aTests[j]));
     342          29 :             sal_Int32 nPos = -1;
     343          29 :             size_t i = 0;
     344         232 :             do
     345             :             {
     346         232 :                 CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aSinglePositions));
     347         232 :                 nPos = m_xBreak->nextWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
     348         232 :                 CPPUNIT_ASSERT(nPos == aSinglePositions[i++]);
     349             :             }
     350         232 :             while (nPos < aTest.getLength());
     351          29 :             nPos = aTest.getLength();
     352          29 :             i = SAL_N_ELEMENTS(aSinglePositions)-1;
     353         203 :             do
     354             :             {
     355         203 :                 nPos = m_xBreak->previousWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
     356         203 :                 CPPUNIT_ASSERT(nPos == aSinglePositions[--i]);
     357             :             }
     358             :             while (nPos > 0);
     359          29 :         }
     360             : 
     361           1 :         const sal_Int32 aSingleQuotePositions[] = {0, 1, 9, 10};
     362           1 :         CPPUNIT_ASSERT(aTests[0] == '\'');
     363             :         {
     364           1 :             OUString aTest = aBase.replaceAll(OUString("xx"), OUString(aTests[0]));
     365           1 :             sal_Int32 nPos = -1;
     366           1 :             size_t i = 0;
     367           4 :             do
     368             :             {
     369           4 :                 CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aSingleQuotePositions));
     370           4 :                 nPos = m_xBreak->nextWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
     371           4 :                 CPPUNIT_ASSERT(nPos == aSingleQuotePositions[i++]);
     372             :             }
     373           4 :             while (nPos < aTest.getLength());
     374           1 :             nPos = aTest.getLength();
     375           1 :             i = SAL_N_ELEMENTS(aSingleQuotePositions)-1;
     376           3 :             do
     377             :             {
     378           3 :                 nPos = m_xBreak->previousWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
     379           3 :                 CPPUNIT_ASSERT(nPos == aSingleQuotePositions[--i]);
     380             :             }
     381           1 :             while (nPos > 0);
     382           1 :         }
     383             :     }
     384             : 
     385             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=13451
     386             :     {
     387           1 :         aLocale.Language = "ca";
     388           1 :         aLocale.Country = "ES";
     389             : 
     390           1 :         OUString aTest("mirar-se comprar-vos donem-nos les mans aneu-vos-en!");
     391             : 
     392           1 :         sal_Int32 nPos = 0;
     393           1 :         sal_Int32 aExpected[] = {8, 20, 30, 34, 39, 51, 52};
     394           1 :         size_t i = 0;
     395           7 :         do
     396             :         {
     397           7 :             CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected));
     398           7 :             nPos = m_xBreak->getWordBoundary(aTest, nPos, aLocale,
     399           7 :                 i18n::WordType::DICTIONARY_WORD, true).endPos;
     400           7 :             CPPUNIT_ASSERT(aExpected[i++] == nPos);
     401             :         }
     402           7 :         while (nPos++ < aTest.getLength());
     403           1 :         CPPUNIT_ASSERT(i == SAL_N_ELEMENTS(aExpected));
     404             :     }
     405             : 
     406             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=85411
     407           3 :     for (int j = 0; j < 2; ++j)
     408             :     {
     409           2 :         switch (j)
     410             :         {
     411             :             case 0:
     412           1 :                 aLocale.Language = "en";
     413           1 :                 aLocale.Country = "US";
     414           1 :                 break;
     415             :             case 1:
     416           1 :                 aLocale.Language = "ca";
     417           1 :                 aLocale.Country = "ES";
     418           1 :                 break;
     419             :             default:
     420           0 :                 CPPUNIT_ASSERT(false);
     421           0 :                 break;
     422             :         }
     423             : 
     424             :         const sal_Unicode TEST[] =
     425             :         {
     426             :             'I', 0x200B, 'w', 'a', 'n', 't', 0x200B, 't', 'o', 0x200B, 'g', 'o'
     427           2 :         };
     428           2 :         OUString aTest(TEST, SAL_N_ELEMENTS(TEST));
     429             : 
     430           2 :         sal_Int32 nPos = 0;
     431           2 :         sal_Int32 aExpected[] = {1, 6, 9, 12};
     432           2 :         size_t i = 0;
     433           8 :         do
     434             :         {
     435           8 :             CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected));
     436           8 :             nPos = m_xBreak->getWordBoundary(aTest, nPos, aLocale,
     437           8 :                 i18n::WordType::DICTIONARY_WORD, true).endPos;
     438           8 :             CPPUNIT_ASSERT(aExpected[i++] == nPos);
     439             :         }
     440           8 :         while (nPos++ < aTest.getLength());
     441           2 :         CPPUNIT_ASSERT(i == SAL_N_ELEMENTS(aExpected));
     442           2 :     }
     443             : 
     444             :     //https://issues.apache.org/ooo/show_bug.cgi?id=21290
     445           3 :     for (int j = 0; j < 2; ++j)
     446             :     {
     447           2 :         switch (j)
     448             :         {
     449             :             case 0:
     450           1 :                 aLocale.Language = "en";
     451           1 :                 aLocale.Country = "US";
     452           1 :                 break;
     453             :             case 1:
     454           1 :                 aLocale.Language = "grc";
     455           1 :                 aLocale.Country = "";
     456           1 :                 break;
     457             :             default:
     458           0 :                 CPPUNIT_ASSERT(false);
     459           0 :                 break;
     460             :         }
     461             : 
     462             :         const sal_Unicode TEST[] =
     463             :         {
     464             :             0x1F0C, 0x03BD, 0x03B4, 0x03C1, 0x03B1, 0x0020, 0x1F00,
     465             :             0x03C1, 0x03BD, 0x1F7B, 0x03BC, 0x03B5, 0x03BD, 0x03BF,
     466             :             0x03C2, 0x0020, 0x1F00, 0x03BB, 0x03BB, 0x0020, 0x1F24,
     467             :             0x03C3, 0x03B8, 0x03B9, 0x03BF, 0x03BD
     468           2 :         };
     469           2 :         OUString aTest(TEST, SAL_N_ELEMENTS(TEST));
     470             : 
     471           2 :         sal_Int32 nPos = 0;
     472           2 :         sal_Int32 aExpected[] = {5, 15, 19, 26};
     473           2 :         size_t i = 0;
     474           8 :         do
     475             :         {
     476           8 :             CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected));
     477           8 :             nPos = m_xBreak->getWordBoundary(aTest, nPos, aLocale,
     478           8 :                 i18n::WordType::DICTIONARY_WORD, true).endPos;
     479           8 :             CPPUNIT_ASSERT(aExpected[i++] == nPos);
     480             :         }
     481           8 :         while (nPos++ < aTest.getLength());
     482           2 :         CPPUNIT_ASSERT(i == SAL_N_ELEMENTS(aExpected));
     483           2 :     }
     484             : 
     485             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=58513
     486             :     {
     487           1 :         aLocale.Language = "fi";
     488           1 :         aLocale.Country = "FI";
     489             : 
     490           1 :         OUString aTest("Kuorma-auto kaakkois- ja Keski-Suomi");
     491             : 
     492             :         {
     493           1 :             sal_Int32 nPos = 0;
     494           1 :             sal_Int32 aExpected[] = {12, 22, 25, 36};
     495           1 :             size_t i = 0;
     496           4 :             do
     497             :             {
     498           4 :                 CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected));
     499           4 :                 nPos = m_xBreak->getWordBoundary(aTest, nPos, aLocale,
     500           4 :                     i18n::WordType::WORD_COUNT, true).endPos;
     501           4 :                 CPPUNIT_ASSERT(aExpected[i++] == nPos);
     502             :             }
     503           4 :             while (nPos++ < aTest.getLength());
     504           1 :             CPPUNIT_ASSERT(i == SAL_N_ELEMENTS(aExpected));
     505             :         }
     506             : 
     507             :         {
     508           1 :             sal_Int32 nPos = 0;
     509           1 :             sal_Int32 aExpected[] = {0, 11, 12, 21, 22, 24, 25, 36};
     510           1 :             size_t i = 0;
     511           4 :             do
     512             :             {
     513           4 :                 CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected));
     514           4 :                 aBounds = m_xBreak->getWordBoundary(aTest, nPos, aLocale,
     515           4 :                     i18n::WordType::DICTIONARY_WORD, true);
     516           4 :                 CPPUNIT_ASSERT(aExpected[i++] == aBounds.startPos);
     517           4 :                 CPPUNIT_ASSERT(aExpected[i++] == aBounds.endPos);
     518           4 :                 nPos = aBounds.endPos;
     519             :             }
     520           4 :             while (nPos++ < aTest.getLength());
     521           1 :             CPPUNIT_ASSERT(i == SAL_N_ELEMENTS(aExpected));
     522           1 :         }
     523             :     }
     524             : 
     525             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=107843
     526             :     {
     527           1 :         aLocale.Language = "en";
     528           1 :         aLocale.Country = "US";
     529             : 
     530             :         const sal_Unicode TEST[] =
     531             :         {
     532             :             'r', 'u', 0xFB00, 'l', 'e', ' ', 0xFB01, 's', 'h'
     533           1 :         };
     534           1 :         OUString aTest(TEST, SAL_N_ELEMENTS(TEST));
     535             : 
     536           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 1, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     537           1 :         CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 5);
     538             : 
     539           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 7, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     540           1 :         CPPUNIT_ASSERT(aBounds.startPos == 6 && aBounds.endPos == 9);
     541             :     }
     542             : 
     543             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=113785
     544             :     {
     545           1 :         aLocale.Language = "en";
     546           1 :         aLocale.Country = "US";
     547             : 
     548             :         const sal_Unicode TEST[] =
     549             :         {
     550             :             'a', 0x2013, 'b', 0x2014, 'c'
     551           1 :         };
     552           1 :         OUString aTest(TEST, SAL_N_ELEMENTS(TEST));
     553             : 
     554           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, i18n::WordType::DICTIONARY_WORD, true);
     555           1 :         CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 1);
     556             : 
     557           1 :         aBounds = m_xBreak->nextWord(aTest, 0, aLocale, i18n::WordType::DICTIONARY_WORD);
     558           1 :         CPPUNIT_ASSERT(aBounds.startPos == 2 && aBounds.endPos == 3);
     559             : 
     560           1 :         aBounds = m_xBreak->nextWord(aTest, aBounds.endPos, aLocale, i18n::WordType::DICTIONARY_WORD);
     561           1 :         CPPUNIT_ASSERT(aBounds.startPos == 4 && aBounds.endPos == 5);
     562           1 :     }
     563           1 : }
     564             : 
     565             : //See https://bugs.libreoffice.org/show_bug.cgi?id=40292
     566             : //See https://issues.apache.org/ooo/show_bug.cgi?id=80412
     567             : //See https://issues.apache.org/ooo/show_bug.cgi?id=111152
     568             : //See https://issues.apache.org/ooo/show_bug.cgi?id=50172
     569           1 : void TestBreakIterator::testGraphemeIteration()
     570             : {
     571           1 :     lang::Locale aLocale;
     572           1 :     aLocale.Language = "bn";
     573           1 :     aLocale.Country = "IN";
     574             : 
     575             :     {
     576           1 :         const sal_Unicode BA_HALANT_LA[] = { 0x09AC, 0x09CD, 0x09AF };
     577           1 :         OUString aTest(BA_HALANT_LA, SAL_N_ELEMENTS(BA_HALANT_LA));
     578             : 
     579           1 :         sal_Int32 nDone=0;
     580             :         sal_Int32 nPos;
     581           1 :         nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
     582           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     583           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(BA_HALANT_LA));
     584           1 :         nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(BA_HALANT_LA), aLocale,
     585           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     586           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
     587             :     }
     588             : 
     589             :     {
     590           1 :         const sal_Unicode HA_HALANT_NA_VOWELSIGNI[] = { 0x09B9, 0x09CD, 0x09A3, 0x09BF };
     591           1 :         OUString aTest(HA_HALANT_NA_VOWELSIGNI, SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI));
     592             : 
     593           1 :         sal_Int32 nDone=0;
     594             :         sal_Int32 nPos;
     595           1 :         nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
     596           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     597           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI));
     598           1 :         nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI), aLocale,
     599           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     600           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
     601             :     }
     602             : 
     603             :     {
     604           1 :         const sal_Unicode TA_HALANT_MA_HALANT_YA  [] = { 0x09A4, 0x09CD, 0x09AE, 0x09CD, 0x09AF };
     605           1 :         OUString aTest(TA_HALANT_MA_HALANT_YA, SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA));
     606             : 
     607           1 :         sal_Int32 nDone=0;
     608             :         sal_Int32 nPos;
     609           1 :         nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
     610           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     611           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA));
     612           1 :         nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA), aLocale,
     613           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     614           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
     615             :     }
     616             : 
     617           1 :     aLocale.Language = "ta";
     618           1 :     aLocale.Country = "IN";
     619             : 
     620             :     {
     621           1 :         const sal_Unicode KA_VIRAMA_SSA[] = { 0x0B95, 0x0BCD, 0x0BB7 };
     622           1 :         OUString aTest(KA_VIRAMA_SSA, SAL_N_ELEMENTS(KA_VIRAMA_SSA));
     623             : 
     624           1 :         sal_Int32 nDone=0;
     625           1 :         sal_Int32 nPos = 0;
     626             : 
     627           1 :         nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
     628           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     629           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(KA_VIRAMA_SSA));
     630           1 :         nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(KA_VIRAMA_SSA), aLocale,
     631           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     632           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
     633             :     }
     634             : 
     635             :     {
     636           1 :         const sal_Unicode KA_VOWELSIGNU[] = { 0x0B95, 0x0BC1 };
     637           1 :         OUString aTest(KA_VOWELSIGNU, SAL_N_ELEMENTS(KA_VOWELSIGNU));
     638             : 
     639           1 :         sal_Int32 nDone=0;
     640           1 :         sal_Int32 nPos = 0;
     641             : 
     642           1 :         nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
     643           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     644           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(KA_VOWELSIGNU));
     645           1 :         nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(KA_VOWELSIGNU), aLocale,
     646           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     647           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
     648             :     }
     649             : 
     650             :     {
     651             :         const sal_Unicode CA_VOWELSIGNI_TA_VIRAMA_TA_VOWELSIGNI_RA_VOWELSIGNAI[] =
     652           1 :             { 0x0B9A, 0x0BBF, 0x0BA4, 0x0BCD, 0x0BA4, 0x0BBF, 0x0BB0, 0x0BC8 };
     653             :         OUString aTest(CA_VOWELSIGNI_TA_VIRAMA_TA_VOWELSIGNI_RA_VOWELSIGNAI,
     654           1 :             SAL_N_ELEMENTS(CA_VOWELSIGNI_TA_VIRAMA_TA_VOWELSIGNI_RA_VOWELSIGNAI));
     655             : 
     656           1 :         sal_Int32 nDone=0;
     657           1 :         sal_Int32 nPos=0;
     658             : 
     659           5 :         for (sal_Int32 i = 0; i < 4; ++i)
     660             :         {
     661           4 :             sal_Int32 nOldPos = nPos;
     662           4 :             nPos = m_xBreak->nextCharacters(aTest, nPos, aLocale,
     663           4 :                 i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     664           4 :             CPPUNIT_ASSERT_MESSAGE("Should skip 2 units", nPos == nOldPos+2);
     665             :         }
     666             : 
     667           5 :         for (sal_Int32 i = 0; i < 4; ++i)
     668             :         {
     669           4 :             sal_Int32 nOldPos = nPos;
     670           4 :             nPos = m_xBreak->previousCharacters(aTest, nPos, aLocale,
     671           4 :                 i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     672           4 :             CPPUNIT_ASSERT_MESSAGE("Should skip 2 units", nPos == nOldPos-2);
     673           1 :         }
     674             :     }
     675             : 
     676             :     {
     677           1 :         const sal_Unicode ALEF_QAMATS [] = { 0x05D0, 0x05B8 };
     678           1 :         OUString aText(ALEF_QAMATS, SAL_N_ELEMENTS(ALEF_QAMATS));
     679             : 
     680           1 :         sal_Int32 nGraphemeCount = 0;
     681             : 
     682           1 :         sal_Int32 nCurPos = 0;
     683           3 :         while (nCurPos < aText.getLength())
     684             :         {
     685           1 :             sal_Int32 nCount2 = 1;
     686           1 :             nCurPos = m_xBreak->nextCharacters(aText, nCurPos, lang::Locale(),
     687           1 :                 i18n::CharacterIteratorMode::SKIPCELL, nCount2, nCount2);
     688           1 :             ++nGraphemeCount;
     689             :         }
     690             : 
     691           1 :         CPPUNIT_ASSERT_MESSAGE("Should be considered 1 grapheme", nGraphemeCount == 1);
     692             :     }
     693             : 
     694           1 :     aLocale.Language = "hi";
     695           1 :     aLocale.Country = "IN";
     696             : 
     697             :     {
     698           1 :         const sal_Unicode SHA_VOWELSIGNII[] = { 0x936, 0x940 };
     699           1 :         OUString aTest(SHA_VOWELSIGNII, SAL_N_ELEMENTS(SHA_VOWELSIGNII));
     700             : 
     701           1 :         sal_Int32 nDone=0;
     702           1 :         sal_Int32 nPos = 0;
     703             : 
     704           1 :         nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
     705           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     706           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(SHA_VOWELSIGNII));
     707           1 :         nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(SHA_VOWELSIGNII), aLocale,
     708           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     709           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
     710           1 :     }
     711           1 : }
     712             : 
     713             : //A test to ensure that certain ranges and codepoints that are categorized as
     714             : //weak remain as weak, so that existing docs that depend on this don't silently
     715             : //change font for those weak chars
     716           1 : void TestBreakIterator::testWeak()
     717             : {
     718           1 :     lang::Locale aLocale;
     719           1 :     aLocale.Language = "en";
     720           1 :     aLocale.Country = "US";
     721             : 
     722             :     {
     723             :         const sal_Unicode WEAKS[] =
     724             :         {
     725             :             0x0001, 0x0002,
     726             :             0x0020, 0x00A0,
     727             :             0x2150, 0x215F, //Number Forms, fractions
     728             :             0x2160, 0x2180, //Number Forms, roman numerals
     729             :             0x2200, 0x22FF, //Mathematical Operators
     730             :             0x27C0, 0x27EF, //Miscellaneous Mathematical Symbols-A
     731             :             0x2980, 0x29FF, //Miscellaneous Mathematical Symbols-B
     732             :             0x2A00, 0x2AFF, //Supplemental Mathematical Operators
     733             :             0x2100, 0x214F, //Letterlike Symbols
     734             :             0x2308, 0x230B, //Miscellaneous technical
     735             :             0x25A0, 0x25FF, //Geometric Shapes
     736             :             0x2B30, 0x2B4C  //Miscellaneous Symbols and Arrows
     737           1 :         };
     738           1 :         OUString aWeaks(WEAKS, SAL_N_ELEMENTS(WEAKS));
     739             : 
     740          25 :         for (sal_Int32 i = 0; i < aWeaks.getLength(); ++i)
     741             :         {
     742          24 :             sal_Int16 nScript = m_xBreak->getScriptType(aWeaks, i);
     743          24 :             OStringBuffer aMsg;
     744          24 :             aMsg.append("Char 0x");
     745          24 :             aMsg.append(static_cast<sal_Int32>(aWeaks[i]), 16);
     746          24 :             aMsg.append(" should have been weak");
     747          48 :             CPPUNIT_ASSERT_MESSAGE(aMsg.getStr(),
     748          24 :                 nScript == i18n::ScriptType::WEAK);
     749          25 :         }
     750           1 :     }
     751           1 : }
     752             : 
     753             : //A test to ensure that certain ranges and codepoints that are categorized as
     754             : //asian remain as asian, so that existing docs that depend on this don't silently
     755             : //change font for those asian chars.
     756             : //See https://bugs.libreoffice.org/show_bug.cgi?id=38095
     757           1 : void TestBreakIterator::testAsian()
     758             : {
     759           1 :     lang::Locale aLocale;
     760           1 :     aLocale.Language = "en";
     761           1 :     aLocale.Country = "US";
     762             : 
     763             :     {
     764             :         const sal_Unicode ASIANS[] =
     765             :         {
     766             :             //some typical CJK chars
     767             :             0x4E00, 0x62FF,
     768             :             //The full HalfWidth and FullWidth block has historically been
     769             :             //designated as taking the CJK font :-(
     770             :             //HalfWidth and FullWidth forms of ASCII 0-9, categorized under
     771             :             //UAX24 as "Common" i.e. by that logic WEAK
     772             :             0xFF10, 0xFF19,
     773             :             //HalfWidth and FullWidth forms of ASCII A-z, categorized under
     774             :             //UAX24 as "Latin", i.e. by that logic LATIN
     775             :             0xFF21, 0xFF5A
     776           1 :         };
     777           1 :         OUString aAsians(ASIANS, SAL_N_ELEMENTS(ASIANS));
     778             : 
     779           7 :         for (sal_Int32 i = 0; i < aAsians.getLength(); ++i)
     780             :         {
     781           6 :             sal_Int16 nScript = m_xBreak->getScriptType(aAsians, i);
     782           6 :             OStringBuffer aMsg;
     783           6 :             aMsg.append("Char 0x");
     784           6 :             aMsg.append(static_cast<sal_Int32>(aAsians[i]), 16);
     785           6 :             aMsg.append(" should have been asian");
     786          12 :             CPPUNIT_ASSERT_MESSAGE(aMsg.getStr(),
     787           6 :                 nScript == i18n::ScriptType::ASIAN);
     788           7 :         }
     789           1 :     }
     790           1 : }
     791             : 
     792             : //A test to ensure that our Lao word boundary detection is useful
     793           0 : void TestBreakIterator::testLao()
     794             : {
     795           0 :     lang::Locale aLocale;
     796           0 :     aLocale.Language = "lo";
     797           0 :     aLocale.Country = "LA";
     798             : 
     799           0 :     const sal_Unicode LAO[] = { 0x0e8d, 0x0eb4, 0x0e99, 0x0e94, 0x0eb5, 0x0e95, 0x0ec9, 0x0ead, 0x0e99, 0x0eae, 0x0eb1, 0x0e9a };
     800           0 :     OUString aTest(LAO, SAL_N_ELEMENTS(LAO));
     801           0 :     i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale,
     802           0 :         i18n::WordType::DICTIONARY_WORD, true);
     803             : 
     804           0 :     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 5);
     805             : 
     806           0 :     aBounds = m_xBreak->getWordBoundary(aTest, aBounds.endPos, aLocale,
     807           0 :         i18n::WordType::DICTIONARY_WORD, true);
     808             : 
     809           0 :     CPPUNIT_ASSERT(aBounds.startPos == 5 && aBounds.endPos == 9);
     810             : 
     811           0 : }
     812             : 
     813             : //A test to ensure that our Thai word boundary detection is useful
     814           1 : void TestBreakIterator::testThai()
     815             : {
     816           1 :     lang::Locale aLocale;
     817           1 :     aLocale.Language = "th";
     818           1 :     aLocale.Country = "TH";
     819             : 
     820             :     //See http://lists.freedesktop.org/archives/libreoffice/2012-February/025959.html
     821             :     {
     822           1 :         const sal_Unicode THAI[] = { 0x0E01, 0x0E38, 0x0E2B, 0x0E25, 0x0E32, 0x0E1A };
     823           1 :         OUString aTest(THAI, SAL_N_ELEMENTS(THAI));
     824           1 :         i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale,
     825           1 :             i18n::WordType::DICTIONARY_WORD, true);
     826           2 :         CPPUNIT_ASSERT_MESSAGE("Should skip full word",
     827           2 :             aBounds.startPos == 0 && aBounds.endPos == aTest.getLength());
     828             :     }
     829             : 
     830             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=29548
     831             :     //make sure forward (nextWord) and backward (previousWord) iteration are consistent
     832             :     {
     833             :         const sal_Unicode THAI[] =
     834             :         {
     835             :             0x0E2D, 0x0E38, 0x0E17, 0x0E22, 0x0E32, 0x0E19, 0x0E41,
     836             :             0x0E2B, 0x0E48, 0x0E07, 0x0E0A, 0x0E32, 0x0E15, 0x0E34,
     837             :             0x0E19, 0x0E49, 0x0E33, 0x0E2B, 0x0E19, 0x0E32, 0x0E27,
     838             :             0x0E2D, 0x0E38, 0x0E17, 0x0E22, 0x0E32, 0x0E19, 0x0E41,
     839             :             0x0E2B, 0x0E48, 0x0E07, 0x0E0A, 0x0E32, 0x0E15, 0x0E34,
     840             :             0x0E19, 0x0E49, 0x0E33, 0x0E2B, 0x0E19, 0x0E32, 0x0E27
     841           1 :         };
     842           1 :         OUString aTest(THAI, SAL_N_ELEMENTS(THAI));
     843             : 
     844           2 :         std::stack<sal_Int32> aPositions;
     845           1 :         sal_Int32 nPos = -1;
     846          11 :         do
     847             :         {
     848          11 :             nPos = m_xBreak->nextWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
     849          11 :             aPositions.push(nPos);
     850             :         }
     851          11 :         while (nPos < aTest.getLength());
     852           1 :         nPos = aTest.getLength();
     853           1 :         CPPUNIT_ASSERT(!aPositions.empty());
     854           1 :         aPositions.pop();
     855          10 :         do
     856             :         {
     857          10 :             CPPUNIT_ASSERT(!aPositions.empty());
     858          10 :             nPos = m_xBreak->previousWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
     859          10 :             CPPUNIT_ASSERT(nPos == aPositions.top());
     860          10 :             aPositions.pop();
     861             :         }
     862          11 :         while (nPos > 0);
     863           1 :     }
     864           1 : }
     865             : 
     866             : #ifdef TODO
     867             : void TestBreakIterator::testNorthernThai()
     868             : {
     869             :     lang::Locale aLocale;
     870             :     aLocale.Language = "nod";
     871             :     aLocale.Country = "TH";
     872             : 
     873             :     const sal_Unicode NORTHERN_THAI1[] = { 0x0E01, 0x0E38, 0x0E4A, 0x0E2B, 0x0E25, 0x0E32, 0x0E1A };
     874             :     OUString aTest(NORTHERN_THAI1, SAL_N_ELEMENTS(NORTHERN_THAI1));
     875             :     i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale,
     876             :         i18n::WordType::DICTIONARY_WORD, true);
     877             :     CPPUNIT_ASSERT_MESSAGE("Should skip full word",
     878             :         aBounds.startPos == 0 && aBounds.endPos == aTest.getLength());
     879             : }
     880             : #endif
     881             : 
     882             : #if (U_ICU_VERSION_MAJOR_NUM > 4)
     883             : // It is unclear whether any ICU version earlier than 49 shipped Khmer word
     884             : // boundary dictionaries; ICU 4.6 does not.
     885             : 
     886             : //A test to ensure that our Khmer word boundary detection is useful
     887             : //https://bugs.libreoffice.org/show_bug.cgi?id=52020
     888           1 : void TestBreakIterator::testKhmer()
     889             : {
     890           1 :     lang::Locale aLocale;
     891           1 :     aLocale.Language = "km";
     892           1 :     aLocale.Country = "KH";
     893             : 
     894           1 :     const sal_Unicode KHMER[] = { 0x17B2, 0x17D2, 0x1799, 0x1782, 0x17C1 };
     895             : 
     896           2 :     OUString aTest(KHMER, SAL_N_ELEMENTS(KHMER));
     897           1 :     i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale,
     898           1 :         i18n::WordType::DICTIONARY_WORD, true);
     899             : 
     900           1 :     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 3);
     901             : 
     902           1 :     aBounds = m_xBreak->getWordBoundary(aTest, aBounds.endPos, aLocale,
     903           1 :         i18n::WordType::DICTIONARY_WORD, true);
     904             : 
     905           2 :     CPPUNIT_ASSERT(aBounds.startPos == 3 && aBounds.endPos == 5);
     906           1 : }
     907             : #endif
     908             : 
     909           1 : void TestBreakIterator::testJapanese()
     910             : {
     911           1 :     lang::Locale aLocale;
     912           1 :     aLocale.Language = "ja";
     913           1 :     aLocale.Country = "JP";
     914           1 :     i18n::Boundary aBounds;
     915             : 
     916             :     {
     917           1 :         const sal_Unicode JAPANESE[] = { 0x30B7, 0x30E3, 0x30C3, 0x30C8, 0x30C0, 0x30A6, 0x30F3 };
     918             : 
     919           1 :         OUString aTest(JAPANESE, SAL_N_ELEMENTS(JAPANESE));
     920           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 5, aLocale,
     921           1 :             i18n::WordType::DICTIONARY_WORD, true);
     922             : 
     923           1 :         CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 7);
     924             :     }
     925             : 
     926             :     {
     927           1 :         const sal_Unicode JAPANESE[] = { 0x9EBB, 0x306E, 0x8449, 0x9EBB, 0x306E, 0x8449 };
     928             : 
     929           1 :         OUString aTest(JAPANESE, SAL_N_ELEMENTS(JAPANESE));
     930           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 1, aLocale,
     931           1 :             i18n::WordType::DICTIONARY_WORD, true);
     932             : 
     933           1 :         CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 3);
     934             : 
     935           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 5, aLocale,
     936           1 :             i18n::WordType::DICTIONARY_WORD, true);
     937             : 
     938           1 :         CPPUNIT_ASSERT(aBounds.startPos == 3 && aBounds.endPos == 6);
     939           1 :     }
     940           1 : }
     941             : 
     942           1 : void TestBreakIterator::testChinese()
     943             : {
     944           1 :     lang::Locale aLocale;
     945           1 :     aLocale.Language = "zh";
     946           1 :     aLocale.Country = "CN";
     947           1 :     i18n::Boundary aBounds;
     948             : 
     949             :     {
     950           1 :         const sal_Unicode CHINESE[] = { 0x6A35, 0x6A30, 0x69FE, 0x8919, 0xD867, 0xDEDB  };
     951             : 
     952           1 :         OUString aTest(CHINESE, SAL_N_ELEMENTS(CHINESE));
     953           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 4, aLocale,
     954           1 :             i18n::WordType::DICTIONARY_WORD, true);
     955           1 :         CPPUNIT_ASSERT(aBounds.startPos == 4 && aBounds.endPos == 6);
     956           1 :     }
     957           1 : }
     958           9 : void TestBreakIterator::setUp()
     959             : {
     960           9 :     BootstrapFixtureBase::setUp();
     961          27 :     m_xBreak = uno::Reference< i18n::XBreakIterator >(m_xSFactory->createInstance(
     962          18 :         "com.sun.star.i18n.BreakIterator"), uno::UNO_QUERY_THROW);
     963           9 : }
     964             : 
     965           9 : void TestBreakIterator::tearDown()
     966             : {
     967           9 :     m_xBreak.clear();
     968           9 :     BootstrapFixtureBase::tearDown();
     969           9 : }
     970             : 
     971           1 : CPPUNIT_TEST_SUITE_REGISTRATION(TestBreakIterator);
     972             : 
     973           4 : CPPUNIT_PLUGIN_IMPLEMENT();
     974             : 
     975             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10