LCOV - code coverage report
Current view: top level - i18npool/qa/cppunit - test_breakiterator.cxx (source / functions) Hit Total Coverage
Test: commit 10e77ab3ff6f4314137acd6e2702a6e5c1ce1fae Lines: 517 521 99.2 %
Date: 2014-11-03 Functions: 22 23 95.7 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  */
       9             : 
      10             : #include <cppuhelper/compbase1.hxx>
      11             : #include <cppuhelper/bootstrap.hxx>
      12             : #include <cppuhelper/basemutex.hxx>
      13             : #include <com/sun/star/i18n/XBreakIterator.hpp>
      14             : #include <com/sun/star/i18n/CharacterIteratorMode.hpp>
      15             : #include <com/sun/star/i18n/ScriptType.hpp>
      16             : #include <com/sun/star/i18n/WordType.hpp>
      17             : #include <unotest/bootstrapfixturebase.hxx>
      18             : 
      19             : #include <unicode/uversion.h>
      20             : 
      21             : #include <rtl/strbuf.hxx>
      22             : #include <rtl/ustrbuf.hxx>
      23             : 
      24             : #include <string.h>
      25             : 
      26             : #include <stack>
      27             : 
      28             : using namespace ::com::sun::star;
      29             : 
      30          60 : class TestBreakIterator : public test::BootstrapFixtureBase
      31             : {
      32             : public:
      33             :     virtual void setUp() SAL_OVERRIDE;
      34             :     virtual void tearDown() SAL_OVERRIDE;
      35             : 
      36             :     void testLineBreaking();
      37             :     void testWordBoundaries();
      38             :     void testGraphemeIteration();
      39             :     void testWeak();
      40             :     void testAsian();
      41             :     void testThai();
      42             :     void testLao();
      43             : #ifdef TODO
      44             :     void testNorthernThai();
      45             : #endif
      46             :     void testKhmer();
      47             :     void testJapanese();
      48             :     void testChinese();
      49           4 :     CPPUNIT_TEST_SUITE(TestBreakIterator);
      50           2 :     CPPUNIT_TEST(testLineBreaking);
      51           2 :     CPPUNIT_TEST(testGraphemeIteration);
      52           2 :     CPPUNIT_TEST(testWeak);
      53           2 :     CPPUNIT_TEST(testAsian);
      54           2 :     CPPUNIT_TEST(testThai);
      55             : #ifdef TODO
      56             :     CPPUNIT_TEST(testNorthernThai);
      57             : #endif
      58             : 
      59           2 :     CPPUNIT_TEST(testWordBoundaries);
      60             : #if (U_ICU_VERSION_MAJOR_NUM > 4)
      61           2 :     CPPUNIT_TEST(testKhmer);
      62             : #endif
      63             : #if (U_ICU_VERSION_MAJOR_NUM > 51)
      64           2 :     CPPUNIT_TEST(testLao);
      65             : #endif
      66           2 :     CPPUNIT_TEST(testJapanese);
      67           2 :     CPPUNIT_TEST(testChinese);
      68           4 :     CPPUNIT_TEST_SUITE_END();
      69             : private:
      70             :     uno::Reference<i18n::XBreakIterator> m_xBreak;
      71             :     void doTestJapanese(uno::Reference< i18n::XBreakIterator > &xBreak);
      72             : };
      73             : 
      74           2 : void TestBreakIterator::testLineBreaking()
      75             : {
      76           2 :     i18n::LineBreakHyphenationOptions aHyphOptions;
      77           4 :     i18n::LineBreakUserOptions aUserOptions;
      78           4 :     lang::Locale aLocale;
      79             : 
      80             :     //See https://bugs.libreoffice.org/show_bug.cgi?id=31271
      81             :     {
      82           2 :         OUString aTest("(some text here)");
      83             : 
      84           2 :         aLocale.Language = "en";
      85           2 :         aLocale.Country = "US";
      86             : 
      87             :         {
      88             :             //Here we want the line break to leave text here) on the next line
      89           2 :             i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, strlen("(some tex"), aLocale, 0, aHyphOptions, aUserOptions);
      90           2 :             CPPUNIT_ASSERT_MESSAGE("Expected a break at the start of the word", aResult.breakIndex == 6);
      91             :         }
      92             : 
      93             :         {
      94             :             //Here we want the line break to leave "here)" on the next line
      95           2 :             i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, strlen("(some text here"), aLocale, 0, aHyphOptions, aUserOptions);
      96           2 :             CPPUNIT_ASSERT_MESSAGE("Expected a break at the start of the word", aResult.breakIndex == 11);
      97           2 :         }
      98             :     }
      99             : 
     100             :     //See https://bugs.libreoffice.org/show_bug.cgi?id=49849
     101             :     {
     102           2 :         const sal_Unicode HEBREW1[] = { 0x05DE, 0x05D9, 0x05DC, 0x05D9, 0x5DD };
     103           2 :         OUString aWord(HEBREW1, SAL_N_ELEMENTS(HEBREW1));
     104           4 :         OUString aTest(OUStringBuffer(aWord).append(' ').append(aWord).makeStringAndClear());
     105             : 
     106           2 :         aLocale.Language = "he";
     107           2 :         aLocale.Country = "IL";
     108             : 
     109             :         {
     110             :             //Here we want the line break to happen at the whitespace
     111           2 :             i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, aTest.getLength()-1, aLocale, 0, aHyphOptions, aUserOptions);
     112           2 :             CPPUNIT_ASSERT_MESSAGE("Expected a break at the start of the word", aResult.breakIndex == aWord.getLength()+1);
     113           2 :         }
     114             :     }
     115             : 
     116             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=17155
     117             :     {
     118           2 :         OUString aTest("foo /bar/baz");
     119             : 
     120           2 :         aLocale.Language = "en";
     121           2 :         aLocale.Country = "US";
     122             : 
     123             :         {
     124             :             //Here we want the line break to leave /bar/ba clumped together on the next line
     125           2 :             i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, strlen("foo /bar/ba"), aLocale, 0,
     126           2 :                 aHyphOptions, aUserOptions);
     127           2 :             CPPUNIT_ASSERT_MESSAGE("Expected a break at the first slash", aResult.breakIndex == 4);
     128           2 :         }
     129             :     }
     130             : 
     131             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=19716
     132             :     {
     133           2 :         OUString aTest("aaa]aaa");
     134             : 
     135           2 :         aLocale.Language = "en";
     136           2 :         aLocale.Country = "US";
     137             : 
     138             :         {
     139             :             //Here we want the line break to move the whole lot to the next line
     140           4 :             i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, aTest.getLength()-2, aLocale, 0,
     141           4 :                 aHyphOptions, aUserOptions);
     142           2 :             CPPUNIT_ASSERT_MESSAGE("Expected a break at the start of the line, not at ]", aResult.breakIndex == 0);
     143           2 :         }
     144           2 :     }
     145           2 : }
     146             : 
     147             : //See https://bugs.libreoffice.org/show_bug.cgi?id=49629
     148           2 : void TestBreakIterator::testWordBoundaries()
     149             : {
     150           2 :     lang::Locale aLocale;
     151           2 :     aLocale.Language = "en";
     152           2 :     aLocale.Country = "US";
     153             : 
     154           2 :     i18n::Boundary aBounds;
     155             : 
     156             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=11993
     157             :     {
     158           2 :         OUString aTest("abcd ef  ghi??? KLM");
     159             : 
     160           2 :         CPPUNIT_ASSERT(!m_xBreak->isBeginWord(aTest, 4, aLocale, i18n::WordType::DICTIONARY_WORD));
     161           2 :         CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 4, aLocale, i18n::WordType::DICTIONARY_WORD));
     162           2 :         aBounds = m_xBreak->getWordBoundary(aTest, 4, aLocale, i18n::WordType::DICTIONARY_WORD, true);
     163           2 :         CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 4);
     164             : 
     165           2 :         CPPUNIT_ASSERT(!m_xBreak->isBeginWord(aTest, 8, aLocale, i18n::WordType::DICTIONARY_WORD));
     166           2 :         CPPUNIT_ASSERT(!m_xBreak->isEndWord(aTest, 8, aLocale, i18n::WordType::DICTIONARY_WORD));
     167             : 
     168             :         //next word
     169           2 :         aBounds = m_xBreak->getWordBoundary(aTest, 8, aLocale, i18n::WordType::DICTIONARY_WORD, true);
     170           2 :         CPPUNIT_ASSERT(aBounds.startPos == 9 && aBounds.endPos == 12);
     171             : 
     172             :         //previous word
     173           2 :         aBounds = m_xBreak->getWordBoundary(aTest, 8, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     174           2 :         CPPUNIT_ASSERT(aBounds.startPos == 5 && aBounds.endPos == 7);
     175             : 
     176           2 :         CPPUNIT_ASSERT(!m_xBreak->isBeginWord(aTest, 12, aLocale, i18n::WordType::DICTIONARY_WORD));
     177           2 :         CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 12, aLocale, i18n::WordType::DICTIONARY_WORD));
     178           2 :         aBounds = m_xBreak->getWordBoundary(aTest, 12, aLocale, i18n::WordType::DICTIONARY_WORD, true);
     179           2 :         CPPUNIT_ASSERT(aBounds.startPos == 9 && aBounds.endPos == 12);
     180             : 
     181           2 :         CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 16, aLocale, i18n::WordType::DICTIONARY_WORD));
     182           2 :         CPPUNIT_ASSERT(!m_xBreak->isEndWord(aTest, 16, aLocale, i18n::WordType::DICTIONARY_WORD));
     183           2 :         aBounds = m_xBreak->getWordBoundary(aTest, 16, aLocale, i18n::WordType::DICTIONARY_WORD, true);
     184           2 :         CPPUNIT_ASSERT(aBounds.startPos == 16 && aBounds.endPos == 19);
     185             :     }
     186             : 
     187             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=21907
     188             :     {
     189           2 :         OUString aTest("b a?");
     190             : 
     191           2 :         CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 1, aLocale, i18n::WordType::ANY_WORD));
     192           2 :         CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 2, aLocale, i18n::WordType::ANY_WORD));
     193           2 :         CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 3, aLocale, i18n::WordType::ANY_WORD));
     194             : 
     195           2 :         CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 3, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES));
     196             : 
     197           2 :         CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 1, aLocale, i18n::WordType::ANY_WORD));
     198           2 :         CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 2, aLocale, i18n::WordType::ANY_WORD));
     199           2 :         CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 3, aLocale, i18n::WordType::ANY_WORD));
     200             : 
     201           2 :         CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 3, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES));
     202             :     }
     203             : 
     204             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=14904
     205             :     {
     206             :         const sal_Unicode TEST[] =
     207             :         {
     208             :             'W', 'o', 'r', 'k', 'i', 'n', 'g', ' ', 0x201C, 'W', 'o', 'r', 'd', 's',
     209             :             ' ', 's', 't', 'a', 'r', 't', 'i', 'n', 'g', ' ', 'w', 'i', 't',
     210             :             'h', ' ', 'q', 'u', 'o', 't', 'e', 's', 0x201D, ' ', 'W', 'o', 'r', 'k',
     211             :             'i', 'n', 'g', ' ', 0x2018, 'B', 'r', 'o', 'k', 'e', 'n', 0x2019, ' ',
     212             :             '?', 'S', 'p', 'a', 'n', 'i', 's', 'h', '?', ' ', 'd', 'o', 'e',
     213             :             's', 'n', 0x2019, 't', ' ', 'w', 'o', 'r', 'k', '.', ' ', 'N', 'o',
     214             :             't', ' ', 'e', 'v', 'e', 'n', ' ' , 0x00BF, 'r', 'e', 'a', 'l', '?', ' ',
     215             :             'S', 'p', 'a', 'n', 'i', 's', 'h'
     216           2 :         };
     217           2 :         OUString aTest(TEST, SAL_N_ELEMENTS(TEST));
     218             : 
     219           2 :         aBounds = m_xBreak->getWordBoundary(aTest, 4, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     220           2 :         CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 7);
     221             : 
     222           2 :         aBounds = m_xBreak->getWordBoundary(aTest, 12, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     223           2 :         CPPUNIT_ASSERT(aBounds.startPos == 9 && aBounds.endPos == 14);
     224             : 
     225           2 :         aBounds = m_xBreak->getWordBoundary(aTest, 40, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     226           2 :         CPPUNIT_ASSERT(aBounds.startPos == 37 && aBounds.endPos == 44);
     227             : 
     228           2 :         aBounds = m_xBreak->getWordBoundary(aTest, 49, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     229           2 :         CPPUNIT_ASSERT(aBounds.startPos == 46 && aBounds.endPos == 52);
     230             : 
     231           2 :         aBounds = m_xBreak->getWordBoundary(aTest, 58, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     232           2 :         CPPUNIT_ASSERT(aBounds.startPos == 55 && aBounds.endPos == 62);
     233             : 
     234           2 :         aBounds = m_xBreak->getWordBoundary(aTest, 67, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     235           2 :         CPPUNIT_ASSERT(aBounds.startPos == 64 && aBounds.endPos == 71);
     236             : 
     237           2 :         aBounds = m_xBreak->getWordBoundary(aTest, 90, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     238           2 :         CPPUNIT_ASSERT(aBounds.startPos == 88 && aBounds.endPos == 92);
     239             :     }
     240             : 
     241             :     //See https://bugs.libreoffice.org/show_bug.cgi?id=49629
     242           2 :     sal_Unicode aBreakTests[] = { ' ', 1, 2, 3, 4, 5, 6, 7, 0x91, 0x92, 0x200B, 0xE8FF, 0xF8FF };
     243          10 :     for (int mode = i18n::WordType::ANY_WORD; mode <= i18n::WordType::WORD_COUNT; ++mode)
     244             :     {
     245             :         //make sure that in all cases isBeginWord and isEndWord matches getWordBoundary
     246         112 :         for (size_t i = 0; i < SAL_N_ELEMENTS(aBreakTests); ++i)
     247             :         {
     248             : #if (U_ICU_VERSION_MAJOR_NUM == 4) && (U_ICU_VERSION_MINOR_NUM <= 2)
     249             :             //Note the breakiterator test is known to fail on older icu
     250             :             //versions (4.2.1) for the 200B (ZWSP) Zero Width Space testcase.
     251             :             if (aBreakTests[i] == 0x200B)
     252             :                 continue;
     253             : #endif
     254         104 :             OUString aTest = "Word" + OUString(aBreakTests[i]) + "Word";
     255         104 :             aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, mode, true);
     256         104 :             switch (mode)
     257             :             {
     258             :                 case i18n::WordType::ANY_WORD:
     259          26 :                     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 4);
     260          26 :                     break;
     261             :                 case i18n::WordType::ANYWORD_IGNOREWHITESPACES:
     262          26 :                     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 4);
     263          26 :                     break;
     264             :                 case i18n::WordType::DICTIONARY_WORD:
     265          26 :                     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 4);
     266          26 :                     break;
     267             :                 case i18n::WordType::WORD_COUNT:
     268          26 :                     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 4);
     269          26 :                     break;
     270             :             }
     271             : 
     272         104 :             CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, aBounds.startPos, aLocale, mode));
     273         104 :             CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, aBounds.endPos, aLocale, mode));
     274         104 :         }
     275             :     }
     276             : 
     277           2 :     sal_Unicode aJoinTests[] = { 'X', 0x200C, 0x200D, 0x2060, 0xFEFF, 0xFFF9, 0xFFFA, 0xFFFB };
     278          10 :     for (int mode = i18n::WordType::ANY_WORD; mode <= i18n::WordType::WORD_COUNT; ++mode)
     279             :     {
     280             :         //make sure that in all cases isBeginWord and isEndWord matches getWordBoundary
     281          72 :         for (size_t i = 0; i < SAL_N_ELEMENTS(aJoinTests); ++i)
     282             :         {
     283          64 :             OUString aTest = "Word" + OUString(aJoinTests[i]) + "Word";
     284          64 :             aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, mode, true);
     285          64 :             switch (mode)
     286             :             {
     287             :                 case i18n::WordType::ANY_WORD:
     288          16 :                     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 9);
     289          16 :                     break;
     290             :                 case i18n::WordType::ANYWORD_IGNOREWHITESPACES:
     291          16 :                     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 9);
     292          16 :                     break;
     293             :                 case i18n::WordType::DICTIONARY_WORD:
     294          16 :                     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 9);
     295          16 :                     break;
     296             :                 case i18n::WordType::WORD_COUNT:
     297          16 :                     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 9);
     298          16 :                     break;
     299             :             }
     300             : 
     301          64 :             CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, aBounds.startPos, aLocale, mode));
     302          64 :             CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, aBounds.endPos, aLocale, mode));
     303          64 :         }
     304             :     }
     305             : 
     306             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=13494
     307             :     {
     308           2 :         const OUString aBase("xxAAxxBBxxCCxx");
     309             :         const sal_Unicode aTests[] =
     310             :         {
     311             :             '\'', ';', ',', '.', '!', '@', '#', '%', '&', '*',
     312             :             '(', ')', '_', '-', '{', '}', '[', ']', '\"', '/',
     313             :             '\\', '?', '~', '$', '+', '^', '=', '<', '>', '|'
     314           2 :         };
     315             : 
     316           2 :         const sal_Int32 aDoublePositions[] = {0, 2, 4, 6, 8, 10, 12, 14};
     317          62 :         for (size_t j = 0; j < SAL_N_ELEMENTS(aTests); ++j)
     318             :         {
     319          60 :             OUString aTest = aBase.replace('x', aTests[j]);
     320          60 :             sal_Int32 nPos = -1;
     321          60 :             size_t i = 0;
     322         480 :             do
     323             :             {
     324         480 :                 CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aDoublePositions));
     325         480 :                 nPos = m_xBreak->nextWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
     326         480 :                 CPPUNIT_ASSERT(nPos == aDoublePositions[i++]);
     327             :             }
     328         480 :             while (nPos < aTest.getLength());
     329          60 :             nPos = aTest.getLength();
     330          60 :             i = SAL_N_ELEMENTS(aDoublePositions)-1;
     331         420 :             do
     332             :             {
     333         420 :                 nPos = m_xBreak->previousWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
     334         420 :                 CPPUNIT_ASSERT(nPos == aDoublePositions[--i]);
     335             :             }
     336             :             while (nPos > 0);
     337          60 :         }
     338             : 
     339           2 :         const sal_Int32 aSinglePositions[] = {0, 1, 3, 4, 6, 7, 9, 10};
     340          60 :         for (size_t j = 1; j < SAL_N_ELEMENTS(aTests); ++j)
     341             :         {
     342          58 :             OUString aTest = aBase.replaceAll(OUString("xx"), OUString(aTests[j]));
     343          58 :             sal_Int32 nPos = -1;
     344          58 :             size_t i = 0;
     345         464 :             do
     346             :             {
     347         464 :                 CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aSinglePositions));
     348         464 :                 nPos = m_xBreak->nextWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
     349         464 :                 CPPUNIT_ASSERT(nPos == aSinglePositions[i++]);
     350             :             }
     351         464 :             while (nPos < aTest.getLength());
     352          58 :             nPos = aTest.getLength();
     353          58 :             i = SAL_N_ELEMENTS(aSinglePositions)-1;
     354         406 :             do
     355             :             {
     356         406 :                 nPos = m_xBreak->previousWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
     357         406 :                 CPPUNIT_ASSERT(nPos == aSinglePositions[--i]);
     358             :             }
     359             :             while (nPos > 0);
     360          58 :         }
     361             : 
     362           2 :         const sal_Int32 aSingleQuotePositions[] = {0, 1, 9, 10};
     363           2 :         CPPUNIT_ASSERT(aTests[0] == '\'');
     364             :         {
     365           2 :             OUString aTest = aBase.replaceAll(OUString("xx"), OUString(aTests[0]));
     366           2 :             sal_Int32 nPos = -1;
     367           2 :             size_t i = 0;
     368           8 :             do
     369             :             {
     370           8 :                 CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aSingleQuotePositions));
     371           8 :                 nPos = m_xBreak->nextWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
     372           8 :                 CPPUNIT_ASSERT(nPos == aSingleQuotePositions[i++]);
     373             :             }
     374           8 :             while (nPos < aTest.getLength());
     375           2 :             nPos = aTest.getLength();
     376           2 :             i = SAL_N_ELEMENTS(aSingleQuotePositions)-1;
     377           6 :             do
     378             :             {
     379           6 :                 nPos = m_xBreak->previousWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
     380           6 :                 CPPUNIT_ASSERT(nPos == aSingleQuotePositions[--i]);
     381             :             }
     382           2 :             while (nPos > 0);
     383           2 :         }
     384             :     }
     385             : 
     386             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=13451
     387             :     {
     388           2 :         aLocale.Language = "ca";
     389           2 :         aLocale.Country = "ES";
     390             : 
     391           2 :         OUString aTest("mirar-se comprar-vos donem-nos les mans aneu-vos-en!");
     392             : 
     393           2 :         sal_Int32 nPos = 0;
     394           2 :         sal_Int32 aExpected[] = {8, 20, 30, 34, 39, 51, 52};
     395           2 :         size_t i = 0;
     396          14 :         do
     397             :         {
     398          14 :             CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected));
     399          14 :             nPos = m_xBreak->getWordBoundary(aTest, nPos, aLocale,
     400          14 :                 i18n::WordType::DICTIONARY_WORD, true).endPos;
     401          14 :             CPPUNIT_ASSERT(aExpected[i++] == nPos);
     402             :         }
     403          14 :         while (nPos++ < aTest.getLength());
     404           2 :         CPPUNIT_ASSERT(i == SAL_N_ELEMENTS(aExpected));
     405             :     }
     406             : 
     407             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=85411
     408           8 :     for (int j = 0; j < 3; ++j)
     409             :     {
     410           6 :         switch (j)
     411             :         {
     412             :             case 0:
     413           2 :                 aLocale.Language = "en";
     414           2 :                 aLocale.Country = "US";
     415           2 :                 break;
     416             :             case 1:
     417           2 :                 aLocale.Language = "ca";
     418           2 :                 aLocale.Country = "ES";
     419           2 :                 break;
     420             :             case 2:
     421           2 :                 aLocale.Language = "fi";
     422           2 :                 aLocale.Country = "FI";
     423           2 :                 break;
     424             :             default:
     425           0 :                 CPPUNIT_ASSERT(false);
     426           0 :                 break;
     427             :         }
     428             : 
     429             :         const sal_Unicode TEST[] =
     430             :         {
     431             :             'I', 0x200B, 'w', 'a', 'n', 't', 0x200B, 't', 'o', 0x200B, 'g', 'o'
     432           6 :         };
     433           6 :         OUString aTest(TEST, SAL_N_ELEMENTS(TEST));
     434             : 
     435           6 :         sal_Int32 nPos = 0;
     436           6 :         sal_Int32 aExpected[] = {1, 6, 9, 12};
     437           6 :         size_t i = 0;
     438          24 :         do
     439             :         {
     440          24 :             CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected));
     441          24 :             nPos = m_xBreak->getWordBoundary(aTest, nPos, aLocale,
     442          24 :                 i18n::WordType::DICTIONARY_WORD, true).endPos;
     443          24 :             CPPUNIT_ASSERT(aExpected[i++] == nPos);
     444             :         }
     445          24 :         while (nPos++ < aTest.getLength());
     446           6 :         CPPUNIT_ASSERT(i == SAL_N_ELEMENTS(aExpected));
     447           6 :     }
     448             : 
     449             :     //https://issues.apache.org/ooo/show_bug.cgi?id=21290
     450           6 :     for (int j = 0; j < 2; ++j)
     451             :     {
     452           4 :         switch (j)
     453             :         {
     454             :             case 0:
     455           2 :                 aLocale.Language = "en";
     456           2 :                 aLocale.Country = "US";
     457           2 :                 break;
     458             :             case 1:
     459           2 :                 aLocale.Language = "grc";
     460           2 :                 aLocale.Country = "";
     461           2 :                 break;
     462             :             default:
     463           0 :                 CPPUNIT_ASSERT(false);
     464           0 :                 break;
     465             :         }
     466             : 
     467             :         const sal_Unicode TEST[] =
     468             :         {
     469             :             0x1F0C, 0x03BD, 0x03B4, 0x03C1, 0x03B1, 0x0020, 0x1F00,
     470             :             0x03C1, 0x03BD, 0x1F7B, 0x03BC, 0x03B5, 0x03BD, 0x03BF,
     471             :             0x03C2, 0x0020, 0x1F00, 0x03BB, 0x03BB, 0x0020, 0x1F24,
     472             :             0x03C3, 0x03B8, 0x03B9, 0x03BF, 0x03BD
     473           4 :         };
     474           4 :         OUString aTest(TEST, SAL_N_ELEMENTS(TEST));
     475             : 
     476           4 :         sal_Int32 nPos = 0;
     477           4 :         sal_Int32 aExpected[] = {5, 15, 19, 26};
     478           4 :         size_t i = 0;
     479          16 :         do
     480             :         {
     481          16 :             CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected));
     482          16 :             nPos = m_xBreak->getWordBoundary(aTest, nPos, aLocale,
     483          16 :                 i18n::WordType::DICTIONARY_WORD, true).endPos;
     484          16 :             CPPUNIT_ASSERT(aExpected[i++] == nPos);
     485             :         }
     486          16 :         while (nPos++ < aTest.getLength());
     487           4 :         CPPUNIT_ASSERT(i == SAL_N_ELEMENTS(aExpected));
     488           4 :     }
     489             : 
     490             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=58513
     491             :     //See https://bugs.freedesktop.org/show_bug.cgi?id=55707
     492             :     {
     493           2 :         aLocale.Language = "fi";
     494           2 :         aLocale.Country = "FI";
     495             : 
     496           2 :         OUString aTest("Kuorma-auto kaakkois- ja Keski-Suomi USA:n 90:n %:n");
     497             : 
     498             :         {
     499           2 :             sal_Int32 nPos = 0;
     500           2 :             sal_Int32 aExpected[] = {11, 21, 24, 36, 42, 47, 51};
     501           2 :             size_t i = 0;
     502          14 :             do
     503             :             {
     504          14 :                 CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected));
     505          14 :                 nPos = m_xBreak->getWordBoundary(aTest, nPos, aLocale,
     506          14 :                     i18n::WordType::WORD_COUNT, true).endPos;
     507          14 :                 CPPUNIT_ASSERT(aExpected[i++] == nPos);
     508             :             }
     509          14 :             while (nPos++ < aTest.getLength());
     510           2 :             CPPUNIT_ASSERT(i == SAL_N_ELEMENTS(aExpected));
     511             :         }
     512             : 
     513             :         {
     514           2 :             sal_Int32 nPos = 0;
     515             :             sal_Int32 aExpected[] = {0, 11, 12, 20, 22, 24, 25, 36, 37,
     516           2 :                                     40, 41, 42, 43, 45, 46, 47, 50, 51};
     517           2 :             size_t i = 0;
     518          18 :             do
     519             :             {
     520          18 :                 CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected));
     521          18 :                 aBounds = m_xBreak->getWordBoundary(aTest, nPos, aLocale,
     522          18 :                     i18n::WordType::DICTIONARY_WORD, true);
     523          18 :                 CPPUNIT_ASSERT(aExpected[i++] == aBounds.startPos);
     524          18 :                 CPPUNIT_ASSERT(aExpected[i++] == aBounds.endPos);
     525          18 :                 nPos = aBounds.endPos;
     526             :             }
     527          18 :             while (nPos++ < aTest.getLength());
     528           2 :             CPPUNIT_ASSERT(i == SAL_N_ELEMENTS(aExpected));
     529           2 :         }
     530             :     }
     531             : 
     532             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=107843
     533             :     {
     534           2 :         aLocale.Language = "en";
     535           2 :         aLocale.Country = "US";
     536             : 
     537             :         const sal_Unicode TEST[] =
     538             :         {
     539             :             'r', 'u', 0xFB00, 'l', 'e', ' ', 0xFB01, 's', 'h'
     540           2 :         };
     541           2 :         OUString aTest(TEST, SAL_N_ELEMENTS(TEST));
     542             : 
     543           2 :         aBounds = m_xBreak->getWordBoundary(aTest, 1, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     544           2 :         CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 5);
     545             : 
     546           2 :         aBounds = m_xBreak->getWordBoundary(aTest, 7, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     547           2 :         CPPUNIT_ASSERT(aBounds.startPos == 6 && aBounds.endPos == 9);
     548             :     }
     549             : 
     550             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=113785
     551             :     {
     552           2 :         aLocale.Language = "en";
     553           2 :         aLocale.Country = "US";
     554             : 
     555             :         const sal_Unicode TEST[] =
     556             :         {
     557             :             'a', 0x2013, 'b', 0x2014, 'c'
     558           2 :         };
     559           2 :         OUString aTest(TEST, SAL_N_ELEMENTS(TEST));
     560             : 
     561           2 :         aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, i18n::WordType::DICTIONARY_WORD, true);
     562           2 :         CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 1);
     563             : 
     564           2 :         aBounds = m_xBreak->nextWord(aTest, 0, aLocale, i18n::WordType::DICTIONARY_WORD);
     565           2 :         CPPUNIT_ASSERT(aBounds.startPos == 2 && aBounds.endPos == 3);
     566             : 
     567           2 :         aBounds = m_xBreak->nextWord(aTest, aBounds.endPos, aLocale, i18n::WordType::DICTIONARY_WORD);
     568           2 :         CPPUNIT_ASSERT(aBounds.startPos == 4 && aBounds.endPos == 5);
     569           2 :     }
     570           2 : }
     571             : 
     572             : //See https://bugs.libreoffice.org/show_bug.cgi?id=40292
     573             : //See https://issues.apache.org/ooo/show_bug.cgi?id=80412
     574             : //See https://issues.apache.org/ooo/show_bug.cgi?id=111152
     575             : //See https://issues.apache.org/ooo/show_bug.cgi?id=50172
     576           2 : void TestBreakIterator::testGraphemeIteration()
     577             : { //Each case steps one cell in SKIPCELL mode and asserts where the iterator lands
     578           2 :     lang::Locale aLocale;
     579           2 :     aLocale.Language = "bn";
     580           2 :     aLocale.Country = "IN";
     581             :     //With the bn-IN locale each sequence below must be crossed in a single step
     582             :     {
     583           2 :         const sal_Unicode BA_HALANT_LA[] = { 0x09AC, 0x09CD, 0x09AF };
     584           2 :         OUString aTest(BA_HALANT_LA, SAL_N_ELEMENTS(BA_HALANT_LA));
     585             :         //NOTE(review): 0x09AF is called YA in TA_HALANT_MA_HALANT_YA below but LA here - confirm which name is intended
     586           2 :         sal_Int32 nDone=0;
     587             :         sal_Int32 nPos;
     588           2 :         nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
     589           2 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     590           2 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(BA_HALANT_LA));
     591           2 :         nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(BA_HALANT_LA), aLocale,
     592           2 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     593           2 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
     594             :     }
     595             : 
     596             :     {
     597           2 :         const sal_Unicode HA_HALANT_NA_VOWELSIGNI[] = { 0x09B9, 0x09CD, 0x09A3, 0x09BF };
     598           2 :         OUString aTest(HA_HALANT_NA_VOWELSIGNI, SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI));
     599             :         //One step forward from 0 must reach the end, one step back from the end must reach 0
     600           2 :         sal_Int32 nDone=0;
     601             :         sal_Int32 nPos;
     602           2 :         nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
     603           2 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     604           2 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI));
     605           2 :         nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI), aLocale,
     606           2 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     607           2 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
     608             :     }
     609             : 
     610             :     {
     611           2 :         const sal_Unicode TA_HALANT_MA_HALANT_YA  [] = { 0x09A4, 0x09CD, 0x09AE, 0x09CD, 0x09AF };
     612           2 :         OUString aTest(TA_HALANT_MA_HALANT_YA, SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA));
     613             : 
     614           2 :         sal_Int32 nDone=0;
     615             :         sal_Int32 nPos;
     616           2 :         nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
     617           2 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     618           2 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA));
     619           2 :         nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA), aLocale,
     620           2 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     621           2 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
     622             :     }
     623             : 
     624           2 :     aLocale.Language = "ta";
     625           2 :     aLocale.Country = "IN";
     626             :     //Same forward/backward checks with the ta-IN locale
     627             :     {
     628           2 :         const sal_Unicode KA_VIRAMA_SSA[] = { 0x0B95, 0x0BCD, 0x0BB7 };
     629           2 :         OUString aTest(KA_VIRAMA_SSA, SAL_N_ELEMENTS(KA_VIRAMA_SSA));
     630             : 
     631           2 :         sal_Int32 nDone=0;
     632           2 :         sal_Int32 nPos = 0;
     633             : 
     634           2 :         nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
     635           2 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     636           2 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(KA_VIRAMA_SSA));
     637           2 :         nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(KA_VIRAMA_SSA), aLocale,
     638           2 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     639           2 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
     640             :     }
     641             : 
     642             :     {
     643           2 :         const sal_Unicode KA_VOWELSIGNU[] = { 0x0B95, 0x0BC1 };
     644           2 :         OUString aTest(KA_VOWELSIGNU, SAL_N_ELEMENTS(KA_VOWELSIGNU));
     645             : 
     646           2 :         sal_Int32 nDone=0;
     647           2 :         sal_Int32 nPos = 0;
     648             : 
     649           2 :         nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
     650           2 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     651           2 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(KA_VOWELSIGNU));
     652           2 :         nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(KA_VOWELSIGNU), aLocale,
     653           2 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     654           2 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
     655             :     }
     656             :     //Eight units that are expected to form four cells of two units each
     657             :     {
     658             :         const sal_Unicode CA_VOWELSIGNI_TA_VIRAMA_TA_VOWELSIGNI_RA_VOWELSIGNAI[] =
     659           2 :             { 0x0B9A, 0x0BBF, 0x0BA4, 0x0BCD, 0x0BA4, 0x0BBF, 0x0BB0, 0x0BC8 };
     660             :         OUString aTest(CA_VOWELSIGNI_TA_VIRAMA_TA_VOWELSIGNI_RA_VOWELSIGNAI,
     661           2 :             SAL_N_ELEMENTS(CA_VOWELSIGNI_TA_VIRAMA_TA_VOWELSIGNI_RA_VOWELSIGNAI));
     662             : 
     663           2 :         sal_Int32 nDone=0;
     664           2 :         sal_Int32 nPos=0;
     665             :         //Step forward four times, then backward four times, moving two units each time
     666          10 :         for (sal_Int32 i = 0; i < 4; ++i)
     667             :         {
     668           8 :             sal_Int32 nOldPos = nPos;
     669           8 :             nPos = m_xBreak->nextCharacters(aTest, nPos, aLocale,
     670           8 :                 i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     671           8 :             CPPUNIT_ASSERT_MESSAGE("Should skip 2 units", nPos == nOldPos+2);
     672             :         }
     673             : 
     674          10 :         for (sal_Int32 i = 0; i < 4; ++i)
     675             :         {
     676           8 :             sal_Int32 nOldPos = nPos;
     677           8 :             nPos = m_xBreak->previousCharacters(aTest, nPos, aLocale,
     678           8 :                 i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     679           8 :             CPPUNIT_ASSERT_MESSAGE("Should skip 2 units", nPos == nOldPos-2);
     680           2 :         }
     681             :     }
     682             :     //This case passes a default-constructed lang::Locale() instead of aLocale
     683             :     {
     684           2 :         const sal_Unicode ALEF_QAMATS [] = { 0x05D0, 0x05B8 };
     685           2 :         OUString aText(ALEF_QAMATS, SAL_N_ELEMENTS(ALEF_QAMATS));
     686             : 
     687           2 :         sal_Int32 nGraphemeCount = 0;
     688             :         //Count the nextCharacters calls needed to reach the end; nCount2 is passed as both the count and the done argument
     689           2 :         sal_Int32 nCurPos = 0;
     690           6 :         while (nCurPos < aText.getLength())
     691             :         {
     692           2 :             sal_Int32 nCount2 = 1;
     693           2 :             nCurPos = m_xBreak->nextCharacters(aText, nCurPos, lang::Locale(),
     694           2 :                 i18n::CharacterIteratorMode::SKIPCELL, nCount2, nCount2);
     695           2 :             ++nGraphemeCount;
     696             :         }
     697             : 
     698           2 :         CPPUNIT_ASSERT_MESSAGE("Should be considered 1 grapheme", nGraphemeCount == 1);
     699             :     }
     700             : 
     701           2 :     aLocale.Language = "hi";
     702           2 :     aLocale.Country = "IN";
     703             :     //Same forward/backward check with the hi-IN locale
     704             :     {
     705           2 :         const sal_Unicode SHA_VOWELSIGNII[] = { 0x936, 0x940 };
     706           2 :         OUString aTest(SHA_VOWELSIGNII, SAL_N_ELEMENTS(SHA_VOWELSIGNII));
     707             : 
     708           2 :         sal_Int32 nDone=0;
     709           2 :         sal_Int32 nPos = 0;
     710             : 
     711           2 :         nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
     712           2 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     713           2 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(SHA_VOWELSIGNII));
     714           2 :         nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(SHA_VOWELSIGNII), aLocale,
     715           2 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     716           2 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
     717           2 :     }
     718           2 : }
     719             : 
     720             : //A test to ensure that certain ranges and codepoints that are categorized as
     721             : //weak remain as weak, so that existing docs that depend on this don't silently
     722             : //change font for those weak chars
     723           2 : void TestBreakIterator::testWeak()
     724             : { //Asserts that getScriptType reports ScriptType::WEAK for every code point listed in WEAKS
     725           2 :     lang::Locale aLocale;
     726           2 :     aLocale.Language = "en";
     727           2 :     aLocale.Country = "US";
     728             :     //NOTE(review): aLocale is set up but not passed to any call in this function
     729             :     {
     730             :         const sal_Unicode WEAKS[] =
     731             :         {
     732             :             0x0001, 0x0002,
     733             :             0x0020, 0x00A0,
     734             :             0x2150, 0x215F, //Number Forms, fractions
     735             :             0x2160, 0x2180, //Number Forms, roman numerals
     736             :             0x2200, 0x22FF, //Mathematical Operators
     737             :             0x27C0, 0x27EF, //Miscellaneous Mathematical Symbols-A
     738             :             0x2980, 0x29FF, //Miscellaneous Mathematical Symbols-B
     739             :             0x2A00, 0x2AFF, //Supplemental Mathematical Operators
     740             :             0x2100, 0x214F, //Letterlike Symbols
     741             :             0x2308, 0x230B, //Miscellaneous technical
     742             :             0x25A0, 0x25FF, //Geometric Shapes
     743             :             0x2B30, 0x2B4C  //Miscellaneous Symbols and Arrows
     744           2 :         };
     745           2 :         OUString aWeaks(WEAKS, SAL_N_ELEMENTS(WEAKS));
     746             :         //Only the listed code points are probed, not the values between them; the message names the failing one in hex
     747          50 :         for (sal_Int32 i = 0; i < aWeaks.getLength(); ++i)
     748             :         {
     749          48 :             sal_Int16 nScript = m_xBreak->getScriptType(aWeaks, i);
     750          48 :             OStringBuffer aMsg;
     751          48 :             aMsg.append("Char 0x");
     752          48 :             aMsg.append(static_cast<sal_Int32>(aWeaks[i]), 16);
     753          48 :             aMsg.append(" should have been weak");
     754          96 :             CPPUNIT_ASSERT_MESSAGE(aMsg.getStr(),
     755          48 :                 nScript == i18n::ScriptType::WEAK);
     756          50 :         }
     757           2 :     }
     758           2 : }
     759             : 
     760             : //A test to ensure that certain ranges and codepoints that are categorized as
     761             : //asian remain as asian, so that existing docs that depend on this don't silently
     762             : //change font for those asian chars.
     763             : //See https://bugs.libreoffice.org/show_bug.cgi?id=38095
     764           2 : void TestBreakIterator::testAsian()
     765             : { //Asserts that getScriptType reports ScriptType::ASIAN for every code point listed in ASIANS
     766           2 :     lang::Locale aLocale;
     767           2 :     aLocale.Language = "en";
     768           2 :     aLocale.Country = "US";
     769             :     //NOTE(review): aLocale is set up but not passed to any call in this function
     770             :     {
     771             :         const sal_Unicode ASIANS[] =
     772             :         {
     773             :             //some typical CJK chars
     774             :             0x4E00, 0x62FF,
     775             :             //The full HalfWidth and FullWidth block has historically been
     776             :             //designated as taking the CJK font :-(
     777             :             //HalfWidth and FullWidth forms of ASCII 0-9, categorized under
     778             :             //UAX24 as "Common" i.e. by that logic WEAK
     779             :             0xFF10, 0xFF19,
     780             :             //HalfWidth and FullWidth forms of ASCII A-z, categorized under
     781             :             //UAX24 as "Latin", i.e. by that logic LATIN
     782             :             0xFF21, 0xFF5A
     783           2 :         };
     784           2 :         OUString aAsians(ASIANS, SAL_N_ELEMENTS(ASIANS));
     785             :         //Only the listed code points are probed; the message names the failing one in hex
     786          14 :         for (sal_Int32 i = 0; i < aAsians.getLength(); ++i)
     787             :         {
     788          12 :             sal_Int16 nScript = m_xBreak->getScriptType(aAsians, i);
     789          12 :             OStringBuffer aMsg;
     790          12 :             aMsg.append("Char 0x");
     791          12 :             aMsg.append(static_cast<sal_Int32>(aAsians[i]), 16);
     792          12 :             aMsg.append(" should have been asian");
     793          24 :             CPPUNIT_ASSERT_MESSAGE(aMsg.getStr(),
     794          12 :                 nScript == i18n::ScriptType::ASIAN);
     795          14 :         }
     796           2 :     }
     797           2 : }
     798             : 
     799             : //A test to ensure that our Lao word boundary detection is useful
     800           2 : void TestBreakIterator::testLao()
     801             : { //Checks the first two DICTIONARY_WORD boundaries reported for a 12-unit string with the lo-LA locale
     802           2 :     lang::Locale aLocale;
     803           2 :     aLocale.Language = "lo";
     804           2 :     aLocale.Country = "LA";
     805             : 
     806           2 :     const sal_Unicode LAO[] = { 0x0e8d, 0x0eb4, 0x0e99, 0x0e94, 0x0eb5, 0x0e95, 0x0ec9, 0x0ead, 0x0e99, 0x0eae, 0x0eb1, 0x0e9a };
     807           4 :     OUString aTest(LAO, SAL_N_ELEMENTS(LAO));
     808           2 :     i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale,
     809           2 :         i18n::WordType::DICTIONARY_WORD, true);
     810             :     //The word found at position 0 must be reported as startPos 0, endPos 5
     811           2 :     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 5);
     812             : 
     813           2 :     aBounds = m_xBreak->getWordBoundary(aTest, aBounds.endPos, aLocale,
     814           2 :         i18n::WordType::DICTIONARY_WORD, true);
     815             :     //Asking again at that endPos must give the next word, startPos 5, endPos 9
     816           4 :     CPPUNIT_ASSERT(aBounds.startPos == 5 && aBounds.endPos == 9);
     817             :     //The units after position 9 are not checked
     818           2 : }
     819             : 
     820             : //A test to ensure that our Thai word boundary detection is useful
     821           2 : void TestBreakIterator::testThai()
     822             : { //th-TH checks: a string that must come back as one word, then nextWord/previousWord consistency
     823           2 :     lang::Locale aLocale;
     824           2 :     aLocale.Language = "th";
     825           2 :     aLocale.Country = "TH";
     826             : 
     827             :     //See http://lists.freedesktop.org/archives/libreoffice/2012-February/025959.html
     828             :     {
     829           2 :         const sal_Unicode THAI[] = { 0x0E01, 0x0E38, 0x0E2B, 0x0E25, 0x0E32, 0x0E1A };
     830           2 :         OUString aTest(THAI, SAL_N_ELEMENTS(THAI));
     831           2 :         i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale,
     832           2 :             i18n::WordType::DICTIONARY_WORD, true);
     833           4 :         CPPUNIT_ASSERT_MESSAGE("Should skip full word",
     834           4 :             aBounds.startPos == 0 && aBounds.endPos == aTest.getLength());
     835             :     }
     836             : 
     837             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=29548
     838             :     //make sure forwards and back are consistent
     839             :     {
     840             :         const sal_Unicode THAI[] =
     841             :         {
     842             :             0x0E2D, 0x0E38, 0x0E17, 0x0E22, 0x0E32, 0x0E19, 0x0E41,
     843             :             0x0E2B, 0x0E48, 0x0E07, 0x0E0A, 0x0E32, 0x0E15, 0x0E34,
     844             :             0x0E19, 0x0E49, 0x0E33, 0x0E2B, 0x0E19, 0x0E32, 0x0E27,
     845             :             0x0E2D, 0x0E38, 0x0E17, 0x0E22, 0x0E32, 0x0E19, 0x0E41,
     846             :             0x0E2B, 0x0E48, 0x0E07, 0x0E0A, 0x0E32, 0x0E15, 0x0E34,
     847             :             0x0E19, 0x0E49, 0x0E33, 0x0E2B, 0x0E19, 0x0E32, 0x0E27
     848           2 :         };
     849           2 :         OUString aTest(THAI, SAL_N_ELEMENTS(THAI));
     850             :         //Push each nextWord startPos on a stack until one is no longer before the end of the text
     851           4 :         std::stack<sal_Int32> aPositions;
     852           2 :         sal_Int32 nPos = -1;
     853          22 :         do
     854             :         {
     855          22 :             nPos = m_xBreak->nextWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
     856          22 :             aPositions.push(nPos);
     857             :         }
     858          22 :         while (nPos < aTest.getLength());
     859           2 :         nPos = aTest.getLength();
     860           2 :         CPPUNIT_ASSERT(!aPositions.empty());
     861           2 :         aPositions.pop(); //drop the position that ended the forward loop; the rest must match previousWord in reverse order
     862          20 :         do
     863             :         {
     864          20 :             CPPUNIT_ASSERT(!aPositions.empty());
     865          20 :             nPos = m_xBreak->previousWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
     866          20 :             CPPUNIT_ASSERT(nPos == aPositions.top());
     867          20 :             aPositions.pop();
     868             :         }
     869          22 :         while (nPos > 0);
     870           2 :     }
     871           2 : }
     872             : 
     873             : #ifdef TODO
     874             : void TestBreakIterator::testNorthernThai()
     875             : { //Compiled only when TODO is defined (see the enclosing #ifdef); no hit counts are recorded for it
     876             :     lang::Locale aLocale;
     877             :     aLocale.Language = "nod";
     878             :     aLocale.Country = "TH";
     879             :     //Expects the whole seven-unit string to be returned as one DICTIONARY_WORD
     880             :     const sal_Unicode NORTHERN_THAI1[] = { 0x0E01, 0x0E38, 0x0E4A, 0x0E2B, 0x0E25, 0x0E32, 0x0E1A };
     881             :     OUString aTest(NORTHERN_THAI1, SAL_N_ELEMENTS(NORTHERN_THAI1));
     882             :     i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale,
     883             :         i18n::WordType::DICTIONARY_WORD, true);
     884             :     CPPUNIT_ASSERT_MESSAGE("Should skip full word",
     885             :         aBounds.startPos == 0 && aBounds.endPos == aTest.getLength());
     886             : }
     887             : #endif
     888             : 
     889             : #if (U_ICU_VERSION_MAJOR_NUM > 4)
     890             : // Not sure if any version earlier than 49 did have Khmer word boundary
     891             : // dictionaries, 4.6 does not.
     892             : 
     893             : //A test to ensure that our Khmer word boundary detection is useful
     894             : //https://bugs.libreoffice.org/show_bug.cgi?id=52020
     895           2 : void TestBreakIterator::testKhmer()
     896             : { //Checks two consecutive DICTIONARY_WORD boundaries in a five-unit string with the km-KH locale
     897           2 :     lang::Locale aLocale;
     898           2 :     aLocale.Language = "km";
     899           2 :     aLocale.Country = "KH";
     900             : 
     901           2 :     const sal_Unicode KHMER[] = { 0x17B2, 0x17D2, 0x1799, 0x1782, 0x17C1 };
     902             : 
     903           4 :     OUString aTest(KHMER, SAL_N_ELEMENTS(KHMER));
     904           2 :     i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale,
     905           2 :         i18n::WordType::DICTIONARY_WORD, true);
     906             :     //The word found at position 0 must be reported as startPos 0, endPos 3
     907           2 :     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 3);
     908             : 
     909           2 :     aBounds = m_xBreak->getWordBoundary(aTest, aBounds.endPos, aLocale,
     910           2 :         i18n::WordType::DICTIONARY_WORD, true);
     911             :     //Asking again at that endPos must give startPos 3, endPos 5, i.e. the rest of the string
     912           4 :     CPPUNIT_ASSERT(aBounds.startPos == 3 && aBounds.endPos == 5);
     913           2 : }
     914             : #endif
     915             : 
     916           4 : void TestBreakIterator::doTestJapanese(uno::Reference< i18n::XBreakIterator > &xBreak)
     917             : { //Runs the ja-JP DICTIONARY_WORD checks against the break iterator passed in as xBreak
     918           4 :     lang::Locale aLocale;
     919           4 :     aLocale.Language = "ja";
     920           4 :     aLocale.Country = "JP";
     921           4 :     i18n::Boundary aBounds;
     922             :     //A query at position 5 of a seven-unit string must return the whole string, 0 to 7
     923             :     {
     924           4 :         const sal_Unicode JAPANESE[] = { 0x30B7, 0x30E3, 0x30C3, 0x30C8, 0x30C0, 0x30A6, 0x30F3 };
     925             : 
     926           4 :         OUString aTest(JAPANESE, SAL_N_ELEMENTS(JAPANESE));
     927           4 :         aBounds = xBreak->getWordBoundary(aTest, 5, aLocale,
     928           4 :             i18n::WordType::DICTIONARY_WORD, true);
     929             : 
     930           4 :         CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 7);
     931             :     }
     932             :     //The same three units twice in a row: positions 1 and 5 must resolve to 0-3 and 3-6
     933             :     {
     934           4 :         const sal_Unicode JAPANESE[] = { 0x9EBB, 0x306E, 0x8449, 0x9EBB, 0x306E, 0x8449 };
     935             : 
     936           4 :         OUString aTest(JAPANESE, SAL_N_ELEMENTS(JAPANESE));
     937           4 :         aBounds = xBreak->getWordBoundary(aTest, 1, aLocale,
     938           4 :             i18n::WordType::DICTIONARY_WORD, true);
     939             : 
     940           4 :         CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 3);
     941             : 
     942           4 :         aBounds = xBreak->getWordBoundary(aTest, 5, aLocale,
     943           4 :             i18n::WordType::DICTIONARY_WORD, true);
     944             : 
     945           4 :         CPPUNIT_ASSERT(aBounds.startPos == 3 && aBounds.endPos == 6);
     946           4 :     }
     947           4 : }
     948             : 
     949           2 : void TestBreakIterator::testJapanese()
     950             : { //Runs doTestJapanese on the fixture's m_xBreak and then on a second, newly created instance
     951           2 :     doTestJapanese(m_xBreak);
     952             : 
     953             :     // fdo#78479 - test second / cached instantiation of xdictionary
     954           2 :     uno::Reference< i18n::XBreakIterator > xTmpBreak(m_xSFactory->createInstance(
     955           2 :         "com.sun.star.i18n.BreakIterator"), uno::UNO_QUERY_THROW);
     956             :     //The same assertions must also hold for the second instance
     957           2 :     doTestJapanese(xTmpBreak);
     958           2 : }
     959             : 
     960           2 : void TestBreakIterator::testChinese()
     961             : { //Checks that a zh-CN word query starting on a surrogate pair returns both of its code units
     962           2 :     lang::Locale aLocale;
     963           2 :     aLocale.Language = "zh";
     964           2 :     aLocale.Country = "CN";
     965           2 :     i18n::Boundary aBounds;
     966             :     //0xD867 0xDEDB at units 4 and 5 is a UTF-16 surrogate pair; the query at 4 must span 4 to 6
     967             :     {
     968           2 :         const sal_Unicode CHINESE[] = { 0x6A35, 0x6A30, 0x69FE, 0x8919, 0xD867, 0xDEDB  };
     969             : 
     970           2 :         OUString aTest(CHINESE, SAL_N_ELEMENTS(CHINESE));
     971           2 :         aBounds = m_xBreak->getWordBoundary(aTest, 4, aLocale,
     972           2 :             i18n::WordType::DICTIONARY_WORD, true);
     973           2 :         CPPUNIT_ASSERT(aBounds.startPos == 4 && aBounds.endPos == 6);
     974           2 :     }
     975           2 : }
     976          20 : void TestBreakIterator::setUp()
     977             : { //Runs the base fixture setUp, then creates the com.sun.star.i18n.BreakIterator service into m_xBreak
     978          20 :     BootstrapFixtureBase::setUp();
     979          60 :     m_xBreak = uno::Reference< i18n::XBreakIterator >(m_xSFactory->createInstance(
     980          40 :         "com.sun.star.i18n.BreakIterator"), uno::UNO_QUERY_THROW);
     981          20 : }
     982             : 
     983          20 : void TestBreakIterator::tearDown()
     984             : { //Clears m_xBreak before handing over to the base fixture tearDown
     985          20 :     m_xBreak.clear();
     986          20 :     BootstrapFixtureBase::tearDown();
     987          20 : }
     988             : 
     989           2 : CPPUNIT_TEST_SUITE_REGISTRATION(TestBreakIterator);
     990             : //CppUnit macros: the one above registers the TestBreakIterator suite, the one below presumably supplies the test plug-in entry point
     991           8 : CPPUNIT_PLUGIN_IMPLEMENT();
     992             : 
     993             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10