LCOV - code coverage report
Current view: top level - i18npool/qa/cppunit - test_breakiterator.cxx (source / functions) Hit Total Coverage
Test: commit c8344322a7af75b84dd3ca8f78b05543a976dfd5 Lines: 519 523 99.2 %
Date: 2015-06-13 12:38:46 Functions: 25 26 96.2 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  */
       9             : 
      10             : #include <cppuhelper/compbase1.hxx>
      11             : #include <cppuhelper/bootstrap.hxx>
      12             : #include <cppuhelper/basemutex.hxx>
      13             : #include <com/sun/star/i18n/XBreakIterator.hpp>
      14             : #include <com/sun/star/i18n/CharacterIteratorMode.hpp>
      15             : #include <com/sun/star/i18n/ScriptType.hpp>
      16             : #include <com/sun/star/i18n/WordType.hpp>
      17             : #include <unotest/bootstrapfixturebase.hxx>
      18             : 
      19             : #include <unicode/uversion.h>
      20             : 
      21             : #include <rtl/strbuf.hxx>
      22             : #include <rtl/ustrbuf.hxx>
      23             : 
      24             : #include <string.h>
      25             : 
      26             : #include <stack>
      27             : 
      28             : using namespace ::com::sun::star;
      29             : 
      30          30 : class TestBreakIterator : public test::BootstrapFixtureBase // CppUnit fixture for the i18n::XBreakIterator tests declared below
      31             : {
      32             : public:
      33             :     virtual void setUp() SAL_OVERRIDE;
      34             :     virtual void tearDown() SAL_OVERRIDE;
      35             :     // Test cases; each is added to the suite further down (some only conditionally).
      36             :     void testLineBreaking();
      37             :     void testWordBoundaries();
      38             :     void testGraphemeIteration();
      39             :     void testWeak();
      40             :     void testAsian();
      41             :     void testThai();
      42             : #if (U_ICU_VERSION_MAJOR_NUM > 51) // testLao is only declared when the ICU major version is above 51
      43             :     void testLao();
      44             : #endif
      45             : #ifdef TODO // testNorthernThai is only compiled when TODO is defined
      46             :     void testNorthernThai();
      47             : #endif
      48             :     void testKhmer(); // always declared, but only registered below when the ICU major version is above 4
      49             :     void testJapanese();
      50             :     void testChinese();
      51             :     // Suite registration: the guards repeat those on the declarations above, plus an extra ICU guard around testKhmer.
      52           2 :     CPPUNIT_TEST_SUITE(TestBreakIterator);
      53           1 :     CPPUNIT_TEST(testLineBreaking);
      54           1 :     CPPUNIT_TEST(testWordBoundaries);
      55           1 :     CPPUNIT_TEST(testGraphemeIteration);
      56           1 :     CPPUNIT_TEST(testWeak);
      57           1 :     CPPUNIT_TEST(testAsian);
      58           1 :     CPPUNIT_TEST(testThai);
      59             : #if (U_ICU_VERSION_MAJOR_NUM > 51)
      60           1 :     CPPUNIT_TEST(testLao);
      61             : #endif
      62             : #ifdef TODO
      63             :     CPPUNIT_TEST(testNorthernThai);
      64             : #endif
      65             : #if (U_ICU_VERSION_MAJOR_NUM > 4)
      66           1 :     CPPUNIT_TEST(testKhmer);
      67             : #endif
      68           1 :     CPPUNIT_TEST(testJapanese);
      69           1 :     CPPUNIT_TEST(testChinese);
      70           5 :     CPPUNIT_TEST_SUITE_END();
      71             : 
      72             : private:
      73             :     uno::Reference<i18n::XBreakIterator> m_xBreak; // break iterator the tests call into; NOTE(review): presumably created in setUp() - its definition is not shown here
      74             :     void doTestJapanese(uno::Reference< i18n::XBreakIterator > &xBreak); // NOTE(review): presumably a helper shared by testJapanese - confirm against its definition
      75             : };
      76             : 
      77           1 : void TestBreakIterator::testLineBreaking() // Checks the breakIndex returned by getLineBreak() for the bug reports linked below
      78             : {
      79           1 :     i18n::LineBreakHyphenationOptions aHyphOptions;
      80           2 :     i18n::LineBreakUserOptions aUserOptions;
      81           2 :     lang::Locale aLocale;
      82             :     // The two option structs above are never modified and are passed as-is to every getLineBreak() call; only aLocale is set per case.
      83             :     //See https://bugs.libreoffice.org/show_bug.cgi?id=31271
      84             :     {
      85           1 :         OUString aTest("(some text here)");
      86             : 
      87           1 :         aLocale.Language = "en";
      88           1 :         aLocale.Country = "US";
      89             : 
      90             :         {
      91             :             //Here we want the line break to leave "text here)" on the next line: index 6 is the 't' of "text"
      92           1 :             i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, strlen("(some tex"), aLocale, 0, aHyphOptions, aUserOptions);
      93           1 :             CPPUNIT_ASSERT_MESSAGE("Expected a break at the start of the word", aResult.breakIndex == 6);
      94             :         }
      95             : 
      96             :         {
      97             :             //Here we want the line break to leave "here)" on the next line: index 11 is the 'h' of "here"
      98           1 :             i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, strlen("(some text here"), aLocale, 0, aHyphOptions, aUserOptions);
      99           1 :             CPPUNIT_ASSERT_MESSAGE("Expected a break at the start of the word", aResult.breakIndex == 11);
     100           1 :         }
     101             :     }
     102             : 
     103             :     //See https://bugs.libreoffice.org/show_bug.cgi?id=49849
     104             :     {
     105           1 :         const sal_Unicode HEBREW1[] = { 0x05DE, 0x05D9, 0x05DC, 0x05D9, 0x5DD };
     106           1 :         OUString aWord(HEBREW1, SAL_N_ELEMENTS(HEBREW1));
     107           2 :         OUString aTest(OUStringBuffer(aWord).append(' ').append(aWord).makeStringAndClear()); // two copies of the word separated by a single space
     108             : 
     109           1 :         aLocale.Language = "he";
     110           1 :         aLocale.Country = "IL";
     111             : 
     112             :         {
     113             :             //Here we want the line break to happen at the whitespace, i.e. the expected break index is the start of the second word
     114           1 :             i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, aTest.getLength()-1, aLocale, 0, aHyphOptions, aUserOptions);
     115           1 :             CPPUNIT_ASSERT_MESSAGE("Expected a break at the start of the word", aResult.breakIndex == aWord.getLength()+1);
     116           1 :         }
     117             :     }
     118             : 
     119             :     //See https://bz.apache.org/ooo/show_bug.cgi?id=17155
     120             :     {
     121           1 :         OUString aTest("foo /bar/baz");
     122             : 
     123           1 :         aLocale.Language = "en";
     124           1 :         aLocale.Country = "US";
     125             : 
     126             :         {
     127             :             //Here we want the line break to leave /bar/ba clumped together on the next line (index 4 is the first '/')
     128           1 :             i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, strlen("foo /bar/ba"), aLocale, 0,
     129           1 :                 aHyphOptions, aUserOptions);
     130           1 :             CPPUNIT_ASSERT_MESSAGE("Expected a break at the first slash", aResult.breakIndex == 4);
     131           1 :         }
     132             :     }
     133             : 
     134             :     //See https://bz.apache.org/ooo/show_bug.cgi?id=19716
     135             :     {
     136           1 :         OUString aTest("aaa]aaa");
     137             : 
     138           1 :         aLocale.Language = "en";
     139           1 :         aLocale.Country = "US";
     140             : 
     141             :         {
     142             :             //Here we want the line break to move the whole lot to the next line (break index 0) rather than break at ']'
     143           2 :             i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, aTest.getLength()-2, aLocale, 0,
     144           2 :                 aHyphOptions, aUserOptions);
     145           1 :             CPPUNIT_ASSERT_MESSAGE("Expected a break at the start of the line, not at ]", aResult.breakIndex == 0);
     146           1 :         }
     147           1 :     }
     148           1 : }
     149             : 
     150             : //See https://bugs.libreoffice.org/show_bug.cgi?id=49629
     151           1 : void TestBreakIterator::testWordBoundaries()
     152             : {
     153           1 :     lang::Locale aLocale;
     154           1 :     aLocale.Language = "en";
     155           1 :     aLocale.Country = "US";
     156             : 
     157           1 :     i18n::Boundary aBounds;
     158             : 
     159             :     //See https://bz.apache.org/ooo/show_bug.cgi?id=11993
     160             :     {
     161           1 :         OUString aTest("abcd ef  ghi??? KLM");
     162             : 
     163           1 :         CPPUNIT_ASSERT(!m_xBreak->isBeginWord(aTest, 4, aLocale, i18n::WordType::DICTIONARY_WORD));
     164           1 :         CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 4, aLocale, i18n::WordType::DICTIONARY_WORD));
     165           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 4, aLocale, i18n::WordType::DICTIONARY_WORD, true);
     166           1 :         CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 4);
     167             : 
     168           1 :         CPPUNIT_ASSERT(!m_xBreak->isBeginWord(aTest, 8, aLocale, i18n::WordType::DICTIONARY_WORD));
     169           1 :         CPPUNIT_ASSERT(!m_xBreak->isEndWord(aTest, 8, aLocale, i18n::WordType::DICTIONARY_WORD));
     170             : 
     171             :         //next word
     172           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 8, aLocale, i18n::WordType::DICTIONARY_WORD, true);
     173           1 :         CPPUNIT_ASSERT(aBounds.startPos == 9 && aBounds.endPos == 12);
     174             : 
     175             :         //previous word
     176           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 8, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     177           1 :         CPPUNIT_ASSERT(aBounds.startPos == 5 && aBounds.endPos == 7);
     178             : 
     179           1 :         CPPUNIT_ASSERT(!m_xBreak->isBeginWord(aTest, 12, aLocale, i18n::WordType::DICTIONARY_WORD));
     180           1 :         CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 12, aLocale, i18n::WordType::DICTIONARY_WORD));
     181           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 12, aLocale, i18n::WordType::DICTIONARY_WORD, true);
     182           1 :         CPPUNIT_ASSERT(aBounds.startPos == 9 && aBounds.endPos == 12);
     183             : 
     184           1 :         CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 16, aLocale, i18n::WordType::DICTIONARY_WORD));
     185           1 :         CPPUNIT_ASSERT(!m_xBreak->isEndWord(aTest, 16, aLocale, i18n::WordType::DICTIONARY_WORD));
     186           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 16, aLocale, i18n::WordType::DICTIONARY_WORD, true);
     187           1 :         CPPUNIT_ASSERT(aBounds.startPos == 16 && aBounds.endPos == 19);
     188             :     }
     189             : 
     190             :     //See https://bz.apache.org/ooo/show_bug.cgi?id=21907
     191             :     {
     192           1 :         OUString aTest("b a?");
     193             : 
     194           1 :         CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 1, aLocale, i18n::WordType::ANY_WORD));
     195           1 :         CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 2, aLocale, i18n::WordType::ANY_WORD));
     196           1 :         CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 3, aLocale, i18n::WordType::ANY_WORD));
     197             : 
     198           1 :         CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 3, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES));
     199             : 
     200           1 :         CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 1, aLocale, i18n::WordType::ANY_WORD));
     201           1 :         CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 2, aLocale, i18n::WordType::ANY_WORD));
     202           1 :         CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 3, aLocale, i18n::WordType::ANY_WORD));
     203             : 
     204           1 :         CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 3, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES));
     205             :     }
     206             : 
     207             :     //See https://bz.apache.org/ooo/show_bug.cgi?id=14904
     208             :     {
     209             :         const sal_Unicode TEST[] =
     210             :         {
     211             :             'W', 'o', 'r', 'k', 'i', 'n', 'g', ' ', 0x201C, 'W', 'o', 'r', 'd', 's',
     212             :             ' ', 's', 't', 'a', 'r', 't', 'i', 'n', 'g', ' ', 'w', 'i', 't',
     213             :             'h', ' ', 'q', 'u', 'o', 't', 'e', 's', 0x201D, ' ', 'W', 'o', 'r', 'k',
     214             :             'i', 'n', 'g', ' ', 0x2018, 'B', 'r', 'o', 'k', 'e', 'n', 0x2019, ' ',
     215             :             '?', 'S', 'p', 'a', 'n', 'i', 's', 'h', '?', ' ', 'd', 'o', 'e',
     216             :             's', 'n', 0x2019, 't', ' ', 'w', 'o', 'r', 'k', '.', ' ', 'N', 'o',
     217             :             't', ' ', 'e', 'v', 'e', 'n', ' ' , 0x00BF, 'r', 'e', 'a', 'l', '?', ' ',
     218             :             'S', 'p', 'a', 'n', 'i', 's', 'h'
     219           1 :         };
     220           1 :         OUString aTest(TEST, SAL_N_ELEMENTS(TEST));
     221             : 
     222           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 4, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     223           1 :         CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 7);
     224             : 
     225           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 12, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     226           1 :         CPPUNIT_ASSERT(aBounds.startPos == 9 && aBounds.endPos == 14);
     227             : 
     228           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 40, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     229           1 :         CPPUNIT_ASSERT(aBounds.startPos == 37 && aBounds.endPos == 44);
     230             : 
     231           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 49, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     232           1 :         CPPUNIT_ASSERT(aBounds.startPos == 46 && aBounds.endPos == 52);
     233             : 
     234           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 58, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     235           1 :         CPPUNIT_ASSERT(aBounds.startPos == 55 && aBounds.endPos == 62);
     236             : 
     237           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 67, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     238           1 :         CPPUNIT_ASSERT(aBounds.startPos == 64 && aBounds.endPos == 71);
     239             : 
     240           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 90, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     241           1 :         CPPUNIT_ASSERT(aBounds.startPos == 88 && aBounds.endPos == 92);
     242             :     }
     243             : 
     244             :     //See https://bugs.libreoffice.org/show_bug.cgi?id=49629
     245           1 :     sal_Unicode aBreakTests[] = { ' ', 1, 2, 3, 4, 5, 6, 7, 0x91, 0x92, 0x200B, 0xE8FF, 0xF8FF };
     246           5 :     for (int mode = i18n::WordType::ANY_WORD; mode <= i18n::WordType::WORD_COUNT; ++mode)
     247             :     {
     248             :         //make sure that in all cases isBeginWord and isEndWord matches getWordBoundary
     249          56 :         for (size_t i = 0; i < SAL_N_ELEMENTS(aBreakTests); ++i)
     250             :         {
     251             : #if (U_ICU_VERSION_MAJOR_NUM == 4) && (U_ICU_VERSION_MINOR_NUM <= 2)
     252             :             //Note the breakiterator test is known to fail on older icu
     253             :             //versions (4.2.1) for the 200B (ZWSP) Zero Width Space testcase.
     254             :             if (aBreakTests[i] == 0x200B)
     255             :                 continue;
     256             : #endif
     257          52 :             OUString aTest = "Word" + OUString(aBreakTests[i]) + "Word";
     258          52 :             aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, mode, true);
     259          52 :             switch (mode)
     260             :             {
     261             :                 case i18n::WordType::ANY_WORD:
     262          13 :                     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 4);
     263          13 :                     break;
     264             :                 case i18n::WordType::ANYWORD_IGNOREWHITESPACES:
     265          13 :                     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 4);
     266          13 :                     break;
     267             :                 case i18n::WordType::DICTIONARY_WORD:
     268          13 :                     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 4);
     269          13 :                     break;
     270             :                 case i18n::WordType::WORD_COUNT:
     271          13 :                     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 4);
     272          13 :                     break;
     273             :             }
     274             : 
     275          52 :             CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, aBounds.startPos, aLocale, mode));
     276          52 :             CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, aBounds.endPos, aLocale, mode));
     277          52 :         }
     278             :     }
     279             : 
     280           1 :     sal_Unicode aJoinTests[] = { 'X', 0x200C, 0x200D, 0x2060, 0xFEFF, 0xFFF9, 0xFFFA, 0xFFFB };
     281           5 :     for (int mode = i18n::WordType::ANY_WORD; mode <= i18n::WordType::WORD_COUNT; ++mode)
     282             :     {
     283             :         //make sure that in all cases isBeginWord and isEndWord matches getWordBoundary
     284          36 :         for (size_t i = 0; i < SAL_N_ELEMENTS(aJoinTests); ++i)
     285             :         {
     286          32 :             OUString aTest = "Word" + OUString(aJoinTests[i]) + "Word";
     287          32 :             aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, mode, true);
     288          32 :             switch (mode)
     289             :             {
     290             :                 case i18n::WordType::ANY_WORD:
     291           8 :                     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 9);
     292           8 :                     break;
     293             :                 case i18n::WordType::ANYWORD_IGNOREWHITESPACES:
     294           8 :                     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 9);
     295           8 :                     break;
     296             :                 case i18n::WordType::DICTIONARY_WORD:
     297           8 :                     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 9);
     298           8 :                     break;
     299             :                 case i18n::WordType::WORD_COUNT:
     300           8 :                     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 9);
     301           8 :                     break;
     302             :             }
     303             : 
     304          32 :             CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, aBounds.startPos, aLocale, mode));
     305          32 :             CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, aBounds.endPos, aLocale, mode));
     306          32 :         }
     307             :     }
     308             : 
     309             :     //See https://bz.apache.org/ooo/show_bug.cgi?id=13494
     310             :     {
     311           1 :         const OUString aBase("xxAAxxBBxxCCxx");
     312             :         const sal_Unicode aTests[] =
     313             :         {
     314             :             '\'', ';', ',', '.', '!', '@', '#', '%', '&', '*',
     315             :             '(', ')', '_', '-', '{', '}', '[', ']', '\"', '/',
     316             :             '\\', '?', '~', '$', '+', '^', '=', '<', '>', '|'
     317           1 :         };
     318             : 
     319           1 :         const sal_Int32 aDoublePositions[] = {0, 2, 4, 6, 8, 10, 12, 14};
     320          31 :         for (size_t j = 0; j < SAL_N_ELEMENTS(aTests); ++j)
     321             :         {
     322          30 :             OUString aTest = aBase.replace('x', aTests[j]);
     323          30 :             sal_Int32 nPos = -1;
     324          30 :             size_t i = 0;
     325         240 :             do
     326             :             {
     327         240 :                 CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aDoublePositions));
     328         240 :                 nPos = m_xBreak->nextWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
     329         240 :                 CPPUNIT_ASSERT(nPos == aDoublePositions[i++]);
     330             :             }
     331         240 :             while (nPos < aTest.getLength());
     332          30 :             nPos = aTest.getLength();
     333          30 :             i = SAL_N_ELEMENTS(aDoublePositions)-1;
     334         210 :             do
     335             :             {
     336         210 :                 nPos = m_xBreak->previousWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
     337         210 :                 CPPUNIT_ASSERT(nPos == aDoublePositions[--i]);
     338             :             }
     339             :             while (nPos > 0);
     340          30 :         }
     341             : 
     342           1 :         const sal_Int32 aSinglePositions[] = {0, 1, 3, 4, 6, 7, 9, 10};
     343          30 :         for (size_t j = 1; j < SAL_N_ELEMENTS(aTests); ++j)
     344             :         {
     345          29 :             OUString aTest = aBase.replaceAll(OUString("xx"), OUString(aTests[j]));
     346          29 :             sal_Int32 nPos = -1;
     347          29 :             size_t i = 0;
     348         232 :             do
     349             :             {
     350         232 :                 CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aSinglePositions));
     351         232 :                 nPos = m_xBreak->nextWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
     352         232 :                 CPPUNIT_ASSERT(nPos == aSinglePositions[i++]);
     353             :             }
     354         232 :             while (nPos < aTest.getLength());
     355          29 :             nPos = aTest.getLength();
     356          29 :             i = SAL_N_ELEMENTS(aSinglePositions)-1;
     357         203 :             do
     358             :             {
     359         203 :                 nPos = m_xBreak->previousWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
     360         203 :                 CPPUNIT_ASSERT(nPos == aSinglePositions[--i]);
     361             :             }
     362             :             while (nPos > 0);
     363          29 :         }
     364             : 
     365           1 :         const sal_Int32 aSingleQuotePositions[] = {0, 1, 9, 10};
     366           1 :         CPPUNIT_ASSERT(aTests[0] == '\'');
     367             :         {
     368           1 :             OUString aTest = aBase.replaceAll(OUString("xx"), OUString(aTests[0]));
     369           1 :             sal_Int32 nPos = -1;
     370           1 :             size_t i = 0;
     371           4 :             do
     372             :             {
     373           4 :                 CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aSingleQuotePositions));
     374           4 :                 nPos = m_xBreak->nextWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
     375           4 :                 CPPUNIT_ASSERT(nPos == aSingleQuotePositions[i++]);
     376             :             }
     377           4 :             while (nPos < aTest.getLength());
     378           1 :             nPos = aTest.getLength();
     379           1 :             i = SAL_N_ELEMENTS(aSingleQuotePositions)-1;
     380           3 :             do
     381             :             {
     382           3 :                 nPos = m_xBreak->previousWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
     383           3 :                 CPPUNIT_ASSERT(nPos == aSingleQuotePositions[--i]);
     384             :             }
     385           1 :             while (nPos > 0);
     386           1 :         }
     387             :     }
     388             : 
     389             :     //See https://bz.apache.org/ooo/show_bug.cgi?id=13451
     390             :     {
     391           1 :         aLocale.Language = "ca";
     392           1 :         aLocale.Country = "ES";
     393             : 
     394           1 :         OUString aTest("mirar-se comprar-vos donem-nos les mans aneu-vos-en!");
     395             : 
     396           1 :         sal_Int32 nPos = 0;
     397           1 :         sal_Int32 aExpected[] = {8, 20, 30, 34, 39, 51, 52};
     398           1 :         size_t i = 0;
     399           7 :         do
     400             :         {
     401           7 :             CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected));
     402           7 :             nPos = m_xBreak->getWordBoundary(aTest, nPos, aLocale,
     403           7 :                 i18n::WordType::DICTIONARY_WORD, true).endPos;
     404           7 :             CPPUNIT_ASSERT(aExpected[i++] == nPos);
     405             :         }
     406           7 :         while (nPos++ < aTest.getLength());
     407           1 :         CPPUNIT_ASSERT(i == SAL_N_ELEMENTS(aExpected));
     408             :     }
     409             : 
     410             :     //See https://bz.apache.org/ooo/show_bug.cgi?id=85411
     411           4 :     for (int j = 0; j < 3; ++j)
     412             :     {
     413           3 :         switch (j)
     414             :         {
     415             :             case 0:
     416           1 :                 aLocale.Language = "en";
     417           1 :                 aLocale.Country = "US";
     418           1 :                 break;
     419             :             case 1:
     420           1 :                 aLocale.Language = "ca";
     421           1 :                 aLocale.Country = "ES";
     422           1 :                 break;
     423             :             case 2:
     424           1 :                 aLocale.Language = "fi";
     425           1 :                 aLocale.Country = "FI";
     426           1 :                 break;
     427             :             default:
     428           0 :                 CPPUNIT_ASSERT(false);
     429           0 :                 break;
     430             :         }
     431             : 
     432             :         const sal_Unicode TEST[] =
     433             :         {
     434             :             'I', 0x200B, 'w', 'a', 'n', 't', 0x200B, 't', 'o', 0x200B, 'g', 'o'
     435           3 :         };
     436           3 :         OUString aTest(TEST, SAL_N_ELEMENTS(TEST));
     437             : 
     438           3 :         sal_Int32 nPos = 0;
     439           3 :         sal_Int32 aExpected[] = {1, 6, 9, 12};
     440           3 :         size_t i = 0;
     441          12 :         do
     442             :         {
     443          12 :             CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected));
     444          12 :             nPos = m_xBreak->getWordBoundary(aTest, nPos, aLocale,
     445          12 :                 i18n::WordType::DICTIONARY_WORD, true).endPos;
     446          12 :             CPPUNIT_ASSERT(aExpected[i++] == nPos);
     447             :         }
     448          12 :         while (nPos++ < aTest.getLength());
     449           3 :         CPPUNIT_ASSERT(i == SAL_N_ELEMENTS(aExpected));
     450           3 :     }
     451             : 
     452             :     //https://bz.apache.org/ooo/show_bug.cgi?id=21290
     453           3 :     for (int j = 0; j < 2; ++j)
     454             :     {
     455           2 :         switch (j)
     456             :         {
     457             :             case 0:
     458           1 :                 aLocale.Language = "en";
     459           1 :                 aLocale.Country = "US";
     460           1 :                 break;
     461             :             case 1:
     462           1 :                 aLocale.Language = "grc";
     463           1 :                 aLocale.Country.clear();
     464           1 :                 break;
     465             :             default:
     466           0 :                 CPPUNIT_ASSERT(false);
     467           0 :                 break;
     468             :         }
     469             : 
     470             :         const sal_Unicode TEST[] =
     471             :         {
     472             :             0x1F0C, 0x03BD, 0x03B4, 0x03C1, 0x03B1, 0x0020, 0x1F00,
     473             :             0x03C1, 0x03BD, 0x1F7B, 0x03BC, 0x03B5, 0x03BD, 0x03BF,
     474             :             0x03C2, 0x0020, 0x1F00, 0x03BB, 0x03BB, 0x0020, 0x1F24,
     475             :             0x03C3, 0x03B8, 0x03B9, 0x03BF, 0x03BD
     476           2 :         };
     477           2 :         OUString aTest(TEST, SAL_N_ELEMENTS(TEST));
     478             : 
     479           2 :         sal_Int32 nPos = 0;
     480           2 :         sal_Int32 aExpected[] = {5, 15, 19, 26};
     481           2 :         size_t i = 0;
     482           8 :         do
     483             :         {
     484           8 :             CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected));
     485           8 :             nPos = m_xBreak->getWordBoundary(aTest, nPos, aLocale,
     486           8 :                 i18n::WordType::DICTIONARY_WORD, true).endPos;
     487           8 :             CPPUNIT_ASSERT(aExpected[i++] == nPos);
     488             :         }
     489           8 :         while (nPos++ < aTest.getLength());
     490           2 :         CPPUNIT_ASSERT(i == SAL_N_ELEMENTS(aExpected));
     491           2 :     }
     492             : 
     493             :     //See https://bz.apache.org/ooo/show_bug.cgi?id=58513
     494             :     //See https://bugs.libreoffice.org/show_bug.cgi?id=55707
     495             :     {
     496           1 :         aLocale.Language = "fi";
     497           1 :         aLocale.Country = "FI";
     498             : 
     499           1 :         OUString aTest("Kuorma-auto kaakkois- ja Keski-Suomi USA:n 90:n %:n");
     500             : 
     501             :         {
     502           1 :             sal_Int32 nPos = 0;
     503           1 :             sal_Int32 aExpected[] = {11, 21, 24, 36, 42, 47, 51};
     504           1 :             size_t i = 0;
     505           7 :             do
     506             :             {
     507           7 :                 CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected));
     508           7 :                 nPos = m_xBreak->getWordBoundary(aTest, nPos, aLocale,
     509           7 :                     i18n::WordType::WORD_COUNT, true).endPos;
     510           7 :                 CPPUNIT_ASSERT(aExpected[i++] == nPos);
     511             :             }
     512           7 :             while (nPos++ < aTest.getLength());
     513           1 :             CPPUNIT_ASSERT(i == SAL_N_ELEMENTS(aExpected));
     514             :         }
     515             : 
     516             :         {
     517           1 :             sal_Int32 nPos = 0;
     518             :             sal_Int32 aExpected[] = {0, 11, 12, 20, 22, 24, 25, 36, 37,
     519           1 :                                     40, 41, 42, 43, 45, 46, 47, 50, 51};
     520           1 :             size_t i = 0;
     521           9 :             do
     522             :             {
     523           9 :                 CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected));
     524           9 :                 aBounds = m_xBreak->getWordBoundary(aTest, nPos, aLocale,
     525           9 :                     i18n::WordType::DICTIONARY_WORD, true);
     526           9 :                 CPPUNIT_ASSERT(aExpected[i++] == aBounds.startPos);
     527           9 :                 CPPUNIT_ASSERT(aExpected[i++] == aBounds.endPos);
     528           9 :                 nPos = aBounds.endPos;
     529             :             }
     530           9 :             while (nPos++ < aTest.getLength());
     531           1 :             CPPUNIT_ASSERT(i == SAL_N_ELEMENTS(aExpected));
     532           1 :         }
     533             :     }
     534             : 
     535             :     //See https://bz.apache.org/ooo/show_bug.cgi?id=107843
     536             :     {
     537           1 :         aLocale.Language = "en";
     538           1 :         aLocale.Country = "US";
     539             : 
     540             :         const sal_Unicode TEST[] =
     541             :         {
     542             :             'r', 'u', 0xFB00, 'l', 'e', ' ', 0xFB01, 's', 'h'
     543           1 :         };
     544           1 :         OUString aTest(TEST, SAL_N_ELEMENTS(TEST));
     545             : 
     546           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 1, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     547           1 :         CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 5);
     548             : 
     549           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 7, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     550           1 :         CPPUNIT_ASSERT(aBounds.startPos == 6 && aBounds.endPos == 9);
     551             :     }
     552             : 
     553             :     //See https://bz.apache.org/ooo/show_bug.cgi?id=113785
     554             :     {
     555           1 :         aLocale.Language = "en";
     556           1 :         aLocale.Country = "US";
     557             : 
     558             :         const sal_Unicode TEST[] =
     559             :         {
     560             :             'a', 0x2013, 'b', 0x2014, 'c'
     561           1 :         };
     562           1 :         OUString aTest(TEST, SAL_N_ELEMENTS(TEST));
     563             : 
     564           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, i18n::WordType::DICTIONARY_WORD, true);
     565           1 :         CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 1);
     566             : 
     567           1 :         aBounds = m_xBreak->nextWord(aTest, 0, aLocale, i18n::WordType::DICTIONARY_WORD);
     568           1 :         CPPUNIT_ASSERT(aBounds.startPos == 2 && aBounds.endPos == 3);
     569             : 
     570           1 :         aBounds = m_xBreak->nextWord(aTest, aBounds.endPos, aLocale, i18n::WordType::DICTIONARY_WORD);
     571           1 :         CPPUNIT_ASSERT(aBounds.startPos == 4 && aBounds.endPos == 5);
     572           1 :     }
     573           1 : }
     574             : //Checks that nextCharacters/previousCharacters in SKIPCELL mode step over whole graphemes
     575             : //See https://bugs.libreoffice.org/show_bug.cgi?id=40292
     576             : //See https://bz.apache.org/ooo/show_bug.cgi?id=80412
     577             : //See https://bz.apache.org/ooo/show_bug.cgi?id=111152
     578             : //See https://bz.apache.org/ooo/show_bug.cgi?id=50172
     579           1 : void TestBreakIterator::testGraphemeIteration()
     580             : {
     581           1 :     lang::Locale aLocale;
     582           1 :     aLocale.Language = "bn";
     583           1 :     aLocale.Country = "IN";
     584             :     //Each sample under this locale must be crossed in one step: forwards from 0, backwards from the end
     585             :     {
     586           1 :         const sal_Unicode BA_HALANT_LA[] = { 0x09AC, 0x09CD, 0x09AF };
     587           1 :         OUString aTest(BA_HALANT_LA, SAL_N_ELEMENTS(BA_HALANT_LA));
     588             : 
     589           1 :         sal_Int32 nDone=0;
     590             :         sal_Int32 nPos;
     591           1 :         nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
     592           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     593           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(BA_HALANT_LA));
     594           1 :         nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(BA_HALANT_LA), aLocale,
     595           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     596           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
     597             :     }
     598             : 
     599             :     {
     600           1 :         const sal_Unicode HA_HALANT_NA_VOWELSIGNI[] = { 0x09B9, 0x09CD, 0x09A3, 0x09BF };
     601           1 :         OUString aTest(HA_HALANT_NA_VOWELSIGNI, SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI));
     602             : 
     603           1 :         sal_Int32 nDone=0;
     604             :         sal_Int32 nPos;
     605           1 :         nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
     606           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     607           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI));
     608           1 :         nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI), aLocale,
     609           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     610           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
     611             :     }
     612             : 
     613             :     {
     614           1 :         const sal_Unicode TA_HALANT_MA_HALANT_YA  [] = { 0x09A4, 0x09CD, 0x09AE, 0x09CD, 0x09AF };
     615           1 :         OUString aTest(TA_HALANT_MA_HALANT_YA, SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA));
     616             : 
     617           1 :         sal_Int32 nDone=0;
     618             :         sal_Int32 nPos;
     619           1 :         nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
     620           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     621           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA));
     622           1 :         nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA), aLocale,
     623           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     624           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
     625             :     }
     626             :     //Same single-step checks, now with the locale switched to "ta"/"IN"
     627           1 :     aLocale.Language = "ta";
     628           1 :     aLocale.Country = "IN";
     629             : 
     630             :     {
     631           1 :         const sal_Unicode KA_VIRAMA_SSA[] = { 0x0B95, 0x0BCD, 0x0BB7 };
     632           1 :         OUString aTest(KA_VIRAMA_SSA, SAL_N_ELEMENTS(KA_VIRAMA_SSA));
     633             : 
     634           1 :         sal_Int32 nDone=0;
     635           1 :         sal_Int32 nPos = 0;
     636             : 
     637           1 :         nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
     638           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     639           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(KA_VIRAMA_SSA));
     640           1 :         nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(KA_VIRAMA_SSA), aLocale,
     641           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     642           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
     643             :     }
     644             : 
     645             :     {
     646           1 :         const sal_Unicode KA_VOWELSIGNU[] = { 0x0B95, 0x0BC1 };
     647           1 :         OUString aTest(KA_VOWELSIGNU, SAL_N_ELEMENTS(KA_VOWELSIGNU));
     648             : 
     649           1 :         sal_Int32 nDone=0;
     650           1 :         sal_Int32 nPos = 0;
     651             : 
     652           1 :         nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
     653           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     654           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(KA_VOWELSIGNU));
     655           1 :         nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(KA_VOWELSIGNU), aLocale,
     656           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     657           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
     658             :     }
     659             :     //Eight code units that must be traversed two at a time: four steps forward, then four steps back
     660             :     {
     661             :         const sal_Unicode CA_VOWELSIGNI_TA_VIRAMA_TA_VOWELSIGNI_RA_VOWELSIGNAI[] =
     662           1 :             { 0x0B9A, 0x0BBF, 0x0BA4, 0x0BCD, 0x0BA4, 0x0BBF, 0x0BB0, 0x0BC8 };
     663             :         OUString aTest(CA_VOWELSIGNI_TA_VIRAMA_TA_VOWELSIGNI_RA_VOWELSIGNAI,
     664           1 :             SAL_N_ELEMENTS(CA_VOWELSIGNI_TA_VIRAMA_TA_VOWELSIGNI_RA_VOWELSIGNAI));
     665             : 
     666           1 :         sal_Int32 nDone=0;
     667           1 :         sal_Int32 nPos=0;
     668             : 
     669           5 :         for (sal_Int32 i = 0; i < 4; ++i)
     670             :         {
     671           4 :             sal_Int32 nOldPos = nPos;
     672           4 :             nPos = m_xBreak->nextCharacters(aTest, nPos, aLocale,
     673           4 :                 i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     674           4 :             CPPUNIT_ASSERT_MESSAGE("Should skip 2 units", nPos == nOldPos+2);
     675             :         }
     676             :         //nPos is left where the forward loop stopped, so the backward walk starts from there
     677           5 :         for (sal_Int32 i = 0; i < 4; ++i)
     678             :         {
     679           4 :             sal_Int32 nOldPos = nPos;
     680           4 :             nPos = m_xBreak->previousCharacters(aTest, nPos, aLocale,
     681           4 :                 i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     682           4 :             CPPUNIT_ASSERT_MESSAGE("Should skip 2 units", nPos == nOldPos-2);
     683           1 :         }
     684             :     }
     685             :     //Counts forward steps across ALEF_QAMATS; this sample passes a default-constructed locale, not aLocale
     686             :     {
     687           1 :         const sal_Unicode ALEF_QAMATS [] = { 0x05D0, 0x05B8 };
     688           1 :         OUString aText(ALEF_QAMATS, SAL_N_ELEMENTS(ALEF_QAMATS));
     689             : 
     690           1 :         sal_Int32 nGraphemeCount = 0;
     691             : 
     692           1 :         sal_Int32 nCurPos = 0;
     693           3 :         while (nCurPos < aText.getLength())
     694             :         {
     695           1 :             sal_Int32 nCount2 = 1;
     696           1 :             nCurPos = m_xBreak->nextCharacters(aText, nCurPos, lang::Locale(),
     697           1 :                 i18n::CharacterIteratorMode::SKIPCELL, nCount2, nCount2); //nCount2 fills both trailing arguments (other calls pass 1 and nDone)
     698           1 :             ++nGraphemeCount;
     699             :         }
     700             : 
     701           1 :         CPPUNIT_ASSERT_MESSAGE("Should be considered 1 grapheme", nGraphemeCount == 1);
     702             :     }
     703             :     //Final sample: single-step check with the locale switched to "hi"/"IN"
     704           1 :     aLocale.Language = "hi";
     705           1 :     aLocale.Country = "IN";
     706             : 
     707             :     {
     708           1 :         const sal_Unicode SHA_VOWELSIGNII[] = { 0x936, 0x940 };
     709           1 :         OUString aTest(SHA_VOWELSIGNII, SAL_N_ELEMENTS(SHA_VOWELSIGNII));
     710             : 
     711           1 :         sal_Int32 nDone=0;
     712           1 :         sal_Int32 nPos = 0;
     713             : 
     714           1 :         nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
     715           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     716           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(SHA_VOWELSIGNII));
     717           1 :         nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(SHA_VOWELSIGNII), aLocale,
     718           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     719           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
     720           1 :     }
     721           1 : }
     722             : 
     723             : //A test to ensure that certain ranges and codepoints that are categorized as
     724             : //weak remain as weak, so that existing docs that depend on this don't silently
     725             : //change font for those weak chars
     726           1 : void TestBreakIterator::testWeak()
     727             : {
     728           1 :     lang::Locale aLocale; //NOTE(review): filled in below but not passed to any call in this function
     729           1 :     aLocale.Language = "en";
     730           1 :     aLocale.Country = "US";
     731             :     //Only the code units listed in WEAKS are queried, one getScriptType call per array entry
     732             :     {
     733             :         const sal_Unicode WEAKS[] =
     734             :         {
     735             :             0x0001, 0x0002,
     736             :             0x0020, 0x00A0,
     737             :             0x2150, 0x215F, //Number Forms, fractions
     738             :             0x2160, 0x2180, //Number Forms, roman numerals
     739             :             0x2200, 0x22FF, //Mathematical Operators
     740             :             0x27C0, 0x27EF, //Miscellaneous Mathematical Symbols-A
     741             :             0x2980, 0x29FF, //Miscellaneous Mathematical Symbols-B
     742             :             0x2A00, 0x2AFF, //Supplemental Mathematical Operators
     743             :             0x2100, 0x214F, //Letterlike Symbols
     744             :             0x2308, 0x230B, //Miscellaneous technical
     745             :             0x25A0, 0x25FF, //Geometric Shapes
     746             :             0x2B30, 0x2B4C  //Miscellaneous Symbols and Arrows
     747           1 :         };
     748           1 :         OUString aWeaks(WEAKS, SAL_N_ELEMENTS(WEAKS));
     749             :         //A failing entry is identified by its hex value in the assertion message
     750          25 :         for (sal_Int32 i = 0; i < aWeaks.getLength(); ++i)
     751             :         {
     752          24 :             sal_Int16 nScript = m_xBreak->getScriptType(aWeaks, i);
     753          24 :             OStringBuffer aMsg;
     754          24 :             aMsg.append("Char 0x");
     755          24 :             aMsg.append(static_cast<sal_Int32>(aWeaks[i]), 16);
     756          24 :             aMsg.append(" should have been weak");
     757          48 :             CPPUNIT_ASSERT_MESSAGE(aMsg.getStr(),
     758          24 :                 nScript == i18n::ScriptType::WEAK);
     759          25 :         }
     760           1 :     }
     761           1 : }
     762             : 
     763             : //A test to ensure that certain ranges and codepoints that are categorized as
     764             : //asian remain as asian, so that existing docs that depend on this don't silently
     765             : //change font for those asian chars.
     766             : //See https://bugs.libreoffice.org/show_bug.cgi?id=38095
     767           1 : void TestBreakIterator::testAsian()
     768             : {
     769           1 :     lang::Locale aLocale; //NOTE(review): filled in below but not passed to any call in this function
     770           1 :     aLocale.Language = "en";
     771           1 :     aLocale.Country = "US";
     772             :     //Only the code units listed in ASIANS are queried, one getScriptType call per array entry
     773             :     {
     774             :         const sal_Unicode ASIANS[] =
     775             :         {
     776             :             //some typical CJK chars
     777             :             0x4E00, 0x62FF,
     778             :             //The full HalfWidth and FullWidth block has historically been
     779             :             //designated as taking the CJK font :-(
     780             :             //HalfWidth and FullWidth forms of ASCII 0-9, categorized under
     781             :             //UAX24 as "Common" i.e. by that logic WEAK
     782             :             0xFF10, 0xFF19,
     783             :             //HalfWidth and FullWidth forms of ASCII A-z, categorized under
     784             :             //UAX24 as "Latin", i.e. by that logic LATIN
     785             :             0xFF21, 0xFF5A
     786           1 :         };
     787           1 :         OUString aAsians(ASIANS, SAL_N_ELEMENTS(ASIANS));
     788             :         //A failing entry is identified by its hex value in the assertion message
     789           7 :         for (sal_Int32 i = 0; i < aAsians.getLength(); ++i)
     790             :         {
     791           6 :             sal_Int16 nScript = m_xBreak->getScriptType(aAsians, i);
     792           6 :             OStringBuffer aMsg;
     793           6 :             aMsg.append("Char 0x");
     794           6 :             aMsg.append(static_cast<sal_Int32>(aAsians[i]), 16);
     795           6 :             aMsg.append(" should have been asian");
     796          12 :             CPPUNIT_ASSERT_MESSAGE(aMsg.getStr(),
     797           6 :                 nScript == i18n::ScriptType::ASIAN);
     798           7 :         }
     799           1 :     }
     800           1 : }
     801             : 
     802             : #if (U_ICU_VERSION_MAJOR_NUM > 51) //only built when the ICU major version is above 51
     803             : //A test to ensure that our Lao word boundary detection is useful
     804           1 : void TestBreakIterator::testLao()
     805             : {
     806           1 :     lang::Locale aLocale;
     807           1 :     aLocale.Language = "lo";
     808           1 :     aLocale.Country = "LA";
     809             :     //Twelve code units; the first two dictionary words are expected at 0..5 and 5..9
     810           1 :     const sal_Unicode LAO[] = { 0x0e8d, 0x0eb4, 0x0e99, 0x0e94, 0x0eb5, 0x0e95, 0x0ec9, 0x0ead, 0x0e99, 0x0eae, 0x0eb1, 0x0e9a };
     811           2 :     OUString aTest(LAO, SAL_N_ELEMENTS(LAO));
     812           1 :     i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale,
     813           1 :         i18n::WordType::DICTIONARY_WORD, true);
     814             : 
     815           1 :     CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos);
     816           1 :     CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aBounds.endPos);
     817             :     //Query again, starting from the end position of the first word
     818           1 :     aBounds = m_xBreak->getWordBoundary(aTest, aBounds.endPos, aLocale,
     819           1 :         i18n::WordType::DICTIONARY_WORD, true);
     820             : 
     821           1 :     CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aBounds.startPos);
     822           2 :     CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.endPos);
     823           1 : }
     824             : #endif
     825             : 
     826             : //A test to ensure that our thai word boundary detection is useful
     827           1 : void TestBreakIterator::testThai()
     828             : {
     829           1 :     lang::Locale aLocale;
     830           1 :     aLocale.Language = "th";
     831           1 :     aLocale.Country = "TH";
     832             :     //First sample: the word found at position 0 must cover the entire string
     833             :     //See http://lists.freedesktop.org/archives/libreoffice/2012-February/025959.html
     834             :     {
     835           1 :         const sal_Unicode THAI[] = { 0x0E01, 0x0E38, 0x0E2B, 0x0E25, 0x0E32, 0x0E1A };
     836           1 :         OUString aTest(THAI, SAL_N_ELEMENTS(THAI));
     837           1 :         i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale,
     838           1 :             i18n::WordType::DICTIONARY_WORD, true);
     839           2 :         CPPUNIT_ASSERT_MESSAGE("Should skip full word",
     840           2 :             aBounds.startPos == 0 && aBounds.endPos == aTest.getLength());
     841             :     }
     842             : 
     843             :     //See https://bz.apache.org/ooo/show_bug.cgi?id=29548
     844             :     //make sure forwards and back are consistent
     845             :     {
     846             :         const sal_Unicode THAI[] =
     847             :         {
     848             :             0x0E2D, 0x0E38, 0x0E17, 0x0E22, 0x0E32, 0x0E19, 0x0E41,
     849             :             0x0E2B, 0x0E48, 0x0E07, 0x0E0A, 0x0E32, 0x0E15, 0x0E34,
     850             :             0x0E19, 0x0E49, 0x0E33, 0x0E2B, 0x0E19, 0x0E32, 0x0E27,
     851             :             0x0E2D, 0x0E38, 0x0E17, 0x0E22, 0x0E32, 0x0E19, 0x0E41,
     852             :             0x0E2B, 0x0E48, 0x0E07, 0x0E0A, 0x0E32, 0x0E15, 0x0E34,
     853             :             0x0E19, 0x0E49, 0x0E33, 0x0E2B, 0x0E19, 0x0E32, 0x0E27
     854           1 :         };
     855           1 :         OUString aTest(THAI, SAL_N_ELEMENTS(THAI));
     856             :         //Walk forward recording every nextWord start, then walk back expecting the same starts in reverse order
     857           2 :         std::stack<sal_Int32> aPositions;
     858           1 :         sal_Int32 nPos = -1;
     859          11 :         do
     860             :         {
     861          11 :             nPos = m_xBreak->nextWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
     862          11 :             aPositions.push(nPos);
     863             :         }
     864          11 :         while (nPos < aTest.getLength());
     865           1 :         nPos = aTest.getLength();
     866           1 :         CPPUNIT_ASSERT(!aPositions.empty());
     867           1 :         aPositions.pop(); //drop the last recorded start: it is the value that ended the forward loop
     868          10 :         do
     869             :         {
     870          10 :             CPPUNIT_ASSERT(!aPositions.empty());
     871          10 :             nPos = m_xBreak->previousWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
     872          10 :             CPPUNIT_ASSERT(nPos == aPositions.top());
     873          10 :             aPositions.pop();
     874             :         }
     875          11 :         while (nPos > 0);
     876           1 :     }
     877           1 : }
     878             : 
     879             : #ifdef TODO //only built when TODO is defined
     880             : void TestBreakIterator::testNorthernThai()
     881             : {
     882             :     lang::Locale aLocale;
     883             :     aLocale.Language = "nod";
     884             :     aLocale.Country = "TH";
     885             :     //Seven code units that are expected to come back as one word starting at 0 and ending at the string length
     886             :     const sal_Unicode NORTHERN_THAI1[] = { 0x0E01, 0x0E38, 0x0E4A, 0x0E2B, 0x0E25, 0x0E32, 0x0E1A };
     887             :     OUString aTest(NORTHERN_THAI1, SAL_N_ELEMENTS(NORTHERN_THAI1));
     888             :     i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale,
     889             :         i18n::WordType::DICTIONARY_WORD, true);
     890             :     CPPUNIT_ASSERT_MESSAGE("Should skip full word",
     891             :         aBounds.startPos == 0 && aBounds.endPos == aTest.getLength());
     892             : }
     893             : #endif
     894             : 
     895             : #if (U_ICU_VERSION_MAJOR_NUM > 4)
     896             : // Not sure whether any ICU version earlier than 49 had Khmer word boundary
     897             : // dictionaries; 4.6 does not.
     898             : 
     899             : //A test to ensure that our khmer word boundary detection is useful
     900             : //https://bugs.libreoffice.org/show_bug.cgi?id=52020
     901           1 : void TestBreakIterator::testKhmer()
     902             : {
     903           1 :     lang::Locale aLocale;
     904           1 :     aLocale.Language = "km";
     905           1 :     aLocale.Country = "KH";
     906             : 
     907           1 :     const sal_Unicode KHMER[] = { 0x17B2, 0x17D2, 0x1799, 0x1782, 0x17C1 };
     908             :     //Five code units expected to split into two dictionary words, 0..3 and 3..5
     909           2 :     OUString aTest(KHMER, SAL_N_ELEMENTS(KHMER));
     910           1 :     i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale,
     911           1 :         i18n::WordType::DICTIONARY_WORD, true);
     912             : 
     913           1 :     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 3);
     914             :     //Query again, starting from the end position of the first word
     915           1 :     aBounds = m_xBreak->getWordBoundary(aTest, aBounds.endPos, aLocale,
     916           1 :         i18n::WordType::DICTIONARY_WORD, true);
     917             : 
     918           2 :     CPPUNIT_ASSERT(aBounds.startPos == 3 && aBounds.endPos == 5);
     919           1 : }
     920             : #endif
     921             : 
     922           2 : void TestBreakIterator::doTestJapanese(uno::Reference< i18n::XBreakIterator > &xBreak)
     923             : {
     924           2 :     lang::Locale aLocale;
     925           2 :     aLocale.Language = "ja";
     926           2 :     aLocale.Country = "JP";
     927           2 :     i18n::Boundary aBounds;
     928             :     //All checks go through the xBreak argument rather than m_xBreak, so callers choose the iterator under test
     929             :     {
     930           2 :         const sal_Unicode JAPANESE[] = { 0x30B7, 0x30E3, 0x30C3, 0x30C8, 0x30C0, 0x30A6, 0x30F3 };
     931             :         //Position 5 must resolve to a single word spanning all seven code units
     932           2 :         OUString aTest(JAPANESE, SAL_N_ELEMENTS(JAPANESE));
     933           2 :         aBounds = xBreak->getWordBoundary(aTest, 5, aLocale,
     934           2 :             i18n::WordType::DICTIONARY_WORD, true);
     935             : 
     936           2 :         CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 7);
     937             :     }
     938             :     //The same three code units twice over: position 1 must fall in 0..3 and position 5 in 3..6
     939             :     {
     940           2 :         const sal_Unicode JAPANESE[] = { 0x9EBB, 0x306E, 0x8449, 0x9EBB, 0x306E, 0x8449 };
     941             : 
     942           2 :         OUString aTest(JAPANESE, SAL_N_ELEMENTS(JAPANESE));
     943           2 :         aBounds = xBreak->getWordBoundary(aTest, 1, aLocale,
     944           2 :             i18n::WordType::DICTIONARY_WORD, true);
     945             : 
     946           2 :         CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 3);
     947             : 
     948           2 :         aBounds = xBreak->getWordBoundary(aTest, 5, aLocale,
     949           2 :             i18n::WordType::DICTIONARY_WORD, true);
     950             : 
     951           2 :         CPPUNIT_ASSERT(aBounds.startPos == 3 && aBounds.endPos == 6);
     952           2 :     }
     953           2 : }
     954             : 
     955           1 : void TestBreakIterator::testJapanese()
     956             : {
     957           1 :     doTestJapanese(m_xBreak); //first pass: the iterator held in m_xBreak
     958             : 
     959             :     // fdo#78479 - test second / cached instantiation of xdictionary
     960           1 :     uno::Reference< i18n::XBreakIterator > xTmpBreak(m_xSFactory->createInstance(
     961           1 :         "com.sun.star.i18n.BreakIterator"), uno::UNO_QUERY_THROW);
     962             :     //Second pass: identical checks against the instance created just above
     963           1 :     doTestJapanese(xTmpBreak);
     964           1 : }
     965             : 
     966           1 : void TestBreakIterator::testChinese()
     967             : {
     968           1 :     lang::Locale aLocale;
     969           1 :     aLocale.Language = "zh";
     970           1 :     aLocale.Country = "CN";
     971           1 :     i18n::Boundary aBounds;
     972             :     //Position 4 is the first half of the trailing surrogate pair 0xD867 0xDEDB; the word there must be 4..6
     973             :     {
     974           1 :         const sal_Unicode CHINESE[] = { 0x6A35, 0x6A30, 0x69FE, 0x8919, 0xD867, 0xDEDB  };
     975             : 
     976           1 :         OUString aTest(CHINESE, SAL_N_ELEMENTS(CHINESE));
     977           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 4, aLocale,
     978           1 :             i18n::WordType::DICTIONARY_WORD, true);
     979           1 :         CPPUNIT_ASSERT(aBounds.startPos == 4 && aBounds.endPos == 6);
     980           1 :     }
     981           1 : }
     982          10 : void TestBreakIterator::setUp() //creates the m_xBreak iterator that the test methods call
     983             : {
     984          10 :     BootstrapFixtureBase::setUp(); //base fixture first; presumably this is what provides m_xSFactory - TODO confirm
     985          30 :     m_xBreak = uno::Reference< i18n::XBreakIterator >(m_xSFactory->createInstance(
     986          20 :         "com.sun.star.i18n.BreakIterator"), uno::UNO_QUERY_THROW);
     987          10 : }
     988             : 
     989          10 : void TestBreakIterator::tearDown() //mirror of setUp, in reverse order
     990             : {
     991          10 :     m_xBreak.clear(); //drop the iterator reference before the base fixture is torn down
     992          10 :     BootstrapFixtureBase::tearDown();
     993          10 : }
     994             : 
     995           1 : CPPUNIT_TEST_SUITE_REGISTRATION(TestBreakIterator); //CppUnit macro: registers this fixture's suite with the test registry
     996             : 
     997           4 : CPPUNIT_PLUGIN_IMPLEMENT(); //CppUnit macro: emits the plug-in entry point for this test library
     998             : 
     999             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.11