LCOV - libreoffice_filtered.info - libreoffice/i18npool/qa/cppunit/test

LCOV - code coverage report

Current view:	top level - libreoffice/i18npool/qa/cppunit - test_breakiterator.cxx (source / functions)		Hit	Total	Coverage
Test:	libreoffice_filtered.info	Lines:	231	472	48.9 %
Date:	2012-12-27	Functions:	17	19	89.5 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * Version: MPL 1.1 / GPLv3+ / LGPLv3+
       4             :  *
       5             :  * The contents of this file are subject to the Mozilla Public License Version
       6             :  * 1.1 (the "License"); you may not use this file except in compliance with
       7             :  * the License. You may obtain a copy of the License at
       8             :  * http://www.mozilla.org/MPL/
       9             :  *
      10             :  * Software distributed under the License is distributed on an "AS IS" basis,
      11             :  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
      12             :  * for the specific language governing rights and limitations under the
      13             :  * License.
      14             :  *
      15             :  * The Initial Developer of the Original Code is
      16             :  *       Caolán McNamara <caolanm@redhat.com>
      17             :  *
      18             :  * Contributor(s):
      19             :  *   Caolán McNamara <caolanm@redhat.com>
      20             :  *
      21             :  * Alternatively, the contents of this file may be used under the terms of
      22             :  * either the GNU General Public License Version 3 or later (the "GPLv3+"), or
      23             :  * the GNU Lesser General Public License Version 3 or later (the "LGPLv3+"),
      24             :  * in which case the provisions of the GPLv3+ or the LGPLv3+ are applicable
      25             :  * instead of those above.
      26             :  */
      27             : 
      28             : #include <cppuhelper/compbase1.hxx>
      29             : #include <cppuhelper/bootstrap.hxx>
      30             : #include <cppuhelper/basemutex.hxx>
      31             : #include <com/sun/star/i18n/XBreakIterator.hpp>
      32             : #include <com/sun/star/i18n/CharacterIteratorMode.hpp>
      33             : #include <com/sun/star/i18n/ScriptType.hpp>
      34             : #include <com/sun/star/i18n/WordType.hpp>
      35             : #include <unotest/bootstrapfixturebase.hxx>
      36             : 
      37             : #include <unicode/uvernum.h>
      38             : 
      39             : #include <rtl/strbuf.hxx>
      40             : #include <rtl/ustrbuf.hxx>
      41             : 
      42             : #include <string.h>
      43             : 
      44             : #include <stack>
      45             : 
      46             : using namespace ::com::sun::star;
      47             : 
      48          18 : class TestBreakIterator : public test::BootstrapFixtureBase
      49             : {
      50             : public:
      51             :     virtual void setUp();
      52             :     virtual void tearDown();
      53             : 
      54             :     void testLineBreaking();
      55             :     void testWordBoundaries();
      56             :     void testGraphemeIteration();
      57             :     void testWeak();
      58             :     void testAsian();
      59             :     void testThai();
      60             : #if TODO
      61             :     void testNorthernThai();
      62             : #endif
      63             :     void testKhmer();
      64             :     void testJapanese();
      65             : 
      66           2 :     CPPUNIT_TEST_SUITE(TestBreakIterator);
      67           1 :     CPPUNIT_TEST(testLineBreaking);
      68           1 :     CPPUNIT_TEST(testGraphemeIteration);
      69           1 :     CPPUNIT_TEST(testWeak);
      70           1 :     CPPUNIT_TEST(testAsian);
      71           1 :     CPPUNIT_TEST(testThai);
      72             : #if TODO
      73             :     CPPUNIT_TEST(testNorthernThai);
      74             : #endif
      75             : #if (U_ICU_VERSION_MAJOR_NUM > 4)
      76             :     CPPUNIT_TEST(testWordBoundaries);
      77             :     CPPUNIT_TEST(testKhmer);
      78             : #endif
      79           1 :     CPPUNIT_TEST(testJapanese);
      80           2 :     CPPUNIT_TEST_SUITE_END();
      81             : private:
      82             :     uno::Reference<i18n::XBreakIterator> m_xBreak;
      83             : };
      84             : 
      85           1 : void TestBreakIterator::testLineBreaking()
      86             : {
      87           1 :     i18n::LineBreakHyphenationOptions aHyphOptions;
      88           1 :     i18n::LineBreakUserOptions aUserOptions;
      89           1 :     lang::Locale aLocale;
      90             : 
      91             :     //See https://bugs.freedesktop.org/show_bug.cgi?id=31271
      92             :     {
      93           1 :         rtl::OUString aTest(RTL_CONSTASCII_USTRINGPARAM("(some text here)"));
      94             : 
      95           1 :         aLocale.Language = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("en"));
      96           1 :         aLocale.Country = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("US"));
      97             : 
      98             :         {
      99             :             //Here we want the line break to leave text here) on the next line
     100           1 :             i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, strlen("(some tex"), aLocale, 0, aHyphOptions, aUserOptions);
     101           1 :             CPPUNIT_ASSERT_MESSAGE("Expected a break at the the start of the word", aResult.breakIndex == 6);
     102             :         }
     103             : 
     104             :         {
     105             :             //Here we want the line break to leave "here)" on the next line
     106           1 :             i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, strlen("(some text here"), aLocale, 0, aHyphOptions, aUserOptions);
     107           1 :             CPPUNIT_ASSERT_MESSAGE("Expected a break at the the start of the word", aResult.breakIndex == 11);
     108           1 :         }
     109             :     }
     110             : 
     111             :     //See https://bugs.freedesktop.org/show_bug.cgi?id=49849
     112             :     {
     113           1 :         const sal_Unicode HEBREW1[] = { 0x05DE, 0x05D9, 0x05DC, 0x05D9, 0x5DD };
     114           1 :         rtl::OUString aWord(HEBREW1, SAL_N_ELEMENTS(HEBREW1));
     115           1 :         rtl::OUString aTest(rtl::OUStringBuffer(aWord).append(' ').append(aWord).makeStringAndClear());
     116             : 
     117           1 :         aLocale.Language = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("he"));
     118           1 :         aLocale.Country = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("IL"));
     119             : 
     120             :         {
     121             :             //Here we want the line break to happen at the whitespace
     122           1 :             i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, aTest.getLength()-1, aLocale, 0, aHyphOptions, aUserOptions);
     123           1 :             CPPUNIT_ASSERT_MESSAGE("Expected a break at the the start of the word", aResult.breakIndex == aWord.getLength()+1);
     124           1 :         }
     125             :     }
     126             : 
     127             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=17155
     128             :     {
     129           1 :         rtl::OUString aTest(RTL_CONSTASCII_USTRINGPARAM("foo /bar/baz"));
     130             : 
     131           1 :         aLocale.Language = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("en"));
     132           1 :         aLocale.Country = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("US"));
     133             : 
     134             :         {
     135             :             //Here we want the line break to leave /bar/ba clumped together on the next line
     136           1 :             i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, strlen("foo /bar/ba"), aLocale, 0,
     137           1 :                 aHyphOptions, aUserOptions);
     138           1 :             CPPUNIT_ASSERT_MESSAGE("Expected a break at the first slash", aResult.breakIndex == 4);
     139           1 :         }
     140             :     }
     141             : 
     142             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=19716
     143             :     {
     144           1 :         rtl::OUString aTest(RTL_CONSTASCII_USTRINGPARAM("aaa]aaa"));
     145             : 
     146           1 :         aLocale.Language = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("en"));
     147           1 :         aLocale.Country = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("US"));
     148             : 
     149             :         {
     150             :             //Here we want the line break to move the whole lot to the next line
     151           2 :             i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, aTest.getLength()-2, aLocale, 0,
     152           2 :                 aHyphOptions, aUserOptions);
     153           1 :             CPPUNIT_ASSERT_MESSAGE("Expected a break at the start of the line, not at ]", aResult.breakIndex == 0);
     154           1 :         }
     155           1 :     }
     156           1 : }
     157             : 
     158             : //See https://bugs.freedesktop.org/show_bug.cgi?id=49629
     159           0 : void TestBreakIterator::testWordBoundaries()
     160             : {
     161           0 :     lang::Locale aLocale;
     162           0 :     aLocale.Language = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("en"));
     163           0 :     aLocale.Country = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("US"));
     164             : 
     165           0 :     i18n::Boundary aBounds;
     166             : 
     167             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=11993
     168             :     {
     169           0 :         rtl::OUString aTest("abcd ef  ghi??? KLM");
     170             : 
     171           0 :         CPPUNIT_ASSERT(!m_xBreak->isBeginWord(aTest, 4, aLocale, i18n::WordType::DICTIONARY_WORD));
     172           0 :         CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 4, aLocale, i18n::WordType::DICTIONARY_WORD));
     173           0 :         aBounds = m_xBreak->getWordBoundary(aTest, 4, aLocale, i18n::WordType::DICTIONARY_WORD, true);
     174           0 :         CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 4);
     175             : 
     176           0 :         CPPUNIT_ASSERT(!m_xBreak->isBeginWord(aTest, 8, aLocale, i18n::WordType::DICTIONARY_WORD));
     177           0 :         CPPUNIT_ASSERT(!m_xBreak->isEndWord(aTest, 8, aLocale, i18n::WordType::DICTIONARY_WORD));
     178             : 
     179             :         //next word
     180           0 :         aBounds = m_xBreak->getWordBoundary(aTest, 8, aLocale, i18n::WordType::DICTIONARY_WORD, true);
     181           0 :         CPPUNIT_ASSERT(aBounds.startPos == 9 && aBounds.endPos == 12);
     182             : 
     183             :         //previous word
     184           0 :         aBounds = m_xBreak->getWordBoundary(aTest, 8, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     185           0 :         CPPUNIT_ASSERT(aBounds.startPos == 5 && aBounds.endPos == 7);
     186             : 
     187           0 :         CPPUNIT_ASSERT(!m_xBreak->isBeginWord(aTest, 12, aLocale, i18n::WordType::DICTIONARY_WORD));
     188           0 :         CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 12, aLocale, i18n::WordType::DICTIONARY_WORD));
     189           0 :         aBounds = m_xBreak->getWordBoundary(aTest, 12, aLocale, i18n::WordType::DICTIONARY_WORD, true);
     190           0 :         CPPUNIT_ASSERT(aBounds.startPos == 9 && aBounds.endPos == 12);
     191             : 
     192           0 :         CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 16, aLocale, i18n::WordType::DICTIONARY_WORD));
     193           0 :         CPPUNIT_ASSERT(!m_xBreak->isEndWord(aTest, 16, aLocale, i18n::WordType::DICTIONARY_WORD));
     194           0 :         aBounds = m_xBreak->getWordBoundary(aTest, 16, aLocale, i18n::WordType::DICTIONARY_WORD, true);
     195           0 :         CPPUNIT_ASSERT(aBounds.startPos == 16 && aBounds.endPos == 19);
     196             :     }
     197             : 
     198             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=21907
     199             :     {
     200           0 :         rtl::OUString aTest("b a?");
     201             : 
     202           0 :         CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 1, aLocale, i18n::WordType::ANY_WORD));
     203           0 :         CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 2, aLocale, i18n::WordType::ANY_WORD));
     204           0 :         CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 3, aLocale, i18n::WordType::ANY_WORD));
     205             : 
     206           0 :         CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 3, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES));
     207             : 
     208           0 :         CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 1, aLocale, i18n::WordType::ANY_WORD));
     209           0 :         CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 2, aLocale, i18n::WordType::ANY_WORD));
     210           0 :         CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 3, aLocale, i18n::WordType::ANY_WORD));
     211             : 
     212           0 :         CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 3, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES));
     213             :     }
     214             : 
     215             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=14904
     216             :     {
     217             :         const sal_Unicode TEST[] =
     218             :         {
     219             :             'W', 'o', 'r', 'k', 'i', 'n', 'g', ' ', 0x201C, 'W', 'o', 'r', 'd', 's',
     220             :             ' ', 's', 't', 'a', 'r', 't', 'i', 'n', 'g', ' ', 'w', 'i', 't',
     221             :             'h', ' ', 'q', 'u', 'o', 't', 'e', 's', 0x201D, ' ', 'W', 'o', 'r', 'k',
     222             :             'i', 'n', 'g', ' ', 0x2018, 'B', 'r', 'o', 'k', 'e', 'n', 0x2019, ' ',
     223             :             '?', 'S', 'p', 'a', 'n', 'i', 's', 'h', '?', ' ', 'd', 'o', 'e',
     224             :             's', 'n', 0x2019, 't', ' ', 'w', 'o', 'r', 'k', '.', ' ', 'N', 'o',
     225             :             't', ' ', 'e', 'v', 'e', 'n', ' ' , 0x00BF, 'r', 'e', 'a', 'l', '?', ' ',
     226             :             'S', 'p', 'a', 'n', 'i', 's', 'h'
     227           0 :         };
     228           0 :         rtl::OUString aTest(TEST, SAL_N_ELEMENTS(TEST));
     229             : 
     230           0 :         aBounds = m_xBreak->getWordBoundary(aTest, 4, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     231           0 :         CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 7);
     232             : 
     233           0 :         aBounds = m_xBreak->getWordBoundary(aTest, 12, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     234           0 :         CPPUNIT_ASSERT(aBounds.startPos == 9 && aBounds.endPos == 14);
     235             : 
     236           0 :         aBounds = m_xBreak->getWordBoundary(aTest, 40, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     237           0 :         CPPUNIT_ASSERT(aBounds.startPos == 37 && aBounds.endPos == 44);
     238             : 
     239           0 :         aBounds = m_xBreak->getWordBoundary(aTest, 49, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     240           0 :         CPPUNIT_ASSERT(aBounds.startPos == 46 && aBounds.endPos == 52);
     241             : 
     242           0 :         aBounds = m_xBreak->getWordBoundary(aTest, 58, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     243           0 :         CPPUNIT_ASSERT(aBounds.startPos == 55 && aBounds.endPos == 62);
     244             : 
     245           0 :         aBounds = m_xBreak->getWordBoundary(aTest, 67, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     246           0 :         CPPUNIT_ASSERT(aBounds.startPos == 64 && aBounds.endPos == 71);
     247             : 
     248           0 :         aBounds = m_xBreak->getWordBoundary(aTest, 90, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     249           0 :         CPPUNIT_ASSERT(aBounds.startPos == 88 && aBounds.endPos == 92);
     250             :     }
     251             : 
     252             :     //See https://bugs.freedesktop.org/show_bug.cgi?id=49629
     253           0 :     sal_Unicode aBreakTests[] = { ' ', 1, 2, 3, 4, 5, 6, 7, 0x91, 0x92, 0x200B, 0xE8FF, 0xF8FF };
     254           0 :     for (int mode = i18n::WordType::ANY_WORD; mode <= i18n::WordType::WORD_COUNT; ++mode)
     255             :     {
     256             :         //make sure that in all cases isBeginWord and isEndWord matches getWordBoundary
     257           0 :         for (size_t i = 0; i < SAL_N_ELEMENTS(aBreakTests); ++i)
     258             :         {
     259           0 :             rtl::OUString aTest("Word");
     260           0 :             aTest += rtl::OUString(aBreakTests[i]) + rtl::OUString("Word");
     261           0 :             aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, mode, true);
     262           0 :             switch (mode)
     263             :             {
     264             :                 case i18n::WordType::ANY_WORD:
     265           0 :                     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 4);
     266           0 :                     break;
     267             :                 case i18n::WordType::ANYWORD_IGNOREWHITESPACES:
     268           0 :                     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 4);
     269           0 :                     break;
     270             :                 case i18n::WordType::DICTIONARY_WORD:
     271           0 :                     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 4);
     272           0 :                     break;
     273             :                 case i18n::WordType::WORD_COUNT:
     274           0 :                     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 4);
     275           0 :                     break;
     276             :             }
     277             : 
     278           0 :             CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, aBounds.startPos, aLocale, mode));
     279           0 :             CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, aBounds.endPos, aLocale, mode));
     280           0 :         }
     281             :     }
     282             : 
     283           0 :     sal_Unicode aJoinTests[] = { 'X', 0x200C, 0x200D, 0x2060, 0xFEFF, 0xFFF9, 0xFFFA, 0xFFFB };
     284           0 :     for (int mode = i18n::WordType::ANY_WORD; mode <= i18n::WordType::WORD_COUNT; ++mode)
     285             :     {
     286             :         //make sure that in all cases isBeginWord and isEndWord matches getWordBoundary
     287           0 :         for (size_t i = 0; i < SAL_N_ELEMENTS(aJoinTests); ++i)
     288             :         {
     289           0 :             rtl::OUString aTest("Word");
     290           0 :             aTest += rtl::OUString(aJoinTests[i]) + rtl::OUString("Word");
     291           0 :             aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, mode, true);
     292           0 :             switch (mode)
     293             :             {
     294             :                 case i18n::WordType::ANY_WORD:
     295           0 :                     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 9);
     296           0 :                     break;
     297             :                 case i18n::WordType::ANYWORD_IGNOREWHITESPACES:
     298           0 :                     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 9);
     299           0 :                     break;
     300             :                 case i18n::WordType::DICTIONARY_WORD:
     301           0 :                     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 9);
     302           0 :                     break;
     303             :                 case i18n::WordType::WORD_COUNT:
     304           0 :                     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 9);
     305           0 :                     break;
     306             :             }
     307             : 
     308           0 :             CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, aBounds.startPos, aLocale, mode));
     309           0 :             CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, aBounds.endPos, aLocale, mode));
     310           0 :         }
     311             :     }
     312             : 
     313             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=13494
     314             :     {
     315           0 :         const rtl::OUString aBase("xxAAxxBBxxCCxx");
     316             :         const sal_Unicode aTests[] =
     317             :         {
     318             :             '\'', ';', ',', '.', '!', '@', '#', '%', '&', '*',
     319             :             '(', ')', '_', '-', '{', '}', '[', ']', '\"', '/',
     320             :             '\\', '?', '~', '$', '+', '^', '=', '<', '>', '|'
     321           0 :         };
     322             : 
     323           0 :         const sal_Int32 aDoublePositions[] = {0, 2, 4, 6, 8, 10, 12, 14};
     324           0 :         for (size_t j = 0; j < SAL_N_ELEMENTS(aTests); ++j)
     325             :         {
     326           0 :             rtl::OUString aTest = aBase.replace('x', aTests[j]);
     327           0 :             sal_Int32 nPos = -1;
     328           0 :             size_t i = 0;
     329           0 :             do
     330             :             {
     331           0 :                 CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aDoublePositions));
     332           0 :                 nPos = m_xBreak->nextWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
     333           0 :                 CPPUNIT_ASSERT(nPos == aDoublePositions[i++]);
     334             :             }
     335           0 :             while (nPos < aTest.getLength());
     336           0 :             nPos = aTest.getLength();
     337           0 :             i = SAL_N_ELEMENTS(aDoublePositions)-1;
     338           0 :             do
     339             :             {
     340           0 :                 nPos = m_xBreak->previousWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
     341           0 :                 CPPUNIT_ASSERT(nPos == aDoublePositions[--i]);
     342             :             }
     343             :             while (nPos > 0);
     344           0 :         }
     345             : 
     346           0 :         const sal_Int32 aSinglePositions[] = {0, 1, 3, 4, 6, 7, 9, 10};
     347           0 :         for (size_t j = 1; j < SAL_N_ELEMENTS(aTests); ++j)
     348             :         {
     349           0 :             rtl::OUString aTest = aBase.replaceAll(rtl::OUString("xx"), rtl::OUString(aTests[j]));
     350           0 :             sal_Int32 nPos = -1;
     351           0 :             size_t i = 0;
     352           0 :             do
     353             :             {
     354           0 :                 CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aSinglePositions));
     355           0 :                 nPos = m_xBreak->nextWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
     356           0 :                 CPPUNIT_ASSERT(nPos == aSinglePositions[i++]);
     357             :             }
     358           0 :             while (nPos < aTest.getLength());
     359           0 :             nPos = aTest.getLength();
     360           0 :             i = SAL_N_ELEMENTS(aSinglePositions)-1;
     361           0 :             do
     362             :             {
     363           0 :                 nPos = m_xBreak->previousWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
     364           0 :                 CPPUNIT_ASSERT(nPos == aSinglePositions[--i]);
     365             :             }
     366             :             while (nPos > 0);
     367           0 :         }
     368             : 
     369           0 :         const sal_Int32 aSingleQuotePositions[] = {0, 1, 9, 10};
     370           0 :         CPPUNIT_ASSERT(aTests[0] == '\'');
     371             :         {
     372           0 :             rtl::OUString aTest = aBase.replaceAll(rtl::OUString("xx"), rtl::OUString(aTests[0]));
     373           0 :             sal_Int32 nPos = -1;
     374           0 :             size_t i = 0;
     375           0 :             do
     376             :             {
     377           0 :                 CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aSingleQuotePositions));
     378           0 :                 nPos = m_xBreak->nextWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
     379           0 :                 CPPUNIT_ASSERT(nPos == aSingleQuotePositions[i++]);
     380             :             }
     381           0 :             while (nPos < aTest.getLength());
     382           0 :             nPos = aTest.getLength();
     383           0 :             i = SAL_N_ELEMENTS(aSingleQuotePositions)-1;
     384           0 :             do
     385             :             {
     386           0 :                 nPos = m_xBreak->previousWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
     387           0 :                 CPPUNIT_ASSERT(nPos == aSingleQuotePositions[--i]);
     388             :             }
     389           0 :             while (nPos > 0);
     390           0 :         }
     391             :     }
     392             : 
     393             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=13451
     394             :     {
     395           0 :         aLocale.Language = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("ca"));
     396           0 :         aLocale.Country = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("ES"));
     397             : 
     398           0 :         rtl::OUString aTest("mirar-se comprar-vos donem-nos les mans aneu-vos-en!");
     399             : 
     400           0 :         sal_Int32 nPos = 0;
     401           0 :         sal_Int32 aExpected[] = {8, 20, 30, 34, 39, 51, 52};
     402           0 :         size_t i = 0;
     403           0 :         do
     404             :         {
     405           0 :             CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected));
     406           0 :             nPos = m_xBreak->getWordBoundary(aTest, nPos, aLocale,
     407           0 :                 i18n::WordType::DICTIONARY_WORD, true).endPos;
     408           0 :             CPPUNIT_ASSERT(aExpected[i++] == nPos);
     409             :         }
     410           0 :         while (nPos++ < aTest.getLength());
     411           0 :         CPPUNIT_ASSERT(i == SAL_N_ELEMENTS(aExpected));
     412             :     }
     413             : 
     414             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=85411
     415           0 :     for (int j = 0; j < 2; ++j)
     416             :     {
     417           0 :         switch (j)
     418             :         {
     419             :             case 0:
     420           0 :                 aLocale.Language = rtl::OUString("en");
     421           0 :                 aLocale.Country = rtl::OUString("US");
     422           0 :                 break;
     423             :             case 1:
     424           0 :                 aLocale.Language = rtl::OUString("ca");
     425           0 :                 aLocale.Country = rtl::OUString("ES");
     426           0 :                 break;
     427             :             default:
     428           0 :                 CPPUNIT_ASSERT(false);
     429           0 :                 break;
     430             :         }
     431             : 
     432             :         const sal_Unicode TEST[] =
     433             :         {
     434             :             'I', 0x200B, 'w', 'a', 'n', 't', 0x200B, 't', 'o', 0x200B, 'g', 'o'
     435           0 :         };
     436           0 :         rtl::OUString aTest(TEST, SAL_N_ELEMENTS(TEST));
     437             : 
     438           0 :         sal_Int32 nPos = 0;
     439           0 :         sal_Int32 aExpected[] = {1, 6, 9, 12};
     440           0 :         size_t i = 0;
     441           0 :         do
     442             :         {
     443           0 :             CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected));
     444           0 :             nPos = m_xBreak->getWordBoundary(aTest, nPos, aLocale,
     445           0 :                 i18n::WordType::DICTIONARY_WORD, true).endPos;
     446           0 :             CPPUNIT_ASSERT(aExpected[i++] == nPos);
     447             :         }
     448           0 :         while (nPos++ < aTest.getLength());
     449           0 :         CPPUNIT_ASSERT(i == SAL_N_ELEMENTS(aExpected));
     450           0 :     }
     451             : 
     452             :     //https://issues.apache.org/ooo/show_bug.cgi?id=21290
     453           0 :     for (int j = 0; j < 2; ++j)
     454             :     {
     455           0 :         switch (j)
     456             :         {
     457             :             case 0:
     458           0 :                 aLocale.Language = rtl::OUString("en");
     459           0 :                 aLocale.Country = rtl::OUString("US");
     460           0 :                 break;
     461             :             case 1:
     462           0 :                 aLocale.Language = rtl::OUString("grc");
     463           0 :                 aLocale.Country = rtl::OUString();
     464           0 :                 break;
     465             :             default:
     466           0 :                 CPPUNIT_ASSERT(false);
     467           0 :                 break;
     468             :         }
     469             : 
     470             :         const sal_Unicode TEST[] =
     471             :         {
     472             :             0x1F0C, 0x03BD, 0x03B4, 0x03C1, 0x03B1, 0x0020, 0x1F00,
     473             :             0x03C1, 0x03BD, 0x1F7B, 0x03BC, 0x03B5, 0x03BD, 0x03BF,
     474             :             0x03C2, 0x0020, 0x1F00, 0x03BB, 0x03BB, 0x0020, 0x1F24,
     475             :             0x03C3, 0x03B8, 0x03B9, 0x03BF, 0x03BD
     476           0 :         };
     477           0 :         rtl::OUString aTest(TEST, SAL_N_ELEMENTS(TEST));
     478             : 
     479           0 :         sal_Int32 nPos = 0;
     480           0 :         sal_Int32 aExpected[] = {5, 15, 19, 26};
     481           0 :         size_t i = 0;
     482           0 :         do
     483             :         {
     484           0 :             CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected));
     485           0 :             nPos = m_xBreak->getWordBoundary(aTest, nPos, aLocale,
     486           0 :                 i18n::WordType::DICTIONARY_WORD, true).endPos;
     487           0 :             CPPUNIT_ASSERT(aExpected[i++] == nPos);
     488             :         }
     489           0 :         while (nPos++ < aTest.getLength());
     490           0 :         CPPUNIT_ASSERT(i == SAL_N_ELEMENTS(aExpected));
     491           0 :     }
     492             : 
     493             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=58513
     494             :     {
     495           0 :         aLocale.Language = "fi";
     496           0 :         aLocale.Country = "FI";
     497             : 
     498           0 :         rtl::OUString aTest("Kuorma-auto kaakkois- ja Keski-Suomi");
     499             : 
     500             :         {
     501           0 :             sal_Int32 nPos = 0;
     502           0 :             sal_Int32 aExpected[] = {12, 22, 25, 36};
     503           0 :             size_t i = 0;
     504           0 :             do
     505             :             {
     506           0 :                 CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected));
     507           0 :                 nPos = m_xBreak->getWordBoundary(aTest, nPos, aLocale,
     508           0 :                     i18n::WordType::WORD_COUNT, true).endPos;
     509           0 :                 CPPUNIT_ASSERT(aExpected[i++] == nPos);
     510             :             }
     511           0 :             while (nPos++ < aTest.getLength());
     512           0 :             CPPUNIT_ASSERT(i == SAL_N_ELEMENTS(aExpected));
     513             :         }
     514             : 
     515             :         {
     516           0 :             sal_Int32 nPos = 0;
     517           0 :             sal_Int32 aExpected[] = {0, 11, 12, 21, 22, 24, 25, 36};
     518           0 :             size_t i = 0;
     519           0 :             do
     520             :             {
     521           0 :                 CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected));
     522           0 :                 aBounds = m_xBreak->getWordBoundary(aTest, nPos, aLocale,
     523           0 :                     i18n::WordType::DICTIONARY_WORD, true);
     524           0 :                 CPPUNIT_ASSERT(aExpected[i++] == aBounds.startPos);
     525           0 :                 CPPUNIT_ASSERT(aExpected[i++] == aBounds.endPos);
     526           0 :                 nPos = aBounds.endPos;
     527             :             }
     528           0 :             while (nPos++ < aTest.getLength());
     529           0 :             CPPUNIT_ASSERT(i == SAL_N_ELEMENTS(aExpected));
     530           0 :         }
     531             :     }
     532             : 
     533             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=107843
     534             :     {
     535           0 :         aLocale.Language = rtl::OUString("en");
     536           0 :         aLocale.Country = rtl::OUString("US");
     537             : 
     538             :         const sal_Unicode TEST[] =
     539             :         {
     540             :             'r', 'u', 0xFB00, 'l', 'e', ' ', 0xFB01, 's', 'h'
     541           0 :         };
     542           0 :         rtl::OUString aTest(TEST, SAL_N_ELEMENTS(TEST));
     543             : 
     544           0 :         aBounds = m_xBreak->getWordBoundary(aTest, 1, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     545           0 :         CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 5);
     546             : 
     547           0 :         aBounds = m_xBreak->getWordBoundary(aTest, 7, aLocale, i18n::WordType::DICTIONARY_WORD, false);
     548           0 :         CPPUNIT_ASSERT(aBounds.startPos == 6 && aBounds.endPos == 9);
     549             :     }
     550             : 
     551             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=113785
     552             :     {
     553           0 :         aLocale.Language = rtl::OUString("en");
     554           0 :         aLocale.Country = rtl::OUString("US");
     555             : 
     556             :         const sal_Unicode TEST[] =
     557             :         {
     558             :             'a', 0x2013, 'b', 0x2014, 'c'
     559           0 :         };
     560           0 :         rtl::OUString aTest(TEST, SAL_N_ELEMENTS(TEST));
     561             : 
     562           0 :         aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, i18n::WordType::DICTIONARY_WORD, true);
     563           0 :         CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 1);
     564             : 
     565           0 :         aBounds = m_xBreak->nextWord(aTest, 0, aLocale, i18n::WordType::DICTIONARY_WORD);
     566           0 :         CPPUNIT_ASSERT(aBounds.startPos == 2 && aBounds.endPos == 3);
     567             : 
     568           0 :         aBounds = m_xBreak->nextWord(aTest, aBounds.endPos, aLocale, i18n::WordType::DICTIONARY_WORD);
     569           0 :         CPPUNIT_ASSERT(aBounds.startPos == 4 && aBounds.endPos == 5);
     570           0 :     }
     571           0 : }
     572             : //Checks that SKIPCELL character iteration steps over whole graphemes, forwards and backwards
     573             : //See https://bugs.freedesktop.org/show_bug.cgi?id=40292
     574             : //See https://issues.apache.org/ooo/show_bug.cgi?id=80412
     575             : //See https://issues.apache.org/ooo/show_bug.cgi?id=111152
     576             : //See https://issues.apache.org/ooo/show_bug.cgi?id=50172
     577           1 : void TestBreakIterator::testGraphemeIteration()
     578             : {
     579           1 :     lang::Locale aLocale;
     580           1 :     aLocale.Language = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("bn"));
     581           1 :     aLocale.Country = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("IN"));
     582             :     //bn-IN: every sequence in the next three scopes must be crossed in one step
     583             :     {
     584           1 :         const sal_Unicode BA_HALANT_LA[] = { 0x09AC, 0x09CD, 0x09AF };
     585           1 :         rtl::OUString aTest(BA_HALANT_LA, SAL_N_ELEMENTS(BA_HALANT_LA));
     586             :         //one step forward from 0 must land on the end, one step back from the end on 0
     587           1 :         sal_Int32 nDone=0;
     588             :         sal_Int32 nPos;
     589           1 :         nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
     590           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     591           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(BA_HALANT_LA));
     592           1 :         nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(BA_HALANT_LA), aLocale,
     593           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     594           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
     595             :     }
     596             : 
     597             :     {
     598           1 :         const sal_Unicode HA_HALANT_NA_VOWELSIGNI[] = { 0x09B9, 0x09CD, 0x09A3, 0x09BF };
     599           1 :         rtl::OUString aTest(HA_HALANT_NA_VOWELSIGNI, SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI));
     600             : 
     601           1 :         sal_Int32 nDone=0;
     602             :         sal_Int32 nPos;
     603           1 :         nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
     604           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     605           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI));
     606           1 :         nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI), aLocale,
     607           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     608           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
     609             :     }
     610             : 
     611             :     {
     612           1 :         const sal_Unicode TA_HALANT_MA_HALANT_YA  [] = { 0x09A4, 0x09CD, 0x09AE, 0x09CD, 0x09AF };
     613           1 :         rtl::OUString aTest(TA_HALANT_MA_HALANT_YA, SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA));
     614             : 
     615           1 :         sal_Int32 nDone=0;
     616             :         sal_Int32 nPos;
     617           1 :         nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
     618           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     619           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA));
     620           1 :         nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA), aLocale,
     621           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     622           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
     623             :     }
     624             : 
     625           1 :     aLocale.Language = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("ta"));
     626           1 :     aLocale.Country = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("IN"));
     627             :     //ta-IN: same single-step expectation for the next two scopes
     628             :     {
     629           1 :         const sal_Unicode KA_VIRAMA_SSA[] = { 0x0B95, 0x0BCD, 0x0BB7 };
     630           1 :         rtl::OUString aTest(KA_VIRAMA_SSA, SAL_N_ELEMENTS(KA_VIRAMA_SSA));
     631             : 
     632           1 :         sal_Int32 nDone=0;
     633           1 :         sal_Int32 nPos = 0;
     634             : 
     635           1 :         nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
     636           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     637           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(KA_VIRAMA_SSA));
     638           1 :         nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(KA_VIRAMA_SSA), aLocale,
     639           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     640           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
     641             :     }
     642             : 
     643             :     {
     644           1 :         const sal_Unicode KA_VOWELSIGNU[] = { 0x0B95, 0x0BC1 };
     645           1 :         rtl::OUString aTest(KA_VOWELSIGNU, SAL_N_ELEMENTS(KA_VOWELSIGNU));
     646             : 
     647           1 :         sal_Int32 nDone=0;
     648           1 :         sal_Int32 nPos = 0;
     649             : 
     650           1 :         nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
     651           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     652           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(KA_VOWELSIGNU));
     653           1 :         nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(KA_VOWELSIGNU), aLocale,
     654           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     655           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
     656             :     }
     657             : 
     658             :     {
     659             :         const sal_Unicode CA_VOWELSIGNI_TA_VIRAMA_TA_VOWELSIGNI_RA_VOWELSIGNAI[] =
     660           1 :             { 0x0B9A, 0x0BBF, 0x0BA4, 0x0BCD, 0x0BA4, 0x0BBF, 0x0BB0, 0x0BC8 };
     661             :         rtl::OUString aTest(CA_VOWELSIGNI_TA_VIRAMA_TA_VOWELSIGNI_RA_VOWELSIGNAI,
     662           1 :             SAL_N_ELEMENTS(CA_VOWELSIGNI_TA_VIRAMA_TA_VOWELSIGNI_RA_VOWELSIGNAI));
     663             :         //8 code units: four forward steps, then four backward steps, each expected to move by exactly 2
     664           1 :         sal_Int32 nDone=0;
     665           1 :         sal_Int32 nPos=0;
     666             : 
     667           5 :         for (sal_Int32 i = 0; i < 4; ++i)
     668             :         {
     669           4 :             sal_Int32 nOldPos = nPos;
     670           4 :             nPos = m_xBreak->nextCharacters(aTest, nPos, aLocale,
     671           4 :                 i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     672           4 :             CPPUNIT_ASSERT_MESSAGE("Should skip 2 units", nPos == nOldPos+2);
     673             :         }
     674             : 
     675           5 :         for (sal_Int32 i = 0; i < 4; ++i)
     676             :         {
     677           4 :             sal_Int32 nOldPos = nPos;
     678           4 :             nPos = m_xBreak->previousCharacters(aTest, nPos, aLocale,
     679           4 :                 i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     680           4 :             CPPUNIT_ASSERT_MESSAGE("Should skip 2 units", nPos == nOldPos-2);
     681           1 :         }
     682             :     }
     683             :     //This scope passes a default-constructed lang::Locale() instead of aLocale
     684             :     {
     685           1 :         const sal_Unicode ALEF_QAMATS [] = { 0x05D0, 0x05B8 };
     686           1 :         rtl::OUString aText(ALEF_QAMATS, SAL_N_ELEMENTS(ALEF_QAMATS));
     687             : 
     688           1 :         sal_Int32 nGraphemeCount = 0;
     689             :         //count the steps needed to reach the end of the text; exactly one is expected
     690           1 :         sal_Int32 nCurPos = 0;
     691           3 :         while (nCurPos < aText.getLength())
     692             :         {
     693           1 :             sal_Int32 nCount2 = 1; //passed below as both of the last two arguments
     694           1 :             nCurPos = m_xBreak->nextCharacters(aText, nCurPos, lang::Locale(),
     695           1 :                 i18n::CharacterIteratorMode::SKIPCELL, nCount2, nCount2);
     696           1 :             ++nGraphemeCount;
     697             :         }
     698             : 
     699           1 :         CPPUNIT_ASSERT_MESSAGE("Should be considered 1 grapheme", nGraphemeCount == 1);
     700             :     }
     701             : 
     702           1 :     aLocale.Language = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("hi"));
     703           1 :     aLocale.Country = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("IN"));
     704             :     //hi-IN: same single-step expectation
     705             :     {
     706           1 :         const sal_Unicode SHA_VOWELSIGNII[] = { 0x936, 0x940 };
     707           1 :         rtl::OUString aTest(SHA_VOWELSIGNII, SAL_N_ELEMENTS(SHA_VOWELSIGNII));
     708             : 
     709           1 :         sal_Int32 nDone=0;
     710           1 :         sal_Int32 nPos = 0;
     711             : 
     712           1 :         nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
     713           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     714           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(SHA_VOWELSIGNII));
     715           1 :         nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(SHA_VOWELSIGNII), aLocale,
     716           1 :             i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
     717           1 :         CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
     718           1 :     }
     719           1 : }
     720             : 
     721             : //A test to ensure that certain ranges and codepoints that are categorized as
     722             : //weak remain as weak, so that existing docs that depend on this don't silently
     723             : //change font for those weak chars
     724           1 : void TestBreakIterator::testWeak()
     725             : {
     726           1 :     lang::Locale aLocale;
     727           1 :     aLocale.Language = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("en"));
     728           1 :     aLocale.Country = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("US"));
     729             :     //NOTE(review): aLocale is filled in above but is not passed to any call in this function
     730             :     {
     731             :         const sal_Unicode WEAKS[] =
     732             :         {
     733             :             0x0001, 0x0002,
     734             :             0x0020, 0x00A0,
     735             :             0x2150, 0x215F, //Number Forms, fractions
     736             :             0x2160, 0x2180, //Number Forms, roman numerals
     737             :             0x2200, 0x22FF, //Mathematical Operators
     738             :             0x27C0, 0x27EF, //Miscellaneous Mathematical Symbols-A
     739             :             0x2980, 0x29FF, //Miscellaneous Mathematical Symbols-B
     740             :             0x2A00, 0x2AFF, //Supplemental Mathematical Operators
     741             :             0x2100, 0x214F, //Letterlike Symbols
     742             :             0x2308, 0x230B, //Miscellaneous technical
     743             :             0x25A0, 0x25FF, //Geometric Shapes
     744             :             0x2B30, 0x2B4C  //Miscellaneous Symbols and Arrows
     745           1 :         };
     746           1 :         rtl::OUString aWeaks(WEAKS, SAL_N_ELEMENTS(WEAKS));
     747             :         //Only the code units listed in WEAKS are checked, not the values between each pair
     748          25 :         for (sal_Int32 i = 0; i < aWeaks.getLength(); ++i)
     749             :         {
     750          24 :             sal_Int16 nScript = m_xBreak->getScriptType(aWeaks, i);
     751          24 :             rtl::OStringBuffer aMsg;
     752          24 :             aMsg.append(RTL_CONSTASCII_STRINGPARAM("Char 0x"));
     753          24 :             aMsg.append(static_cast<sal_Int32>(aWeaks.getStr()[i]), 16);
     754          24 :             aMsg.append(RTL_CONSTASCII_STRINGPARAM(" should have been weak"));
     755          48 :             CPPUNIT_ASSERT_MESSAGE(aMsg.getStr(),
     756          24 :                 nScript == i18n::ScriptType::WEAK);
     757          25 :         }
     758           1 :     }
     759           1 : }
     760             : 
     761             : //A test to ensure that certain ranges and codepoints that are categorized as
     762             : //asian remain as asian, so that existing docs that depend on this don't silently
     763             : //change font for those asian chars.
     764             : //See https://bugs.freedesktop.org/show_bug.cgi?id=38095
     765           1 : void TestBreakIterator::testAsian()
     766             : {
     767           1 :     lang::Locale aLocale;
     768           1 :     aLocale.Language = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("en"));
     769           1 :     aLocale.Country = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("US"));
     770             :     //NOTE(review): aLocale is filled in above but is not passed to any call in this function
     771             :     {
     772             :         const sal_Unicode ASIANS[] =
     773             :         {
     774             :             //some typical CJK chars
     775             :             0x4E00, 0x62FF,
     776             :             //The full HalfWidth and FullWidth block has historically been
     777             :             //designated as taking the CJK font :-(
     778             :             //HalfWidth and FullWidth forms of ASCII 0-9, categorized under
     779             :             //UAX24 as "Common" i.e. by that logic WEAK
     780             :             0xFF10, 0xFF19,
     781             :             //HalfWidth and FullWidth forms of ASCII A-z, categorized under
     782             :             //UAX24 as "Latin", i.e. by that logic LATIN
     783             :             0xFF21, 0xFF5A
     784           1 :         };
     785           1 :         rtl::OUString aAsians(ASIANS, SAL_N_ELEMENTS(ASIANS));
     786             :         //Only the code units listed in ASIANS are checked, not the values between each pair
     787           7 :         for (sal_Int32 i = 0; i < aAsians.getLength(); ++i)
     788             :         {
     789           6 :             sal_Int16 nScript = m_xBreak->getScriptType(aAsians, i);
     790           6 :             rtl::OStringBuffer aMsg;
     791           6 :             aMsg.append(RTL_CONSTASCII_STRINGPARAM("Char 0x"));
     792           6 :             aMsg.append(static_cast<sal_Int32>(aAsians.getStr()[i]), 16);
     793           6 :             aMsg.append(RTL_CONSTASCII_STRINGPARAM(" should have been asian"));
     794          12 :             CPPUNIT_ASSERT_MESSAGE(aMsg.getStr(),
     795           6 :                 nScript == i18n::ScriptType::ASIAN);
     796           7 :         }
     797           1 :     }
     798           1 : }
     799             : 
     800             : //A test to ensure that our Thai word boundary detection is useful
     801           1 : void TestBreakIterator::testThai()
     802             : {
     803           1 :     lang::Locale aLocale;
     804           1 :     aLocale.Language = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("th"));
     805           1 :     aLocale.Country = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("TH"));
     806             : 
     807             :     //See http://lists.freedesktop.org/archives/libreoffice/2012-February/025959.html
     808             :     {
     809           1 :         const sal_Unicode THAI[] = { 0x0E01, 0x0E38, 0x0E2B, 0x0E25, 0x0E32, 0x0E1A };
     810           1 :         rtl::OUString aTest(THAI, SAL_N_ELEMENTS(THAI));
     811           1 :         i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale,
     812           1 :             i18n::WordType::DICTIONARY_WORD, true);
     813           2 :         CPPUNIT_ASSERT_MESSAGE("Should skip full word",
     814           2 :             aBounds.startPos == 0 && aBounds.endPos == aTest.getLength());
     815             :     }
     816             : 
     817             :     //See https://issues.apache.org/ooo/show_bug.cgi?id=29548
     818             :     //make sure forwards and back are consistent
     819             :     {
     820             :         const sal_Unicode THAI[] =
     821             :         {
     822             :             0x0E2D, 0x0E38, 0x0E17, 0x0E22, 0x0E32, 0x0E19, 0x0E41,
     823             :             0x0E2B, 0x0E48, 0x0E07, 0x0E0A, 0x0E32, 0x0E15, 0x0E34,
     824             :             0x0E19, 0x0E49, 0x0E33, 0x0E2B, 0x0E19, 0x0E32, 0x0E27,
     825             :             0x0E2D, 0x0E38, 0x0E17, 0x0E22, 0x0E32, 0x0E19, 0x0E41,
     826             :             0x0E2B, 0x0E48, 0x0E07, 0x0E0A, 0x0E32, 0x0E15, 0x0E34,
     827             :             0x0E19, 0x0E49, 0x0E33, 0x0E2B, 0x0E19, 0x0E32, 0x0E27
     828           1 :         };
     829           1 :         rtl::OUString aTest(THAI, SAL_N_ELEMENTS(THAI));
     830             :         //push every nextWord start going forwards, then require previousWord to return them in reverse
     831           1 :         std::stack<sal_Int32> aPositions;
     832           1 :         sal_Int32 nPos = -1;
     833          11 :         do
     834             :         {
     835          11 :             nPos = m_xBreak->nextWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
     836          11 :             aPositions.push(nPos);
     837             :         }
     838          11 :         while (nPos < aTest.getLength());
     839           1 :         nPos = aTest.getLength();
     840           1 :         CPPUNIT_ASSERT(!aPositions.empty());
     841           1 :         aPositions.pop(); //drop the final position, which is the one that ended the forward loop
     842          10 :         do
     843             :         {
     844          10 :             CPPUNIT_ASSERT(!aPositions.empty());
     845          10 :             nPos = m_xBreak->previousWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
     846          10 :             CPPUNIT_ASSERT(nPos == aPositions.top());
     847          10 :             aPositions.pop();
     848             :         }
     849           1 :         while (nPos > 0);
     850           1 :     }
     851           1 : }
     852             : 
     853             : #if TODO
     854             : void TestBreakIterator::testNorthernThai()
     855             : {
     856             :     lang::Locale aLocale;
     857             :     aLocale.Language = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("nod"));
     858             :     aLocale.Country = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("TH"));
     859             :     //With the nod-TH locale the whole string is expected to come back as a single word
     860             :     const sal_Unicode NORTHERN_THAI1[] = { 0x0E01, 0x0E38, 0x0E4A, 0x0E2B, 0x0E25, 0x0E32, 0x0E1A };
     861             :     rtl::OUString aTest(NORTHERN_THAI1, SAL_N_ELEMENTS(NORTHERN_THAI1));
     862             :     i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale,
     863             :         i18n::WordType::DICTIONARY_WORD, true);
     864             :     CPPUNIT_ASSERT_MESSAGE("Should skip full word",
     865             :         aBounds.startPos == 0 && aBounds.endPos == aTest.getLength());
     866             : }
     867             : #endif
     868             : 
     869             : #if (U_ICU_VERSION_MAJOR_NUM > 4)
     870             : //A test to ensure that our Khmer word boundary detection is useful
     871             : //https://bugs.freedesktop.org/show_bug.cgi?id=52020
     872             : //
     873             : //ICU doesn't have the Khmer word boundary dictionaries in <= 4.0.0 but does in
     874             : //the current 49.x.y. Not sure which version first had them introduced.
     875             : void TestBreakIterator::testKhmer()
     876             : {
     877             :     lang::Locale aLocale;
     878             :     aLocale.Language = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("km"));
     879             :     aLocale.Country = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("KH"));
     880             : 
     881             :     const sal_Unicode KHMER[] = { 0x17B2, 0x17D2, 0x1799, 0x1782, 0x17C1 };
     882             : 
     883             :     rtl::OUString aTest(KHMER, SAL_N_ELEMENTS(KHMER));
     884             :     i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale,
     885             :         i18n::WordType::DICTIONARY_WORD, true);
     886             :     //the word found from index 0 is expected to span [0,3)
     887             :     CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 3);
     888             :     //query again from the end of that word; [3,5) is expected
     889             :     aBounds = m_xBreak->getWordBoundary(aTest, aBounds.endPos, aLocale,
     890             :         i18n::WordType::DICTIONARY_WORD, true);
     891             : 
     892             :     CPPUNIT_ASSERT(aBounds.startPos == 3 && aBounds.endPos == 5);
     893             : }
     894             : #endif
     895             : 
     896           1 : void TestBreakIterator::testJapanese()
     897             : {
     898           1 :     lang::Locale aLocale;
     899           1 :     aLocale.Language = OUString("ja");
     900           1 :     aLocale.Country = OUString("JP");
     901           1 :     i18n::Boundary aBounds;
     902             :     //Querying from index 5, the whole 7-unit string is expected back as one word
     903             :     {
     904           1 :         const sal_Unicode JAPANESE[] = { 0x30B7, 0x30E3, 0x30C3, 0x30C8, 0x30C0, 0x30A6, 0x30F3 };
     905             : 
     906           1 :         rtl::OUString aTest(JAPANESE, SAL_N_ELEMENTS(JAPANESE));
     907           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 5, aLocale,
     908           1 :             i18n::WordType::DICTIONARY_WORD, true);
     909             : 
     910           1 :         CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 7);
     911             :     }
     912             :     //The same 3-unit sequence twice: index 1 is expected to give [0,3) and index 5 to give [3,6)
     913             :     {
     914           1 :         const sal_Unicode JAPANESE[] = { 0x9EBB, 0x306E, 0x8449, 0x9EBB, 0x306E, 0x8449 };
     915             : 
     916           1 :         rtl::OUString aTest(JAPANESE, SAL_N_ELEMENTS(JAPANESE));
     917           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 1, aLocale,
     918           1 :             i18n::WordType::DICTIONARY_WORD, true);
     919             : 
     920           1 :         CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 3);
     921             : 
     922           1 :         aBounds = m_xBreak->getWordBoundary(aTest, 5, aLocale,
     923           1 :             i18n::WordType::DICTIONARY_WORD, true);
     924             : 
     925           1 :         CPPUNIT_ASSERT(aBounds.startPos == 3 && aBounds.endPos == 6);
     926           1 :     }
     927           1 : }
     928             : //Runs the base fixture setUp, then creates the com.sun.star.i18n.BreakIterator service held in m_xBreak
     929           6 : void TestBreakIterator::setUp()
     930             : {
     931           6 :     BootstrapFixtureBase::setUp();
     932           6 :     m_xBreak = uno::Reference< i18n::XBreakIterator >(m_xSFactory->createInstance(
     933           6 :         "com.sun.star.i18n.BreakIterator"), uno::UNO_QUERY_THROW);
     934           6 : }
     935             : //Clears the m_xBreak reference first, then runs the base fixture tearDown
     936           6 : void TestBreakIterator::tearDown()
     937             : {
     938           6 :     m_xBreak.clear();
     939           6 :     BootstrapFixtureBase::tearDown();
     940           6 : }
     941             : //Register the TestBreakIterator suite with CppUnit's test factory registry
     942           1 : CPPUNIT_TEST_SUITE_REGISTRATION(TestBreakIterator);
     943             : //Emit the CppUnit test plug-in boilerplate for this library
     944           4 : CPPUNIT_PLUGIN_IMPLEMENT();
     945             : 
     946             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10