Branch data Line data Source code
1 : : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : : /*
3 : : * Version: MPL 1.1 / GPLv3+ / LGPLv3+
4 : : *
5 : : * The contents of this file are subject to the Mozilla Public License Version
6 : : * 1.1 (the "License"); you may not use this file except in compliance with
7 : : * the License. You may obtain a copy of the License at
8 : : * http://www.mozilla.org/MPL/
9 : : *
10 : : * Software distributed under the License is distributed on an "AS IS" basis,
11 : : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : : * for the specific language governing rights and limitations under the
13 : : * License.
14 : : *
15 : : * The Initial Developer of the Original Code is
16 : : * Caolán McNamara <caolanm@redhat.com>
17 : : *
18 : : * Contributor(s):
19 : : * Caolán McNamara <caolanm@redhat.com>
20 : : *
21 : : * Alternatively, the contents of this file may be used under the terms of
22 : : * either the GNU General Public License Version 3 or later (the "GPLv3+"), or
23 : : * the GNU Lesser General Public License Version 3 or later (the "LGPLv3+"),
24 : : * in which case the provisions of the GPLv3+ or the LGPLv3+ are applicable
25 : : * instead of those above.
26 : : */
27 : :
28 : : #include <cppuhelper/compbase1.hxx>
29 : : #include <cppuhelper/bootstrap.hxx>
30 : : #include <cppuhelper/basemutex.hxx>
31 : : #include <com/sun/star/i18n/XBreakIterator.hpp>
32 : : #include <com/sun/star/i18n/CharacterIteratorMode.hpp>
33 : : #include <com/sun/star/i18n/ScriptType.hpp>
34 : : #include <com/sun/star/i18n/WordType.hpp>
35 : : #include <unotest/bootstrapfixturebase.hxx>
36 : :
37 : : #include <unicode/uvernum.h>
38 : :
39 : : #include <rtl/strbuf.hxx>
40 : : #include <rtl/ustrbuf.hxx>
41 : :
42 : : #include <string.h>
43 : :
44 : : #include <stack>
45 : :
46 : : using namespace ::com::sun::star;
47 : :
48 [ - + ]: 45 : class TestBreakIterator : public test::BootstrapFixtureBase
49 : : {
50 : : public:
51 : : virtual void setUp();
52 : : virtual void tearDown();
53 : :
54 : : void testLineBreaking();
55 : : void testWordBoundaries();
56 : : void testGraphemeIteration();
57 : : void testWeak();
58 : : void testAsian();
59 : : void testThai();
60 : : #if TODO
61 : : void testNorthernThai();
62 : : #endif
63 : : void testKhmer();
64 : :
65 [ + - ][ + - ]: 6 : CPPUNIT_TEST_SUITE(TestBreakIterator);
[ + - ][ + - ]
[ # # ]
66 [ + - ][ + - ]: 3 : CPPUNIT_TEST(testLineBreaking);
[ + - ][ + - ]
[ + - ][ + - ]
67 [ + - ][ + - ]: 3 : CPPUNIT_TEST(testGraphemeIteration);
[ + - ][ + - ]
[ + - ][ + - ]
68 [ + - ][ + - ]: 3 : CPPUNIT_TEST(testWeak);
[ + - ][ + - ]
[ + - ][ + - ]
69 [ + - ][ + - ]: 3 : CPPUNIT_TEST(testAsian);
[ + - ][ + - ]
[ + - ][ + - ]
70 [ + - ][ + - ]: 3 : CPPUNIT_TEST(testThai);
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ]
71 : : #if TODO
72 : : CPPUNIT_TEST(testNorthernThai);
73 : : #endif
74 : : #if (U_ICU_VERSION_MAJOR_NUM > 4)
75 : : CPPUNIT_TEST(testWordBoundaries);
76 : : CPPUNIT_TEST(testKhmer);
77 : : #endif
78 [ + - ][ + - ]: 6 : CPPUNIT_TEST_SUITE_END();
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ + - ]
79 : : private:
80 : : uno::Reference<i18n::XBreakIterator> m_xBreak;
81 : : };
82 : :
83 : 3 : void TestBreakIterator::testLineBreaking()
84 : : {
85 [ + - ]: 3 : i18n::LineBreakHyphenationOptions aHyphOptions;
86 : 3 : i18n::LineBreakUserOptions aUserOptions;
87 : 3 : lang::Locale aLocale;
88 : :
89 : : //See https://bugs.freedesktop.org/show_bug.cgi?id=31271
90 : : {
91 [ + - ]: 3 : rtl::OUString aTest(RTL_CONSTASCII_USTRINGPARAM("(some text here)"));
92 : :
93 [ + - ]: 3 : aLocale.Language = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("en"));
94 [ + - ]: 3 : aLocale.Country = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("US"));
95 : :
96 : : {
97 : : //Here we want the line break to leave text here) on the next line
98 [ + - ][ + - ]: 3 : i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, strlen("(some tex"), aLocale, 0, aHyphOptions, aUserOptions);
99 [ + - ][ + - ]: 3 : CPPUNIT_ASSERT_MESSAGE("Expected a break at the the start of the word", aResult.breakIndex == 6);
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ + - ]
100 : : }
101 : :
102 : : {
103 : : //Here we want the line break to leave "here)" on the next line
104 [ + - ][ + - ]: 3 : i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, strlen("(some text here"), aLocale, 0, aHyphOptions, aUserOptions);
105 [ + - ][ + - ]: 3 : CPPUNIT_ASSERT_MESSAGE("Expected a break at the the start of the word", aResult.breakIndex == 11);
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ + - ]
106 : 3 : }
107 : : }
108 : :
109 : : //See https://bugs.freedesktop.org/show_bug.cgi?id=49849
110 : : {
111 : 3 : const sal_Unicode HEBREW1[] = { 0x05DE, 0x05D9, 0x05DC, 0x05D9, 0x5DD };
112 : 3 : rtl::OUString aWord(HEBREW1, SAL_N_ELEMENTS(HEBREW1));
113 [ + - ][ + - ]: 3 : rtl::OUString aTest(rtl::OUStringBuffer(aWord).append(' ').append(aWord).makeStringAndClear());
[ + - ][ + - ]
114 : :
115 [ + - ]: 3 : aLocale.Language = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("he"));
116 [ + - ]: 3 : aLocale.Country = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("IL"));
117 : :
118 : : {
119 : : //Here we want the line break to happen at the whitespace
120 [ + - ][ + - ]: 3 : i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, aTest.getLength()-1, aLocale, 0, aHyphOptions, aUserOptions);
121 [ + - ][ + - ]: 3 : CPPUNIT_ASSERT_MESSAGE("Expected a break at the the start of the word", aResult.breakIndex == aWord.getLength()+1);
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ + - ]
122 : 3 : }
123 : : }
124 : :
125 : : //See https://issues.apache.org/ooo/show_bug.cgi?id=17155
126 : : {
127 [ + - ]: 3 : rtl::OUString aTest(RTL_CONSTASCII_USTRINGPARAM("foo /bar/baz"));
128 : :
129 [ + - ]: 3 : aLocale.Language = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("en"));
130 [ + - ]: 3 : aLocale.Country = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("US"));
131 : :
132 : : {
133 : : //Here we want the line break to leave /bar/ba clumped together on the next line
134 [ + - ]: 3 : i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, strlen("foo /bar/ba"), aLocale, 0,
135 [ + - ]: 3 : aHyphOptions, aUserOptions);
136 [ + - ][ + - ]: 3 : CPPUNIT_ASSERT_MESSAGE("Expected a break at the first slash", aResult.breakIndex == 4);
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ + - ]
137 : 3 : }
138 : : }
139 : :
140 : : //See https://issues.apache.org/ooo/show_bug.cgi?id=19716
141 : : {
142 [ + - ]: 3 : rtl::OUString aTest(RTL_CONSTASCII_USTRINGPARAM("aaa]aaa"));
143 : :
144 [ + - ]: 3 : aLocale.Language = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("en"));
145 [ + - ]: 3 : aLocale.Country = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("US"));
146 : :
147 : : {
148 : : //Here we want the line break to move the whole lot to the next line
149 [ + - ]: 3 : i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, aTest.getLength()-2, aLocale, 0,
150 [ + - ]: 3 : aHyphOptions, aUserOptions);
151 [ + - ][ + - ]: 3 : CPPUNIT_ASSERT_MESSAGE("Expected a break at the start of the line, not at ]", aResult.breakIndex == 0);
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ + - ]
152 : 3 : }
153 [ + - ]: 3 : }
154 : 3 : }
155 : :
156 : : //See https://bugs.freedesktop.org/show_bug.cgi?id=49629
157 : 0 : void TestBreakIterator::testWordBoundaries()
158 : : {
159 : 0 : lang::Locale aLocale;
160 [ # # ]: 0 : aLocale.Language = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("en"));
161 [ # # ]: 0 : aLocale.Country = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("US"));
162 : :
163 : 0 : i18n::Boundary aBounds;
164 : :
165 : : //See https://issues.apache.org/ooo/show_bug.cgi?id=11993
166 : : {
167 : 0 : rtl::OUString aTest("abcd ef ghi??? KLM");
168 : :
169 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(!m_xBreak->isBeginWord(aTest, 4, aLocale, i18n::WordType::DICTIONARY_WORD));
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
170 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 4, aLocale, i18n::WordType::DICTIONARY_WORD));
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
171 [ # # ][ # # ]: 0 : aBounds = m_xBreak->getWordBoundary(aTest, 4, aLocale, i18n::WordType::DICTIONARY_WORD, true);
172 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 4);
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
173 : :
174 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(!m_xBreak->isBeginWord(aTest, 8, aLocale, i18n::WordType::DICTIONARY_WORD));
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
175 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(!m_xBreak->isEndWord(aTest, 8, aLocale, i18n::WordType::DICTIONARY_WORD));
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
176 : :
177 : : //next word
178 [ # # ][ # # ]: 0 : aBounds = m_xBreak->getWordBoundary(aTest, 8, aLocale, i18n::WordType::DICTIONARY_WORD, true);
179 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(aBounds.startPos == 9 && aBounds.endPos == 12);
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
180 : :
181 : : //previous word
182 [ # # ][ # # ]: 0 : aBounds = m_xBreak->getWordBoundary(aTest, 8, aLocale, i18n::WordType::DICTIONARY_WORD, false);
183 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(aBounds.startPos == 5 && aBounds.endPos == 7);
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
184 : :
185 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(!m_xBreak->isBeginWord(aTest, 12, aLocale, i18n::WordType::DICTIONARY_WORD));
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
186 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 12, aLocale, i18n::WordType::DICTIONARY_WORD));
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
187 [ # # ][ # # ]: 0 : aBounds = m_xBreak->getWordBoundary(aTest, 12, aLocale, i18n::WordType::DICTIONARY_WORD, true);
188 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(aBounds.startPos == 9 && aBounds.endPos == 12);
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
189 : :
190 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 16, aLocale, i18n::WordType::DICTIONARY_WORD));
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
191 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(!m_xBreak->isEndWord(aTest, 16, aLocale, i18n::WordType::DICTIONARY_WORD));
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
192 [ # # ][ # # ]: 0 : aBounds = m_xBreak->getWordBoundary(aTest, 16, aLocale, i18n::WordType::DICTIONARY_WORD, true);
193 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(aBounds.startPos == 16 && aBounds.endPos == 19);
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
194 : : }
195 : :
196 : : //See https://issues.apache.org/ooo/show_bug.cgi?id=21907
197 : : {
198 : 0 : rtl::OUString aTest("b a?");
199 : :
200 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 1, aLocale, i18n::WordType::ANY_WORD));
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
201 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 2, aLocale, i18n::WordType::ANY_WORD));
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
202 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 3, aLocale, i18n::WordType::ANY_WORD));
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
203 : :
204 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 3, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES));
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
205 : :
206 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 1, aLocale, i18n::WordType::ANY_WORD));
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
207 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 2, aLocale, i18n::WordType::ANY_WORD));
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
208 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 3, aLocale, i18n::WordType::ANY_WORD));
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
209 : :
210 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 3, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES));
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
211 : : }
212 : :
213 : : //See https://issues.apache.org/ooo/show_bug.cgi?id=14904
214 : : {
215 : : const sal_Unicode TEST1[] =
216 : : {
217 : : 'W', 'o', 'r', 'k', 'i', 'n', 'g', ' ', 0x201C, 'W', 'o', 'r', 'd', 's',
218 : : ' ', 's', 't', 'a', 'r', 't', 'i', 'n', 'g', ' ', 'w', 'i', 't',
219 : : 'h', ' ', 'q', 'u', 'o', 't', 'e', 's', 0x201D, ' ', 'W', 'o', 'r', 'k',
220 : : 'i', 'n', 'g', ' ', 0x2018, 'B', 'r', 'o', 'k', 'e', 'n', 0x2019, ' ',
221 : : '?', 'S', 'p', 'a', 'n', 'i', 's', 'h', '?', ' ', 'd', 'o', 'e',
222 : : 's', 'n', 0x2019, 't', ' ', 'w', 'o', 'r', 'k', '.', ' ', 'N', 'o',
223 : : 't', ' ', 'e', 'v', 'e', 'n', ' ' , 0x00BF, 'r', 'e', 'a', 'l', '?', ' ',
224 : : 'S', 'p', 'a', 'n', 'i', 's', 'h'
225 : 0 : };
226 : 0 : rtl::OUString aTest(TEST1, SAL_N_ELEMENTS(TEST1));
227 : :
228 [ # # ][ # # ]: 0 : aBounds = m_xBreak->getWordBoundary(aTest, 4, aLocale, i18n::WordType::DICTIONARY_WORD, false);
229 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 7);
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
230 : :
231 [ # # ][ # # ]: 0 : aBounds = m_xBreak->getWordBoundary(aTest, 12, aLocale, i18n::WordType::DICTIONARY_WORD, false);
232 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(aBounds.startPos == 9 && aBounds.endPos == 14);
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
233 : :
234 [ # # ][ # # ]: 0 : aBounds = m_xBreak->getWordBoundary(aTest, 40, aLocale, i18n::WordType::DICTIONARY_WORD, false);
235 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(aBounds.startPos == 37 && aBounds.endPos == 44);
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
236 : :
237 [ # # ][ # # ]: 0 : aBounds = m_xBreak->getWordBoundary(aTest, 49, aLocale, i18n::WordType::DICTIONARY_WORD, false);
238 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(aBounds.startPos == 46 && aBounds.endPos == 52);
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
239 : :
240 [ # # ][ # # ]: 0 : aBounds = m_xBreak->getWordBoundary(aTest, 58, aLocale, i18n::WordType::DICTIONARY_WORD, false);
241 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(aBounds.startPos == 55 && aBounds.endPos == 62);
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
242 : :
243 [ # # ][ # # ]: 0 : aBounds = m_xBreak->getWordBoundary(aTest, 67, aLocale, i18n::WordType::DICTIONARY_WORD, false);
244 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(aBounds.startPos == 64 && aBounds.endPos == 71);
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
245 : :
246 [ # # ][ # # ]: 0 : aBounds = m_xBreak->getWordBoundary(aTest, 90, aLocale, i18n::WordType::DICTIONARY_WORD, false);
247 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(aBounds.startPos == 88 && aBounds.endPos == 92);
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
248 : : }
249 : :
250 : : //See https://bugs.freedesktop.org/show_bug.cgi?id=49629
251 : 0 : sal_Unicode aBreakTests[] = { ' ', 1, 2, 3, 4, 5, 6, 7, 0x91, 0x92, 0x200B, 0xE8FF, 0xF8FF };
252 [ # # ]: 0 : for (int mode = i18n::WordType::ANY_WORD; mode <= i18n::WordType::WORD_COUNT; ++mode)
253 : : {
254 : : //make sure that in all cases isBeginWord and isEndWord matches getWordBoundary
255 [ # # ]: 0 : for (size_t i = 0; i < SAL_N_ELEMENTS(aBreakTests); ++i)
256 : : {
257 : 0 : rtl::OUString aTest("Word");
258 : 0 : aTest += rtl::OUString(aBreakTests[i]) + rtl::OUString("Word");
259 [ # # ][ # # ]: 0 : aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, mode, true);
260 [ # # # # : 0 : switch (mode)
# ]
261 : : {
262 : : case i18n::WordType::ANY_WORD:
263 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 4);
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
264 : 0 : break;
265 : : case i18n::WordType::ANYWORD_IGNOREWHITESPACES:
266 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 4);
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
267 : 0 : break;
268 : : case i18n::WordType::DICTIONARY_WORD:
269 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 4);
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
270 : 0 : break;
271 : : case i18n::WordType::WORD_COUNT:
272 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 4);
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
273 : 0 : break;
274 : : }
275 : :
276 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, aBounds.startPos, aLocale, mode));
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
277 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, aBounds.endPos, aLocale, mode));
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
278 : 0 : }
279 : : }
280 : :
281 : 0 : sal_Unicode aJoinTests[] = { 'X', 0x200C, 0x200D, 0x2060, 0xFEFF, 0xFFF9, 0xFFFA, 0xFFFB };
282 [ # # ]: 0 : for (int mode = i18n::WordType::ANY_WORD; mode <= i18n::WordType::WORD_COUNT; ++mode)
283 : : {
284 : : //make sure that in all cases isBeginWord and isEndWord matches getWordBoundary
285 [ # # ]: 0 : for (size_t i = 0; i < SAL_N_ELEMENTS(aJoinTests); ++i)
286 : : {
287 : 0 : rtl::OUString aTest("Word");
288 : 0 : aTest += rtl::OUString(aJoinTests[i]) + rtl::OUString("Word");
289 [ # # ][ # # ]: 0 : aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, mode, true);
290 [ # # # # : 0 : switch (mode)
# ]
291 : : {
292 : : case i18n::WordType::ANY_WORD:
293 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 9);
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
294 : 0 : break;
295 : : case i18n::WordType::ANYWORD_IGNOREWHITESPACES:
296 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 9);
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
297 : 0 : break;
298 : : case i18n::WordType::DICTIONARY_WORD:
299 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 9);
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
300 : 0 : break;
301 : : case i18n::WordType::WORD_COUNT:
302 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 9);
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
303 : 0 : break;
304 : : }
305 : :
306 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, aBounds.startPos, aLocale, mode));
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
307 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, aBounds.endPos, aLocale, mode));
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
308 : 0 : }
309 : : }
310 : :
311 : : //See https://issues.apache.org/ooo/show_bug.cgi?id=13494
312 : : {
313 : 0 : const rtl::OUString aBase("xxAAxxBBxxCCxx");
314 : : const sal_Unicode aTests[] =
315 : : {
316 : : '\'', ';', ',', '.', '!', '@', '#', '%', '&', '*',
317 : : '(', ')', '_', '-', '{', '}', '[', ']', '\"', '/',
318 : : '\\', '?', '~', '$', '+', '^', '=', '<', '>', '|'
319 : 0 : };
320 : :
321 : 0 : const sal_Int32 aDoublePositions[] = {0, 2, 4, 6, 8, 10, 12, 14};
322 [ # # ]: 0 : for (size_t j = 0; j < SAL_N_ELEMENTS(aTests); ++j)
323 : : {
324 : 0 : rtl::OUString aTest = aBase.replace('x', aTests[j]);
325 : 0 : sal_Int32 nPos = -1;
326 : 0 : size_t i = 0;
327 [ # # ]: 0 : do
328 : : {
329 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aDoublePositions));
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
330 [ # # ][ # # ]: 0 : nPos = m_xBreak->nextWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
331 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(nPos == aDoublePositions[i++]);
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
332 : : }
333 : 0 : while (nPos < aTest.getLength());
334 : 0 : nPos = aTest.getLength();
335 : 0 : i = SAL_N_ELEMENTS(aDoublePositions)-1;
336 [ # # ]: 0 : do
337 : : {
338 [ # # ][ # # ]: 0 : nPos = m_xBreak->previousWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
339 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(nPos == aDoublePositions[--i]);
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
340 : : }
341 : : while (nPos > 0);
342 : 0 : }
343 : :
344 : 0 : const sal_Int32 aSinglePositions[] = {0, 1, 3, 4, 6, 7, 9, 10};
345 [ # # ]: 0 : for (size_t j = 1; j < SAL_N_ELEMENTS(aTests); ++j)
346 : : {
347 : 0 : rtl::OUString aTest = aBase.replaceAll(rtl::OUString("xx"), rtl::OUString(aTests[j]));
348 : 0 : sal_Int32 nPos = -1;
349 : 0 : size_t i = 0;
350 [ # # ]: 0 : do
351 : : {
352 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aSinglePositions));
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
353 [ # # ][ # # ]: 0 : nPos = m_xBreak->nextWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
354 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(nPos == aSinglePositions[i++]);
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
355 : : }
356 : 0 : while (nPos < aTest.getLength());
357 : 0 : nPos = aTest.getLength();
358 : 0 : i = SAL_N_ELEMENTS(aSinglePositions)-1;
359 [ # # ]: 0 : do
360 : : {
361 [ # # ][ # # ]: 0 : nPos = m_xBreak->previousWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
362 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(nPos == aSinglePositions[--i]);
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
363 : : }
364 : : while (nPos > 0);
365 : 0 : }
366 : :
367 : 0 : const sal_Int32 aSingleQuotePositions[] = {0, 1, 9, 10};
368 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(aTests[0] == '\'');
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
369 : : {
370 : 0 : rtl::OUString aTest = aBase.replaceAll(rtl::OUString("xx"), rtl::OUString(aTests[0]));
371 : 0 : sal_Int32 nPos = -1;
372 : 0 : size_t i = 0;
373 [ # # ]: 0 : do
374 : : {
375 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aSingleQuotePositions));
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
376 [ # # ][ # # ]: 0 : nPos = m_xBreak->nextWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
377 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(nPos == aSingleQuotePositions[i++]);
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
378 : : }
379 : 0 : while (nPos < aTest.getLength());
380 : 0 : nPos = aTest.getLength();
381 : 0 : i = SAL_N_ELEMENTS(aSingleQuotePositions)-1;
382 [ # # ]: 0 : do
383 : : {
384 [ # # ][ # # ]: 0 : nPos = m_xBreak->previousWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
385 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(nPos == aSingleQuotePositions[--i]);
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
386 : : }
387 : 0 : while (nPos > 0);
388 : 0 : }
389 : : }
390 : :
391 : : //See https://issues.apache.org/ooo/show_bug.cgi?id=13451
392 : : {
393 [ # # ]: 0 : aLocale.Language = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("ca"));
394 [ # # ]: 0 : aLocale.Country = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("ES"));
395 : :
396 : 0 : rtl::OUString aTest("mirar-se comprar-vos donem-nos les mans aneu-vos-en!");
397 : :
398 : 0 : sal_Int32 nPos = 0;
399 : 0 : sal_Int32 aExpected[] = {8, 20, 30, 34, 39, 51, 52};
400 : 0 : size_t i = 0;
401 [ # # ]: 0 : do
402 : : {
403 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected));
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
404 [ # # ]: 0 : nPos = m_xBreak->getWordBoundary(aTest, nPos, aLocale,
405 [ # # ]: 0 : i18n::WordType::DICTIONARY_WORD, true).endPos;
406 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(aExpected[i++] == nPos);
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
407 : : }
408 : 0 : while (nPos++ < aTest.getLength());
409 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(i == SAL_N_ELEMENTS(aExpected));
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
410 : : }
411 : :
412 : : //See https://issues.apache.org/ooo/show_bug.cgi?id=85411
413 [ # # ]: 0 : for (int j = 0; j < 2; ++j)
414 : : {
415 [ # # # ]: 0 : switch (j)
416 : : {
417 : : case 0:
418 : 0 : aLocale.Language = rtl::OUString("en");
419 : 0 : aLocale.Country = rtl::OUString("US");
420 : 0 : break;
421 : : case 1:
422 : 0 : aLocale.Language = rtl::OUString("ca");
423 : 0 : aLocale.Country = rtl::OUString("ES");
424 : 0 : break;
425 : : default:
426 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(false);
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
427 : 0 : break;
428 : : }
429 : :
430 : : const sal_Unicode TEST[] =
431 : : {
432 : : 'I', 0x200B, 'w', 'a', 'n', 't', 0x200B, 't', 'o', 0x200B, 'g', 'o'
433 : 0 : };
434 : 0 : rtl::OUString aTest(TEST, SAL_N_ELEMENTS(TEST));
435 : :
436 : 0 : sal_Int32 nPos = 0;
437 : 0 : sal_Int32 aExpected[] = {1, 6, 9, 12};
438 : 0 : size_t i = 0;
439 [ # # ]: 0 : do
440 : : {
441 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected));
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
442 [ # # ]: 0 : nPos = m_xBreak->getWordBoundary(aTest, nPos, aLocale,
443 [ # # ]: 0 : i18n::WordType::DICTIONARY_WORD, true).endPos;
444 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(aExpected[i++] == nPos);
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
445 : : }
446 : 0 : while (nPos++ < aTest.getLength());
447 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(i == SAL_N_ELEMENTS(aExpected));
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
448 : 0 : }
449 : :
450 : : //https://issues.apache.org/ooo/show_bug.cgi?id=21290
451 [ # # ]: 0 : for (int j = 0; j < 2; ++j)
452 : : {
453 [ # # # ]: 0 : switch (j)
454 : : {
455 : : case 0:
456 : 0 : aLocale.Language = rtl::OUString("en");
457 : 0 : aLocale.Country = rtl::OUString("US");
458 : 0 : break;
459 : : case 1:
460 : 0 : aLocale.Language = rtl::OUString("grc");
461 : 0 : aLocale.Country = rtl::OUString();
462 : 0 : break;
463 : : default:
464 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(false);
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
465 : 0 : break;
466 : : }
467 : :
468 : : const sal_Unicode TEST[] =
469 : : {
470 : : 0x1F0C, 0x03BD, 0x03B4, 0x03C1, 0x03B1, 0x0020, 0x1F00,
471 : : 0x03C1, 0x03BD, 0x1F7B, 0x03BC, 0x03B5, 0x03BD, 0x03BF,
472 : : 0x03C2, 0x0020, 0x1F00, 0x03BB, 0x03BB, 0x0020, 0x1F24,
473 : : 0x03C3, 0x03B8, 0x03B9, 0x03BF, 0x03BD
474 : 0 : };
475 : 0 : rtl::OUString aTest(TEST, SAL_N_ELEMENTS(TEST));
476 : :
477 : 0 : sal_Int32 nPos = 0;
478 : 0 : sal_Int32 aExpected[] = {5, 15, 19, 26};
479 : 0 : size_t i = 0;
480 [ # # ]: 0 : do
481 : : {
482 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected));
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
483 [ # # ]: 0 : nPos = m_xBreak->getWordBoundary(aTest, nPos, aLocale,
484 [ # # ]: 0 : i18n::WordType::DICTIONARY_WORD, true).endPos;
485 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(aExpected[i++] == nPos);
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
486 : : }
487 : 0 : while (nPos++ < aTest.getLength());
488 [ # # ][ # # ]: 0 : CPPUNIT_ASSERT(i == SAL_N_ELEMENTS(aExpected));
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
489 : 0 : }
490 : 0 : }
491 : :
492 : : //See http://qa.openoffice.org/issues/show_bug.cgi?id=111152
493 : : //See https://bugs.freedesktop.org/show_bug.cgi?id=40292
494 : 3 : void TestBreakIterator::testGraphemeIteration()
495 : : {
496 : 3 : lang::Locale aLocale;
497 [ + - ]: 3 : aLocale.Language = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("bn"));
498 [ + - ]: 3 : aLocale.Country = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("IN"));
499 : :
500 : : {
501 : 3 : const sal_Unicode BA_HALANT_LA[] = { 0x09AC, 0x09CD, 0x09AF };
502 : 3 : rtl::OUString aTest(BA_HALANT_LA, SAL_N_ELEMENTS(BA_HALANT_LA));
503 : :
504 : 3 : sal_Int32 nDone=0;
505 : : sal_Int32 nPos;
506 [ + - ]: 3 : nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
507 [ + - ]: 3 : i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
508 [ + - ][ + - ]: 3 : CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(BA_HALANT_LA));
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ]
509 [ + - ]: 3 : nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(BA_HALANT_LA), aLocale,
510 [ + - ]: 3 : i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
511 [ + - ][ + - ]: 3 : CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ]
512 : : }
513 : :
514 : : {
515 : 3 : const sal_Unicode HA_HALANT_NA_VOWELSIGNI[] = { 0x09B9, 0x09CD, 0x09A3, 0x09BF };
516 : 3 : rtl::OUString aTest(HA_HALANT_NA_VOWELSIGNI, SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI));
517 : :
518 : 3 : sal_Int32 nDone=0;
519 : : sal_Int32 nPos;
520 [ + - ]: 3 : nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
521 [ + - ]: 3 : i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
522 [ + - ][ + - ]: 3 : CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI));
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ]
523 [ + - ]: 3 : nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI), aLocale,
524 [ + - ]: 3 : i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
525 [ + - ][ + - ]: 3 : CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ]
526 : : }
527 : :
528 : : {
529 : 3 : const sal_Unicode TA_HALANT_MA_HALANT_YA [] = { 0x09A4, 0x09CD, 0x09AE, 0x09CD, 0x09AF };
530 : 3 : rtl::OUString aTest(TA_HALANT_MA_HALANT_YA, SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA));
531 : :
532 : 3 : sal_Int32 nDone=0;
533 : : sal_Int32 nPos;
534 [ + - ]: 3 : nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
535 [ + - ]: 3 : i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
536 [ + - ][ + - ]: 3 : CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA));
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ]
537 [ + - ]: 3 : nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA), aLocale,
538 [ + - ]: 3 : i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
539 [ + - ][ + - ]: 3 : CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ]
540 : : }
541 : :
542 [ + - ]: 3 : aLocale.Language = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("ta"));
543 [ + - ]: 3 : aLocale.Country = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("IN"));
544 : :
545 : : {
546 : 3 : const sal_Unicode KA_VIRAMA_SSA[] = { 0x0B95, 0x0BCD, 0x0BB7 };
547 : 3 : rtl::OUString aTest(KA_VIRAMA_SSA, SAL_N_ELEMENTS(KA_VIRAMA_SSA));
548 : :
549 : 3 : sal_Int32 nDone=0;
550 : 3 : sal_Int32 nPos = 0;
551 : :
552 [ + - ]: 3 : nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
553 [ + - ]: 3 : i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
554 [ + - ][ + - ]: 3 : CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(KA_VIRAMA_SSA));
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ]
555 [ + - ]: 3 : nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(KA_VIRAMA_SSA), aLocale,
556 [ + - ]: 3 : i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
557 [ + - ][ + - ]: 3 : CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ]
558 : : }
559 : :
560 : : {
561 : : const sal_Unicode CA_VOWELSIGNI_TA_VIRAMA_TA_VOWELSIGNI_RA_VOWELSIGNAI[] =
562 : 3 : { 0x0B9A, 0x0BBF, 0x0BA4, 0x0BCD, 0x0BA4, 0x0BBF, 0x0BB0, 0x0BC8 };
563 : : rtl::OUString aTest(CA_VOWELSIGNI_TA_VIRAMA_TA_VOWELSIGNI_RA_VOWELSIGNAI,
564 : 3 : SAL_N_ELEMENTS(CA_VOWELSIGNI_TA_VIRAMA_TA_VOWELSIGNI_RA_VOWELSIGNAI));
565 : :
566 : 3 : sal_Int32 nDone=0;
567 : 3 : sal_Int32 nPos=0;
568 : :
569 [ + + ]: 15 : for (sal_Int32 i = 0; i < 4; ++i)
570 : : {
571 : 12 : sal_Int32 nOldPos = nPos;
572 [ + - ]: 12 : nPos = m_xBreak->nextCharacters(aTest, nPos, aLocale,
573 [ + - ]: 12 : i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
574 [ + - ][ + - ]: 12 : CPPUNIT_ASSERT_MESSAGE("Should skip 2 units", nPos == nOldPos+2);
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ]
575 : : }
576 : :
577 [ + + ]: 15 : for (sal_Int32 i = 0; i < 4; ++i)
578 : : {
579 : 12 : sal_Int32 nOldPos = nPos;
580 [ + - ]: 12 : nPos = m_xBreak->previousCharacters(aTest, nPos, aLocale,
581 [ + - ]: 12 : i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
582 [ + - ][ + - ]: 12 : CPPUNIT_ASSERT_MESSAGE("Should skip 2 units", nPos == nOldPos-2);
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ]
583 : 3 : }
584 : : }
585 : :
586 : : {
587 : 3 : const sal_Unicode ALEF_QAMATS [] = { 0x05D0, 0x05B8 };
588 : 3 : rtl::OUString aText(ALEF_QAMATS, SAL_N_ELEMENTS(ALEF_QAMATS));
589 : :
590 : 3 : sal_Int32 nGraphemeCount = 0;
591 : :
592 : 3 : sal_Int32 nCurPos = 0;
593 [ + + ]: 6 : while (nCurPos < aText.getLength())
594 : : {
595 : 3 : sal_Int32 nCount2 = 1;
596 [ + - ]: 3 : nCurPos = m_xBreak->nextCharacters(aText, nCurPos, lang::Locale(),
597 [ + - ]: 3 : i18n::CharacterIteratorMode::SKIPCELL, nCount2, nCount2);
598 : 3 : ++nGraphemeCount;
599 : : }
600 : :
601 [ + - ][ + - ]: 3 : CPPUNIT_ASSERT_MESSAGE("Should be considered 1 grapheme", nGraphemeCount == 1);
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ]
602 : 3 : }
603 : 3 : }
604 : :
605 : : //A test to ensure that certain ranges and codepoints that are categorized as
606 : : //weak remain as weak, so that existing docs that depend on this don't silently
607 : : //change font for those weak chars
//
//Every entry of WEAKS is handed on its own to getScriptType() and has to come
//back as i18n::ScriptType::WEAK. Only the code units actually listed are
//checked: entries that look like the two ends of a block are not expanded into
//the range between them.
608 : 3 : void TestBreakIterator::testWeak()
609 : : {
//NOTE(review): aLocale is filled in here but is not passed to any call in
//this function - confirm whether it is still needed.
610 : 3 : lang::Locale aLocale;
611 [ + - ]: 3 : aLocale.Language = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("en"));
612 [ + - ]: 3 : aLocale.Country = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("US"));
613 : :
614 : : {
615 : : const sal_Unicode WEAKS[] =
616 : : {
617 : : 0x0001, 0x0002,
618 : : 0x0020, 0x00A0,
619 : : 0x2150, 0x215F, //Number Forms, fractions
620 : : 0x2160, 0x2180, //Number Forms, roman numerals
621 : : 0x2200, 0x22FF, //Mathematical Operators
622 : : 0x27C0, 0x27EF, //Miscellaneous Mathematical Symbols-A
623 : : 0x2980, 0x29FF, //Miscellaneous Mathematical Symbols-B
624 : : 0x2A00, 0x2AFF, //Supplemental Mathematical Operators
625 : : 0x2100, 0x214F, //Letterlike Symbols
626 : : 0x2308, 0x230B, //Miscellaneous technical
627 : : 0x25A0, 0x25FF, //Geometric Shapes
628 : : 0x2B30, 0x2B4C //Miscellaneous Symbols and Arrows
629 : 3 : };
630 : 3 : rtl::OUString aWeaks(WEAKS, SAL_N_ELEMENTS(WEAKS));
631 : :
//Check each code unit separately; the failure message names the offending
//character (appended with radix 16 after the "0x" prefix).
632 [ + + ]: 75 : for (sal_Int32 i = 0; i < aWeaks.getLength(); ++i)
633 : : {
634 [ + - ][ + - ]: 72 : sal_Int16 nScript = m_xBreak->getScriptType(aWeaks, i);
635 : 72 : rtl::OStringBuffer aMsg;
636 [ + - ]: 72 : aMsg.append(RTL_CONSTASCII_STRINGPARAM("Char 0x"));
637 [ + - ]: 72 : aMsg.append(static_cast<sal_Int32>(aWeaks.getStr()[i]), 16);
638 [ + - ]: 72 : aMsg.append(RTL_CONSTASCII_STRINGPARAM(" should have been weak"));
639 [ + - ][ + - ]: 144 : CPPUNIT_ASSERT_MESSAGE(aMsg.getStr(),
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ + - ]
640 [ + - ]: 72 : nScript == i18n::ScriptType::WEAK);
641 : 75 : }
642 : 3 : }
643 : 3 : }
644 : :
645 : : //A test to ensure that certain ranges and codepoints that are categorized as
646 : : //asian remain as asian, so that existing docs that depend on this don't silently
647 : : //change font for those asian chars.
648 : : //See https://bugs.freedesktop.org/show_bug.cgi?id=38095
//
//Every entry of ASIANS is handed on its own to getScriptType() and has to come
//back as i18n::ScriptType::ASIAN. Only the code units actually listed are
//checked, not the characters that lie between them.
649 : 3 : void TestBreakIterator::testAsian()
650 : : {
//NOTE(review): aLocale is filled in here but is not passed to any call in
//this function - confirm whether it is still needed.
651 : 3 : lang::Locale aLocale;
652 [ + - ]: 3 : aLocale.Language = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("en"));
653 [ + - ]: 3 : aLocale.Country = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("US"));
654 : :
655 : : {
656 : : const sal_Unicode ASIANS[] =
657 : : {
658 : : //some typical CJK chars
659 : : 0x4E00, 0x62FF,
660 : : //The full HalfWidth and FullWidth block has historically been
661 : : //designated as taking the CJK font :-(
662 : : //HalfWidth and FullWidth forms of ASCII 0-9, categorized under
663 : : //UAX24 as "Common" i.e. by that logic WEAK
664 : : 0xFF10, 0xFF19,
665 : : //HalfWidth and FullWidth forms of ASCII A-z, categorized under
666 : : //UAX24 as "Latin", i.e. by that logic LATIN
667 : : 0xFF21, 0xFF5A
668 : 3 : };
669 : 3 : rtl::OUString aAsians(ASIANS, SAL_N_ELEMENTS(ASIANS));
670 : :
//Check each code unit separately; the failure message names the offending
//character (appended with radix 16 after the "0x" prefix).
671 [ + + ]: 21 : for (sal_Int32 i = 0; i < aAsians.getLength(); ++i)
672 : : {
673 [ + - ][ + - ]: 18 : sal_Int16 nScript = m_xBreak->getScriptType(aAsians, i);
674 : 18 : rtl::OStringBuffer aMsg;
675 [ + - ]: 18 : aMsg.append(RTL_CONSTASCII_STRINGPARAM("Char 0x"));
676 [ + - ]: 18 : aMsg.append(static_cast<sal_Int32>(aAsians.getStr()[i]), 16);
677 [ + - ]: 18 : aMsg.append(RTL_CONSTASCII_STRINGPARAM(" should have been asian"));
678 [ + - ][ + - ]: 36 : CPPUNIT_ASSERT_MESSAGE(aMsg.getStr(),
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ + - ]
679 [ + - ]: 18 : nScript == i18n::ScriptType::ASIAN);
680 : 21 : }
681 : 3 : }
682 : 3 : }
683 : :
684 : : //A test to ensure that our thai word boundary detection is useful
//
//Two checks are made with a th-TH locale:
// 1. getWordBoundary() at position 0 of a six code unit string must report a
//    word spanning the whole string.
// 2. Stepping through a longer string with nextWord() and then back again
//    with previousWord() must visit the same start positions.
685 : 3 : void TestBreakIterator::testThai()
686 : : {
687 : 3 : lang::Locale aLocale;
688 [ + - ]: 3 : aLocale.Language = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("th"));
689 [ + - ]: 3 : aLocale.Country = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("TH"));
690 : :
691 : : //See http://lists.freedesktop.org/archives/libreoffice/2012-February/025959.html
692 : : {
693 : 3 : const sal_Unicode THAI[] = { 0x0E01, 0x0E38, 0x0E2B, 0x0E25, 0x0E32, 0x0E1A };
694 : 3 : rtl::OUString aTest(THAI, SAL_N_ELEMENTS(THAI));
695 [ + - ]: 3 : i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale,
696 [ + - ]: 3 : i18n::WordType::DICTIONARY_WORD, true);
697 [ + - ][ + - ]: 6 : CPPUNIT_ASSERT_MESSAGE("Should skip full word",
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ - + ]
[ + - ][ + - ]
698 [ + - ]: 6 : aBounds.startPos == 0 && aBounds.endPos == aTest.getLength());
699 : : }
700 : :
701 : : //See https://issues.apache.org/ooo/show_bug.cgi?id=29548
702 : : //make sure forwards and back are consistent
703 : : {
704 : : const sal_Unicode THAI[] =
705 : : {
706 : : 0x0E2D, 0x0E38, 0x0E17, 0x0E22, 0x0E32, 0x0E19, 0x0E41,
707 : : 0x0E2B, 0x0E48, 0x0E07, 0x0E0A, 0x0E32, 0x0E15, 0x0E34,
708 : : 0x0E19, 0x0E49, 0x0E33, 0x0E2B, 0x0E19, 0x0E32, 0x0E27,
709 : : 0x0E2D, 0x0E38, 0x0E17, 0x0E22, 0x0E32, 0x0E19, 0x0E41,
710 : : 0x0E2B, 0x0E48, 0x0E07, 0x0E0A, 0x0E32, 0x0E15, 0x0E34,
711 : : 0x0E19, 0x0E49, 0x0E33, 0x0E2B, 0x0E19, 0x0E32, 0x0E27
712 : 3 : };
713 : 3 : rtl::OUString aTest(THAI, SAL_N_ELEMENTS(THAI));
714 : :
//Forward pass: starting from -1, record the start position of every word
//reported by nextWord() until the reported position reaches the end of
//the string.
715 [ + - ][ + - ]: 3 : std::stack<sal_Int32> aPositions;
716 : 3 : sal_Int32 nPos = -1;
717 [ + + ]: 33 : do
718 : : {
719 [ + - ][ + - ]: 33 : nPos = m_xBreak->nextWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
720 [ + - ]: 33 : aPositions.push(nPos);
721 : : }
722 : 33 : while (nPos < aTest.getLength());
//The last value pushed is the one that ended the forward loop (it is not
//below the string length), so discard it before walking backwards.
723 : 3 : nPos = aTest.getLength();
724 [ + - ][ + - ]: 3 : CPPUNIT_ASSERT(!aPositions.empty());
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ]
725 [ + - ]: 3 : aPositions.pop();
//Backward pass: from the end of the string each previousWord() start has
//to equal the position on top of the stack; running out of recorded
//positions before reaching 0 is a failure as well.
726 [ + + ]: 30 : do
727 : : {
728 [ + - ][ + - ]: 30 : CPPUNIT_ASSERT(!aPositions.empty());
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ]
729 [ + - ][ + - ]: 30 : nPos = m_xBreak->previousWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
730 [ + - ][ + - ]: 30 : CPPUNIT_ASSERT(nPos == aPositions.top());
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ][ + - ]
[ + - ]
731 [ + - ]: 30 : aPositions.pop();
732 : : }
733 : 3 : while (nPos > 0);
734 : 3 : }
735 : 3 : }
736 : :
//Only compiled when the TODO macro evaluates to non-zero.
//Same whole-word check as the first block of testThai(), but with locale
//"nod"/"TH" and a string that has an additional 0x0E4A after the second code
//unit: getWordBoundary() at position 0 must report a word covering the whole
//string.
737 : : #if TODO
738 : : void TestBreakIterator::testNorthernThai()
739 : : {
740 : : lang::Locale aLocale;
741 : : aLocale.Language = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("nod"));
742 : : aLocale.Country = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("TH"));
743 : :
744 : : const sal_Unicode NORTHERN_THAI1[] = { 0x0E01, 0x0E38, 0x0E4A, 0x0E2B, 0x0E25, 0x0E32, 0x0E1A };
745 : : rtl::OUString aTest(NORTHERN_THAI1, SAL_N_ELEMENTS(NORTHERN_THAI1));
746 : : i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale,
747 : : i18n::WordType::DICTIONARY_WORD, true);
748 : : CPPUNIT_ASSERT_MESSAGE("Should skip full word",
749 : : aBounds.startPos == 0 && aBounds.endPos == aTest.getLength());
750 : : }
751 : : #endif
752 : :
753 : : #if (U_ICU_VERSION_MAJOR_NUM > 4)
754 : : //A test to ensure that our khmer word boundary detection is useful
755 : : //https://bugs.freedesktop.org/show_bug.cgi?id=52020
756 : : //
757 : : //icu doesn't have the Khmer word boundary dictionaries in <= 4.0.0 but does in
758 : : //the current 49.x.y. Not sure which version first introduced them.
//
//With a km-KH locale the five code unit string KHMER1 is expected to split
//into two dictionary words: [0,3) followed by [3,5).
759 : : void TestBreakIterator::testKhmer()
760 : : {
761 : : lang::Locale aLocale;
762 : : aLocale.Language = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("km"));
763 : : aLocale.Country = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("KH"));
764 : :
765 : : const sal_Unicode KHMER1[] = { 0x17B2, 0x17D2, 0x1799, 0x1782, 0x17C1 };
766 : :
767 : : rtl::OUString aTest(KHMER1, SAL_N_ELEMENTS(KHMER1));
768 : : i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale,
769 : : i18n::WordType::DICTIONARY_WORD, true);
770 : :
771 : : CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 3);
772 : :
//Query again from where the first word ended to get the second word.
773 : : aBounds = m_xBreak->getWordBoundary(aTest, aBounds.endPos, aLocale,
774 : : i18n::WordType::DICTIONARY_WORD, true);
775 : :
776 : : CPPUNIT_ASSERT(aBounds.startPos == 3 && aBounds.endPos == 5);
777 : : }
778 : : #endif
779 : :
//Per-test set-up: runs the base fixture's setUp() and then stores in m_xBreak
//the "com.sun.star.i18n.BreakIterator" instance created through m_xSFactory,
//queried for i18n::XBreakIterator with uno::UNO_QUERY_THROW.
780 : 15 : void TestBreakIterator::setUp()
781 : : {
782 : 15 : BootstrapFixtureBase::setUp();
783 : 15 : m_xBreak = uno::Reference< i18n::XBreakIterator >(m_xSFactory->createInstance(
784 [ + - ][ + - ]: 15 : "com.sun.star.i18n.BreakIterator"), uno::UNO_QUERY_THROW);
[ + - ]
785 : 15 : }
786 : :
//Per-test tear-down: runs the base fixture's tearDown() and then clears the
//m_xBreak reference obtained in setUp().
//NOTE(review): the reference is released after the base tear-down, i.e. not in
//the reverse order of setUp() - confirm that BootstrapFixtureBase::tearDown()
//does not invalidate anything m_xBreak still refers to.
787 : 15 : void TestBreakIterator::tearDown()
788 : : {
789 : 15 : BootstrapFixtureBase::tearDown();
790 : 15 : m_xBreak.clear();
791 : 15 : }
792 : :
//CppUnit boilerplate: register the TestBreakIterator suite and emit the test
//plug-in implementation for this file.
793 : 3 : CPPUNIT_TEST_SUITE_REGISTRATION(TestBreakIterator);
794 : :
795 [ + - ][ + - ]: 12 : CPPUNIT_PLUGIN_IMPLEMENT();
[ + - ][ + - ]
[ + - ][ # # ]
796 : :
797 : : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|