Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include "breakit.hxx"
21 : #include "swtypes.hxx"
22 :
23 : #include <com/sun/star/i18n/ScriptType.hpp>
24 : #include <com/sun/star/i18n/CharacterIteratorMode.hpp>
25 : #include <com/sun/star/i18n/BreakIterator.hpp>
26 : #include <com/sun/star/lang/XMultiServiceFactory.hpp>
27 : #include <editeng/unolingu.hxx>
28 : #include <editeng/scripttypeitem.hxx>
29 : #include <unicode/uchar.h>
30 : #include <unotools/localedatawrapper.hxx>
31 : #include <comphelper/processfactory.hxx>
32 :
33 : using namespace com::sun::star;
34 :
35 : SwBreakIt* g_pBreakIt = 0;
36 :
37 90 : void SwBreakIt::_Create( const uno::Reference<uno::XComponentContext> & rxContext )
38 : {
39 90 : delete g_pBreakIt, g_pBreakIt = new SwBreakIt( rxContext );
40 90 : }
41 :
42 90 : void SwBreakIt::_Delete()
43 : {
44 90 : delete g_pBreakIt, g_pBreakIt = 0;
45 90 : }
46 :
47 70 : SwBreakIt * SwBreakIt::Get()
48 : {
49 70 : return g_pBreakIt;
50 : }
51 :
52 90 : SwBreakIt::SwBreakIt( const uno::Reference<uno::XComponentContext> & rxContext )
53 : : m_xContext( rxContext ),
54 : m_pLanguageTag( NULL ),
55 : m_pForbidden( NULL ),
56 90 : aForbiddenLang( LANGUAGE_DONTKNOW )
57 : {
58 : OSL_ENSURE( m_xContext.is(), "SwBreakIt: no MultiServiceFactory" );
59 90 : }
60 :
61 180 : SwBreakIt::~SwBreakIt()
62 : {
63 90 : delete m_pLanguageTag;
64 90 : delete m_pForbidden;
65 90 : }
66 :
67 1846895 : void SwBreakIt::createBreakIterator() const
68 : {
69 1846895 : if ( m_xContext.is() && !xBreak.is() )
70 78 : xBreak.set( i18n::BreakIterator::create(m_xContext) );
71 1846895 : }
72 :
73 5428 : void SwBreakIt::_GetLocale( const LanguageType aLang )
74 : {
75 5428 : if (m_pLanguageTag)
76 5376 : m_pLanguageTag->reset( aLang );
77 : else
78 52 : m_pLanguageTag = new LanguageTag( aLang );
79 5428 : }
80 :
81 32 : void SwBreakIt::_GetLocale( const LanguageTag& rLanguageTag )
82 : {
83 32 : if (m_pLanguageTag)
84 12 : *m_pLanguageTag = rLanguageTag;
85 : else
86 20 : m_pLanguageTag = new LanguageTag( rLanguageTag );
87 32 : }
88 :
89 342 : void SwBreakIt::_GetForbidden( const LanguageType aLang )
90 : {
91 342 : LocaleDataWrapper aWrap( m_xContext, GetLanguageTag( aLang ) );
92 :
93 342 : aForbiddenLang = aLang;
94 342 : delete m_pForbidden;
95 342 : m_pForbidden = new i18n::ForbiddenCharacters( aWrap.getForbiddenCharacters() );
96 342 : }
97 :
98 271043 : sal_uInt16 SwBreakIt::GetRealScriptOfText( const OUString& rTxt, sal_Int32 nPos ) const
99 : {
100 271043 : createBreakIterator();
101 271043 : sal_uInt16 nScript = i18n::ScriptType::WEAK;
102 271043 : if( xBreak.is() && !rTxt.isEmpty() )
103 : {
104 208517 : if( nPos && nPos == rTxt.getLength() )
105 42672 : --nPos;
106 165845 : else if( nPos < 0)
107 0 : nPos = 0;
108 :
109 208517 : nScript = xBreak->getScriptType( rTxt, nPos );
110 208517 : sal_Int32 nChgPos = 0;
111 208517 : if (i18n::ScriptType::WEAK == nScript && nPos >= 0 && nPos + 1 < rTxt.getLength())
112 : {
113 : // A weak character followed by a mark may be meant to combine with
114 : // the mark, so prefer the following character's script
115 12797 : switch (u_charType(rTxt[nPos + 1]))
116 : {
117 : case U_NON_SPACING_MARK:
118 : case U_ENCLOSING_MARK:
119 : case U_COMBINING_SPACING_MARK:
120 0 : nScript = xBreak->getScriptType( rTxt, nPos+1 );
121 0 : break;
122 : }
123 : }
124 227508 : if( i18n::ScriptType::WEAK == nScript &&
125 224737 : nPos &&
126 16220 : 0 < ( nChgPos = xBreak->beginOfScript( rTxt, nPos, nScript ) ) )
127 : {
128 15160 : nScript = xBreak->getScriptType( rTxt, nChgPos-1 );
129 : }
130 :
131 212348 : if( i18n::ScriptType::WEAK == nScript &&
132 208767 : rTxt.getLength() > ( nChgPos = xBreak->endOfScript( rTxt, nPos, nScript ) ) &&
133 : 0 <= nChgPos )
134 : {
135 250 : nScript = xBreak->getScriptType( rTxt, nChgPos );
136 : }
137 : }
138 271043 : if( i18n::ScriptType::WEAK == nScript )
139 66107 : nScript = GetI18NScriptTypeOfLanguage( (sal_uInt16)GetAppLanguage() );
140 271043 : return nScript;
141 : }
142 :
143 128 : sal_uInt16 SwBreakIt::GetAllScriptsOfText( const OUString& rTxt ) const
144 : {
145 : const sal_uInt16 coAllScripts = ( SCRIPTTYPE_LATIN |
146 : SCRIPTTYPE_ASIAN |
147 128 : SCRIPTTYPE_COMPLEX );
148 128 : createBreakIterator();
149 128 : sal_uInt16 nRet = 0, nScript;
150 128 : if( !xBreak.is() )
151 : {
152 0 : nRet = coAllScripts;
153 : }
154 128 : else if( !rTxt.isEmpty() )
155 : {
156 218 : for( sal_Int32 n = 0, nEnd = rTxt.getLength(); n < nEnd;
157 108 : n = xBreak->endOfScript(rTxt, n, nScript) )
158 : {
159 110 : switch( nScript = xBreak->getScriptType( rTxt, n ) )
160 : {
161 108 : case i18n::ScriptType::LATIN: nRet |= SCRIPTTYPE_LATIN; break;
162 0 : case i18n::ScriptType::ASIAN: nRet |= SCRIPTTYPE_ASIAN; break;
163 0 : case i18n::ScriptType::COMPLEX: nRet |= SCRIPTTYPE_COMPLEX; break;
164 : case i18n::ScriptType::WEAK:
165 2 : if( !nRet )
166 2 : nRet |= coAllScripts;
167 2 : break;
168 : }
169 110 : if( coAllScripts == nRet )
170 2 : break;
171 : }
172 : }
173 128 : return nRet;
174 : }
175 :
176 13926 : sal_Int32 SwBreakIt::getGraphemeCount(const OUString& rText,
177 : sal_Int32 nStart, sal_Int32 nEnd) const
178 : {
179 13926 : sal_Int32 nGraphemeCount = 0;
180 :
181 13926 : sal_Int32 nCurPos = std::max(static_cast<sal_Int32>(0), nStart);
182 355338 : while (nCurPos < nEnd)
183 : {
184 : // fdo#49208 cheat and assume that nothing can combine with a space
185 : // to form a single grapheme
186 327486 : if (rText[nCurPos] == ' ')
187 : {
188 11972 : ++nCurPos;
189 : }
190 : else
191 : {
192 315514 : sal_Int32 nCount2 = 1;
193 315514 : nCurPos = xBreak->nextCharacters(rText, nCurPos, lang::Locale(),
194 315514 : i18n::CharacterIteratorMode::SKIPCELL, nCount2, nCount2);
195 : }
196 327486 : ++nGraphemeCount;
197 : }
198 :
199 13926 : return nGraphemeCount;
200 : }
201 :
202 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|