Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include "breakit.hxx"
21 : #include "swtypes.hxx"
22 :
23 : #include <com/sun/star/i18n/ScriptType.hpp>
24 : #include <com/sun/star/i18n/CharacterIteratorMode.hpp>
25 : #include <com/sun/star/i18n/BreakIterator.hpp>
26 : #include <com/sun/star/lang/XMultiServiceFactory.hpp>
27 : #include <editeng/unolingu.hxx>
28 : #include <editeng/scripttypeitem.hxx>
29 : #include <unicode/uchar.h>
30 : #include <unotools/localedatawrapper.hxx>
31 : #include <comphelper/processfactory.hxx>
32 : #include <osl/diagnose.h>
33 : #include <algorithm>
34 :
35 : using namespace com::sun::star;
36 :
37 : SwBreakIt* g_pBreakIt = 0;
38 :
39 59 : void SwBreakIt::_Create( const uno::Reference<uno::XComponentContext> & rxContext )
40 : {
41 59 : delete g_pBreakIt, g_pBreakIt = new SwBreakIt( rxContext );
42 59 : }
43 :
44 59 : void SwBreakIt::_Delete()
45 : {
46 59 : delete g_pBreakIt, g_pBreakIt = 0;
47 59 : }
48 :
49 50 : SwBreakIt * SwBreakIt::Get()
50 : {
51 50 : return g_pBreakIt;
52 : }
53 :
54 59 : SwBreakIt::SwBreakIt( const uno::Reference<uno::XComponentContext> & rxContext )
55 : : m_xContext( rxContext ),
56 : m_pLanguageTag( NULL ),
57 : m_pForbidden( NULL ),
58 59 : aForbiddenLang( LANGUAGE_DONTKNOW )
59 : {
60 : OSL_ENSURE( m_xContext.is(), "SwBreakIt: no MultiServiceFactory" );
61 59 : }
62 :
63 118 : SwBreakIt::~SwBreakIt()
64 : {
65 59 : delete m_pLanguageTag;
66 59 : delete m_pForbidden;
67 59 : }
68 :
69 494782182 : void SwBreakIt::createBreakIterator() const
70 : {
71 494782182 : if ( m_xContext.is() && !xBreak.is() )
72 53 : xBreak.set( i18n::BreakIterator::create(m_xContext) );
73 494782182 : }
74 :
75 2936 : void SwBreakIt::_GetLocale( const LanguageType aLang )
76 : {
77 2936 : if (m_pLanguageTag)
78 2901 : m_pLanguageTag->reset( aLang );
79 : else
80 35 : m_pLanguageTag = new LanguageTag( aLang );
81 2936 : }
82 :
83 24 : void SwBreakIt::_GetLocale( const LanguageTag& rLanguageTag )
84 : {
85 24 : if (m_pLanguageTag)
86 9 : *m_pLanguageTag = rLanguageTag;
87 : else
88 15 : m_pLanguageTag = new LanguageTag( rLanguageTag );
89 24 : }
90 :
91 205 : void SwBreakIt::_GetForbidden( const LanguageType aLang )
92 : {
93 205 : LocaleDataWrapper aWrap( m_xContext, GetLanguageTag( aLang ) );
94 :
95 205 : aForbiddenLang = aLang;
96 205 : delete m_pForbidden;
97 205 : m_pForbidden = new i18n::ForbiddenCharacters( aWrap.getForbiddenCharacters() );
98 205 : }
99 :
100 97176902 : sal_uInt16 SwBreakIt::GetRealScriptOfText( const OUString& rText, sal_Int32 nPos ) const
101 : {
102 97176902 : createBreakIterator();
103 97176902 : sal_uInt16 nScript = i18n::ScriptType::WEAK;
104 97176902 : if( xBreak.is() && !rText.isEmpty() )
105 : {
106 97141320 : if( nPos && nPos == rText.getLength() )
107 22904 : --nPos;
108 97118416 : else if( nPos < 0)
109 0 : nPos = 0;
110 :
111 97141320 : nScript = xBreak->getScriptType( rText, nPos );
112 97141320 : sal_Int32 nChgPos = 0;
113 97141320 : if (i18n::ScriptType::WEAK == nScript && nPos >= 0 && nPos + 1 < rText.getLength())
114 : {
115 : // A weak character followed by a mark may be meant to combine with
116 : // the mark, so prefer the following character's script
117 37102290 : switch (u_charType(rText[nPos + 1]))
118 : {
119 : case U_NON_SPACING_MARK:
120 : case U_ENCLOSING_MARK:
121 : case U_COMBINING_SPACING_MARK:
122 0 : nScript = xBreak->getScriptType( rText, nPos+1 );
123 0 : break;
124 : }
125 : }
126 134247337 : if( i18n::ScriptType::WEAK == nScript &&
127 133965745 : nPos &&
128 36824425 : 0 < ( nChgPos = xBreak->beginOfScript( rText, nPos, nScript ) ) )
129 : {
130 36823599 : nScript = xBreak->getScriptType( rText, nChgPos-1 );
131 : }
132 :
133 97423738 : if( i18n::ScriptType::WEAK == nScript &&
134 97421121 : rText.getLength() > ( nChgPos = xBreak->endOfScript( rText, nPos, nScript ) ) &&
135 : 0 <= nChgPos )
136 : {
137 279801 : nScript = xBreak->getScriptType( rText, nChgPos );
138 : }
139 : }
140 97176902 : if( i18n::ScriptType::WEAK == nScript )
141 38199 : nScript = SvtLanguageOptions::GetI18NScriptTypeOfLanguage( GetAppLanguage() );
142 97176902 : return nScript;
143 : }
144 :
145 67 : SvtScriptType SwBreakIt::GetAllScriptsOfText( const OUString& rText ) const
146 : {
147 : const SvtScriptType coAllScripts = ( SvtScriptType::LATIN |
148 134 : SvtScriptType::ASIAN |
149 134 : SvtScriptType::COMPLEX );
150 67 : createBreakIterator();
151 67 : SvtScriptType nRet = SvtScriptType::NONE;
152 67 : sal_uInt16 nScript = 0;
153 67 : if( !xBreak.is() )
154 : {
155 0 : nRet = coAllScripts;
156 : }
157 67 : else if( !rText.isEmpty() )
158 : {
159 116 : for( sal_Int32 n = 0, nEnd = rText.getLength(); n < nEnd;
160 58 : n = xBreak->endOfScript(rText, n, nScript) )
161 : {
162 60 : switch( nScript = xBreak->getScriptType( rText, n ) )
163 : {
164 57 : case i18n::ScriptType::LATIN: nRet |= SvtScriptType::LATIN; break;
165 0 : case i18n::ScriptType::ASIAN: nRet |= SvtScriptType::ASIAN; break;
166 1 : case i18n::ScriptType::COMPLEX: nRet |= SvtScriptType::COMPLEX; break;
167 : case i18n::ScriptType::WEAK:
168 2 : if( nRet == SvtScriptType::NONE )
169 2 : nRet |= coAllScripts;
170 2 : break;
171 : }
172 60 : if( coAllScripts == nRet )
173 2 : break;
174 : }
175 : }
176 67 : return nRet;
177 : }
178 :
179 263401 : sal_Int32 SwBreakIt::getGraphemeCount(const OUString& rText,
180 : sal_Int32 nStart, sal_Int32 nEnd) const
181 : {
182 263401 : sal_Int32 nGraphemeCount = 0;
183 :
184 263401 : sal_Int32 nCurPos = std::max(static_cast<sal_Int32>(0), nStart);
185 9268107 : while (nCurPos < nEnd)
186 : {
187 : // fdo#49208 cheat and assume that nothing can combine with a space
188 : // to form a single grapheme
189 8741305 : if (rText[nCurPos] == ' ')
190 : {
191 255179 : ++nCurPos;
192 : }
193 : else
194 : {
195 8486126 : sal_Int32 nCount2 = 1;
196 8486126 : nCurPos = xBreak->nextCharacters(rText, nCurPos, lang::Locale(),
197 8486126 : i18n::CharacterIteratorMode::SKIPCELL, nCount2, nCount2);
198 : }
199 8741305 : ++nGraphemeCount;
200 : }
201 :
202 263401 : return nGraphemeCount;
203 : }
204 :
205 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|