Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include "breakit.hxx"
21 : #include "swtypes.hxx"
22 :
23 : #include <com/sun/star/i18n/ScriptType.hpp>
24 : #include <com/sun/star/i18n/CharacterIteratorMode.hpp>
25 : #include <com/sun/star/i18n/BreakIterator.hpp>
26 : #include <com/sun/star/lang/XMultiServiceFactory.hpp>
27 : #include <editeng/unolingu.hxx>
28 : #include <editeng/scripttypeitem.hxx>
29 : #include <unicode/uchar.h>
30 : #include <unotools/localedatawrapper.hxx>
31 : #include <comphelper/processfactory.hxx>
32 :
33 : using namespace com::sun::star;
34 :
35 : SwBreakIt* pBreakIt = 0;
36 :
37 10 : void SwBreakIt::_Create( const uno::Reference<uno::XComponentContext> & rxContext )
38 : {
39 10 : delete pBreakIt, pBreakIt = new SwBreakIt( rxContext );
40 10 : }
41 :
42 10 : void SwBreakIt::_Delete()
43 : {
44 10 : delete pBreakIt, pBreakIt = 0;
45 10 : }
46 :
47 9 : SwBreakIt * SwBreakIt::Get()
48 : {
49 9 : return pBreakIt;
50 : }
51 :
52 10 : SwBreakIt::SwBreakIt( const uno::Reference<uno::XComponentContext> & rxContext )
53 : : m_xContext( rxContext ),
54 : m_pLanguageTag( NULL ),
55 : m_pForbidden( NULL ),
56 10 : aForbiddenLang( LANGUAGE_DONTKNOW )
57 : {
58 : OSL_ENSURE( m_xContext.is(), "SwBreakIt: no MultiServiceFactory" );
59 10 : }
60 :
61 20 : SwBreakIt::~SwBreakIt()
62 : {
63 10 : delete m_pLanguageTag;
64 10 : delete m_pForbidden;
65 10 : }
66 :
67 22486 : void SwBreakIt::createBreakIterator() const
68 : {
69 22486 : if ( m_xContext.is() && !xBreak.is() )
70 10 : xBreak.set( i18n::BreakIterator::create(m_xContext) );
71 22486 : }
72 :
73 299 : void SwBreakIt::_GetLocale( const LanguageType aLang )
74 : {
75 299 : if (m_pLanguageTag)
76 293 : m_pLanguageTag->reset( aLang );
77 : else
78 6 : m_pLanguageTag = new LanguageTag( aLang );
79 299 : }
80 :
81 4 : void SwBreakIt::_GetLocale( const LanguageTag& rLanguageTag )
82 : {
83 4 : if (m_pLanguageTag)
84 1 : *m_pLanguageTag = rLanguageTag;
85 : else
86 3 : m_pLanguageTag = new LanguageTag( rLanguageTag );
87 4 : }
88 :
89 9 : void SwBreakIt::_GetForbidden( const LanguageType aLang )
90 : {
91 9 : LocaleDataWrapper aWrap( m_xContext, GetLanguageTag( aLang ) );
92 :
93 9 : aForbiddenLang = aLang;
94 9 : delete m_pForbidden;
95 9 : m_pForbidden = new i18n::ForbiddenCharacters( aWrap.getForbiddenCharacters() );
96 9 : }
97 :
98 1966 : sal_uInt16 SwBreakIt::GetRealScriptOfText( const rtl::OUString& rTxt, sal_Int32 nPos ) const
99 : {
100 1966 : createBreakIterator();
101 1966 : sal_uInt16 nScript = i18n::ScriptType::WEAK;
102 1966 : if( xBreak.is() && !rTxt.isEmpty() )
103 : {
104 1073 : if( nPos && nPos == rTxt.getLength() )
105 155 : --nPos;
106 1073 : nScript = xBreak->getScriptType( rTxt, nPos );
107 1073 : sal_Int32 nChgPos = 0;
108 1073 : if ( i18n::ScriptType::WEAK == nScript && nPos + 1 < rTxt.getLength() )
109 : {
110 : // A weak character followed by a mark may be meant to combine with
111 : // the mark, so prefer the following character's script
112 240 : switch (u_charType(rTxt[nPos + 1]))
113 : {
114 : case U_NON_SPACING_MARK:
115 : case U_ENCLOSING_MARK:
116 : case U_COMBINING_SPACING_MARK:
117 0 : nScript = xBreak->getScriptType( rTxt, nPos+1 );
118 0 : break;
119 : }
120 : }
121 1315 : if( i18n::ScriptType::WEAK == nScript &&
122 : nPos &&
123 242 : 0 < ( nChgPos = xBreak->beginOfScript( rTxt, nPos, nScript ) ) )
124 : {
125 22 : nScript = xBreak->getScriptType( rTxt, nChgPos-1 );
126 : }
127 :
128 1493 : if( i18n::ScriptType::WEAK == nScript &&
129 420 : rTxt.getLength() > ( nChgPos = xBreak->endOfScript( rTxt, nPos, nScript ) ) &&
130 : 0 <= nChgPos )
131 : {
132 3 : nScript = xBreak->getScriptType( rTxt, nChgPos );
133 : }
134 : }
135 1966 : if( i18n::ScriptType::WEAK == nScript )
136 1310 : nScript = GetI18NScriptTypeOfLanguage( (sal_uInt16)GetAppLanguage() );
137 1966 : return nScript;
138 : }
139 :
140 64 : sal_uInt16 SwBreakIt::GetAllScriptsOfText( const rtl::OUString& rTxt ) const
141 : {
142 : const sal_uInt16 coAllScripts = ( SCRIPTTYPE_LATIN |
143 : SCRIPTTYPE_ASIAN |
144 64 : SCRIPTTYPE_COMPLEX );
145 64 : createBreakIterator();
146 64 : sal_uInt16 nRet = 0, nScript;
147 64 : if( !xBreak.is() )
148 : {
149 0 : nRet = coAllScripts;
150 : }
151 64 : else if( !rTxt.isEmpty() )
152 : {
153 109 : for( sal_Int32 n = 0, nEnd = rTxt.getLength(); n < nEnd;
154 54 : n = xBreak->endOfScript(rTxt, n, nScript) )
155 : {
156 55 : switch( nScript = xBreak->getScriptType( rTxt, n ) )
157 : {
158 54 : case i18n::ScriptType::LATIN: nRet |= SCRIPTTYPE_LATIN; break;
159 0 : case i18n::ScriptType::ASIAN: nRet |= SCRIPTTYPE_ASIAN; break;
160 0 : case i18n::ScriptType::COMPLEX: nRet |= SCRIPTTYPE_COMPLEX; break;
161 : case i18n::ScriptType::WEAK:
162 1 : if( !nRet )
163 1 : nRet |= coAllScripts;
164 1 : break;
165 : }
166 55 : if( coAllScripts == nRet )
167 1 : break;
168 : }
169 : }
170 64 : return nRet;
171 : }
172 :
173 899 : sal_Int32 SwBreakIt::getGraphemeCount(const rtl::OUString& rText,
174 : sal_Int32 nStart, sal_Int32 nEnd) const
175 : {
176 899 : sal_Int32 nGraphemeCount = 0;
177 :
178 899 : sal_Int32 nCurPos = nStart;
179 11464 : while (nCurPos < nEnd)
180 : {
181 : // fdo#49208 cheat and assume that nothing can combine with a space
182 : // to form a single grapheme
183 9666 : if (rText[nCurPos] == ' ')
184 : {
185 462 : ++nCurPos;
186 : }
187 : else
188 : {
189 9204 : sal_Int32 nCount2 = 1;
190 9204 : nCurPos = xBreak->nextCharacters(rText, nCurPos, lang::Locale(),
191 9204 : i18n::CharacterIteratorMode::SKIPCELL, nCount2, nCount2);
192 : }
193 9666 : ++nGraphemeCount;
194 : }
195 :
196 899 : return nGraphemeCount;
197 : }
198 :
199 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|