Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #if defined(WNT)
21 : #include <windows.h>
22 : #endif
23 :
24 : #include <osl/thread.h>
25 : #include <osl/file.hxx>
26 : #include <tools/debug.hxx>
27 : #include <tools/urlobj.hxx>
28 : #include <i18nlangtag/languagetag.hxx>
29 : #include <i18nlangtag/mslangid.hxx>
30 : #include <unotools/lingucfg.hxx>
31 : #include <unotools/pathoptions.hxx>
32 : #include <rtl/ustring.hxx>
33 : #include <rtl/string.hxx>
34 : #include <rtl/tencinfo.h>
35 : #include <linguistic/misc.hxx>
36 :
37 : #include <set>
38 : #include <vector>
39 : #include <string.h>
40 :
41 : #include <lingutil.hxx>
42 :
43 : #include <sal/macros.h>
44 :
45 : using ::com::sun::star::lang::Locale;
46 : using namespace ::com::sun::star;
47 :
48 : #if defined(WNT)
49 : OString Win_GetShortPathName( const OUString &rLongPathName )
50 : {
51 : OString aRes;
52 :
53 : sal_Unicode aShortBuffer[1024] = {0};
54 : sal_Int32 nShortBufSize = SAL_N_ELEMENTS( aShortBuffer );
55 :
56 : // use the version of 'GetShortPathName' that can deal with Unicode...
57 : sal_Int32 nShortLen = GetShortPathNameW(
58 : reinterpret_cast<LPCWSTR>( rLongPathName.getStr() ),
59 : reinterpret_cast<LPWSTR>( aShortBuffer ),
60 : nShortBufSize );
61 :
62 : if (nShortLen < nShortBufSize) // conversion successful?
63 : aRes = OString( OU2ENC( OUString( aShortBuffer, nShortLen ), osl_getThreadTextEncoding()) );
64 : else
65 : OSL_FAIL( "Win_GetShortPathName: buffer to short" );
66 :
67 : return aRes;
68 : }
69 : #endif //defined(WNT)
70 :
71 : // build list of old style diuctionaries (not as extensions) to use.
72 : // User installed dictionaries (the ones residing in the user paths)
73 : // will get precedence over system installed ones for the same language.
74 0 : std::vector< SvtLinguConfigDictionaryEntry > GetOldStyleDics( const char *pDicType )
75 : {
76 0 : std::vector< SvtLinguConfigDictionaryEntry > aRes;
77 :
78 0 : if (!pDicType)
79 0 : return aRes;
80 :
81 0 : OUString aFormatName;
82 0 : OUString aDicExtension;
83 : #ifdef SYSTEM_DICTS
84 0 : OUString aSystemDir;
85 0 : OUString aSystemPrefix;
86 0 : OUString aSystemSuffix;
87 : #endif
88 0 : if (strcmp( pDicType, "DICT" ) == 0)
89 : {
90 0 : aFormatName = "DICT_SPELL";
91 0 : aDicExtension = ".dic";
92 : #ifdef SYSTEM_DICTS
93 0 : aSystemDir = DICT_SYSTEM_DIR;
94 0 : aSystemSuffix = aDicExtension;
95 : #endif
96 : }
97 0 : else if (strcmp( pDicType, "HYPH" ) == 0)
98 : {
99 0 : aFormatName = "DICT_HYPH";
100 0 : aDicExtension = ".dic";
101 : #ifdef SYSTEM_DICTS
102 0 : aSystemDir = HYPH_SYSTEM_DIR;
103 0 : aSystemPrefix = "hyph_";
104 0 : aSystemSuffix = aDicExtension;
105 : #endif
106 : }
107 0 : else if (strcmp( pDicType, "THES" ) == 0)
108 : {
109 0 : aFormatName = "DICT_THES";
110 0 : aDicExtension = ".dat";
111 : #ifdef SYSTEM_DICTS
112 0 : aSystemDir = THES_SYSTEM_DIR;
113 0 : aSystemPrefix = "th_";
114 0 : aSystemSuffix = "_v2.dat";
115 : #endif
116 : }
117 :
118 0 : if (aFormatName.isEmpty() || aDicExtension.isEmpty())
119 0 : return aRes;
120 :
121 : #ifdef SYSTEM_DICTS
122 0 : osl::Directory aSystemDicts(aSystemDir);
123 0 : if (aSystemDicts.open() == osl::FileBase::E_None)
124 : {
125 : // set of languages to remember the language where it is already
126 : // decided to make use of the dictionary.
127 0 : std::set< OUString > aDicLangInUse;
128 :
129 0 : osl::DirectoryItem aItem;
130 0 : osl::FileStatus aFileStatus(osl_FileStatus_Mask_FileURL);
131 0 : while (aSystemDicts.getNextItem(aItem) == osl::FileBase::E_None)
132 : {
133 0 : aItem.getFileStatus(aFileStatus);
134 0 : OUString sPath = aFileStatus.getFileURL();
135 0 : if (sPath.endsWith(aSystemSuffix))
136 : {
137 0 : sal_Int32 nStartIndex = sPath.lastIndexOf('/') + 1;
138 0 : if (!sPath.match(aSystemPrefix, nStartIndex))
139 0 : continue;
140 0 : OUString sChunk = sPath.copy(nStartIndex + aSystemPrefix.getLength(),
141 0 : sPath.getLength() - aSystemSuffix.getLength() -
142 0 : nStartIndex - aSystemPrefix.getLength());
143 0 : if (sChunk.isEmpty())
144 0 : continue;
145 :
146 : // We prefer (now) to use language tags.
147 : // Avoid feeding in the older LANG_REGION scheme to the BCP47
148 : // ctor as that triggers use of liblangtag and initializes its
149 : // database which we do not want during startup. Convert
150 : // instead.
151 0 : sChunk = sChunk.replace( '_', '-');
152 :
153 : // There's a known exception to the rule, the dreaded
154 : // hu_HU_u8.dic of the myspell-hu package, see
155 : // http://packages.debian.org/search?arch=any&searchon=contents&keywords=hu_HU_u8.dic
156 : // This was ignored because unknown in the old implementation,
157 : // truncate to the known locale and either insert because hu_HU
158 : // wasn't encountered yet, or skip because it was. It doesn't
159 : // really matter because the proper new-style hu_HU dictionary
160 : // will take precedence anyway if installed with a Hungarian
161 : // languagepack. Again, this is only to not pull in all
162 : // liblangtag and stuff during startup, the result would be
163 : // !isValidBcp47() and the dictionary ignored.
164 0 : if (sChunk == "hu-HU-u8")
165 0 : sChunk = "hu-HU";
166 :
167 0 : LanguageTag aLangTag(sChunk, true);
168 0 : if (!aLangTag.isValidBcp47())
169 0 : continue;
170 :
171 : // Thus we first get the language of the dictionary
172 0 : OUString aLocaleName(aLangTag.getBcp47());
173 :
174 0 : if (aDicLangInUse.insert(aLocaleName).second)
175 : {
176 : // add the dictionary to the resulting vector
177 0 : SvtLinguConfigDictionaryEntry aDicEntry;
178 0 : aDicEntry.aLocations.realloc(1);
179 0 : aDicEntry.aLocaleNames.realloc(1);
180 0 : aDicEntry.aLocations[0] = sPath;
181 0 : aDicEntry.aFormatName = aFormatName;
182 0 : aDicEntry.aLocaleNames[0] = aLocaleName;
183 0 : aRes.push_back( aDicEntry );
184 0 : }
185 : }
186 0 : }
187 : }
188 : #endif
189 :
190 0 : return aRes;
191 : }
192 :
193 0 : void MergeNewStyleDicsAndOldStyleDics(
194 : std::list< SvtLinguConfigDictionaryEntry > &rNewStyleDics,
195 : const std::vector< SvtLinguConfigDictionaryEntry > &rOldStyleDics )
196 : {
197 : // get list of languages supported by new style dictionaries
198 0 : std::set< OUString > aNewStyleLanguages;
199 0 : std::list< SvtLinguConfigDictionaryEntry >::const_iterator aIt;
200 0 : for (aIt = rNewStyleDics.begin() ; aIt != rNewStyleDics.end(); ++aIt)
201 : {
202 0 : const uno::Sequence< OUString > aLocaleNames( aIt->aLocaleNames );
203 0 : sal_Int32 nLocaleNames = aLocaleNames.getLength();
204 0 : for (sal_Int32 k = 0; k < nLocaleNames; ++k)
205 : {
206 0 : aNewStyleLanguages.insert( aLocaleNames[k] );
207 : }
208 0 : }
209 :
210 : // now check all old style dictionaries if they will add a not yet
211 : // added language. If so add them to the resulting vector
212 0 : std::vector< SvtLinguConfigDictionaryEntry >::const_iterator aIt2;
213 0 : for (aIt2 = rOldStyleDics.begin(); aIt2 != rOldStyleDics.end(); ++aIt2)
214 : {
215 0 : sal_Int32 nOldStyleDics = aIt2->aLocaleNames.getLength();
216 :
217 : // old style dics should only have one language listed...
218 : DBG_ASSERT( nOldStyleDics, "old style dictionary with more then one language found!");
219 0 : if (nOldStyleDics > 0)
220 : {
221 0 : if (linguistic::LinguIsUnspecified( aIt2->aLocaleNames[0]))
222 : {
223 : OSL_FAIL( "old style dictionary with invalid language found!" );
224 0 : continue;
225 : }
226 :
227 : // language not yet added?
228 0 : if (aNewStyleLanguages.find( aIt2->aLocaleNames[0] ) == aNewStyleLanguages.end())
229 0 : rNewStyleDics.push_back( *aIt2 );
230 : }
231 : else
232 : {
233 : OSL_FAIL( "old style dictionary with no language found!" );
234 : }
235 0 : }
236 0 : }
237 :
238 0 : rtl_TextEncoding getTextEncodingFromCharset(const sal_Char* pCharset)
239 : {
240 : // default result: used to indicate that we failed to get the proper encoding
241 0 : rtl_TextEncoding eRet = RTL_TEXTENCODING_DONTKNOW;
242 :
243 0 : if (pCharset)
244 : {
245 0 : eRet = rtl_getTextEncodingFromMimeCharset(pCharset);
246 0 : if (eRet == RTL_TEXTENCODING_DONTKNOW)
247 0 : eRet = rtl_getTextEncodingFromUnixCharset(pCharset);
248 0 : if (eRet == RTL_TEXTENCODING_DONTKNOW)
249 : {
250 0 : if (strcmp("ISCII-DEVANAGARI", pCharset) == 0)
251 0 : eRet = RTL_TEXTENCODING_ISCII_DEVANAGARI;
252 : }
253 : }
254 0 : return eRet;
255 : }
256 :
257 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|