Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : /**
21 : *
22 : *
23 : *
24 : *
25 : * TODO
26 : * - Add exception throwing when h == NULL
27 : * - Not init h when implicit constructor is launched
28 : */
29 :
30 : #include <string.h>
31 : #include <sstream>
32 : #include <iostream>
33 :
34 : #ifdef SYSTEM_LIBEXTTEXTCAT
35 : #include <libexttextcat/textcat.h>
36 : #include <libexttextcat/common.h>
37 : #include <libexttextcat/constants.h>
38 : #include <libexttextcat/fingerprint.h>
39 : #include <libexttextcat/utf8misc.h>
40 : #else
41 : #include <textcat.h>
42 : #include <common.h>
43 : #include <constants.h>
44 : #include <fingerprint.h>
45 : #include <utf8misc.h>
46 : #endif
47 :
48 : #include <sal/types.h>
49 :
50 : #include "altstrfunc.hxx"
51 : #include "simpleguesser.hxx"
52 :
53 : using namespace std;
54 :
55 : /**
56 : * This 3 following structures are from fingerprint.c and textcat.c
57 : */
58 : typedef struct ngram_t {
59 :
60 : sint2 rank;
61 : char str[MAXNGRAMSIZE+1];
62 :
63 : } ngram_t;
64 :
65 : typedef struct fp_t {
66 :
67 : const char *name;
68 : ngram_t *fprint;
69 : uint4 size;
70 :
71 : } fp_t;
72 :
73 : typedef struct textcat_t{
74 :
75 : void **fprint;
76 : char *fprint_disable;
77 : uint4 size;
78 : uint4 maxsize;
79 :
80 : char output[MAXOUTPUTSIZE];
81 :
82 : } textcat_t;
83 : // end of the 3 structs
84 :
85 0 : SimpleGuesser::SimpleGuesser()
86 : {
87 0 : h = NULL;
88 0 : }
89 :
90 0 : SimpleGuesser& SimpleGuesser::operator=(const SimpleGuesser& sg){
91 : // Check for self-assignment!
92 0 : if (this == &sg) // Same object?
93 0 : return *this; // Yes, so skip assignment, and just return *this.
94 :
95 0 : if(h){textcat_Done(h);}
96 0 : h = sg.h;
97 0 : return *this;
98 : }
99 :
100 0 : SimpleGuesser::~SimpleGuesser()
101 : {
102 0 : if(h){textcat_Done(h);}
103 0 : }
104 :
105 : /*!
106 : \fn SimpleGuesser::GuessLanguage(char* text)
107 : */
108 0 : vector<Guess> SimpleGuesser::GuessLanguage(const char* text)
109 : {
110 0 : vector<Guess> guesses;
111 :
112 0 : if (!h)
113 0 : return guesses;
114 :
115 0 : int len = strlen(text);
116 :
117 0 : if (len > MAX_STRING_LENGTH_TO_ANALYSE)
118 0 : len = MAX_STRING_LENGTH_TO_ANALYSE;
119 :
120 0 : const char *guess_list = textcat_Classify(h, text, len);
121 :
122 : // FIXME just a temporary check until new version with renamed macros deployed
123 : #if EXTTEXTCAT_VERSION_MAJOR > 3 || (EXTTEXTCAT_VERSION_MAJOR == 3 && (EXTTEXTCAT_VERSION_MINOR > 4 || (EXTTEXTCAT_VERSION_MINOR == 4 && (EXTTEXTCAT_VERSION_MICRO >= 1))))
124 0 : if (strcmp(guess_list, TEXTCAT_RESULT_SHORT_STR) == 0)
125 : #else
126 : if (strcmp(guess_list, _TEXTCAT_RESULT_SHORT) == 0)
127 : #endif
128 0 : return guesses;
129 :
130 0 : int current_pointer = 0;
131 :
132 0 : for(int i = 0; guess_list[current_pointer] != '\0'; i++)
133 : {
134 0 : while (guess_list[current_pointer] != GUESS_SEPARATOR_OPEN && guess_list[current_pointer] != '\0')
135 0 : current_pointer++;
136 0 : if(guess_list[current_pointer] != '\0')
137 : {
138 0 : Guess g(guess_list + current_pointer);
139 :
140 0 : guesses.push_back(g);
141 :
142 0 : current_pointer++;
143 : }
144 : }
145 :
146 0 : return guesses;
147 : }
148 :
149 0 : Guess SimpleGuesser::GuessPrimaryLanguage(const char* text)
150 : {
151 0 : vector<Guess> ret = GuessLanguage(text);
152 0 : return ret.empty() ? Guess() : ret[0];
153 : }
154 : /**
155 : * Is used to know which language is available, unavailable or both
156 : * when mask = 0xF0, return only Available
157 : * when mask = 0x0F, return only Unavailable
158 : * when mask = 0xFF, return both Available and Unavailable
159 : */
160 0 : vector<Guess> SimpleGuesser::GetManagedLanguages(const char mask)
161 : {
162 0 : textcat_t *tables = (textcat_t*)h;
163 :
164 0 : vector<Guess> lang;
165 0 : if(!h){return lang;}
166 :
167 0 : for (size_t i=0; i<tables->size; ++i)
168 : {
169 0 : if (tables->fprint_disable[i] & mask)
170 : {
171 0 : string langStr = "[";
172 0 : langStr += fp_Name(tables->fprint[i]);
173 0 : Guess g(langStr.c_str());
174 0 : lang.push_back(g);
175 : }
176 : }
177 :
178 0 : return lang;
179 : }
180 :
181 0 : vector<Guess> SimpleGuesser::GetAvailableLanguages()
182 : {
183 0 : return GetManagedLanguages( sal::static_int_cast< char >( 0xF0 ) );
184 : }
185 :
186 0 : vector<Guess> SimpleGuesser::GetUnavailableLanguages()
187 : {
188 0 : return GetManagedLanguages( sal::static_int_cast< char >( 0x0F ));
189 : }
190 :
191 0 : vector<Guess> SimpleGuesser::GetAllManagedLanguages()
192 : {
193 0 : return GetManagedLanguages( sal::static_int_cast< char >( 0xFF ));
194 : }
195 :
196 0 : void SimpleGuesser::XableLanguage(string lang, char mask)
197 : {
198 0 : textcat_t *tables = (textcat_t*)h;
199 :
200 0 : if(!h){return;}
201 :
202 0 : for (size_t i=0; i<tables->size; i++)
203 : {
204 0 : string language(fp_Name(tables->fprint[i]));
205 0 : if (start(language,lang) == 0)
206 0 : tables->fprint_disable[i] = mask;
207 0 : }
208 : }
209 :
210 0 : void SimpleGuesser::EnableLanguage(string lang)
211 : {
212 0 : XableLanguage(lang, sal::static_int_cast< char >( 0xF0 ));
213 0 : }
214 :
215 0 : void SimpleGuesser::DisableLanguage(string lang)
216 : {
217 0 : XableLanguage(lang, sal::static_int_cast< char >( 0x0F ));
218 0 : }
219 :
220 0 : void SimpleGuesser::SetDBPath(const char* path, const char* prefix)
221 : {
222 0 : if (h)
223 0 : textcat_Done(h);
224 0 : h = special_textcat_Init(path, prefix);
225 0 : }
226 :
227 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|