LCOV - code coverage report
Current view: top level - libreoffice/lingucomponent/source/languageguessing - simpleguesser.cxx (source / functions) Hit Total Coverage
Test: libreoffice_filtered.info Lines: 0 68 0.0 %
Date: 2012-12-27 Functions: 0 15 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             :  /**
      21             :   *
      22             :   *
      23             :   *
      24             :   *
      25             :   * TODO
      26             :   * - Add exception throwing when h == NULL
      27             :   * - Not init h when implicit constructor is launched
      28             :   */
      29             : 
      30             : 
      31             : #include <string.h>
      32             : #include <sstream>
      33             : #include <iostream>
      34             : 
      35             : #ifdef SYSTEM_LIBEXTTEXTCAT
      36             : #include <libexttextcat/textcat.h>
      37             : #include <libexttextcat/common.h>
      38             : #include <libexttextcat/constants.h>
      39             : #include <libexttextcat/fingerprint.h>
      40             : #include <libexttextcat/utf8misc.h>
      41             : #else
      42             : #include <textcat.h>
      43             : #include <common.h>
      44             : #include <constants.h>
      45             : #include <fingerprint.h>
      46             : #include <utf8misc.h>
      47             : #endif
      48             : 
      49             : #include <sal/types.h>
      50             : 
      51             : #include "altstrfunc.hxx"
      52             : #include "simpleguesser.hxx"
      53             : 
      54             : #ifndef _UTF8_
      55             : #define _UTF8_
      56             : #endif
      57             : 
      58             : 
      59             : using namespace std;
      60             : 
      61             : 
      62             : /**
      63             :  * The 3 following structures are from fingerprint.c and textcat.c
      64             :  */
      65             : 
      66             : typedef struct ngram_t { // One n-gram record: a rank value plus a fixed-size character buffer.
      67             :     // NOTE(review): local redeclaration of a struct from the textcat sources; its layout must match the library's — confirm.
      68             :     sint2 rank; // presumably the n-gram's position in a frequency ranking — confirm against fingerprint.c
      69             :     char str[MAXNGRAMSIZE+1]; // n-gram characters; the +1 presumably leaves room for a terminating NUL
      70             : 
      71             : } ngram_t;
      72             : 
      73             : typedef struct fp_t { // A named collection of ngram_t records; presumably one language fingerprint — confirm against fingerprint.c.
      74             :     // NOTE(review): local redeclaration of a struct from the textcat sources; its layout must match the library's — confirm.
      75             :     const char *name; // presumably the string that fp_Name() returns for this fingerprint — confirm
      76             :     ngram_t *fprint; // pointer to the n-gram records
      77             :     uint4 size; // presumably the number of records reachable through fprint — confirm
      78             : 
      79             : } fp_t;
      80             : 
      81             : typedef struct textcat_t{ // Layout through which this file reads the handle h (see the casts in GetManagedLanguages / XableLanguage).
      82             :     // NOTE(review): local redeclaration of a struct from the textcat sources; its layout must match the library's — confirm.
      83             :     void **fprint; // per-language fingerprint handles; entry i is passed to fp_Name() to get the language name
      84             :     char *fprint_disable; // per-language flag byte, parallel to fprint; this file stores 0xF0 (enabled) or 0x0F (disabled) in it
      85             :     uint4 size; // number of entries iterated in fprint / fprint_disable
      86             :     uint4 maxsize; // not used in this file; presumably the allocated capacity of the arrays — confirm
      87             : 
      88             :     char output[MAXOUTPUTSIZE]; // not used in this file; presumably the buffer holding the classifier's result string — confirm
      89             : 
      90             : } textcat_t;
      91             : /** end of the 3 structs */
      92             : 
      93           0 : SimpleGuesser::SimpleGuesser() // Creates a guesser that has no textcat handle yet.
      94             : { // Until SetDBPath() installs a handle, the query methods in this file return empty results and the enable/disable methods do nothing.
      95           0 :     h = NULL;
      96           0 : }
      97             : 
      98           0 : void SimpleGuesser::operator=(SimpleGuesser& sg){ // Releases this object's handle, then copies sg's handle pointer (no deep copy).
      99           0 :     if(h){textcat_Done(h);} // NOTE(review): on self-assignment this releases h and the next line keeps the released pointer
     100           0 :     h = sg.h; // NOTE(review): both objects now hold the same h and both destructors call textcat_Done(h) on it — looks like a double release; confirm intended ownership
     101           0 : }
     102             : 
     103           0 : SimpleGuesser::~SimpleGuesser() // Releases the textcat handle, if one was installed.
     104             : {
     105           0 :     if(h){textcat_Done(h);} // h stays NULL when SetDBPath() was never called, so the release is skipped
     106           0 : }
     107             : 
     108             : 
     109             : /*!
     110             :     \fn SimpleGuesser::GuessLanguage(const char* text)
     111             :     Classifies text with textcat_Classify() and returns one Guess per opening separator found in the result string; empty when no handle is set. */
     112           0 : vector<Guess> SimpleGuesser::GuessLanguage(const char* text)
     113             : {
     114           0 :     vector<Guess> guesses;
     115             : 
     116           0 :     if (!h) // no textcat handle installed (see SetDBPath): nothing can be classified
     117           0 :         return guesses;
     118             : 
     119           0 :     int len = strlen(text); // NOTE(review): size_t result narrowed to int; text must be non-NULL because strlen reads it
     120             : 
     121           0 :     if (len > MAX_STRING_LENGTH_TO_ANALYSE) // cap the length that is handed to the classifier
     122           0 :         len = MAX_STRING_LENGTH_TO_ANALYSE;
     123             : 
     124           0 :     const char *guess_list = textcat_Classify(h, text, len); // NOTE(review): the result is used below without a NULL check — confirm it can never be NULL
     125             : 
     126           0 :     if (strcmp(guess_list, _TEXTCAT_RESULT_SHORT) == 0) // presumably the classifier's marker for input too short to classify — confirm; yields no guesses
     127           0 :         return guesses;
     128             : 
     129           0 :     int current_pointer = 0; // index into guess_list
     130             : 
     131           0 :     for(int i = 0; guess_list[current_pointer] != '\0'; i++) // i is only incremented, never read
     132             :     { // each pass: skip forward to the next opening separator (or the end of the string) and record one Guess there
     133           0 :         while (guess_list[current_pointer] != GUESS_SEPARATOR_OPEN && guess_list[current_pointer] != '\0')
     134           0 :             current_pointer++;
     135           0 :         if(guess_list[current_pointer] != '\0')
     136             :         {
     137           0 :             Guess g(guess_list + current_pointer); // Guess is built from the text that starts at the opening separator
     138             : 
     139           0 :             guesses.push_back(g);
     140             : 
     141           0 :             current_pointer++; // step past this separator so the next scan finds the following one
     142             :         }
     143             :     }
     144             : 
     145           0 :     return guesses;
     146             : }
     147             : 
     148           0 : Guess SimpleGuesser::GuessPrimaryLanguage(const char* text) // Returns the first guess produced by GuessLanguage(text).
     149             : {
     150           0 :     vector<Guess> ret = GuessLanguage(text);
     151           0 :     return ret.empty() ? Guess() : ret[0]; // a default-constructed Guess stands in when there are no guesses
     152             : }
     153             : /**
     154             :  * Is used to know which languages are available, unavailable or both
     155             :  * when mask = 0xF0, return only Available
     156             :  * when mask = 0x0F, return only Unavailable
     157             :  * when mask = 0xFF, return both Available and Unavailable
     158             :  */
     159           0 : vector<Guess> SimpleGuesser::GetManagedLanguages(const char mask)
     160             : {
     161           0 :     textcat_t *tables = (textcat_t*)h; // read the handle through the local textcat_t declaration; only a cast, nothing is dereferenced before the check below
     162             : 
     163           0 :     vector<Guess> lang;
     164           0 :     if(!h){return lang;} // no handle installed: report no languages
     165             : 
     166           0 :     for (size_t i=0; i<tables->size; ++i)
     167             :     {
     168           0 :         if (tables->fprint_disable[i] & mask) // keep entries whose flag byte shares at least one bit with mask
     169             :         {
     170           0 :             string langStr = "["; // the name is prefixed before being given to Guess — presumably to match the bracketed form Guess parses; confirm in the Guess class
     171           0 :             langStr += fp_Name(tables->fprint[i]);
     172           0 :             Guess g(langStr.c_str());
     173           0 :             lang.push_back(g);
     174             :         }
     175             :     }
     176             : 
     177           0 :     return lang;
     178             : }
     179             : 
     180           0 : vector<Guess> SimpleGuesser::GetAvailableLanguages() // Lists the languages whose flag byte matches 0xF0, the value EnableLanguage() stores.
     181             : {
     182           0 :     return GetManagedLanguages( sal::static_int_cast< char >( 0xF0 ) ); // cast turns the int constant into the char mask parameter
     183             : }
     184             : 
     185           0 : vector<Guess> SimpleGuesser::GetUnavailableLanguages() // Lists the languages whose flag byte matches 0x0F, the value DisableLanguage() stores.
     186             : {
     187           0 :     return GetManagedLanguages( sal::static_int_cast< char >( 0x0F )); // cast turns the int constant into the char mask parameter
     188             : }
     189             : 
     190           0 : vector<Guess> SimpleGuesser::GetAllManagedLanguages() // Lists enabled and disabled languages alike.
     191             : {
     192           0 :     return GetManagedLanguages( sal::static_int_cast< char >( 0xFF )); // 0xFF shares bits with both the 0xF0 and the 0x0F flag values
     193             : }
     194             : 
     195           0 : void SimpleGuesser::XableLanguage(string lang, char mask) // Stores mask as the flag byte of every fingerprint whose name matches lang.
     196             : {
     197           0 :     textcat_t *tables = (textcat_t*)h; // read the handle through the local textcat_t declaration
     198             : 
     199           0 :     if(!h){return;} // no handle installed: nothing to flag
     200             : 
     201           0 :     for (size_t i=0; i<tables->size; i++)
     202             :     {
     203           0 :         string language(fp_Name(tables->fprint[i]));
     204           0 :         if (start(language,lang) == 0) // start() is not defined in this file (presumably altstrfunc.hxx); presumably 0 means language begins with lang — confirm
     205           0 :             tables->fprint_disable[i] = mask; // overwrites the whole flag byte; the loop continues, so several entries may be updated
     206           0 :     }
     207             : }
     208             : 
     209           0 : void SimpleGuesser::EnableLanguage(string lang) // Marks the fingerprints matching lang with 0xF0, the value GetAvailableLanguages() selects.
     210             : {
     211           0 :     XableLanguage(lang,  sal::static_int_cast< char >( 0xF0 ));
     212           0 : }
     213             : 
     214           0 : void SimpleGuesser::DisableLanguage(string lang) // Marks the fingerprints matching lang with 0x0F, the value GetUnavailableLanguages() selects.
     215             : {
     216           0 :     XableLanguage(lang,  sal::static_int_cast< char >( 0x0F ));
     217           0 : }
     218             : 
     219             : /**
     220             : * Replaces the current textcat handle with the one returned by special_textcat_Init(path, prefix).
     221             : */
     222           0 : void SimpleGuesser::SetDBPath(const char* path, const char* prefix)
     223             : {
     224           0 :     if (h) // release the previous handle before it is overwritten
     225           0 :         textcat_Done(h);
     226           0 :     h = special_textcat_Init(path, prefix); // stored unchecked; if it is NULL the other methods behave as if no handle had been set
     227           0 : }
     228             : 
     229             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10