LCOV - code coverage report
Current view: top level - lingucomponent/source/languageguessing - simpleguesser.cxx (source / functions) Hit Total Coverage
Test: commit 0e63ca4fde4e446f346e35849c756a30ca294aab Lines: 7 70 10.0 %
Date: 2014-04-11 Functions: 4 15 26.7 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             :  /**
      21             :   *
      22             :   *
      23             :   *
      24             :   *
      25             :   * TODO
      26             :   * - Add exception throwing when h == NULL
      27             :   * - Not init h when implicit constructor is launched
      28             :   */
      29             : 
      30             : #include <string.h>
      31             : #include <sstream>
      32             : #include <iostream>
      33             : 
      34             : #ifdef SYSTEM_LIBEXTTEXTCAT
      35             : #include <libexttextcat/textcat.h>
      36             : #include <libexttextcat/common.h>
      37             : #include <libexttextcat/constants.h>
      38             : #include <libexttextcat/fingerprint.h>
      39             : #include <libexttextcat/utf8misc.h>
      40             : #else
      41             : #include <textcat.h>
      42             : #include <common.h>
      43             : #include <constants.h>
      44             : #include <fingerprint.h>
      45             : #include <utf8misc.h>
      46             : #endif
      47             : 
      48             : #include <sal/types.h>
      49             : 
      50             : #include "altstrfunc.hxx"
      51             : #include "simpleguesser.hxx"
      52             : 
      53             : using namespace std;
      54             : 
      55             : /**
      56             :  * The 3 following structures are taken from fingerprint.c and textcat.c
      57             :  */
      58             : typedef struct ngram_t {
      59             : 
      60             :     sint2 rank;
      61             :     char str[MAXNGRAMSIZE+1];
      62             : 
      63             : } ngram_t;
      64             : 
      65             : typedef struct fp_t {
      66             : 
      67             :     const char *name;
      68             :     ngram_t *fprint;
      69             :     uint4 size;
      70             : 
      71             : } fp_t;
      72             : 
      73             : typedef struct textcat_t{
      74             : 
      75             :     void **fprint;
      76             :     char *fprint_disable;
      77             :     uint4 size;
      78             :     uint4 maxsize;
      79             : 
      80             :     char output[MAXOUTPUTSIZE];
      81             : 
      82             : } textcat_t;
      83             : // end of the 3 structs
      84             : 
      85           2 : SimpleGuesser::SimpleGuesser()
      86             : {
      87           2 :     h = NULL;
      88           2 : }
      89             : 
      90           0 : SimpleGuesser& SimpleGuesser::operator=(const SimpleGuesser& sg){
      91             :     // Check for self-assignment!
      92           0 :     if (this == &sg)      // Same object?
      93           0 :       return *this;       // Yes, so skip assignment, and just return *this.
      94             : 
      95           0 :     if(h){textcat_Done(h);}
      96           0 :     h = sg.h;
      97           0 :     return *this;
      98             : }
      99             : 
     100           1 : SimpleGuesser::~SimpleGuesser()
     101             : {
     102           1 :     if(h){textcat_Done(h);}
     103           1 : }
     104             : 
     105             : /*!
     106             :     \fn SimpleGuesser::GuessLanguage(char* text)
     107             :  */
     108           0 : vector<Guess> SimpleGuesser::GuessLanguage(const char* text)
     109             : {
     110           0 :     vector<Guess> guesses;
     111             : 
     112           0 :     if (!h)
     113           0 :         return guesses;
     114             : 
     115           0 :     int len = strlen(text);
     116             : 
     117           0 :     if (len > MAX_STRING_LENGTH_TO_ANALYSE)
     118           0 :         len = MAX_STRING_LENGTH_TO_ANALYSE;
     119             : 
     120           0 :     const char *guess_list = textcat_Classify(h, text, len);
     121             : 
     122             : // FIXME just a temporary check until new version with renamed macros deployed
     123             : #if EXTTEXTCAT_VERSION_MAJOR > 3 || (EXTTEXTCAT_VERSION_MAJOR == 3 && (EXTTEXTCAT_VERSION_MINOR > 4 || (EXTTEXTCAT_VERSION_MINOR == 4 && (EXTTEXTCAT_VERSION_MICRO >= 1))))
     124           0 :     if (strcmp(guess_list, TEXTCAT_RESULT_SHORT_STR) == 0)
     125             : #else
     126             :     if (strcmp(guess_list, _TEXTCAT_RESULT_SHORT) == 0)
     127             : #endif
     128           0 :         return guesses;
     129             : 
     130           0 :     int current_pointer = 0;
     131             : 
     132           0 :     for(int i = 0; guess_list[current_pointer] != '\0'; i++)
     133             :     {
     134           0 :         while (guess_list[current_pointer] != GUESS_SEPARATOR_OPEN && guess_list[current_pointer] != '\0')
     135           0 :             current_pointer++;
     136           0 :         if(guess_list[current_pointer] != '\0')
     137             :         {
     138           0 :             Guess g(guess_list + current_pointer);
     139             : 
     140           0 :             guesses.push_back(g);
     141             : 
     142           0 :             current_pointer++;
     143             :         }
     144             :     }
     145             : 
     146           0 :     return guesses;
     147             : }
     148             : 
     149           0 : Guess SimpleGuesser::GuessPrimaryLanguage(const char* text)
     150             : {
     151           0 :     vector<Guess> ret = GuessLanguage(text);
     152           0 :     return ret.empty() ? Guess() : ret[0];
     153             : }
     154             : /**
     155             :  * Is used to know which language is available, unavailable or both
     156             :  * when mask = 0xF0, return only Available
     157             :  * when mask = 0x0F, return only Unavailable
     158             :  * when mask = 0xFF, return both Available and Unavailable
     159             :  */
     160           0 : vector<Guess> SimpleGuesser::GetManagedLanguages(const char mask)
     161             : {
     162           0 :     textcat_t *tables = (textcat_t*)h;
     163             : 
     164           0 :     vector<Guess> lang;
     165           0 :     if(!h){return lang;}
     166             : 
     167           0 :     for (size_t i=0; i<tables->size; ++i)
     168             :     {
     169           0 :         if (tables->fprint_disable[i] & mask)
     170             :         {
     171           0 :             string langStr = "[";
     172           0 :             langStr += fp_Name(tables->fprint[i]);
     173           0 :             Guess g(langStr.c_str());
     174           0 :             lang.push_back(g);
     175             :         }
     176             :     }
     177             : 
     178           0 :     return lang;
     179             : }
     180             : 
     181           0 : vector<Guess> SimpleGuesser::GetAvailableLanguages()
     182             : {
     183           0 :     return GetManagedLanguages( sal::static_int_cast< char >( 0xF0 ) );
     184             : }
     185             : 
     186           0 : vector<Guess> SimpleGuesser::GetUnavailableLanguages()
     187             : {
     188           0 :     return GetManagedLanguages( sal::static_int_cast< char >( 0x0F ));
     189             : }
     190             : 
     191           0 : vector<Guess> SimpleGuesser::GetAllManagedLanguages()
     192             : {
     193           0 :     return GetManagedLanguages( sal::static_int_cast< char >( 0xFF ));
     194             : }
     195             : 
     196           0 : void SimpleGuesser::XableLanguage(string lang, char mask)
     197             : {
     198           0 :     textcat_t *tables = (textcat_t*)h;
     199             : 
     200           0 :     if(!h){return;}
     201             : 
     202           0 :     for (size_t i=0; i<tables->size; i++)
     203             :     {
     204           0 :         string language(fp_Name(tables->fprint[i]));
     205           0 :         if (start(language,lang) == 0)
     206           0 :             tables->fprint_disable[i] = mask;
     207           0 :     }
     208             : }
     209             : 
     210           0 : void SimpleGuesser::EnableLanguage(string lang)
     211             : {
     212           0 :     XableLanguage(lang,  sal::static_int_cast< char >( 0xF0 ));
     213           0 : }
     214             : 
     215           0 : void SimpleGuesser::DisableLanguage(string lang)
     216             : {
     217           0 :     XableLanguage(lang,  sal::static_int_cast< char >( 0x0F ));
     218           0 : }
     219             : 
     220           0 : void SimpleGuesser::SetDBPath(const char* path, const char* prefix)
     221             : {
     222           0 :     if (h)
     223           0 :         textcat_Done(h);
     224           0 :     h = special_textcat_Init(path, prefix);
     225           6 : }
     226             : 
     227             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10