LCOV - code coverage report
Current view: top level - usr/local/src/libreoffice/lingucomponent/source/languageguessing - simpleguesser.cxx (source / functions) Hit Total Coverage
Test: libreoffice_filtered.info Lines: 0 70 0.0 %
Date: 2013-07-09 Functions: 0 15 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             :  /**
      21             :   *
      22             :   *
      23             :   *
      24             :   *
      25             :   * TODO
      26             :   * - Add exception throwing when h == NULL
      27             :   * - Not init h when implicit constructor is launched
      28             :   */
      29             : 
      30             : 
      31             : #include <string.h>
      32             : #include <sstream>
      33             : #include <iostream>
      34             : 
      35             : #ifdef SYSTEM_LIBEXTTEXTCAT
      36             : #include <libexttextcat/textcat.h>
      37             : #include <libexttextcat/common.h>
      38             : #include <libexttextcat/constants.h>
      39             : #include <libexttextcat/fingerprint.h>
      40             : #include <libexttextcat/utf8misc.h>
      41             : #else
      42             : #include <textcat.h>
      43             : #include <common.h>
      44             : #include <constants.h>
      45             : #include <fingerprint.h>
      46             : #include <utf8misc.h>
      47             : #endif
      48             : 
      49             : #include <sal/types.h>
      50             : 
      51             : #include "altstrfunc.hxx"
      52             : #include "simpleguesser.hxx"
      53             : 
      54             : using namespace std;
      55             : 
      56             : 
      57             : /**
      58             :  * The following 3 structures are from fingerprint.c and textcat.c
      59             :  */
      60             : 
      61             : typedef struct ngram_t {
      62             : 
      63             :     sint2 rank;
      64             :     char str[MAXNGRAMSIZE+1];
      65             : 
      66             : } ngram_t;
      67             : 
      68             : typedef struct fp_t {
      69             : 
      70             :     const char *name;
      71             :     ngram_t *fprint;
      72             :     uint4 size;
      73             : 
      74             : } fp_t;
      75             : 
      76             : typedef struct textcat_t{
      77             : 
      78             :     void **fprint;
      79             :     char *fprint_disable;
      80             :     uint4 size;
      81             :     uint4 maxsize;
      82             : 
      83             :     char output[MAXOUTPUTSIZE];
      84             : 
      85             : } textcat_t;
      86             : /** end of the 3 structs */
      87             : 
      88           0 : SimpleGuesser::SimpleGuesser()
      89             : {
      90           0 :     h = NULL;
      91           0 : }
      92             : 
      93           0 : SimpleGuesser& SimpleGuesser::operator=(const SimpleGuesser& sg){
      94             :     // Check for self-assignment!
      95           0 :     if (this == &sg)      // Same object?
      96           0 :       return *this;        // Yes, so skip assignment, and just return *this.
      97             : 
      98           0 :     if(h){textcat_Done(h);}
      99           0 :     h = sg.h;
     100           0 :     return *this;
     101             : }
     102             : 
     103           0 : SimpleGuesser::~SimpleGuesser()
     104             : {
     105           0 :     if(h){textcat_Done(h);}
     106           0 : }
     107             : 
     108             : 
     109             : /*!
     110             :     \fn SimpleGuesser::GuessLanguage(const char* text)
     111             :  */
     112           0 : vector<Guess> SimpleGuesser::GuessLanguage(const char* text)
     113             : {
     114           0 :     vector<Guess> guesses;
     115             : 
     116           0 :     if (!h)
     117           0 :         return guesses;
     118             : 
     119           0 :     int len = strlen(text);
     120             : 
     121           0 :     if (len > MAX_STRING_LENGTH_TO_ANALYSE)
     122           0 :         len = MAX_STRING_LENGTH_TO_ANALYSE;
     123             : 
     124           0 :     const char *guess_list = textcat_Classify(h, text, len);
     125             : 
     126             : // FIXME: just a temporary check until the new version with renamed macros is deployed
     127             : #if EXTTEXTCAT_VERSION_MAJOR > 3 || (EXTTEXTCAT_VERSION_MAJOR == 3 && (EXTTEXTCAT_VERSION_MINOR > 4 || (EXTTEXTCAT_VERSION_MINOR == 4 && (EXTTEXTCAT_VERSION_MICRO >= 1))))
     128           0 :     if (strcmp(guess_list, TEXTCAT_RESULT_SHORT_STR) == 0)
     129             : #else
     130             :     if (strcmp(guess_list, _TEXTCAT_RESULT_SHORT) == 0)
     131             : #endif
     132           0 :         return guesses;
     133             : 
     134           0 :     int current_pointer = 0;
     135             : 
     136           0 :     for(int i = 0; guess_list[current_pointer] != '\0'; i++)
     137             :     {
     138           0 :         while (guess_list[current_pointer] != GUESS_SEPARATOR_OPEN && guess_list[current_pointer] != '\0')
     139           0 :             current_pointer++;
     140           0 :         if(guess_list[current_pointer] != '\0')
     141             :         {
     142           0 :             Guess g(guess_list + current_pointer);
     143             : 
     144           0 :             guesses.push_back(g);
     145             : 
     146           0 :             current_pointer++;
     147             :         }
     148             :     }
     149             : 
     150           0 :     return guesses;
     151             : }
     152             : 
     153           0 : Guess SimpleGuesser::GuessPrimaryLanguage(const char* text)
     154             : {
     155           0 :     vector<Guess> ret = GuessLanguage(text);
     156           0 :     return ret.empty() ? Guess() : ret[0];
     157             : }
     158             : /**
     159             :  * Returns the managed languages that are available, unavailable, or both:
     160             :  * when mask = 0xF0, returns only the available languages;
     161             :  * when mask = 0x0F, returns only the unavailable languages;
     162             :  * when mask = 0xFF, returns both the available and unavailable languages.
     163             :  */
     164           0 : vector<Guess> SimpleGuesser::GetManagedLanguages(const char mask)
     165             : {
     166           0 :     textcat_t *tables = (textcat_t*)h;
     167             : 
     168           0 :     vector<Guess> lang;
     169           0 :     if(!h){return lang;}
     170             : 
     171           0 :     for (size_t i=0; i<tables->size; ++i)
     172             :     {
     173           0 :         if (tables->fprint_disable[i] & mask)
     174             :         {
     175           0 :             string langStr = "[";
     176           0 :             langStr += fp_Name(tables->fprint[i]);
     177           0 :             Guess g(langStr.c_str());
     178           0 :             lang.push_back(g);
     179             :         }
     180             :     }
     181             : 
     182           0 :     return lang;
     183             : }
     184             : 
     185           0 : vector<Guess> SimpleGuesser::GetAvailableLanguages()
     186             : {
     187           0 :     return GetManagedLanguages( sal::static_int_cast< char >( 0xF0 ) );
     188             : }
     189             : 
     190           0 : vector<Guess> SimpleGuesser::GetUnavailableLanguages()
     191             : {
     192           0 :     return GetManagedLanguages( sal::static_int_cast< char >( 0x0F ));
     193             : }
     194             : 
     195           0 : vector<Guess> SimpleGuesser::GetAllManagedLanguages()
     196             : {
     197           0 :     return GetManagedLanguages( sal::static_int_cast< char >( 0xFF ));
     198             : }
     199             : 
     200           0 : void SimpleGuesser::XableLanguage(string lang, char mask)
     201             : {
     202           0 :     textcat_t *tables = (textcat_t*)h;
     203             : 
     204           0 :     if(!h){return;}
     205             : 
     206           0 :     for (size_t i=0; i<tables->size; i++)
     207             :     {
     208           0 :         string language(fp_Name(tables->fprint[i]));
     209           0 :         if (start(language,lang) == 0)
     210           0 :             tables->fprint_disable[i] = mask;
     211           0 :     }
     212             : }
     213             : 
     214           0 : void SimpleGuesser::EnableLanguage(string lang)
     215             : {
     216           0 :     XableLanguage(lang,  sal::static_int_cast< char >( 0xF0 ));
     217           0 : }
     218             : 
     219           0 : void SimpleGuesser::DisableLanguage(string lang)
     220             : {
     221           0 :     XableLanguage(lang,  sal::static_int_cast< char >( 0x0F ));
     222           0 : }
     223             : 
     224             : /**
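     225             : * Releases the current textcat handle, if any, and creates a new one via special_textcat_Init(path, prefix).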
     226             : */
     227           0 : void SimpleGuesser::SetDBPath(const char* path, const char* prefix)
     228             : {
     229           0 :     if (h)
     230           0 :         textcat_Done(h);
     231           0 :     h = special_textcat_Init(path, prefix);
     232           0 : }
     233             : 
     234             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10