LCOV - code coverage report
Current view: top level - l10ntools/source/help - HelpIndexer.cxx (source / functions) Hit Total Coverage
Test: libreoffice_filtered.info Lines: 63 72 87.5 %
Date: 2012-08-25 Functions: 6 7 85.7 %
Legend: Lines: hit not hit | Branches: + taken - not taken # not executed Branches: 76 154 49.4 %

           Branch data     Line data    Source code
       1                 :            : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2                 :            : /*
       3                 :            :  * Version: MPL 1.1 / GPLv3+ / LGPLv3+
       4                 :            :  *
       5                 :            :  * The contents of this file are subject to the Mozilla Public License Version
       6                 :            :  * 1.1 (the "License"); you may not use this file except in compliance with
       7                 :            :  * the License or as specified alternatively below. You may obtain a copy of
       8                 :            :  * the License at http://www.mozilla.org/MPL/
       9                 :            :  *
      10                 :            :  * Software distributed under the License is distributed on an "AS IS" basis,
      11                 :            :  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
      12                 :            :  * for the specific language governing rights and limitations under the
      13                 :            :  * License.
      14                 :            :  *
      15                 :            :  * Major Contributor(s):
      16                 :            :  * Copyright (C) 2012 Gert van Valkenhoef <g.h.m.van.valkenhoef@rug.nl>
      17                 :            :  *  (initial developer)
      18                 :            :  *
      19                 :            :  * All Rights Reserved.
      20                 :            :  *
      21                 :            :  * For minor contributions see the git repository.
      22                 :            :  *
      23                 :            :  * Alternatively, the contents of this file may be used under the terms of
      24                 :            :  * either the GNU General Public License Version 3 or later (the "GPLv3+"), or
      25                 :            :  * the GNU Lesser General Public License Version 3 or later (the "LGPLv3+"),
      26                 :            :  * in which case the provisions of the GPLv3+ or the LGPLv3+ are applicable
      27                 :            :  * instead of those above.
      28                 :            :  */
      29                 :            : 
      30                 :            : #include <l10ntools/HelpIndexer.hxx>
      31                 :            : 
      32                 :            : #include <rtl/string.hxx>
      33                 :            : #include <rtl/uri.hxx>
      34                 :            : #include <rtl/ustrbuf.hxx>
      35                 :            : #include <osl/file.hxx>
      36                 :            : #include <osl/thread.h>
      37                 :            : #include <boost/scoped_ptr.hpp>
      38                 :            : #include <algorithm>
      39                 :            : 
      40                 :            : #include "LuceneHelper.hxx"
      41                 :            : 
      42                 :            : using namespace lucene::document;
      43                 :            : 
      44                 :         71 : HelpIndexer::HelpIndexer(rtl::OUString const &lang, rtl::OUString const &module,
      45                 :            :     rtl::OUString const &srcDir, rtl::OUString const &outDir)
      46         [ +  - ]:         71 :     : d_lang(lang), d_module(module)
      47                 :            : {
      48 [ +  - ][ +  - ]:        142 :     d_indexDir = rtl::OUStringBuffer(outDir).append('/').
      49 [ +  - ][ +  - ]:         71 :         append(module).appendAscii(RTL_CONSTASCII_STRINGPARAM(".idxl")).toString();
      50         [ +  - ]:         71 :     d_captionDir = srcDir + rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("/caption"));
      51         [ +  - ]:         71 :     d_contentDir = srcDir + rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("/content"));
      52                 :         71 : }
      53                 :            : 
      54                 :         71 : bool HelpIndexer::indexDocuments()
      55                 :            : {
      56         [ -  + ]:         71 :     if (!scanForFiles())
      57                 :          0 :         return false;
      58                 :            : 
      59                 :            :     try
      60                 :            :     {
      61                 :         71 :         rtl::OUString sLang = d_lang.getToken(0, '-');
      62 [ +  - ][ -  + ]:         71 :         bool bUseCJK = sLang == "ja" || sLang == "ko" || sLang == "zh";
                 [ +  - ]
      63                 :            : 
      64                 :            :         // Construct the analyzer appropriate for the given language
      65                 :         71 :         boost::scoped_ptr<lucene::analysis::Analyzer> analyzer;
      66         [ -  + ]:         71 :         if (bUseCJK)
      67 [ #  # ][ #  # ]:          0 :             analyzer.reset(new lucene::analysis::LanguageBasedAnalyzer(L"cjk"));
                 [ #  # ]
      68                 :            :         else
      69 [ +  - ][ +  - ]:         71 :             analyzer.reset(new lucene::analysis::standard::StandardAnalyzer());
                 [ +  - ]
      70                 :            : 
      71                 :         71 :         rtl::OUString ustrSystemPath;
      72         [ +  - ]:         71 :         osl::File::getSystemPathFromFileURL(d_indexDir, ustrSystemPath);
      73                 :            : 
      74 [ +  - ][ +  - ]:         71 :         rtl::OString indexDirStr = rtl::OUStringToOString(ustrSystemPath, osl_getThreadTextEncoding());
      75         [ +  - ]:         71 :         lucene::index::IndexWriter writer(indexDirStr.getStr(), analyzer.get(), true);
      76                 :            :         //Double limit of tokens allowed, otherwise we'll get a too-many-tokens
      77                 :            :         //exception for ja help. Could alternative ignore the exception and get
      78                 :            :         //truncated results as per java-Lucene apparently
      79         [ +  - ]:         71 :         writer.setMaxFieldLength(lucene::index::IndexWriter::DEFAULT_MAX_FIELD_LENGTH*2);
      80                 :            : 
      81                 :            :         // Index the identified help files
      82         [ +  - ]:         71 :         Document doc;
      83         [ +  + ]:       9052 :         for (std::set<rtl::OUString>::iterator i = d_files.begin(); i != d_files.end(); ++i) {
      84         [ +  - ]:       8981 :             helpDocument(*i, &doc);
      85         [ +  - ]:       8981 :             writer.addDocument(&doc);
      86         [ +  - ]:       8981 :             doc.clear();
      87                 :            :         }
      88         [ +  - ]:         71 :         writer.optimize();
      89                 :            : 
      90                 :            :         // Optimize the index
      91 [ +  - ][ +  - ]:         71 :         writer.optimize();
         [ +  - ][ +  - ]
      92                 :            :     }
      93         [ #  # ]:          0 :     catch (CLuceneError &e)
      94                 :            :     {
      95         [ #  # ]:          0 :         d_error = rtl::OUString::createFromAscii(e.what());
      96                 :          0 :         return false;
      97                 :            :     }
      98                 :            : 
      99                 :         71 :     return true;
     100                 :            : }
     101                 :            : 
     102                 :          0 : rtl::OUString const & HelpIndexer::getErrorMessage() {
     103                 :          0 :     return d_error;
     104                 :            : }
     105                 :            : 
     106                 :         71 : bool HelpIndexer::scanForFiles() {
     107         [ -  + ]:         71 :     if (!scanForFiles(d_contentDir)) {
     108                 :          0 :         return false;
     109                 :            :     }
     110         [ -  + ]:         71 :     if (!scanForFiles(d_captionDir)) {
     111                 :          0 :         return false;
     112                 :            :     }
     113                 :         71 :     return true;
     114                 :            : }
     115                 :            : 
     116                 :        142 : bool HelpIndexer::scanForFiles(rtl::OUString const & path) {
     117                 :            : 
     118                 :        142 :     osl::Directory dir(path);
     119 [ +  + ][ +  - ]:        142 :     if (osl::FileBase::E_None != dir.open()) {
     120         [ +  - ]:          2 :         d_error = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("Error reading directory ")) + path;
     121                 :          2 :         return true;
     122                 :            :     }
     123                 :            : 
     124                 :        140 :     osl::DirectoryItem item;
     125                 :        140 :     osl::FileStatus fileStatus(osl_FileStatus_Mask_FileName | osl_FileStatus_Mask_Type);
     126 [ +  - ][ +  + ]:      17559 :     while (dir.getNextItem(item) == osl::FileBase::E_None) {
     127         [ +  - ]:      17419 :         item.getFileStatus(fileStatus);
     128 [ +  - ][ +  - ]:      17419 :         if (fileStatus.getFileType() == osl::FileStatus::Regular) {
     129 [ +  - ][ +  - ]:      17419 :             d_files.insert(fileStatus.getFileName());
     130                 :            :         }
     131                 :            :     }
     132                 :            : 
     133 [ +  - ][ +  - ]:        142 :     return true;
     134                 :            : }
     135                 :            : 
     136                 :       8981 : bool HelpIndexer::helpDocument(rtl::OUString const & fileName, Document *doc) {
     137                 :            :     // Add the help path as an indexed, untokenized field.
     138                 :            : 
     139                 :            :     rtl::OUString path = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("#HLP#")) +
     140 [ +  - ][ +  - ]:       8981 :         d_module + rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("/")) + fileName;
     141         [ +  - ]:       8981 :     std::vector<TCHAR> aPath(OUStringToTCHARVec(path));
     142 [ +  - ][ +  - ]:       8981 :     doc->add(*_CLNEW Field(_T("path"), &aPath[0], Field::STORE_YES | Field::INDEX_UNTOKENIZED));
         [ +  - ][ +  - ]
     143                 :            : 
     144                 :            :     rtl::OUString sEscapedFileName =
     145                 :            :         rtl::Uri::encode(fileName,
     146                 :       8981 :         rtl_UriCharClassUric, rtl_UriEncodeIgnoreEscapes, RTL_TEXTENCODING_UTF8);
     147                 :            : 
     148                 :            :     // Add the caption as a field.
     149         [ +  - ]:       8981 :     rtl::OUString captionPath = d_captionDir + rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("/")) + sEscapedFileName;
     150 [ +  - ][ +  - ]:       8981 :     doc->add(*_CLNEW Field(_T("caption"), helpFileReader(captionPath), Field::STORE_NO | Field::INDEX_TOKENIZED));
         [ +  - ][ +  - ]
     151                 :            : 
     152                 :            :     // Add the content as a field.
     153         [ +  - ]:       8981 :     rtl::OUString contentPath = d_contentDir + rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("/")) + sEscapedFileName;
     154 [ +  - ][ +  - ]:       8981 :     doc->add(*_CLNEW Field(_T("content"), helpFileReader(contentPath), Field::STORE_NO | Field::INDEX_TOKENIZED));
         [ +  - ][ +  - ]
     155                 :            : 
     156                 :       8981 :     return true;
     157                 :            : }
     158                 :            : 
     159                 :      17962 : lucene::util::Reader *HelpIndexer::helpFileReader(rtl::OUString const & path) {
     160                 :      17962 :     osl::File file(path);
     161 [ +  + ][ +  - ]:      17962 :     if (osl::FileBase::E_None == file.open(osl_File_OpenFlag_Read)) {
     162         [ +  - ]:      17419 :         file.close();
     163                 :      17419 :         rtl::OUString ustrSystemPath;
     164         [ +  - ]:      17419 :         osl::File::getSystemPathFromFileURL(path, ustrSystemPath);
     165 [ +  - ][ +  - ]:      17419 :         rtl::OString pathStr = rtl::OUStringToOString(ustrSystemPath, osl_getThreadTextEncoding());
     166 [ +  - ][ +  - ]:      17419 :         return _CLNEW lucene::util::FileReader(pathStr.getStr(), "UTF-8");
     167                 :            :     } else {
     168 [ +  - ][ +  - ]:        543 :         return _CLNEW lucene::util::StringReader(L"");
     169         [ +  - ]:      17962 :     }
     170                 :            : }
     171                 :            : 
     172                 :            : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10