LCOV - code coverage report
Current view: top level - helpcompiler/source - HelpIndexer.cxx (source / functions) Hit Total Coverage
Test: commit 10e77ab3ff6f4314137acd6e2702a6e5c1ce1fae Lines: 0 70 0.0 %
Date: 2014-11-03 Functions: 0 6 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  */
       9             : 
      10             : #include <helpcompiler/HelpIndexer.hxx>
      11             : 
      12             : #include <rtl/string.hxx>
      13             : #include <rtl/uri.hxx>
      14             : #include <rtl/ustrbuf.hxx>
      15             : #include <osl/file.hxx>
      16             : #include <osl/thread.h>
      17             : #include <boost/scoped_ptr.hpp>
      18             : #include <algorithm>
      19             : 
      20             : #include "LuceneHelper.hxx"
      21             : 
      22             : using namespace lucene::document;
      23             : 
      24           0 : HelpIndexer::HelpIndexer(OUString const &lang, OUString const &module,
      25             :     OUString const &srcDir, OUString const &outDir)
      26           0 :     : d_lang(lang), d_module(module)
      27             : {
      28           0 :     d_indexDir = OUStringBuffer(outDir).append('/').
      29           0 :         append(module).append(".idxl").makeStringAndClear();
      30           0 :     d_captionDir = srcDir + "/caption";
      31           0 :     d_contentDir = srcDir + "/content";
      32           0 : }
      33             : 
      34           0 : bool HelpIndexer::indexDocuments()
      35             : {
      36           0 :     if (!scanForFiles())
      37           0 :         return false;
      38             : 
      39             :     try
      40             :     {
      41           0 :         OUString sLang = d_lang.getToken(0, '-');
      42           0 :         bool bUseCJK = sLang == "ja" || sLang == "ko" || sLang == "zh";
      43             : 
      44             :         // Construct the analyzer appropriate for the given language
      45           0 :         boost::scoped_ptr<lucene::analysis::Analyzer> analyzer;
      46           0 :         if (bUseCJK)
      47           0 :             analyzer.reset(new lucene::analysis::LanguageBasedAnalyzer(L"cjk"));
      48             :         else
      49           0 :             analyzer.reset(new lucene::analysis::standard::StandardAnalyzer());
      50             : 
      51           0 :         OUString ustrSystemPath;
      52           0 :         osl::File::getSystemPathFromFileURL(d_indexDir, ustrSystemPath);
      53             : 
      54           0 :         OString indexDirStr = OUStringToOString(ustrSystemPath, osl_getThreadTextEncoding());
      55           0 :         lucene::index::IndexWriter writer(indexDirStr.getStr(), analyzer.get(), true);
      56             :         //Double limit of tokens allowed, otherwise we'll get a too-many-tokens
      57             :         //exception for ja help. Could alternative ignore the exception and get
      58             :         //truncated results as per java-Lucene apparently
      59           0 :         writer.setMaxFieldLength(lucene::index::IndexWriter::DEFAULT_MAX_FIELD_LENGTH*2);
      60             : 
      61             :         // Index the identified help files
      62           0 :         Document doc;
      63           0 :         for (std::set<OUString>::iterator i = d_files.begin(); i != d_files.end(); ++i) {
      64           0 :             helpDocument(*i, &doc);
      65           0 :             writer.addDocument(&doc);
      66           0 :             doc.clear();
      67             :         }
      68           0 :         writer.optimize();
      69             : 
      70             :         // Optimize the index
      71           0 :         writer.optimize();
      72             :     }
      73           0 :     catch (CLuceneError &e)
      74             :     {
      75           0 :         d_error = OUString::createFromAscii(e.what());
      76           0 :         return false;
      77             :     }
      78             : 
      79           0 :     return true;
      80             : }
      81             : 
      82             : 
      83           0 : bool HelpIndexer::scanForFiles() {
      84           0 :     if (!scanForFiles(d_contentDir)) {
      85           0 :         return false;
      86             :     }
      87           0 :     if (!scanForFiles(d_captionDir)) {
      88           0 :         return false;
      89             :     }
      90           0 :     return true;
      91             : }
      92             : 
      93           0 : bool HelpIndexer::scanForFiles(OUString const & path) {
      94             : 
      95           0 :     osl::Directory dir(path);
      96           0 :     if (osl::FileBase::E_None != dir.open()) {
      97           0 :         d_error = "Error reading directory " + path;
      98           0 :         return true;
      99             :     }
     100             : 
     101           0 :     osl::DirectoryItem item;
     102           0 :     osl::FileStatus fileStatus(osl_FileStatus_Mask_FileName | osl_FileStatus_Mask_Type);
     103           0 :     while (dir.getNextItem(item) == osl::FileBase::E_None) {
     104           0 :         item.getFileStatus(fileStatus);
     105           0 :         if (fileStatus.getFileType() == osl::FileStatus::Regular) {
     106           0 :             d_files.insert(fileStatus.getFileName());
     107             :         }
     108             :     }
     109             : 
     110           0 :     return true;
     111             : }
     112             : 
     113           0 : bool HelpIndexer::helpDocument(OUString const & fileName, Document *doc) {
     114             :     // Add the help path as an indexed, untokenized field.
     115             : 
     116           0 :     OUString path = "#HLP#" + d_module + "/" + fileName;
     117           0 :     std::vector<TCHAR> aPath(OUStringToTCHARVec(path));
     118           0 :     doc->add(*_CLNEW Field(_T("path"), &aPath[0], Field::STORE_YES | Field::INDEX_UNTOKENIZED));
     119             : 
     120             :     OUString sEscapedFileName =
     121             :         rtl::Uri::encode(fileName,
     122           0 :         rtl_UriCharClassUric, rtl_UriEncodeIgnoreEscapes, RTL_TEXTENCODING_UTF8);
     123             : 
     124             :     // Add the caption as a field.
     125           0 :     OUString captionPath = d_captionDir + "/" + sEscapedFileName;
     126           0 :     doc->add(*_CLNEW Field(_T("caption"), helpFileReader(captionPath), Field::STORE_NO | Field::INDEX_TOKENIZED));
     127             : 
     128             :     // Add the content as a field.
     129           0 :     OUString contentPath = d_contentDir + "/" + sEscapedFileName;
     130           0 :     doc->add(*_CLNEW Field(_T("content"), helpFileReader(contentPath), Field::STORE_NO | Field::INDEX_TOKENIZED));
     131             : 
     132           0 :     return true;
     133             : }
     134             : 
     135           0 : lucene::util::Reader *HelpIndexer::helpFileReader(OUString const & path) {
     136           0 :     osl::File file(path);
     137           0 :     if (osl::FileBase::E_None == file.open(osl_File_OpenFlag_Read)) {
     138           0 :         file.close();
     139           0 :         OUString ustrSystemPath;
     140           0 :         osl::File::getSystemPathFromFileURL(path, ustrSystemPath);
     141           0 :         OString pathStr = OUStringToOString(ustrSystemPath, osl_getThreadTextEncoding());
     142           0 :         return _CLNEW lucene::util::FileReader(pathStr.getStr(), "UTF-8");
     143             :     } else {
     144           0 :         return _CLNEW lucene::util::StringReader(L"");
     145           0 :     }
     146             : }
     147             : 
     148             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10