LCOV - commit e02a6cb2c3e2b23b203b422e4e0680877f232636 - helpcompiler/source/HelpIndexer.cxx

LCOV - code coverage report

Current view:	top level - helpcompiler/source - HelpIndexer.cxx (source / functions)		Hit	Total	Coverage
Test:	commit e02a6cb2c3e2b23b203b422e4e0680877f232636	Lines:	0	72	0.0 %
Date:	2014-04-14	Functions:	0	7	0.0 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  */
       9             : 
      10             : #include <helpcompiler/HelpIndexer.hxx>
      11             : 
      12             : #include <rtl/string.hxx>
      13             : #include <rtl/uri.hxx>
      14             : #include <rtl/ustrbuf.hxx>
      15             : #include <osl/file.hxx>
      16             : #include <osl/thread.h>
      17             : #include <boost/scoped_ptr.hpp>
      18             : #include <algorithm>
      19             : 
      20             : #include "LuceneHelper.hxx"
      21             : 
      22             : using namespace lucene::document;
      23             : 
      24           0 : HelpIndexer::HelpIndexer(OUString const &lang, OUString const &module,
      25             :     OUString const &srcDir, OUString const &outDir)
      26           0 :     : d_lang(lang), d_module(module)
      27             : {
      28           0 :     d_indexDir = OUStringBuffer(outDir).append('/').
      29           0 :         append(module).append(".idxl").makeStringAndClear();
      30           0 :     d_captionDir = srcDir + "/caption";
      31           0 :     d_contentDir = srcDir + "/content";
      32           0 : }
      33             : 
      34           0 : bool HelpIndexer::indexDocuments()
      35             : {
      36           0 :     if (!scanForFiles())
      37           0 :         return false;
      38             : 
      39             :     try
      40             :     {
      41           0 :         OUString sLang = d_lang.getToken(0, '-');
      42           0 :         bool bUseCJK = sLang == "ja" || sLang == "ko" || sLang == "zh";
      43             : 
      44             :         // Construct the analyzer appropriate for the given language
      45           0 :         boost::scoped_ptr<lucene::analysis::Analyzer> analyzer;
      46           0 :         if (bUseCJK)
      47           0 :             analyzer.reset(new lucene::analysis::LanguageBasedAnalyzer(L"cjk"));
      48             :         else
      49           0 :             analyzer.reset(new lucene::analysis::standard::StandardAnalyzer());
      50             : 
      51           0 :         OUString ustrSystemPath;
      52           0 :         osl::File::getSystemPathFromFileURL(d_indexDir, ustrSystemPath);
      53             : 
      54           0 :         OString indexDirStr = OUStringToOString(ustrSystemPath, osl_getThreadTextEncoding());
      55           0 :         lucene::index::IndexWriter writer(indexDirStr.getStr(), analyzer.get(), true);
      56             :         //Double limit of tokens allowed, otherwise we'll get a too-many-tokens
      57             :         //exception for ja help. Could alternative ignore the exception and get
      58             :         //truncated results as per java-Lucene apparently
      59           0 :         writer.setMaxFieldLength(lucene::index::IndexWriter::DEFAULT_MAX_FIELD_LENGTH*2);
      60             : 
      61             :         // Index the identified help files
      62           0 :         Document doc;
      63           0 :         for (std::set<OUString>::iterator i = d_files.begin(); i != d_files.end(); ++i) {
      64           0 :             helpDocument(*i, &doc);
      65           0 :             writer.addDocument(&doc);
      66           0 :             doc.clear();
      67             :         }
      68           0 :         writer.optimize();
      69             : 
      70             :         // Optimize the index
      71           0 :         writer.optimize();
      72             :     }
      73           0 :     catch (CLuceneError &e)
      74             :     {
      75           0 :         d_error = OUString::createFromAscii(e.what());
      76           0 :         return false;
      77             :     }
      78             : 
      79           0 :     return true;
      80             : }
      81             : 
      82           0 : OUString const & HelpIndexer::getErrorMessage() {
      83           0 :     return d_error;
      84             : }
      85             : 
      86           0 : bool HelpIndexer::scanForFiles() {
      87           0 :     if (!scanForFiles(d_contentDir)) {
      88           0 :         return false;
      89             :     }
      90           0 :     if (!scanForFiles(d_captionDir)) {
      91           0 :         return false;
      92             :     }
      93           0 :     return true;
      94             : }
      95             : 
      96           0 : bool HelpIndexer::scanForFiles(OUString const & path) {
      97             : 
      98           0 :     osl::Directory dir(path);
      99           0 :     if (osl::FileBase::E_None != dir.open()) {
     100           0 :         d_error = "Error reading directory " + path;
     101           0 :         return true;
     102             :     }
     103             : 
     104           0 :     osl::DirectoryItem item;
     105           0 :     osl::FileStatus fileStatus(osl_FileStatus_Mask_FileName | osl_FileStatus_Mask_Type);
     106           0 :     while (dir.getNextItem(item) == osl::FileBase::E_None) {
     107           0 :         item.getFileStatus(fileStatus);
     108           0 :         if (fileStatus.getFileType() == osl::FileStatus::Regular) {
     109           0 :             d_files.insert(fileStatus.getFileName());
     110             :         }
     111             :     }
     112             : 
     113           0 :     return true;
     114             : }
     115             : 
     116           0 : bool HelpIndexer::helpDocument(OUString const & fileName, Document *doc) {
     117             :     // Add the help path as an indexed, untokenized field.
     118             : 
     119           0 :     OUString path = "#HLP#" + d_module + "/" + fileName;
     120           0 :     std::vector<TCHAR> aPath(OUStringToTCHARVec(path));
     121           0 :     doc->add(*_CLNEW Field(_T("path"), &aPath[0], Field::STORE_YES | Field::INDEX_UNTOKENIZED));
     122             : 
     123             :     OUString sEscapedFileName =
     124             :         rtl::Uri::encode(fileName,
     125           0 :         rtl_UriCharClassUric, rtl_UriEncodeIgnoreEscapes, RTL_TEXTENCODING_UTF8);
     126             : 
     127             :     // Add the caption as a field.
     128           0 :     OUString captionPath = d_captionDir + "/" + sEscapedFileName;
     129           0 :     doc->add(*_CLNEW Field(_T("caption"), helpFileReader(captionPath), Field::STORE_NO | Field::INDEX_TOKENIZED));
     130             : 
     131             :     // Add the content as a field.
     132           0 :     OUString contentPath = d_contentDir + "/" + sEscapedFileName;
     133           0 :     doc->add(*_CLNEW Field(_T("content"), helpFileReader(contentPath), Field::STORE_NO | Field::INDEX_TOKENIZED));
     134             : 
     135           0 :     return true;
     136             : }
     137             : 
     138           0 : lucene::util::Reader *HelpIndexer::helpFileReader(OUString const & path) {
     139           0 :     osl::File file(path);
     140           0 :     if (osl::FileBase::E_None == file.open(osl_File_OpenFlag_Read)) {
     141           0 :         file.close();
     142           0 :         OUString ustrSystemPath;
     143           0 :         osl::File::getSystemPathFromFileURL(path, ustrSystemPath);
     144           0 :         OString pathStr = OUStringToOString(ustrSystemPath, osl_getThreadTextEncoding());
     145           0 :         return _CLNEW lucene::util::FileReader(pathStr.getStr(), "UTF-8");
     146             :     } else {
     147           0 :         return _CLNEW lucene::util::StringReader(L"");
     148           0 :     }
     149             : }
     150             : 
     151             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10