Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * Version: MPL 1.1 / GPLv3+ / LGPLv3+
4 : *
5 : * The contents of this file are subject to the Mozilla Public License Version
6 : * 1.1 (the "License"); you may not use this file except in compliance with
7 : * the License or as specified alternatively below. You may obtain a copy of
8 : * the License at http://www.mozilla.org/MPL/
9 : *
10 : * Software distributed under the License is distributed on an "AS IS" basis,
11 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : * for the specific language governing rights and limitations under the
13 : * License.
14 : *
15 : * The Initial Developer of the Original Code is
16 : * Steven Butler <sebutler@gmail.com>
17 : * Portions created by the Initial Developer are Copyright (C) 2011 the
18 : * Initial Developer. All Rights Reserved.
19 : *
20 : * For minor contributions see the git repository.
21 : *
22 : * Alternatively, the contents of this file may be used under the terms of
23 : * either the GNU General Public License Version 3 or later (the "GPLv3+"), or
24 : * the GNU Lesser General Public License Version 3 or later (the "LGPLv3+"),
25 : * in which case the provisions of the GPLv3+ or the LGPLv3+ are applicable
26 : * instead of those above.
27 : */
28 : #include <iostream>
29 : #include <fstream>
30 : #include <string>
31 : #include <map>
32 : #include <stdlib.h>
33 : #include <string.h>
34 :
35 : static const int MAXLINE = 1024*64;
36 :
37 : using namespace std;
38 :
39 24 : int main(int argc, char *argv[])
40 : {
41 24 : if (argc != 3 || strcmp(argv[1],"-o"))
42 : {
43 0 : cout << "Usage: idxdict -o outputfile < input\n";
44 0 : ::exit(99);
45 : }
46 : // This call improves performance by approx 5x
47 24 : cin.sync_with_stdio(false);
48 :
49 24 : const char * outputFile(argv[2]);
50 : char inputBuffer[MAXLINE];
51 24 : multimap<string, size_t> entries;
52 24 : multimap<string,size_t>::iterator ret(entries.begin());
53 :
54 24 : int line(1);
55 24 : cin.getline(inputBuffer, MAXLINE);
56 24 : const string encoding(inputBuffer);
57 24 : size_t currentOffset(encoding.size()+1);
58 650544 : while (true)
59 : {
60 : // Extract the next word, but not the entry count
61 650568 : cin.getline(inputBuffer, MAXLINE, '|');
62 :
63 650568 : if (cin.eof()) break;
64 :
65 650544 : string word(inputBuffer);
66 650544 : ret = entries.insert(ret, pair<string, size_t>(word, currentOffset));
67 650544 : currentOffset += word.size() + 1;
68 : // Next is the entry count
69 650544 : cin.getline(inputBuffer, MAXLINE);
70 650544 : if (!cin.good())
71 : {
72 0 : cerr << "Unable to read entry - insufficient buffer?.\n";
73 0 : exit(99);
74 : }
75 650544 : currentOffset += strlen(inputBuffer)+1;
76 650544 : int entryCount(strtol(inputBuffer, NULL, 10));
77 1627997 : for (int i(0); i < entryCount; ++i)
78 : {
79 977453 : cin.getline(inputBuffer, MAXLINE);
80 977453 : currentOffset += strlen(inputBuffer)+1;
81 977453 : ++line;
82 : }
83 650544 : }
84 :
85 : // Use binary mode to prevent any translation of LF to CRLF on Windows
86 24 : ofstream outputStream(outputFile, ios_base::binary| ios_base::trunc|ios_base::out);
87 24 : if (!outputStream.is_open())
88 : {
89 0 : cerr << "Unable to open output file " << outputFile << endl;
90 0 : ::exit(99);
91 : }
92 :
93 24 : outputStream << encoding << '\n' << entries.size() << '\n';
94 :
95 1951704 : for (multimap<string, size_t>::const_iterator ii(entries.begin());
96 1301136 : ii != entries.end();
97 : ++ii
98 : )
99 : {
100 650544 : outputStream << ii->first << '|' << ii->second << '\n';
101 24 : }
102 96 : }
103 :
104 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|