Branch data Line data Source code
1 : : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : : /*
3 : : * Version: MPL 1.1 / GPLv3+ / LGPLv3+
4 : : *
5 : : * The contents of this file are subject to the Mozilla Public License Version
6 : : * 1.1 (the "License"); you may not use this file except in compliance with
7 : : * the License or as specified alternatively below. You may obtain a copy of
8 : : * the License at http://www.mozilla.org/MPL/
9 : : *
10 : : * Software distributed under the License is distributed on an "AS IS" basis,
11 : : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : : * for the specific language governing rights and limitations under the
13 : : * License.
14 : : *
15 : : * The Initial Developer of the Original Code is
16 : : * Steven Butler <sebutler@gmail.com>
17 : : * Portions created by the Initial Developer are Copyright (C) 2011 the
18 : : * Initial Developer. All Rights Reserved.
19 : : *
20 : : * For minor contributions see the git repository.
21 : : *
22 : : * Alternatively, the contents of this file may be used under the terms of
23 : : * either the GNU General Public License Version 3 or later (the "GPLv3+"), or
24 : : * the GNU Lesser General Public License Version 3 or later (the "LGPLv3+"),
25 : : * in which case the provisions of the GPLv3+ or the LGPLv3+ are applicable
26 : : * instead of those above.
27 : : */
28 : : #include <iostream>
29 : : #include <fstream>
30 : : #include <string>
31 : : #include <map>
32 : : #include <stdlib.h>
33 : : #include <string.h>
34 : :
35 : : static const int MAXLINE = 1024*64;
36 : :
37 : : using namespace std;
38 : :
39 : 23 : int main(int argc, char *argv[])
40 : : {
41 [ + - ][ - + ]: 23 : if (argc != 3 || strcmp(argv[1],"-o"))
42 : : {
43 [ # # ]: 0 : cout << "Usage: idxdict -o outputfile < input\n";
44 : 0 : ::exit(99);
45 : : }
46 : : // This call improves performance by approx 5x
47 [ + - ]: 23 : cin.sync_with_stdio(false);
48 : :
49 : 23 : const char * outputFile(argv[2]);
50 : : char inputBuffer[MAXLINE];
51 [ + - ]: 23 : multimap<string, size_t> entries;
52 : 23 : multimap<string,size_t>::iterator ret(entries.begin());
53 : :
54 : 23 : int line(1);
55 [ + - ]: 23 : cin.getline(inputBuffer, MAXLINE);
56 [ + - ]: 23 : const string encoding(inputBuffer);
57 : 23 : size_t currentOffset(encoding.size()+1);
58 : 586443 : while (true)
59 : : {
60 : : // Extract the next word, but not the entry count
61 [ + - ]: 586466 : cin.getline(inputBuffer, MAXLINE, '|');
62 : :
63 [ + - ][ + + ]: 586466 : if (cin.eof()) break;
64 : :
65 [ + - ]: 586443 : string word(inputBuffer);
66 [ + - ][ + - ]: 586443 : ret = entries.insert(ret, pair<string, size_t>(word, currentOffset));
[ + - ]
67 : 586443 : currentOffset += word.size() + 1;
68 : : // Next is the entry count
69 [ + - ]: 586443 : cin.getline(inputBuffer, MAXLINE);
70 [ + - ][ - + ]: 586443 : if (!cin.good())
71 : : {
72 [ # # ]: 0 : cerr << "Unable to read entry - insufficient buffer?.\n";
73 : 0 : exit(99);
74 : : }
75 : 586443 : currentOffset += strlen(inputBuffer)+1;
76 : 586443 : int entryCount(strtol(inputBuffer, NULL, 10));
77 [ + + ]: 1482061 : for (int i(0); i < entryCount; ++i)
78 : : {
79 [ + - ]: 895618 : cin.getline(inputBuffer, MAXLINE);
80 : 895618 : currentOffset += strlen(inputBuffer)+1;
81 : 895618 : ++line;
82 : : }
83 : 586443 : }
84 : :
85 : : // Use binary mode to prevent any translation of LF to CRLF on Windows
86 [ + - ]: 23 : ofstream outputStream(outputFile, ios_base::binary| ios_base::trunc|ios_base::out);
87 [ + - ][ - + ]: 23 : if (!outputStream.is_open())
88 : : {
89 [ # # ][ # # ]: 0 : cerr << "Unable to open output file " << outputFile << endl;
[ # # ]
90 : 0 : ::exit(99);
91 : : }
92 : :
93 [ + - ][ + - ]: 23 : outputStream << encoding << '\n' << entries.size() << '\n';
[ + - ][ + - ]
94 : :
95 [ + - ][ + - ]: 1172932 : for (multimap<string, size_t>::const_iterator ii(entries.begin());
[ + - ][ + + ]
96 [ + - ]: 586466 : ii != entries.end();
97 : : ++ii
98 : : )
99 : : {
100 [ + - ][ + - ]: 586443 : outputStream << ii->first << '|' << ii->second << '\n';
[ + - ][ + - ]
[ + - ][ + - ]
101 [ + - ]: 23 : }
102 [ + - ][ + - ]: 92 : }
103 : :
104 : : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|