Line data Source code
1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* libmspub
3 : * Version: MPL 1.1 / GPLv2+ / LGPLv2+
4 : *
5 : * The contents of this file are subject to the Mozilla Public License Version
6 : * 1.1 (the "License"); you may not use this file except in compliance with
7 : * the License or as specified alternatively below. You may obtain a copy of
8 : * the License at http://www.mozilla.org/MPL/
9 : *
10 : * Software distributed under the License is distributed on an "AS IS" basis,
11 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : * for the specific language governing rights and limitations under the
13 : * License.
14 : *
15 : * Major Contributor(s):
16 : * Copyright (C) 2012 Brennan Vincent <brennanv@email.arizona.edu>
17 : *
18 : * All Rights Reserved.
19 : *
20 : * For minor contributions see the git repository.
21 : *
22 : * Alternatively, the contents of this file may be used under the terms of
23 : * either the GNU General Public License Version 2 or later (the "GPLv2+"), or
24 : * the GNU Lesser General Public License Version 2 or later (the "LGPLv2+"),
25 : * in which case the provisions of the GPLv2+ or the LGPLv2+ are applicable
26 : * instead of those above.
27 : */
28 :
29 : #include "MSPUBParser97.h"
30 : #include "MSPUBCollector.h"
31 : #include "libmspub_utils.h"
32 : #include "MSPUBTypes.h"
33 :
34 0 : libmspub::MSPUBParser97::MSPUBParser97(WPXInputStream *input, MSPUBCollector *collector)
35 0 : : MSPUBParser2k(input, collector), m_isBanner(false)
36 : {
37 0 : m_collector->setEncoding(WIN_1252);
38 0 : }
39 :
40 0 : unsigned short libmspub::MSPUBParser97::getTextMarker() const
41 : {
42 0 : return 0x0000;
43 : }
44 :
45 0 : unsigned libmspub::MSPUBParser97::getTextIdOffset() const
46 : {
47 0 : return 0x46;
48 : }
49 :
50 0 : bool libmspub::MSPUBParser97::parse()
51 : {
52 0 : WPXInputStream *contents = m_input->getDocumentOLEStream("Contents");
53 0 : if (!contents)
54 : {
55 : MSPUB_DEBUG_MSG(("Couldn't get contents stream.\n"));
56 0 : return false;
57 : }
58 0 : if (!parseContents(contents))
59 : {
60 : MSPUB_DEBUG_MSG(("Couldn't parse contents stream.\n"));
61 0 : delete contents;
62 0 : return false;
63 : }
64 0 : return m_collector->go();
65 : }
66 :
67 0 : bool libmspub::MSPUBParser97::parseDocument(WPXInputStream *input)
68 : {
69 0 : if (m_documentChunkIndex.is_initialized())
70 : {
71 0 : input->seek(m_contentChunks[m_documentChunkIndex.get()].offset + 0x12, WPX_SEEK_SET);
72 0 : unsigned short coordinateSystemMark = readU16(input);
73 0 : m_isBanner = coordinateSystemMark == 0x0007;
74 0 : unsigned width = readU32(input);
75 0 : unsigned height = readU32(input);
76 0 : m_collector->setWidthInEmu(width);
77 0 : m_collector->setHeightInEmu(height);
78 0 : return true;
79 : }
80 0 : return false;
81 : }
82 :
83 0 : void libmspub::MSPUBParser97::parseContentsTextIfNecessary(WPXInputStream *input)
84 : {
85 0 : input->seek(0x12, WPX_SEEK_SET);
86 0 : input->seek(readU32(input), WPX_SEEK_SET);
87 0 : input->seek(14, WPX_SEEK_CUR);
88 0 : unsigned textStart = readU32(input);
89 0 : unsigned textEnd = readU32(input);
90 0 : unsigned prop1Index = readU16(input);
91 0 : unsigned prop2Index = readU16(input);
92 0 : unsigned prop3Index = readU16(input);
93 0 : unsigned prop3End = readU16(input);
94 : std::vector<SpanInfo97> spanInfos = getSpansInfo(input, prop1Index,
95 0 : prop2Index, prop3Index, prop3End);
96 0 : input->seek(textStart, WPX_SEEK_SET);
97 0 : TextInfo97 textInfo = getTextInfo(input, textEnd - textStart);
98 0 : unsigned iParaEnd = 0, iSpanEnd = 0;
99 0 : unsigned currentParaIndex = 0;
100 0 : unsigned currentSpanIndex = 0;
101 0 : for (unsigned iShapeEnd = 0; iShapeEnd < textInfo.m_shapeEnds.size(); ++iShapeEnd)
102 : {
103 0 : unsigned shapeEnd = std::min<unsigned>(textInfo.m_shapeEnds[iShapeEnd], textInfo.m_chars.size());
104 0 : std::vector<TextParagraph> shapeParas;
105 0 : while (currentParaIndex < shapeEnd)
106 : {
107 0 : unsigned paraEnd = iParaEnd < textInfo.m_paragraphEnds.size() ?
108 0 : textInfo.m_paragraphEnds[iParaEnd++] : shapeEnd;
109 0 : if (paraEnd > shapeEnd)
110 : {
111 0 : --iParaEnd;
112 0 : paraEnd = shapeEnd;
113 : }
114 0 : std::vector<TextSpan> paraSpans;
115 0 : while (currentSpanIndex < paraEnd)
116 : {
117 0 : const SpanInfo97 &spanInfo = iSpanEnd < spanInfos.size() ?
118 0 : spanInfos[iSpanEnd++] :
119 0 : SpanInfo97(paraEnd, CharacterStyle());
120 0 : unsigned spanEnd = spanInfo.m_spanEnd;
121 0 : if (spanEnd > paraEnd)
122 : {
123 0 : --iSpanEnd;
124 0 : spanEnd = paraEnd;
125 : }
126 0 : const CharacterStyle &spanStyle = spanInfo.m_style;
127 0 : std::vector<unsigned char> spanChars;
128 0 : spanChars.reserve(spanEnd - currentSpanIndex);
129 0 : for (unsigned i = currentSpanIndex; i < spanEnd; ++i)
130 : {
131 0 : unsigned char ch = textInfo.m_chars[i];
132 0 : if (ch == 0xB) // Pub97 interprets vertical tab as nonbreaking space.
133 : {
134 0 : spanChars.push_back('\n');
135 : }
136 0 : else if (ch == 0x0D)
137 : {
138 0 : if (i + 1 < spanEnd && textInfo.m_chars[i + 1] == 0x0A)
139 : {
140 0 : ++i; // ignore the 0x0D and advance past the 0x0A
141 : }
142 : }
143 0 : else if (ch == 0x0C)
144 : {
145 : // ignore the 0x0C
146 : }
147 : else
148 : {
149 0 : spanChars.push_back(ch);
150 : }
151 : }
152 0 : paraSpans.push_back(TextSpan(spanChars, spanStyle));
153 0 : currentSpanIndex = spanEnd;
154 0 : }
155 0 : shapeParas.push_back(TextParagraph(paraSpans, ParagraphStyle()));
156 0 : currentParaIndex = paraEnd;
157 0 : }
158 0 : m_collector->addTextString(shapeParas, iShapeEnd);
159 0 : }
160 0 : }
161 :
162 0 : std::vector<libmspub::MSPUBParser97::SpanInfo97> libmspub::MSPUBParser97::getSpansInfo(
163 : WPXInputStream *input,
164 : unsigned prop1Index, unsigned prop2Index, unsigned /* prop3Index */,
165 : unsigned /* prop3End */)
166 : {
167 0 : std::vector<unsigned> spanEnds;
168 0 : std::vector<SpanInfo97> ret;
169 0 : for (unsigned i = prop1Index; i < prop2Index; ++i)
170 : {
171 0 : unsigned offset = i * 0x200;
172 0 : input->seek(offset + 0x1FF, WPX_SEEK_SET);
173 0 : unsigned numEntries = readU8(input);
174 0 : input->seek(offset, WPX_SEEK_SET);
175 : // Skip the first thing; it is not an end
176 0 : unsigned start = readU32(input);
177 0 : for (unsigned j = 0; j < numEntries; ++j)
178 : {
179 0 : spanEnds.push_back(readU32(input) - start);
180 : }
181 0 : std::vector<unsigned char> spanStyleIndices;
182 0 : for (unsigned j = 0; j < spanEnds.size(); ++j)
183 : {
184 0 : spanStyleIndices.push_back(readU8(input));
185 : }
186 0 : while (stillReading(input, offset + 0x200) && readU8(input) == 0)
187 : {
188 : ;
189 : }
190 0 : input->seek(-1, WPX_SEEK_CUR);
191 0 : std::map<unsigned char, CharacterStyle> stylesByIndex;
192 0 : while (stillReading(input, offset + 0x1FF))
193 : {
194 0 : unsigned length = readU8(input);
195 0 : unsigned nextOffset = input->tell() + length;
196 : unsigned char index = static_cast<unsigned char>(
197 0 : (input->tell() - 1 - offset) / 2);
198 0 : stylesByIndex[index] = readCharacterStyle(input, length);
199 0 : input->seek(nextOffset, WPX_SEEK_SET);
200 : }
201 0 : for (unsigned j = 0; j < spanEnds.size(); ++j)
202 : {
203 0 : ret.push_back(SpanInfo97(spanEnds[j], j < spanStyleIndices.size() ?
204 0 : stylesByIndex[spanStyleIndices[j]] : CharacterStyle()));
205 : }
206 0 : }
207 0 : return ret;
208 : }
209 :
210 0 : libmspub::CharacterStyle libmspub::MSPUBParser97::readCharacterStyle(
211 : WPXInputStream *input, unsigned length)
212 : {
213 0 : unsigned begin = input->tell();
214 0 : bool underline = false, italic = false, bold = false;
215 0 : int colorIndex = -1;
216 0 : unsigned fontIndex = 0;
217 0 : int textSizeVariationFromDefault = 0;
218 :
219 0 : if (length >= 1)
220 : {
221 0 : unsigned char biFlags = readU8(input);
222 0 : bold = biFlags & 0x1;
223 0 : italic = biFlags & 0x2;
224 : }
225 0 : if (length >= 3)
226 : {
227 0 : input->seek(begin + 0x2, WPX_SEEK_SET);
228 0 : fontIndex = readU8(input);
229 : }
230 0 : if (length >= 9)
231 : {
232 0 : input->seek(begin + 0x8, WPX_SEEK_SET);
233 0 : underline = readU8(input) & 0x1;
234 : }
235 0 : if (length >= 5)
236 : {
237 0 : input->seek(begin + 0x4, WPX_SEEK_SET);
238 : textSizeVariationFromDefault =
239 0 : length >= 6 ? readS16(input) : readS8(input);
240 : }
241 0 : if (length >= 16)
242 : {
243 0 : input->seek(begin + 0xC, WPX_SEEK_SET);
244 0 : colorIndex = getColorIndexByQuillEntry(readU32(input));
245 : }
246 : double textSizeInPt = 10 +
247 0 : static_cast<double>(textSizeVariationFromDefault) / 2;
248 : return CharacterStyle(underline, italic, bold, textSizeInPt, colorIndex,
249 0 : fontIndex);
250 : }
251 :
252 0 : libmspub::MSPUBParser97::TextInfo97 libmspub::MSPUBParser97::getTextInfo(WPXInputStream *input, unsigned length)
253 : {
254 0 : std::vector<unsigned char> chars;
255 0 : chars.reserve(length);
256 0 : std::vector<unsigned> paragraphEnds;
257 0 : std::vector<unsigned> shapeEnds;
258 0 : unsigned start = input->tell();
259 0 : unsigned char last = '\0';
260 0 : while (stillReading(input, start + length))
261 : {
262 0 : chars.push_back(readU8(input));
263 0 : if (last == 0xD && chars.back() == 0xA)
264 : {
265 0 : paragraphEnds.push_back(chars.size());
266 : }
267 0 : else if (chars.back() == 0xC)
268 : {
269 0 : shapeEnds.push_back(chars.size());
270 : }
271 0 : last = chars.back();
272 : }
273 0 : return TextInfo97(chars, paragraphEnds, shapeEnds);
274 : }
275 :
276 0 : int libmspub::MSPUBParser97::translateCoordinateIfNecessary(int coordinate) const
277 : {
278 0 : if (m_isBanner)
279 : {
280 0 : return coordinate - 120 * EMUS_IN_INCH;
281 : }
282 : else
283 : {
284 0 : return coordinate - 25 * EMUS_IN_INCH;
285 : }
286 : }
287 :
288 0 : unsigned libmspub::MSPUBParser97::getFirstLineOffset() const
289 : {
290 0 : return 0x22;
291 : }
292 :
293 0 : unsigned libmspub::MSPUBParser97::getSecondLineOffset() const
294 : {
295 0 : return 0x2D;
296 : }
297 :
298 0 : unsigned libmspub::MSPUBParser97::getShapeFillTypeOffset() const
299 : {
300 0 : return 0x20;
301 : }
302 :
303 0 : unsigned libmspub::MSPUBParser97::getShapeFillColorOffset() const
304 : {
305 0 : return 0x18;
306 0 : }
307 :
308 : /* vim:set shiftwidth=2 softtabstop=2 expandtab: */
|