Line data Source code
1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* libmspub
3 : * Version: MPL 1.1 / GPLv2+ / LGPLv2+
4 : *
5 : * The contents of this file are subject to the Mozilla Public License Version
6 : * 1.1 (the "License"); you may not use this file except in compliance with
7 : * the License or as specified alternatively below. You may obtain a copy of
8 : * the License at http://www.mozilla.org/MPL/
9 : *
10 : * Software distributed under the License is distributed on an "AS IS" basis,
11 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : * for the specific language governing rights and limitations under the
13 : * License.
14 : *
15 : * Major Contributor(s):
16 : * Copyright (C) 2012 Brennan Vincent <brennanv@email.arizona.edu>
17 : * Copyright (C) 2012 Fridrich Strba <fridrich.strba@bluewin.ch>
18 : *
19 : * All Rights Reserved.
20 : *
21 : * For minor contributions see the git repository.
22 : *
23 : * Alternatively, the contents of this file may be used under the terms of
24 : * either the GNU General Public License Version 2 or later (the "GPLv2+"), or
25 : * the GNU Lesser General Public License Version 2 or later (the "LGPLv2+"),
26 : * in which case the provisions of the GPLv2+ or the LGPLv2+ are applicable
27 : * instead of those above.
28 : */
29 :
30 : #include <sstream>
31 : #include <string>
32 : #include <algorithm>
33 : #include <string.h>
34 : #include <libwpd-stream/libwpd-stream.h>
35 : #include <boost/cstdint.hpp>
36 : #include <zlib.h>
37 : #include "MSPUBParser.h"
38 : #include "MSPUBCollector.h"
39 : #include "MSPUBBlockID.h"
40 : #include "MSPUBBlockType.h"
41 : #include "MSPUBContentChunkType.h"
42 : #include "MSPUBConstants.h"
43 : #include "EscherContainerType.h"
44 : #include "EscherFieldIds.h"
45 : #include "libmspub_utils.h"
46 : #include "ShapeType.h"
47 : #include "ShapeFlags.h"
48 : #include "Fill.h"
49 : #include "FillType.h"
50 : #include "ListInfo.h"
51 : #include "Dash.h"
52 : #include "TableInfo.h"
53 : #include "VerticalAlign.h"
54 :
55 : #if !defined(_MSC_VER) && !defined(BOOST_CSTDINT_HPP)
56 : using boost::int32_t;
57 : #endif
58 : using boost::uint32_t;
59 :
60 0 : libmspub::MSPUBParser::MSPUBParser(WPXInputStream *input, MSPUBCollector *collector)
61 : : m_input(input), m_collector(collector),
62 : m_blockInfo(), m_contentChunks(),
63 : m_cellsChunkIndices(),
64 : m_pageChunkIndices(), m_shapeChunkIndices(),
65 : m_paletteChunkIndices(), m_borderArtChunkIndices(),
66 : m_fontChunkIndices(),
67 : m_unknownChunkIndices(), m_documentChunkIndex(),
68 : m_lastSeenSeqNum(-1), m_lastAddedImage(0),
69 0 : m_alternateShapeSeqNums(), m_escherDelayIndices()
70 : {
71 0 : }
72 :
73 0 : libmspub::MSPUBParser::~MSPUBParser()
74 : {
75 0 : }
76 :
77 0 : bool libmspub::MSPUBParser::lineExistsByFlagPointer(unsigned *flags,
78 : unsigned *geomFlags)
79 : {
80 : return flags &&
81 0 : !(((*flags) & FLAG_USE_LINE) && !((*flags) & FLAG_LINE)) &&
82 0 : ((!geomFlags) || !((*geomFlags) & FLAG_GEOM_USE_LINE_OK)
83 0 : || ((*geomFlags) & FLAG_GEOM_LINE_OK));
84 :
85 : }
86 :
87 0 : unsigned libmspub::MSPUBParser::getColorIndexByQuillEntry(unsigned entry)
88 : {
89 0 : return entry;
90 : }
91 :
92 0 : short libmspub::MSPUBParser::getBlockDataLength(unsigned type) // -1 for variable-length block with the data length as the first DWORD
93 : {
94 0 : switch(type)
95 : {
96 : case DUMMY:
97 : case 0x5:
98 : case 0x8:
99 : case 0xa:
100 0 : return 0;
101 : case 0x10:
102 : case 0x12:
103 : case 0x18:
104 : case 0x1a:
105 : case 0x07:
106 0 : return 2;
107 : case 0x20:
108 : case 0x22:
109 : case 0x58:
110 : case 0x68:
111 : case 0x70:
112 : case 0xb8:
113 0 : return 4;
114 : case 0x28:
115 0 : return 8;
116 : case 0x38:
117 0 : return 16;
118 : case 0x48:
119 0 : return 24;
120 : case STRING_CONTAINER:
121 : case 0x80:
122 : case 0x82:
123 : case GENERAL_CONTAINER:
124 : case 0x8a:
125 : case 0x90:
126 : case 0x98:
127 : case 0xa0:
128 0 : return -1;
129 : }
130 : //FIXME: Debug assertion here? Should never get here.
131 : MSPUB_DEBUG_MSG(("Block of unknown type seen!\n"));
132 0 : return 0;
133 : }
134 :
135 0 : bool libmspub::MSPUBParser::parse()
136 : {
137 : MSPUB_DEBUG_MSG(("***NOTE***: Where applicable, the meanings of block/chunk IDs and Types printed below may be found in:\n\t***MSPUBBlockType.h\n\t***MSPUBBlockID.h\n\t***MSPUBContentChunkType.h\n*****\n"));
138 0 : if (!m_input->isOLEStream())
139 0 : return false;
140 0 : WPXInputStream *quill = m_input->getDocumentOLEStream("Quill/QuillSub/CONTENTS");
141 0 : if (!quill)
142 : {
143 : MSPUB_DEBUG_MSG(("Couldn't get quill stream.\n"));
144 0 : return false;
145 : }
146 0 : if (!parseQuill(quill))
147 : {
148 : MSPUB_DEBUG_MSG(("Couldn't parse quill stream.\n"));
149 0 : delete quill;
150 0 : return false;
151 : }
152 0 : delete quill;
153 0 : WPXInputStream *contents = m_input->getDocumentOLEStream("Contents");
154 0 : if (!contents)
155 : {
156 : MSPUB_DEBUG_MSG(("Couldn't get contents stream.\n"));
157 0 : return false;
158 : }
159 0 : if (!parseContents(contents))
160 : {
161 : MSPUB_DEBUG_MSG(("Couldn't parse contents stream.\n"));
162 0 : delete contents;
163 0 : return false;
164 : }
165 0 : delete contents;
166 0 : WPXInputStream *escherDelay = m_input->getDocumentOLEStream("Escher/EscherDelayStm");
167 0 : if (escherDelay)
168 : {
169 0 : parseEscherDelay(escherDelay);
170 0 : delete escherDelay;
171 : }
172 0 : WPXInputStream *escher = m_input->getDocumentOLEStream("Escher/EscherStm");
173 0 : if (!escher)
174 : {
175 : MSPUB_DEBUG_MSG(("Couldn't get escher stream.\n"));
176 0 : return false;
177 : }
178 0 : if (!parseEscher(escher))
179 : {
180 : MSPUB_DEBUG_MSG(("Couldn't parse escher stream.\n"));
181 0 : delete escher;
182 0 : return false;
183 : }
184 0 : delete escher;
185 :
186 0 : return m_collector->go();
187 : }
188 :
189 0 : libmspub::ImgType libmspub::MSPUBParser::imgTypeByBlipType(unsigned short type)
190 : {
191 0 : switch (type)
192 : {
193 : case OFFICE_ART_BLIP_PNG:
194 0 : return PNG;
195 : case OFFICE_ART_BLIP_JPEG:
196 0 : return JPEG;
197 : case OFFICE_ART_BLIP_WMF:
198 0 : return WMF;
199 : case OFFICE_ART_BLIP_DIB:
200 0 : return DIB;
201 : case OFFICE_ART_BLIP_EMF:
202 0 : return EMF;
203 : case OFFICE_ART_BLIP_TIFF:
204 0 : return TIFF;
205 : case OFFICE_ART_BLIP_PICT:
206 0 : return PICT;
207 : }
208 0 : return UNKNOWN;
209 : }
210 :
211 0 : int libmspub::MSPUBParser::getStartOffset(ImgType type, unsigned short initial)
212 : {
213 0 : bool oneUid = true;
214 0 : int offset = 0x11;
215 0 : unsigned short recInstance = initial >> 4;
216 0 : switch (type)
217 : {
218 : case WMF:
219 0 : oneUid = recInstance == 0x216;
220 0 : offset = 0x34;
221 0 : break;
222 : case EMF:
223 0 : oneUid = recInstance == 0x3D4;
224 0 : offset = 0x34;
225 0 : break;
226 : case PNG:
227 0 : oneUid = recInstance == 0x6E0;
228 0 : offset = 0x11;
229 0 : break;
230 : case JPEG:
231 0 : oneUid = recInstance == 0x46A || recInstance == 0x6E2;
232 0 : offset = 0x11;
233 0 : break;
234 : case DIB:
235 0 : oneUid = recInstance == 0x7A8;
236 0 : offset = 0x11;
237 0 : break;
238 : default:
239 0 : break;
240 : }
241 0 : return offset + (oneUid ? 0 : 0x10);
242 : }
243 :
244 0 : bool libmspub::MSPUBParser::parseEscherDelay(WPXInputStream *input)
245 : {
246 0 : while (stillReading (input, (unsigned long)-1))
247 : {
248 0 : EscherContainerInfo info = parseEscherContainer(input);
249 0 : if (imgTypeByBlipType(info.type) != UNKNOWN)
250 : {
251 0 : WPXBinaryData img;
252 0 : unsigned long toRead = info.contentsLength;
253 0 : input->seek(input->tell() + getStartOffset(imgTypeByBlipType(info.type), info.initial), WPX_SEEK_SET);
254 0 : while (toRead > 0 && stillReading(input, (unsigned long)-1))
255 : {
256 0 : unsigned long howManyRead = 0;
257 0 : const unsigned char *buf = input->read(toRead, howManyRead);
258 0 : img.append(buf, howManyRead);
259 0 : toRead -= howManyRead;
260 : }
261 0 : if (imgTypeByBlipType(info.type) == WMF || imgTypeByBlipType(info.type) == EMF)
262 : {
263 0 : img = inflateData(img);
264 : }
265 0 : else if (imgTypeByBlipType(info.type) == DIB)
266 : {
267 : // Reconstruct BMP header
268 : // cf. http://en.wikipedia.org/wiki/BMP_file_format , accessed 2012-5-31
269 0 : const unsigned char *buf = img.getDataBuffer();
270 0 : if (img.size() < 0x2E + 4)
271 : {
272 0 : ++m_lastAddedImage;
273 : MSPUB_DEBUG_MSG(("Garbage DIB at index 0x%x\n", m_lastAddedImage));
274 0 : input->seek(info.contentsOffset + info.contentsLength, WPX_SEEK_SET);
275 0 : continue;
276 : }
277 0 : unsigned short bitsPerPixel = readU16(buf, 0x0E);
278 0 : unsigned numPaletteColors = readU32(buf, 0x20);
279 0 : if (numPaletteColors == 0 && bitsPerPixel <= 8)
280 : {
281 0 : numPaletteColors = 1;
282 0 : for (int i = 0; i < bitsPerPixel; ++i)
283 : {
284 0 : numPaletteColors *= 2;
285 : }
286 : }
287 :
288 0 : WPXBinaryData tmpImg;
289 0 : tmpImg.append(0x42);
290 0 : tmpImg.append(0x4d);
291 :
292 0 : tmpImg.append((unsigned char)((img.size() + 14) & 0x000000ff));
293 0 : tmpImg.append((unsigned char)(((img.size() + 14) & 0x0000ff00) >> 8));
294 0 : tmpImg.append((unsigned char)(((img.size() + 14) & 0x00ff0000) >> 16));
295 0 : tmpImg.append((unsigned char)(((img.size() + 14) & 0xff000000) >> 24));
296 :
297 0 : tmpImg.append(0x00);
298 0 : tmpImg.append(0x00);
299 0 : tmpImg.append(0x00);
300 0 : tmpImg.append(0x00);
301 :
302 0 : tmpImg.append(0x36 + 4 * numPaletteColors);
303 0 : tmpImg.append(0x00);
304 0 : tmpImg.append(0x00);
305 0 : tmpImg.append(0x00);
306 0 : tmpImg.append(img);
307 0 : img = tmpImg;
308 : }
309 0 : m_collector->addImage(++m_lastAddedImage, imgTypeByBlipType(info.type), img);
310 : }
311 : else
312 : {
313 0 : ++m_lastAddedImage;
314 : MSPUB_DEBUG_MSG(("Image of unknown type at index 0x%x\n", m_lastAddedImage));
315 : }
316 0 : input->seek(info.contentsOffset + info.contentsLength, WPX_SEEK_SET);
317 : }
318 0 : return true;
319 : }
320 :
321 0 : bool libmspub::MSPUBParser::parseContents(WPXInputStream *input)
322 : {
323 : MSPUB_DEBUG_MSG(("MSPUBParser::parseContents\n"));
324 0 : input->seek(0x1a, WPX_SEEK_SET);
325 0 : unsigned trailerOffset = readU32(input);
326 : MSPUB_DEBUG_MSG(("MSPUBParser: trailerOffset %.8x\n", trailerOffset));
327 0 : input->seek(trailerOffset, WPX_SEEK_SET);
328 0 : unsigned trailerLength = readU32(input);
329 0 : for (unsigned i=0; i<3; i++)
330 : {
331 0 : libmspub::MSPUBBlockInfo trailerPart = parseBlock(input);
332 : MSPUB_DEBUG_MSG(("Trailer SubBlock %i, startPosition 0x%lx, id %i, type 0x%x, dataLength 0x%lx\n", i+1, trailerPart.startPosition, trailerPart.id, trailerPart.type, trailerPart.dataLength));
333 0 : if (trailerPart.type == TRAILER_DIRECTORY)
334 : {
335 :
336 0 : while (stillReading(input, trailerPart.dataOffset + trailerPart.dataLength))
337 : {
338 0 : m_blockInfo.push_back(parseBlock(input));
339 0 : ++m_lastSeenSeqNum;
340 0 : if (m_blockInfo.back().type == GENERAL_CONTAINER)
341 : {
342 0 : if (parseContentChunkReference(input, m_blockInfo.back()))
343 : {
344 0 : if (m_contentChunks.size() > 1)
345 : {
346 0 : m_contentChunks[m_contentChunks.size() - 2].end
347 0 : = m_contentChunks.back().offset;
348 : }
349 : }
350 : }
351 0 : else(skipBlock(input, m_blockInfo.back()));
352 : }
353 0 : if (m_contentChunks.size() > 0)
354 : {
355 0 : m_contentChunks.back().end = trailerPart.dataOffset + trailerPart.dataLength;
356 : }
357 0 : if (!m_documentChunkIndex.is_initialized())
358 : {
359 0 : return false;
360 : }
361 0 : const ContentChunkReference &documentChunk = m_contentChunks.at(m_documentChunkIndex.get());
362 0 : for (unsigned i_pal = 0; i_pal < m_paletteChunkIndices.size(); ++i_pal)
363 : {
364 0 : const ContentChunkReference &paletteChunk = m_contentChunks.at(m_paletteChunkIndices[i_pal]);
365 0 : input->seek(paletteChunk.offset, WPX_SEEK_SET);
366 0 : if (! parsePaletteChunk(input, paletteChunk))
367 : {
368 0 : return false;
369 : }
370 : }
371 0 : for (unsigned i_ba = 0; i_ba < m_borderArtChunkIndices.size();
372 : ++i_ba)
373 : {
374 : const ContentChunkReference &baChunk =
375 0 : m_contentChunks.at(m_borderArtChunkIndices[i_ba]);
376 0 : input->seek(baChunk.offset, WPX_SEEK_SET);
377 0 : if (!parseBorderArtChunk(input, baChunk))
378 : {
379 0 : return false;
380 : }
381 : }
382 0 : for (unsigned i_shape = 0; i_shape < m_shapeChunkIndices.size();
383 : ++i_shape)
384 : {
385 : const ContentChunkReference &shapeChunk =
386 0 : m_contentChunks.at(m_shapeChunkIndices[i_shape]);
387 0 : input->seek(shapeChunk.offset, WPX_SEEK_SET);
388 0 : if (!parseShape(input, shapeChunk))
389 : {
390 0 : return false;
391 : }
392 : }
393 0 : for (unsigned i_font = 0; i_font < m_fontChunkIndices.size();
394 : ++i_font)
395 : {
396 : const ContentChunkReference &fontChunk =
397 0 : m_contentChunks.at(m_fontChunkIndices[i_font]);
398 0 : input->seek(fontChunk.offset, WPX_SEEK_SET);
399 0 : if (!parseFontChunk(input, fontChunk))
400 : {
401 0 : return false;
402 : }
403 : }
404 0 : input->seek(documentChunk.offset, WPX_SEEK_SET);
405 0 : if (!parseDocumentChunk(input, documentChunk))
406 : {
407 0 : return false;
408 : }
409 0 : for (unsigned i_page = 0; i_page < m_pageChunkIndices.size(); ++i_page)
410 : {
411 0 : const ContentChunkReference &pageChunk = m_contentChunks.at(m_pageChunkIndices[i_page]);
412 0 : input->seek(pageChunk.offset, WPX_SEEK_SET);
413 0 : if (!parsePageChunk(input, pageChunk))
414 : {
415 0 : return false;
416 : }
417 : }
418 : }
419 0 : }
420 0 : input->seek(trailerOffset + trailerLength, WPX_SEEK_SET);
421 :
422 0 : return true;
423 : }
424 :
425 : #ifdef DEBUG
426 : bool libmspub::MSPUBParser::parseDocumentChunk(WPXInputStream *input, const ContentChunkReference &chunk)
427 : #else
428 0 : bool libmspub::MSPUBParser::parseDocumentChunk(WPXInputStream *input, const ContentChunkReference &)
429 : #endif
430 : {
431 : MSPUB_DEBUG_MSG(("parseDocumentChunk: offset 0x%lx, end 0x%lx\n", input->tell(), chunk.end));
432 0 : unsigned long begin = input->tell();
433 0 : unsigned long len = readU32(input);
434 0 : while (stillReading(input, begin + len))
435 : {
436 0 : libmspub::MSPUBBlockInfo info = parseBlock(input);
437 0 : if (info.id == DOCUMENT_SIZE)
438 : {
439 0 : while (stillReading(input, info.dataOffset + info.dataLength))
440 : {
441 0 : libmspub::MSPUBBlockInfo subInfo = parseBlock(input, true);
442 0 : if (subInfo.id == DOCUMENT_WIDTH)
443 : {
444 0 : m_collector->setWidthInEmu(subInfo.data);
445 : }
446 0 : else if (subInfo.id == DOCUMENT_HEIGHT)
447 : {
448 0 : m_collector->setHeightInEmu(subInfo.data);
449 : }
450 0 : }
451 : }
452 0 : else if (info.id == DOCUMENT_PAGE_LIST)
453 : {
454 0 : input->seek(info.dataOffset + 4, WPX_SEEK_SET);
455 0 : while (stillReading(input, info.dataOffset + info.dataLength))
456 : {
457 0 : MSPUBBlockInfo subInfo = parseBlock(input, true);
458 0 : if (subInfo.id == 0)
459 : {
460 0 : m_collector->setNextPage(subInfo.data);
461 : }
462 0 : }
463 : }
464 : else
465 : {
466 0 : skipBlock(input, info);
467 : }
468 0 : }
469 0 : return true; //FIXME: return false for failure
470 : }
471 :
472 0 : bool libmspub::MSPUBParser::parseFontChunk(
473 : WPXInputStream *input, const ContentChunkReference &chunk)
474 : {
475 0 : unsigned length = readU32(input);
476 0 : while (stillReading(input, chunk.offset + length))
477 : {
478 0 : MSPUBBlockInfo info = parseBlock(input, true);
479 0 : if (info.id == FONT_CONTAINER_ARRAY)
480 : {
481 0 : input->seek(info.dataOffset + 4, WPX_SEEK_SET);
482 0 : while (stillReading(input, info.dataOffset + info.dataLength))
483 : {
484 0 : MSPUBBlockInfo subInfo = parseBlock(input, true);
485 0 : if (subInfo.id == 0)
486 : {
487 0 : boost::optional<WPXString> name;
488 0 : boost::optional<unsigned> eotOffset;
489 0 : input->seek(subInfo.dataOffset + 4, WPX_SEEK_SET);
490 0 : while (stillReading(input, subInfo.dataOffset + subInfo.dataLength))
491 : {
492 0 : MSPUBBlockInfo subSubInfo = parseBlock(input, true);
493 0 : if (subSubInfo.id == EMBEDDED_FONT_NAME)
494 : {
495 0 : name = WPXString();
496 0 : appendCharacters(name.get(), subSubInfo.stringData, UTF_16);
497 : }
498 0 : else if (subSubInfo.id == EMBEDDED_EOT)
499 : {
500 0 : eotOffset = subSubInfo.dataOffset;
501 : }
502 0 : }
503 0 : if (name.is_initialized() && eotOffset.is_initialized())
504 : {
505 0 : input->seek(eotOffset.get(), WPX_SEEK_SET);
506 0 : MSPUBBlockInfo eotRecord = parseBlock(input, true);
507 0 : WPXBinaryData &data = m_collector->addEOTFont(name.get());
508 0 : unsigned long toRead = eotRecord.dataLength;
509 0 : while (toRead > 0 && stillReading(input, (unsigned long)-1))
510 : {
511 0 : unsigned long howManyRead = 0;
512 0 : const unsigned char *buf = input->read(toRead, howManyRead);
513 0 : data.append(buf, howManyRead);
514 0 : toRead -= howManyRead;
515 : }
516 0 : input->seek(subInfo.dataOffset + subInfo.dataLength, WPX_SEEK_SET);
517 0 : }
518 : }
519 0 : }
520 : }
521 0 : }
522 0 : return true;
523 : }
524 :
525 0 : bool libmspub::MSPUBParser::parseBorderArtChunk(
526 : WPXInputStream *input, const ContentChunkReference &chunk)
527 : {
528 0 : unsigned length = readU32(input);
529 0 : while (stillReading(input, chunk.offset + length))
530 : {
531 0 : MSPUBBlockInfo info = parseBlock(input, true);
532 0 : if (info.id == BA_ARRAY)
533 : {
534 0 : input->seek(info.dataOffset + 4, WPX_SEEK_SET);
535 0 : unsigned i = 0;
536 0 : while (stillReading(input, info.dataOffset + info.dataLength))
537 : {
538 0 : MSPUBBlockInfo entry = parseBlock(input, false);
539 0 : while (stillReading(input, entry.dataOffset + entry.dataLength))
540 : {
541 0 : MSPUBBlockInfo subRecord = parseBlock(input, true);
542 0 : if (subRecord.id == BA_IMAGE_ARRAY)
543 : {
544 0 : input->seek(subRecord.dataOffset + 4, WPX_SEEK_SET);
545 0 : while (stillReading(input, subRecord.dataOffset + subRecord.dataLength))
546 : {
547 0 : MSPUBBlockInfo subSubRecord = parseBlock(input, false);
548 0 : if (subSubRecord.id == BA_IMAGE_CONTAINER)
549 : {
550 0 : MSPUBBlockInfo imgRecord = parseBlock(input, false);
551 0 : if (imgRecord.id == BA_IMAGE)
552 : {
553 : WPXBinaryData &img = *(m_collector->addBorderImage(
554 0 : WMF, i));
555 0 : unsigned long toRead = imgRecord.dataLength;
556 0 : while (toRead > 0 && stillReading(input, (unsigned long)-1))
557 : {
558 0 : unsigned long howManyRead = 0;
559 0 : const unsigned char *buf = input->read(toRead, howManyRead);
560 0 : img.append(buf, howManyRead);
561 0 : toRead -= howManyRead;
562 : }
563 0 : }
564 : }
565 0 : }
566 : }
567 0 : else if (subRecord.id == BA_OFFSET_CONTAINER)
568 : {
569 0 : input->seek(subRecord.dataOffset + 4, WPX_SEEK_SET);
570 0 : while (stillReading(
571 0 : input, subRecord.dataOffset + subRecord.dataLength))
572 : {
573 0 : MSPUBBlockInfo subSubRecord = parseBlock(input, true);
574 0 : if (subSubRecord.id == BA_OFFSET_ENTRY)
575 : {
576 0 : m_collector->setBorderImageOffset(i, subSubRecord.data);
577 : }
578 0 : }
579 : }
580 0 : }
581 0 : ++i;
582 0 : input->seek(entry.dataOffset + entry.dataLength, WPX_SEEK_SET);
583 0 : }
584 : }
585 0 : }
586 0 : return true;
587 : }
588 :
589 0 : bool libmspub::MSPUBParser::parsePageChunk(WPXInputStream *input, const ContentChunkReference &chunk)
590 : {
591 : MSPUB_DEBUG_MSG(("parsePageChunk: offset 0x%lx, end 0x%lx, seqnum 0x%x, parent 0x%x\n", input->tell(), chunk.end, chunk.seqNum, chunk.parentSeqNum));
592 0 : unsigned long length = readU32(input);
593 0 : PageType type = getPageTypeBySeqNum(chunk.seqNum);
594 0 : if (type == NORMAL)
595 : {
596 0 : m_collector->addPage(chunk.seqNum);
597 : }
598 0 : while (stillReading(input, chunk.offset + length))
599 : {
600 0 : libmspub::MSPUBBlockInfo info = parseBlock(input);
601 0 : if (info.id == PAGE_BG_SHAPE)
602 : {
603 0 : m_collector->setPageBgShape(chunk.seqNum, info.data);
604 : }
605 0 : else if (info.id == PAGE_SHAPES)
606 : {
607 0 : parsePageShapeList(input, info, chunk.seqNum);
608 : }
609 0 : else if (info.id == THIS_MASTER_NAME)
610 : {
611 0 : for (unsigned i = 0; i < info.stringData.size(); ++i)
612 : {
613 0 : if (info.stringData[i] != 0)
614 : {
615 0 : m_collector->designateMasterPage(chunk.seqNum);
616 : }
617 : }
618 : }
619 0 : else if (info.id == APPLIED_MASTER_NAME)
620 : {
621 0 : m_collector->setMasterPage(chunk.seqNum, info.data);
622 : }
623 : else
624 : {
625 0 : skipBlock(input, info);
626 : }
627 0 : }
628 0 : return true;
629 : }
630 :
631 0 : bool libmspub::MSPUBParser::parsePageShapeList(WPXInputStream *input, libmspub::MSPUBBlockInfo info, unsigned pageSeqNum)
632 : {
633 : MSPUB_DEBUG_MSG(("parsePageShapeList: page seqnum 0x%x\n", pageSeqNum));
634 0 : while (stillReading(input, info.dataOffset + info.dataLength))
635 : {
636 0 : libmspub::MSPUBBlockInfo subInfo = parseBlock(input, true);
637 0 : if (subInfo.type == SHAPE_SEQNUM)
638 : {
639 0 : m_collector->setShapePage(subInfo.data, pageSeqNum);
640 : }
641 0 : }
642 0 : return true;
643 : }
644 :
645 0 : bool libmspub::MSPUBParser::parseShape(WPXInputStream *input,
646 : const ContentChunkReference &chunk)
647 : {
648 : MSPUB_DEBUG_MSG(("parseShape: seqNum 0x%x\n", chunk.seqNum));
649 0 : unsigned long pos = input->tell();
650 0 : unsigned length = readU32(input);
651 0 : unsigned width = 0;
652 0 : unsigned height = 0;
653 0 : bool isTable = chunk.type == TABLE;
654 0 : bool isGroup = chunk.type == GROUP || chunk.type == LOGO;
655 0 : bool isText = false;
656 0 : bool shouldStretchBorderArt = true;
657 0 : unsigned textId = 0;
658 0 : if (isTable)
659 : {
660 0 : boost::optional<unsigned> cellsSeqNum;
661 0 : boost::optional<unsigned> numRows;
662 0 : boost::optional<unsigned> numCols;
663 0 : boost::optional<unsigned> rowcolArrayOffset;
664 0 : while (stillReading(input, pos + length))
665 : {
666 0 : libmspub::MSPUBBlockInfo info = parseBlock(input, true);
667 0 : if (info.id == TABLE_WIDTH)
668 : {
669 0 : width = info.data;
670 : }
671 0 : else if (info.id == TABLE_HEIGHT)
672 : {
673 0 : height = info.data;
674 : }
675 0 : else if (info.id == TABLE_CELLS_SEQNUM)
676 : {
677 0 : cellsSeqNum = info.data;
678 : }
679 0 : else if (info.id == TABLE_NUM_ROWS)
680 : {
681 0 : numRows = info.data;
682 : }
683 0 : else if (info.id == TABLE_NUM_COLS)
684 : {
685 0 : numCols = info.data;
686 : }
687 0 : else if (info.id == TABLE_ROWCOL_ARRAY)
688 : {
689 0 : rowcolArrayOffset = info.dataOffset;
690 : }
691 0 : }
692 0 : if (cellsSeqNum.is_initialized() && numRows.is_initialized() &&
693 0 : numCols.is_initialized() && rowcolArrayOffset.is_initialized())
694 : {
695 0 : unsigned nr = numRows.get();
696 0 : unsigned nc = numCols.get();
697 0 : unsigned rcao = rowcolArrayOffset.get();
698 0 : unsigned csn = cellsSeqNum.get();
699 0 : std::vector<unsigned> rowOffsetsInEmu;
700 0 : std::vector<unsigned> columnOffsetsInEmu;
701 0 : unsigned rowFirstOffset = 0;
702 0 : unsigned columnFirstOffset = 0;
703 0 : input->seek(rcao, WPX_SEEK_SET);
704 0 : unsigned arrayLength = readU32(input);
705 0 : while(stillReading(input, rcao + arrayLength))
706 : {
707 0 : MSPUBBlockInfo info = parseBlock(input, true);
708 0 : if (info.id == 0)
709 : {
710 0 : input->seek(info.dataOffset + 4, WPX_SEEK_SET);
711 0 : while (stillReading(input, info.dataOffset + info.dataLength))
712 : {
713 0 : MSPUBBlockInfo subInfo = parseBlock(input, true);
714 0 : if (subInfo.id == TABLE_ROWCOL_OFFSET)
715 : {
716 0 : unsigned rowcolOffset = readU32(input);
717 0 : if (columnOffsetsInEmu.size() < nc)
718 : {
719 0 : if (columnOffsetsInEmu.empty())
720 : {
721 0 : columnFirstOffset = rowcolOffset;
722 : }
723 0 : columnOffsetsInEmu.push_back(rowcolOffset - columnFirstOffset);
724 : }
725 0 : else if (rowOffsetsInEmu.size() < nr)
726 : {
727 0 : if (rowOffsetsInEmu.empty())
728 : {
729 0 : rowFirstOffset = rowcolOffset;
730 : }
731 0 : rowOffsetsInEmu.push_back(rowcolOffset - rowFirstOffset);
732 : }
733 : }
734 0 : }
735 : }
736 0 : }
737 0 : if (rowOffsetsInEmu.size() != nr || columnOffsetsInEmu.size() != nc)
738 : {
739 : MSPUB_DEBUG_MSG(("ERROR: Wrong number of rows or columns found in table definition.\n"));
740 0 : return false;
741 : }
742 0 : boost::optional<unsigned> index;
743 0 : for (unsigned i = 0; i < m_cellsChunkIndices.size(); ++i)
744 : {
745 0 : if (m_contentChunks[m_cellsChunkIndices[i]].seqNum == csn)
746 : {
747 0 : index = i;
748 0 : break;
749 : }
750 : }
751 0 : if (!index.is_initialized())
752 : {
753 : MSPUB_DEBUG_MSG(("WARNING: Couldn't find cells of seqnum %d corresponding to table of seqnum %d.\n",
754 : csn, chunk.seqNum));
755 0 : return false;
756 : }
757 : else
758 : {
759 : // Currently do nothing with the cells chunk.
760 : }
761 0 : TableInfo ti(nr, nc);
762 0 : ti.m_rowOffsetsInEmu = rowOffsetsInEmu;
763 0 : ti.m_columnOffsetsInEmu = columnOffsetsInEmu;
764 0 : m_collector->setShapeTableInfo(chunk.seqNum, ti);
765 0 : return true;
766 : }
767 0 : return false;
768 : }
769 : else
770 : {
771 0 : while (stillReading(input, pos + length))
772 : {
773 0 : libmspub::MSPUBBlockInfo info = parseBlock(input, true);
774 0 : if (info.id == SHAPE_WIDTH)
775 : {
776 0 : width = info.data;
777 : }
778 0 : else if (info.id == SHAPE_HEIGHT)
779 : {
780 0 : height = info.data;
781 : }
782 0 : else if (info.id == SHAPE_BORDER_IMAGE_ID)
783 : {
784 0 : m_collector->setShapeBorderImageId(chunk.seqNum, info.data);
785 : }
786 0 : else if (info.id == SHAPE_DONT_STRETCH_BA)
787 : {
788 0 : shouldStretchBorderArt = false;
789 : }
790 0 : else if (info.id == SHAPE_TEXT_ID)
791 : {
792 0 : textId = info.data;
793 0 : isText = true;
794 : }
795 0 : else if (info.id == SHAPE_VALIGN)
796 : {
797 : m_collector->setShapeVerticalTextAlign(chunk.seqNum,
798 0 : static_cast<VerticalAlign>(info.data));
799 : }
800 0 : }
801 0 : if (shouldStretchBorderArt)
802 : {
803 0 : m_collector->setShapeStretchBorderArt(chunk.seqNum);
804 : }
805 0 : bool parseWithoutDimensions = true; //FIXME: Should we ever ignore if height and width not given?
806 0 : if (isGroup || (height > 0 && width > 0) || parseWithoutDimensions)
807 : {
808 0 : if (! isGroup)
809 : {
810 0 : if (isText)
811 : {
812 0 : m_collector->addTextShape(textId, chunk.seqNum);
813 : }
814 : }
815 : }
816 : else
817 : {
818 : MSPUB_DEBUG_MSG(("Height and width not both specified, ignoring. (Height: 0x%x, Width: 0x%x)\n", height, width));
819 : }
820 0 : return true;
821 : }
822 : }
823 :
824 0 : libmspub::QuillChunkReference libmspub::MSPUBParser::parseQuillChunkReference(WPXInputStream *input)
825 : {
826 0 : libmspub::QuillChunkReference ret;
827 0 : readU16(input); //FIXME: Can we do something sensible if this is not 0x18 ?
828 : char name[5];
829 0 : for (int i = 0; i < 4; ++i)
830 : {
831 0 : name[i] = (char)readU8(input);
832 : }
833 0 : name[4] = '\0';
834 0 : ret.name = name;
835 0 : ret.id = readU16(input);
836 0 : input->seek(input->tell() + 4, WPX_SEEK_SET); //Seek past what is normally 0x01000000. We don't know what this represents.
837 : char name2[5];
838 0 : for (int i = 0; i < 4; ++i)
839 : {
840 0 : name2[i] = (char)readU8(input);
841 : }
842 0 : name2[4] = '\0';
843 0 : ret.name2 = name2;
844 0 : ret.offset = readU32(input);
845 0 : ret.length = readU32(input);
846 0 : return ret;
847 : }
848 :
849 0 : std::vector<unsigned> libmspub::MSPUBParser::parseTableCellDefinitions(
850 : WPXInputStream *input, const QuillChunkReference &chunk)
851 : {
852 0 : std::vector<unsigned> ret;
853 0 : unsigned numElements = readU32(input) + 1;
854 0 : input->seek(chunk.offset + 0xC, WPX_SEEK_SET);
855 0 : for (unsigned i = 0; i < numElements; ++i)
856 : {
857 0 : ret.push_back(readU32(input));
858 : // compensate for all but the last offset not including the terminating 0x0D00
859 0 : if (i != numElements - 1)
860 : {
861 0 : ret.back() += 2;
862 : }
863 : }
864 0 : return ret;
865 : }
866 :
867 0 : bool libmspub::MSPUBParser::parseQuill(WPXInputStream *input)
868 : {
869 : MSPUB_DEBUG_MSG(("MSPUBParser::parseQuill\n"));
870 0 : unsigned chunkReferenceListOffset = 0x18;
871 0 : std::list<QuillChunkReference> chunkReferences;
872 0 : while (chunkReferenceListOffset != 0xffffffff)
873 : {
874 0 : input->seek(chunkReferenceListOffset + 2, WPX_SEEK_SET);
875 0 : unsigned short numChunks = readU16(input);
876 0 : chunkReferenceListOffset = readU32(input);
877 0 : for (unsigned i = 0; i < numChunks; ++i)
878 : {
879 0 : libmspub::QuillChunkReference quillChunkReference = parseQuillChunkReference(input);
880 0 : chunkReferences.push_back(quillChunkReference);
881 0 : }
882 : }
883 : MSPUB_DEBUG_MSG(("Found %u Quill chunks\n", (unsigned)chunkReferences.size()));
884 : //Make sure we parse the STRS chunk before the TEXT chunk
885 0 : std::list<QuillChunkReference>::const_iterator textChunkReference = chunkReferences.end();
886 0 : bool parsedStrs = false;
887 0 : bool parsedSyid = false;
888 0 : bool parsedFdpc = false;
889 0 : bool parsedFdpp = false;
890 0 : bool parsedStsh = false;
891 0 : bool parsedFont = false;
892 0 : std::vector<unsigned> textLengths;
893 0 : std::vector<unsigned> textIDs;
894 0 : std::vector<unsigned> textOffsets;
895 0 : unsigned textOffsetAccum = 0;
896 0 : std::vector<TextSpanReference> spans;
897 0 : std::vector<TextParagraphReference> paras;
898 0 : unsigned whichStsh = 0;
899 0 : for (std::list<QuillChunkReference>::const_iterator i = chunkReferences.begin(); i != chunkReferences.end(); ++i)
900 : {
901 0 : if (i->name == "TEXT")
902 : {
903 0 : textChunkReference = i;
904 : }
905 0 : else if (i->name == "STRS")
906 : {
907 0 : input->seek(i->offset, WPX_SEEK_SET);
908 0 : unsigned numLengths = readU32(input); //Assuming the first DWORD is the number of children and that the next is the remaining length before children start. We are unsure that this is correct.
909 0 : input->seek(4 + i->offset + readU32(input), WPX_SEEK_SET);
910 0 : for (unsigned j = 0; j < numLengths; ++j)
911 : {
912 0 : unsigned length = readU32(input);
913 0 : textLengths.push_back(length);
914 0 : textOffsets.push_back(textOffsetAccum);
915 0 : textOffsetAccum += length * 2;
916 : }
917 0 : parsedStrs = true;
918 : }
919 0 : else if (i->name == "SYID")
920 : {
921 0 : input->seek(i->offset, WPX_SEEK_SET);
922 0 : readU32(input); // Don't know what the first DWORD means.
923 0 : unsigned numIDs = readU32(input);
924 0 : for (unsigned j = 0; j < numIDs; ++j)
925 : {
926 0 : textIDs.push_back(readU32(input));
927 : }
928 0 : parsedSyid = true;
929 : }
930 0 : else if (i->name == "PL ")
931 : {
932 0 : input->seek(i->offset, WPX_SEEK_SET);
933 0 : parseColors(input, *i);
934 : }
935 0 : else if (i->name == "FDPC")
936 : {
937 0 : input->seek(i->offset, WPX_SEEK_SET);
938 0 : std::vector<TextSpanReference> thisBlockSpans = parseCharacterStyles(input, *i);
939 0 : spans.insert(spans.end(), thisBlockSpans.begin(), thisBlockSpans.end());
940 0 : parsedFdpc = true;
941 : }
942 0 : else if (i->name == "FDPP")
943 : {
944 0 : input->seek(i->offset, WPX_SEEK_SET);
945 0 : std::vector<TextParagraphReference> thisBlockParas = parseParagraphStyles(input, *i);
946 0 : paras.insert(paras.end(), thisBlockParas.begin(), thisBlockParas.end());
947 0 : parsedFdpp = true;
948 : }
949 0 : else if (i->name == "STSH")
950 : {
951 0 : if (whichStsh++ == 1)
952 : {
953 0 : input->seek(i->offset, WPX_SEEK_SET);
954 0 : parseDefaultStyle(input, *i);
955 0 : parsedStsh = true;
956 : }
957 : }
958 0 : else if (i->name == "FONT")
959 : {
960 0 : input->seek(i->offset, WPX_SEEK_SET);
961 0 : parseFonts(input, *i);
962 0 : parsedFont = true;
963 : }
964 0 : else if (i->name == "TCD ")
965 : {
966 0 : input->seek(i->offset, WPX_SEEK_SET);
967 0 : std::vector<unsigned> ends = parseTableCellDefinitions(input, *i);
968 0 : m_collector->setNextTableCellTextEnds(ends);
969 : }
970 : }
971 0 : if (parsedStrs && parsedSyid && parsedFdpc && parsedFdpp && parsedStsh && parsedFont && textChunkReference != chunkReferences.end())
972 : {
973 0 : input->seek(textChunkReference->offset, WPX_SEEK_SET);
974 0 : unsigned bytesRead = 0;
975 0 : std::vector<TextSpanReference>::iterator currentTextSpan = spans.begin();
976 0 : std::vector<TextParagraphReference>::iterator currentTextPara = paras.begin();
977 0 : for (unsigned j = 0; j < textIDs.size() && j < textLengths.size(); ++j)
978 : {
979 : MSPUB_DEBUG_MSG(("Parsing a text block.\n"));
980 0 : std::vector<TextParagraph> readParas;
981 0 : std::vector<TextSpan> readSpans;
982 0 : std::vector<unsigned char> text;
983 0 : for (unsigned k = 0; k < textLengths[j]; ++k)
984 : {
985 0 : text.push_back(readU8(input));
986 0 : text.push_back(readU8(input));
987 0 : bytesRead += 2;
988 0 : if (bytesRead >= currentTextSpan->last - textChunkReference->offset)
989 : {
990 0 : if (!text.empty())
991 : {
992 0 : readSpans.push_back(TextSpan(text, currentTextSpan->charStyle));
993 : MSPUB_DEBUG_MSG(("Saw text span %d in the current text paragraph.\n", (unsigned)readSpans.size()));
994 : }
995 0 : ++currentTextSpan;
996 0 : text.clear();
997 : }
998 0 : if (bytesRead >= currentTextPara->last - textChunkReference->offset)
999 : {
1000 0 : if (!text.empty())
1001 : {
1002 0 : readSpans.push_back(TextSpan(text, currentTextSpan->charStyle));
1003 : MSPUB_DEBUG_MSG(("Saw text span %d in the current text paragraph.\n", (unsigned)readSpans.size()));
1004 : }
1005 0 : text.clear();
1006 0 : if (!readSpans.empty())
1007 : {
1008 0 : readParas.push_back(TextParagraph(readSpans, currentTextPara->paraStyle));
1009 : MSPUB_DEBUG_MSG(("Saw paragraph %d in the current text block.\n", (unsigned)readParas.size()));
1010 : }
1011 0 : ++currentTextPara;
1012 0 : readSpans.clear();
1013 : }
1014 : }
1015 0 : if (!readSpans.empty())
1016 : {
1017 0 : if (!text.empty())
1018 : {
1019 0 : readSpans.push_back(TextSpan(text, currentTextSpan->charStyle));
1020 : MSPUB_DEBUG_MSG(("Saw text span %d in the current text paragraph.\n", (unsigned)readSpans.size()));
1021 : }
1022 0 : text.clear();
1023 0 : readParas.push_back(TextParagraph(readSpans, currentTextPara->paraStyle));
1024 : MSPUB_DEBUG_MSG(("Saw paragraph %d in the current text block.\n", (unsigned)readParas.size()));
1025 : }
1026 0 : m_collector->addTextString(readParas, textIDs[j]);
1027 0 : m_collector->setTextStringOffset(textIDs[j], textOffsets[j]);
1028 0 : }
1029 0 : textChunkReference = chunkReferences.end();
1030 : }
1031 0 : return true;
1032 : }
1033 :
1034 0 : void libmspub::MSPUBParser::parseFonts(WPXInputStream *input, const QuillChunkReference &)
1035 : {
1036 0 : readU32(input);
1037 0 : unsigned numElements = readU32(input);
1038 0 : input->seek(input->tell() + 12 + 4 * numElements, WPX_SEEK_SET);
1039 0 : for (unsigned i = 0; i < numElements; ++i)
1040 : {
1041 0 : unsigned short nameLength = readU16(input);
1042 0 : std::vector<unsigned char> name;
1043 0 : readNBytes(input, nameLength * 2, name);
1044 0 : m_collector->addFont(name);
1045 0 : readU32(input);
1046 0 : }
1047 0 : }
1048 :
1049 0 : void libmspub::MSPUBParser::parseDefaultStyle(WPXInputStream *input, const QuillChunkReference &chunk)
1050 : {
1051 0 : readU32(input);
1052 0 : unsigned numElements = readU32(input);
1053 0 : input->seek(input->tell() + 12, WPX_SEEK_SET);
1054 0 : std::vector<unsigned> offsets;
1055 0 : offsets.reserve(numElements);
1056 0 : for (unsigned i = 0; i < numElements; ++i)
1057 : {
1058 0 : offsets.push_back(readU32(input));
1059 : }
1060 0 : for (unsigned i = 0; i < numElements; ++i)
1061 : {
1062 0 : input->seek(chunk.offset + 20 + offsets[i], WPX_SEEK_SET);
1063 0 : readU16(input);
1064 0 : if (i % 2 == 0)
1065 : {
1066 : //FIXME: Does STSH2 hold information for associating style indices in FDPP to indices in STSH1 ?
1067 0 : m_collector->addDefaultCharacterStyle(getCharacterStyle(input));
1068 : }
1069 : else
1070 : {
1071 0 : m_collector->addDefaultParagraphStyle(getParagraphStyle(input));
1072 : }
1073 0 : }
1074 0 : }
1075 :
1076 :
1077 0 : void libmspub::MSPUBParser::parseColors(WPXInputStream *input, const QuillChunkReference &)
1078 : {
1079 0 : unsigned numEntries = readU32(input);
1080 0 : input->seek(input->tell() + 8, WPX_SEEK_SET);
1081 0 : for (unsigned i = 0; i < numEntries; ++i)
1082 : {
1083 0 : unsigned blocksOffset = input->tell();
1084 0 : unsigned len = readU32(input);
1085 0 : while (stillReading(input, blocksOffset + len))
1086 : {
1087 0 : MSPUBBlockInfo info = parseBlock(input, true);
1088 0 : if (info.id == 0x01)
1089 : {
1090 0 : m_collector->addTextColor(ColorReference(info.data));
1091 : }
1092 0 : }
1093 : }
1094 0 : }
1095 :
1096 0 : std::vector<libmspub::MSPUBParser::TextParagraphReference> libmspub::MSPUBParser::parseParagraphStyles(WPXInputStream *input, const QuillChunkReference &chunk)
1097 : {
1098 0 : std::vector<TextParagraphReference> ret;
1099 0 : unsigned short numEntries = readU16(input);
1100 0 : input->seek(input->tell() + 6, WPX_SEEK_SET);
1101 0 : std::vector<unsigned> textOffsets;
1102 0 : textOffsets.reserve(numEntries);
1103 0 : std::vector<unsigned short> chunkOffsets;
1104 0 : textOffsets.reserve(numEntries);
1105 0 : for (unsigned short i = 0; i < numEntries; ++i)
1106 : {
1107 0 : textOffsets.push_back(readU32(input));
1108 : }
1109 0 : for (unsigned short i = 0; i < numEntries; ++i)
1110 : {
1111 0 : chunkOffsets.push_back(readU16(input));
1112 : }
1113 0 : unsigned currentSpanBegin = 0;
1114 0 : for (unsigned short i = 0; i < numEntries; ++i)
1115 : {
1116 0 : input->seek(chunk.offset + chunkOffsets[i], WPX_SEEK_SET);
1117 0 : ParagraphStyle style = getParagraphStyle(input);
1118 0 : ret.push_back(TextParagraphReference(currentSpanBegin, textOffsets[i], style));
1119 0 : currentSpanBegin = textOffsets[i] + 1;
1120 0 : }
1121 0 : return ret;
1122 : }
1123 :
1124 0 : std::vector<libmspub::MSPUBParser::TextSpanReference> libmspub::MSPUBParser::parseCharacterStyles(WPXInputStream *input, const QuillChunkReference &chunk)
1125 : {
1126 0 : unsigned short numEntries = readU16(input);
1127 0 : input->seek(input->tell() + 6, WPX_SEEK_SET);
1128 0 : std::vector<unsigned> textOffsets;
1129 0 : textOffsets.reserve(numEntries);
1130 0 : std::vector<unsigned short> chunkOffsets;
1131 0 : chunkOffsets.reserve(numEntries);
1132 0 : std::vector<TextSpanReference> ret;
1133 0 : for (unsigned short i = 0; i < numEntries; ++i)
1134 : {
1135 0 : textOffsets.push_back(readU32(input));
1136 : }
1137 0 : for (unsigned short i = 0; i < numEntries; ++i)
1138 : {
1139 0 : chunkOffsets.push_back(readU16(input));
1140 : }
1141 0 : unsigned currentSpanBegin = 0;
1142 0 : for (unsigned short i = 0; i < numEntries; ++i)
1143 : {
1144 0 : input->seek(chunk.offset + chunkOffsets[i], WPX_SEEK_SET);
1145 0 : CharacterStyle style = getCharacterStyle(input);
1146 0 : currentSpanBegin = textOffsets[i] + 1;
1147 0 : ret.push_back(TextSpanReference(currentSpanBegin, textOffsets[i], style));
1148 0 : }
1149 0 : return ret;
1150 : }
1151 0 : libmspub::ParagraphStyle libmspub::MSPUBParser::getParagraphStyle(WPXInputStream *input)
1152 : {
1153 0 : ParagraphStyle ret;
1154 :
1155 0 : bool isList = false;
1156 0 : uint32_t bulletChar = 0;
1157 0 : NumberingType numberingType = STANDARD_WESTERN;
1158 0 : NumberingDelimiter numberingDelimiter = NO_DELIMITER;
1159 0 : boost::optional<unsigned> numberIfRestarted;
1160 :
1161 0 : unsigned offset = input->tell();
1162 0 : unsigned len = readU32(input);
1163 0 : while (stillReading(input, offset + len))
1164 : {
1165 0 : MSPUBBlockInfo info = parseBlock(input, true);
1166 0 : switch(info.id)
1167 : {
1168 : case PARAGRAPH_ALIGNMENT:
1169 0 : ret.m_align = (Alignment)(info.data & 0xFF); // Is this correct?
1170 0 : break;
1171 : case PARAGRAPH_DEFAULT_CHAR_STYLE:
1172 0 : ret.m_defaultCharStyleIndex = info.data;
1173 0 : break;
1174 : case PARAGRAPH_LINE_SPACING:
1175 0 : if (info.data & 1)
1176 : {
1177 : // line spacing expressed in points in the UI,
1178 : // in eighths of an emu in the file format.
1179 : // (WTF??)
1180 : ret.m_lineSpacing = LineSpacingInfo(LINE_SPACING_PT,
1181 0 : static_cast<double>(info.data - 1) / 8 * 72 / EMUS_IN_INCH);
1182 : }
1183 0 : else if (info.data & 2)
1184 : {
1185 : // line spacing expressed in SP in the UI,
1186 : // in what would be EMUs if font size were 96pt in the file format
1187 : // (WTF??)
1188 : ret.m_lineSpacing = LineSpacingInfo(LINE_SPACING_SP,
1189 0 : static_cast<double>(info.data - 2) / EMUS_IN_INCH * 72 / 96);
1190 : }
1191 0 : break;
1192 : case PARAGRAPH_SPACE_BEFORE:
1193 0 : ret.m_spaceBeforeEmu = info.data;
1194 0 : break;
1195 : case PARAGRAPH_SPACE_AFTER:
1196 0 : ret.m_spaceAfterEmu = info.data;
1197 0 : break;
1198 : case PARAGRAPH_FIRST_LINE_INDENT:
1199 0 : ret.m_firstLineIndentEmu = (int)info.data;
1200 0 : break;
1201 : case PARAGRAPH_LEFT_INDENT:
1202 0 : ret.m_leftIndentEmu = info.data;
1203 0 : break;
1204 : case PARAGRAPH_RIGHT_INDENT:
1205 0 : ret.m_rightIndentEmu = info.data;
1206 0 : break;
1207 : case PARAGRAPH_TABS:
1208 0 : input->seek(info.dataOffset + 4, WPX_SEEK_SET);
1209 0 : while(stillReading(input, info.dataOffset + info.dataLength))
1210 : {
1211 0 : MSPUBBlockInfo tabArrayInfo = parseBlock(input, true);
1212 0 : if (tabArrayInfo.id == TAB_ARRAY)
1213 : {
1214 0 : input->seek(tabArrayInfo.dataOffset + 4, WPX_SEEK_SET);
1215 0 : while (stillReading(input, tabArrayInfo.dataOffset + tabArrayInfo.dataLength))
1216 : {
1217 0 : MSPUBBlockInfo tabEntryInfo = parseBlock(input, true);
1218 0 : if (tabEntryInfo.type == GENERAL_CONTAINER)
1219 : {
1220 0 : input->seek(tabEntryInfo.dataOffset + 4, WPX_SEEK_SET);
1221 0 : MSPUBBlockInfo tabInfo = parseBlock(input, true);
1222 0 : if (tabInfo.id == TAB_AMOUNT)
1223 : {
1224 0 : ret.m_tabStopsInEmu.push_back(tabInfo.data);
1225 0 : }
1226 : }
1227 0 : }
1228 : }
1229 0 : }
1230 0 : break;
1231 : case PARAGRAPH_LIST_INFO:
1232 : {
1233 0 : isList = true;
1234 0 : input->seek(info.dataOffset + 4, WPX_SEEK_SET);
1235 0 : while (stillReading(input, info.dataOffset + info.dataLength))
1236 : {
1237 0 : MSPUBBlockInfo listSubInfo = parseBlock(input, true);
1238 0 : switch (listSubInfo.id)
1239 : {
1240 : case PARAGRAPH_LIST_NUMBERING_TYPE:
1241 0 : numberingType = static_cast<NumberingType>(info.data);
1242 0 : break;
1243 : case PARAGRAPH_LIST_BULLET_CHAR:
1244 0 : bulletChar = info.data;
1245 0 : break;
1246 : default:
1247 0 : break;
1248 : }
1249 0 : }
1250 0 : break;
1251 : }
1252 : case PARAGRAPH_LIST_NUMBER_RESTART:
1253 0 : numberIfRestarted = info.data;
1254 0 : break;
1255 : default:
1256 0 : break;
1257 : }
1258 0 : }
1259 0 : if (isList)
1260 : {
1261 0 : if (bulletChar)
1262 : {
1263 0 : ret.m_listInfo = ListInfo(bulletChar);
1264 : }
1265 : else
1266 : {
1267 : ret.m_listInfo = ListInfo(numberIfRestarted, numberingType,
1268 0 : numberingDelimiter);
1269 : }
1270 : }
1271 :
1272 0 : return ret;
1273 : }
1274 :
1275 0 : libmspub::CharacterStyle libmspub::MSPUBParser::getCharacterStyle(WPXInputStream *input)
1276 : {
1277 0 : bool seenUnderline = false, seenBold1 = false, seenBold2 = false, seenItalic1 = false, seenItalic2 = false;
1278 0 : int textSize1 = -1, /* textSize2 = -1,*/ colorIndex = -1;
1279 0 : boost::optional<unsigned> fontIndex;
1280 0 : SuperSubType sst = NO_SUPER_SUB;
1281 0 : unsigned offset = input->tell();
1282 0 : unsigned len = readU32(input);
1283 0 : while (stillReading(input, offset + len))
1284 : {
1285 0 : libmspub::MSPUBBlockInfo info = parseBlock(input, true);
1286 0 : switch (info.id)
1287 : {
1288 : case BOLD_1_ID:
1289 0 : seenBold1 = true;
1290 0 : break;
1291 : case BOLD_2_ID:
1292 0 : seenBold2 = true;
1293 0 : break;
1294 : case ITALIC_1_ID:
1295 0 : seenItalic1 = true;
1296 0 : break;
1297 : case ITALIC_2_ID:
1298 0 : seenItalic2 = true;
1299 0 : break;
1300 : case UNDERLINE_ID:
1301 0 : seenUnderline = true;
1302 0 : break;
1303 : case TEXT_SIZE_1_ID:
1304 0 : textSize1 = info.data;
1305 0 : break;
1306 : case TEXT_SIZE_2_ID:
1307 : // textSize2 = info.data;
1308 0 : break;
1309 : case BARE_COLOR_INDEX_ID:
1310 0 : colorIndex = info.data;
1311 0 : break;
1312 : case COLOR_INDEX_CONTAINER_ID:
1313 0 : colorIndex = getColorIndex(input, info);
1314 0 : break;
1315 : case FONT_INDEX_CONTAINER_ID:
1316 0 : fontIndex = getFontIndex(input, info);
1317 0 : break;
1318 : case SUPER_SUB_TYPE_ID:
1319 0 : sst = static_cast<SuperSubType>(info.data);
1320 0 : break;
1321 : default:
1322 0 : break;
1323 : }
1324 0 : }
1325 : //FIXME: Figure out what textSize2 is used for. Can we find a document where it differs from textSize1 ?
1326 : // textSize2 = textSize1;
1327 0 : boost::optional<double> dTextSize;
1328 0 : if (textSize1 != -1)
1329 : {
1330 0 : dTextSize = (double)(textSize1 * POINTS_IN_INCH) / EMUS_IN_INCH;
1331 : }
1332 0 : return CharacterStyle(seenUnderline, seenItalic1 && seenItalic2, seenBold1 && seenBold2, dTextSize, getColorIndexByQuillEntry(colorIndex), fontIndex, sst);
1333 : }
1334 :
1335 0 : unsigned libmspub::MSPUBParser::getFontIndex(WPXInputStream *input, const MSPUBBlockInfo &info)
1336 : {
1337 : MSPUB_DEBUG_MSG(("In getFontIndex\n"));
1338 0 : input->seek(info.dataOffset + 4, WPX_SEEK_SET);
1339 0 : while (stillReading(input, info.dataOffset + info.dataLength))
1340 : {
1341 0 : MSPUBBlockInfo subInfo = parseBlock(input, true);
1342 0 : if (subInfo.type == GENERAL_CONTAINER)
1343 : {
1344 0 : input->seek(subInfo.dataOffset + 4, WPX_SEEK_SET);
1345 0 : if (stillReading(input, subInfo.dataOffset + subInfo.dataLength))
1346 : {
1347 0 : MSPUBBlockInfo subSubInfo = parseBlock(input, true);
1348 0 : skipBlock(input, info);
1349 0 : return subSubInfo.data;
1350 : }
1351 : }
1352 0 : }
1353 0 : return 0;
1354 : }
1355 :
1356 0 : int libmspub::MSPUBParser::getColorIndex(WPXInputStream *input, const MSPUBBlockInfo &info)
1357 : {
1358 0 : input->seek(info.dataOffset + 4, WPX_SEEK_SET);
1359 0 : while (stillReading(input, info.dataOffset + info.dataLength))
1360 : {
1361 0 : MSPUBBlockInfo subInfo = parseBlock(input, true);
1362 0 : if (subInfo.id == COLOR_INDEX_ID)
1363 : {
1364 0 : skipBlock(input, info);
1365 : MSPUB_DEBUG_MSG(("Found color index 0x%x\n", (unsigned)subInfo.data));
1366 0 : return subInfo.data;
1367 : }
1368 0 : }
1369 : MSPUB_DEBUG_MSG(("Failed to find color index!\n"));
1370 0 : return -1;
1371 : }
1372 :
1373 0 : bool libmspub::MSPUBParser::parseEscher(WPXInputStream *input)
1374 : {
1375 : MSPUB_DEBUG_MSG(("MSPUBParser::parseEscher\n"));
1376 : libmspub::EscherContainerInfo fakeroot;
1377 0 : fakeroot.initial = 0;
1378 0 : fakeroot.type = 0;
1379 0 : fakeroot.contentsOffset = input->tell();
1380 0 : fakeroot.contentsLength = (unsigned long)-1; //FIXME: Get the actual length
1381 : libmspub::EscherContainerInfo dg, dgg;
1382 : //Note: this assumes that dgg comes before any dg with images.
1383 0 : if (findEscherContainer(input, fakeroot, dgg, OFFICE_ART_DGG_CONTAINER))
1384 : {
1385 : libmspub::EscherContainerInfo bsc;
1386 0 : if (findEscherContainer(input, fakeroot, bsc, OFFICE_ART_B_STORE_CONTAINER))
1387 : {
1388 0 : unsigned short currentDelayIndex = 1;
1389 0 : while (stillReading(input, bsc.contentsOffset + bsc.contentsLength))
1390 : {
1391 0 : unsigned begin = input->tell();
1392 0 : input->seek(begin + 10, WPX_SEEK_SET);
1393 0 : if (! (readU32(input) == 0 && readU32(input) == 0 && readU32(input) == 0 && readU32(input) == 0))
1394 : {
1395 0 : m_escherDelayIndices.push_back(currentDelayIndex++);
1396 : }
1397 : else
1398 : {
1399 0 : m_escherDelayIndices.push_back(-1);
1400 : }
1401 0 : input->seek(begin + 44, WPX_SEEK_SET);
1402 : }
1403 : }
1404 0 : input->seek(dgg.contentsOffset + dgg.contentsLength + getEscherElementTailLength(OFFICE_ART_DGG_CONTAINER), WPX_SEEK_SET);
1405 : }
1406 0 : while (findEscherContainer(input, fakeroot, dg, OFFICE_ART_DG_CONTAINER))
1407 : {
1408 : libmspub::EscherContainerInfo spgr;
1409 0 : while (findEscherContainer(input, dg, spgr, OFFICE_ART_SPGR_CONTAINER))
1410 : {
1411 0 : Coordinate c1, c2;
1412 0 : parseShapeGroup(input, spgr, c1, c2);
1413 : }
1414 0 : input->seek(input->tell() + getEscherElementTailLength(OFFICE_ART_DG_CONTAINER), WPX_SEEK_SET);
1415 : }
1416 0 : return true;
1417 : }
1418 :
1419 0 : void libmspub::MSPUBParser::parseShapeGroup(WPXInputStream *input, const EscherContainerInfo &spgr, Coordinate parentCoordinateSystem, Coordinate parentGroupAbsoluteCoord)
1420 : {
1421 : libmspub::EscherContainerInfo shapeOrGroup;
1422 0 : std::set<unsigned short> types;
1423 0 : types.insert(OFFICE_ART_SPGR_CONTAINER);
1424 0 : types.insert(OFFICE_ART_SP_CONTAINER);
1425 0 : while (findEscherContainerWithTypeInSet(input, spgr, shapeOrGroup, types))
1426 : {
1427 0 : switch (shapeOrGroup.type)
1428 : {
1429 : case OFFICE_ART_SPGR_CONTAINER:
1430 0 : m_collector->beginGroup();
1431 0 : parseShapeGroup(input, shapeOrGroup, parentCoordinateSystem, parentGroupAbsoluteCoord);
1432 0 : m_collector->endGroup();
1433 0 : break;
1434 : case OFFICE_ART_SP_CONTAINER:
1435 0 : parseEscherShape(input, shapeOrGroup, parentCoordinateSystem, parentGroupAbsoluteCoord);
1436 0 : break;
1437 : }
1438 0 : input->seek(shapeOrGroup.contentsOffset + shapeOrGroup.contentsLength + getEscherElementTailLength(shapeOrGroup.type), WPX_SEEK_SET);
1439 0 : }
1440 0 : }
1441 :
1442 0 : void libmspub::MSPUBParser::parseEscherShape(WPXInputStream *input, const EscherContainerInfo &sp, Coordinate &parentCoordinateSystem, Coordinate &parentGroupAbsoluteCoord)
1443 : {
1444 0 : Coordinate thisParentCoordinateSystem = parentCoordinateSystem;
1445 0 : bool definesRelativeCoordinates = false;
1446 : libmspub::EscherContainerInfo cData;
1447 : libmspub::EscherContainerInfo cAnchor;
1448 : libmspub::EscherContainerInfo cFopt;
1449 : libmspub::EscherContainerInfo cTertiaryFopt;
1450 : libmspub::EscherContainerInfo cFsp;
1451 : libmspub::EscherContainerInfo cFspgr;
1452 0 : unsigned shapeFlags = 0;
1453 0 : bool isGroupLeader = false;
1454 0 : ShapeType st = RECTANGLE;
1455 0 : if (findEscherContainer(input, sp, cFspgr, OFFICE_ART_FSPGR))
1456 : {
1457 0 : input->seek(cFspgr.contentsOffset, WPX_SEEK_SET);
1458 0 : parentCoordinateSystem.m_xs = readU32(input);
1459 0 : parentCoordinateSystem.m_ys = readU32(input);
1460 0 : parentCoordinateSystem.m_xe = readU32(input);
1461 0 : parentCoordinateSystem.m_ye = readU32(input);
1462 0 : definesRelativeCoordinates = true;
1463 : }
1464 0 : input->seek(sp.contentsOffset, WPX_SEEK_SET);
1465 0 : if (findEscherContainer(input, sp, cFsp, OFFICE_ART_FSP))
1466 : {
1467 0 : st = (ShapeType)(cFsp.initial >> 4);
1468 0 : std::map<unsigned short, unsigned> fspData = extractEscherValues(input, cFsp);
1469 0 : input->seek(cFsp.contentsOffset + 4, WPX_SEEK_SET);
1470 0 : shapeFlags = readU32(input);
1471 0 : isGroupLeader = shapeFlags & SF_GROUP;
1472 : }
1473 0 : input->seek(sp.contentsOffset, WPX_SEEK_SET);
1474 0 : if (findEscherContainer(input, sp, cData, OFFICE_ART_CLIENT_DATA))
1475 : {
1476 0 : std::map<unsigned short, unsigned> dataValues = extractEscherValues(input, cData);
1477 0 : unsigned *shapeSeqNum = getIfExists(dataValues, FIELDID_SHAPE_ID);
1478 0 : if (shapeSeqNum)
1479 : {
1480 0 : m_collector->setShapeType(*shapeSeqNum, st);
1481 0 : m_collector->setShapeFlip(*shapeSeqNum, shapeFlags & SF_FLIP_V, shapeFlags & SF_FLIP_H);
1482 0 : input->seek(sp.contentsOffset, WPX_SEEK_SET);
1483 0 : if (isGroupLeader)
1484 : {
1485 0 : m_collector->setCurrentGroupSeqNum(*shapeSeqNum);
1486 : }
1487 : else
1488 : {
1489 0 : m_collector->setShapeOrder(*shapeSeqNum);
1490 : }
1491 0 : std::set<unsigned short> anchorTypes;
1492 0 : anchorTypes.insert(OFFICE_ART_CLIENT_ANCHOR);
1493 0 : anchorTypes.insert(OFFICE_ART_CHILD_ANCHOR);
1494 : bool foundAnchor;
1495 0 : bool rotated90 = false;
1496 0 : if ((foundAnchor = findEscherContainerWithTypeInSet(input, sp, cAnchor, anchorTypes)) || isGroupLeader)
1497 : {
1498 : MSPUB_DEBUG_MSG(("Found Escher data for %s of seqnum 0x%x\n", isGroupLeader ? "group" : "shape", *shapeSeqNum));
1499 0 : boost::optional<std::map<unsigned short, unsigned> > maybe_tertiaryFoptValues;
1500 0 : input->seek(sp.contentsOffset, WPX_SEEK_SET);
1501 0 : if (findEscherContainer(input, sp, cTertiaryFopt, OFFICE_ART_TERTIARY_FOPT))
1502 : {
1503 0 : maybe_tertiaryFoptValues = extractEscherValues(input, cTertiaryFopt);
1504 : }
1505 0 : if (maybe_tertiaryFoptValues.is_initialized())
1506 : {
1507 : const std::map<unsigned short, unsigned> &tertiaryFoptValues =
1508 0 : maybe_tertiaryFoptValues.get();
1509 : const unsigned *ptr_pictureRecolor = getIfExists_const(tertiaryFoptValues,
1510 0 : FIELDID_PICTURE_RECOLOR);
1511 0 : if (ptr_pictureRecolor)
1512 : {
1513 : m_collector->setShapePictureRecolor(*shapeSeqNum,
1514 0 : ColorReference(*ptr_pictureRecolor));
1515 : }
1516 : }
1517 0 : input->seek(sp.contentsOffset, WPX_SEEK_SET);
1518 0 : if (findEscherContainer(input, sp, cFopt, OFFICE_ART_FOPT))
1519 : {
1520 0 : FOPTValues foptValues = extractFOPTValues(input, cFopt);
1521 0 : unsigned *pxId = getIfExists(foptValues.m_scalarValues, FIELDID_PXID);
1522 0 : if (pxId)
1523 : {
1524 : MSPUB_DEBUG_MSG(("Current Escher shape has pxId %d\n", *pxId));
1525 0 : if (*pxId <= m_escherDelayIndices.size() && m_escherDelayIndices[*pxId - 1] >= 0)
1526 : {
1527 0 : m_collector->setShapeImgIndex(*shapeSeqNum, m_escherDelayIndices[*pxId - 1]);
1528 : }
1529 : else
1530 : {
1531 : MSPUB_DEBUG_MSG(("Couldn't find corresponding escherDelay index\n"));
1532 : }
1533 : }
1534 : unsigned *ptr_lineBackColor =
1535 0 : getIfExists(foptValues.m_scalarValues, FIELDID_LINE_BACK_COLOR);
1536 0 : if (ptr_lineBackColor &&
1537 : static_cast<int>(*ptr_lineBackColor) != -1)
1538 : {
1539 : m_collector->setShapeLineBackColor(
1540 0 : *shapeSeqNum, ColorReference(*ptr_lineBackColor));
1541 : }
1542 0 : unsigned *ptr_lineColor = getIfExists(foptValues.m_scalarValues, FIELDID_LINE_COLOR);
1543 0 : unsigned *ptr_lineFlags = getIfExists(foptValues.m_scalarValues, FIELDID_LINE_STYLE_BOOL_PROPS);
1544 : unsigned *ptr_geomFlags = getIfExists(
1545 0 : foptValues.m_scalarValues, FIELDID_GEOM_BOOL_PROPS);
1546 : bool useLine = lineExistsByFlagPointer(
1547 0 : ptr_lineFlags, ptr_geomFlags);
1548 0 : bool skipIfNotBg = false;
1549 0 : boost::shared_ptr<Fill> ptr_fill = getNewFill(foptValues.m_scalarValues, skipIfNotBg);
1550 0 : unsigned lineWidth = 0;
1551 0 : if (useLine)
1552 : {
1553 0 : if (ptr_lineColor)
1554 : {
1555 0 : unsigned *ptr_lineWidth = getIfExists(foptValues.m_scalarValues, FIELDID_LINE_WIDTH);
1556 0 : lineWidth = ptr_lineWidth ? *ptr_lineWidth : 9525;
1557 0 : m_collector->addShapeLine(*shapeSeqNum, Line(ColorReference(*ptr_lineColor), lineWidth, true));
1558 : }
1559 : else
1560 : {
1561 0 : if (maybe_tertiaryFoptValues.is_initialized())
1562 : {
1563 : std::map<unsigned short, unsigned> &tertiaryFoptValues =
1564 0 : maybe_tertiaryFoptValues.get();
1565 0 : unsigned *ptr_tertiaryLineFlags = getIfExists(tertiaryFoptValues, FIELDID_LINE_STYLE_BOOL_PROPS);
1566 0 : if (lineExistsByFlagPointer(ptr_tertiaryLineFlags))
1567 : {
1568 0 : unsigned *ptr_topColor = getIfExists(tertiaryFoptValues, FIELDID_LINE_TOP_COLOR);
1569 0 : unsigned *ptr_topWidth = getIfExists(tertiaryFoptValues, FIELDID_LINE_TOP_WIDTH);
1570 0 : unsigned *ptr_topFlags = getIfExists(tertiaryFoptValues, FIELDID_LINE_TOP_BOOL_PROPS);
1571 0 : unsigned *ptr_rightColor = getIfExists(tertiaryFoptValues, FIELDID_LINE_RIGHT_COLOR);
1572 0 : unsigned *ptr_rightWidth = getIfExists(tertiaryFoptValues, FIELDID_LINE_RIGHT_WIDTH);
1573 0 : unsigned *ptr_rightFlags = getIfExists(tertiaryFoptValues, FIELDID_LINE_RIGHT_BOOL_PROPS);
1574 0 : unsigned *ptr_bottomColor = getIfExists(tertiaryFoptValues, FIELDID_LINE_BOTTOM_COLOR);
1575 0 : unsigned *ptr_bottomWidth = getIfExists(tertiaryFoptValues, FIELDID_LINE_BOTTOM_WIDTH);
1576 0 : unsigned *ptr_bottomFlags = getIfExists(tertiaryFoptValues, FIELDID_LINE_BOTTOM_BOOL_PROPS);
1577 0 : unsigned *ptr_leftColor = getIfExists(tertiaryFoptValues, FIELDID_LINE_LEFT_COLOR);
1578 0 : unsigned *ptr_leftWidth = getIfExists(tertiaryFoptValues, FIELDID_LINE_LEFT_WIDTH);
1579 0 : unsigned *ptr_leftFlags = getIfExists(tertiaryFoptValues, FIELDID_LINE_LEFT_BOOL_PROPS);
1580 :
1581 0 : bool topExists = ptr_topColor && lineExistsByFlagPointer(ptr_topFlags);
1582 0 : bool rightExists = ptr_rightColor && lineExistsByFlagPointer(ptr_rightFlags);
1583 0 : bool bottomExists = ptr_bottomColor && lineExistsByFlagPointer(ptr_bottomFlags);
1584 0 : bool leftExists = ptr_leftColor && lineExistsByFlagPointer(ptr_leftFlags);
1585 0 : if (ptr_topWidth)
1586 : {
1587 0 : lineWidth = *ptr_topWidth;
1588 : }
1589 :
1590 : m_collector->addShapeLine(*shapeSeqNum,
1591 : topExists ? Line(ColorReference(*ptr_topColor), ptr_topWidth ? *ptr_topWidth : 9525, true) :
1592 0 : Line(ColorReference(0), 0, false));
1593 : m_collector->addShapeLine(*shapeSeqNum,
1594 : rightExists ? Line(ColorReference(*ptr_rightColor), ptr_rightWidth ? *ptr_rightWidth : 9525, true) :
1595 0 : Line(ColorReference(0), 0, false));
1596 : m_collector->addShapeLine(*shapeSeqNum,
1597 : bottomExists ? Line(ColorReference(*ptr_bottomColor), ptr_bottomWidth ? *ptr_bottomWidth : 9525, true) :
1598 0 : Line(ColorReference(0), 0, false));
1599 : m_collector->addShapeLine(*shapeSeqNum,
1600 : leftExists ? Line(ColorReference(*ptr_leftColor), ptr_leftWidth ? *ptr_leftWidth : 9525, true) :
1601 0 : Line(ColorReference(0), 0, false));
1602 :
1603 : // Amazing feat of Microsoft engineering:
1604 : // The detailed interaction of four flags describes ONE true/false property!
1605 :
1606 0 : if (ptr_leftFlags &&
1607 : (*ptr_leftFlags & FLAG_USE_LEFT_INSET_PEN) &&
1608 0 : (!(*ptr_leftFlags & FLAG_USE_LEFT_INSET_PEN_OK) || (*ptr_leftFlags & FLAG_LEFT_INSET_PEN_OK)) &&
1609 : (*ptr_leftFlags & FLAG_LEFT_INSET_PEN))
1610 : {
1611 0 : m_collector->setShapeBorderPosition(*shapeSeqNum, INSIDE_SHAPE);
1612 : }
1613 : else
1614 : {
1615 0 : m_collector->setShapeBorderPosition(*shapeSeqNum, HALF_INSIDE_SHAPE);
1616 : }
1617 : }
1618 : }
1619 : }
1620 : }
1621 0 : if (ptr_fill)
1622 : {
1623 0 : m_collector->setShapeFill(*shapeSeqNum, ptr_fill, skipIfNotBg);
1624 : }
1625 0 : int *ptr_adjust1 = (int *)getIfExists(foptValues.m_scalarValues, FIELDID_ADJUST_VALUE_1);
1626 0 : int *ptr_adjust2 = (int *)getIfExists(foptValues.m_scalarValues, FIELDID_ADJUST_VALUE_2);
1627 0 : int *ptr_adjust3 = (int *)getIfExists(foptValues.m_scalarValues, FIELDID_ADJUST_VALUE_3);
1628 0 : if (ptr_adjust1)
1629 : {
1630 0 : m_collector->setAdjustValue(*shapeSeqNum, 0, *ptr_adjust1);
1631 : }
1632 0 : if (ptr_adjust2)
1633 : {
1634 0 : m_collector->setAdjustValue(*shapeSeqNum, 1, *ptr_adjust2);
1635 : }
1636 0 : if (ptr_adjust3)
1637 : {
1638 0 : m_collector->setAdjustValue(*shapeSeqNum, 2, *ptr_adjust3);
1639 : }
1640 0 : int *ptr_rotation = (int *)getIfExists(foptValues.m_scalarValues, FIELDID_ROTATION);
1641 0 : if (ptr_rotation)
1642 : {
1643 0 : double rotation = doubleModulo(toFixedPoint(*ptr_rotation), 360);
1644 0 : m_collector->setShapeRotation(*shapeSeqNum, short(rotation));
1645 : //FIXME : make MSPUBCollector handle double shape rotations
1646 0 : rotated90 = (rotation >= 45 && rotation < 135) || (rotation >= 225 && rotation < 315);
1647 :
1648 : }
1649 0 : unsigned *ptr_left = getIfExists(foptValues.m_scalarValues, FIELDID_DY_TEXT_LEFT);
1650 0 : unsigned *ptr_top = getIfExists(foptValues.m_scalarValues, FIELDID_DY_TEXT_TOP);
1651 0 : unsigned *ptr_right = getIfExists(foptValues.m_scalarValues, FIELDID_DY_TEXT_RIGHT);
1652 0 : unsigned *ptr_bottom = getIfExists(foptValues.m_scalarValues, FIELDID_DY_TEXT_BOTTOM);
1653 : m_collector->setShapeMargins(*shapeSeqNum, ptr_left ? *ptr_left : DEFAULT_MARGIN,
1654 : ptr_top ? *ptr_top : DEFAULT_MARGIN,
1655 : ptr_right ? *ptr_right : DEFAULT_MARGIN,
1656 0 : ptr_bottom ? *ptr_bottom : DEFAULT_MARGIN);
1657 0 : unsigned *ptr_lineDashing = getIfExists(foptValues.m_scalarValues, FIELDID_LINE_DASHING);
1658 0 : unsigned *ptr_lineEndcapStyle = getIfExists(foptValues.m_scalarValues, FIELDID_LINE_ENDCAP_STYLE);
1659 0 : DotStyle dotStyle = RECT_DOT;
1660 0 : if (ptr_lineEndcapStyle)
1661 : {
1662 0 : switch (*ptr_lineEndcapStyle)
1663 : {
1664 : case 0:
1665 0 : dotStyle = ROUND_DOT;
1666 0 : break;
1667 : default:
1668 0 : break;
1669 : }
1670 : }
1671 0 : if (ptr_lineDashing)
1672 : {
1673 : m_collector->setShapeDash(*shapeSeqNum, getDash(
1674 : static_cast<MSPUBDashStyle>(*ptr_lineDashing), lineWidth,
1675 0 : dotStyle));
1676 : }
1677 :
1678 : unsigned *ptr_numColumns = getIfExists(foptValues.m_scalarValues,
1679 0 : FIELDID_NUM_COLUMNS);
1680 0 : if (ptr_numColumns)
1681 : {
1682 0 : m_collector->setShapeNumColumns(*shapeSeqNum, *ptr_numColumns);
1683 : }
1684 : unsigned *ptr_columnSpacing = getIfExists(foptValues.m_scalarValues,
1685 0 : FIELDID_COLUMN_SPACING);
1686 0 : if (ptr_columnSpacing)
1687 : {
1688 0 : m_collector->setShapeColumnSpacing(*shapeSeqNum, *ptr_columnSpacing);
1689 : }
1690 : unsigned *ptr_beginArrowStyle = getIfExists(foptValues.m_scalarValues,
1691 0 : FIELDID_BEGIN_ARROW_STYLE);
1692 : unsigned *ptr_beginArrowWidth = getIfExists(foptValues.m_scalarValues,
1693 0 : FIELDID_BEGIN_ARROW_WIDTH);
1694 : unsigned *ptr_beginArrowHeight = getIfExists(foptValues.m_scalarValues,
1695 0 : FIELDID_BEGIN_ARROW_HEIGHT);
1696 : m_collector->setShapeBeginArrow(*shapeSeqNum, Arrow(
1697 : ptr_beginArrowStyle ? (ArrowStyle)(*ptr_beginArrowStyle) :
1698 : NO_ARROW,
1699 : ptr_beginArrowWidth ? (ArrowSize)(*ptr_beginArrowWidth) :
1700 : MEDIUM,
1701 : ptr_beginArrowHeight ? (ArrowSize)(*ptr_beginArrowHeight) :
1702 0 : MEDIUM));
1703 : unsigned *ptr_endArrowStyle = getIfExists(foptValues.m_scalarValues,
1704 0 : FIELDID_END_ARROW_STYLE);
1705 : unsigned *ptr_endArrowWidth = getIfExists(foptValues.m_scalarValues,
1706 0 : FIELDID_END_ARROW_WIDTH);
1707 : unsigned *ptr_endArrowHeight = getIfExists(foptValues.m_scalarValues,
1708 0 : FIELDID_END_ARROW_HEIGHT);
1709 : m_collector->setShapeEndArrow(*shapeSeqNum, Arrow(
1710 : ptr_endArrowStyle ? (ArrowStyle)(*ptr_endArrowStyle) :
1711 : NO_ARROW,
1712 : ptr_endArrowWidth ? (ArrowSize)(*ptr_endArrowWidth) :
1713 : MEDIUM,
1714 : ptr_endArrowHeight ? (ArrowSize)(*ptr_endArrowHeight) :
1715 0 : MEDIUM));
1716 :
1717 : unsigned *ptr_shadowType = getIfExists(foptValues.m_scalarValues,
1718 0 : FIELDID_SHADOW_TYPE);
1719 0 : if (ptr_shadowType)
1720 : {
1721 0 : ShadowType shadowType = static_cast<ShadowType>(*ptr_shadowType);
1722 : unsigned *shadowColor = getIfExists(foptValues.m_scalarValues,
1723 0 : FIELDID_SHADOW_COLOR);
1724 : unsigned *shadowOpacity = getIfExists(foptValues.m_scalarValues,
1725 0 : FIELDID_SHADOW_OPACITY);
1726 : unsigned *shadowOffsetX = getIfExists(foptValues.m_scalarValues,
1727 0 : FIELDID_SHADOW_OFFSET_X);
1728 : unsigned *shadowOffsetY = getIfExists(foptValues.m_scalarValues,
1729 0 : FIELDID_SHADOW_OFFSET_Y);
1730 : unsigned *shadowOriginX = getIfExists(foptValues.m_scalarValues,
1731 0 : FIELDID_SHADOW_ORIGIN_X);
1732 : unsigned *shadowOriginY = getIfExists(foptValues.m_scalarValues,
1733 0 : FIELDID_SHADOW_ORIGIN_Y);
1734 : /* unsigned *shadowBoolProps = getIfExists(foptValues.m_scalarValues,
1735 : FIELDID_SHADOW_BOOL_PROPS); */
1736 : m_collector->setShapeShadow(*shapeSeqNum, Shadow(shadowType,
1737 : shadowOffsetX ? static_cast<int>(*shadowOffsetX) : 0x6338,
1738 : shadowOffsetY ? static_cast<int>(*shadowOffsetY) : 0x6338,
1739 0 : shadowOriginX ? toFixedPoint(static_cast<int>(*shadowOriginX)) : 0,
1740 0 : shadowOriginY ? toFixedPoint(static_cast<int>(*shadowOriginY)) : 0,
1741 : toFixedPoint(shadowOpacity ? static_cast<int>(*shadowOpacity)
1742 : : 0x10000),
1743 0 : ColorReference(shadowColor ? *shadowColor : 0)));
1744 :
1745 :
1746 : }
1747 :
1748 0 : const std::vector<unsigned char> vertexData = foptValues.m_complexValues[FIELDID_P_VERTICES];
1749 0 : if (vertexData.size() > 0)
1750 : {
1751 : unsigned *p_geoRight = getIfExists(foptValues.m_scalarValues,
1752 0 : FIELDID_GEO_RIGHT);
1753 : unsigned *p_geoBottom = getIfExists(foptValues.m_scalarValues,
1754 0 : FIELDID_GEO_BOTTOM);
1755 0 : const std::vector<unsigned char> segmentData = foptValues.m_complexValues[FIELDID_P_SEGMENTS];
1756 0 : const std::vector<unsigned char> guideData = foptValues.m_complexValues[FIELDID_P_GUIDES];
1757 : m_collector->setShapeCustomPath(*shapeSeqNum, getDynamicCustomShape(vertexData, segmentData,
1758 : guideData, p_geoRight ? *p_geoRight : 21600,
1759 0 : p_geoBottom ? *p_geoBottom : 21600));
1760 0 : }
1761 : }
1762 0 : if (foundAnchor)
1763 : {
1764 0 : Coordinate absolute;
1765 0 : if (cAnchor.type == OFFICE_ART_CLIENT_ANCHOR)
1766 : {
1767 0 : std::map<unsigned short, unsigned> anchorData = extractEscherValues(input, cAnchor);
1768 0 : absolute = Coordinate(anchorData[FIELDID_XS],
1769 0 : anchorData[FIELDID_YS], anchorData[FIELDID_XE],
1770 0 : anchorData[FIELDID_YE]);
1771 : }
1772 0 : else if (cAnchor.type == OFFICE_ART_CHILD_ANCHOR)
1773 : {
1774 0 : input->seek(cAnchor.contentsOffset, WPX_SEEK_SET);
1775 0 : int coordSystemWidth = thisParentCoordinateSystem.m_xe - thisParentCoordinateSystem.m_xs;
1776 0 : int coordSystemHeight = thisParentCoordinateSystem.m_ye - thisParentCoordinateSystem.m_ys;
1777 0 : int groupWidth = parentGroupAbsoluteCoord.m_xe - parentGroupAbsoluteCoord.m_xs;
1778 0 : int groupHeight = parentGroupAbsoluteCoord.m_ye - parentGroupAbsoluteCoord.m_ys;
1779 0 : double widthScale = (double)groupWidth / coordSystemWidth;
1780 0 : double heightScale = (double)groupHeight / coordSystemHeight;
1781 0 : int xs = (readU32(input) - thisParentCoordinateSystem.m_xs) * widthScale + parentGroupAbsoluteCoord.m_xs;
1782 0 : int ys = (readU32(input) - thisParentCoordinateSystem.m_ys) * heightScale + parentGroupAbsoluteCoord.m_ys;
1783 0 : int xe = (readU32(input) - thisParentCoordinateSystem.m_xs) * widthScale + parentGroupAbsoluteCoord.m_xs;
1784 0 : int ye = (readU32(input) - thisParentCoordinateSystem.m_ys) * heightScale + parentGroupAbsoluteCoord.m_ys;
1785 :
1786 0 : absolute = Coordinate(xs, ys, xe, ye);
1787 : }
1788 0 : if (rotated90)
1789 : {
1790 0 : int initialX = absolute.m_xs;
1791 0 : int initialY = absolute.m_ys;
1792 0 : int initialWidth = absolute.m_xe - absolute.m_xs;
1793 0 : int initialHeight = absolute.m_ye - absolute.m_ys;
1794 0 : int centerX = initialX + initialWidth / 2;
1795 0 : int centerY = initialY + initialHeight / 2;
1796 0 : int xs = centerX - initialHeight / 2;
1797 0 : int ys = centerY - initialWidth / 2;
1798 0 : int xe = xs + initialHeight;
1799 0 : int ye = ys + initialWidth;
1800 0 : absolute = Coordinate(xs, ys, xe, ye);
1801 : }
1802 : m_collector->setShapeCoordinatesInEmu(*shapeSeqNum,
1803 : absolute.m_xs,
1804 : absolute.m_ys,
1805 : absolute.m_xe,
1806 0 : absolute.m_ye);
1807 0 : if (definesRelativeCoordinates)
1808 : {
1809 0 : parentGroupAbsoluteCoord = absolute;
1810 : }
1811 0 : }
1812 0 : }
1813 0 : }
1814 : }
1815 0 : }
1816 :
1817 0 : boost::shared_ptr<libmspub::Fill> libmspub::MSPUBParser::getNewFill(const std::map<unsigned short, unsigned> &foptProperties,
1818 : bool &skipIfNotBg)
1819 : {
1820 0 : const FillType *ptr_fillType = (FillType *)getIfExists_const(foptProperties, FIELDID_FILL_TYPE);
1821 0 : FillType fillType = ptr_fillType ? *ptr_fillType : SOLID;
1822 0 : switch (fillType)
1823 : {
1824 : case SOLID:
1825 : {
1826 0 : const unsigned *ptr_fillColor = getIfExists_const(foptProperties, FIELDID_FILL_COLOR);
1827 0 : const unsigned *ptr_fieldStyleProps = getIfExists_const(foptProperties, FIELDID_FIELD_STYLE_BOOL_PROPS);
1828 0 : skipIfNotBg = ptr_fieldStyleProps && (*ptr_fieldStyleProps & 0xF0) == 0;
1829 0 : if (ptr_fillColor && !skipIfNotBg)
1830 : {
1831 0 : const unsigned *ptr_fillOpacity = getIfExists_const(foptProperties, FIELDID_FILL_OPACITY);
1832 0 : return boost::shared_ptr<Fill>(new SolidFill(ColorReference(*ptr_fillColor), ptr_fillOpacity ? (double)(*ptr_fillOpacity) / 0xFFFF : 1, m_collector));
1833 : }
1834 0 : return boost::shared_ptr<Fill>();
1835 : }
1836 : case GRADIENT: //FIXME: The handling of multi-color gradients here is quite bad.
1837 : {
1838 : int angle;
1839 0 : const int *ptr_angle = (const int *)getIfExists_const(foptProperties, FIELDID_FILL_ANGLE);
1840 0 : const unsigned *ptr_fillColor = getIfExists_const(foptProperties, FIELDID_FILL_COLOR);
1841 0 : const unsigned *ptr_fillBackColor = getIfExists_const(foptProperties, FIELDID_FILL_BACK_COLOR);
1842 0 : unsigned fill = ptr_fillColor ? *ptr_fillColor : 0x00FFFFFFF;
1843 0 : unsigned fillBack = ptr_fillBackColor ? *ptr_fillBackColor : 0x00FFFFFF;
1844 0 : ColorReference firstColor(fill, fill);
1845 0 : ColorReference secondColor(fill, fillBack);
1846 0 : const unsigned *ptr_fillOpacity = getIfExists_const(foptProperties, FIELDID_FILL_OPACITY);
1847 0 : const unsigned *ptr_fillBackOpacity = getIfExists_const(foptProperties, FIELDID_FILL_BACK_OPACITY);
1848 0 : const unsigned *ptr_fillFocus = getIfExists_const(foptProperties, FIELDID_FILL_FOCUS);
1849 0 : short fillFocus = ptr_fillFocus ? ((int)(*ptr_fillFocus) << 16) >> 16 : 0;
1850 0 : angle = ptr_angle ? *ptr_angle : 0;
1851 0 : angle >>= 16; //it's actually only 16 bits
1852 : // Don't try to figure out what sense the following switch statement makes.
1853 : // The angles are just offset by 90 degrees in the file format in some cases.
1854 : // It seems totally arbitrary -- maybe an MS bug ?
1855 0 : switch (angle)
1856 : {
1857 : case -135:
1858 0 : angle = -45;
1859 0 : break;
1860 : case -45:
1861 0 : angle = 225;
1862 0 : break;
1863 : default:
1864 0 : break;
1865 : }
1866 :
1867 0 : boost::shared_ptr<GradientFill> ret(new GradientFill(m_collector, angle));
1868 0 : if (fillFocus == 0)
1869 : {
1870 0 : ret->addColor(firstColor, 0, ptr_fillOpacity ? (double)(*ptr_fillOpacity) / 0xFFFF : 1);
1871 0 : ret->addColor(secondColor, 100, ptr_fillBackOpacity ? (double)(*ptr_fillBackOpacity) / 0xFFFF : 1);
1872 : }
1873 0 : else if (fillFocus == 100)
1874 : {
1875 0 : ret->addColor(secondColor, 0, ptr_fillBackOpacity ? (double)(*ptr_fillBackOpacity) / 0xFFFF : 1);
1876 0 : ret->addColor(firstColor, 100, ptr_fillOpacity ? (double)(*ptr_fillOpacity) / 0xFFFF : 1);
1877 : }
1878 0 : else if (fillFocus > 0)
1879 : {
1880 0 : ret->addColor(firstColor, 0, ptr_fillOpacity ? (double)(*ptr_fillOpacity) / 0xFFFF : 1);
1881 0 : ret->addColor(secondColor, fillFocus, ptr_fillBackOpacity ? (double)(*ptr_fillBackOpacity) / 0xFFFF : 1);
1882 0 : ret->addColor(firstColor, 100, ptr_fillOpacity ? (double)(*ptr_fillOpacity) / 0xFFFF : 1);
1883 : }
1884 0 : else if (fillFocus < 0)
1885 : {
1886 0 : ret->addColor(secondColor, 0, ptr_fillBackOpacity ? (double)(*ptr_fillBackOpacity) / 0xFFFF : 1);
1887 0 : ret->addColor(firstColor, 100 + fillFocus, ptr_fillOpacity ? (double)(*ptr_fillOpacity) / 0xFFFF : 1);
1888 0 : ret->addColor(secondColor, 100, ptr_fillBackOpacity ? (double)(*ptr_fillBackOpacity) / 0xFFFF : 1);
1889 : }
1890 0 : return ret;
1891 : }
1892 : case TEXTURE:
1893 : case BITMAP:
1894 : {
1895 0 : const unsigned *ptr_bgPxId = getIfExists_const(foptProperties, FIELDID_BG_PXID);
1896 0 : if (ptr_bgPxId && *ptr_bgPxId <= m_escherDelayIndices.size() && m_escherDelayIndices[*ptr_bgPxId - 1] >= 0)
1897 : {
1898 0 : return boost::shared_ptr<Fill>(new ImgFill(m_escherDelayIndices[*ptr_bgPxId - 1], m_collector, fillType == TEXTURE));
1899 : }
1900 0 : return boost::shared_ptr<Fill>();
1901 : }
1902 : case PATTERN:
1903 : {
1904 0 : const unsigned *ptr_bgPxId = getIfExists_const(foptProperties, FIELDID_BG_PXID);
1905 0 : const unsigned *ptr_fillColor = getIfExists_const(foptProperties, FIELDID_FILL_COLOR);
1906 0 : const unsigned *ptr_fillBackColor = getIfExists_const(foptProperties, FIELDID_FILL_BACK_COLOR);
1907 0 : ColorReference fill = ptr_fillColor ? ColorReference(*ptr_fillColor) : ColorReference(0x00FFFFFF);
1908 0 : ColorReference back = ptr_fillBackColor ? ColorReference(*ptr_fillBackColor) : ColorReference(0x08000000);
1909 0 : if (ptr_bgPxId && *ptr_bgPxId <= m_escherDelayIndices.size() && m_escherDelayIndices[*ptr_bgPxId - 1 ] >= 0)
1910 : {
1911 0 : return boost::shared_ptr<Fill>(new PatternFill(m_escherDelayIndices[*ptr_bgPxId - 1], m_collector, fill, back));
1912 : }
1913 : }
1914 : default:
1915 0 : return boost::shared_ptr<Fill>();
1916 : }
1917 : }
1918 :
1919 0 : libmspub::DynamicCustomShape libmspub::MSPUBParser::getDynamicCustomShape(
1920 : const std::vector<unsigned char> &vertexData, const std::vector<unsigned char> &segmentData,
1921 : const std::vector<unsigned char> &guideData, unsigned geoWidth,
1922 : unsigned geoHeight)
1923 : {
1924 0 : DynamicCustomShape ret(geoWidth, geoHeight);
1925 0 : ret.m_vertices = parseVertices(vertexData);
1926 0 : ret.m_elements = parseSegments(segmentData);
1927 0 : ret.m_calculations = parseGuides(guideData);
1928 0 : return ret;
1929 : }
1930 :
1931 0 : std::vector<unsigned short> libmspub::MSPUBParser::parseSegments(
1932 : const std::vector<unsigned char> &segmentData)
1933 : {
1934 0 : std::vector<unsigned short> ret;
1935 0 : if (segmentData.size() < 6)
1936 : {
1937 0 : return ret;
1938 : }
1939 : // assume that the entry size is 2.
1940 0 : unsigned short numEntries = segmentData[0] | (segmentData[1] << 8);
1941 0 : unsigned offset = 6;
1942 0 : for (unsigned i = 0; i < numEntries; ++i)
1943 : {
1944 0 : if (offset + 2 > segmentData.size())
1945 : {
1946 0 : break;
1947 : }
1948 0 : ret.push_back(segmentData[offset] | (segmentData[offset + 1] << 8));
1949 0 : offset += 2;
1950 : }
1951 0 : return ret;
1952 : }
1953 :
1954 0 : std::vector<libmspub::Calculation> libmspub::MSPUBParser::parseGuides(
1955 : const std::vector<unsigned char> &/* guideData */)
1956 : {
1957 0 : std::vector<Calculation> ret;
1958 :
1959 : //FIXME : implement this function.
1960 :
1961 0 : return ret;
1962 : }
1963 :
1964 0 : std::vector<libmspub::Vertex> libmspub::MSPUBParser::parseVertices(
1965 : const std::vector<unsigned char> &vertexData)
1966 : {
1967 0 : std::vector<libmspub::Vertex> ret;
1968 0 : if (vertexData.size() < 6)
1969 : {
1970 0 : return ret;
1971 : }
1972 0 : unsigned short numVertices = vertexData[0] | (vertexData[1] << 8);
1973 0 : unsigned short entrySize = vertexData[4] | (vertexData[5] << 8);
1974 0 : if (entrySize == 0xFFF0)
1975 : {
1976 0 : entrySize = 4;
1977 : }
1978 0 : if (! (entrySize == 2 || entrySize == 4 || entrySize == 8))
1979 : {
1980 : MSPUB_DEBUG_MSG(("Incomprehensible entry size %d in vertex complex data!\n", entrySize));
1981 0 : return ret;
1982 : }
1983 0 : unsigned offset = 6;
1984 0 : ret.reserve(numVertices);
1985 0 : for (unsigned i = 0; i < numVertices; ++i)
1986 : {
1987 0 : if (offset + entrySize > vertexData.size())
1988 : {
1989 : break;
1990 : }
1991 : int32_t x, y;
1992 0 : switch (entrySize)
1993 : {
1994 : case 2:
1995 0 : x = vertexData[offset];
1996 0 : y = vertexData[offset + 1];
1997 0 : break;
1998 : case 4:
1999 0 : x = vertexData[offset] | (uint32_t(vertexData[offset + 1]) << 8);
2000 0 : y = vertexData[offset + 2] | (uint32_t(vertexData[offset + 3]) << 8);
2001 0 : break;
2002 : case 8:
2003 0 : x = vertexData[offset] | (uint32_t(vertexData[offset + 1]) << 8) |
2004 0 : (uint32_t(vertexData[offset + 2]) << 16) | (uint32_t(vertexData[offset + 3]) << 24);
2005 0 : y = vertexData[offset + 4] | (uint32_t(vertexData[offset + 5]) << 8) |
2006 0 : (uint32_t(vertexData[offset + 6]) << 16) | (uint32_t(vertexData[offset + 7]) << 24);
2007 0 : break;
2008 : default: // logically shouldn't be able to get here.
2009 0 : x = 0;
2010 0 : y = 0;
2011 0 : break;
2012 : }
2013 0 : libmspub::Vertex v = {x, y};
2014 0 : ret.push_back(v);
2015 0 : offset += entrySize;
2016 : }
2017 0 : return ret;
2018 : }
2019 :
2020 0 : unsigned libmspub::MSPUBParser::getEscherElementTailLength(unsigned short type)
2021 : {
2022 0 : switch (type)
2023 : {
2024 : case OFFICE_ART_DGG_CONTAINER:
2025 : case OFFICE_ART_DG_CONTAINER:
2026 0 : return 4;
2027 : default:
2028 0 : return 0;
2029 : }
2030 : }
2031 :
2032 0 : unsigned libmspub::MSPUBParser::getEscherElementAdditionalHeaderLength(unsigned short type)
2033 : {
2034 0 : switch (type)
2035 : {
2036 : case OFFICE_ART_CLIENT_ANCHOR:
2037 : case OFFICE_ART_CLIENT_DATA: //account for the fact that the length appears twice, for whatever reason
2038 0 : return 4;
2039 : }
2040 0 : return 0;
2041 : }
2042 :
2043 0 : bool libmspub::MSPUBParser::findEscherContainerWithTypeInSet(WPXInputStream *input, const libmspub::EscherContainerInfo &parent, libmspub::EscherContainerInfo &out, std::set<unsigned short> types)
2044 : {
2045 0 : while (stillReading(input, parent.contentsOffset + parent.contentsLength))
2046 : {
2047 0 : libmspub::EscherContainerInfo next = parseEscherContainer(input);
2048 0 : if (types.find(next.type) != types.end())
2049 : {
2050 0 : out = next;
2051 0 : return true;
2052 : }
2053 0 : input->seek(next.contentsOffset + next.contentsLength + getEscherElementTailLength(next.type), WPX_SEEK_SET);
2054 : }
2055 0 : return false;
2056 : }
2057 :
2058 0 : bool libmspub::MSPUBParser::findEscherContainer(WPXInputStream *input, const libmspub::EscherContainerInfo &parent, libmspub::EscherContainerInfo &out, unsigned short desiredType)
2059 : {
2060 : MSPUB_DEBUG_MSG(("At offset 0x%lx, attempting to find escher container of type 0x%x\n", input->tell(), desiredType));
2061 0 : while (stillReading(input, parent.contentsOffset + parent.contentsLength))
2062 : {
2063 0 : libmspub::EscherContainerInfo next = parseEscherContainer(input);
2064 0 : if (next.type == desiredType)
2065 : {
2066 0 : out = next;
2067 0 : return true;
2068 : }
2069 0 : input->seek(next.contentsOffset + next.contentsLength + getEscherElementTailLength(next.type), WPX_SEEK_SET);
2070 : }
2071 0 : return false;
2072 : }
2073 :
2074 0 : libmspub::FOPTValues libmspub::MSPUBParser::extractFOPTValues(WPXInputStream *input, const libmspub::EscherContainerInfo &record)
2075 : {
2076 0 : FOPTValues ret;
2077 0 : input->seek(record.contentsOffset, WPX_SEEK_SET);
2078 0 : unsigned short numValues = record.initial >> 4;
2079 0 : std::vector<unsigned short> complexIds;
2080 0 : for (unsigned short i = 0; i < numValues; ++i)
2081 : {
2082 0 : if (!stillReading(input, record.contentsOffset + record.contentsLength))
2083 : {
2084 : break;
2085 : }
2086 0 : unsigned short id = readU16(input);
2087 0 : unsigned value = readU32(input);
2088 0 : ret.m_scalarValues[id] = value;
2089 0 : bool complex = id & 0x8000;
2090 0 : if (complex)
2091 : {
2092 0 : complexIds.push_back(id);
2093 : }
2094 : }
2095 0 : for (unsigned i = 0; i < complexIds.size(); ++i)
2096 : {
2097 0 : if (!stillReading(input, record.contentsOffset + record.contentsLength))
2098 : {
2099 : break;
2100 : }
2101 0 : unsigned short id = complexIds[i];
2102 0 : unsigned length = ret.m_scalarValues[id];
2103 0 : if (!length)
2104 : {
2105 0 : continue;
2106 : }
2107 0 : unsigned short numEntries = readU16(input);
2108 0 : input->seek(2, WPX_SEEK_CUR);
2109 0 : unsigned short entryLength = readU16(input);
2110 0 : if (entryLength == 0xFFF0)
2111 : {
2112 0 : entryLength = 4;
2113 : }
2114 0 : input->seek(-6, WPX_SEEK_CUR);
2115 0 : readNBytes(input, entryLength * numEntries + 6, ret.m_complexValues[id]);
2116 : }
2117 0 : return ret;
2118 : }
2119 :
2120 0 : std::map<unsigned short, unsigned> libmspub::MSPUBParser::extractEscherValues(WPXInputStream *input, const libmspub::EscherContainerInfo &record)
2121 : {
2122 0 : std::map<unsigned short, unsigned> ret;
2123 0 : input->seek(record.contentsOffset + getEscherElementAdditionalHeaderLength(record.type), WPX_SEEK_SET);
2124 0 : while (stillReading(input, record.contentsOffset + record.contentsLength))
2125 : {
2126 0 : unsigned short id = readU16(input);
2127 0 : unsigned value = readU32(input);
2128 0 : ret[id] = value;
2129 : }
2130 0 : return ret;
2131 : }
2132 :
2133 :
2134 0 : bool libmspub::MSPUBParser::parseContentChunkReference(WPXInputStream *input, const libmspub::MSPUBBlockInfo block)
2135 : {
2136 : //input should be at block.dataOffset + 4 , that is, at the beginning of the list of sub-blocks
2137 : MSPUB_DEBUG_MSG(("Parsing chunk reference 0x%x\n", m_lastSeenSeqNum));
2138 0 : libmspub::MSPUBContentChunkType type = (libmspub::MSPUBContentChunkType)0;
2139 0 : unsigned long offset = 0;
2140 0 : unsigned parentSeqNum = 0;
2141 0 : bool seenType = false;
2142 0 : bool seenOffset = false;
2143 0 : bool seenParentSeqNum = false;
2144 0 : while (stillReading(input, block.dataOffset + block.dataLength))
2145 : {
2146 0 : libmspub::MSPUBBlockInfo subBlock = parseBlock(input, true);
2147 : //FIXME: Warn if multiple of these blocks seen.
2148 0 : if (subBlock.id == CHUNK_TYPE)
2149 : {
2150 0 : type = (libmspub::MSPUBContentChunkType)subBlock.data;
2151 0 : seenType = true;
2152 : }
2153 0 : else if (subBlock.id == CHUNK_OFFSET)
2154 : {
2155 0 : offset = subBlock.data;
2156 0 : seenOffset = true;
2157 : }
2158 0 : else if (subBlock.id == CHUNK_PARENT_SEQNUM)
2159 : {
2160 0 : parentSeqNum = subBlock.data;
2161 0 : seenParentSeqNum = true;
2162 : }
2163 0 : }
2164 0 : if (seenType && seenOffset) //FIXME: What if there is an offset, but not a type? Should we still set the end of the preceding chunk to that offset?
2165 : {
2166 0 : if (type == PAGE)
2167 : {
2168 : MSPUB_DEBUG_MSG(("page chunk: offset 0x%lx, seqnum 0x%x\n", offset, m_lastSeenSeqNum));
2169 0 : m_contentChunks.push_back(ContentChunkReference(type, offset, 0, m_lastSeenSeqNum, seenParentSeqNum ? parentSeqNum : 0));
2170 0 : m_pageChunkIndices.push_back(unsigned(m_contentChunks.size() - 1));
2171 0 : return true;
2172 : }
2173 0 : else if (type == DOCUMENT)
2174 : {
2175 : MSPUB_DEBUG_MSG(("document chunk: offset 0x%lx, seqnum 0x%x\n", offset, m_lastSeenSeqNum));
2176 0 : m_contentChunks.push_back(ContentChunkReference(type, offset, 0, m_lastSeenSeqNum, seenParentSeqNum ? parentSeqNum : 0));
2177 0 : m_documentChunkIndex = unsigned(m_contentChunks.size() - 1);
2178 0 : return true;
2179 : }
2180 0 : else if (type == SHAPE || type == ALTSHAPE || type == GROUP || type == TABLE || type == LOGO)
2181 : {
2182 : MSPUB_DEBUG_MSG(("shape chunk: offset 0x%lx, seqnum 0x%x, parent seqnum: 0x%x\n", offset, m_lastSeenSeqNum, parentSeqNum));
2183 0 : m_contentChunks.push_back(ContentChunkReference(type, offset, 0, m_lastSeenSeqNum, seenParentSeqNum ? parentSeqNum : 0));
2184 0 : m_shapeChunkIndices.push_back(unsigned(m_contentChunks.size() - 1));
2185 0 : if (type == ALTSHAPE)
2186 : {
2187 0 : m_alternateShapeSeqNums.push_back(m_lastSeenSeqNum);
2188 : }
2189 0 : return true;
2190 : }
2191 0 : else if (type == CELLS)
2192 : {
2193 0 : m_contentChunks.push_back(ContentChunkReference(type, offset, 0, m_lastSeenSeqNum, seenParentSeqNum ? parentSeqNum : 0));
2194 0 : m_cellsChunkIndices.push_back(unsigned(m_contentChunks.size() - 1));
2195 0 : return true;
2196 : }
2197 0 : else if (type == PALETTE)
2198 : {
2199 0 : m_contentChunks.push_back(ContentChunkReference(type, offset, 0, m_lastSeenSeqNum, seenParentSeqNum ? parentSeqNum : 0));
2200 0 : m_paletteChunkIndices.push_back(unsigned(m_contentChunks.size() - 1));
2201 0 : return true;
2202 : }
2203 0 : else if (type == BORDER_ART)
2204 : {
2205 : m_contentChunks.push_back(ContentChunkReference(type, offset, 0,
2206 0 : m_lastSeenSeqNum, seenParentSeqNum ? parentSeqNum : 0));
2207 : m_borderArtChunkIndices.push_back(
2208 0 : unsigned(m_contentChunks.size() - 1));
2209 0 : return true;
2210 : }
2211 0 : else if (type == FONT)
2212 : {
2213 : m_contentChunks.push_back(ContentChunkReference(type, offset, 0,
2214 : m_lastSeenSeqNum,
2215 0 : seenParentSeqNum ? parentSeqNum : 0));
2216 0 : m_fontChunkIndices.push_back(unsigned(m_contentChunks.size() - 1));
2217 0 : return true;
2218 : }
2219 0 : m_contentChunks.push_back(ContentChunkReference(type, offset, 0, m_lastSeenSeqNum, seenParentSeqNum ? parentSeqNum : 0));
2220 0 : m_unknownChunkIndices.push_back(unsigned(m_contentChunks.size() - 1));
2221 : }
2222 0 : return false;
2223 : }
2224 :
2225 0 : bool libmspub::MSPUBParser::isBlockDataString(unsigned type)
2226 : {
2227 0 : return type == STRING_CONTAINER;
2228 : }
2229 0 : void libmspub::MSPUBParser::skipBlock(WPXInputStream *input, libmspub::MSPUBBlockInfo block)
2230 : {
2231 0 : input->seek(block.dataOffset + block.dataLength, WPX_SEEK_SET);
2232 0 : }
2233 :
2234 0 : libmspub::EscherContainerInfo libmspub::MSPUBParser::parseEscherContainer(WPXInputStream *input)
2235 : {
2236 : libmspub::EscherContainerInfo info;
2237 0 : info.initial = readU16(input);
2238 0 : info.type = readU16(input);
2239 0 : info.contentsLength = readU32(input);
2240 0 : info.contentsOffset = input->tell();
2241 : MSPUB_DEBUG_MSG(("Parsed escher container: type 0x%x, contentsOffset 0x%lx, contentsLength 0x%lx\n", info.type, info.contentsOffset, info.contentsLength));
2242 0 : return info;
2243 : }
2244 :
2245 0 : libmspub::MSPUBBlockInfo libmspub::MSPUBParser::parseBlock(WPXInputStream *input, bool skipHierarchicalData)
2246 : {
2247 0 : libmspub::MSPUBBlockInfo info;
2248 0 : info.startPosition = input->tell();
2249 0 : info.id = (MSPUBBlockID)readU8(input);
2250 0 : info.type = (MSPUBBlockType)readU8(input);
2251 0 : info.dataOffset = input->tell();
2252 0 : int len = getBlockDataLength(info.type);
2253 0 : bool varLen = len < 0;
2254 0 : if (varLen)
2255 : {
2256 0 : info.dataLength = readU32(input);
2257 0 : if (isBlockDataString(info.type))
2258 : {
2259 0 : info.stringData = std::vector<unsigned char>();
2260 0 : readNBytes(input, info.dataLength - 4, info.stringData);
2261 : }
2262 0 : else if (skipHierarchicalData)
2263 : {
2264 0 : skipBlock(input, info);
2265 : }
2266 0 : info.data = 0;
2267 : }
2268 : else
2269 : {
2270 0 : info.dataLength = len;
2271 0 : switch (info.dataLength)
2272 : {
2273 : case 1:
2274 0 : info.data = readU8(input);
2275 0 : break;
2276 : case 2:
2277 0 : info.data = readU16(input);
2278 0 : break;
2279 : case 4:
2280 0 : info.data = readU32(input);
2281 0 : break;
2282 : case 8:
2283 : case 16:
2284 : case 24:
2285 : //FIXME: Not doing anything with this data for now.
2286 0 : skipBlock(input, info);
2287 : default:
2288 0 : info.data = 0;
2289 : }
2290 : }
2291 : MSPUB_DEBUG_MSG(("parseBlock dataOffset 0x%lx, id 0x%x, type 0x%x, dataLength 0x%lx, integral data 0x%x\n", info.dataOffset, info.id, info.type, info.dataLength, info.data));
2292 0 : return info;
2293 : }
2294 :
2295 0 : libmspub::PageType libmspub::MSPUBParser::getPageTypeBySeqNum(unsigned seqNum)
2296 : {
2297 0 : switch(seqNum)
2298 : {
2299 : case 0x10d:
2300 : case 0x110:
2301 : case 0x113:
2302 : case 0x117:
2303 0 : return DUMMY_PAGE;
2304 : default:
2305 0 : return NORMAL;
2306 : }
2307 : }
2308 :
2309 0 : bool libmspub::MSPUBParser::parsePaletteChunk(WPXInputStream *input, const ContentChunkReference &chunk)
2310 : {
2311 0 : unsigned length = readU32(input);
2312 0 : while (stillReading(input, chunk.offset + length))
2313 : {
2314 0 : MSPUBBlockInfo info = parseBlock(input);
2315 0 : if (info.type == 0xA0)
2316 : {
2317 0 : while (stillReading(input, info.dataOffset + info.dataLength))
2318 : {
2319 0 : MSPUBBlockInfo subInfo = parseBlock(input);
2320 0 : if (subInfo.type == GENERAL_CONTAINER)
2321 : {
2322 0 : parsePaletteEntry(input, subInfo);
2323 : }
2324 0 : else if (subInfo.type == DUMMY)
2325 : {
2326 0 : m_collector->addPaletteColor(Color());
2327 : }
2328 0 : skipBlock(input, subInfo);
2329 0 : }
2330 : }
2331 0 : skipBlock(input, info);
2332 0 : }
2333 0 : return true;
2334 : }
2335 :
2336 0 : void libmspub::MSPUBParser::parsePaletteEntry(WPXInputStream *input, MSPUBBlockInfo info)
2337 : {
2338 0 : while (stillReading(input, info.dataOffset + info.dataLength))
2339 : {
2340 0 : MSPUBBlockInfo subInfo = parseBlock(input, true);
2341 0 : if (subInfo.id == 0x01)
2342 : {
2343 0 : m_collector->addPaletteColor(Color(subInfo.data & 0xFF, (subInfo.data >> 8) & 0xFF, (subInfo.data >> 16) & 0xFF));
2344 : }
2345 0 : }
2346 0 : }
2347 :
2348 : /* vim:set shiftwidth=2 softtabstop=2 expandtab: */
|