LCOV - code coverage report
Current view: top level - libreoffice/workdir/unxlngi6.pro/UnpackedTarball/mspub/src/lib - MSPUBParser97.cpp (source / functions) Hit Total Coverage
Test: libreoffice_filtered.info Lines: 0 162 0.0 %
Date: 2012-12-17 Functions: 0 16 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2             : /* libmspub
       3             :  * Version: MPL 1.1 / GPLv2+ / LGPLv2+
       4             :  *
       5             :  * The contents of this file are subject to the Mozilla Public License Version
       6             :  * 1.1 (the "License"); you may not use this file except in compliance with
       7             :  * the License or as specified alternatively below. You may obtain a copy of
       8             :  * the License at http://www.mozilla.org/MPL/
       9             :  *
      10             :  * Software distributed under the License is distributed on an "AS IS" basis,
      11             :  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
      12             :  * for the specific language governing rights and limitations under the
      13             :  * License.
      14             :  *
      15             :  * Major Contributor(s):
      16             :  * Copyright (C) 2012 Brennan Vincent <brennanv@email.arizona.edu>
      17             :  *
      18             :  * All Rights Reserved.
      19             :  *
      20             :  * For minor contributions see the git repository.
      21             :  *
      22             :  * Alternatively, the contents of this file may be used under the terms of
      23             :  * either the GNU General Public License Version 2 or later (the "GPLv2+"), or
      24             :  * the GNU Lesser General Public License Version 2 or later (the "LGPLv2+"),
      25             :  * in which case the provisions of the GPLv2+ or the LGPLv2+ are applicable
      26             :  * instead of those above.
      27             :  */
      28             : 
      29             : #include "MSPUBParser97.h"
      30             : #include "MSPUBCollector.h"
      31             : #include "libmspub_utils.h"
      32             : #include "MSPUBTypes.h"
      33             : 
      34           0 : libmspub::MSPUBParser97::MSPUBParser97(WPXInputStream *input, MSPUBCollector *collector)
      35           0 :   : MSPUBParser2k(input, collector), m_isBanner(false)
      36             : {
      37           0 :   m_collector->setEncoding(WIN_1252);
      38           0 : }
      39             : 
      40           0 : unsigned short libmspub::MSPUBParser97::getTextMarker() const
      41             : {
      42           0 :   return 0x0000;
      43             : }
      44             : 
      45           0 : unsigned libmspub::MSPUBParser97::getTextIdOffset() const
      46             : {
      47           0 :   return 0x46;
      48             : }
      49             : 
      50           0 : bool libmspub::MSPUBParser97::parse()
      51             : {
      52           0 :   WPXInputStream *contents = m_input->getDocumentOLEStream("Contents");
      53           0 :   if (!contents)
      54             :   {
      55             :     MSPUB_DEBUG_MSG(("Couldn't get contents stream.\n"));
      56           0 :     return false;
      57             :   }
      58           0 :   if (!parseContents(contents))
      59             :   {
      60             :     MSPUB_DEBUG_MSG(("Couldn't parse contents stream.\n"));
      61           0 :     delete contents;
      62           0 :     return false;
      63             :   }
      64           0 :   return m_collector->go();
      65             : }
      66             : 
      67           0 : bool libmspub::MSPUBParser97::parseDocument(WPXInputStream *input)
      68             : {
      69           0 :   if (m_documentChunkIndex.is_initialized())
      70             :   {
      71           0 :     input->seek(m_contentChunks[m_documentChunkIndex.get()].offset + 0x12, WPX_SEEK_SET);
      72           0 :     unsigned short coordinateSystemMark = readU16(input);
      73           0 :     m_isBanner = coordinateSystemMark == 0x0007;
      74           0 :     unsigned width = readU32(input);
      75           0 :     unsigned height = readU32(input);
      76           0 :     m_collector->setWidthInEmu(width);
      77           0 :     m_collector->setHeightInEmu(height);
      78           0 :     return true;
      79             :   }
      80           0 :   return false;
      81             : }
      82             : 
      83           0 : void libmspub::MSPUBParser97::parseContentsTextIfNecessary(WPXInputStream *input)
      84             : {
      85           0 :   input->seek(0x12, WPX_SEEK_SET);
      86           0 :   input->seek(readU32(input), WPX_SEEK_SET);
      87           0 :   input->seek(14, WPX_SEEK_CUR);
      88           0 :   unsigned textStart = readU32(input);
      89           0 :   unsigned textEnd = readU32(input);
      90           0 :   unsigned prop1Index = readU16(input);
      91           0 :   unsigned prop2Index = readU16(input);
      92           0 :   unsigned prop3Index = readU16(input);
      93           0 :   unsigned prop3End = readU16(input);
      94             :   std::vector<SpanInfo97> spanInfos = getSpansInfo(input, prop1Index,
      95           0 :                                       prop2Index, prop3Index, prop3End);
      96           0 :   input->seek(textStart, WPX_SEEK_SET);
      97           0 :   TextInfo97 textInfo = getTextInfo(input, textEnd - textStart);
      98           0 :   unsigned iParaEnd = 0, iSpanEnd = 0;
      99           0 :   unsigned currentParaIndex = 0;
     100           0 :   unsigned currentSpanIndex = 0;
     101           0 :   for (unsigned iShapeEnd = 0; iShapeEnd < textInfo.m_shapeEnds.size(); ++iShapeEnd)
     102             :   {
     103           0 :     unsigned shapeEnd = std::min<unsigned>(textInfo.m_shapeEnds[iShapeEnd], textInfo.m_chars.size());
     104           0 :     std::vector<TextParagraph> shapeParas;
     105           0 :     while (currentParaIndex < shapeEnd)
     106             :     {
     107           0 :       unsigned paraEnd = iParaEnd < textInfo.m_paragraphEnds.size() ?
     108           0 :                          textInfo.m_paragraphEnds[iParaEnd++] : shapeEnd;
     109           0 :       if (paraEnd > shapeEnd)
     110             :       {
     111           0 :         --iParaEnd;
     112           0 :         paraEnd = shapeEnd;
     113             :       }
     114           0 :       std::vector<TextSpan> paraSpans;
     115           0 :       while (currentSpanIndex < paraEnd)
     116             :       {
     117           0 :         const SpanInfo97 &spanInfo = iSpanEnd < spanInfos.size() ?
     118           0 :                                      spanInfos[iSpanEnd++] :
     119           0 :                                      SpanInfo97(paraEnd, CharacterStyle());
     120           0 :         unsigned spanEnd = spanInfo.m_spanEnd;
     121           0 :         if (spanEnd > paraEnd)
     122             :         {
     123           0 :           --iSpanEnd;
     124           0 :           spanEnd = paraEnd;
     125             :         }
     126           0 :         const CharacterStyle &spanStyle = spanInfo.m_style;
     127           0 :         std::vector<unsigned char> spanChars;
     128           0 :         spanChars.reserve(spanEnd - currentSpanIndex);
     129           0 :         for (unsigned i = currentSpanIndex; i < spanEnd; ++i)
     130             :         {
     131           0 :           unsigned char ch = textInfo.m_chars[i];
     132           0 :           if (ch == 0xB) // Pub97 interprets vertical tab as nonbreaking space.
     133             :           {
     134           0 :             spanChars.push_back('\n');
     135             :           }
     136           0 :           else if (ch == 0x0D)
     137             :           {
     138           0 :             if (i + 1 < spanEnd && textInfo.m_chars[i + 1] == 0x0A)
     139             :             {
     140           0 :               ++i; // ignore the 0x0D and advance past the 0x0A
     141             :             }
     142             :           }
     143           0 :           else if (ch == 0x0C)
     144             :           {
     145             :             // ignore the 0x0C
     146             :           }
     147             :           else
     148             :           {
     149           0 :             spanChars.push_back(ch);
     150             :           }
     151             :         }
     152           0 :         paraSpans.push_back(TextSpan(spanChars, spanStyle));
     153           0 :         currentSpanIndex = spanEnd;
     154           0 :       }
     155           0 :       shapeParas.push_back(TextParagraph(paraSpans, ParagraphStyle()));
     156           0 :       currentParaIndex = paraEnd;
     157           0 :     }
     158           0 :     m_collector->addTextString(shapeParas, iShapeEnd);
     159           0 :   }
     160           0 : }
     161             : 
     162           0 : std::vector<libmspub::MSPUBParser97::SpanInfo97> libmspub::MSPUBParser97::getSpansInfo(
     163             :   WPXInputStream *input,
     164             :   unsigned prop1Index, unsigned prop2Index, unsigned /* prop3Index */,
     165             :   unsigned /* prop3End */)
     166             : {
     167           0 :   std::vector<unsigned> spanEnds;
     168           0 :   std::vector<SpanInfo97> ret;
     169           0 :   for (unsigned i = prop1Index; i < prop2Index; ++i)
     170             :   {
     171           0 :     unsigned offset = i * 0x200;
     172           0 :     input->seek(offset + 0x1FF, WPX_SEEK_SET);
     173           0 :     unsigned numEntries = readU8(input);
     174           0 :     input->seek(offset, WPX_SEEK_SET);
     175             :     // Skip the first thing; it is not an end
     176           0 :     unsigned start = readU32(input);
     177           0 :     for (unsigned j = 0; j < numEntries; ++j)
     178             :     {
     179           0 :       spanEnds.push_back(readU32(input) - start);
     180             :     }
     181           0 :     std::vector<unsigned char> spanStyleIndices;
     182           0 :     for (unsigned j = 0; j < spanEnds.size(); ++j)
     183             :     {
     184           0 :       spanStyleIndices.push_back(readU8(input));
     185             :     }
     186           0 :     while (stillReading(input, offset + 0x200) && readU8(input) == 0)
     187             :     {
     188             :       ;
     189             :     }
     190           0 :     input->seek(-1, WPX_SEEK_CUR);
     191           0 :     std::map<unsigned char, CharacterStyle> stylesByIndex;
     192           0 :     while (stillReading(input, offset + 0x1FF))
     193             :     {
     194           0 :       unsigned length = readU8(input);
     195           0 :       unsigned nextOffset = input->tell() + length;
     196             :       unsigned char index = static_cast<unsigned char>(
     197           0 :                               (input->tell() - 1 - offset) / 2);
     198           0 :       stylesByIndex[index] = readCharacterStyle(input, length);
     199           0 :       input->seek(nextOffset, WPX_SEEK_SET);
     200             :     }
     201           0 :     for (unsigned j = 0; j < spanEnds.size(); ++j)
     202             :     {
     203           0 :       ret.push_back(SpanInfo97(spanEnds[j], j < spanStyleIndices.size() ?
     204           0 :                                stylesByIndex[spanStyleIndices[j]] : CharacterStyle()));
     205             :     }
     206           0 :   }
     207           0 :   return ret;
     208             : }
     209             : 
     210           0 : libmspub::CharacterStyle libmspub::MSPUBParser97::readCharacterStyle(
     211             :   WPXInputStream *input, unsigned length)
     212             : {
     213           0 :   unsigned begin = input->tell();
     214           0 :   bool underline = false, italic = false, bold = false;
     215           0 :   int colorIndex = -1;
     216           0 :   unsigned fontIndex = 0;
     217           0 :   int textSizeVariationFromDefault = 0;
     218             : 
     219           0 :   if (length >= 1)
     220             :   {
     221           0 :     unsigned char biFlags = readU8(input);
     222           0 :     bold = biFlags & 0x1;
     223           0 :     italic = biFlags & 0x2;
     224             :   }
     225           0 :   if (length >= 3)
     226             :   {
     227           0 :     input->seek(begin + 0x2, WPX_SEEK_SET);
     228           0 :     fontIndex = readU8(input);
     229             :   }
     230           0 :   if (length >= 9)
     231             :   {
     232           0 :     input->seek(begin + 0x8, WPX_SEEK_SET);
     233           0 :     underline = readU8(input) & 0x1;
     234             :   }
     235           0 :   if (length >= 5)
     236             :   {
     237           0 :     input->seek(begin + 0x4, WPX_SEEK_SET);
     238             :     textSizeVariationFromDefault =
     239           0 :       length >= 6 ? readS16(input) : readS8(input);
     240             :   }
     241           0 :   if (length >= 16)
     242             :   {
     243           0 :     input->seek(begin + 0xC, WPX_SEEK_SET);
     244           0 :     colorIndex = getColorIndexByQuillEntry(readU32(input));
     245             :   }
     246             :   double textSizeInPt = 10 +
     247           0 :                         static_cast<double>(textSizeVariationFromDefault) / 2;
     248             :   return CharacterStyle(underline, italic, bold, textSizeInPt, colorIndex,
     249           0 :                         fontIndex);
     250             : }
     251             : 
     252           0 : libmspub::MSPUBParser97::TextInfo97 libmspub::MSPUBParser97::getTextInfo(WPXInputStream *input, unsigned length)
     253             : {
     254           0 :   std::vector<unsigned char> chars;
     255           0 :   chars.reserve(length);
     256           0 :   std::vector<unsigned> paragraphEnds;
     257           0 :   std::vector<unsigned> shapeEnds;
     258           0 :   unsigned start = input->tell();
     259           0 :   unsigned char last = '\0';
     260           0 :   while (stillReading(input, start + length))
     261             :   {
     262           0 :     chars.push_back(readU8(input));
     263           0 :     if (last == 0xD && chars.back() == 0xA)
     264             :     {
     265           0 :       paragraphEnds.push_back(chars.size());
     266             :     }
     267           0 :     else if (chars.back() == 0xC)
     268             :     {
     269           0 :       shapeEnds.push_back(chars.size());
     270             :     }
     271           0 :     last = chars.back();
     272             :   }
     273           0 :   return TextInfo97(chars, paragraphEnds, shapeEnds);
     274             : }
     275             : 
     276           0 : int libmspub::MSPUBParser97::translateCoordinateIfNecessary(int coordinate) const
     277             : {
     278           0 :   if (m_isBanner)
     279             :   {
     280           0 :     return coordinate - 120 * EMUS_IN_INCH;
     281             :   }
     282             :   else
     283             :   {
     284           0 :     return coordinate - 25 * EMUS_IN_INCH;
     285             :   }
     286             : }
     287             : 
     288           0 : unsigned libmspub::MSPUBParser97::getFirstLineOffset() const
     289             : {
     290           0 :   return 0x22;
     291             : }
     292             : 
     293           0 : unsigned libmspub::MSPUBParser97::getSecondLineOffset() const
     294             : {
     295           0 :   return 0x2D;
     296             : }
     297             : 
     298           0 : unsigned libmspub::MSPUBParser97::getShapeFillTypeOffset() const
     299             : {
     300           0 :   return 0x20;
     301             : }
     302             : 
     303           0 : unsigned libmspub::MSPUBParser97::getShapeFillColorOffset() const
     304             : {
     305           0 :   return 0x18;
     306           0 : }
     307             : 
     308             : /* vim:set shiftwidth=2 softtabstop=2 expandtab: */

Generated by: LCOV version 1.10