Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #ifndef INCLUDED_XMLREADER_XMLREADER_HXX
21 : #define INCLUDED_XMLREADER_XMLREADER_HXX
22 :
23 : #include <sal/config.h>
24 :
25 : #include <stack>
26 : #include <vector>
27 :
28 : #include <com/sun/star/container/NoSuchElementException.hpp>
29 : #include <com/sun/star/uno/RuntimeException.hpp>
30 : #include <osl/file.h>
31 : #include <rtl/ustring.hxx>
32 : #include <sal/types.h>
33 : #include <xmlreader/detail/xmlreaderdllapi.hxx>
34 : #include <xmlreader/pad.hxx>
35 : #include <xmlreader/span.hxx>
36 :
37 : namespace xmlreader {
38 :
// Reader for XML content that is supplied either as a character buffer plus
// length or as a file URL (see the two constructors).  Items are obtained one
// at a time through nextItem, which reports what it found as a Result; the
// attributes are then available through nextAttribute and getAttributeValue.
// Instances cannot be copied: the copy constructor and the copy assignment
// operator are declared private and marked SAL_DELETED_FUNCTION.
39 : class OOO_DLLPUBLIC_XMLREADER XmlReader {
40 : public:
// Constructs a reader from a character buffer sStr of nLength chars.
// NOTE(review): presumably the buffer is not copied and has to outlive the
// reader -- confirm against the implementation.
41 : XmlReader(char const *sStr, size_t nLength);
42 :
// Constructs a reader for the file identified by fileUrl.  Explicit, so an
// OUString is never implicitly converted into an XmlReader.
43 : explicit XmlReader(OUString const & fileUrl);
44 :
45 : ~XmlReader();
46 :
// Predefined namespace ID values; namespace IDs are plain ints throughout this
// interface.  NOTE(review): presumably NAMESPACE_NONE means "no namespace" and
// NAMESPACE_UNKNOWN means "namespace not registered" -- confirm.
47 : enum { NAMESPACE_NONE = -2, NAMESPACE_UNKNOWN = -1, NAMESPACE_XML = 0 };
48 :
// Values for the reportText parameter of nextItem.  NOTE(review): the names
// suggest skip text / report text as is / report whitespace-normalized text --
// confirm against the implementation.
49 : enum Text { TEXT_NONE, TEXT_RAW, TEXT_NORMALIZED };
50 :
// Kinds of item that nextItem can report (see the table at nextItem).
51 : enum Result { RESULT_BEGIN, RESULT_END, RESULT_TEXT, RESULT_DONE };
52 :
// Registers the namespace IRI iri and returns an int.  NOTE(review):
// presumably the return value is the namespace ID later reported for names in
// that namespace -- confirm.
53 : int registerNamespaceIri(Span const & iri);
54 :
// Advances to the next item and returns its kind.  Use of the out parameters
// per returned kind:
55 : // RESULT_BEGIN: *data = local name of the element, *nsId = its namespace ID
56 : // RESULT_END: data and nsId are unused
57 : // RESULT_TEXT: *data = the text, nsId is unused
58 : Result nextItem(Text reportText, Span * data, int * nsId);
59 :
// Reports an attribute through the out parameters nsId and localName.
// NOTE(review): presumably returns false once there are no more attributes on
// the current element -- confirm.
60 : bool nextAttribute(int * nsId, Span * localName);
61 :
62 : // The span returned by getAttributeValue is only valid until the next call
63 : // to nextItem or getAttributeValue.
64 : Span getAttributeValue(bool fullyNormalize);
65 :
// Returns an int namespace ID for the given prefix.  NOTE(review): which value
// is returned for an unbound prefix is not visible here -- confirm.
66 : int getNamespaceId(Span const & prefix) const;
67 :
// Returns a reference to the fileUrl_ member.
68 235379 : const OUString& getUrl() const { return fileUrl_;}
69 :
70 : private:
// Copying is disabled.
71 : XmlReader(const XmlReader&) SAL_DELETED_FUNCTION;
72 : XmlReader& operator=(const XmlReader&) SAL_DELETED_FUNCTION;
73 :
// Sequence of namespace IRIs; toNamespaceId takes a position in this sequence.
74 : typedef std::vector< Span > NamespaceIris;
75 :
76 : // If NamespaceData (and similarly ElementData and AttributeData) is made
77 : // SAL_DLLPRIVATE, at least gcc 4.2.3 erroneously warns about
78 : // "'xmlreader::XmlReader' declared with greater visibility than the type of
79 : // its field 'xmlreader::XmlReader::namespaces_'" (and similarly for
80 : // elements_ and attributes_):
81 :
// Pairs a namespace prefix with a namespace ID.
82 : struct NamespaceData {
83 : Span prefix;
84 : int nsId;
85 :
// Default construction leaves prefix default-constructed and sets nsId to -1,
// which is numerically the same as NAMESPACE_UNKNOWN.
86 0 : NamespaceData():
87 0 : nsId(-1) {}
88 :
89 68944 : NamespaceData(Span const & thePrefix, int theNsId):
90 68944 : prefix(thePrefix), nsId(theNsId) {}
91 : };
92 :
93 : typedef std::vector< NamespaceData > NamespaceList;
94 :
// Per-element record kept on the element stack.  inheritedNamespaces is a
// NamespaceList size.  NOTE(review): presumably it is the length of
// namespaces_ at the time the element was opened, so that declarations made
// by the element can be dropped when it is closed -- confirm.
95 : struct ElementData {
96 : Span name;
97 : NamespaceList::size_type inheritedNamespaces;
98 : int defaultNamespaceId;
99 :
100 13585816 : ElementData(
101 : Span const & theName,
102 : NamespaceList::size_type theInheritedNamespaces,
103 : int theDefaultNamespaceId):
104 : name(theName), inheritedNamespaces(theInheritedNamespaces),
105 13585816 : defaultNamespaceId(theDefaultNamespaceId)
106 13585816 : {}
107 : };
108 :
109 : typedef std::stack< ElementData > ElementStack;
110 :
// Location of one attribute, stored as five character pointers: begin and end
// of the name, the colon within the name, and begin and end of the value.
// NOTE(review): presumably the pointers refer into the input buffer and
// nameColon is null for a name without a prefix -- confirm.
111 : struct AttributeData {
112 : char const * nameBegin;
113 : char const * nameEnd;
114 : char const * nameColon;
115 : char const * valueBegin;
116 : char const * valueEnd;
117 :
118 14782324 : AttributeData(
119 : char const * theNameBegin, char const * theNameEnd,
120 : char const * theNameColon, char const * theValueBegin,
121 : char const * theValueEnd):
122 : nameBegin(theNameBegin), nameEnd(theNameEnd),
123 : nameColon(theNameColon), valueBegin(theValueBegin),
124 14782324 : valueEnd(theValueEnd)
125 14782324 : {}
126 : };
127 :
128 : typedef std::vector< AttributeData > Attributes;
129 :
// Values of the state_ member.
130 : enum State {
131 : STATE_CONTENT, STATE_START_TAG, STATE_END_TAG, STATE_EMPTY_ELEMENT_TAG,
132 : STATE_DONE };
133 :
// Returns the character at pos_ and advances pos_ by one; once pos_ has
// reached end_ it returns the NUL character and leaves pos_ unchanged.  A NUL
// result therefore does not distinguish end of input from a NUL in the input.
134 29680938 : SAL_DLLPRIVATE inline char read() { return pos_ == end_ ? '\0' : *pos_++; }
135 :
// Same as read, but never advances pos_.
136 575539003 : SAL_DLLPRIVATE inline char peek() { return pos_ == end_ ? '\0' : *pos_; }
137 :
// The remaining member functions are internal helpers that are only declared
// here; what each one does has to be taken from its definition, which is not
// part of this header.
138 : SAL_DLLPRIVATE void normalizeLineEnds(Span const & text);
139 :
140 : SAL_DLLPRIVATE void skipSpace();
141 :
142 : SAL_DLLPRIVATE bool skipComment();
143 :
144 : SAL_DLLPRIVATE void skipProcessingInstruction();
145 :
146 : SAL_DLLPRIVATE void skipDocumentTypeDeclaration();
147 :
148 : SAL_DLLPRIVATE Span scanCdataSection();
149 :
150 : SAL_DLLPRIVATE bool scanName(char const ** nameColon);
151 :
152 : SAL_DLLPRIVATE int scanNamespaceIri(
153 : char const * begin, char const * end);
154 :
155 : SAL_DLLPRIVATE char const * handleReference(
156 : char const * position, char const * end);
157 :
158 : SAL_DLLPRIVATE Span handleAttributeValue(
159 : char const * begin, char const * end, bool fullyNormalize);
160 :
161 : SAL_DLLPRIVATE Result handleStartTag(int * nsId, Span * localName);
162 :
163 : SAL_DLLPRIVATE Result handleEndTag();
164 :
165 : SAL_DLLPRIVATE void handleElementEnd();
166 :
167 : SAL_DLLPRIVATE Result handleSkippedText(Span * data, int * nsId);
168 :
169 : SAL_DLLPRIVATE Result handleRawText(Span * text);
170 :
171 : SAL_DLLPRIVATE Result handleNormalizedText(Span * text);
172 :
173 : SAL_DLLPRIVATE int toNamespaceId(NamespaceIris::size_type pos);
174 :
// Value returned by getUrl.
175 : OUString fileUrl_;
// NOTE(review): fileHandle_, fileSize_ and fileAddress_ presumably describe
// the opened and memory-mapped input file -- confirm in the implementation.
176 : oslFileHandle fileHandle_;
177 : sal_uInt64 fileSize_;
178 : void * fileAddress_;
179 : NamespaceIris namespaceIris_;
180 : NamespaceList namespaces_;
181 : ElementStack elements_;
// Current read position and the position at which read and peek stop.
182 : char const * pos_;
183 : char const * end_;
184 : State state_;
185 : Attributes attributes_;
186 : Attributes::iterator currentAttribute_;
187 : bool firstAttribute_;
188 : Pad pad_;
189 : };
190 :
191 : }
192 :
193 : #endif
194 :
195 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|