Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 :
#include "internal/xml_parser.hxx"
#include "internal/i_xml_parser_event_handler.hxx"

#ifdef _WIN32
#include "internal/utilities.hxx"
#else
#define UTF8ToWString(s) s
#endif

#include <assert.h>
#include <limits>
31 :
32 : namespace /* private */
33 : {
34 :
35 : /* Extracts the local part of tag without
36 : namespace decoration e.g. meta:creator -> creator */
37 : const XML_Char COLON = (XML_Char)':';
38 :
39 0 : const XML_Char* get_local_name(const XML_Char* rawname)
40 : {
41 0 : const XML_Char* p = rawname;
42 :
43 : // go to the end
44 0 : while (*p) p++;
45 :
46 : // go back until the first ':'
47 0 : while (*p != COLON && p > rawname)
48 0 : p--;
49 :
50 : // if we are on a colon one step forward
51 0 : if (*p == COLON)
52 0 : p++;
53 :
54 0 : return p;
55 : }
56 :
57 0 : inline xml_parser* get_parser_instance(void* data)
58 : {
59 : return reinterpret_cast<xml_parser*>(XML_GetUserData(
60 0 : reinterpret_cast<XML_Parser>(data)));
61 : }
62 :
63 0 : bool has_only_whitespaces(const XML_Char* s, int len)
64 : {
65 0 : const XML_Char* p = s;
66 0 : for (int i = 0; i < len; i++)
67 0 : if (*p++ != ' ') return false;
68 0 : return true;
69 : }
70 : }
71 :
72 0 : xml_parser::xml_parser(const XML_Char* EncodingName) :
73 : document_handler_(0),
74 0 : xml_parser_(XML_ParserCreate(EncodingName))
75 : {
76 0 : init();
77 0 : }
78 :
79 0 : xml_parser::~xml_parser()
80 : {
81 0 : XML_ParserFree(xml_parser_);
82 0 : }
83 :
84 : /* Callback functions will be called by the parser on
85 : different events */
86 :
87 : extern "C"
88 : {
89 :
90 0 : static void xml_start_element_handler(void* UserData, const XML_Char* name, const XML_Char** atts)
91 : {
92 : assert(UserData != NULL);
93 :
94 0 : xml_parser* pImpl = get_parser_instance(UserData);
95 :
96 0 : i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler();
97 0 : if (pDocHdl)
98 : {
99 0 : xml_tag_attribute_container_t attributes;
100 :
101 0 : int i = 0;
102 :
103 0 : while(atts[i])
104 : {
105 0 : attributes[UTF8ToWString(reinterpret_cast<const char*>(get_local_name(atts[i])))] = UTF8ToWString(reinterpret_cast<const char*>(atts[i+1]));
106 0 : i += 2; // skip to next pair
107 : }
108 :
109 : pDocHdl->start_element(
110 0 : UTF8ToWString(reinterpret_cast<const char*>(name)), UTF8ToWString(reinterpret_cast<const char*>(get_local_name(name))), attributes);
111 : }
112 0 : }
113 :
114 0 : static void xml_end_element_handler(void* UserData, const XML_Char* name)
115 : {
116 : assert(UserData);
117 :
118 0 : xml_parser* pImpl = get_parser_instance(UserData);
119 0 : i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler();
120 0 : if (pDocHdl)
121 0 : pDocHdl->end_element(UTF8ToWString(reinterpret_cast<const char*>(name)), UTF8ToWString(reinterpret_cast<const char*>(get_local_name(name))));
122 0 : }
123 :
124 0 : static void xml_character_data_handler(void* UserData, const XML_Char* s, int len)
125 : {
126 : assert(UserData);
127 :
128 0 : xml_parser* pImpl = get_parser_instance(UserData);
129 0 : i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler();
130 0 : if (pDocHdl)
131 : {
132 0 : if (has_only_whitespaces(s,len))
133 0 : pDocHdl->ignore_whitespace(UTF8ToWString(std::string(reinterpret_cast<const char*>(s), len)));
134 : else
135 0 : pDocHdl->characters(UTF8ToWString(std::string(reinterpret_cast<const char*>(s), len)));
136 : }
137 0 : }
138 :
139 0 : static void xml_comment_handler(void* UserData, const XML_Char* Data)
140 : {
141 : assert(UserData);
142 :
143 0 : xml_parser* pImpl = get_parser_instance(UserData);
144 0 : i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler();
145 0 : if (pDocHdl)
146 0 : pDocHdl->comment(UTF8ToWString(reinterpret_cast<const char*>(Data)));
147 0 : }
148 :
149 : } // extern "C"
150 :
151 0 : void xml_parser::init()
152 : {
153 0 : XML_SetUserData(xml_parser_, this);
154 :
155 : // we use the parser as handler argument,
156 : // so we could use it if necessary, the
157 : // UserData are usable anyway using
158 : // XML_GetUserData(...)
159 0 : XML_UseParserAsHandlerArg(xml_parser_);
160 :
161 : XML_SetElementHandler(
162 : xml_parser_,
163 : xml_start_element_handler,
164 0 : xml_end_element_handler);
165 :
166 : XML_SetCharacterDataHandler(
167 : xml_parser_,
168 0 : xml_character_data_handler);
169 :
170 : XML_SetCommentHandler(
171 : xml_parser_,
172 0 : xml_comment_handler);
173 0 : }
174 :
175 0 : void xml_parser::parse(const char* XmlData, size_t Length, bool IsFinal)
176 : {
177 0 : if (XML_STATUS_ERROR ==
178 0 : XML_Parse(xml_parser_, XmlData, static_cast<int>(Length), IsFinal))
179 : {
180 : throw xml_parser_exception(
181 0 : (char*)XML_ErrorString(XML_GetErrorCode(xml_parser_)),
182 0 : (int)XML_GetErrorCode(xml_parser_),
183 0 : XML_GetCurrentLineNumber(xml_parser_),
184 0 : XML_GetCurrentColumnNumber(xml_parser_),
185 0 : XML_GetCurrentByteIndex(xml_parser_));
186 : }
187 0 : }
188 :
189 0 : void xml_parser::set_document_handler(
190 : i_xml_parser_event_handler* event_handler)
191 : {
192 0 : document_handler_ = event_handler;
193 0 : }
194 :
195 :
196 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|