Line data Source code
1 : /*************************************************************************
2 : *
3 : * Copyright (c) 2012 Kohei Yoshida
4 : *
5 : * Permission is hereby granted, free of charge, to any person
6 : * obtaining a copy of this software and associated documentation
7 : * files (the "Software"), to deal in the Software without
8 : * restriction, including without limitation the rights to use,
9 : * copy, modify, merge, publish, distribute, sublicense, and/or sell
10 : * copies of the Software, and to permit persons to whom the
11 : * Software is furnished to do so, subject to the following
12 : * conditions:
13 : *
14 : * The above copyright notice and this permission notice shall be
15 : * included in all copies or substantial portions of the Software.
16 : *
17 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 : * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19 : * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 : * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21 : * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22 : * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 : * OTHER DEALINGS IN THE SOFTWARE.
25 : *
26 : ************************************************************************/
27 :
28 : #include "orcus/xml_namespace.hpp"
29 : #include "orcus/exception.hpp"
30 : #include "string_pool.hpp"
31 :
32 : #include <boost/unordered_map.hpp>
33 : #include <vector>
34 : #include <limits>
35 : #include <sstream>
36 :
37 : #define ORCUS_DEBUG_XML_NAMESPACE 0
38 :
39 : using namespace std;
40 :
41 : #if ORCUS_DEBUG_XML_NAMESPACE
42 : #include <cstdio>
43 : #include <iostream>
44 : #endif
45 :
46 : namespace orcus {
47 :
48 : namespace {
49 :
50 : #if ORCUS_DEBUG_XML_NAMESPACE
/**
 * Debug helper: write every key of a map-like container to standard output
 * in the form "keys: ('k1' 'k2' ...)".  No trailing newline is written.
 *
 * @param map_store container whose iterators expose a streamable 'first'
 *                  member.
 */
template<typename MapT>
void print_map_keys(const MapT& map_store)
{
    std::cout << "keys: (";

    // Empty before the first key, a single space before every later one.
    const char* separator = "";
    typename MapT::const_iterator it = map_store.begin(), it_end = map_store.end();
    for (; it != it_end; ++it)
    {
        std::cout << separator << "'" << it->first << "'";
        separator = " ";
    }

    std::cout << ")";
}
67 : #endif
68 :
69 : }
70 :
71 : typedef boost::unordered_map<pstring, size_t, pstring::hash> strid_map_type;
72 :
73 0 : struct xmlns_repository_impl
74 : {
75 : string_pool m_pool; /// storage of live string instances.
76 : std::vector<pstring> m_identifiers; /// map strings to numerical identifiers.
77 : strid_map_type m_strid_map;
78 : };
79 :
80 0 : xmlns_repository::xmlns_repository() : mp_impl(new xmlns_repository_impl) {}
81 :
82 0 : xmlns_repository::~xmlns_repository()
83 : {
84 0 : delete mp_impl;
85 0 : }
86 :
87 0 : xmlns_id_t xmlns_repository::intern(const pstring& uri)
88 : {
89 0 : if (uri.empty())
90 : return XMLNS_UNKNOWN_ID;
91 :
92 : try
93 : {
94 0 : std::pair<pstring, bool> r = mp_impl->m_pool.intern(uri);
95 0 : pstring uri_interned = r.first;
96 0 : if (!uri_interned.empty())
97 : {
98 : // Intern successful.
99 0 : if (r.second)
100 : {
101 : // This is a new instance. Assign a numerical identifier.
102 : mp_impl->m_strid_map.insert(
103 0 : strid_map_type::value_type(r.first, mp_impl->m_identifiers.size()));
104 : #if ORCUS_DEBUG_XML_NAMESPACE
105 : cout << "xmlns_repository::intern: uri='" << uri_interned << "' (" << mp_impl->m_identifiers.size() << ")" << endl;
106 : #endif
107 0 : mp_impl->m_identifiers.push_back(r.first);
108 :
109 0 : assert(mp_impl->m_pool.size() == mp_impl->m_identifiers.size());
110 0 : assert(mp_impl->m_pool.size() == mp_impl->m_strid_map.size());
111 : }
112 : return uri_interned.get();
113 : }
114 : }
115 0 : catch (const general_error&)
116 : {
117 : }
118 :
119 : return XMLNS_UNKNOWN_ID;
120 : }
121 :
122 0 : xmlns_context xmlns_repository::create_context()
123 : {
124 0 : return xmlns_context(*this);
125 : }
126 :
127 0 : xmlns_id_t xmlns_repository::get_identifier(size_t index) const
128 : {
129 0 : if (index >= mp_impl->m_identifiers.size())
130 : return XMLNS_UNKNOWN_ID;
131 :
132 : // All identifier strings are interned which means they are all null-terminated.
133 0 : return mp_impl->m_identifiers[index].get();
134 : }
135 :
136 0 : string xmlns_repository::get_short_name(xmlns_id_t ns_id) const
137 : {
138 0 : size_t index = get_index(ns_id);
139 0 : return get_short_name(index);
140 : }
141 :
142 0 : string xmlns_repository::get_short_name(size_t index) const
143 : {
144 0 : if (index == xmlns_context::index_not_found)
145 0 : return string("???");
146 :
147 0 : ostringstream os;
148 0 : os << "ns" << index;
149 0 : return os.str();
150 : }
151 :
152 0 : size_t xmlns_repository::get_index(xmlns_id_t ns_id) const
153 : {
154 0 : if (!ns_id)
155 0 : return xmlns_context::index_not_found;
156 :
157 0 : strid_map_type::const_iterator it = mp_impl->m_strid_map.find(pstring(ns_id));
158 0 : if (it == mp_impl->m_strid_map.end())
159 0 : return xmlns_context::index_not_found;
160 :
161 0 : return it->second;
162 : }
163 :
164 : typedef std::vector<xmlns_id_t> xmlns_list_type;
165 : typedef boost::unordered_map<pstring, xmlns_list_type, pstring::hash> alias_map_type;
166 :
167 0 : struct xmlns_context_impl
168 : {
169 : xmlns_repository& m_repo;
170 : xmlns_list_type m_all_ns; /// all namespaces ever used in this context.
171 : xmlns_list_type m_default;
172 : alias_map_type m_map;
173 :
174 : bool m_trim_all_ns;
175 :
176 0 : xmlns_context_impl(xmlns_repository& repo) : m_repo(repo), m_trim_all_ns(true) {}
177 0 : xmlns_context_impl(const xmlns_context_impl& r) :
178 0 : m_repo(r.m_repo), m_all_ns(r.m_all_ns), m_default(r.m_default), m_map(r.m_map), m_trim_all_ns(r.m_trim_all_ns) {}
179 : };
180 :
181 8 : size_t xmlns_context::index_not_found = std::numeric_limits<size_t>::max();
182 :
183 0 : xmlns_context::xmlns_context(xmlns_repository& repo) : mp_impl(new xmlns_context_impl(repo)) {}
184 0 : xmlns_context::xmlns_context(const xmlns_context& r) : mp_impl(new xmlns_context_impl(*r.mp_impl)) {}
185 :
186 0 : xmlns_context::~xmlns_context()
187 : {
188 0 : delete mp_impl;
189 0 : }
190 :
191 0 : xmlns_id_t xmlns_context::push(const pstring& key, const pstring& uri)
192 : {
193 : #if ORCUS_DEBUG_XML_NAMESPACE
194 : cout << "xmlns_context::push: key='" << key << "', uri='" << uri << "'" << endl;
195 : #endif
196 0 : if (uri.empty())
197 : return XMLNS_UNKNOWN_ID;
198 :
199 0 : mp_impl->m_trim_all_ns = true;
200 :
201 0 : pstring uri_interned = mp_impl->m_repo.intern(uri);
202 :
203 0 : if (key.empty())
204 : {
205 : // empty key value is associated with default namespace.
206 0 : mp_impl->m_default.push_back(uri_interned.get());
207 0 : mp_impl->m_all_ns.push_back(uri_interned.get());
208 0 : return mp_impl->m_default.back();
209 : }
210 :
211 : // See if this key already exists.
212 0 : alias_map_type::iterator it = mp_impl->m_map.find(key);
213 0 : if (it == mp_impl->m_map.end())
214 : {
215 : // This is the first time this key is used.
216 : xmlns_list_type nslist;
217 0 : nslist.push_back(uri_interned.get());
218 0 : mp_impl->m_all_ns.push_back(uri_interned.get());
219 : std::pair<alias_map_type::iterator,bool> r =
220 0 : mp_impl->m_map.insert(alias_map_type::value_type(key, nslist));
221 :
222 0 : if (!r.second)
223 : // insertion failed.
224 0 : throw general_error("Failed to insert new namespace.");
225 :
226 0 : return nslist.back();
227 : }
228 :
229 : // The key already exists.
230 0 : xmlns_list_type& nslist = it->second;
231 0 : nslist.push_back(uri_interned.get());
232 0 : mp_impl->m_all_ns.push_back(uri_interned.get());
233 0 : return nslist.back();
234 : }
235 :
236 0 : void xmlns_context::pop(const pstring& key)
237 : {
238 0 : if (key.empty())
239 : {
240 : // empty key value is associated with default namespace.
241 0 : if (mp_impl->m_default.empty())
242 0 : throw general_error("default namespace stack is empty.");
243 :
244 : mp_impl->m_default.pop_back();
245 0 : return;
246 : }
247 :
248 : // See if this key really exists.
249 0 : alias_map_type::iterator it = mp_impl->m_map.find(key);
250 0 : if (it == mp_impl->m_map.end())
251 0 : throw general_error("failed to find the key.");
252 :
253 : xmlns_list_type& nslist = it->second;
254 0 : if (nslist.empty())
255 0 : throw general_error("namespace stack for this key is empty.");
256 :
257 : nslist.pop_back();
258 : }
259 :
260 0 : xmlns_id_t xmlns_context::get(const pstring& key) const
261 : {
262 : #if ORCUS_DEBUG_XML_NAMESPACE
263 : cout << "xmlns_context::get: alias='" << key << "', default ns stack size="
264 : << mp_impl->m_default.size() << ", non-default alias count=" << mp_impl->m_map.size();
265 : cout << ", ";
266 : print_map_keys(mp_impl->m_map);
267 : cout << endl;
268 : #endif
269 0 : if (key.empty())
270 0 : return mp_impl->m_default.empty() ? XMLNS_UNKNOWN_ID : mp_impl->m_default.back();
271 :
272 0 : alias_map_type::const_iterator it = mp_impl->m_map.find(key);
273 0 : if (it == mp_impl->m_map.end())
274 : {
275 : #if ORCUS_DEBUG_XML_NAMESPACE
276 : cout << "xmlns_context::get: alias not in this context" << endl;
277 : #endif
278 : return XMLNS_UNKNOWN_ID;
279 : }
280 :
281 : #if ORCUS_DEBUG_XML_NAMESPACE
282 : cout << "xmlns_context::get: alias stack size=" << it->second.size() << endl;
283 : #endif
284 0 : return it->second.empty() ? XMLNS_UNKNOWN_ID : it->second.back();
285 : }
286 :
287 0 : size_t xmlns_context::get_index(xmlns_id_t ns_id) const
288 : {
289 0 : return mp_impl->m_repo.get_index(ns_id);
290 : }
291 :
292 0 : string xmlns_context::get_short_name(xmlns_id_t ns_id) const
293 : {
294 0 : return mp_impl->m_repo.get_short_name(ns_id);
295 : }
296 :
297 : namespace {
298 :
299 : #if ORCUS_DEBUG_XML_NAMESPACE
300 : struct print_ns : std::unary_function<xmlns_id_t, void>
301 : {
302 : void operator() (xmlns_id_t ns_id) const
303 : {
304 : const char* p = ns_id;
305 : printf("%p: %s\n", p, p);
306 : }
307 : };
308 : #endif
309 :
310 : struct ns_item
311 : {
312 : size_t index;
313 : xmlns_id_t ns;
314 :
315 0 : ns_item(size_t _index, xmlns_id_t _ns) : index(_index), ns(_ns) {}
316 : };
317 :
318 : struct less_ns_by_index : binary_function<ns_item, ns_item, bool>
319 : {
320 0 : bool operator() (const ns_item& left, const ns_item& right) const
321 : {
322 0 : return left.index < right.index;
323 : }
324 : };
325 :
326 : class push_back_ns_to_item : unary_function<xmlns_id_t, void>
327 : {
328 : vector<ns_item>& m_store;
329 : const xmlns_context& m_cxt;
330 : public:
331 0 : push_back_ns_to_item(vector<ns_item>& store, const xmlns_context& cxt) : m_store(store), m_cxt(cxt) {}
332 0 : void operator() (xmlns_id_t ns)
333 : {
334 0 : size_t num_id = m_cxt.get_index(ns);
335 0 : if (num_id != xmlns_context::index_not_found)
336 0 : m_store.push_back(ns_item(num_id, ns));
337 0 : }
338 : };
339 :
340 : class push_back_item_to_ns : unary_function<ns_item, void>
341 : {
342 : std::vector<xmlns_id_t>& m_store;
343 : public:
344 0 : push_back_item_to_ns(std::vector<xmlns_id_t>& store) : m_store(store) {}
345 0 : void operator() (const ns_item& item)
346 : {
347 0 : m_store.push_back(item.ns);
348 0 : }
349 : };
350 :
351 : }
352 :
353 0 : void xmlns_context::get_all_namespaces(std::vector<xmlns_id_t>& nslist) const
354 : {
355 : #if ORCUS_DEBUG_XML_NAMESPACE
356 : cout << "xmlns_context::get_all_namespaces: count=" << mp_impl->m_all_ns.size() << endl;
357 : std::for_each(mp_impl->m_all_ns.begin(), mp_impl->m_all_ns.end(), print_ns());
358 : #endif
359 :
360 0 : if (mp_impl->m_trim_all_ns)
361 : {
362 0 : xmlns_list_type& all_ns = mp_impl->m_all_ns;
363 :
364 : nslist.assign(mp_impl->m_all_ns.begin(), mp_impl->m_all_ns.end());
365 :
366 : // Sort it and remove duplicate.
367 0 : std::sort(all_ns.begin(), all_ns.end());
368 : xmlns_list_type::iterator it_unique_end =
369 0 : std::unique(all_ns.begin(), all_ns.end());
370 0 : all_ns.erase(it_unique_end, all_ns.end());
371 :
372 : // Now, sort by indices.
373 : vector<ns_item> items;
374 0 : std::for_each(all_ns.begin(), all_ns.end(), push_back_ns_to_item(items, *this));
375 0 : std::sort(items.begin(), items.end(), less_ns_by_index());
376 :
377 : all_ns.clear();
378 0 : std::for_each(items.begin(), items.end(), push_back_item_to_ns(all_ns));
379 :
380 0 : mp_impl->m_trim_all_ns = false;
381 : }
382 :
383 0 : nslist.assign(mp_impl->m_all_ns.begin(), mp_impl->m_all_ns.end());
384 0 : }
385 :
386 0 : void xmlns_context::dump(std::ostream& os) const
387 : {
388 : vector<xmlns_id_t> nslist;
389 0 : get_all_namespaces(nslist);
390 : vector<xmlns_id_t>::const_iterator it = nslist.begin(), it_end = nslist.end();
391 0 : for (; it != it_end; ++it)
392 : {
393 0 : xmlns_id_t ns_id = *it;
394 0 : size_t num_id = get_index(ns_id);
395 0 : if (num_id == index_not_found)
396 0 : continue;
397 :
398 0 : os << "ns" << num_id << "=\"" << ns_id << '"' << endl;
399 0 : }
400 0 : }
401 :
402 24 : }
|