LCOV - code coverage report
Current view: top level - libreoffice/workdir/unxlngi6.pro/UnpackedTarball/orcus/src/liborcus - xml_namespace.cpp (source / functions) Hit Total Coverage
Test: libreoffice_filtered.info Lines: 2 123 1.6 %
Date: 2012-12-17 Functions: 2 28 7.1 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*************************************************************************
       2             :  *
       3             :  * Copyright (c) 2012 Kohei Yoshida
       4             :  *
       5             :  * Permission is hereby granted, free of charge, to any person
       6             :  * obtaining a copy of this software and associated documentation
       7             :  * files (the "Software"), to deal in the Software without
       8             :  * restriction, including without limitation the rights to use,
       9             :  * copy, modify, merge, publish, distribute, sublicense, and/or sell
      10             :  * copies of the Software, and to permit persons to whom the
      11             :  * Software is furnished to do so, subject to the following
      12             :  * conditions:
      13             :  *
      14             :  * The above copyright notice and this permission notice shall be
      15             :  * included in all copies or substantial portions of the Software.
      16             :  *
      17             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
      18             :  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
      19             :  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
      20             :  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
      21             :  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
      22             :  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
      23             :  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
      24             :  * OTHER DEALINGS IN THE SOFTWARE.
      25             :  *
      26             :  ************************************************************************/
      27             : 
      28             : #include "orcus/xml_namespace.hpp"
      29             : #include "orcus/exception.hpp"
      30             : #include "string_pool.hpp"
      31             : 
      32             : #include <boost/unordered_map.hpp>
      33             : #include <vector>
      34             : #include <limits>
      35             : #include <sstream>
      36             : 
      37             : #define ORCUS_DEBUG_XML_NAMESPACE 0
      38             : 
      39             : using namespace std;
      40             : 
      41             : #if ORCUS_DEBUG_XML_NAMESPACE
      42             : #include <cstdio>
      43             : #include <iostream>
      44             : #endif
      45             : 
      46             : namespace orcus {
      47             : 
      48             : namespace {
      49             : 
      50             : #if ORCUS_DEBUG_XML_NAMESPACE
      51             : template<typename _MapType>
      52             : void print_map_keys(const _MapType& map_store)
      53             : {
                           // Debug helper, compiled only when ORCUS_DEBUG_XML_NAMESPACE is
                           // non-zero.  Writes every key of map_store to cout in the form
                           // "keys: ('k1' 'k2' ...)" and does not emit a trailing newline.
      54             :     cout << "keys: (";
      55             :     bool first = true; // no separator is written before the first key.
      56             :     typename _MapType::const_iterator it = map_store.begin(), it_end = map_store.end();
      57             :     for (; it != it_end; ++it)
      58             :     {
      59             :         if (first)
      60             :             first = false;
      61             :         else
      62             :             cout << " ";
      63             :         cout << "'" << it->first << "'";
      64             :     }
      65             :     cout << ")";
      66             : }; // NOTE(review): the ';' after the function body is a stray empty declaration.
      67             : #endif
      68             : 
      69             : }
      70             : 
      71             : typedef boost::unordered_map<pstring, size_t, pstring::hash> strid_map_type; // string -> numerical index lookup table.
      72             : 
      73           0 : struct xmlns_repository_impl
      74             : {
                           // Private state of xmlns_repository, reached through mp_impl.
      75             :     string_pool m_pool; /// storage of live string instances.
      76             :     std::vector<pstring> m_identifiers; /// interned strings in the order they were first interned; the position is the numerical identifier.
      77             :     strid_map_type m_strid_map; /// reverse lookup: interned string -> its position in m_identifiers.
      78             : };
      79             : 
      80           0 : xmlns_repository::xmlns_repository() : mp_impl(new xmlns_repository_impl) {} // allocates the private state; it is deleted in the destructor.
      81             : 
      82           0 : xmlns_repository::~xmlns_repository()
      83             : {
                           // Releases the implementation object allocated by the constructor.
      84           0 :     delete mp_impl;
      85           0 : }
      86             : 
      87           0 : xmlns_id_t xmlns_repository::intern(const pstring& uri)
      88             : {
                           // Interns `uri` in the repository's string pool and returns the
                           // namespace identifier obtained from the interned string via get().
                           // The first time a URI is seen it is also given the next numerical
                           // index (its position in m_identifiers).
                           //
                           // Returns XMLNS_UNKNOWN_ID when `uri` is empty, when the pool hands
                           // back an empty string, or when a general_error is caught.
      89           0 :     if (uri.empty())
      90             :         return XMLNS_UNKNOWN_ID;
      91             : 
      92             :     try
      93             :     {
                               // r.first is the interned string; r.second is true when the pool
                               // created a new entry for it.
      94           0 :         std::pair<pstring, bool> r = mp_impl->m_pool.intern(uri);
      95           0 :         pstring uri_interned = r.first;
      96           0 :         if (!uri_interned.empty())
      97             :         {
      98             :             // Intern successful.
      99           0 :             if (r.second)
     100             :             {
     101             :                 // This is a new instance. Assign a numerical identifier.
                                       // The index is taken from size() *before* the push_back
                                       // below, so the map entry and the vector slot agree.
     102             :                 mp_impl->m_strid_map.insert(
     103           0 :                     strid_map_type::value_type(r.first, mp_impl->m_identifiers.size()));
     104             : #if ORCUS_DEBUG_XML_NAMESPACE
     105             :                 cout << "xmlns_repository::intern: uri='" << uri_interned << "' (" << mp_impl->m_identifiers.size() << ")" << endl;
     106             : #endif
     107           0 :                 mp_impl->m_identifiers.push_back(r.first);
     108             : 
                                       // Pool, index vector and reverse map must stay the same size.
                                       // NOTE(review): <cassert> is not among the includes visible in
                                       // this file; presumably it arrives through another header.
     109           0 :                 assert(mp_impl->m_pool.size() == mp_impl->m_identifiers.size());
     110           0 :                 assert(mp_impl->m_pool.size() == mp_impl->m_strid_map.size());
     111             :             }
     112             :             return uri_interned.get();
     113             :         }
     114             :     }
     115           0 :     catch (const general_error&)
     116             :     {
                               // Deliberately swallowed: the failure is reported to the caller
                               // through the XMLNS_UNKNOWN_ID return below.
     117             :     }
     118             : 
     119             :     return XMLNS_UNKNOWN_ID;
     120             : }
     121             : 
     122           0 : xmlns_context xmlns_repository::create_context()
     123             : {
                           // Returns a new xmlns_context constructed from this repository.
     124           0 :     return xmlns_context(*this);
     125             : }
     126             : 
     127           0 : xmlns_id_t xmlns_repository::get_identifier(size_t index) const
     128             : {
                           // Returns the namespace identifier stored at numerical position
                           // `index`, or XMLNS_UNKNOWN_ID when `index` is out of range.
     129           0 :     if (index >= mp_impl->m_identifiers.size())
     130             :         return XMLNS_UNKNOWN_ID;
     131             : 
     132             :     // All identifier strings are interned which means they are all null-terminated.
     133           0 :     return mp_impl->m_identifiers[index].get();
     134             : }
     135             : 
     136           0 : string xmlns_repository::get_short_name(xmlns_id_t ns_id) const
     137             : {
                           // Convenience overload: resolves ns_id to its numerical index and
                           // delegates to get_short_name(size_t).  An unknown id therefore
                           // yields the "???" placeholder produced by that overload.
     138           0 :     size_t index = get_index(ns_id);
     139           0 :     return get_short_name(index);
     140             : }
     141             : 
     142           0 : string xmlns_repository::get_short_name(size_t index) const
     143             : {
                           // Builds the short name "ns<index>" for a numerical namespace index.
                           // Returns "???" when index equals xmlns_context::index_not_found.
                           // The index is not checked against the repository's contents.
     144           0 :     if (index == xmlns_context::index_not_found)
     145           0 :         return string("???");
     146             : 
     147           0 :     ostringstream os;
     148           0 :     os << "ns" << index;
     149           0 :     return os.str();
     150             : }
     151             : 
     152           0 : size_t xmlns_repository::get_index(xmlns_id_t ns_id) const
     153             : {
                           // Returns the numerical index assigned to ns_id by intern(), or
                           // xmlns_context::index_not_found when ns_id is null or is not
                           // present in the repository's reverse lookup map.
     154           0 :     if (!ns_id)
     155           0 :         return xmlns_context::index_not_found;
     156             : 
     157           0 :     strid_map_type::const_iterator it = mp_impl->m_strid_map.find(pstring(ns_id));
     158           0 :     if (it == mp_impl->m_strid_map.end())
     159           0 :         return xmlns_context::index_not_found;
     160             : 
     161           0 :     return it->second;
     162             : }
     163             : 
     164             : typedef std::vector<xmlns_id_t> xmlns_list_type; // list of namespace identifiers; used as a stack via push_back/pop_back/back.
     165             : typedef boost::unordered_map<pstring, xmlns_list_type, pstring::hash> alias_map_type; // alias -> stack of namespaces bound to that alias.
     166             : 
     167           0 : struct xmlns_context_impl
     168             : {
                           // Private state of xmlns_context, reached through mp_impl.
     169             :     xmlns_repository& m_repo; /// repository used to intern URIs and to look up indices.
     170             :     xmlns_list_type m_all_ns; /// all namespaces ever used in this context.
     171             :     xmlns_list_type m_default; /// stack of namespaces pushed with an empty alias (the default namespace).
     172             :     alias_map_type m_map; /// per-alias namespace stacks for non-empty aliases.
     173             : 
                           // True while m_all_ns may hold duplicates or be unordered: set by
                           // push(), cleared once get_all_namespaces() has normalized the list.
     174             :     bool m_trim_all_ns;
     175             : 
     176           0 :     xmlns_context_impl(xmlns_repository& repo) : m_repo(repo), m_trim_all_ns(true) {}
                           // Member-wise copy; the copy refers to the same repository as `r`.
     177           0 :     xmlns_context_impl(const xmlns_context_impl& r) :
     178           0 :         m_repo(r.m_repo), m_all_ns(r.m_all_ns), m_default(r.m_default), m_map(r.m_map), m_trim_all_ns(r.m_trim_all_ns) {}
     179             : };
     180             : 
     181           8 : size_t xmlns_context::index_not_found = std::numeric_limits<size_t>::max(); // sentinel returned by get_index() when a namespace has no numerical index.
     182             : 
     183           0 : xmlns_context::xmlns_context(xmlns_repository& repo) : mp_impl(new xmlns_context_impl(repo)) {} // starts an empty context bound to `repo`.
     184           0 : xmlns_context::xmlns_context(const xmlns_context& r) : mp_impl(new xmlns_context_impl(*r.mp_impl)) {} // copies r's state into a separately allocated impl.
     185             : 
     186           0 : xmlns_context::~xmlns_context()
     187             : {
                           // Releases the implementation object allocated by the constructors.
     188           0 :     delete mp_impl;
     189           0 : }
     190             : 
     191           0 : xmlns_id_t xmlns_context::push(const pstring& key, const pstring& uri)
     192             : {
                           // Binds namespace `uri` to alias `key` by pushing it onto that
                           // alias's stack; an empty `key` targets the default-namespace stack.
                           // The namespace is also appended to the list of all namespaces used
                           // in this context.
                           //
                           // Returns the identifier just pushed, or XMLNS_UNKNOWN_ID when `uri`
                           // is empty (in which case nothing is modified).
                           // Throws general_error if inserting a new alias into the map fails.
     193             : #if ORCUS_DEBUG_XML_NAMESPACE
     194             :     cout << "xmlns_context::push: key='" << key << "', uri='" << uri << "'" << endl;
     195             : #endif
     196           0 :     if (uri.empty())
     197             :         return XMLNS_UNKNOWN_ID;
     198             : 
                           // m_all_ns is about to grow, so it must be re-normalized on the
                           // next get_all_namespaces() call.
     199           0 :     mp_impl->m_trim_all_ns = true;
     200             : 
                           // NOTE(review): the result of intern() is not checked for
                           // XMLNS_UNKNOWN_ID before being pushed below — confirm whether a
                           // failed intern needs handling here.
     201           0 :     pstring uri_interned = mp_impl->m_repo.intern(uri);
     202             : 
     203           0 :     if (key.empty())
     204             :     {
     205             :         // empty key value is associated with default namespace.
     206           0 :         mp_impl->m_default.push_back(uri_interned.get());
     207           0 :         mp_impl->m_all_ns.push_back(uri_interned.get());
     208           0 :         return mp_impl->m_default.back();
     209             :     }
     210             : 
     211             :     // See if this key already exists.
     212           0 :     alias_map_type::iterator it = mp_impl->m_map.find(key);
     213           0 :     if (it == mp_impl->m_map.end())
     214             :     {
     215             :         // This is the first time this key is used.
     216             :         xmlns_list_type nslist;
     217           0 :         nslist.push_back(uri_interned.get());
     218           0 :         mp_impl->m_all_ns.push_back(uri_interned.get());
     219             :         std::pair<alias_map_type::iterator,bool> r =
     220           0 :             mp_impl->m_map.insert(alias_map_type::value_type(key, nslist));
     221             : 
     222           0 :         if (!r.second)
     223             :             // insertion failed.
     224           0 :             throw general_error("Failed to insert new namespace.");
     225             : 
                               // The map holds its own copy of nslist; the local still contains
                               // the single identifier pushed above.
     226           0 :         return nslist.back();
     227             :     }
     228             : 
     229             :     // The key already exists.
     230           0 :     xmlns_list_type& nslist = it->second;
     231           0 :     nslist.push_back(uri_interned.get());
     232           0 :     mp_impl->m_all_ns.push_back(uri_interned.get());
     233           0 :     return nslist.back();
     234             : }
     235             : 
     236           0 : void xmlns_context::pop(const pstring& key)
     237             : {
                           // Removes the most recently pushed namespace for alias `key`; an
                           // empty `key` pops the default-namespace stack.  The list of all
                           // namespaces ever used (m_all_ns) is not touched.
                           //
                           // Throws general_error when the alias is unknown or its stack is
                           // already empty.
     238           0 :     if (key.empty())
     239             :     {
     240             :         // empty key value is associated with default namespace.
     241           0 :         if (mp_impl->m_default.empty())
     242           0 :             throw general_error("default namespace stack is empty.");
     243             : 
     244             :         mp_impl->m_default.pop_back();
     245           0 :         return;
     246             :     }
     247             : 
     248             :     // See if this key really exists.
     249           0 :     alias_map_type::iterator it = mp_impl->m_map.find(key);
     250           0 :     if (it == mp_impl->m_map.end())
     251           0 :         throw general_error("failed to find the key.");
     252             : 
                           // The alias entry itself stays in the map even when its stack
                           // becomes empty after this pop.
     253             :     xmlns_list_type& nslist = it->second;
     254           0 :     if (nslist.empty())
     255           0 :         throw general_error("namespace stack for this key is empty.");
     256             : 
     257             :     nslist.pop_back();
     258             : }
     259             : 
     260           0 : xmlns_id_t xmlns_context::get(const pstring& key) const
     261             : {
                           // Returns the namespace currently on top of the stack for alias
                           // `key` (empty `key` = default namespace).  Returns XMLNS_UNKNOWN_ID
                           // when the alias is unknown or its stack is empty.
     262             : #if ORCUS_DEBUG_XML_NAMESPACE
     263             :     cout << "xmlns_context::get: alias='" << key << "', default ns stack size="
     264             :         << mp_impl->m_default.size() << ", non-default alias count=" << mp_impl->m_map.size();
     265             :     cout << ", ";
     266             :     print_map_keys(mp_impl->m_map);
     267             :     cout << endl;
     268             : #endif
     269           0 :     if (key.empty())
     270           0 :         return mp_impl->m_default.empty() ? XMLNS_UNKNOWN_ID : mp_impl->m_default.back();
     271             : 
     272           0 :     alias_map_type::const_iterator it = mp_impl->m_map.find(key);
     273           0 :     if (it == mp_impl->m_map.end())
     274             :     {
     275             : #if ORCUS_DEBUG_XML_NAMESPACE
     276             :         cout << "xmlns_context::get: alias not in this context" << endl;
     277             : #endif
     278             :         return XMLNS_UNKNOWN_ID;
     279             :     }
     280             : 
     281             : #if ORCUS_DEBUG_XML_NAMESPACE
     282             :     cout << "xmlns_context::get: alias stack size=" << it->second.size() << endl;
     283             : #endif
                           // An alias whose stack has been fully popped is treated as unknown.
     284           0 :     return it->second.empty() ? XMLNS_UNKNOWN_ID : it->second.back();
     285             : }
     286             : 
     287           0 : size_t xmlns_context::get_index(xmlns_id_t ns_id) const
     288             : {
                           // Forwards to the repository this context was created with.
     289           0 :     return mp_impl->m_repo.get_index(ns_id);
     290             : }
     291             : 
     292           0 : string xmlns_context::get_short_name(xmlns_id_t ns_id) const
     293             : {
                           // Forwards to the repository this context was created with.
     294           0 :     return mp_impl->m_repo.get_short_name(ns_id);
     295             : }
     296             : 
     297             : namespace {
     298             : 
     299             : #if ORCUS_DEBUG_XML_NAMESPACE
     300             : struct print_ns : std::unary_function<xmlns_id_t, void>
     301             : {
                           // Debug-only functor: prints a namespace identifier as
                           // "<pointer>: <string>" followed by a newline.
     302             :     void operator() (xmlns_id_t ns_id) const
     303             :     {
     304             :         const char* p = ns_id;
     305             :         printf("%p: %s\n", p, p);
     306             :     }
     307             : };
     308             : #endif
     309             : 
     310             : struct ns_item
     311             : {
                           // Pairs a namespace identifier with its numerical index so that a
                           // list of namespaces can be ordered by index.
     312             :     size_t index; // numerical index of `ns`.
     313             :     xmlns_id_t ns; // the namespace identifier itself.
     314             : 
     315           0 :     ns_item(size_t _index, xmlns_id_t _ns) : index(_index), ns(_ns) {}
     316             : };
     317             : 
     318             : struct less_ns_by_index : binary_function<ns_item, ns_item, bool>
     319             : {
                           // Ordering predicate: compares two ns_item values by index only.
                           // NOTE(review): binary_function comes from <functional>, which is
                           // not among the includes visible in this file.
     320           0 :     bool operator() (const ns_item& left, const ns_item& right) const
     321             :     {
     322           0 :         return left.index < right.index;
     323             :     }
     324             : };
     325             : 
     326             : class push_back_ns_to_item : unary_function<xmlns_id_t, void>
     327             : {
                           // Functor that appends an ns_item (index + identifier) to m_store for
                           // each namespace it is called with.  Namespaces for which the
                           // context reports index_not_found are silently skipped.
     328             :     vector<ns_item>& m_store; // destination; owned by the caller.
     329             :     const xmlns_context& m_cxt; // context used to resolve identifiers to indices.
     330             : public:
     331           0 :     push_back_ns_to_item(vector<ns_item>& store, const xmlns_context& cxt) : m_store(store), m_cxt(cxt) {}
     332           0 :     void operator() (xmlns_id_t ns)
     333             :     {
     334           0 :         size_t num_id = m_cxt.get_index(ns);
     335           0 :         if (num_id != xmlns_context::index_not_found)
     336           0 :             m_store.push_back(ns_item(num_id, ns));
     337           0 :     }
     338             : };
     339             : 
     340             : class push_back_item_to_ns : unary_function<ns_item, void>
     341             : {
                           // Functor that appends the identifier of each ns_item it is called
                           // with to m_store, discarding the index.
     342             :     std::vector<xmlns_id_t>& m_store; // destination; owned by the caller.
     343             : public:
     344           0 :     push_back_item_to_ns(std::vector<xmlns_id_t>& store) : m_store(store) {}
     345           0 :     void operator() (const ns_item& item)
     346             :     {
     347           0 :         m_store.push_back(item.ns);
     348           0 :     }
     349             : };
     350             : 
     351             : }
     352             : 
     353           0 : void xmlns_context::get_all_namespaces(std::vector<xmlns_id_t>& nslist) const
     354             : {
                           // Replaces the contents of `nslist` with every namespace ever pushed
                           // in this context, without duplicates, ordered by numerical index.
                           //
                           // The stored list is normalized lazily: only when push() has been
                           // called since the last normalization (m_trim_all_ns is true).
                           // Although this method is const, it rewrites m_all_ns and clears
                           // m_trim_all_ns through mp_impl.
     355             : #if ORCUS_DEBUG_XML_NAMESPACE
     356             :     cout << "xmlns_context::get_all_namespaces: count=" << mp_impl->m_all_ns.size() << endl;
     357             :     std::for_each(mp_impl->m_all_ns.begin(), mp_impl->m_all_ns.end(), print_ns());
     358             : #endif
     359             : 
     360           0 :     if (mp_impl->m_trim_all_ns)
     361             :     {
     362           0 :         xmlns_list_type& all_ns = mp_impl->m_all_ns;
     363             : 
                               // NOTE(review): this assignment is overwritten by the assign()
                               // after the if-block and so appears to be redundant.
     364             :         nslist.assign(mp_impl->m_all_ns.begin(), mp_impl->m_all_ns.end());
     365             : 
     366             :         // Sort it and remove duplicate.
                               // NOTE(review): std::sort/std::unique/std::for_each come from
                               // <algorithm>, which is not among the includes visible here.
     367           0 :         std::sort(all_ns.begin(), all_ns.end());
     368             :         xmlns_list_type::iterator it_unique_end =
     369           0 :             std::unique(all_ns.begin(), all_ns.end());
     370           0 :         all_ns.erase(it_unique_end, all_ns.end());
     371             : 
     372             :         // Now, sort by indices.
                               // Identifiers with no index are dropped at this step, because
                               // push_back_ns_to_item skips index_not_found.
     373             :         vector<ns_item> items;
     374           0 :         std::for_each(all_ns.begin(), all_ns.end(), push_back_ns_to_item(items, *this));
     375           0 :         std::sort(items.begin(), items.end(), less_ns_by_index());
     376             : 
                               // Rebuild the stored list from the index-ordered items.
     377             :         all_ns.clear();
     378           0 :         std::for_each(items.begin(), items.end(), push_back_item_to_ns(all_ns));
     379             : 
     380           0 :         mp_impl->m_trim_all_ns = false;
     381             :     }
     382             : 
     383           0 :     nslist.assign(mp_impl->m_all_ns.begin(), mp_impl->m_all_ns.end());
     384           0 : }
     385             : 
     386           0 : void xmlns_context::dump(std::ostream& os) const
     387             : {
                           // Writes one line per namespace used in this context to `os`, in the
                           // form  ns<index>="<namespace>"  and in the order produced by
                           // get_all_namespaces().  Namespaces without an index are skipped.
     388             :     vector<xmlns_id_t> nslist;
     389           0 :     get_all_namespaces(nslist);
     390             :     vector<xmlns_id_t>::const_iterator it = nslist.begin(), it_end = nslist.end();
     391           0 :     for (; it != it_end; ++it)
     392             :     {
     393           0 :         xmlns_id_t ns_id = *it;
     394           0 :         size_t num_id = get_index(ns_id);
     395           0 :         if (num_id == index_not_found)
     396           0 :             continue;
     397             : 
                               // endl flushes the stream after every line.
     398           0 :         os << "ns" << num_id << "=\"" << ns_id << '"' << endl;
     399           0 :     }
     400           0 : }
     401             : 
     402          24 : }

Generated by: LCOV version 1.10