LCOV - code coverage report
Current view: top level - libreoffice/workdir/unxlngi6.pro/UnpackedTarball/orcus/src/liborcus - xml_structure_tree.cpp (source / functions) Hit Total Coverage
Test: libreoffice_filtered.info Lines: 2 213 0.9 %
Date: 2012-12-17 Functions: 2 48 4.2 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*************************************************************************
       2             :  *
       3             :  * Copyright (c) 2012 Kohei Yoshida
       4             :  *
       5             :  * Permission is hereby granted, free of charge, to any person
       6             :  * obtaining a copy of this software and associated documentation
       7             :  * files (the "Software"), to deal in the Software without
       8             :  * restriction, including without limitation the rights to use,
       9             :  * copy, modify, merge, publish, distribute, sublicense, and/or sell
      10             :  * copies of the Software, and to permit persons to whom the
      11             :  * Software is furnished to do so, subject to the following
      12             :  * conditions:
      13             :  *
      14             :  * The above copyright notice and this permission notice shall be
      15             :  * included in all copies or substantial portions of the Software.
      16             :  *
      17             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
      18             :  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
      19             :  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
      20             :  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
      21             :  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
      22             :  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
      23             :  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
      24             :  * OTHER DEALINGS IN THE SOFTWARE.
      25             :  *
      26             :  ************************************************************************/
      27             : 
      28             : #include "orcus/xml_structure_tree.hpp"
      29             : #include "orcus/sax_ns_parser.hpp"
      30             : #include "orcus/xml_namespace.hpp"
      31             : #include "orcus/global.hpp"
      32             : #include "orcus/exception.hpp"
      33             : 
      34             : #include "string_pool.hpp"
      35             : 
      36             : #include <iostream>
      37             : #include <sstream>
      38             : #include <vector>
      39             : #include <cstdio>
      40             : 
      41             : #include <boost/noncopyable.hpp>
      42             : #include <boost/unordered_map.hpp>
      43             : #include <boost/unordered_set.hpp>
      44             : #include <boost/ptr_container/ptr_vector.hpp>
      45             : 
      46             : using namespace std;
      47             : 
      48             : namespace orcus {
      49             : 
      50             : namespace {
      51             : 
      52             : struct elem_prop;
      53             : typedef boost::unordered_map<xml_structure_tree::entity_name, elem_prop*, xml_structure_tree::entity_name::hash> element_store_type;
      54             : typedef boost::unordered_set<xml_structure_tree::entity_name, xml_structure_tree::entity_name::hash> attribute_names_type;
      55             : 
      56             : /** Element properties. */
      57             : struct elem_prop : boost::noncopyable
      58             : {
      59             :     element_store_type child_elements;
      60             :     attribute_names_type attributes;
      61             : 
      62             :     /** Store child element names in order of appearance. */
      63             :     xml_structure_tree::entity_names_type child_element_names;
      64             : 
      65             :     /** Store attribute names in order of appearance. */
      66             :     xml_structure_tree::entity_names_type attribute_names;
      67             : 
      68             :     size_t appearance_order;
      69             : 
      70             :     size_t in_scope_count;
      71             : 
      72             :     /**
      73             :      * When true, this element is the base element of repeated structures.
      74             :      * This flag is set only with the base element; none of the child
      75             :      * elements below the base element have this flag set.
      76             :      */
      77             :     bool repeat:1;
      78             : 
      79           0 :     elem_prop() : appearance_order(0), in_scope_count(1), repeat(false) {}
      80           0 :     elem_prop(size_t _appearance_order) : appearance_order(_appearance_order), in_scope_count(1), repeat(false) {}
      81           0 :     ~elem_prop()
      82           0 :     {
      83           0 :         for_each(child_elements.begin(), child_elements.end(), map_object_deleter<element_store_type>());
      84           0 :     };
      85             : };
      86             : 
      87           0 : struct root
      88             : {
      89             :     xml_structure_tree::entity_name name;
      90             :     elem_prop prop;
      91             : };
      92             : 
      93           0 : struct element_ref
      94             : {
      95             :     xml_structure_tree::entity_name name;
      96             :     elem_prop* prop;
      97             : 
      98           0 :     element_ref() : prop(NULL) {}
      99             :     element_ref(xml_structure_tree::entity_name _name, elem_prop* _prop) :
     100           0 :         name(_name), prop(_prop) {}
     101             : };
     102             : 
     103             : typedef std::vector<element_ref> elements_type;
     104             : 
     105           0 : class xml_sax_handler
     106             : {
     107             :     string_pool& m_pool;
     108             :     unique_ptr<root> mp_root;
     109             :     elements_type m_stack;
     110             :     xml_structure_tree::entity_names_type m_attrs;
     111             : 
     112             : private:
     113           0 :     void merge_attributes(elem_prop& prop)
     114             :     {
     115             :         xml_structure_tree::entity_names_type::const_iterator it = m_attrs.begin(), it_end = m_attrs.end();
     116           0 :         for (; it != it_end; ++it)
     117             :         {
     118           0 :             if (prop.attributes.find(*it) == prop.attributes.end())
     119             :             {
     120             :                 // New attribute.  Insert it.
     121           0 :                 prop.attributes.insert(*it);
     122           0 :                 prop.attribute_names.push_back(*it);
     123             :             }
     124             :         }
     125             : 
     126             :         m_attrs.clear();
     127           0 :     }
     128             : 
     129             : public:
     130             :     xml_sax_handler(string_pool& pool) :
     131           0 :         m_pool(pool), mp_root(NULL) {}
     132             : 
     133             :     void declaration()
     134             :     {
     135             :         m_attrs.clear();
     136             :     }
     137             : 
     138           0 :     void start_element(const sax_ns_parser_element& elem)
     139             :     {
     140           0 :         if (!mp_root)
     141             :         {
     142             :             // This is a root element.
     143           0 :             mp_root.reset(new root);
     144           0 :             mp_root->name.ns = elem.ns;
     145           0 :             mp_root->name.name = m_pool.intern(elem.name).first;
     146           0 :             element_ref ref(mp_root->name, &mp_root->prop);
     147           0 :             merge_attributes(mp_root->prop);
     148           0 :             m_stack.push_back(ref);
     149             :             return;
     150             :         }
     151             : 
     152             :         // See if the current element already has a child element of the same name.
     153           0 :         assert(!m_stack.empty());
     154             :         element_ref& current = m_stack.back();
     155           0 :         xml_structure_tree::entity_name key(elem.ns, elem.name);
     156           0 :         element_store_type::const_iterator it = current.prop->child_elements.find(key);
     157           0 :         if (it != current.prop->child_elements.end())
     158             :         {
     159             :             // Recurring element. Set its repeat flag only when it occurs
     160             :             // multiple times in the same scope.
     161           0 :             ++it->second->in_scope_count;
     162           0 :             if (it->second->in_scope_count > 1)
     163           0 :                 it->second->repeat = true;
     164             : 
     165           0 :             element_ref ref(it->first, it->second);
     166           0 :             merge_attributes(*it->second);
     167           0 :             m_stack.push_back(ref);
     168             :             return;
     169             :         }
     170             : 
     171             :         // New element.
     172           0 :         size_t order = current.prop->child_elements.size();
     173           0 :         key.name = m_pool.intern(key.name).first;
     174             :         pair<element_store_type::const_iterator,bool> r =
     175             :             current.prop->child_elements.insert(
     176           0 :                 element_store_type::value_type(key, new elem_prop(order)));
     177             : 
     178           0 :         if (!r.second)
     179           0 :             throw general_error("Insertion failed");
     180             : 
     181           0 :         current.prop->child_element_names.push_back(key);
     182             : 
     183             :         it = r.first;
     184           0 :         element_ref ref(it->first, it->second);
     185           0 :         merge_attributes(*it->second);
     186           0 :         m_stack.push_back(ref);
     187             :     }
     188             : 
     189           0 :     void end_element(const sax_ns_parser_element& elem)
     190             :     {
     191           0 :         if (m_stack.empty())
     192           0 :             throw general_error("Element stack is empty.");
     193             : 
     194             :         const element_ref& current = m_stack.back();
     195             : 
     196             :         // Reset the in-scope count of all child elements to 0 before ending
     197             :         // the current scope.
     198           0 :         element_store_type::iterator it = current.prop->child_elements.begin(), it_end = current.prop->child_elements.end();
     199           0 :         for (; it != it_end; ++it)
     200           0 :             it->second->in_scope_count = 0;
     201             : 
     202             :         m_stack.pop_back();
     203           0 :     }
     204             : 
     205           0 :     void characters(const pstring&) {}
     206             : 
     207           0 :     void attribute(const pstring&, const pstring&)
     208             :     {
     209             :         // Attribute for declaration. We don't handle this.
     210           0 :     }
     211             : 
     212           0 :     void attribute(const sax_ns_parser_attribute& attr)
     213             :     {
     214           0 :         m_attrs.push_back(xml_structure_tree::entity_name(attr.ns, attr.name));
     215           0 :     }
     216             : 
     217             :     root* release_root_element()
     218             :     {
     219             :         return mp_root.release();
     220             :     }
     221             : };
     222             : 
     223             : struct sort_by_appearance : std::binary_function<element_ref, element_ref, bool>
     224             : {
     225           0 :     bool operator() (const element_ref& left, const element_ref& right) const
     226             :     {
     227           0 :         return left.prop->appearance_order < right.prop->appearance_order;
     228             :     }
     229             : };
     230             : 
     231           0 : struct scope : boost::noncopyable
     232             : {
     233             :     xml_structure_tree::entity_name name;
     234             :     elements_type elements;
     235             :     elements_type::const_iterator current_pos;
     236             :     bool repeat:1;
     237             : 
     238           0 :     scope(const xml_structure_tree::entity_name& _name, bool _repeat, const element_ref& _elem) :
     239           0 :         name(_name), repeat(_repeat)
     240             :     {
     241           0 :         elements.push_back(_elem);
     242           0 :         current_pos = elements.begin();
     243           0 :     }
     244             : 
     245             :     scope(const xml_structure_tree::entity_name& _name, bool _repeat) :
     246           0 :         name(_name), repeat(_repeat) {}
     247             : };
     248             : 
     249             : typedef boost::ptr_vector<scope> scopes_type;
     250             : 
     251           0 : void print_scope(ostream& os, const scopes_type& scopes, const xmlns_context& cxt)
     252             : {
     253           0 :     if (scopes.empty())
     254           0 :         throw general_error("scope stack shouldn't be empty while dumping tree.");
     255             : 
     256             :     // Skip the first scope which is root.
     257             :     scopes_type::const_iterator it = scopes.begin(), it_end = scopes.end();
     258           0 :     for (++it; it != it_end; ++it)
     259             :     {
     260           0 :         os << "/";
     261           0 :         size_t num_id = cxt.get_index(it->name.ns);
     262           0 :         if (num_id != xmlns_context::index_not_found)
     263           0 :             os << "ns" << num_id << ":";
     264           0 :         os << it->name.name;
     265           0 :         if (it->repeat)
     266           0 :             os << "[*]";
     267             :     }
     268           0 : }
     269             : 
     270             : }
     271             : 
     272             : struct xml_structure_tree_impl : boost::noncopyable
     273             : {
     274             :     string_pool m_pool;
     275             :     xmlns_context& m_xmlns_cxt;
     276             :     root* mp_root;
     277             : 
     278           0 :     xml_structure_tree_impl(xmlns_context& xmlns_cxt) :
     279           0 :         m_xmlns_cxt(xmlns_cxt), mp_root(NULL) {}
     280             : 
     281           0 :     ~xml_structure_tree_impl()
     282           0 :     {
     283           0 :         delete mp_root;
     284           0 :     }
     285             : };
     286             : 
     287           0 : struct xml_structure_tree::walker_impl : boost::noncopyable
     288             : {
     289             :     const xml_structure_tree_impl& m_parent_impl;
     290             :     root* mp_root; /// Root element of the authoritative tree.
     291             :     element_ref m_cur_elem;
     292             :     std::vector<element_ref> m_scopes;
     293             : 
     294           0 :     walker_impl(const xml_structure_tree_impl& parent_impl) :
     295           0 :         m_parent_impl(parent_impl), mp_root(parent_impl.mp_root) {}
     296             : 
     297           0 :     walker_impl(const walker_impl& r) :
     298           0 :         m_parent_impl(r.m_parent_impl), mp_root(r.mp_root), m_cur_elem(r.m_cur_elem), m_scopes(r.m_scopes) {}
     299             : };
     300             : 
     301           0 : xml_structure_tree::entity_name::entity_name() :
     302           0 :     ns(XMLNS_UNKNOWN_ID) {}
     303             : 
     304           0 : xml_structure_tree::entity_name::entity_name(xmlns_id_t _ns, const pstring& _name) :
     305           0 :     ns(_ns), name(_name) {}
     306             : 
     307           0 : bool xml_structure_tree::entity_name::operator< (const entity_name& r) const
     308             : {
     309           0 :     if (ns != r.ns)
     310           0 :         return ns < r.ns;
     311             : 
     312           0 :     return name < r.name;
     313             : }
     314             : 
     315           0 : bool xml_structure_tree::entity_name::operator== (const entity_name& r) const
     316             : {
     317           0 :     return ns == r.ns && name == r.name;
     318             : }
     319             : 
     320           0 : size_t xml_structure_tree::entity_name::hash::operator() (const entity_name& val) const
     321             : {
     322             :     static pstring::hash hasher;
     323           0 :     size_t n = reinterpret_cast<size_t>(val.ns);
     324           0 :     n += hasher(val.name);
     325           0 :     return n;
     326             : }
     327             : 
     328           0 : xml_structure_tree::element::element() :
     329           0 :     repeat(false) {}
     330             : 
     331           0 : xml_structure_tree::element::element(const entity_name& _name, bool _repeat) :
     332           0 :     name(_name), repeat(_repeat) {}
     333             : 
     334           8 : size_t xml_structure_tree::walker::index_not_found = xmlns_context::index_not_found;
     335             : 
     336           0 : xml_structure_tree::walker::walker(const xml_structure_tree_impl& parent_impl) :
     337           0 :     mp_impl(new walker_impl(parent_impl))
     338             : {
     339           0 : }
     340             : 
     341           0 : xml_structure_tree::walker::walker(const walker& r) :
     342           0 :     mp_impl(new walker_impl(*r.mp_impl))
     343             : {
     344           0 : }
     345             : 
     346           0 : xml_structure_tree::walker::~walker()
     347             : {
     348           0 :     delete mp_impl;
     349           0 : }
     350             : 
     351           0 : xml_structure_tree::walker& xml_structure_tree::walker::operator= (const walker& r)
     352             : {
     353           0 :     mp_impl->mp_root = r.mp_impl->mp_root;
     354           0 :     return *this;
     355             : }
     356             : 
     357           0 : xml_structure_tree::element xml_structure_tree::walker::root()
     358             : {
     359           0 :     if (!mp_impl->mp_root)
     360           0 :         throw general_error("Tree is empty.");
     361             : 
     362             :     mp_impl->m_scopes.clear();
     363             : 
     364             :     // Set the current element to root.
     365           0 :     element_ref ref(mp_impl->mp_root->name, &mp_impl->mp_root->prop);
     366             :     mp_impl->m_cur_elem = ref;
     367           0 :     mp_impl->m_scopes.push_back(ref);
     368           0 :     return xml_structure_tree::element(ref.name, false);
     369             : }
     370             : 
     371           0 : xml_structure_tree::element xml_structure_tree::walker::descend(const entity_name& name)
     372             : {
     373           0 :     if (mp_impl->m_scopes.empty())
     374           0 :         throw general_error("Scope is empty.");
     375             : 
     376           0 :     assert(mp_impl->m_scopes.back().prop);
     377           0 :     const element_store_type& child_elems = mp_impl->m_scopes.back().prop->child_elements;
     378           0 :     element_store_type::const_iterator it = child_elems.find(name);
     379             : 
     380           0 :     if (it == child_elems.end())
     381           0 :         throw general_error("Specified child element does not exist.");
     382             : 
     383             :     // Push this new child element onto the stack.
     384           0 :     element_ref ref(name, it->second);
     385           0 :     mp_impl->m_scopes.push_back(ref);
     386             : 
     387           0 :     return element(name, it->second->repeat);
     388             : }
     389             : 
     390           0 : xml_structure_tree::element xml_structure_tree::walker::ascend()
     391             : {
     392           0 :     if (mp_impl->m_scopes.empty())
     393           0 :         throw general_error("Scope is empty.");
     394             : 
     395           0 :     if (mp_impl->m_scopes.size() == 1)
     396           0 :         throw general_error("You can't ascend from the root element.");
     397             : 
     398             :     mp_impl->m_scopes.pop_back();
     399           0 :     const element_ref& ref = mp_impl->m_scopes.back();
     400           0 :     return element(ref.name, ref.prop->repeat);
     401             : }
     402             : 
     403           0 : void xml_structure_tree::walker::get_children(entity_names_type& names)
     404             : {
     405           0 :     if (mp_impl->m_scopes.empty())
     406           0 :         throw general_error("Scope is empty.");
     407             : 
     408           0 :     assert(mp_impl->m_scopes.back().prop);
     409             :     const elem_prop& prop = *mp_impl->m_scopes.back().prop;
     410             :     names.assign(prop.child_element_names.begin(), prop.child_element_names.end());
     411           0 : }
     412             : 
     413           0 : void xml_structure_tree::walker::get_attributes(entity_names_type& names)
     414             : {
     415           0 :     if (mp_impl->m_scopes.empty())
     416           0 :         throw general_error("Scope is empty.");
     417             : 
     418           0 :     assert(mp_impl->m_scopes.back().prop);
     419             :     const elem_prop& prop = *mp_impl->m_scopes.back().prop;
     420             :     names.assign(prop.attribute_names.begin(), prop.attribute_names.end());
     421           0 : }
     422             : 
     423           0 : size_t xml_structure_tree::walker::get_xmlns_index(xmlns_id_t ns) const
     424             : {
     425           0 :     return mp_impl->m_parent_impl.m_xmlns_cxt.get_index(ns);
     426             : }
     427             : 
     428           0 : string xml_structure_tree::walker::get_xmlns_short_name(xmlns_id_t ns) const
     429             : {
     430           0 :     return mp_impl->m_parent_impl.m_xmlns_cxt.get_short_name(ns);
     431             : }
     432             : 
     433           0 : xml_structure_tree::xml_structure_tree(xmlns_context& xmlns_cxt) :
     434           0 :     mp_impl(new xml_structure_tree_impl(xmlns_cxt)) {}
     435             : 
     436           0 : xml_structure_tree::~xml_structure_tree()
     437             : {
     438           0 :     delete mp_impl;
     439           0 : }
     440             : 
     441           0 : void xml_structure_tree::parse(const char* p, size_t n)
     442             : {
     443           0 :     xml_sax_handler hdl(mp_impl->m_pool);
     444           0 :     sax_ns_parser<xml_sax_handler> parser(p, n, mp_impl->m_xmlns_cxt, hdl);
     445           0 :     parser.parse();
     446           0 :     mp_impl->mp_root = hdl.release_root_element();
     447           0 : }
     448             : 
     449           0 : void xml_structure_tree::dump_compact(ostream& os) const
     450             : {
     451           0 :     if (!mp_impl->mp_root)
     452           0 :         return;
     453             : 
     454             :     scopes_type scopes;
     455           0 :     const xmlns_context& cxt = mp_impl->m_xmlns_cxt;
     456             : 
     457             :     // Dump all namespaces first.
     458           0 :     cxt.dump(os);
     459             : 
     460           0 :     element_ref ref(mp_impl->mp_root->name, &mp_impl->mp_root->prop);
     461           0 :     scopes.push_back(new scope(entity_name(), false, ref));
     462           0 :     while (!scopes.empty())
     463             :     {
     464             :         bool new_scope = false;
     465             : 
     466             :         // Iterate through all elements in the current scope.
     467           0 :         scope& cur_scope = scopes.back();
     468           0 :         for (; cur_scope.current_pos != cur_scope.elements.end(); ++cur_scope.current_pos)
     469             :         {
     470             :             const element_ref& this_elem = *cur_scope.current_pos;
     471           0 :             ostringstream ss;
     472           0 :             print_scope(ss, scopes, cxt);
     473             : 
     474           0 :             ss << "/";
     475           0 :             size_t num_id = cxt.get_index(this_elem.name.ns);
     476           0 :             if (num_id != xmlns_context::index_not_found)
     477           0 :                 ss << "ns" << num_id << ":";
     478           0 :             ss << this_elem.name.name;
     479           0 :             if (this_elem.prop->repeat)
     480           0 :                 ss << "[*]";
     481             : 
     482           0 :             string elem_name = ss.str();
     483           0 :             os << elem_name << endl;
     484             : 
     485             :             // Print all attributes that belong to this element.
     486             :             {
     487           0 :                 const entity_names_type& attrs = this_elem.prop->attribute_names;
     488             :                 entity_names_type::const_iterator it = attrs.begin(), it_end = attrs.end();
     489           0 :                 for (; it != it_end; ++it)
     490           0 :                     os << elem_name << '@' << it->name << endl;
     491             :             }
     492             : 
     493           0 :             const element_store_type& child_elements = this_elem.prop->child_elements;
     494           0 :             if (child_elements.empty())
     495           0 :                 continue;
     496             : 
     497             :             // This element has child elements.  Push a new scope and populate
     498             :             // it with all child elements.
     499             :             element_store_type::const_iterator it = child_elements.begin(), it_end = child_elements.end();
     500             :             elements_type elems;
     501           0 :             for (; it != it_end; ++it)
     502             :             {
     503             :                 ref.name = it->first;
     504           0 :                 ref.prop = it->second;
     505           0 :                 elems.push_back(ref);
     506             :             }
     507             : 
     508             :             // Sort the elements by order of appearance.
     509           0 :             std::sort(elems.begin(), elems.end(), sort_by_appearance());
     510             : 
     511           0 :             assert(!elems.empty());
     512             : 
     513             :             // Push a new scope, and restart the loop with the new scope.
     514             :             ++cur_scope.current_pos;
     515           0 :             scopes.push_back(new scope(this_elem.name, this_elem.prop->repeat));
     516           0 :             scope& child_scope = scopes.back();
     517             :             child_scope.elements.swap(elems);
     518           0 :             child_scope.current_pos = child_scope.elements.begin();
     519             : 
     520             :             new_scope = true;
     521             :             break;
     522           0 :         }
     523             : 
     524           0 :         if (new_scope)
     525           0 :             continue;
     526             : 
     527           0 :         scopes.pop_back();
     528           0 :     }
     529             : }
     530             : 
     531           0 : xml_structure_tree::walker xml_structure_tree::get_walker() const
     532             : {
     533           0 :     return walker(*mp_impl);
     534             : }
     535             : 
     536          24 : }

Generated by: LCOV version 1.10