Line data Source code
1 : /*************************************************************************
2 : *
3 : * Copyright (c) 2012 Kohei Yoshida
4 : *
5 : * Permission is hereby granted, free of charge, to any person
6 : * obtaining a copy of this software and associated documentation
7 : * files (the "Software"), to deal in the Software without
8 : * restriction, including without limitation the rights to use,
9 : * copy, modify, merge, publish, distribute, sublicense, and/or sell
10 : * copies of the Software, and to permit persons to whom the
11 : * Software is furnished to do so, subject to the following
12 : * conditions:
13 : *
14 : * The above copyright notice and this permission notice shall be
15 : * included in all copies or substantial portions of the Software.
16 : *
17 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 : * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19 : * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 : * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21 : * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22 : * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 : * OTHER DEALINGS IN THE SOFTWARE.
25 : *
26 : ************************************************************************/
27 :
28 : #ifndef __ORCUS_XML_MAP_TREE_HPP__
29 : #define __ORCUS_XML_MAP_TREE_HPP__
30 :
31 : #include "orcus/pstring.hpp"
32 : #include "orcus/spreadsheet/types.hpp"
33 : #include "orcus/exception.hpp"
34 : #include "orcus/types.hpp"
35 : #include "orcus/xml_namespace.hpp"
36 : #include "string_pool.hpp"
37 :
38 : #include <ostream>
39 : #include <map>
40 :
41 : #include <boost/noncopyable.hpp>
42 : #include <boost/ptr_container/ptr_vector.hpp>
43 :
44 : namespace orcus {
45 :
46 : class xmlns_repository;
47 :
48 : /**
49 : * Tree representing XML-to-sheet mapping for mapped XML import. Links are registered by xpath (set_cell_link(), append_range_field_link()) and the tree is traversed through a walker.
50 : */
51 : class xml_map_tree : boost::noncopyable
52 : {
53 : xml_map_tree(); // disabled: declared private, so outside code must use the xmlns_repository constructor
54 :
55 : public:
56 : /**
57 : * Error indicating improper xpath syntax.
58 : */
59 0 : class xpath_error : public general_error
60 : {
61 : public:
62 : xpath_error(const std::string& msg); // msg: error description; presumably forwarded to general_error -- confirm in the implementation
63 : };
64 :
65 : /**
66 : * A single cell position. Used for single cell links as well as range
67 : * links. For a range link, this represents the upper-left cell of a
68 : * range. Also serves as the key type of range_ref_map_type.
69 : */
70 0 : struct cell_position
71 : {
72 : pstring sheet; // sheet identifier; presumably the sheet name -- confirm against callers
73 : spreadsheet::row_t row; // row position
74 : spreadsheet::col_t col; // column position
75 :
76 : cell_position(); // default constructor; the initial member values are set in the implementation (not visible here)
77 : cell_position(const pstring& _sheet, spreadsheet::row_t _row, spreadsheet::col_t _col); // constructs from explicit sheet, row and column
78 : cell_position(const cell_position& r); // user-declared copy constructor
79 : };
80 :
81 : /**
82 : * Positions of opening and closing elements in xml stream.
83 : */
84 : struct element_position
85 : {
86 : const char* open_begin; // presumably the start of the opening element in the stream -- confirm
87 : const char* open_end; // presumably the end of the opening element -- confirm whether inclusive or one-past
88 : const char* close_begin; // presumably the start of the closing element -- confirm
89 : const char* close_end; // presumably the end of the closing element -- confirm whether inclusive or one-past
90 :
91 : element_position(); // default constructor; initial pointer values are set in the implementation (not visible here)
92 : };
93 :
94 0 : struct cell_reference : boost::noncopyable // link target for a single cell; non-copyable via boost::noncopyable
95 : {
96 : cell_position pos; // position of the linked cell
97 : cell_reference();
98 : };
99 :
100 : struct element; // forward declaration; defined further down in this class
101 : struct linkable; // forward declaration; defined further down in this class
102 : typedef boost::ptr_vector<element> element_store_type; // boost::ptr_vector of elements (the container owns the stored objects)
103 : typedef std::vector<element*> element_list_type; // list of plain element pointers; the vector itself does not delete them
104 : typedef std::vector<const element*> const_element_list_type; // same as element_list_type but with pointers to const
105 : typedef std::vector<const linkable*> const_linkable_list_type; // list of plain pointers to const linkable nodes
106 :
107 0 : struct range_reference : boost::noncopyable // link target for a range of cells; non-copyable via boost::noncopyable
108 : {
109 : cell_position pos; // position of the range; per cell_position's documentation, its upper-left cell
110 :
111 : /**
112 : * List of elements comprising the fields, in order of appearance from
113 : * left to right.
114 : */
115 : const_linkable_list_type field_nodes;
116 :
117 : /**
118 : * Total number of rows comprising data. This does not include the
119 : * label row at the top.
120 : */
121 : spreadsheet::row_t row_size;
122 :
123 : range_reference(const cell_position& _pos); // _pos: position of the range (stored in pos, presumably -- confirm in the implementation)
124 : };
125 :
126 : struct field_in_range // describes one field of a range reference; has no constructor, so members are not initialized by default
127 : {
128 : range_reference* ref; // presumably the range reference this field belongs to -- confirm
129 : spreadsheet::col_t column_pos; // presumably the column position of the field within that range -- confirm
130 : };
131 :
132 : typedef std::map<cell_position, range_reference*> range_ref_map_type; // cell position -> range reference; keys are ordered by the operator< on cell_position declared after this class
133 :
134 : enum linkable_node_type { node_unknown, node_element, node_attribute }; // node kind stored in linkable::node_type
135 : enum reference_type { reference_unknown, reference_cell, reference_range_field }; // link kind stored in attribute::ref_type and element::ref_type
136 : enum element_type { element_unknown, element_linked, element_unlinked }; // element kind stored in element::elem_type
137 :
138 0 : struct linkable : boost::noncopyable // common base of attribute and element; non-copyable via boost::noncopyable
139 : {
140 : xmlns_id_t ns; // namespace identifier of the node
141 : pstring name; // name of the node
142 : linkable_node_type node_type; // whether this node is an element or an attribute
143 :
144 : mutable pstring ns_alias; // namespace alias used in the content stream; mutable, so it can be set through a const linkable
145 :
146 : linkable(xmlns_id_t _ns, const pstring& _name, linkable_node_type _node_type);
147 : };
148 :
149 : struct attribute : public linkable // attribute node of the tree
150 : {
151 : reference_type ref_type; // kind of link held by this attribute
152 : union { // presumably ref_type selects the active member (reference_cell -> cell_ref, reference_range_field -> field_ref) -- confirm in the implementation
153 : cell_reference* cell_ref;
154 : field_in_range* field_ref;
155 : };
156 :
157 : attribute(xmlns_id_t _ns, const pstring& _name, reference_type _ref_type);
158 : ~attribute(); // user-declared destructor; presumably releases the union member -- confirm in the implementation
159 : };
160 :
161 : typedef boost::ptr_vector<attribute> attribute_store_type; // boost::ptr_vector of attributes (the container owns the stored objects)
162 :
163 : struct element : public linkable // element node of the tree
164 : {
165 : element_type elem_type; // linked or unlinked element
166 : reference_type ref_type; // kind of link held by this element
167 :
168 : union { // presumably elem_type / ref_type select the active member -- confirm in the implementation
169 : element_store_type* child_elements;
170 : cell_reference* cell_ref;
171 : field_in_range* field_ref;
172 : };
173 :
174 : mutable element_position stream_pos; // position of this element in the content stream; mutable, so it can be set through a const element
175 :
176 : attribute_store_type attributes; // attributes stored for this element
177 :
178 : /**
179 : * Points to a range reference instance of which this element is a
180 : * parent. NULL if this element is not a parent element of any range
181 : * reference.
182 : */
183 : range_reference* range_parent;
184 :
185 : element(xmlns_id_t _ns, const pstring& _name, element_type _elem_type, reference_type _ref_type);
186 : ~element(); // user-declared destructor; presumably releases the union member -- confirm in the implementation
187 :
188 : const element* get_child(xmlns_id_t _ns, const pstring& _name) const; // looks up a child by namespace and name; presumably returns NULL when there is no match -- confirm
189 :
190 : /**
191 : * Unlinked attribute anchor is an element that's not linked but has
192 : * one or more attributes that are linked.
193 : *
194 : * @return true if the element is an unlinked attribute anchor, false
195 : * otherwise.
196 : */
197 : bool unlinked_attribute_anchor() const;
198 : };
199 :
200 : public:
201 :
202 : /**
203 : * Wrapper class to allow walking through the element tree.
204 : */
205 0 : class walker
206 : {
207 : typedef std::vector<const element*> ref_element_stack_type;
208 : typedef std::vector<xml_name_t> name_stack_type;
209 : const xml_map_tree& m_parent; // tree being walked; held by reference, so the tree must outlive the walker
210 : ref_element_stack_type m_stack; // stack of elements; presumably the path from the root to the current element -- confirm
211 : name_stack_type m_unlinked_stack; // stack of names; presumably for elements not present in the tree -- confirm
212 : public:
213 : walker(const xml_map_tree& parent); // parent: tree to walk
214 : walker(const walker& r); // user-declared copy constructor
215 :
216 : void reset(); // presumably returns the walker to its initial state -- confirm in the implementation
217 : const element* push_element(xmlns_id_t ns, const pstring& name); // descend into the named element; return value semantics (e.g. NULL when unmapped) are defined in the implementation
218 : const element* pop_element(xmlns_id_t ns, const pstring& name); // leave the named element; return value semantics are defined in the implementation
219 : };
220 :
221 : xml_map_tree(xmlns_repository& xmlns_repo); // the only public constructor; takes the namespace repository by non-const reference
222 : ~xml_map_tree();
223 :
224 : void set_namespace_alias(const pstring& alias, const pstring& uri); // associates a namespace alias with a URI
225 : xmlns_id_t get_namespace(const pstring& alias) const; // returns the namespace identifier for an alias
226 :
227 : void set_cell_link(const pstring& xpath, const cell_position& ref); // links the node at xpath to a single cell; presumably throws xpath_error on improper xpath syntax -- confirm
228 :
229 : void start_range(); // presumably begins a new range reference; used together with append_range_field_link() and commit_range() -- confirm
230 : void append_range_field_link(const pstring& xpath, const cell_position& pos); // adds the node at xpath as a field of a range positioned at pos
231 : void commit_range(); // presumably finalizes the range started with start_range() -- confirm
232 :
233 : const linkable* get_link(const pstring& xpath) const; // returns the linkable node for xpath; presumably NULL when none exists -- confirm
234 :
235 : walker get_tree_walker() const; // returns a walker by value
236 :
237 : range_ref_map_type& get_range_references(); // returns a mutable reference to a range reference map; presumably m_field_refs -- confirm
238 :
239 : pstring intern_string(const pstring& str) const; // const member; m_names is mutable, presumably so that this can add str to the pool -- confirm
240 :
241 : private:
242 : linkable* get_element_stack(const pstring& xpath, reference_type type, element_list_type& elem_stack); // elem_stack is an out-parameter (non-const reference); exact contents are defined in the implementation
243 :
244 : private:
245 : xmlns_context m_xmlns_cxt; // namespace context; presumably obtained from the repository passed to the constructor -- confirm
246 :
247 : /**
248 : * Element stack of current range parent element. This is used to
249 : * determine a common parent element for all field links of a current
250 : * range reference.
251 : */
252 : element_list_type m_cur_range_parent;
253 :
254 : range_reference* mp_cur_range_ref; // presumably the range reference currently being built between start_range() and commit_range() -- confirm
255 :
256 : /**
257 : * All range references present in the tree. This container manages the
258 : * life cycles of stored range references.
259 : */
260 : range_ref_map_type m_field_refs;
261 :
262 : /** pool of element names. */
263 : mutable string_pool m_names;
264 :
265 : element* mp_root; // presumably the root element of the tree -- confirm ownership and initial value in the implementation
266 : };
267 :
268 : std::ostream& operator<< (std::ostream& os, const xml_map_tree::cell_position& ref); // stream output for a cell position; the output format is defined in the implementation
269 : std::ostream& operator<< (std::ostream& os, const xml_map_tree::linkable& link); // stream output for a linkable node (element or attribute); the output format is defined in the implementation
270 :
271 : bool operator< (const xml_map_tree::cell_position& left, const xml_map_tree::cell_position& right); // ordering for cell_position; needed because cell_position is the std::map key of range_ref_map_type
272 :
273 : }
274 :
275 : #endif
|