xmlwrapp
Lightweight C++ XML parsing library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
document.h
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2001-2003 Peter J Jones (pjones@pmade.org)
3  * Copyright (C) 2013 Vaclav Slavik <vslavik@gmail.com>
4  * All Rights Reserved
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  * notice, this list of conditions and the following disclaimer in
14  * the documentation and/or other materials provided with the
15  * distribution.
16  * 3. Neither the name of the Author nor the names of its contributors
17  * may be used to endorse or promote products derived from this software
18  * without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR
24  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
27  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
28  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
30  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 /**
35  @file
36 
37  This file contains the definition of the xml::document class.
38  */
39 
40 #ifndef _xmlwrapp_document_h_
41 #define _xmlwrapp_document_h_
42 
43 // xmlwrapp includes
44 #include "xmlwrapp/init.h"
45 #include "xmlwrapp/node.h"
46 #include "xmlwrapp/export.h"
47 #include "xmlwrapp/errors.h"
48 
49 // standard includes
50 #include <iosfwd>
51 #include <string>
52 #include <cstddef>
53 
54 // forward declarations of the xslt types that xml::document refers to (friend class and private hook parameter)
55 namespace xslt
56 {
57 
58 class stylesheet;
59 namespace impl
60 {
61 class result;
62 }
63 
64 } // end xslt namespace
65 
66 namespace xml
67 {
68 
69 // forward declarations: these types are only named by xml::document (friends and the pimpl pointer), so their full definitions are not needed here
70 class schema;
71 class tree_parser;
72 
73 namespace impl
74 {
75 struct doc_impl;
76 struct xpath_context_impl;
77 }
78 
79 /**
80  The xml::document class is used to hold the XML tree and various bits of
81  information about it.
82  */
83 class XMLWRAPP_API document
84 {
85 public:
86  /// Unsigned type used for sizes and counts (alias of std::size_t).
87  typedef std::size_t size_type;
88 
89  /**
90  Create a new XML document with the default settings. The new document
91  will contain a root node with a name of "blank".
92  */
93  document();
94 
95  /**
96  Create a new XML document and set the name of the root element to the
97  given text.
98 
99  @param root_name What to set the name of the root element to.
100 
101  @deprecated Use `xml::document(xml::node(root_name))` constructor instead.
102  */
103  XMLWRAPP_DEPRECATED("use xml::document(xml::node(root_name)) instead")
104  explicit document(const char *root_name);
105 
106  /**
107  Create a new XML document and set the root node.
108 
109  @param n The node to use as the root node. n will be copied.
110  */
111  explicit document(const node& n);
112 
113  /**
114  Load XML document from given file.
115 
116  Errors are handled by @a on_error handler; if you pass
117  xml::throw_on_error, xml::exception is thrown on errors. If there's a
118  fatal error that prevents the document from being loaded and the error
119  handler doesn't throw an exception, the constructor will throw
120  xml::exception anyway.
121 
122  @param filename The name of the file to parse.
123  @param on_error Handler called to process errors and warnings.
124 
125  @since 0.7.0
126  */
127  explicit document(const char *filename, error_handler& on_error);
128 
129  /**
130  Load XML document from given data.
131 
132  Errors are handled by @a on_error handler; by default, xml::exception
133  is thrown on errors. If there's a fatal error that prevents the document
134  from being loaded and the error handler doesn't throw an exception, the
135  constructor will throw xml::exception anyway.
136 
137  @param data The XML data to parse.
138  @param size The size of the XML data to parse.
139  @param on_error Handler called to process errors and warnings.
140 
141  @since 0.7.0
142  */
143  explicit document(const char *data, size_type size, error_handler& on_error = throw_on_error);
144 
145  /**
146  Copy construct a new XML document. The new document will be an exact
147  copy of the original.
148 
149  @param other The other document object to copy from.
150  */
151  document(const document& other);
152 
153  /**
154  Copy another document object into this one using the assignment
155  operator. This document object will be an exact copy of the other
156  document after the assignment.
157 
158  @param other The document to copy from.
159  @return *this.
160  */
161  document& operator=(const document& other);
162 
163  /**
164  Swap one xml::document object for another.
165 
166  @param other The other document to swap with.
167  */
168  void swap(document& other);
169 
170  /**
171  Clean up after an XML document object.
172  */
173  ~document();
174 
175  /**
176  Get a reference to the root node of this document. If no root node
177  has been set, the returned node will be a blank node. You should take
178  caution to use a reference so that you don't copy the whole node
179  tree!
180 
181  @return A const reference to the root node.
182  */
183  const node& get_root_node() const;
184 
185  /**
186  Get a reference to the root node of this document. If no root node
187  has been set, the returned node will be a blank node. You should take
188  caution to use a reference so that you don't copy the whole node
189  tree!
190 
191  @return A reference to the root node.
192  */
193  node& get_root_node();
194 
195  /**
196  Set the root node to the given node. A full copy is made and stored
197  in the document object.
198 
199  @param n The new root node to use.
200  */
201  void set_root_node(const node& n);
202 
203  /**
204  Get the XML version for this document. For generated documents, the
205  version will be the default. For parsed documents, this will be the
206  version from the XML declaration.
207 
208  @return The XML version string for this document.
209  */
210  const std::string& get_version() const;
211 
212  /**
213  Set the XML version number for this document. This version string
214  will be used when generating the XML output.
215 
216  @param version The version string to use, like "1.0".
217  */
218  void set_version(const char *version);
219 
220  /**
221  Get the XML encoding for this document. The default encoding is
222  ISO-8859-1.
223 
224  @return The encoding string.
225  */
226  const std::string& get_encoding() const;
227 
228  /**
229  Set the XML encoding string. If you don't set this, it will default
230  to ISO-8859-1.
231 
232  @param encoding The XML encoding to use.
233  */
234  void set_encoding(const char *encoding);
235 
236  /**
237  Find out if the current document is a standalone document. For
238  generated documents, this will be the default. For parsed documents
239  this will be set based on the XML declaration.
240 
241  @return True if this document is standalone.
242  @return False if this document is not standalone.
243  */
244  bool get_is_standalone() const;
245 
246  /**
247  Set the standalone flag. This will show up in the XML declaration of
248  the XML output.
249 
250  @param sa What to set the standalone flag to.
251  */
252  void set_is_standalone(bool sa);
253 
254  /**
255  Walk through the document and expand <xi:include> elements. For more
256  information, please see the w3c recommendation for XInclude.
257  http://www.w3.org/2001/XInclude.
258 
259  The return value of this function may change to int after a bug has
260  been fixed in libxml2 (xmlXIncludeDoProcess).
261 
262  @return False if there was an error with substitutions.
263  @return True if there were no errors (with or without substitutions).
264  */
265  bool process_xinclude();
266 
267  /**
268  Test to see if this document has an internal subset. That is, DTD
269  data that is declared within the XML document itself.
270 
271  @return True if this document has an internal subset.
272  @return False otherwise.
273  */
274  bool has_internal_subset() const;
275 
276  /**
277  Test to see if this document has an external subset. That is, it
278  references a DTD from an external source, such as a file or URL.
279 
280  @return True if this document has an external subset.
281  @return False otherwise.
282  */
283  bool has_external_subset() const;
284 
285  /**
286  Validate this document against the DTD that has been attached to it.
287  This would happen at parse time if there was a !DOCTYPE definition.
288  If the DTD is valid, and the document is valid, this member function
289  will return true.
290 
291  If it returns false, you may want to send the document through
292  xmllint to get the actual error messages.
293 
294  @return True if the document is valid.
295  @return False if there was a problem with the DTD or XML doc.
296  */
297  bool validate();
298 
299  /**
300  Parse the given DTD and try to validate this document against it. If
301  the DTD is valid, and the document is valid, this member function
302  will return true.
303 
304  If it returns false, you may want to send the document through
305  xmllint to get the actual error messages.
306 
307  This member function will add the parsed DTD to this document as the
308  external subset after the validation. If there is already an external
309  DTD attached to this document it will be removed and deleted.
310 
311  @param dtdname A filename or URL for the DTD to use.
312  @return True if the document is valid.
313  @return False if there was a problem with the DTD or XML doc.
314  */
315  bool validate(const char *dtdname);
316 
317  /**
318  Returns the number of child nodes of this document. This will always
319  be at least one, since all xmlwrapp documents must have a root node.
320  This member function is useful to find out how many document children
321  there are, including processing instructions, comments, etc.
322 
323  @return The number of children nodes that this document has.
324  */
325  size_type size() const;
326 
327  /**
328  Get an iterator to the first child node of this document. If what you
329  really wanted was the root node (the first element) you should use
330  the get_root_node() member function instead.
331 
332  @return An xml::node::iterator that points to the first child node.
333  @return An end iterator if there are no children in this document.
334  */
335  node::iterator begin();
336 
337  /**
338  Get a const_iterator to the first child node of this document. If
339  what you really wanted was the root node (the first element) you
340  should use the get_root_node() member function instead.
341 
342  @return An xml::node::const_iterator that points to the first child node.
343  @return An end const_iterator if there are no children in this document.
344  */
345  node::const_iterator begin() const;
346 
347  /**
348  Get an iterator that points one past the last child node for this
349  document.
350 
351  @return An end xml::node::iterator.
352  */
353  node::iterator end();
354 
355  /**
356  Get a const_iterator that points one past the last child node for
357  this document.
358 
359  @return An end xml::node::const_iterator.
360  */
361  node::const_iterator end() const;
362 
363  /**
364  Add a child xml::node to this document. You should not add an element
365  type node, since there can only be one root node. This member
366  function is only useful for adding processing instructions, comments,
367  etc. If you do try to add a node of type element, an exception will
368  be thrown.
369 
370  @param child The child xml::node to add.
371  */
372  void push_back (const node &child);
373 
374  /**
375  Insert a new child node. The new node will be inserted at the end of
376  the child list. This is similar to the xml::document::push_back member
377  function except that an iterator to the inserted node is returned.
378 
379  The rules from the push_back member function apply here. Don't add a
380  node of type element.
381 
382  @param n The node to insert as a child of this document.
383  @return An iterator that points to the newly inserted node.
384  @see xml::document::push_back
385  */
386  node::iterator insert (const node &n);
387 
388  /**
389  Insert a new child node. The new node will be inserted before the
390  node pointed to by the given iterator.
391 
392  The rules from the push_back member function apply here. Don't add a
393  node of type element.
394 
395  @param position An iterator that points to the location where the new node should be inserted (before it).
396  @param n The node to insert as a child of this document.
397  @return An iterator that points to the newly inserted node.
398  @see xml::document::push_back
399  */
400  node::iterator insert(node::iterator position, const node &n);
401 
402  /**
403  Replace the node pointed to by the given iterator with another node.
404  The old node will be removed, including all its children, and
405  replaced with the new node. This will invalidate any iterators that
406  point to the node to be replaced, or any pointers or references to
407  that node.
408 
409  Do not replace the root node with this member function. The same
410  rules that apply to push_back apply here. If you try to replace a
411  node of type element, an exception will be thrown.
412 
413  @param old_node An iterator that points to the node that should be removed.
414  @param new_node The node to put in old_node's place.
415  @return An iterator that points to the new node.
416  @see xml::document::push_back
417  */
418  node::iterator replace(node::iterator old_node, const node& new_node);
419 
420  /**
421  Erase the node that is pointed to by the given iterator. The node
422  and all its children will be removed from this document. This will
423  invalidate any iterators that point to the node to be erased, or any
424  pointers or references to that node.
425 
426  Do not remove the root node using this member function. The same
427  rules that apply to push_back apply here. If you try to erase the
428  root node, an exception will be thrown.
429 
430  @param to_erase An iterator that points to the node to be erased.
431  @return An iterator that points to the node after the one being erased.
432  @see xml::document::push_back
433  */
434  node::iterator erase(node::iterator to_erase);
435 
436  /**
437  Erase all nodes in the given range, from first to last. This will
438  invalidate any iterators that point to the nodes to be erased, or any
439  pointers or references to those nodes.
440 
441  Do not remove the root node using this member function. The same
442  rules that apply to push_back apply here. If you try to erase the
443  root node, an exception will be thrown.
444 
445  @param first The first node in the range to be removed.
446  @param last An iterator that points one past the last node to erase. Think xml::document::end().
447  @return An iterator that points to the node after the last one being erased.
448  @see xml::document::push_back
449  */
450  node::iterator erase(node::iterator first, node::iterator last);
451 
452  /**
453  Convert the XML document tree into XML text data and place it into
454  the given string.
455 
456  Any errors occurring while converting the document to string are passed
457  to @a on_error handler. By default, an exception will be thrown if
458  anything goes wrong.
459 
460  @param s The string to place the XML text data.
461  @param on_error Handler called to process errors and warnings (new
462  since 0.8.0).
463  */
464  void save_to_string(std::string& s, error_handler& on_error = throw_on_error) const;
465 
466  /**
467  Convert the XML document tree into XML text data and place it into
468  the given filename.
469 
470  By default, this function throws an exception if saving fails for any
471  reason; this behaviour can be customized by passing a non-default
472  @a on_error handler.
473 
474  @param filename The name of the file to place the XML text data into.
475  @param compression_level 0 is no compression, 1-9 allowed, where 1 is
476  for better speed, and 9 is for smaller size.
477  @param on_error Handler called to process errors and warnings (new
478  since 0.8.0).
479  @return True if the data was saved successfully.
480  @return False otherwise (notice that this is only possible if a custom
481  error handler not throwing on error is specified).
482  */
483  bool save_to_file(const char *filename,
484  int compression_level = 0,
485  error_handler& on_error = throw_on_error) const;
486 
487  /**
488  Convert the XML document tree into XML text data and then insert it
489  into the given stream.
490 
491  @param stream The stream to insert the XML into.
492  @param doc The document to insert.
493  @return The stream from the first parameter.
494  */
495  friend XMLWRAPP_API std::ostream& operator<< (std::ostream &stream, const document &doc);
496 
497 private:
498  impl::doc_impl *pimpl_; ///< Opaque implementation object; impl::doc_impl is only forward-declared in this header.
499 
500  void set_doc_data (void *data); // NOTE(review): the *_doc_data hooks pass untyped pointers; presumably the underlying libxml2 document -- confirm in the implementation
501  void set_doc_data_from_xslt (void *data, xslt::impl::result *xr);
502  void* get_doc_data();
503  void* get_doc_data_read_only() const;
504  void* release_doc_data();
505 
506  friend class tree_parser; // the friends below get access to the private members above
507  friend class schema;
508  friend class xslt::stylesheet;
509  friend struct impl::xpath_context_impl;
510 };
511 
512 } // namespace xml
513 
514 #endif // _xmlwrapp_document_h_
The xml::tree_parser class is used to parse an XML document and generate a tree like structure of xml...
Definition: tree_parser.h:73
This file contains errors-handling classes: xml::exception and xml::error_handler and derived classes...
The xml::error_handler class is used to handle libxml2 errors and warnings emitted during parsing...
Definition: errors.h:84
This file contains the definition of the xml::init class.
This file contains the definition of the xml::node class.
XSLT library namespace.
Definition: document.h:55
error_handler_throw_on_error throw_on_error
Error handler object that throws on any error.
STL namespace.
The xslt::stylesheet class is used to hold information about an XSLT stylesheet.
Definition: stylesheet.h:61
The xml::document class is used to hold the XML tree and various bits of information about it...
Definition: document.h:83
XML library namespace.
Definition: attributes.h:51
std::size_t size_type
size type
Definition: document.h:87
The xml::node class is used to hold information about one XML node.
Definition: node.h:88
XML Schema.
Definition: schema.h:66