2 * pugixml parser - version 1.4
\r
3 * --------------------------------------------------------
\r
5 * Report bugs and download new versions at http://pugixml.org/
\r
7 * This library is distributed under the MIT License. See notice at the end
\r
10 * This work is based on the pugxml parser, which is:
\r
14 #ifndef SOURCE_PUGIXML_CPP
\r
15 #define SOURCE_PUGIXML_CPP
\r
17 #include "pugixml.hpp"
\r
24 #ifdef PUGIXML_WCHAR_MODE
\r
28 #ifndef PUGIXML_NO_XPATH
\r
31 # ifdef PUGIXML_NO_EXCEPTIONS
\r
32 # include <setjmp.h>
\r
36 #ifndef PUGIXML_NO_STL
\r
42 // For placement new
\r
46 # pragma warning(push)
\r
47 # pragma warning(disable: 4127) // conditional expression is constant
\r
48 # pragma warning(disable: 4324) // structure was padded due to __declspec(align())
\r
49 # pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable
\r
50 # pragma warning(disable: 4702) // unreachable code
\r
51 # pragma warning(disable: 4996) // this function or variable may be unsafe
\r
52 # pragma warning(disable: 4793) // function compiled as native: presence of '_setjmp' makes a function unmanaged
\r
55 #ifdef __INTEL_COMPILER
\r
56 # pragma warning(disable: 177) // function was declared but never referenced
\r
57 # pragma warning(disable: 279) // controlling expression is constant
\r
58 # pragma warning(disable: 1478 1786) // function was declared "deprecated"
\r
59 # pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
\r
62 #if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
\r
63 # pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
\r
67 # pragma option push
\r
68 # pragma warn -8008 // condition is always false
\r
69 # pragma warn -8066 // unreachable code
\r
73 // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
\r
74 # pragma diag_suppress=178 // function was declared but never referenced
\r
75 # pragma diag_suppress=237 // controlling expression is constant
\r
78 // Inlining controls
\r
79 #if defined(_MSC_VER) && _MSC_VER >= 1300
\r
80 # define PUGI__NO_INLINE __declspec(noinline)
\r
81 #elif defined(__GNUC__)
\r
82 # define PUGI__NO_INLINE __attribute__((noinline))
\r
84 # define PUGI__NO_INLINE
\r
87 // Simple static assertion
\r
88 #define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
\r
90 // Digital Mars C++ bug workaround for passing char loaded from memory via stack
\r
92 # define PUGI__DMC_VOLATILE volatile
\r
94 # define PUGI__DMC_VOLATILE
\r
97 // Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all)
\r
98 #if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
\r
100 using std::memmove;
\r
103 // In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
\r
104 #if defined(_MSC_VER) && !defined(__S3E__)
\r
105 # define PUGI__MSVC_CRT_VERSION _MSC_VER
\r
108 #ifdef PUGIXML_HEADER_ONLY
\r
109 # define PUGI__NS_BEGIN namespace pugi { namespace impl {
\r
110 # define PUGI__NS_END } }
\r
111 # define PUGI__FN inline
\r
112 # define PUGI__FN_NO_INLINE inline
\r
114 # if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
\r
115 # define PUGI__NS_BEGIN namespace pugi { namespace impl {
\r
116 # define PUGI__NS_END } }
\r
118 # define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace {
\r
119 # define PUGI__NS_END } } }
\r
122 # define PUGI__FN_NO_INLINE PUGI__NO_INLINE
\r
126 #if !defined(_MSC_VER) || _MSC_VER >= 1600
\r
127 # include <stdint.h>
\r
129 # ifndef _UINTPTR_T_DEFINED
\r
130 // No native uintptr_t in MSVC6 and in some WinCE versions
\r
131 typedef size_t uintptr_t;
\r
132 #define _UINTPTR_T_DEFINED
\r
135 typedef unsigned __int8 uint8_t;
\r
136 typedef unsigned __int16 uint16_t;
\r
137 typedef unsigned __int32 uint32_t;
\r
141 // Memory allocation
\r
143 PUGI__FN void* default_allocate(size_t size)
\r
145 return malloc(size);
\r
148 PUGI__FN void default_deallocate(void* ptr)
\r
153 template <typename T>
\r
154 struct xml_memory_management_function_storage
\r
156 static allocation_function allocate;
\r
157 static deallocation_function deallocate;
\r
160 template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate;
\r
161 template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate;
\r
163 typedef xml_memory_management_function_storage<int> xml_memory;
\r
166 // String utilities
\r
168 // Get string length
\r
169 PUGI__FN size_t strlength(const char_t* s)
\r
173 #ifdef PUGIXML_WCHAR_MODE
\r
180 // Compare two strings
\r
181 PUGI__FN bool strequal(const char_t* src, const char_t* dst)
\r
183 assert(src && dst);
\r
185 #ifdef PUGIXML_WCHAR_MODE
\r
186 return wcscmp(src, dst) == 0;
\r
188 return strcmp(src, dst) == 0;
\r
192 // Compare null-terminated lhs with the character range [rhs, rhs + count)
\r
193 PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
\r
195 for (size_t i = 0; i < count; ++i)
\r
196 if (lhs[i] != rhs[i])
\r
199 return lhs[count] == 0;
\r
202 // Get length of wide string, even if CRT lacks wide character support
\r
203 PUGI__FN size_t strlength_wide(const wchar_t* s)
\r
207 #ifdef PUGIXML_WCHAR_MODE
\r
210 const wchar_t* end = s;
\r
211 while (*end) end++;
\r
212 return static_cast<size_t>(end - s);
\r
216 #ifdef PUGIXML_WCHAR_MODE
\r
217 // Convert string to wide string, assuming all symbols are ASCII
\r
218 PUGI__FN void widen_ascii(wchar_t* dest, const char* source)
\r
220 for (const char* i = source; *i; ++i) *dest++ = *i;
\r
226 #if !defined(PUGIXML_NO_STL) || !defined(PUGIXML_NO_XPATH)
\r
227 // auto_ptr-like buffer holder for exception recovery
\r
229 struct buffer_holder
\r
232 void (*deleter)(void*);
\r
234 buffer_holder(void* data_, void (*deleter_)(void*)): data(data_), deleter(deleter_)
\r
240 if (data) deleter(data);
\r
245 void* result = data;
\r
254 static const size_t xml_memory_page_size =
\r
255 #ifdef PUGIXML_MEMORY_PAGE_SIZE
\r
256 PUGIXML_MEMORY_PAGE_SIZE
\r
262 static const uintptr_t xml_memory_page_alignment = 32;
\r
263 static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1);
\r
264 static const uintptr_t xml_memory_page_name_allocated_mask = 16;
\r
265 static const uintptr_t xml_memory_page_value_allocated_mask = 8;
\r
266 static const uintptr_t xml_memory_page_type_mask = 7;
\r
268 struct xml_allocator;
\r
270 struct xml_memory_page
\r
272 static xml_memory_page* construct(void* memory)
\r
274 if (!memory) return 0; //$ redundant, left for performance
\r
276 xml_memory_page* result = static_cast<xml_memory_page*>(memory);
\r
278 result->allocator = 0;
\r
279 result->memory = 0;
\r
282 result->busy_size = 0;
\r
283 result->freed_size = 0;
\r
288 xml_allocator* allocator;
\r
292 xml_memory_page* prev;
\r
293 xml_memory_page* next;
\r
301 struct xml_memory_string_header
\r
303 uint16_t page_offset; // offset from page->data
\r
304 uint16_t full_size; // 0 if string occupies whole page
\r
307 struct xml_allocator
\r
309 xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
\r
313 xml_memory_page* allocate_page(size_t data_size)
\r
315 size_t size = offsetof(xml_memory_page, data) + data_size;
\r
317 // allocate block with some alignment, leaving memory for worst-case padding
\r
318 void* memory = xml_memory::allocate(size + xml_memory_page_alignment);
\r
319 if (!memory) return 0;
\r
321 // align upwards to page boundary
\r
322 void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1));
\r
324 // prepare page structure
\r
325 xml_memory_page* page = xml_memory_page::construct(page_memory);
\r
328 page->memory = memory;
\r
329 page->allocator = _root->allocator;
\r
334 static void deallocate_page(xml_memory_page* page)
\r
336 xml_memory::deallocate(page->memory);
\r
339 void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
\r
341 void* allocate_memory(size_t size, xml_memory_page*& out_page)
\r
343 if (_busy_size + size > xml_memory_page_size) return allocate_memory_oob(size, out_page);
\r
345 void* buf = _root->data + _busy_size;
\r
347 _busy_size += size;
\r
354 void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
\r
356 if (page == _root) page->busy_size = _busy_size;
\r
358 assert(ptr >= page->data && ptr < page->data + page->busy_size);
\r
361 page->freed_size += size;
\r
362 assert(page->freed_size <= page->busy_size);
\r
364 if (page->freed_size == page->busy_size)
\r
366 if (page->next == 0)
\r
368 assert(_root == page);
\r
370 // top page freed, just reset sizes
\r
371 page->busy_size = page->freed_size = 0;
\r
376 assert(_root != page);
\r
377 assert(page->prev);
\r
379 // remove from the list
\r
380 page->prev->next = page->next;
\r
381 page->next->prev = page->prev;
\r
384 deallocate_page(page);
\r
389 char_t* allocate_string(size_t length)
\r
391 // allocate memory for string and header block
\r
392 size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
\r
394 // round size up to pointer alignment boundary
\r
395 size_t full_size = (size + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1);
\r
397 xml_memory_page* page;
\r
398 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
\r
400 if (!header) return 0;
\r
403 ptrdiff_t page_offset = reinterpret_cast<char*>(header) - page->data;
\r
405 assert(page_offset >= 0 && page_offset < (1 << 16));
\r
406 header->page_offset = static_cast<uint16_t>(page_offset);
\r
408 // full_size == 0 for large strings that occupy the whole page
\r
409 assert(full_size < (1 << 16) || (page->busy_size == full_size && page_offset == 0));
\r
410 header->full_size = static_cast<uint16_t>(full_size < (1 << 16) ? full_size : 0);
\r
412 // round-trip through void* to avoid 'cast increases required alignment of target type' warning
\r
413 // header is guaranteed a pointer-sized alignment, which should be enough for char_t
\r
414 return static_cast<char_t*>(static_cast<void*>(header + 1));
\r
417 void deallocate_string(char_t* string)
\r
419 // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
\r
420 // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
\r
423 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
\r
426 size_t page_offset = offsetof(xml_memory_page, data) + header->page_offset;
\r
427 xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));
\r
429 // if full_size == 0 then this string occupies the whole page
\r
430 size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size;
\r
432 deallocate_memory(header, full_size, page);
\r
435 xml_memory_page* _root;
\r
439 PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
\r
441 const size_t large_allocation_threshold = xml_memory_page_size / 4;
\r
443 xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
\r
446 if (!page) return 0;
\r
448 if (size <= large_allocation_threshold)
\r
450 _root->busy_size = _busy_size;
\r
452 // insert page at the end of linked list
\r
453 page->prev = _root;
\r
454 _root->next = page;
\r
461 // insert page before the end of linked list, so that it is deleted as soon as possible
\r
462 // the last page is not deleted even if it's empty (see deallocate_memory)
\r
463 assert(_root->prev);
\r
465 page->prev = _root->prev;
\r
466 page->next = _root;
\r
468 _root->prev->next = page;
\r
469 _root->prev = page;
\r
472 // allocate inside page
\r
473 page->busy_size = size;
\r
481 /// A 'name=value' XML attribute structure.
\r
482 struct xml_attribute_struct
\r
485 xml_attribute_struct(impl::xml_memory_page* page): header(reinterpret_cast<uintptr_t>(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0)
\r
491 char_t* name; ///< Pointer to attribute name.
\r
492 char_t* value; ///< Pointer to attribute value.
\r
494 xml_attribute_struct* prev_attribute_c; ///< Previous attribute (cyclic list)
\r
495 xml_attribute_struct* next_attribute; ///< Next attribute
\r
498 /// An XML document tree node.
\r
499 struct xml_node_struct
\r
502 /// \param type - node type
\r
503 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(reinterpret_cast<uintptr_t>(page) | (type - 1)), parent(0), name(0), value(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
\r
509 xml_node_struct* parent; ///< Pointer to parent
\r
511 char_t* name; ///< Pointer to element name.
\r
512 char_t* value; ///< Pointer to any associated string data.
\r
514 xml_node_struct* first_child; ///< First child
\r
516 xml_node_struct* prev_sibling_c; ///< Left brother (cyclic list)
\r
517 xml_node_struct* next_sibling; ///< Right brother
\r
519 xml_attribute_struct* first_attribute; ///< First attribute
\r
524 struct xml_extra_buffer
\r
527 xml_extra_buffer* next;
\r
530 struct xml_document_struct: public xml_node_struct, public xml_allocator
\r
532 xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0)
\r
536 const char_t* buffer;
\r
538 xml_extra_buffer* extra_buffers;
\r
541 inline xml_allocator& get_allocator(const xml_node_struct* node)
\r
545 return *reinterpret_cast<xml_memory_page*>(node->header & xml_memory_page_pointer_mask)->allocator;
\r
549 // Low-level DOM operations
\r
551 inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
\r
553 xml_memory_page* page;
\r
554 void* memory = alloc.allocate_memory(sizeof(xml_attribute_struct), page);
\r
556 return new (memory) xml_attribute_struct(page);
\r
559 inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
\r
561 xml_memory_page* page;
\r
562 void* memory = alloc.allocate_memory(sizeof(xml_node_struct), page);
\r
564 return new (memory) xml_node_struct(page, type);
\r
567 inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
\r
569 uintptr_t header = a->header;
\r
571 if (header & impl::xml_memory_page_name_allocated_mask) alloc.deallocate_string(a->name);
\r
572 if (header & impl::xml_memory_page_value_allocated_mask) alloc.deallocate_string(a->value);
\r
574 alloc.deallocate_memory(a, sizeof(xml_attribute_struct), reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask));
\r
577 inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
\r
579 uintptr_t header = n->header;
\r
581 if (header & impl::xml_memory_page_name_allocated_mask) alloc.deallocate_string(n->name);
\r
582 if (header & impl::xml_memory_page_value_allocated_mask) alloc.deallocate_string(n->value);
\r
584 for (xml_attribute_struct* attr = n->first_attribute; attr; )
\r
586 xml_attribute_struct* next = attr->next_attribute;
\r
588 destroy_attribute(attr, alloc);
\r
593 for (xml_node_struct* child = n->first_child; child; )
\r
595 xml_node_struct* next = child->next_sibling;
\r
597 destroy_node(child, alloc);
\r
602 alloc.deallocate_memory(n, sizeof(xml_node_struct), reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask));
\r
605 PUGI__FN_NO_INLINE xml_node_struct* append_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
\r
607 xml_node_struct* child = allocate_node(alloc, type);
\r
608 if (!child) return 0;
\r
610 child->parent = node;
\r
612 xml_node_struct* first_child = node->first_child;
\r
616 xml_node_struct* last_child = first_child->prev_sibling_c;
\r
618 last_child->next_sibling = child;
\r
619 child->prev_sibling_c = last_child;
\r
620 first_child->prev_sibling_c = child;
\r
624 node->first_child = child;
\r
625 child->prev_sibling_c = child;
\r
631 PUGI__FN_NO_INLINE xml_attribute_struct* append_attribute_ll(xml_node_struct* node, xml_allocator& alloc)
\r
633 xml_attribute_struct* a = allocate_attribute(alloc);
\r
636 xml_attribute_struct* first_attribute = node->first_attribute;
\r
638 if (first_attribute)
\r
640 xml_attribute_struct* last_attribute = first_attribute->prev_attribute_c;
\r
642 last_attribute->next_attribute = a;
\r
643 a->prev_attribute_c = last_attribute;
\r
644 first_attribute->prev_attribute_c = a;
\r
648 node->first_attribute = a;
\r
649 a->prev_attribute_c = a;
\r
656 // Helper classes for code generation
\r
660 enum { value = 0 };
\r
665 enum { value = 1 };
\r
669 // Unicode utilities
\r
671 inline uint16_t endian_swap(uint16_t value)
\r
673 return static_cast<uint16_t>(((value & 0xff) << 8) | (value >> 8));
\r
676 inline uint32_t endian_swap(uint32_t value)
\r
678 return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | (value >> 24);
\r
681 struct utf8_counter
\r
683 typedef size_t value_type;
\r
685 static value_type low(value_type result, uint32_t ch)
\r
688 if (ch < 0x80) return result + 1;
\r
690 else if (ch < 0x800) return result + 2;
\r
692 else return result + 3;
\r
695 static value_type high(value_type result, uint32_t)
\r
697 // U+10000..U+10FFFF
\r
704 typedef uint8_t* value_type;
\r
706 static value_type low(value_type result, uint32_t ch)
\r
711 *result = static_cast<uint8_t>(ch);
\r
715 else if (ch < 0x800)
\r
717 result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6));
\r
718 result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
\r
724 result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12));
\r
725 result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
\r
726 result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
\r
731 static value_type high(value_type result, uint32_t ch)
\r
733 // U+10000..U+10FFFF
\r
734 result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18));
\r
735 result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
\r
736 result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
\r
737 result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
\r
741 static value_type any(value_type result, uint32_t ch)
\r
743 return (ch < 0x10000) ? low(result, ch) : high(result, ch);
\r
747 struct utf16_counter
\r
749 typedef size_t value_type;
\r
751 static value_type low(value_type result, uint32_t)
\r
756 static value_type high(value_type result, uint32_t)
\r
762 struct utf16_writer
\r
764 typedef uint16_t* value_type;
\r
766 static value_type low(value_type result, uint32_t ch)
\r
768 *result = static_cast<uint16_t>(ch);
\r
773 static value_type high(value_type result, uint32_t ch)
\r
775 uint32_t msh = static_cast<uint32_t>(ch - 0x10000) >> 10;
\r
776 uint32_t lsh = static_cast<uint32_t>(ch - 0x10000) & 0x3ff;
\r
778 result[0] = static_cast<uint16_t>(0xD800 + msh);
\r
779 result[1] = static_cast<uint16_t>(0xDC00 + lsh);
\r
784 static value_type any(value_type result, uint32_t ch)
\r
786 return (ch < 0x10000) ? low(result, ch) : high(result, ch);
\r
790 struct utf32_counter
\r
792 typedef size_t value_type;
\r
794 static value_type low(value_type result, uint32_t)
\r
799 static value_type high(value_type result, uint32_t)
\r
805 struct utf32_writer
\r
807 typedef uint32_t* value_type;
\r
809 static value_type low(value_type result, uint32_t ch)
\r
816 static value_type high(value_type result, uint32_t ch)
\r
823 static value_type any(value_type result, uint32_t ch)
\r
831 struct latin1_writer
\r
833 typedef uint8_t* value_type;
\r
835 static value_type low(value_type result, uint32_t ch)
\r
837 *result = static_cast<uint8_t>(ch > 255 ? '?' : ch);
\r
842 static value_type high(value_type result, uint32_t ch)
\r
852 template <size_t size> struct wchar_selector;
\r
854 template <> struct wchar_selector<2>
\r
856 typedef uint16_t type;
\r
857 typedef utf16_counter counter;
\r
858 typedef utf16_writer writer;
\r
861 template <> struct wchar_selector<4>
\r
863 typedef uint32_t type;
\r
864 typedef utf32_counter counter;
\r
865 typedef utf32_writer writer;
\r
868 typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
\r
869 typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
\r
871 template <typename Traits, typename opt_swap = opt_false> struct utf_decoder
\r
873 static inline typename Traits::value_type decode_utf8_block(const uint8_t* data, size_t size, typename Traits::value_type result)
\r
875 const uint8_t utf8_byte_mask = 0x3f;
\r
879 uint8_t lead = *data;
\r
881 // 0xxxxxxx -> U+0000..U+007F
\r
884 result = Traits::low(result, lead);
\r
888 // process aligned single-byte (ascii) blocks
\r
889 if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
\r
891 // round-trip through void* to silence 'cast increases required alignment of target type' warnings
\r
892 while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0)
\r
894 result = Traits::low(result, data[0]);
\r
895 result = Traits::low(result, data[1]);
\r
896 result = Traits::low(result, data[2]);
\r
897 result = Traits::low(result, data[3]);
\r
903 // 110xxxxx -> U+0080..U+07FF
\r
904 else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
\r
906 result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
\r
910 // 1110xxxx -> U+0800..U+FFFF
\r
911 else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
\r
913 result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
\r
917 // 11110xxx -> U+10000..U+10FFFF
\r
918 else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
\r
920 result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
\r
924 // 10xxxxxx or 11111xxx -> invalid
\r
935 static inline typename Traits::value_type decode_utf16_block(const uint16_t* data, size_t size, typename Traits::value_type result)
\r
937 const uint16_t* end = data + size;
\r
941 unsigned int lead = opt_swap::value ? endian_swap(*data) : *data;
\r
946 result = Traits::low(result, lead);
\r
950 else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
\r
952 result = Traits::low(result, lead);
\r
955 // surrogate pair lead
\r
956 else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && data + 1 < end)
\r
958 uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
\r
960 if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
\r
962 result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
\r
979 static inline typename Traits::value_type decode_utf32_block(const uint32_t* data, size_t size, typename Traits::value_type result)
\r
981 const uint32_t* end = data + size;
\r
985 uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
\r
988 if (lead < 0x10000)
\r
990 result = Traits::low(result, lead);
\r
993 // U+10000..U+10FFFF
\r
996 result = Traits::high(result, lead);
\r
1004 static inline typename Traits::value_type decode_latin1_block(const uint8_t* data, size_t size, typename Traits::value_type result)
\r
1006 for (size_t i = 0; i < size; ++i)
\r
1008 result = Traits::low(result, data[i]);
\r
1014 static inline typename Traits::value_type decode_wchar_block_impl(const uint16_t* data, size_t size, typename Traits::value_type result)
\r
1016 return decode_utf16_block(data, size, result);
\r
1019 static inline typename Traits::value_type decode_wchar_block_impl(const uint32_t* data, size_t size, typename Traits::value_type result)
\r
1021 return decode_utf32_block(data, size, result);
\r
1024 static inline typename Traits::value_type decode_wchar_block(const wchar_t* data, size_t size, typename Traits::value_type result)
\r
1026 return decode_wchar_block_impl(reinterpret_cast<const wchar_selector<sizeof(wchar_t)>::type*>(data), size, result);
\r
1030 template <typename T> PUGI__FN void convert_utf_endian_swap(T* result, const T* data, size_t length)
\r
1032 for (size_t i = 0; i < length; ++i) result[i] = endian_swap(data[i]);
\r
1035 #ifdef PUGIXML_WCHAR_MODE
\r
1036 PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
\r
1038 for (size_t i = 0; i < length; ++i) result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i])));
\r
1046 ct_parse_pcdata = 1, // \0, &, \r, <
\r
1047 ct_parse_attr = 2, // \0, &, \r, ', "
\r
1048 ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, tab
\r
1049 ct_space = 8, // \r, \n, space, tab
\r
1050 ct_parse_cdata = 16, // \0, ], >, \r
\r
1051 ct_parse_comment = 32, // \0, -, >, \r
\r
1052 ct_symbol = 64, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
\r
1053 ct_start_symbol = 128 // Any symbol > 127, a-z, A-Z, _, :
\r
1056 static const unsigned char chartype_table[256] =
\r
1058 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15
\r
1059 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31
\r
1060 8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47
\r
1061 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63
\r
1062 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79
\r
1063 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95
\r
1064 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111
\r
1065 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127
\r
1067 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+
\r
1068 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
\r
1069 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
\r
1070 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
\r
1071 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
\r
1072 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
\r
1073 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
\r
1074 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
\r
1079 ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
\r
1080 ctx_special_attr = 2, // Any symbol >= 0 and < 32 (except \t), &, <, >, "
\r
1081 ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _
\r
1082 ctx_digit = 8, // 0-9
\r
1083 ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
\r
1086 static const unsigned char chartypex_table[256] =
\r
1088 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15
\r
1089 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31
\r
1090 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47
\r
1091 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0, // 48-63
\r
1093 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79
\r
1094 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95
\r
1095 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111
\r
1096 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127
\r
1098 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+
\r
1099 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
\r
1100 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
\r
1101 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
\r
1102 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
\r
1103 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
\r
1104 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
\r
1105 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
\r
1108 #ifdef PUGIXML_WCHAR_MODE
\r
1109 #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
\r
1111 #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
\r
1114 #define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
\r
1115 #define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
\r
1117 PUGI__FN bool is_little_endian()
\r
1119 unsigned int ui = 1;
\r
1121 return *reinterpret_cast<unsigned char*>(&ui) == 1;
\r
1124 PUGI__FN xml_encoding get_wchar_encoding()
\r
1126 PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
\r
1128 if (sizeof(wchar_t) == 2)
\r
1129 return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
\r
1131 return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
\r
1134 PUGI__FN xml_encoding guess_buffer_encoding(uint8_t d0, uint8_t d1, uint8_t d2, uint8_t d3)
\r
1136 // look for BOM in first few bytes
\r
1137 if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
\r
1138 if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
\r
1139 if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
\r
1140 if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
\r
1141 if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
\r
1143 // look for <, <? or <?xm in various encodings
\r
1144 if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
\r
1145 if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
\r
1146 if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
\r
1147 if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
\r
1148 if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d) return encoding_utf8;
\r
1150 // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
\r
1151 if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
\r
1152 if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
\r
1154 // no known BOM or leading '<' signature detected, assume utf8
\r
1155 return encoding_utf8;
\r
1158 PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
\r
1160 // replace wchar encoding with utf implementation
\r
1161 if (encoding == encoding_wchar) return get_wchar_encoding();
\r
1163 // replace utf16 encoding with utf16 with specific endianness
\r
1164 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
\r
1166 // replace utf32 encoding with utf32 with specific endianness
\r
1167 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
\r
1169 // only do autodetection if no explicit encoding is requested
\r
1170 if (encoding != encoding_auto) return encoding;
\r
1172 // skip encoding autodetection if input buffer is too small
\r
1173 if (size < 4) return encoding_utf8;
\r
1175 // try to guess encoding (based on XML specification, Appendix F.1)
\r
1176 const uint8_t* data = static_cast<const uint8_t*>(contents);
\r
1178 PUGI__DMC_VOLATILE uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
\r
1180 return guess_buffer_encoding(d0, d1, d2, d3);
\r
1183 PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
\r
1185 size_t length = size / sizeof(char_t);
\r
1189 out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
\r
1190 out_length = length;
\r
1194 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
\r
1195 if (!buffer) return false;
\r
1197 memcpy(buffer, contents, length * sizeof(char_t));
\r
1198 buffer[length] = 0;
\r
1200 out_buffer = buffer;
\r
1201 out_length = length + 1;
\r
1207 #ifdef PUGIXML_WCHAR_MODE
\r
1208 PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
\r
1210 return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
\r
1211 (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
\r
1214 PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
\r
1216 const char_t* data = static_cast<const char_t*>(contents);
\r
1217 size_t length = size / sizeof(char_t);
\r
1221 char_t* buffer = const_cast<char_t*>(data);
\r
1223 convert_wchar_endian_swap(buffer, data, length);
\r
1225 out_buffer = buffer;
\r
1226 out_length = length;
\r
1230 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
\r
1231 if (!buffer) return false;
\r
1233 convert_wchar_endian_swap(buffer, data, length);
\r
1234 buffer[length] = 0;
\r
1236 out_buffer = buffer;
\r
1237 out_length = length + 1;
\r
1243 PUGI__FN bool convert_buffer_utf8(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)
\r
1245 const uint8_t* data = static_cast<const uint8_t*>(contents);
\r
1246 size_t data_length = size;
\r
1248 // first pass: get length in wchar_t units
\r
1249 size_t length = utf_decoder<wchar_counter>::decode_utf8_block(data, data_length, 0);
\r
1251 // allocate buffer of suitable length
\r
1252 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
\r
1253 if (!buffer) return false;
\r
1255 // second pass: convert utf8 input to wchar_t
\r
1256 wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
\r
1257 wchar_writer::value_type oend = utf_decoder<wchar_writer>::decode_utf8_block(data, data_length, obegin);
\r
1259 assert(oend == obegin + length);
\r
1262 out_buffer = buffer;
\r
1263 out_length = length + 1;
\r
1268 template <typename opt_swap> PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
\r
1270 const uint16_t* data = static_cast<const uint16_t*>(contents);
\r
1271 size_t data_length = size / sizeof(uint16_t);
\r
1273 // first pass: get length in wchar_t units
\r
1274 size_t length = utf_decoder<wchar_counter, opt_swap>::decode_utf16_block(data, data_length, 0);
\r
1276 // allocate buffer of suitable length
\r
1277 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
\r
1278 if (!buffer) return false;
\r
1280 // second pass: convert utf16 input to wchar_t
\r
1281 wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
\r
1282 wchar_writer::value_type oend = utf_decoder<wchar_writer, opt_swap>::decode_utf16_block(data, data_length, obegin);
\r
1284 assert(oend == obegin + length);
\r
1287 out_buffer = buffer;
\r
1288 out_length = length + 1;
\r
1293 template <typename opt_swap> PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
\r
1295 const uint32_t* data = static_cast<const uint32_t*>(contents);
\r
1296 size_t data_length = size / sizeof(uint32_t);
\r
1298 // first pass: get length in wchar_t units
\r
1299 size_t length = utf_decoder<wchar_counter, opt_swap>::decode_utf32_block(data, data_length, 0);
\r
1301 // allocate buffer of suitable length
\r
1302 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
\r
1303 if (!buffer) return false;
\r
1305 // second pass: convert utf32 input to wchar_t
\r
1306 wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
\r
1307 wchar_writer::value_type oend = utf_decoder<wchar_writer, opt_swap>::decode_utf32_block(data, data_length, obegin);
\r
1309 assert(oend == obegin + length);
\r
1312 out_buffer = buffer;
\r
1313 out_length = length + 1;
\r
1318 PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)
\r
1320 const uint8_t* data = static_cast<const uint8_t*>(contents);
\r
1321 size_t data_length = size;
\r
1323 // get length in wchar_t units
\r
1324 size_t length = data_length;
\r
1326 // allocate buffer of suitable length
\r
1327 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
\r
1328 if (!buffer) return false;
\r
1330 // convert latin1 input to wchar_t
\r
1331 wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
\r
1332 wchar_writer::value_type oend = utf_decoder<wchar_writer>::decode_latin1_block(data, data_length, obegin);
\r
1334 assert(oend == obegin + length);
\r
1337 out_buffer = buffer;
\r
1338 out_length = length + 1;
\r
1343 PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
\r
1345 // get native encoding
\r
1346 xml_encoding wchar_encoding = get_wchar_encoding();
\r
1348 // fast path: no conversion required
\r
1349 if (encoding == wchar_encoding) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
\r
1351 // only endian-swapping is required
\r
1352 if (need_endian_swap_utf(encoding, wchar_encoding)) return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
\r
1354 // source encoding is utf8
\r
1355 if (encoding == encoding_utf8) return convert_buffer_utf8(out_buffer, out_length, contents, size);
\r
1357 // source encoding is utf16
\r
1358 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
\r
1360 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
\r
1362 return (native_encoding == encoding) ?
\r
1363 convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) :
\r
1364 convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
\r
1367 // source encoding is utf32
\r
1368 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
\r
1370 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
\r
1372 return (native_encoding == encoding) ?
\r
1373 convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) :
\r
1374 convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
\r
1377 // source encoding is latin1
\r
1378 if (encoding == encoding_latin1) return convert_buffer_latin1(out_buffer, out_length, contents, size);
\r
1380 assert(!"Invalid encoding");
\r
1384 template <typename opt_swap> PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
\r
1386 const uint16_t* data = static_cast<const uint16_t*>(contents);
\r
1387 size_t data_length = size / sizeof(uint16_t);
\r
1389 // first pass: get length in utf8 units
\r
1390 size_t length = utf_decoder<utf8_counter, opt_swap>::decode_utf16_block(data, data_length, 0);
\r
1392 // allocate buffer of suitable length
\r
1393 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
\r
1394 if (!buffer) return false;
\r
1396 // second pass: convert utf16 input to utf8
\r
1397 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
\r
1398 uint8_t* oend = utf_decoder<utf8_writer, opt_swap>::decode_utf16_block(data, data_length, obegin);
\r
1400 assert(oend == obegin + length);
\r
1403 out_buffer = buffer;
\r
1404 out_length = length + 1;
\r
1409 template <typename opt_swap> PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
\r
1411 const uint32_t* data = static_cast<const uint32_t*>(contents);
\r
1412 size_t data_length = size / sizeof(uint32_t);
\r
1414 // first pass: get length in utf8 units
\r
1415 size_t length = utf_decoder<utf8_counter, opt_swap>::decode_utf32_block(data, data_length, 0);
\r
1417 // allocate buffer of suitable length
\r
1418 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
\r
1419 if (!buffer) return false;
\r
1421 // second pass: convert utf32 input to utf8
\r
1422 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
\r
1423 uint8_t* oend = utf_decoder<utf8_writer, opt_swap>::decode_utf32_block(data, data_length, obegin);
\r
1425 assert(oend == obegin + length);
\r
1428 out_buffer = buffer;
\r
1429 out_length = length + 1;
\r
1434 PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
\r
1436 for (size_t i = 0; i < size; ++i)
\r
1437 if (data[i] > 127)
\r
1443 PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
\r
1445 const uint8_t* data = static_cast<const uint8_t*>(contents);
\r
1446 size_t data_length = size;
\r
1448 // get size of prefix that does not need utf8 conversion
\r
1449 size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length);
\r
1450 assert(prefix_length <= data_length);
\r
1452 const uint8_t* postfix = data + prefix_length;
\r
1453 size_t postfix_length = data_length - prefix_length;
\r
1455 // if no conversion is needed, just return the original buffer
\r
1456 if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
\r
1458 // first pass: get length in utf8 units
\r
1459 size_t length = prefix_length + utf_decoder<utf8_counter>::decode_latin1_block(postfix, postfix_length, 0);
\r
1461 // allocate buffer of suitable length
\r
1462 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
\r
1463 if (!buffer) return false;
\r
1465 // second pass: convert latin1 input to utf8
\r
1466 memcpy(buffer, data, prefix_length);
\r
1468 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
\r
1469 uint8_t* oend = utf_decoder<utf8_writer>::decode_latin1_block(postfix, postfix_length, obegin + prefix_length);
\r
1471 assert(oend == obegin + length);
\r
1474 out_buffer = buffer;
\r
1475 out_length = length + 1;
\r
1480 PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
\r
1482 // fast path: no conversion required
\r
1483 if (encoding == encoding_utf8) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
\r
1485 // source encoding is utf16
\r
1486 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
\r
1488 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
\r
1490 return (native_encoding == encoding) ?
\r
1491 convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) :
\r
1492 convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
\r
1495 // source encoding is utf32
\r
1496 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
\r
1498 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
\r
1500 return (native_encoding == encoding) ?
\r
1501 convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) :
\r
1502 convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
\r
1505 // source encoding is latin1
\r
1506 if (encoding == encoding_latin1) return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
\r
1508 assert(!"Invalid encoding");
\r
1513 PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
\r
1515 // get length in utf8 characters
\r
1516 return utf_decoder<utf8_counter>::decode_wchar_block(str, length, 0);
\r
1519 PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
\r
1521 // convert to utf8
\r
1522 uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
\r
1523 uint8_t* end = utf_decoder<utf8_writer>::decode_wchar_block(str, length, begin);
\r
1525 assert(begin + size == end);
\r
1532 #ifndef PUGIXML_NO_STL
\r
1533 PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
\r
1535 // first pass: get length in utf8 characters
\r
1536 size_t size = as_utf8_begin(str, length);
\r
1538 // allocate resulting string
\r
1539 std::string result;
\r
1540 result.resize(size);
\r
1542 // second pass: convert to utf8
\r
1543 if (size > 0) as_utf8_end(&result[0], size, str, length);
\r
1548 PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)
\r
1550 const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
\r
1552 // first pass: get length in wchar_t units
\r
1553 size_t length = utf_decoder<wchar_counter>::decode_utf8_block(data, size, 0);
\r
1555 // allocate resulting string
\r
1556 std::basic_string<wchar_t> result;
\r
1557 result.resize(length);
\r
1559 // second pass: convert to wchar_t
\r
1562 wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
\r
1563 wchar_writer::value_type end = utf_decoder<wchar_writer>::decode_utf8_block(data, size, begin);
\r
1565 assert(begin + length == end);
\r
1573 inline bool strcpy_insitu_allow(size_t length, uintptr_t allocated, char_t* target)
\r
1576 size_t target_length = strlength(target);
\r
1578 // always reuse document buffer memory if possible
\r
1579 if (!allocated) return target_length >= length;
\r
1581 // reuse heap memory if waste is not too great
\r
1582 const size_t reuse_threshold = 32;
\r
1584 return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
\r
1587 PUGI__FN bool strcpy_insitu(char_t*& dest, uintptr_t& header, uintptr_t header_mask, const char_t* source)
\r
1591 size_t source_length = strlength(source);
\r
1593 if (source_length == 0)
\r
1595 // empty string and null pointer are equivalent, so just deallocate old memory
\r
1596 xml_allocator* alloc = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator;
\r
1598 if (header & header_mask) alloc->deallocate_string(dest);
\r
1600 // mark the string as not allocated
\r
1602 header &= ~header_mask;
\r
1606 else if (dest && strcpy_insitu_allow(source_length, header & header_mask, dest))
\r
1608 // we can reuse old buffer, so just copy the new data (including zero terminator)
\r
1609 memcpy(dest, source, (source_length + 1) * sizeof(char_t));
\r
1615 xml_allocator* alloc = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator;
\r
1617 // allocate new buffer
\r
1618 char_t* buf = alloc->allocate_string(source_length + 1);
\r
1619 if (!buf) return false;
\r
1621 // copy the string (including zero terminator)
\r
1622 memcpy(buf, source, (source_length + 1) * sizeof(char_t));
\r
1624 // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
\r
1625 if (header & header_mask) alloc->deallocate_string(dest);
\r
1627 // the string is now allocated, so set the flag
\r
1629 header |= header_mask;
\r
1640 gap(): end(0), size(0)
\r
1644 // Push new gap, move s count bytes further (skipping the gap).
\r
1645 // Collapse previous gap.
\r
1646 void push(char_t*& s, size_t count)
\r
1648 if (end) // there was a gap already; collapse it
\r
1650 // Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
\r
1652 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
\r
1655 s += count; // end of current gap
\r
1657 // "merge" two gaps
\r
1662 // Collapse all gaps, return past-the-end pointer
\r
1663 char_t* flush(char_t* s)
\r
1667 // Move [old_gap_end, current_pos) to [old_gap_start, ...)
\r
1669 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
\r
1677 PUGI__FN char_t* strconv_escape(char_t* s, gap& g)
\r
1679 char_t* stre = s + 1;
\r
1683 case '#': // &#...
\r
1685 unsigned int ucsc = 0;
\r
1687 if (stre[1] == 'x') // &#x... (hex code)
\r
1691 char_t ch = *stre;
\r
1693 if (ch == ';') return stre;
\r
1697 if (static_cast<unsigned int>(ch - '0') <= 9)
\r
1698 ucsc = 16 * ucsc + (ch - '0');
\r
1699 else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
\r
1700 ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
\r
1701 else if (ch == ';')
\r
1711 else // &#... (dec code)
\r
1713 char_t ch = *++stre;
\r
1715 if (ch == ';') return stre;
\r
1719 if (static_cast<unsigned int>(static_cast<unsigned int>(ch) - '0') <= 9)
\r
1720 ucsc = 10 * ucsc + (ch - '0');
\r
1721 else if (ch == ';')
\r
1732 #ifdef PUGIXML_WCHAR_MODE
\r
1733 s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
\r
1735 s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
\r
1738 g.push(s, stre - s);
\r
1746 if (*stre == 'm') // &am
\r
1748 if (*++stre == 'p' && *++stre == ';') // &
\r
1753 g.push(s, stre - s);
\r
1757 else if (*stre == 'p') // &ap
\r
1759 if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // '
\r
1764 g.push(s, stre - s);
\r
1773 if (*++stre == 't' && *++stre == ';') // >
\r
1778 g.push(s, stre - s);
\r
1786 if (*++stre == 't' && *++stre == ';') // <
\r
1791 g.push(s, stre - s);
\r
1799 if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // "
\r
1804 g.push(s, stre - s);
\r
1817 // Utility macro for last character handling
\r
1818 #define ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))
\r
1820 PUGI__FN char_t* strconv_comment(char_t* s, char_t endch)
\r
1826 while (!PUGI__IS_CHARTYPE(*s, ct_parse_comment)) ++s;
\r
1828 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
\r
1830 *s++ = '\n'; // replace first one with 0x0a
\r
1832 if (*s == '\n') g.push(s, 1);
\r
1834 else if (s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>')) // comment ends here
\r
1838 return s + (s[2] == '>' ? 3 : 2);
\r
1848 PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch)
\r
1854 while (!PUGI__IS_CHARTYPE(*s, ct_parse_cdata)) ++s;
\r
1856 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
\r
1858 *s++ = '\n'; // replace first one with 0x0a
\r
1860 if (*s == '\n') g.push(s, 1);
\r
1862 else if (s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')) // CDATA ends here
\r
1876 typedef char_t* (*strconv_pcdata_t)(char_t*);
\r
1878 template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
\r
1880 static char_t* parse(char_t* s)
\r
1884 char_t* begin = s;
\r
1888 while (!PUGI__IS_CHARTYPE(*s, ct_parse_pcdata)) ++s;
\r
1890 if (*s == '<') // PCDATA ends here
\r
1892 char_t* end = g.flush(s);
\r
1894 if (opt_trim::value)
\r
1895 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
\r
1902 else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
\r
1904 *s++ = '\n'; // replace first one with 0x0a
\r
1906 if (*s == '\n') g.push(s, 1);
\r
1908 else if (opt_escape::value && *s == '&')
\r
1910 s = strconv_escape(s, g);
\r
1914 char_t* end = g.flush(s);
\r
1916 if (opt_trim::value)
\r
1917 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
\r
1929 PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
\r
1931 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800);
\r
1933 switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (eol escapes trim)
\r
1935 case 0: return strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse;
\r
1936 case 1: return strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse;
\r
1937 case 2: return strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse;
\r
1938 case 3: return strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse;
\r
1939 case 4: return strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse;
\r
1940 case 5: return strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse;
\r
1941 case 6: return strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse;
\r
1942 case 7: return strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse;
\r
1943 default: assert(false); return 0; // should not get here
\r
1947 typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
\r
1949 template <typename opt_escape> struct strconv_attribute_impl
\r
1951 static char_t* parse_wnorm(char_t* s, char_t end_quote)
\r
1955 // trim leading whitespaces
\r
1956 if (PUGI__IS_CHARTYPE(*s, ct_space))
\r
1961 while (PUGI__IS_CHARTYPE(*str, ct_space));
\r
1963 g.push(s, str - s);
\r
1968 while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr_ws | ct_space)) ++s;
\r
1970 if (*s == end_quote)
\r
1972 char_t* str = g.flush(s);
\r
1975 while (PUGI__IS_CHARTYPE(*str, ct_space));
\r
1979 else if (PUGI__IS_CHARTYPE(*s, ct_space))
\r
1983 if (PUGI__IS_CHARTYPE(*s, ct_space))
\r
1985 char_t* str = s + 1;
\r
1986 while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
\r
1988 g.push(s, str - s);
\r
1991 else if (opt_escape::value && *s == '&')
\r
1993 s = strconv_escape(s, g);
\r
2003 static char_t* parse_wconv(char_t* s, char_t end_quote)
\r
2009 while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr_ws)) ++s;
\r
2011 if (*s == end_quote)
\r
2017 else if (PUGI__IS_CHARTYPE(*s, ct_space))
\r
2023 if (*s == '\n') g.push(s, 1);
\r
2027 else if (opt_escape::value && *s == '&')
\r
2029 s = strconv_escape(s, g);
\r
2039 static char_t* parse_eol(char_t* s, char_t end_quote)
\r
2045 while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr)) ++s;
\r
2047 if (*s == end_quote)
\r
2053 else if (*s == '\r')
\r
2057 if (*s == '\n') g.push(s, 1);
\r
2059 else if (opt_escape::value && *s == '&')
\r
2061 s = strconv_escape(s, g);
\r
2071 static char_t* parse_simple(char_t* s, char_t end_quote)
\r
2077 while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr)) ++s;
\r
2079 if (*s == end_quote)
\r
2085 else if (opt_escape::value && *s == '&')
\r
2087 s = strconv_escape(s, g);
\r
2098 PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
\r
2100 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
\r
2102 switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes)
\r
2104 case 0: return strconv_attribute_impl<opt_false>::parse_simple;
\r
2105 case 1: return strconv_attribute_impl<opt_true>::parse_simple;
\r
2106 case 2: return strconv_attribute_impl<opt_false>::parse_eol;
\r
2107 case 3: return strconv_attribute_impl<opt_true>::parse_eol;
\r
2108 case 4: return strconv_attribute_impl<opt_false>::parse_wconv;
\r
2109 case 5: return strconv_attribute_impl<opt_true>::parse_wconv;
\r
2110 case 6: return strconv_attribute_impl<opt_false>::parse_wconv;
\r
2111 case 7: return strconv_attribute_impl<opt_true>::parse_wconv;
\r
2112 case 8: return strconv_attribute_impl<opt_false>::parse_wnorm;
\r
2113 case 9: return strconv_attribute_impl<opt_true>::parse_wnorm;
\r
2114 case 10: return strconv_attribute_impl<opt_false>::parse_wnorm;
\r
2115 case 11: return strconv_attribute_impl<opt_true>::parse_wnorm;
\r
2116 case 12: return strconv_attribute_impl<opt_false>::parse_wnorm;
\r
2117 case 13: return strconv_attribute_impl<opt_true>::parse_wnorm;
\r
2118 case 14: return strconv_attribute_impl<opt_false>::parse_wnorm;
\r
2119 case 15: return strconv_attribute_impl<opt_true>::parse_wnorm;
\r
2120 default: assert(false); return 0; // should not get here
\r
2124 inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
\r
2126 xml_parse_result result;
\r
2127 result.status = status;
\r
2128 result.offset = offset;
\r
2135 xml_allocator alloc;
\r
2136 char_t* error_offset;
\r
2137 xml_parse_status error_status;
\r
2139 // Parser utilities.
\r
2140 #define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
\r
2141 #define PUGI__OPTSET(OPT) ( optmsk & (OPT) )
\r
2142 #define PUGI__PUSHNODE(TYPE) { cursor = append_node(cursor, alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
\r
2143 #define PUGI__POPNODE() { cursor = cursor->parent; }
\r
2144 #define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; }
\r
2145 #define PUGI__SCANWHILE(X) { while ((X)) ++s; }
\r
2146 #define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; }
\r
2147 #define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast<char_t*>(0)
\r
2148 #define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); }
\r
2150 xml_parser(const xml_allocator& alloc_): alloc(alloc_), error_offset(0), error_status(status_ok)
\r
2154 // DOCTYPE consists of nested sections of the following possible types:
\r
2155 // <!-- ... -->, <? ... ?>, "...", '...'
\r
2158 // First group can not contain nested groups
\r
2159 // Second group can contain nested groups of the same type
\r
2160 // Third group can contain all other groups
\r
2161 char_t* parse_doctype_primitive(char_t* s)
\r
2163 if (*s == '"' || *s == '\'')
\r
2167 PUGI__SCANFOR(*s == ch);
\r
2168 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
\r
2172 else if (s[0] == '<' && s[1] == '?')
\r
2176 PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
\r
2177 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
\r
2181 else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
\r
2184 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
\r
2185 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
\r
2189 else PUGI__THROW_ERROR(status_bad_doctype, s);
\r
2194 char_t* parse_doctype_ignore(char_t* s)
\r
2196 assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
\r
2201 if (s[0] == '<' && s[1] == '!' && s[2] == '[')
\r
2203 // nested ignore section
\r
2204 s = parse_doctype_ignore(s);
\r
2207 else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
\r
2209 // ignore section end
\r
2217 PUGI__THROW_ERROR(status_bad_doctype, s);
\r
2220 char_t* parse_doctype_group(char_t* s, char_t endch, bool toplevel)
\r
2222 assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
\r
2227 if (s[0] == '<' && s[1] == '!' && s[2] != '-')
\r
2232 s = parse_doctype_ignore(s);
\r
2237 // some control group
\r
2238 s = parse_doctype_group(s, endch, false);
\r
2242 assert(*s == '>');
\r
2246 else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
\r
2248 // unknown tag (forbidden), or some primitive group
\r
2249 s = parse_doctype_primitive(s);
\r
2252 else if (*s == '>')
\r
2259 if (!toplevel || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
\r
2264 char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
\r
2266 // parse node contents, starting with exclamation mark
\r
2269 if (*s == '-') // '<!-...'
\r
2273 if (*s == '-') // '<!--...'
\r
2277 if (PUGI__OPTSET(parse_comments))
\r
2279 PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
\r
2280 cursor->value = s; // Save the offset.
\r
2283 if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments))
\r
2285 s = strconv_comment(s, endch);
\r
2287 if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
\r
2291 // Scan for terminating '-->'.
\r
2292 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>'));
\r
2293 PUGI__CHECK_ERROR(status_bad_comment, s);
\r
2295 if (PUGI__OPTSET(parse_comments))
\r
2296 *s = 0; // Zero-terminate this segment at the first terminating '-'.
\r
2298 s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
\r
2301 else PUGI__THROW_ERROR(status_bad_comment, s);
\r
2303 else if (*s == '[')
\r
2306 if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
\r
2310 if (PUGI__OPTSET(parse_cdata))
\r
2312 PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
\r
2313 cursor->value = s; // Save the offset.
\r
2315 if (PUGI__OPTSET(parse_eol))
\r
2317 s = strconv_cdata(s, endch);
\r
2319 if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
\r
2323 // Scan for terminating ']]>'.
\r
2324 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>'));
\r
2325 PUGI__CHECK_ERROR(status_bad_cdata, s);
\r
2327 *s++ = 0; // Zero-terminate this segment.
\r
2330 else // Flagged for discard, but we still have to scan for the terminator.
\r
2332 // Scan for terminating ']]>'.
\r
2333 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>'));
\r
2334 PUGI__CHECK_ERROR(status_bad_cdata, s);
\r
2339 s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
\r
2341 else PUGI__THROW_ERROR(status_bad_cdata, s);
\r
2343 else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && ENDSWITH(s[6], 'E'))
\r
2347 if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);
\r
2349 char_t* mark = s + 9;
\r
2351 s = parse_doctype_group(s, endch, true);
\r
2354 assert((*s == 0 && endch == '>') || *s == '>');
\r
2357 if (PUGI__OPTSET(parse_doctype))
\r
2359 while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;
\r
2361 PUGI__PUSHNODE(node_doctype);
\r
2363 cursor->value = mark;
\r
2368 else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
\r
2369 else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
\r
2370 else PUGI__THROW_ERROR(status_unrecognized_tag, s);
\r
2375 char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
\r
2377 // load into registers
\r
2378 xml_node_struct* cursor = ref_cursor;
\r
2381 // parse node contents, starting with question mark
\r
2385 char_t* target = s;
\r
2387 if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);
\r
2389 PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
\r
2390 PUGI__CHECK_ERROR(status_bad_pi, s);
\r
2392 // determine node type; stricmp / strcasecmp is not portable
\r
2393 bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
\r
2395 if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
\r
2399 // disallow non top-level declarations
\r
2400 if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);
\r
2402 PUGI__PUSHNODE(node_declaration);
\r
2406 PUGI__PUSHNODE(node_pi);
\r
2409 cursor->name = target;
\r
2413 // parse value/attributes
\r
2417 if (!ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
\r
2422 else if (PUGI__IS_CHARTYPE(ch, ct_space))
\r
2426 // scan for tag end
\r
2427 char_t* value = s;
\r
2429 PUGI__SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
\r
2430 PUGI__CHECK_ERROR(status_bad_pi, s);
\r
2434 // replace ending ? with / so that 'element' terminates properly
\r
2437 // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
\r
2442 // store value and step over >
\r
2443 cursor->value = value;
\r
2451 else PUGI__THROW_ERROR(status_bad_pi, s);
\r
2455 // scan for tag end
\r
2456 PUGI__SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
\r
2457 PUGI__CHECK_ERROR(status_bad_pi, s);
\r
2459 s += (s[1] == '>' ? 2 : 1);
\r
2462 // store from registers
\r
2463 ref_cursor = cursor;
\r
// Main tree parser: walks the zero-terminated text at s and attaches the nodes it finds below root.
// - optmsk selects the attribute and PCDATA conversion routines (get_strconv_attribute /
//   get_strconv_pcdata) and is queried through PUGI__OPTSET for whitespace/fragment handling.
// - endch is consulted whenever a zero is met where '>' or '?' would otherwise be required
//   (the caller, parse(), passes the character it overwrote with the terminating zero).
// Failures are reported via PUGI__THROW_ERROR / PUGI__CHECK_ERROR with a status code and a position.
2468 char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch)
\r
2470 strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
\r
2471 strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
\r
// cursor tracks the node currently being filled; it starts at root and must be back at root at the end
2474 xml_node_struct* cursor = root;
\r
2484 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
\r
2486 PUGI__PUSHNODE(node_element); // Append a new node to the tree.
\r
2490 PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.
\r
2491 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
\r
2497 else if (PUGI__IS_CHARTYPE(ch, ct_space))
\r
2502 PUGI__SKIPWS(); // Eat any whitespace.
\r
2504 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
\r
2506 xml_attribute_struct* a = append_attribute_ll(cursor, alloc); // Make space for this attribute.
\r
2507 if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);
\r
2509 a->name = s; // Save the offset.
\r
2511 PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.
\r
2512 PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance
\r
2514 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
\r
2515 PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance
\r
2517 if (PUGI__IS_CHARTYPE(ch, ct_space))
\r
2519 PUGI__SKIPWS(); // Eat any whitespace.
\r
2520 PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance
\r
2526 if (ch == '=') // '<... #=...'
\r
2528 PUGI__SKIPWS(); // Eat any whitespace.
\r
2530 if (*s == '"' || *s == '\'') // '<... #="...'
\r
2532 ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
\r
2533 ++s; // Step over the quote.
\r
2534 a->value = s; // Save the offset.
\r
// the conversion routine returns the position after the value, or null on failure (checked below)
2536 s = strconv_attribute(s, ch);
\r
2538 if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);
\r
2540 // After this line the loop continues from the start;
\r
2541 // Whitespaces, / and > are ok, symbols and EOF are wrong,
\r
2542 // everything else will be detected
\r
2543 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
\r
2545 else PUGI__THROW_ERROR(status_bad_attribute, s);
\r
2547 else PUGI__THROW_ERROR(status_bad_attribute, s);
\r
2549 else if (*s == '/')
\r
2559 else if (*s == 0 && endch == '>')
\r
2564 else PUGI__THROW_ERROR(status_bad_start_element, s);
\r
2566 else if (*s == '>')
\r
2572 else if (*s == 0 && endch == '>')
\r
2576 else PUGI__THROW_ERROR(status_bad_start_element, s);
\r
2581 else if (ch == '/') // '<#.../'
\r
2583 if (!ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);
\r
2585 PUGI__POPNODE(); // Pop.
\r
2591 // we stepped over null terminator, backtrack & handle closing tag
\r
2594 if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
\r
2596 else PUGI__THROW_ERROR(status_bad_start_element, s);
\r
2598 else if (*s == '/')
\r
// closing tag: its name is compared character by character against the name of the open node
2602 char_t* name = cursor->name;
\r
2603 if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, s);
\r
2605 while (PUGI__IS_CHARTYPE(*s, ct_symbol))
\r
2607 if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, s);
\r
// input ended and the only unmatched name character equals endch: reported as a bad end element rather than a mismatch
2612 if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
\r
2613 else PUGI__THROW_ERROR(status_end_element_mismatch, s);
\r
2616 PUGI__POPNODE(); // Pop.
\r
2622 if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
\r
2626 if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
\r
2630 else if (*s == '?') // '<?...'
\r
2632 s = parse_question(s, cursor, optmsk, endch);
\r
// node type is recovered from the header bits; a declaration under the cursor means its attributes still have to be parsed
2636 if ((cursor->header & xml_memory_page_type_mask) + 1 == node_declaration) goto LOC_ATTRIBUTES;
\r
2638 else if (*s == '!') // '<!...'
\r
2640 s = parse_exclamation(s, cursor, optmsk, endch);
\r
2643 else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
\r
2644 else PUGI__THROW_ERROR(status_unrecognized_tag, s);
\r
2648 mark = s; // Save this offset while searching for a terminator.
\r
2650 PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
\r
2652 if (*s == '<' || !*s)
\r
2654 // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
\r
2655 assert(mark != s);
\r
// whitespace-only text is handled according to the parse_ws_pcdata / parse_ws_pcdata_single / parse_trim_pcdata options
2657 if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata))
\r
2661 else if (PUGI__OPTSET(parse_ws_pcdata_single))
\r
2663 if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;
\r
2667 if (!PUGI__OPTSET(parse_trim_pcdata))
\r
// a PCDATA node is only created below an existing parent, or at top level when parse_fragment is set
2670 if (cursor->parent || PUGI__OPTSET(parse_fragment))
\r
2672 PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
\r
2673 cursor->value = s; // Save the offset.
\r
2675 s = strconv_pcdata(s);
\r
2677 PUGI__POPNODE(); // Pop since this is a standalone.
\r
2683 PUGI__SCANFOR(*s == '<'); // '...<'
\r
2689 // We're after '<'
\r
2694 // check that last tag is closed
\r
2695 if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);
\r
2700 #ifdef PUGIXML_WCHAR_MODE
\r
// Wide-character build: returns s advanced past a leading U+FEFF (byte order mark), or s unchanged.
2701 static char_t* parse_skip_bom(char_t* s)
\r
// the constant is kept in an unsigned int and narrowed with an explicit cast for the comparison
2703 unsigned int bom = 0xfeff;
\r
2704 return (s[0] == static_cast<wchar_t>(bom)) ? s + 1 : s;
\r
// Narrow-character build: returns s advanced past a leading UTF-8 byte order mark (EF BB BF), or s unchanged.
// The && chain stops at the first mismatching byte, so a terminating zero is never read past.
2707 static char_t* parse_skip_bom(char_t* s)
\r
2709 return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s;
\r
// Returns true if node, or a node reachable from it through next_sibling links, is a node_element.
2713 static bool has_element_node_siblings(xml_node_struct* node)
\r
// the node type is stored in the low header bits, offset by one
2717 xml_node_type type = static_cast<xml_node_type>((node->header & impl::xml_memory_page_type_mask) + 1);
\r
2718 if (type == node_element) return true;
\r
2720 node = node->next_sibling;
\r
// Entry point of the parser: parses buffer (length characters) into children of root.
// - xmldoc supplies the allocator (the document object is cast to xml_allocator).
// - optmsk carries the parse_* option bits tested through PUGI__OPTSET.
// Returns an xml_parse_result built from the parser status and an offset relative to buffer.
// Note: the last character of buffer is overwritten with zero so the parser can rely on termination.
2726 static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk)
\r
2728 // allocator object is a part of document object
\r
2729 xml_allocator& alloc = *static_cast<xml_allocator*>(xmldoc);
\r
2731 // early-out for empty documents
\r
2733 return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element);
\r
2735 // get last child of the root before parsing
\r
// remembered so that nodes appended by this parse can be told apart from children that already existed
2736 xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c : 0;
\r
2738 // create parser on stack
\r
2739 xml_parser parser(alloc);
\r
2741 // save last character and make buffer zero-terminated (speeds up parsing)
\r
2742 char_t endch = buffer[length - 1];
\r
2743 buffer[length - 1] = 0;
\r
2745 // skip BOM to make sure it does not end up as part of parse output
\r
2746 char_t* buffer_data = parse_skip_bom(buffer);
\r
2748 // perform actual parsing
\r
2749 parser.parse_tree(buffer_data, root, optmsk, endch);
\r
2751 // update allocator state
\r
2752 alloc = parser.alloc;
\r
// a null error_offset maps to offset 0; otherwise the offset is measured from the start of buffer (BOM included)
2754 xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
\r
2755 assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
\r
2759 // since we removed last character, we have to handle the only possible false positive (stray <)
\r
2761 return make_parse_result(status_unrecognized_tag, length - 1);
\r
2763 // check if there are any element nodes parsed
\r
2764 xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling : root->first_child;
\r
// outside fragment mode, a parse that produced no element node is reported as status_no_document_element
2766 if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))
\r
2767 return make_parse_result(status_no_document_element, length - 1);
\r
2771 // roll back offset if it occurs on a null terminator in the source buffer
\r
2772 if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0)
\r
2780 // Output facilities
\r
// Returns the encoding in which char_t data is already stored, i.e. the one that needs no conversion on output:
// the wchar encoding when PUGIXML_WCHAR_MODE is defined, encoding_utf8 otherwise.
2781 PUGI__FN xml_encoding get_write_native_encoding()
\r
2783 #ifdef PUGIXML_WCHAR_MODE
\r
2784 return get_wchar_encoding();
\r
2786 return encoding_utf8;
\r
// Maps a user-requested output encoding to a concrete one:
// encoding_wchar and the endianness-neutral utf16/utf32 values are resolved for the current platform,
// any other explicit value is returned as is, and encoding_auto falls back to encoding_utf8.
2790 PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
\r
2792 // replace wchar encoding with utf implementation
\r
2793 if (encoding == encoding_wchar) return get_wchar_encoding();
\r
2795 // replace utf16 encoding with utf16 with specific endianness
\r
2796 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
\r
2798 // replace utf32 encoding with utf32 with specific endianness
\r
2799 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
\r
2801 // only do autodetection if no explicit encoding is requested
\r
2802 if (encoding != encoding_auto) return encoding;
\r
2804 // assume utf8 encoding
\r
2805 return encoding_utf8;
\r
2808 #ifdef PUGIXML_WCHAR_MODE
\r
// Wide-character build: returns how many of the first length characters of data can be converted
// without splitting a surrogate pair. Requires length > 0.
2809 PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
\r
2811 assert(length > 0);
\r
2813 // discard last character if it's the lead of a surrogate pair
\r
// only relevant for 2-byte wchar_t; 0xD800..0xDBFF is tested with a single unsigned range check
2814 return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
\r
// Wide-character build: converts length characters of data to the requested output encoding.
// The result is written to whichever scratch pointer matches the target (r_char for a pure endian swap,
// r_u8 for UTF-8 and Latin-1, r_u16 for UTF-16, r_u32 for UTF-32).
// Returns the size of the converted data in bytes. An unsupported encoding triggers the assert at the end.
2817 PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
\r
2819 // only endian-swapping is required
\r
2820 if (need_endian_swap_utf(encoding, get_wchar_encoding()))
\r
2822 convert_wchar_endian_swap(r_char, data, length);
\r
2824 return length * sizeof(char_t);
\r
2827 // convert to utf8
\r
2828 if (encoding == encoding_utf8)
\r
2830 uint8_t* dest = r_u8;
\r
2831 uint8_t* end = utf_decoder<utf8_writer>::decode_wchar_block(data, length, dest);
\r
2833 return static_cast<size_t>(end - dest);
\r
2836 // convert to utf16
\r
2837 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
\r
2839 uint16_t* dest = r_u16;
\r
2841 // convert to native utf16
\r
2842 uint16_t* end = utf_decoder<utf16_writer>::decode_wchar_block(data, length, dest);
\r
2844 // swap if necessary
\r
2845 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
\r
// the swap is done in place on the freshly converted units
2847 if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
\r
2849 return static_cast<size_t>(end - dest) * sizeof(uint16_t);
\r
2852 // convert to utf32
\r
2853 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
\r
2855 uint32_t* dest = r_u32;
\r
2857 // convert to native utf32
\r
2858 uint32_t* end = utf_decoder<utf32_writer>::decode_wchar_block(data, length, dest);
\r
2860 // swap if necessary
\r
2861 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
\r
2863 if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
\r
2865 return static_cast<size_t>(end - dest) * sizeof(uint32_t);
\r
2868 // convert to latin1
\r
2869 if (encoding == encoding_latin1)
\r
2871 uint8_t* dest = r_u8;
\r
2872 uint8_t* end = utf_decoder<latin1_writer>::decode_wchar_block(data, length, dest);
\r
2874 return static_cast<size_t>(end - dest);
\r
2877 assert(!"Invalid encoding");
\r
// Narrow-character build: returns a prefix length of data (at most length) that does not end inside
// a multi-byte UTF-8 sequence. Requires length > 4.
2881 PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
\r
2883 assert(length > 4);
\r
// inspect up to the last four bytes, looking backwards for one that is not a continuation byte (10xxxxxx)
2885 for (size_t i = 1; i <= 4; ++i)
\r
2887 uint8_t ch = static_cast<uint8_t>(data[length - i]);
\r
2889 // either a standalone character or a leading one
\r
// the chunk is cut right before that byte, so the byte itself is left for the next chunk
2890 if ((ch & 0xc0) != 0x80) return length - i;
\r
2893 // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
\r
// Narrow-character build: converts length bytes of UTF-8 data to the requested output encoding.
// The result goes to r_u16 (UTF-16), r_u32 (UTF-32) or r_u8 (Latin-1); the char_t scratch pointer is unused.
// Returns the size of the converted data in bytes. An unsupported encoding triggers the assert at the end.
2897 PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
\r
2899 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
\r
2901 uint16_t* dest = r_u16;
\r
2903 // convert to native utf16
\r
2904 uint16_t* end = utf_decoder<utf16_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
\r
2906 // swap if necessary
\r
2907 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
\r
// the swap is done in place on the freshly converted units
2909 if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
\r
2911 return static_cast<size_t>(end - dest) * sizeof(uint16_t);
\r
2914 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
\r
2916 uint32_t* dest = r_u32;
\r
2918 // convert to native utf32
\r
2919 uint32_t* end = utf_decoder<utf32_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
\r
2921 // swap if necessary
\r
2922 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
\r
2924 if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
\r
2926 return static_cast<size_t>(end - dest) * sizeof(uint32_t);
\r
2929 if (encoding == encoding_latin1)
\r
2931 uint8_t* dest = r_u8;
\r
2932 uint8_t* end = utf_decoder<latin1_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
\r
2934 return static_cast<size_t>(end - dest);
\r
2937 assert(!"Invalid encoding");
\r
// Buffers char_t output in a fixed-size array and forwards it to an xml_writer,
// converting to the target encoding first when that differs from the native one.
2942 class xml_buffered_writer
\r
// copy construction and assignment are declared without a definition here (non-copyable)
2944 xml_buffered_writer(const xml_buffered_writer&);
\r
2945 xml_buffered_writer& operator=(const xml_buffered_writer&);
\r
// the user-supplied encoding is resolved to a concrete one once, at construction
2948 xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding))
\r
// NOTE(review): presumably the minimum guarantees room for the fixed-arity write() overloads
// and for get_valid_length (which needs more than 4 characters) — confirm
2950 PUGI__STATIC_ASSERT(bufcapacity >= 8);
\r
2953 ~xml_buffered_writer()
\r
// sends the currently buffered characters (buffer[0..bufsize)) to the two-argument flush below
2960 flush(buffer, bufsize);
\r
// Writes size characters from data to the underlying writer, converting through scratch if needed.
2964 void flush(const char_t* data, size_t size)
\r
2966 if (size == 0) return;
\r
2968 // fast path, just write data
\r
2969 if (encoding == get_write_native_encoding())
\r
2970 writer.write(data, size * sizeof(char_t));
\r
// result is the converted size in bytes; it must fit into the scratch storage
2974 size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
\r
2975 assert(result <= sizeof(scratch));
\r
// all scratch views are passed to the conversion; the converted bytes are read back through the uint8_t view
2978 writer.write(scratch.data_u8, result);
\r
// Appends length characters from data, flushing and/or bypassing the buffer when they do not fit.
2982 void write(const char_t* data, size_t length)
\r
2984 if (bufsize + length > bufcapacity)
\r
2986 // flush the remaining buffer contents
\r
2989 // handle large chunks
\r
2990 if (length > bufcapacity)
\r
2992 if (encoding == get_write_native_encoding())
\r
2994 // fast path, can just write data chunk
\r
2995 writer.write(data, length * sizeof(char_t));
\r
2999 // need to convert in suitable chunks
\r
3000 while (length > bufcapacity)
\r
3002 // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
\r
3003 // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
\r
3004 size_t chunk_size = get_valid_length(data, bufcapacity);
\r
3006 // convert chunk and write
\r
3007 flush(data, chunk_size);
\r
3010 data += chunk_size;
\r
3011 length -= chunk_size;
\r
3014 // small tail is copied below
\r
// at this point the remaining data fits into the free part of the buffer
3019 memcpy(buffer + bufsize, data, length * sizeof(char_t));
\r
3020 bufsize += length;
\r
// Appends a zero-terminated string (length obtained with strlength).
3023 void write(const char_t* data)
\r
3025 write(data, strlength(data));
\r
// Fixed-arity overloads for one to six characters: each flushes first if the characters
// would not fit, then stores them at the current end of the buffer.
3028 void write(char_t d0)
\r
3030 if (bufsize + 1 > bufcapacity) flush();
\r
3032 buffer[bufsize + 0] = d0;
\r
3036 void write(char_t d0, char_t d1)
\r
3038 if (bufsize + 2 > bufcapacity) flush();
\r
3040 buffer[bufsize + 0] = d0;
\r
3041 buffer[bufsize + 1] = d1;
\r
3045 void write(char_t d0, char_t d1, char_t d2)
\r
3047 if (bufsize + 3 > bufcapacity) flush();
\r
3049 buffer[bufsize + 0] = d0;
\r
3050 buffer[bufsize + 1] = d1;
\r
3051 buffer[bufsize + 2] = d2;
\r
3055 void write(char_t d0, char_t d1, char_t d2, char_t d3)
\r
3057 if (bufsize + 4 > bufcapacity) flush();
\r
3059 buffer[bufsize + 0] = d0;
\r
3060 buffer[bufsize + 1] = d1;
\r
3061 buffer[bufsize + 2] = d2;
\r
3062 buffer[bufsize + 3] = d3;
\r
3066 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
\r
3068 if (bufsize + 5 > bufcapacity) flush();
\r
3070 buffer[bufsize + 0] = d0;
\r
3071 buffer[bufsize + 1] = d1;
\r
3072 buffer[bufsize + 2] = d2;
\r
3073 buffer[bufsize + 3] = d3;
\r
3074 buffer[bufsize + 4] = d4;
\r
3078 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
\r
3080 if (bufsize + 6 > bufcapacity) flush();
\r
3082 buffer[bufsize + 0] = d0;
\r
3083 buffer[bufsize + 1] = d1;
\r
3084 buffer[bufsize + 2] = d2;
\r
3085 buffer[bufsize + 3] = d3;
\r
3086 buffer[bufsize + 4] = d4;
\r
3087 buffer[bufsize + 5] = d5;
\r
3091 // utf8 maximum expansion: x4 (-> utf32)
\r
3092 // utf16 maximum expansion: x2 (-> utf32)
\r
3093 // utf32 maximum expansion: x1
\r
// total byte budget for buffer plus scratch; can be overridden with PUGIXML_MEMORY_OUTPUT_STACK
3096 bufcapacitybytes =
\r
3097 #ifdef PUGIXML_MEMORY_OUTPUT_STACK
\r
3098 PUGIXML_MEMORY_OUTPUT_STACK
\r
// every buffered character costs sizeof(char_t) bytes in buffer and up to 4 bytes of scratch space
3103 bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
\r
3106 char_t buffer[bufcapacity];
\r
// scratch views, one per possible output unit type; each is sized for the worst-case expansion listed above
3110 uint8_t data_u8[4 * bufcapacity];
\r
3111 uint16_t data_u16[2 * bufcapacity];
\r
3112 uint32_t data_u32[bufcapacity];
\r
3113 char_t data_char[bufcapacity];
\r
3116 xml_writer& writer;
\r
3118 xml_encoding encoding;
\r
// Writes the zero-terminated string s to writer, replacing characters classified by `type`
// (via PUGI__IS_CHARTYPEX) with XML entities or numeric character references.
3121 PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type)
\r
3125 const char_t* prev = s;
\r
3127 // While *s is a usual symbol
\r
3128 while (!PUGI__IS_CHARTYPEX(*s, type)) ++s;
\r
// emit the run of ordinary characters collected so far in one call
3130 writer.write(prev, static_cast<size_t>(s - prev));
\r
3136 writer.write('&', 'a', 'm', 'p', ';');
\r
3140 writer.write('&', 'l', 't', ';');
\r
3144 writer.write('&', 'g', 't', ';');
\r
3148 writer.write('&', 'q', 'u', 'o', 't', ';');
\r
3151 default: // s is not a usual symbol
\r
3153 unsigned int ch = static_cast<unsigned int>(*s++);
\r
// written as a two-digit decimal reference "&#NN;" — this form is only correct for ch < 100
// NOTE(review): presumably only control characters reach this branch; confirm against the chartypex table
3156 writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
\r
// Writes s to writer; the format_no_escapes flag selects between the two output paths,
// the visible one being escaped output through text_output_escaped with the given character class.
3162 PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
\r
3164 if (flags & format_no_escapes)
\r
3167 text_output_escaped(writer, s, type);
\r
// Writes s as CDATA. A "]]>" inside the text would end the section early, so the text is split there
// and continued in a further section (see the comments below).
3170 PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
\r
// the "<![CDATA[" opener is 9 characters, emitted with two fixed-arity writes (5 + 4)
3174 writer.write('<', '!', '[', 'C', 'D');
\r
3175 writer.write('A', 'T', 'A', '[');
\r
3177 const char_t* prev = s;
\r
3179 // look for ]]> sequence - we can't output it as is since it terminates CDATA
\r
// short-circuit evaluation keeps s[1] / s[2] from being read past the terminating zero
3180 while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
\r
3182 // skip ]] if we stopped at ]]>, > will go to the next CDATA section
\r
3185 writer.write(prev, static_cast<size_t>(s - prev));
\r
3187 writer.write(']', ']', '>');
\r
// Writes every attribute of node as ` name="value"`, in document order.
// Attributes with an empty name are written under the placeholder name ":anonymous".
3192 PUGI__FN void node_output_attributes(xml_buffered_writer& writer, const xml_node& node, unsigned int flags)
\r
3194 const char_t* default_name = PUGIXML_TEXT(":anonymous");
\r
3196 for (xml_attribute a = node.first_attribute(); a; a = a.next_attribute())
\r
3198 writer.write(' ');
\r
3199 writer.write(a.name()[0] ? a.name() : default_name);
\r
3200 writer.write('=', '"');
\r
// values are always enclosed in double quotes; escaping uses the attribute character class
3202 text_output(writer, a.value(), ctx_special_attr, flags);
\r
3204 writer.write('"');
\r
// Serializes node and its subtree to writer (recursive).
// - indent is the string written once per nesting level when format_indent is set and format_raw is not.
// - flags carries the format_* bits; format_raw suppresses indentation and the newlines written after nodes.
// - depth is the current nesting level.
// Elements and declarations with an empty name are written under the placeholder name ":anonymous".
3208 PUGI__FN void node_output(xml_buffered_writer& writer, const xml_node& node, const char_t* indent, unsigned int flags, unsigned int depth)
\r
3210 const char_t* default_name = PUGIXML_TEXT(":anonymous");
\r
3212 if ((flags & format_indent) != 0 && (flags & format_raw) == 0)
\r
3213 for (unsigned int i = 0; i < depth; ++i) writer.write(indent);
\r
3215 switch (node.type())
\r
3217 case node_document:
\r
// the document node itself produces no output; children are written at the same depth
3219 for (xml_node n = node.first_child(); n; n = n.next_sibling())
\r
3220 node_output(writer, n, indent, flags, depth);
\r
3224 case node_element:
\r
3226 const char_t* name = node.name()[0] ? node.name() : default_name;
\r
3228 writer.write('<');
\r
3229 writer.write(name);
\r
3231 node_output_attributes(writer, node, flags);
\r
// raw mode: no newlines, childless elements are self-closed, otherwise children follow inline
3233 if (flags & format_raw)
\r
3235 if (!node.first_child())
\r
3236 writer.write(' ', '/', '>');
\r
3239 writer.write('>');
\r
3241 for (xml_node n = node.first_child(); n; n = n.next_sibling())
\r
3242 node_output(writer, n, indent, flags, depth + 1);
\r
3244 writer.write('<', '/');
\r
3245 writer.write(name);
\r
3246 writer.write('>');
\r
3249 else if (!node.first_child())
\r
3250 writer.write(' ', '/', '>', '\n');
\r
// a single PCDATA/CDATA child is written on the same line as the tags
3251 else if (node.first_child() == node.last_child() && (node.first_child().type() == node_pcdata || node.first_child().type() == node_cdata))
\r
3253 writer.write('>');
\r
3255 if (node.first_child().type() == node_pcdata)
\r
3256 text_output(writer, node.first_child().value(), ctx_special_pcdata, flags);
\r
3258 text_output_cdata(writer, node.first_child().value());
\r
3260 writer.write('<', '/');
\r
3261 writer.write(name);
\r
3262 writer.write('>', '\n');
\r
// general case: children on their own lines, closing tag indented to the element's depth
3266 writer.write('>', '\n');
\r
3268 for (xml_node n = node.first_child(); n; n = n.next_sibling())
\r
3269 node_output(writer, n, indent, flags, depth + 1);
\r
3271 if ((flags & format_indent) != 0 && (flags & format_raw) == 0)
\r
3272 for (unsigned int i = 0; i < depth; ++i) writer.write(indent);
\r
3274 writer.write('<', '/');
\r
3275 writer.write(name);
\r
3276 writer.write('>', '\n');
\r
3283 text_output(writer, node.value(), ctx_special_pcdata, flags);
\r
3284 if ((flags & format_raw) == 0) writer.write('\n');
\r
3288 text_output_cdata(writer, node.value());
\r
3289 if ((flags & format_raw) == 0) writer.write('\n');
\r
3292 case node_comment:
\r
// the comment text is written verbatim, without escaping
3293 writer.write('<', '!', '-', '-');
\r
3294 writer.write(node.value());
\r
3295 writer.write('-', '-', '>');
\r
3296 if ((flags & format_raw) == 0) writer.write('\n');
\r
3300 case node_declaration:
\r
3301 writer.write('<', '?');
\r
3302 writer.write(node.name()[0] ? node.name() : default_name);
\r
// declarations carry attributes; the else branch writes a plain value instead
3304 if (node.type() == node_declaration)
\r
3306 node_output_attributes(writer, node, flags);
\r
3308 else if (node.value()[0])
\r
3310 writer.write(' ');
\r
3311 writer.write(node.value());
\r
3314 writer.write('?', '>');
\r
3315 if ((flags & format_raw) == 0) writer.write('\n');
\r
3318 case node_doctype:
\r
// "<!DOCTYPE" is 9 characters, emitted with two fixed-arity writes (5 + 4)
3319 writer.write('<', '!', 'D', 'O', 'C');
\r
3320 writer.write('T', 'Y', 'P', 'E');
\r
3322 if (node.value()[0])
\r
3324 writer.write(' ');
\r
3325 writer.write(node.value());
\r
3328 writer.write('>');
\r
3329 if ((flags & format_raw) == 0) writer.write('\n');
\r
3333 assert(!"Invalid node type");
\r
// Scans the children of node in order: returns true at the first declaration node,
// false as soon as an element node is met before any declaration.
3337 inline bool has_declaration(const xml_node& node)
\r
3339 for (xml_node child = node.first_child(); child; child = child.next_sibling())
\r
3341 xml_node_type type = child.type();
\r
3343 if (type == node_declaration) return true;
\r
3344 if (type == node_element) return false;
\r
// Checks whether a node of type `child` may be inserted under a node of type `parent`.
// The visible rules reject: parents that are neither document nor element; document or null children;
// declaration/doctype children anywhere except directly under the document.
3350 inline bool allow_insert_child(xml_node_type parent, xml_node_type child)
\r
3352 if (parent != node_document && parent != node_element) return false;
\r
3353 if (child == node_document || child == node_null) return false;
\r
3354 if (parent != node_document && (child == node_declaration || child == node_doctype)) return false;
\r
// Copies the contents of source into dest (both must already have the same node type), recursing into
// element children. Any child equal to `skip` is left out together with its subtree.
3359 PUGI__FN void recursive_copy_skip(xml_node& dest, const xml_node& source, const xml_node& skip)
\r
3361 assert(dest.type() == source.type());
\r
3363 switch (source.type())
\r
3365 case node_element:
\r
3367 dest.set_name(source.name());
\r
3369 for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute())
\r
3370 dest.append_attribute(a.name()).set_value(a.value());
\r
3372 for (xml_node c = source.first_child(); c; c = c.next_sibling())
\r
3374 if (c == skip) continue;
\r
// create an empty child of the matching type first, then fill it recursively
3376 xml_node cc = dest.append_child(c.type());
\r
3379 recursive_copy_skip(cc, c, skip);
\r
3387 case node_comment:
\r
3388 case node_doctype:
\r
3389 dest.set_value(source.value());
\r
3393 dest.set_name(source.name());
\r
3394 dest.set_value(source.value());
\r
3397 case node_declaration:
\r
3399 dest.set_name(source.name());
\r
3401 for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute())
\r
3402 dest.append_attribute(a.name()).set_value(a.value());
\r
3408 assert(!"Invalid node type");
\r
// Returns true if node is a PCDATA or CDATA node. The type is decoded from the low header bits (stored minus one).
3412 inline bool is_text_node(xml_node_struct* node)
\r
3414 xml_node_type type = static_cast<xml_node_type>((node->header & impl::xml_memory_page_type_mask) + 1);
\r
3416 return type == node_pcdata || type == node_cdata;
\r
3419 // get value with conversion functions
\r
// Chooses the numeric base for parsing value: 16 when the text starts with "0x" / "0X", 10 otherwise.
// A loop over leading whitespace (ct_space) precedes the prefix test.
3420 PUGI__FN int get_integer_base(const char_t* value)
\r
3422 const char_t* s = value;
\r
3424 while (PUGI__IS_CHARTYPE(*s, ct_space))
\r
3430 return (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) ? 16 : 10;
\r
// Converts value to int; returns def when value is null.
// Uses wcstol / strtol with the base chosen by get_integer_base. The end pointer is not requested,
// so text that does not parse cannot be told apart from a parsed 0, and the long result is narrowed
// to int with a plain static_cast.
3433 PUGI__FN int get_value_int(const char_t* value, int def)
\r
3435 if (!value) return def;
\r
3437 int base = get_integer_base(value);
\r
3439 #ifdef PUGIXML_WCHAR_MODE
\r
3440 return static_cast<int>(wcstol(value, 0, base));
\r
3442 return static_cast<int>(strtol(value, 0, base));
\r
// Converts value to unsigned int; returns def when value is null.
// Uses wcstoul / strtoul with the base chosen by get_integer_base; the end pointer is not requested
// and the unsigned long result is narrowed with a plain static_cast.
3446 PUGI__FN unsigned int get_value_uint(const char_t* value, unsigned int def)
\r
3448 if (!value) return def;
\r
3450 int base = get_integer_base(value);
\r
3452 #ifdef PUGIXML_WCHAR_MODE
\r
3453 return static_cast<unsigned int>(wcstoul(value, 0, base));
\r
3455 return static_cast<unsigned int>(strtoul(value, 0, base));
\r
// Converts value to double with wcstod / strtod; returns def when value is null.
// The end pointer is not requested, so unparsable text cannot be told apart from a parsed 0.
3459 PUGI__FN double get_value_double(const char_t* value, double def)
\r
3461 if (!value) return def;
\r
3463 #ifdef PUGIXML_WCHAR_MODE
\r
3464 return wcstod(value, 0);
\r
3466 return strtod(value, 0);
\r
// Converts value to float; returns def when value is null.
// Parsing is done in double precision (wcstod / strtod) and the result is narrowed to float.
3470 PUGI__FN float get_value_float(const char_t* value, float def)
\r
3472 if (!value) return def;
\r
3474 #ifdef PUGIXML_WCHAR_MODE
\r
3475 return static_cast<float>(wcstod(value, 0));
\r
3477 return static_cast<float>(strtod(value, 0));
\r
// Converts value to bool; returns def when value is null.
// Only the first character decides: '1', 't', 'T', 'y' or 'Y' give true, anything else
// (including an empty string) gives false.
3481 PUGI__FN bool get_value_bool(const char_t* value, bool def)
\r
3483 if (!value) return def;
\r
3485 // only look at first char
\r
3486 char_t first = *value;
\r
3488 // 1*, t* (true), T* (True), y* (yes), Y* (YES)
\r
3489 return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
\r
3492 #ifdef PUGIXML_HAS_LONG_LONG
\r
// Converts value to long long; returns def when value is null.
// The conversion function depends on the build: wide vs. narrow characters (PUGIXML_WCHAR_MODE)
// and MSVC CRT vs. standard names (PUGI__MSVC_CRT_VERSION). The base comes from get_integer_base.
3493 PUGI__FN long long get_value_llong(const char_t* value, long long def)
\r
3495 if (!value) return def;
\r
3497 int base = get_integer_base(value);
\r
3499 #ifdef PUGIXML_WCHAR_MODE
\r
3500 #ifdef PUGI__MSVC_CRT_VERSION
\r
3501 return _wcstoi64(value, 0, base);
\r
3503 return wcstoll(value, 0, base);
\r
3506 #ifdef PUGI__MSVC_CRT_VERSION
\r
3507 return _strtoi64(value, 0, base);
\r
3509 return strtoll(value, 0, base);
\r
// Converts value to unsigned long long; returns def when value is null.
// The conversion function depends on the build: wide vs. narrow characters (PUGIXML_WCHAR_MODE)
// and MSVC CRT vs. standard names (PUGI__MSVC_CRT_VERSION). The base comes from get_integer_base.
3514 PUGI__FN unsigned long long get_value_ullong(const char_t* value, unsigned long long def)
\r
3516 if (!value) return def;
\r
3518 int base = get_integer_base(value);
\r
3520 #ifdef PUGIXML_WCHAR_MODE
\r
3521 #ifdef PUGI__MSVC_CRT_VERSION
\r
3522 return _wcstoui64(value, 0, base);
\r
3524 return wcstoull(value, 0, base);
\r
3527 #ifdef PUGI__MSVC_CRT_VERSION
\r
3528 return _strtoui64(value, 0, base);
\r
3530 return strtoull(value, 0, base);
\r
3536 // set value with conversion functions
\r
// Stores the ASCII text in buf into dest through strcpy_insitu and returns its result.
// In wide-character builds the text is first widened with impl::widen_ascii.
// header / header_mask are passed through to strcpy_insitu unchanged.
3537 PUGI__FN bool set_value_buffer(char_t*& dest, uintptr_t& header, uintptr_t header_mask, char (&buf)[128])
\r
3539 #ifdef PUGIXML_WCHAR_MODE
\r
3541 impl::widen_ascii(wbuf, buf);
\r
3543 return strcpy_insitu(dest, header, header_mask, wbuf);
\r
3545 return strcpy_insitu(dest, header, header_mask, buf);
\r
// Formats an int as decimal text ("%d") and stores it via set_value_buffer; returns that call's result.
3549 PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, int value)
\r
3552 sprintf(buf, "%d", value);
\r
3554 return set_value_buffer(dest, header, header_mask, buf);
\r
// Formats an unsigned int as decimal text ("%u") and stores it via set_value_buffer; returns that call's result.
3557 PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, unsigned int value)
\r
3560 sprintf(buf, "%u", value);
\r
3562 return set_value_buffer(dest, header, header_mask, buf);
\r
// Formats a double with "%g" and stores it via set_value_buffer; returns that call's result.
// NOTE(review): "%g" uses the default precision of 6 significant digits, so the stored text
// generally does not round-trip the original double exactly.
3565 PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, double value)
\r
3568 sprintf(buf, "%g", value);
\r
3570 return set_value_buffer(dest, header, header_mask, buf);
\r
// Stores a bool as the literal text "true" or "false" via strcpy_insitu; returns that call's result.
3573 PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, bool value)
\r
3575 return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
\r
3578 #ifdef PUGIXML_HAS_LONG_LONG
\r
// Formats a long long as decimal text ("%lld") and stores it via set_value_buffer; returns that call's result.
3579 PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, long long value)
\r
3582 sprintf(buf, "%lld", value);
\r
3584 return set_value_buffer(dest, header, header_mask, buf);
\r
// Formats an unsigned long long as decimal text ("%llu") and stores it via set_value_buffer; returns that call's result.
3587 PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, unsigned long long value)
\r
3590 sprintf(buf, "%llu", value);
\r
3592 return set_value_buffer(dest, header, header_mask, buf);
\r
3596 // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
\r
// Determines the size of file by seeking to its end, reading the position and seeking back to the start.
// On success the size is stored in out_result. A negative position yields status_io_error; a size that
// does not survive the round trip through size_t yields status_out_of_memory.
// Side effect: the file position is reset to the beginning.
3597 PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
\r
3599 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
\r
3600 // there are 64-bit versions of fseek/ftell, let's use them
\r
3601 typedef __int64 length_type;
\r
3603 _fseeki64(file, 0, SEEK_END);
\r
3604 length_type length = _ftelli64(file);
\r
3605 _fseeki64(file, 0, SEEK_SET);
\r
3606 #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && !defined(__STRICT_ANSI__)
\r
3607 // there are 64-bit versions of fseek/ftell, let's use them
\r
3608 typedef off64_t length_type;
\r
3610 fseeko64(file, 0, SEEK_END);
\r
3611 length_type length = ftello64(file);
\r
3612 fseeko64(file, 0, SEEK_SET);
\r
3614 // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
\r
3615 typedef long length_type;
\r
// the return values of the seek calls are not checked; a failure is only noticed through a negative tell result
3617 fseek(file, 0, SEEK_END);
\r
3618 length_type length = ftell(file);
\r
3619 fseek(file, 0, SEEK_SET);
\r
3622 // check for I/O errors
\r
3623 if (length < 0) return status_io_error;
\r
3625 // check for overflow
\r
3626 size_t result = static_cast<size_t>(length);
\r
// if size_t is narrower than length_type the cast truncates, and converting back exposes the difference
3628 if (static_cast<length_type>(result) != length) return status_out_of_memory;
\r
3631 out_result = result;
\r
// Appends a zero terminator to buffer when its encoding is one that is used without conversion
// (see the comment below), and returns a size in bytes.
// The write lands one element past `size`, so the caller must have allocated room for it.
3636 PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding)
\r
3638 // We only need to zero-terminate if encoding conversion does not do it for us
\r
3639 #ifdef PUGIXML_WCHAR_MODE
\r
3640 xml_encoding wchar_encoding = get_wchar_encoding();
\r
3642 if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding))
\r
// size is in bytes; the division drops any trailing partial character
3644 size_t length = size / sizeof(char_t);
\r
3646 static_cast<char_t*>(buffer)[length] = 0;
\r
3647 return (length + 1) * sizeof(char_t);
\r
3650 if (encoding == encoding_utf8)
\r
3652 static_cast<char*>(buffer)[size] = 0;
\r
// Reads the whole of file into a freshly allocated buffer and hands it to
// doc.load_buffer_inplace_own together with the detected encoding.
// Returns status_file_not_found for a null file, the get_file_size status on a size error,
// status_out_of_memory if the allocation fails, status_io_error on a short read.
3660 PUGI__FN xml_parse_result load_file_impl(xml_document& doc, FILE* file, unsigned int options, xml_encoding encoding)
\r
3662 if (!file) return make_parse_result(status_file_not_found);
\r
3664 // get file size (can result in I/O errors)
\r
3666 xml_parse_status size_status = get_file_size(file, size);
\r
3668 if (size_status != status_ok)
\r
3671 return make_parse_result(size_status);
\r
// extra room for the terminator that zero_terminate_buffer may append
3674 size_t max_suffix_size = sizeof(char_t);
\r
3676 // allocate buffer for the whole file
\r
3677 char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size));
\r
3682 return make_parse_result(status_out_of_memory);
\r
3685 // read file in memory
\r
3686 size_t read_size = fread(contents, 1, size, file);
\r
// a short read is treated as an I/O error and the buffer is released
3689 if (read_size != size)
\r
3691 xml_memory::deallocate(contents);
\r
3692 return make_parse_result(status_io_error);
\r
3695 xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size);
\r
// NOTE(review): judging by its name, load_buffer_inplace_own takes ownership of contents — confirm
3697 return doc.load_buffer_inplace_own(contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding);
\r
3700 #ifndef PUGIXML_NO_STL
\r
// One link of a singly linked list of fixed-size buffers used to read a stream of unknown length.
// Chunks are allocated through xml_memory and constructed with placement new.
3701 template <typename T> struct xml_stream_chunk
\r
3703 static xml_stream_chunk* create()
\r
3705 void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
\r
// NOTE(review): no null check of `memory` is visible in this listing before the placement new — confirm one exists
3707 return new (memory) xml_stream_chunk();
\r
// takes void* so that it can be used as a generic deleter (see buffer_holder in load_stream_data_noseek)
3710 static void destroy(void* ptr)
\r
3712 xml_stream_chunk* chunk = static_cast<xml_stream_chunk*>(ptr);
\r
3714 // free chunk chain
\r
// the next pointer is read before the chunk is released
3717 xml_stream_chunk* next = chunk->next;
\r
3718 xml_memory::deallocate(chunk);
\r
3723 xml_stream_chunk(): next(0), size(0)
\r
3727 xml_stream_chunk* next;
\r
// payload storage: as many T elements as fit into xml_memory_page_size bytes
3730 T data[xml_memory_page_size / sizeof(T)];
\r
// Reads stream to its end without seeking: data is collected in a list of xml_stream_chunk<T> and then
// copied into one contiguous buffer allocated with xml_memory::allocate.
// On success *out_buffer receives the buffer and *out_size its size in bytes.
// Returns status_out_of_memory on allocation failure or size overflow, status_io_error on a stream error.
3733 template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
\r
// the holder owns the chunk list and is set up with xml_stream_chunk<T>::destroy as its deleter
3735 buffer_holder chunks(0, xml_stream_chunk<T>::destroy);
\r
3737 // read file to a chunk list
\r
3739 xml_stream_chunk<T>* last = 0;
\r
3741 while (!stream.eof())
\r
3743 // allocate new chunk
\r
3744 xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create();
\r
3745 if (!chunk) return status_out_of_memory;
\r
3747 // append chunk to list
\r
3748 if (last) last = last->next = chunk;
\r
3749 else chunks.data = last = chunk;
\r
3751 // read data to chunk
\r
3752 stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
\r
// chunk->size is kept in bytes, not in elements
3753 chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
\r
3755 // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
\r
3756 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
\r
3758 // guard against huge files (chunk size is small enough to make this overflow check work)
\r
3759 if (total + chunk->size < total) return status_out_of_memory;
\r
3760 total += chunk->size;
\r
// extra room so that a terminating char_t can be appended after the data later
3763 size_t max_suffix_size = sizeof(char_t);
\r
3765 // copy chunk list to a contiguous buffer
\r
3766 char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size));
\r
3767 if (!buffer) return status_out_of_memory;
\r
3769 char* write = buffer;
\r
3771 for (xml_stream_chunk<T>* chunk = static_cast<xml_stream_chunk<T>*>(chunks.data); chunk; chunk = chunk->next)
\r
3773 assert(write + chunk->size <= buffer + total);
\r
3774 memcpy(write, chunk->data, chunk->size);
\r
3775 write += chunk->size;
\r
3778 assert(write == buffer + total);
\r
3781 *out_buffer = buffer;
\r
3782 *out_size = total;
\r
3787 template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
\r
3789 // get length of remaining data in stream
\r
3790 typename std::basic_istream<T>::pos_type pos = stream.tellg();
\r
3791 stream.seekg(0, std::ios::end);
\r
3792 std::streamoff length = stream.tellg() - pos;
\r
3793 stream.seekg(pos);
\r
3795 if (stream.fail() || pos < 0) return status_io_error;
\r
3797 // guard against huge files
\r
3798 size_t read_length = static_cast<size_t>(length);
\r
3800 if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;
\r
3802 size_t max_suffix_size = sizeof(char_t);
\r
3804 // read stream data into memory (guard against stream exceptions with buffer holder)
\r
3805 buffer_holder buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate);
\r
3806 if (!buffer.data) return status_out_of_memory;
\r
3808 stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
\r
3810 // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
\r
3811 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
\r
3814 size_t actual_length = static_cast<size_t>(stream.gcount());
\r
3815 assert(actual_length <= read_length);
\r
3817 *out_buffer = buffer.release();
\r
3818 *out_size = actual_length * sizeof(T);
\r
3823 template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document& doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding)
\r
3827 xml_parse_status status = status_ok;
\r
3829 // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits)
\r
3830 if (stream.fail()) return make_parse_result(status_io_error);
\r
3832 // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)
\r
3833 if (stream.tellg() < 0)
\r
3835 stream.clear(); // clear error flags that could be set by a failing tellg
\r
3836 status = load_stream_data_noseek(stream, &buffer, &size);
\r
3839 status = load_stream_data_seek(stream, &buffer, &size);
\r
3841 if (status != status_ok) return make_parse_result(status);
\r
3843 xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size);
\r
3845 return doc.load_buffer_inplace_own(buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding);
\r
3849 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && !defined(__STRICT_ANSI__))
\r
3850 PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
\r
3852 return _wfopen(path, mode);
\r
3855 PUGI__FN char* convert_path_heap(const wchar_t* str)
\r
3859 // first pass: get length in utf8 characters
\r
3860 size_t length = strlength_wide(str);
\r
3861 size_t size = as_utf8_begin(str, length);
\r
3863 // allocate resulting string
\r
3864 char* result = static_cast<char*>(xml_memory::allocate(size + 1));
\r
3865 if (!result) return 0;
\r
3867 // second pass: convert to utf8
\r
3868 as_utf8_end(result, size, str, length);
\r
3873 PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
\r
3875 // there is no standard function to open wide paths, so our best bet is to try utf8 path
\r
3876 char* path_utf8 = convert_path_heap(path);
\r
3877 if (!path_utf8) return 0;
\r
3879 // convert mode to ASCII (we mirror _wfopen interface)
\r
3880 char mode_ascii[4] = {0};
\r
3881 for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
\r
3883 // try to open the utf8 path
\r
3884 FILE* result = fopen(path_utf8, mode_ascii);
\r
3886 // free dummy buffer
\r
3887 xml_memory::deallocate(path_utf8);
\r
3893 PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)
\r
3895 if (!file) return false;
\r
3897 xml_writer_file writer(file);
\r
3898 doc.save(writer, indent, flags, encoding);
\r
3900 int result = ferror(file);
\r
3904 return result == 0;
\r
3907 PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer)
\r
3909 // check input buffer
\r
3910 assert(contents || size == 0);
\r
3912 // get actual encoding
\r
3913 xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
\r
3915 // get private buffer
\r
3916 char_t* buffer = 0;
\r
3917 size_t length = 0;
\r
3919 if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);
\r
3921 // delete original buffer if we performed a conversion
\r
3922 if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
\r
3924 // store buffer for offset_debug
\r
3925 doc->buffer = buffer;
\r
3928 xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options);
\r
3930 // remember encoding
\r
3931 res.encoding = buffer_encoding;
\r
3933 // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself
\r
3934 if (own || buffer != contents) *out_buffer = buffer;
\r
3942 PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_)
\r
3946 PUGI__FN void xml_writer_file::write(const void* data, size_t size)
\r
3948 size_t result = fwrite(data, 1, size, static_cast<FILE*>(file));
\r
3949 (void)!result; // unfortunately we can't do proper error handling here
\r
3952 #ifndef PUGIXML_NO_STL
\r
3953 PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
\r
3957 PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
\r
3961 PUGI__FN void xml_writer_stream::write(const void* data, size_t size)
\r
3963 if (narrow_stream)
\r
3965 assert(!wide_stream);
\r
3966 narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
\r
3970 assert(wide_stream);
\r
3971 assert(size % sizeof(wchar_t) == 0);
\r
3973 wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));
\r
3978 PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0)
\r
3982 PUGI__FN xml_tree_walker::~xml_tree_walker()
\r
3986 PUGI__FN int xml_tree_walker::depth() const
\r
3991 PUGI__FN bool xml_tree_walker::begin(xml_node&)
\r
3996 PUGI__FN bool xml_tree_walker::end(xml_node&)
\r
4001 PUGI__FN xml_attribute::xml_attribute(): _attr(0)
\r
4005 PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
\r
4009 PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***)
\r
4013 PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
\r
4015 return _attr ? unspecified_bool_xml_attribute : 0;
\r
4018 PUGI__FN bool xml_attribute::operator!() const
\r
4023 PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const
\r
4025 return (_attr == r._attr);
\r
4028 PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const
\r
4030 return (_attr != r._attr);
\r
4033 PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const
\r
4035 return (_attr < r._attr);
\r
4038 PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const
\r
4040 return (_attr > r._attr);
\r
4043 PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const
\r
4045 return (_attr <= r._attr);
\r
4048 PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const
\r
4050 return (_attr >= r._attr);
\r
4053 PUGI__FN xml_attribute xml_attribute::next_attribute() const
\r
4055 return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute();
\r
4058 PUGI__FN xml_attribute xml_attribute::previous_attribute() const
\r
4060 return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute();
\r
4063 PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const
\r
4065 return (_attr && _attr->value) ? _attr->value : def;
\r
4068 PUGI__FN int xml_attribute::as_int(int def) const
\r
4070 return impl::get_value_int(_attr ? _attr->value : 0, def);
\r
4073 PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const
\r
4075 return impl::get_value_uint(_attr ? _attr->value : 0, def);
\r
4078 PUGI__FN double xml_attribute::as_double(double def) const
\r
4080 return impl::get_value_double(_attr ? _attr->value : 0, def);
\r
4083 PUGI__FN float xml_attribute::as_float(float def) const
\r
4085 return impl::get_value_float(_attr ? _attr->value : 0, def);
\r
4088 PUGI__FN bool xml_attribute::as_bool(bool def) const
\r
4090 return impl::get_value_bool(_attr ? _attr->value : 0, def);
\r
#ifdef PUGIXML_HAS_LONG_LONG
// 64-bit getters; same contract as the other typed getters (def is passed to the helper).
PUGI__FN long long xml_attribute::as_llong(long long def) const
{
	const char_t* raw = _attr ? _attr->value : 0;

	return impl::get_value_llong(raw, def);
}

PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const
{
	const char_t* raw = _attr ? _attr->value : 0;

	return impl::get_value_ullong(raw, def);
}
#endif
\r
4105 PUGI__FN bool xml_attribute::empty() const
\r
4110 PUGI__FN const char_t* xml_attribute::name() const
\r
4112 return (_attr && _attr->name) ? _attr->name : PUGIXML_TEXT("");
\r
4115 PUGI__FN const char_t* xml_attribute::value() const
\r
4117 return (_attr && _attr->value) ? _attr->value : PUGIXML_TEXT("");
\r
4120 PUGI__FN size_t xml_attribute::hash_value() const
\r
4122 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct));
\r
4125 PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const
\r
4130 PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs)
\r
4136 PUGI__FN xml_attribute& xml_attribute::operator=(int rhs)
\r
4142 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs)
\r
4148 PUGI__FN xml_attribute& xml_attribute::operator=(double rhs)
\r
4154 PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs)
\r
4160 #ifdef PUGIXML_HAS_LONG_LONG
\r
4161 PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs)
\r
4167 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs)
\r
4174 PUGI__FN bool xml_attribute::set_name(const char_t* rhs)
\r
4176 if (!_attr) return false;
\r
4178 return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs);
\r
4181 PUGI__FN bool xml_attribute::set_value(const char_t* rhs)
\r
4183 if (!_attr) return false;
\r
4185 return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
\r
4188 PUGI__FN bool xml_attribute::set_value(int rhs)
\r
4190 if (!_attr) return false;
\r
4192 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
\r
4195 PUGI__FN bool xml_attribute::set_value(unsigned int rhs)
\r
4197 if (!_attr) return false;
\r
4199 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
\r
4202 PUGI__FN bool xml_attribute::set_value(double rhs)
\r
4204 if (!_attr) return false;
\r
4206 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
\r
4209 PUGI__FN bool xml_attribute::set_value(bool rhs)
\r
4211 if (!_attr) return false;
\r
4213 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
\r
4216 #ifdef PUGIXML_HAS_LONG_LONG
\r
4217 PUGI__FN bool xml_attribute::set_value(long long rhs)
\r
4219 if (!_attr) return false;
\r
4221 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
\r
4224 PUGI__FN bool xml_attribute::set_value(unsigned long long rhs)
\r
4226 if (!_attr) return false;
\r
4228 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
\r
#ifdef __BORLANDC__
// Borland-only overloads that let an attribute handle be combined with a bool.
// The C-style cast is kept as in the original, since this block exists as a compiler workaround.
PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs)
{
	bool lhs_set = (bool)lhs;

	return lhs_set && rhs;
}

PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs)
{
	bool lhs_set = (bool)lhs;

	return lhs_set || rhs;
}
#endif
\r
4244 PUGI__FN xml_node::xml_node(): _root(0)
\r
4248 PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p)
\r
4252 PUGI__FN static void unspecified_bool_xml_node(xml_node***)
\r
4256 PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const
\r
4258 return _root ? unspecified_bool_xml_node : 0;
\r
4261 PUGI__FN bool xml_node::operator!() const
\r
4266 PUGI__FN xml_node::iterator xml_node::begin() const
\r
4268 return iterator(_root ? _root->first_child : 0, _root);
\r
4271 PUGI__FN xml_node::iterator xml_node::end() const
\r
4273 return iterator(0, _root);
\r
4276 PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const
\r
4278 return attribute_iterator(_root ? _root->first_attribute : 0, _root);
\r
4281 PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const
\r
4283 return attribute_iterator(0, _root);
\r
4286 PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const
\r
4288 return xml_object_range<xml_node_iterator>(begin(), end());
\r
4291 PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const
\r
4293 return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_));
\r
4296 PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const
\r
4298 return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end());
\r
4301 PUGI__FN bool xml_node::operator==(const xml_node& r) const
\r
4303 return (_root == r._root);
\r
4306 PUGI__FN bool xml_node::operator!=(const xml_node& r) const
\r
4308 return (_root != r._root);
\r
4311 PUGI__FN bool xml_node::operator<(const xml_node& r) const
\r
4313 return (_root < r._root);
\r
4316 PUGI__FN bool xml_node::operator>(const xml_node& r) const
\r
4318 return (_root > r._root);
\r
4321 PUGI__FN bool xml_node::operator<=(const xml_node& r) const
\r
4323 return (_root <= r._root);
\r
4326 PUGI__FN bool xml_node::operator>=(const xml_node& r) const
\r
4328 return (_root >= r._root);
\r
4331 PUGI__FN bool xml_node::empty() const
\r
4336 PUGI__FN const char_t* xml_node::name() const
\r
4338 return (_root && _root->name) ? _root->name : PUGIXML_TEXT("");
\r
4341 PUGI__FN xml_node_type xml_node::type() const
\r
4343 return _root ? static_cast<xml_node_type>((_root->header & impl::xml_memory_page_type_mask) + 1) : node_null;
\r
4346 PUGI__FN const char_t* xml_node::value() const
\r
4348 return (_root && _root->value) ? _root->value : PUGIXML_TEXT("");
\r
4351 PUGI__FN xml_node xml_node::child(const char_t* name_) const
\r
4353 if (!_root) return xml_node();
\r
4355 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
\r
4356 if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
\r
4358 return xml_node();
\r
4361 PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const
\r
4363 if (!_root) return xml_attribute();
\r
4365 for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute)
\r
4366 if (i->name && impl::strequal(name_, i->name))
\r
4367 return xml_attribute(i);
\r
4369 return xml_attribute();
\r
4372 PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const
\r
4374 if (!_root) return xml_node();
\r
4376 for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling)
\r
4377 if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
\r
4379 return xml_node();
\r
4382 PUGI__FN xml_node xml_node::next_sibling() const
\r
4384 if (!_root) return xml_node();
\r
4386 if (_root->next_sibling) return xml_node(_root->next_sibling);
\r
4387 else return xml_node();
\r
4390 PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const
\r
4392 if (!_root) return xml_node();
\r
4394 for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c)
\r
4395 if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
\r
4397 return xml_node();
\r
4400 PUGI__FN xml_node xml_node::previous_sibling() const
\r
4402 if (!_root) return xml_node();
\r
4404 if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c);
\r
4405 else return xml_node();
\r
4408 PUGI__FN xml_node xml_node::parent() const
\r
4410 return _root ? xml_node(_root->parent) : xml_node();
\r
4413 PUGI__FN xml_node xml_node::root() const
\r
4415 if (!_root) return xml_node();
\r
4417 impl::xml_memory_page* page = reinterpret_cast<impl::xml_memory_page*>(_root->header & impl::xml_memory_page_pointer_mask);
\r
4419 return xml_node(static_cast<impl::xml_document_struct*>(page->allocator));
\r
4422 PUGI__FN xml_text xml_node::text() const
\r
4424 return xml_text(_root);
\r
4427 PUGI__FN const char_t* xml_node::child_value() const
\r
4429 if (!_root) return PUGIXML_TEXT("");
\r
4431 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
\r
4432 if (i->value && impl::is_text_node(i))
\r
4435 return PUGIXML_TEXT("");
\r
4438 PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const
\r
4440 return child(name_).child_value();
\r
4443 PUGI__FN xml_attribute xml_node::first_attribute() const
\r
4445 return _root ? xml_attribute(_root->first_attribute) : xml_attribute();
\r
4448 PUGI__FN xml_attribute xml_node::last_attribute() const
\r
4450 return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute();
\r
4453 PUGI__FN xml_node xml_node::first_child() const
\r
4455 return _root ? xml_node(_root->first_child) : xml_node();
\r
4458 PUGI__FN xml_node xml_node::last_child() const
\r
4460 return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node();
\r
4463 PUGI__FN bool xml_node::set_name(const char_t* rhs)
\r
4468 case node_declaration:
\r
4469 case node_element:
\r
4470 return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs);
\r
4477 PUGI__FN bool xml_node::set_value(const char_t* rhs)
\r
4484 case node_comment:
\r
4485 case node_doctype:
\r
4486 return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs);
\r
4493 PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_)
\r
4495 if (type() != node_element && type() != node_declaration) return xml_attribute();
\r
4497 xml_attribute a(impl::append_attribute_ll(_root, impl::get_allocator(_root)));
\r
4498 a.set_name(name_);
\r
4503 PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_)
\r
4505 if (type() != node_element && type() != node_declaration) return xml_attribute();
\r
4507 xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root)));
\r
4508 if (!a) return xml_attribute();
\r
4510 a.set_name(name_);
\r
4512 xml_attribute_struct* head = _root->first_attribute;
\r
4516 a._attr->prev_attribute_c = head->prev_attribute_c;
\r
4517 head->prev_attribute_c = a._attr;
\r
4520 a._attr->prev_attribute_c = a._attr;
\r
4522 a._attr->next_attribute = head;
\r
4523 _root->first_attribute = a._attr;
\r
4528 PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr)
\r
4530 if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute();
\r
4532 // check that attribute belongs to *this
\r
4533 xml_attribute_struct* cur = attr._attr;
\r
4535 while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c;
\r
4537 if (cur != _root->first_attribute) return xml_attribute();
\r
4539 xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root)));
\r
4540 if (!a) return xml_attribute();
\r
4542 a.set_name(name_);
\r
4544 if (attr._attr->prev_attribute_c->next_attribute)
\r
4545 attr._attr->prev_attribute_c->next_attribute = a._attr;
\r
4547 _root->first_attribute = a._attr;
\r
4549 a._attr->prev_attribute_c = attr._attr->prev_attribute_c;
\r
4550 a._attr->next_attribute = attr._attr;
\r
4551 attr._attr->prev_attribute_c = a._attr;
\r
4556 PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr)
\r
4558 if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute();
\r
4560 // check that attribute belongs to *this
\r
4561 xml_attribute_struct* cur = attr._attr;
\r
4563 while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c;
\r
4565 if (cur != _root->first_attribute) return xml_attribute();
\r
4567 xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root)));
\r
4568 if (!a) return xml_attribute();
\r
4570 a.set_name(name_);
\r
4572 if (attr._attr->next_attribute)
\r
4573 attr._attr->next_attribute->prev_attribute_c = a._attr;
\r
4575 _root->first_attribute->prev_attribute_c = a._attr;
\r
4577 a._attr->next_attribute = attr._attr->next_attribute;
\r
4578 a._attr->prev_attribute_c = attr._attr;
\r
4579 attr._attr->next_attribute = a._attr;
\r
4584 PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto)
\r
4586 if (!proto) return xml_attribute();
\r
4588 xml_attribute result = append_attribute(proto.name());
\r
4589 result.set_value(proto.value());
\r
4594 PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
\r
4596 if (!proto) return xml_attribute();
\r
4598 xml_attribute result = prepend_attribute(proto.name());
\r
4599 result.set_value(proto.value());
\r
4604 PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
\r
4606 if (!proto) return xml_attribute();
\r
4608 xml_attribute result = insert_attribute_after(proto.name(), attr);
\r
4609 result.set_value(proto.value());
\r
4614 PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
\r
4616 if (!proto) return xml_attribute();
\r
4618 xml_attribute result = insert_attribute_before(proto.name(), attr);
\r
4619 result.set_value(proto.value());
\r
4624 PUGI__FN xml_node xml_node::append_child(xml_node_type type_)
\r
4626 if (!impl::allow_insert_child(this->type(), type_)) return xml_node();
\r
4628 xml_node n(impl::append_node(_root, impl::get_allocator(_root), type_));
\r
4630 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
\r
4635 PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_)
\r
4637 if (!impl::allow_insert_child(this->type(), type_)) return xml_node();
\r
4639 xml_node n(impl::allocate_node(impl::get_allocator(_root), type_));
\r
4640 if (!n) return xml_node();
\r
4642 n._root->parent = _root;
\r
4644 xml_node_struct* head = _root->first_child;
\r
4648 n._root->prev_sibling_c = head->prev_sibling_c;
\r
4649 head->prev_sibling_c = n._root;
\r
4652 n._root->prev_sibling_c = n._root;
\r
4654 n._root->next_sibling = head;
\r
4655 _root->first_child = n._root;
\r
4657 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
\r
4662 PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node)
\r
4664 if (!impl::allow_insert_child(this->type(), type_)) return xml_node();
\r
4665 if (!node._root || node._root->parent != _root) return xml_node();
\r
4667 xml_node n(impl::allocate_node(impl::get_allocator(_root), type_));
\r
4668 if (!n) return xml_node();
\r
4670 n._root->parent = _root;
\r
4672 if (node._root->prev_sibling_c->next_sibling)
\r
4673 node._root->prev_sibling_c->next_sibling = n._root;
\r
4675 _root->first_child = n._root;
\r
4677 n._root->prev_sibling_c = node._root->prev_sibling_c;
\r
4678 n._root->next_sibling = node._root;
\r
4679 node._root->prev_sibling_c = n._root;
\r
4681 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
\r
4686 PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node)
\r
4688 if (!impl::allow_insert_child(this->type(), type_)) return xml_node();
\r
4689 if (!node._root || node._root->parent != _root) return xml_node();
\r
4691 xml_node n(impl::allocate_node(impl::get_allocator(_root), type_));
\r
4692 if (!n) return xml_node();
\r
4694 n._root->parent = _root;
\r
4696 if (node._root->next_sibling)
\r
4697 node._root->next_sibling->prev_sibling_c = n._root;
\r
4699 _root->first_child->prev_sibling_c = n._root;
\r
4701 n._root->next_sibling = node._root->next_sibling;
\r
4702 n._root->prev_sibling_c = node._root;
\r
4703 node._root->next_sibling = n._root;
\r
4705 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
\r
4710 PUGI__FN xml_node xml_node::append_child(const char_t* name_)
\r
4712 xml_node result = append_child(node_element);
\r
4714 result.set_name(name_);
\r
4719 PUGI__FN xml_node xml_node::prepend_child(const char_t* name_)
\r
4721 xml_node result = prepend_child(node_element);
\r
4723 result.set_name(name_);
\r
4728 PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node)
\r
4730 xml_node result = insert_child_after(node_element, node);
\r
4732 result.set_name(name_);
\r
4737 PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node)
\r
4739 xml_node result = insert_child_before(node_element, node);
\r
4741 result.set_name(name_);
\r
4746 PUGI__FN xml_node xml_node::append_copy(const xml_node& proto)
\r
4748 xml_node result = append_child(proto.type());
\r
4750 if (result) impl::recursive_copy_skip(result, proto, result);
\r
4755 PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto)
\r
4757 xml_node result = prepend_child(proto.type());
\r
4759 if (result) impl::recursive_copy_skip(result, proto, result);
\r
4764 PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
\r
4766 xml_node result = insert_child_after(proto.type(), node);
\r
4768 if (result) impl::recursive_copy_skip(result, proto, result);
\r
4773 PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
\r
4775 xml_node result = insert_child_before(proto.type(), node);
\r
4777 if (result) impl::recursive_copy_skip(result, proto, result);
\r
4782 PUGI__FN bool xml_node::remove_attribute(const char_t* name_)
\r
4784 return remove_attribute(attribute(name_));
\r
4787 PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a)
\r
4789 if (!_root || !a._attr) return false;
\r
4791 // check that attribute belongs to *this
\r
4792 xml_attribute_struct* attr = a._attr;
\r
4794 while (attr->prev_attribute_c->next_attribute) attr = attr->prev_attribute_c;
\r
4796 if (attr != _root->first_attribute) return false;
\r
4798 if (a._attr->next_attribute) a._attr->next_attribute->prev_attribute_c = a._attr->prev_attribute_c;
\r
4799 else if (_root->first_attribute) _root->first_attribute->prev_attribute_c = a._attr->prev_attribute_c;
\r
4801 if (a._attr->prev_attribute_c->next_attribute) a._attr->prev_attribute_c->next_attribute = a._attr->next_attribute;
\r
4802 else _root->first_attribute = a._attr->next_attribute;
\r
4804 impl::destroy_attribute(a._attr, impl::get_allocator(_root));
\r
4809 PUGI__FN bool xml_node::remove_child(const char_t* name_)
\r
4811 return remove_child(child(name_));
\r
4814 PUGI__FN bool xml_node::remove_child(const xml_node& n)
\r
4816 if (!_root || !n._root || n._root->parent != _root) return false;
\r
4818 if (n._root->next_sibling) n._root->next_sibling->prev_sibling_c = n._root->prev_sibling_c;
\r
4819 else if (_root->first_child) _root->first_child->prev_sibling_c = n._root->prev_sibling_c;
\r
4821 if (n._root->prev_sibling_c->next_sibling) n._root->prev_sibling_c->next_sibling = n._root->next_sibling;
\r
4822 else _root->first_child = n._root->next_sibling;
\r
4824 impl::destroy_node(n._root, impl::get_allocator(_root));
\r
4829 PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
\r
4831 // append_buffer is only valid for elements/documents
\r
4832 if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root);
\r
4834 // get document node
\r
4835 impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(root()._root);
\r
4838 // get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later)
\r
4839 impl::xml_memory_page* page = 0;
\r
4840 impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer), page));
\r
4843 if (!extra) return impl::make_parse_result(status_out_of_memory);
\r
4845 // save name; name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level
\r
4846 char_t* rootname = _root->name;
\r
4850 char_t* buffer = 0;
\r
4851 xml_parse_result res = impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &buffer);
\r
4854 _root->name = rootname;
\r
4856 // add extra buffer to the list
\r
4857 extra->buffer = buffer;
\r
4858 extra->next = doc->extra_buffers;
\r
4859 doc->extra_buffers = extra;
\r
4864 PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const
\r
4866 if (!_root) return xml_node();
\r
4868 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
\r
4869 if (i->name && impl::strequal(name_, i->name))
\r
4871 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
\r
4872 if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value : PUGIXML_TEXT("")))
\r
4873 return xml_node(i);
\r
4876 return xml_node();
\r
4879 PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
\r
4881 if (!_root) return xml_node();
\r
4883 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
\r
4884 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
\r
4885 if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value : PUGIXML_TEXT("")))
\r
4886 return xml_node(i);
\r
4888 return xml_node();
\r
4891 #ifndef PUGIXML_NO_STL
\r
4892 PUGI__FN string_t xml_node::path(char_t delimiter) const
\r
4894 xml_node cursor = *this; // Make a copy.
\r
4896 string_t result = cursor.name();
\r
4898 while (cursor.parent())
\r
4900 cursor = cursor.parent();
\r
4902 string_t temp = cursor.name();
\r
4903 temp += delimiter;
\r
4905 result.swap(temp);
\r
4912 PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const
\r
4914 xml_node found = *this; // Current search context.
\r
4916 if (!_root || !path_ || !path_[0]) return found;
\r
4918 if (path_[0] == delimiter)
\r
4920 // Absolute path; e.g. '/foo/bar'
\r
4921 found = found.root();
\r
4925 const char_t* path_segment = path_;
\r
4927 while (*path_segment == delimiter) ++path_segment;
\r
4929 const char_t* path_segment_end = path_segment;
\r
4931 while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;
\r
4933 if (path_segment == path_segment_end) return found;
\r
4935 const char_t* next_segment = path_segment_end;
\r
4937 while (*next_segment == delimiter) ++next_segment;
\r
4939 if (*path_segment == '.' && path_segment + 1 == path_segment_end)
\r
4940 return found.first_element_by_path(next_segment, delimiter);
\r
4941 else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)
\r
4942 return found.parent().first_element_by_path(next_segment, delimiter);
\r
4945 for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling)
\r
4947 if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment)))
\r
4949 xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);
\r
4951 if (subsearch) return subsearch;
\r
4955 return xml_node();
\r
4959 PUGI__FN bool xml_node::traverse(xml_tree_walker& walker)
\r
4961 walker._depth = -1;
\r
4963 xml_node arg_begin = *this;
\r
4964 if (!walker.begin(arg_begin)) return false;
\r
4966 xml_node cur = first_child();
\r
4974 xml_node arg_for_each = cur;
\r
4975 if (!walker.for_each(arg_for_each))
\r
4978 if (cur.first_child())
\r
4981 cur = cur.first_child();
\r
4983 else if (cur.next_sibling())
\r
4984 cur = cur.next_sibling();
\r
4987 // Borland C++ workaround
\r
4988 while (!cur.next_sibling() && cur != *this && !cur.parent().empty())
\r
4991 cur = cur.parent();
\r
4995 cur = cur.next_sibling();
\r
4998 while (cur && cur != *this);
\r
5001 assert(walker._depth == -1);
\r
5003 xml_node arg_end = *this;
\r
5004 return walker.end(arg_end);
\r
5007 PUGI__FN size_t xml_node::hash_value() const
\r
5009 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct));
\r
5012 PUGI__FN xml_node_struct* xml_node::internal_object() const
\r
5017 PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
\r
5019 if (!_root) return;
\r
5021 impl::xml_buffered_writer buffered_writer(writer, encoding);
\r
5023 impl::node_output(buffered_writer, *this, indent, flags, depth);
\r
5026 #ifndef PUGIXML_NO_STL
\r
5027 PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
\r
5029 xml_writer_stream writer(stream);
\r
5031 print(writer, indent, flags, encoding, depth);
\r
5034 PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
\r
5036 xml_writer_stream writer(stream);
\r
5038 print(writer, indent, flags, encoding_wchar, depth);
\r
5042 PUGI__FN ptrdiff_t xml_node::offset_debug() const
\r
5044 xml_node_struct* r = root()._root;
\r
5046 if (!r) return -1;
\r
5048 const char_t* buffer = static_cast<impl::xml_document_struct*>(r)->buffer;
\r
5050 if (!buffer) return -1;
\r
5054 case node_document:
\r
5057 case node_element:
\r
5058 case node_declaration:
\r
5060 return (_root->header & impl::xml_memory_page_name_allocated_mask) ? -1 : _root->name - buffer;
\r
5064 case node_comment:
\r
5065 case node_doctype:
\r
5066 return (_root->header & impl::xml_memory_page_value_allocated_mask) ? -1 : _root->value - buffer;
\r
5073 #ifdef __BORLANDC__
\r
5074 PUGI__FN bool operator&&(const xml_node& lhs, bool rhs)
\r
5076 return (bool)lhs && rhs;
\r
5079 PUGI__FN bool operator||(const xml_node& lhs, bool rhs)
\r
5081 return (bool)lhs || rhs;
\r
5085 PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root)
\r
5089 PUGI__FN xml_node_struct* xml_text::_data() const
\r
5091 if (!_root || impl::is_text_node(_root)) return _root;
\r
5093 for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling)
\r
5094 if (impl::is_text_node(node))
\r
5100 PUGI__FN xml_node_struct* xml_text::_data_new()
\r
5102 xml_node_struct* d = _data();
\r
5105 return xml_node(_root).append_child(node_pcdata).internal_object();
\r
5108 PUGI__FN xml_text::xml_text(): _root(0)
\r
5112 PUGI__FN static void unspecified_bool_xml_text(xml_text***)
\r
5116 PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const
\r
5118 return _data() ? unspecified_bool_xml_text : 0;
\r
5121 PUGI__FN bool xml_text::operator!() const
\r
5126 PUGI__FN bool xml_text::empty() const
\r
5128 return _data() == 0;
\r
5131 PUGI__FN const char_t* xml_text::get() const
\r
5133 xml_node_struct* d = _data();
\r
5135 return (d && d->value) ? d->value : PUGIXML_TEXT("");
\r
5138 PUGI__FN const char_t* xml_text::as_string(const char_t* def) const
\r
5140 xml_node_struct* d = _data();
\r
5142 return (d && d->value) ? d->value : def;
\r
5145 PUGI__FN int xml_text::as_int(int def) const
\r
5147 xml_node_struct* d = _data();
\r
5149 return impl::get_value_int(d ? d->value : 0, def);
\r
5152 PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const
\r
5154 xml_node_struct* d = _data();
\r
5156 return impl::get_value_uint(d ? d->value : 0, def);
\r
5159 PUGI__FN double xml_text::as_double(double def) const
\r
5161 xml_node_struct* d = _data();
\r
5163 return impl::get_value_double(d ? d->value : 0, def);
\r
5166 PUGI__FN float xml_text::as_float(float def) const
\r
5168 xml_node_struct* d = _data();
\r
5170 return impl::get_value_float(d ? d->value : 0, def);
\r
5173 PUGI__FN bool xml_text::as_bool(bool def) const
\r
5175 xml_node_struct* d = _data();
\r
5177 return impl::get_value_bool(d ? d->value : 0, def);
\r
5180 #ifdef PUGIXML_HAS_LONG_LONG
\r
5181 PUGI__FN long long xml_text::as_llong(long long def) const
\r
5183 xml_node_struct* d = _data();
\r
5185 return impl::get_value_llong(d ? d->value : 0, def);
\r
5188 PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const
\r
5190 xml_node_struct* d = _data();
\r
5192 return impl::get_value_ullong(d ? d->value : 0, def);
\r
5196 PUGI__FN bool xml_text::set(const char_t* rhs)
\r
5198 xml_node_struct* dn = _data_new();
\r
5200 return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
\r
5203 PUGI__FN bool xml_text::set(int rhs)
\r
5205 xml_node_struct* dn = _data_new();
\r
5207 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
\r
5210 PUGI__FN bool xml_text::set(unsigned int rhs)
\r
5212 xml_node_struct* dn = _data_new();
\r
5214 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
\r
5217 PUGI__FN bool xml_text::set(double rhs)
\r
5219 xml_node_struct* dn = _data_new();
\r
5221 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
\r
5224 PUGI__FN bool xml_text::set(bool rhs)
\r
5226 xml_node_struct* dn = _data_new();
\r
5228 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
\r
5231 #ifdef PUGIXML_HAS_LONG_LONG
\r
5232 PUGI__FN bool xml_text::set(long long rhs)
\r
5234 xml_node_struct* dn = _data_new();
\r
5236 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
\r
5239 PUGI__FN bool xml_text::set(unsigned long long rhs)
\r
5241 xml_node_struct* dn = _data_new();
\r
5243 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
\r
5247 PUGI__FN xml_text& xml_text::operator=(const char_t* rhs)
\r
5253 PUGI__FN xml_text& xml_text::operator=(int rhs)
\r
5259 PUGI__FN xml_text& xml_text::operator=(unsigned int rhs)
\r
5265 PUGI__FN xml_text& xml_text::operator=(double rhs)
\r
5271 PUGI__FN xml_text& xml_text::operator=(bool rhs)
\r
5277 #ifdef PUGIXML_HAS_LONG_LONG
\r
5278 PUGI__FN xml_text& xml_text::operator=(long long rhs)
\r
5284 PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs)
\r
5291 PUGI__FN xml_node xml_text::data() const
\r
5293 return xml_node(_data());
\r
5296 #ifdef __BORLANDC__
\r
5297 PUGI__FN bool operator&&(const xml_text& lhs, bool rhs)
\r
5299 return (bool)lhs && rhs;
\r
5302 PUGI__FN bool operator||(const xml_text& lhs, bool rhs)
\r
5304 return (bool)lhs || rhs;
\r
5308 PUGI__FN xml_node_iterator::xml_node_iterator()
\r
5312 PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())
\r
5316 PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
\r
5320 PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
\r
5322 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
\r
5325 PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
\r
5327 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
\r
5330 PUGI__FN xml_node& xml_node_iterator::operator*() const
\r
5332 assert(_wrap._root);
\r
5336 PUGI__FN xml_node* xml_node_iterator::operator->() const
\r
5338 assert(_wrap._root);
\r
5339 return const_cast<xml_node*>(&_wrap); // BCC32 workaround
\r
5342 PUGI__FN const xml_node_iterator& xml_node_iterator::operator++()
\r
5344 assert(_wrap._root);
\r
5345 _wrap._root = _wrap._root->next_sibling;
\r
5349 PUGI__FN xml_node_iterator xml_node_iterator::operator++(int)
\r
5351 xml_node_iterator temp = *this;
\r
5356 PUGI__FN const xml_node_iterator& xml_node_iterator::operator--()
\r
5358 _wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child();
\r
5362 PUGI__FN xml_node_iterator xml_node_iterator::operator--(int)
\r
5364 xml_node_iterator temp = *this;
\r
5369 PUGI__FN xml_attribute_iterator::xml_attribute_iterator()
\r
5373 PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
\r
5377 PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
\r
5381 PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
\r
5383 return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root;
\r
5386 PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
\r
5388 return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root;
\r
5391 PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const
\r
5393 assert(_wrap._attr);
\r
5397 PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const
\r
5399 assert(_wrap._attr);
\r
5400 return const_cast<xml_attribute*>(&_wrap); // BCC32 workaround
\r
5403 PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator++()
\r
5405 assert(_wrap._attr);
\r
5406 _wrap._attr = _wrap._attr->next_attribute;
\r
5410 PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int)
\r
5412 xml_attribute_iterator temp = *this;
\r
5417 PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator--()
\r
5419 _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute();
\r
5423 PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int)
\r
5425 xml_attribute_iterator temp = *this;
\r
5430 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0)
\r
5434 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name)
\r
5438 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name)
\r
5442 PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const
\r
5444 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
\r
5447 PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const
\r
5449 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
\r
5452 PUGI__FN xml_node& xml_named_node_iterator::operator*() const
\r
5454 assert(_wrap._root);
\r
5458 PUGI__FN xml_node* xml_named_node_iterator::operator->() const
\r
5460 assert(_wrap._root);
\r
5461 return const_cast<xml_node*>(&_wrap); // BCC32 workaround
\r
5464 PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator++()
\r
5466 assert(_wrap._root);
\r
5467 _wrap = _wrap.next_sibling(_name);
\r
5471 PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int)
\r
5473 xml_named_node_iterator temp = *this;
\r
5478 PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator--()
\r
5481 _wrap = _wrap.previous_sibling(_name);
\r
5484 _wrap = _parent.last_child();
\r
5486 if (!impl::strequal(_wrap.name(), _name))
\r
5487 _wrap = _wrap.previous_sibling(_name);
\r
5493 PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int)
\r
5495 xml_named_node_iterator temp = *this;
\r
5500 PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)
\r
5504 PUGI__FN xml_parse_result::operator bool() const
\r
5506 return status == status_ok;
\r
5509 PUGI__FN const char* xml_parse_result::description() const
\r
5513 case status_ok: return "No error";
\r
5515 case status_file_not_found: return "File was not found";
\r
5516 case status_io_error: return "Error reading from file/stream";
\r
5517 case status_out_of_memory: return "Could not allocate memory";
\r
5518 case status_internal_error: return "Internal error occurred";
\r
5520 case status_unrecognized_tag: return "Could not determine tag type";
\r
5522 case status_bad_pi: return "Error parsing document declaration/processing instruction";
\r
5523 case status_bad_comment: return "Error parsing comment";
\r
5524 case status_bad_cdata: return "Error parsing CDATA section";
\r
5525 case status_bad_doctype: return "Error parsing document type declaration";
\r
5526 case status_bad_pcdata: return "Error parsing PCDATA section";
\r
5527 case status_bad_start_element: return "Error parsing start element tag";
\r
5528 case status_bad_attribute: return "Error parsing element attribute";
\r
5529 case status_bad_end_element: return "Error parsing end element tag";
\r
5530 case status_end_element_mismatch: return "Start-end tags mismatch";
\r
5532 case status_append_invalid_root: return "Unable to append nodes: root is not an element or document";
\r
5534 case status_no_document_element: return "No document element found";
\r
5536 default: return "Unknown error";
\r
5540 PUGI__FN xml_document::xml_document(): _buffer(0)
\r
5545 PUGI__FN xml_document::~xml_document()
\r
5550 PUGI__FN void xml_document::reset()
\r
5556 PUGI__FN void xml_document::reset(const xml_document& proto)
\r
5560 for (xml_node cur = proto.first_child(); cur; cur = cur.next_sibling())
\r
5564 PUGI__FN void xml_document::create()
\r
5568 // initialize sentinel page
\r
5569 PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + impl::xml_memory_page_alignment <= sizeof(_memory));
\r
5571 // align upwards to page boundary
\r
5572 void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(_memory) + (impl::xml_memory_page_alignment - 1)) & ~(impl::xml_memory_page_alignment - 1));
\r
5574 // prepare page structure
\r
5575 impl::xml_memory_page* page = impl::xml_memory_page::construct(page_memory);
\r
5578 page->busy_size = impl::xml_memory_page_size;
\r
5580 // allocate new root
\r
5581 _root = new (page->data) impl::xml_document_struct(page);
\r
5582 _root->prev_sibling_c = _root;
\r
5584 // setup sentinel page
\r
5585 page->allocator = static_cast<impl::xml_document_struct*>(_root);
\r
5588 PUGI__FN void xml_document::destroy()
\r
5592 // destroy static storage
\r
5595 impl::xml_memory::deallocate(_buffer);
\r
5599 // destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator)
\r
5600 for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next)
\r
5602 if (extra->buffer) impl::xml_memory::deallocate(extra->buffer);
\r
5605 // destroy dynamic storage, leave sentinel page (it's in static memory)
\r
5606 impl::xml_memory_page* root_page = reinterpret_cast<impl::xml_memory_page*>(_root->header & impl::xml_memory_page_pointer_mask);
\r
5607 assert(root_page && !root_page->prev && !root_page->memory);
\r
5609 for (impl::xml_memory_page* page = root_page->next; page; )
\r
5611 impl::xml_memory_page* next = page->next;
\r
5613 impl::xml_allocator::deallocate_page(page);
\r
5621 #ifndef PUGIXML_NO_STL
\r
5622 PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
\r
5626 return impl::load_stream_impl(*this, stream, options, encoding);
\r
5629 PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
\r
5633 return impl::load_stream_impl(*this, stream, options, encoding_wchar);
\r
5637 PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
\r
5639 // Force native encoding (skip autodetection)
\r
5640 #ifdef PUGIXML_WCHAR_MODE
\r
5641 xml_encoding encoding = encoding_wchar;
\r
5643 xml_encoding encoding = encoding_utf8;
\r
5646 return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding);
\r
5649 PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding)
\r
5653 FILE* file = fopen(path_, "rb");
\r
5655 return impl::load_file_impl(*this, file, options, encoding);
\r
5658 PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding)
\r
5662 FILE* file = impl::open_file_wide(path_, L"rb");
\r
5664 return impl::load_file_impl(*this, file, options, encoding);
\r
5667 PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
\r
5671 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer);
\r
5674 PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
\r
5678 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer);
\r
5681 PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
\r
5685 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer);
\r
5688 PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
\r
5690 impl::xml_buffered_writer buffered_writer(writer, encoding);
\r
5692 if ((flags & format_write_bom) && encoding != encoding_latin1)
\r
5694 // BOM always represents the codepoint U+FEFF, so just write it in native encoding
\r
5695 #ifdef PUGIXML_WCHAR_MODE
\r
5696 unsigned int bom = 0xfeff;
\r
5697 buffered_writer.write(static_cast<wchar_t>(bom));
\r
5699 buffered_writer.write('\xef', '\xbb', '\xbf');
\r
5703 if (!(flags & format_no_declaration) && !impl::has_declaration(*this))
\r
5705 buffered_writer.write(PUGIXML_TEXT("<?xml version=\"1.0\""));
\r
5706 if (encoding == encoding_latin1) buffered_writer.write(PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));
\r
5707 buffered_writer.write('?', '>');
\r
5708 if (!(flags & format_raw)) buffered_writer.write('\n');
\r
5711 impl::node_output(buffered_writer, *this, indent, flags, 0);
\r
5714 #ifndef PUGIXML_NO_STL
\r
5715 PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
\r
5717 xml_writer_stream writer(stream);
\r
5719 save(writer, indent, flags, encoding);
\r
5722 PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
\r
5724 xml_writer_stream writer(stream);
\r
5726 save(writer, indent, flags, encoding_wchar);
\r
5730 PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
\r
5732 FILE* file = fopen(path_, (flags & format_save_file_text) ? "w" : "wb");
\r
5733 return impl::save_file_impl(*this, file, indent, flags, encoding);
\r
5736 PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
\r
5738 FILE* file = impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb");
\r
5739 return impl::save_file_impl(*this, file, indent, flags, encoding);
\r
5742 PUGI__FN xml_node xml_document::document_element() const
\r
5746 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
\r
5747 if ((i->header & impl::xml_memory_page_type_mask) + 1 == node_element)
\r
5748 return xml_node(i);
\r
5750 return xml_node();
\r
5753 #ifndef PUGIXML_NO_STL
\r
5754 PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
\r
5758 return impl::as_utf8_impl(str, impl::strlength_wide(str));
\r
5761 PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)
\r
5763 return impl::as_utf8_impl(str.c_str(), str.size());
\r
5766 PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str)
\r
5770 return impl::as_wide_impl(str, strlen(str));
\r
5773 PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str)
\r
5775 return impl::as_wide_impl(str.c_str(), str.size());
\r
5779 PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
\r
5781 impl::xml_memory::allocate = allocate;
\r
5782 impl::xml_memory::deallocate = deallocate;
\r
5785 PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
\r
5787 return impl::xml_memory::allocate;
\r
5790 PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
\r
5792 return impl::xml_memory::deallocate;
\r
5796 #if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
\r
5799 // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
\r
5800 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&)
\r
5802 return std::bidirectional_iterator_tag();
\r
5805 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&)
\r
5807 return std::bidirectional_iterator_tag();
\r
5810 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&)
\r
5812 return std::bidirectional_iterator_tag();
\r
5817 #if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
\r
5820 // Workarounds for (non-standard) iterator category detection
\r
5821 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&)
\r
5823 return std::bidirectional_iterator_tag();
\r
5826 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&)
\r
5828 return std::bidirectional_iterator_tag();
\r
5831 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&)
\r
5833 return std::bidirectional_iterator_tag();
\r
5838 #ifndef PUGIXML_NO_XPATH
\r
5840 // STL replacements
\r
5844 template <typename T> bool operator()(const T& lhs, const T& rhs) const
\r
5846 return lhs == rhs;
\r
// Function object that compares two values of the same type with operator!=.
struct not_equal_to
{
    template <typename T> bool operator()(const T& lhs, const T& rhs) const
    {
        const bool differs = (lhs != rhs);

        return differs;
    }
};
\r
5860 template <typename T> bool operator()(const T& lhs, const T& rhs) const
\r
5868 template <typename T> bool operator()(const T& lhs, const T& rhs) const
\r
5870 return lhs <= rhs;
\r
5874 template <typename T> void swap(T& lhs, T& rhs)
\r
// Returns an iterator to the first element of [begin, end) that no other element
// precedes according to pred (pred(a, b) is true when a orders before b).
// An empty range returns end, matching std::min_element, instead of stepping past end.
template <typename I, typename Pred> I min_element(I begin, I end, const Pred& pred)
{
    if (begin == end) return end;

    I result = begin;

    for (I it = begin + 1; it != end; ++it)
        if (pred(*it, *result))
            result = it;

    return result;
}
\r
// Reverses [begin, end) in place by swapping elements from both ends towards the middle.
template <typename I> void reverse(I begin, I end)
{
    while (end - begin > 1)
    {
        --end;
        swap(*begin, *end);
        ++begin;
    }
}
\r
// Collapses each run of consecutive equal elements in [begin, end) to its first element,
// compacting the survivors to the front. Returns the iterator one past the last kept element.
template <typename I> I unique(I begin, I end)
{
    // Fast skip of the head: nothing has to move while neighbours differ.
    while (end - begin > 1)
    {
        if (*begin != *(begin + 1)) ++begin;
        else break;
    }

    if (begin == end) return begin;

    // last_kept points at the most recently written (live) element.
    I last_kept = begin;

    for (I read = begin + 1; read != end; ++read)
    {
        if (*read != *last_kept)
        {
            ++last_kept;
            *last_kept = *read;
        }
    }

    // past-the-end of the compacted range
    return last_kept + 1;
}
\r
// Copies [begin, end) so that the copy ends at target, working from the last element
// to the first; this makes it safe for shifting a range to the right within itself.
template <typename I> void copy_backwards(I begin, I end, I target)
{
    while (begin != end)
    {
        --target;
        --end;
        *target = *end;
    }
}
\r
// Sorts the non-empty range [begin, end) in place by insertion, ordering with pred.
// The trailing T* argument is unused; it only supplies the element type for the temporary.
template <typename I, typename Pred, typename T> void insertion_sort(I begin, I end, const Pred& pred, T*)
{
    assert(begin != end);

    for (I cursor = begin + 1; cursor != end; ++cursor)
    {
        T pending = *cursor;
        I slot = cursor;

        if (pred(pending, *begin))
        {
            // New front element: shift the whole sorted prefix right by one.
            for (; slot != begin; --slot)
                *slot = *(slot - 1);
        }
        else
        {
            // Move the hole backwards; *begin does not order after pending, so the
            // scan stops before running off the front.
            for (; pred(pending, *(slot - 1)); --slot)
                *slot = *(slot - 1);
        }

        // fill hole with element
        *slot = pending;
    }
}
\r
5956 // std variant for elements with ==
\r
5957 template <typename I, typename Pred> void partition(I begin, I middle, I end, const Pred& pred, I* out_eqbeg, I* out_eqend)
\r
5959 I eqbeg = middle, eqend = middle + 1;
\r
5961 // expand equal range
\r
5962 while (eqbeg != begin && *(eqbeg - 1) == *eqbeg) --eqbeg;
\r
5963 while (eqend != end && *eqend == *eqbeg) ++eqend;
\r
5965 // process outer elements
\r
5966 I ltend = eqbeg, gtbeg = eqend;
\r
5970 // find the element from the right side that belongs to the left one
\r
5971 for (; gtbeg != end; ++gtbeg)
\r
5972 if (!pred(*eqbeg, *gtbeg))
\r
5974 if (*gtbeg == *eqbeg) swap(*gtbeg, *eqend++);
\r
5978 // find the element from the left side that belongs to the right one
\r
5979 for (; ltend != begin; --ltend)
\r
5980 if (!pred(*(ltend - 1), *eqbeg))
\r
5982 if (*eqbeg == *(ltend - 1)) swap(*(ltend - 1), *--eqbeg);
\r
5986 // scanned all elements
\r
5987 if (gtbeg == end && ltend == begin)
\r
5989 *out_eqbeg = eqbeg;
\r
5990 *out_eqend = eqend;
\r
5994 // make room for elements by moving equal area
\r
5997 if (--ltend != --eqbeg) swap(*ltend, *eqbeg);
\r
5998 swap(*eqbeg, *--eqend);
\r
6000 else if (ltend == begin)
\r
6002 if (eqend != gtbeg) swap(*eqbeg, *eqend);
\r
6004 swap(*gtbeg++, *eqbeg++);
\r
6006 else swap(*gtbeg++, *--ltend);
\r
6010 template <typename I, typename Pred> void median3(I first, I middle, I last, const Pred& pred)
\r
6012 if (pred(*middle, *first)) swap(*middle, *first);
\r
6013 if (pred(*last, *middle)) swap(*last, *middle);
\r
6014 if (pred(*middle, *first)) swap(*middle, *first);
\r
// Moves a pivot estimate into *middle: a plain median of three for spans of at most 40,
// otherwise a median of three medians taken at the start, middle and end of the span.
// `last` refers to the final element itself, not one past it.
template <typename I, typename Pred> void median(I first, I middle, I last, const Pred& pred)
{
    if (last - first > 40)
    {
        // median of nine
        size_t step = (last - first + 1) / 8;

        median3(first, first + step, first + 2 * step, pred);
        median3(middle - step, middle, middle + step, pred);
        median3(last - 2 * step, last - step, last, pred);
        median3(first + step, middle, last - step, pred);

        return;
    }

    // median of three for small chunks
    median3(first, middle, last, pred);
}
\r
// Sorts [begin, end) with pred. Ranges longer than 32 elements are split around a
// median pivot into three parts (< = >); one outer part is sorted recursively and the
// other is handled by the next loop iteration. The remaining short range is finished
// with insertion sort.
template <typename I, typename Pred> void sort(I begin, I end, const Pred& pred)
{
    // sort large chunks
    for (; end - begin > 32; )
    {
        // find median element
        I pivot = begin + (end - begin) / 2;
        median(begin, pivot, end - 1, pred);

        // partition in three chunks (< = >)
        I equal_begin, equal_end;
        partition(begin, pivot, end, pred, &equal_begin, &equal_end);

        // Recurse into one side and keep looping on the other, as selected below.
        const bool left_is_larger = (equal_begin - begin > end - equal_end);

        if (left_is_larger)
        {
            sort(equal_end, end, pred);
            end = equal_begin;
        }
        else
        {
            sort(begin, equal_begin, pred);
            begin = equal_end;
        }
    }

    // insertion sort small chunk
    if (begin != end) insertion_sort(begin, end, pred, &*begin);
}
\r
6067 // Allocator used for AST and evaluation stacks
\r
6069 struct xpath_memory_block
\r
6071 xpath_memory_block* next;
\r
6074 #ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
\r
6075 PUGIXML_MEMORY_XPATH_PAGE_SIZE
\r
6082 class xpath_allocator
\r
6084 xpath_memory_block* _root;
\r
6085 size_t _root_size;
\r
6088 #ifdef PUGIXML_NO_EXCEPTIONS
\r
6089 jmp_buf* error_handler;
\r
6092 xpath_allocator(xpath_memory_block* root, size_t root_size = 0): _root(root), _root_size(root_size)
\r
6094 #ifdef PUGIXML_NO_EXCEPTIONS
\r
6095 error_handler = 0;
\r
6099 void* allocate_nothrow(size_t size)
\r
6101 const size_t block_capacity = sizeof(_root->data);
\r
6103 // align size so that we're able to store pointers in subsequent blocks
\r
6104 size = (size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
\r
6106 if (_root_size + size <= block_capacity)
\r
6108 void* buf = _root->data + _root_size;
\r
6109 _root_size += size;
\r
6114 size_t block_data_size = (size > block_capacity) ? size : block_capacity;
\r
6115 size_t block_size = block_data_size + offsetof(xpath_memory_block, data);
\r
6117 xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size));
\r
6118 if (!block) return 0;
\r
6120 block->next = _root;
\r
6123 _root_size = size;
\r
6125 return block->data;
\r
6129 void* allocate(size_t size)
\r
6131 void* result = allocate_nothrow(size);
\r
6135 #ifdef PUGIXML_NO_EXCEPTIONS
\r
6136 assert(error_handler);
\r
6137 longjmp(*error_handler, 1);
\r
6139 throw std::bad_alloc();
\r
6146 void* reallocate(void* ptr, size_t old_size, size_t new_size)
\r
6148 // align size so that we're able to store pointers in subsequent blocks
\r
6149 old_size = (old_size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
\r
6150 new_size = (new_size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
\r
6152 // we can only reallocate the last object
\r
6153 assert(ptr == 0 || static_cast<char*>(ptr) + old_size == _root->data + _root_size);
\r
6155 // adjust root size so that we have not allocated the object at all
\r
6156 bool only_object = (_root_size == old_size);
\r
6158 if (ptr) _root_size -= old_size;
\r
6160 // allocate a new version (this will obviously reuse the memory if possible)
\r
6161 void* result = allocate(new_size);
\r
6164 // we have a new block
\r
6165 if (result != ptr && ptr)
\r
6168 assert(new_size >= old_size);
\r
6169 memcpy(result, ptr, old_size);
\r
6171 // free the previous page if it had no other objects
\r
6174 assert(_root->data == result);
\r
6175 assert(_root->next);
\r
6177 xpath_memory_block* next = _root->next->next;
\r
6181 // deallocate the whole page, unless it was the first one
\r
6182 xml_memory::deallocate(_root->next);
\r
6183 _root->next = next;
\r
6191 void revert(const xpath_allocator& state)
\r
6193 // free all new pages
\r
6194 xpath_memory_block* cur = _root;
\r
6196 while (cur != state._root)
\r
6198 xpath_memory_block* next = cur->next;
\r
6200 xml_memory::deallocate(cur);
\r
6206 _root = state._root;
\r
6207 _root_size = state._root_size;
\r
6212 xpath_memory_block* cur = _root;
\r
6217 xpath_memory_block* next = cur->next;
\r
6219 xml_memory::deallocate(cur);
\r
6226 struct xpath_allocator_capture
\r
6228 xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc)
\r
6232 ~xpath_allocator_capture()
\r
6234 _target->revert(_state);
\r
6237 xpath_allocator* _target;
\r
6238 xpath_allocator _state;
\r
6241 struct xpath_stack
\r
6243 xpath_allocator* result;
\r
6244 xpath_allocator* temp;
\r
6247 struct xpath_stack_data
\r
6249 xpath_memory_block blocks[2];
\r
6250 xpath_allocator result;
\r
6251 xpath_allocator temp;
\r
6252 xpath_stack stack;
\r
6254 #ifdef PUGIXML_NO_EXCEPTIONS
\r
6255 jmp_buf error_handler;
\r
6258 xpath_stack_data(): result(blocks + 0), temp(blocks + 1)
\r
6260 blocks[0].next = blocks[1].next = 0;
\r
6262 stack.result = &result;
\r
6263 stack.temp = &temp;
\r
6265 #ifdef PUGIXML_NO_EXCEPTIONS
\r
6266 result.error_handler = temp.error_handler = &error_handler;
\r
6270 ~xpath_stack_data()
\r
6280 class xpath_string
\r
6282 const char_t* _buffer;
\r
6285 static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc)
\r
6287 char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
\r
6290 memcpy(result, string, length * sizeof(char_t));
\r
6291 result[length] = 0;
\r
6296 static char_t* duplicate_string(const char_t* string, xpath_allocator* alloc)
\r
6298 return duplicate_string(string, strlength(string), alloc);
\r
6302 xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false)
\r
6306 explicit xpath_string(const char_t* str, xpath_allocator* alloc)
\r
6308 bool empty_ = (*str == 0);
\r
6310 _buffer = empty_ ? PUGIXML_TEXT("") : duplicate_string(str, alloc);
\r
6311 _uses_heap = !empty_;
\r
6314 explicit xpath_string(const char_t* str, bool use_heap): _buffer(str), _uses_heap(use_heap)
\r
6318 xpath_string(const char_t* begin, const char_t* end, xpath_allocator* alloc)
\r
6320 assert(begin <= end);
\r
6322 bool empty_ = (begin == end);
\r
6324 _buffer = empty_ ? PUGIXML_TEXT("") : duplicate_string(begin, static_cast<size_t>(end - begin), alloc);
\r
6325 _uses_heap = !empty_;
\r
6328 void append(const xpath_string& o, xpath_allocator* alloc)
\r
6330 // skip empty sources
\r
6331 if (!*o._buffer) return;
\r
6333 // fast append for constant empty target and constant source
\r
6334 if (!*_buffer && !_uses_heap && !o._uses_heap)
\r
6336 _buffer = o._buffer;
\r
6340 // need to make heap copy
\r
6341 size_t target_length = strlength(_buffer);
\r
6342 size_t source_length = strlength(o._buffer);
\r
6343 size_t result_length = target_length + source_length;
\r
6345 // allocate new buffer
\r
6346 char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t)));
\r
6349 // append first string to the new buffer in case there was no reallocation
\r
6350 if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));
\r
6352 // append second string to the new buffer
\r
6353 memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
\r
6354 result[result_length] = 0;
\r
6358 _uses_heap = true;
\r
6362 const char_t* c_str() const
\r
6367 size_t length() const
\r
6369 return strlength(_buffer);
\r
6372 char_t* data(xpath_allocator* alloc)
\r
6374 // make private heap copy
\r
6377 _buffer = duplicate_string(_buffer, alloc);
\r
6378 _uses_heap = true;
\r
6381 return const_cast<char_t*>(_buffer);
\r
6384 bool empty() const
\r
6386 return *_buffer == 0;
\r
6389 bool operator==(const xpath_string& o) const
\r
6391 return strequal(_buffer, o._buffer);
\r
6394 bool operator!=(const xpath_string& o) const
\r
6396 return !strequal(_buffer, o._buffer);
\r
6399 bool uses_heap() const
\r
6401 return _uses_heap;
\r
6405 PUGI__FN xpath_string xpath_string_const(const char_t* str)
\r
6407 return xpath_string(str, false);
\r
6412 PUGI__FN bool starts_with(const char_t* string, const char_t* pattern)
\r
6414 while (*pattern && *string == *pattern)
\r
6420 return *pattern == 0;
\r
6423 PUGI__FN const char_t* find_char(const char_t* s, char_t c)
\r
6425 #ifdef PUGIXML_WCHAR_MODE
\r
6426 return wcschr(s, c);
\r
6428 return strchr(s, c);
\r
6432 PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p)
\r
6434 #ifdef PUGIXML_WCHAR_MODE
\r
6435 // MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
\r
6436 return (*p == 0) ? s : wcsstr(s, p);
\r
6438 return strstr(s, p);
\r
6442 // Converts symbol to lower case, if it is an ASCII one
\r
6443 PUGI__FN char_t tolower_ascii(char_t ch)
\r
6445 return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch;
\r
6448 PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc)
\r
6450 if (na.attribute())
\r
6451 return xpath_string_const(na.attribute().value());
\r
6454 const xml_node& n = na.node();
\r
6460 case node_comment:
\r
6462 return xpath_string_const(n.value());
\r
6464 case node_document:
\r
6465 case node_element:
\r
6467 xpath_string result;
\r
6469 xml_node cur = n.first_child();
\r
6471 while (cur && cur != n)
\r
6473 if (cur.type() == node_pcdata || cur.type() == node_cdata)
\r
6474 result.append(xpath_string_const(cur.value()), alloc);
\r
6476 if (cur.first_child())
\r
6477 cur = cur.first_child();
\r
6478 else if (cur.next_sibling())
\r
6479 cur = cur.next_sibling();
\r
6482 while (!cur.next_sibling() && cur != n)
\r
6483 cur = cur.parent();
\r
6485 if (cur != n) cur = cur.next_sibling();
\r
6493 return xpath_string();
\r
6498 PUGI__FN unsigned int node_height(xml_node n)
\r
6500 unsigned int result = 0;
\r
6511 PUGI__FN bool node_is_before(xml_node ln, unsigned int lh, xml_node rn, unsigned int rh)
\r
6513 // normalize heights
\r
6514 for (unsigned int i = rh; i < lh; i++) ln = ln.parent();
\r
6515 for (unsigned int j = lh; j < rh; j++) rn = rn.parent();
\r
6517 // one node is the ancestor of the other
\r
6518 if (ln == rn) return lh < rh;
\r
6520 // find common ancestor
\r
6521 while (ln.parent() != rn.parent())
\r
6527 // there is no common ancestor (the shared parent is null), nodes are from different documents
\r
6528 if (!ln.parent()) return ln < rn;
\r
6530 // determine sibling order
\r
6531 for (; ln; ln = ln.next_sibling())
\r
6538 PUGI__FN bool node_is_ancestor(xml_node parent, xml_node node)
\r
6540 while (node && node != parent) node = node.parent();
\r
6542 return parent && node == parent;
\r
6545 PUGI__FN const void* document_order(const xpath_node& xnode)
\r
6547 xml_node_struct* node = xnode.node().internal_object();
\r
6551 if (node->name && (node->header & xml_memory_page_name_allocated_mask) == 0) return node->name;
\r
6552 if (node->value && (node->header & xml_memory_page_value_allocated_mask) == 0) return node->value;
\r
6556 xml_attribute_struct* attr = xnode.attribute().internal_object();
\r
6560 if ((attr->header & xml_memory_page_name_allocated_mask) == 0) return attr->name;
\r
6561 if ((attr->header & xml_memory_page_value_allocated_mask) == 0) return attr->value;
\r
6568 struct document_order_comparator
\r
6570 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
\r
6572 // optimized document order based check
\r
6573 const void* lo = document_order(lhs);
\r
6574 const void* ro = document_order(rhs);
\r
6576 if (lo && ro) return lo < ro;
\r
6578 // slow comparison
\r
6579 xml_node ln = lhs.node(), rn = rhs.node();
\r
6581 // compare attributes
\r
6582 if (lhs.attribute() && rhs.attribute())
\r
6585 if (lhs.parent() == rhs.parent())
\r
6587 // determine sibling order
\r
6588 for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
\r
6589 if (a == rhs.attribute())
\r
6595 // compare attribute parents
\r
6596 ln = lhs.parent();
\r
6597 rn = rhs.parent();
\r
6599 else if (lhs.attribute())
\r
6601 // attributes go after the parent element
\r
6602 if (lhs.parent() == rhs.node()) return false;
\r
6604 ln = lhs.parent();
\r
6606 else if (rhs.attribute())
\r
6608 // attributes go after the parent element
\r
6609 if (rhs.parent() == lhs.node()) return true;
\r
6611 rn = rhs.parent();
\r
6614 if (ln == rn) return false;
\r
6616 unsigned int lh = node_height(ln);
\r
6617 unsigned int rh = node_height(rn);
\r
6619 return node_is_before(ln, lh, rn, rh);
\r
6623 struct duplicate_comparator
\r
6625 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
\r
6627 if (lhs.attribute()) return rhs.attribute() ? lhs.attribute() < rhs.attribute() : true;
\r
6628 else return rhs.attribute() ? false : lhs.node() < rhs.node();
\r
6632 PUGI__FN double gen_nan()
\r
6634 #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
\r
6635 union { float f; uint32_t i; } u[sizeof(float) == sizeof(uint32_t) ? 1 : -1];
\r
6636 u[0].i = 0x7fc00000;
\r
6640 const volatile double zero = 0.0;
\r
6641 return zero / zero;
\r
6645 PUGI__FN bool is_nan(double value)
\r
6647 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
\r
6648 return !!_isnan(value);
\r
6649 #elif defined(fpclassify) && defined(FP_NAN)
\r
6650 return fpclassify(value) == FP_NAN;
\r
6653 const volatile double v = value;
\r
6658 PUGI__FN const char_t* convert_number_to_string_special(double value)
\r
6660 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
\r
6661 if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
\r
6662 if (_isnan(value)) return PUGIXML_TEXT("NaN");
\r
6663 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
\r
6664 #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
\r
6665 switch (fpclassify(value))
\r
6668 return PUGIXML_TEXT("NaN");
\r
6671 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
\r
6674 return PUGIXML_TEXT("0");
\r
6681 const volatile double v = value;
\r
6683 if (v == 0) return PUGIXML_TEXT("0");
\r
6684 if (v != v) return PUGIXML_TEXT("NaN");
\r
6685 if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
\r
6690 PUGI__FN bool convert_number_to_boolean(double value)
\r
6692 return (value != 0 && !is_nan(value));
\r
6695 PUGI__FN void truncate_zeros(char* begin, char* end)
\r
6697 while (begin != end && end[-1] == '0') end--;
\r
6702 // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
\r
6703 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
\r
6704 PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
\r
6706 // get base values
\r
6707 int sign, exponent;
\r
6708 _ecvt_s(buffer, buffer_size, value, DBL_DIG + 1, &exponent, &sign);
\r
6710 // truncate redundant zeros
\r
6711 truncate_zeros(buffer, buffer + strlen(buffer));
\r
6714 *out_mantissa = buffer;
\r
6715 *out_exponent = exponent;
\r
6718 PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
\r
6720 // get a scientific notation value with IEEE DBL_DIG decimals
\r
6721 sprintf(buffer, "%.*e", DBL_DIG, value);
\r
6722 assert(strlen(buffer) < buffer_size);
\r
6723 (void)!buffer_size;
\r
6725 // get the exponent (possibly negative)
\r
6726 char* exponent_string = strchr(buffer, 'e');
\r
6727 assert(exponent_string);
\r
6729 int exponent = atoi(exponent_string + 1);
\r
6731 // extract mantissa string: skip sign
\r
6732 char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
\r
6733 assert(mantissa[0] != '0' && mantissa[1] == '.');
\r
6735 // divide mantissa by 10 to eliminate integer part
\r
6736 mantissa[1] = mantissa[0];
\r
6740 // remove extra mantissa digits and zero-terminate mantissa
\r
6741 truncate_zeros(mantissa, exponent_string);
\r
6744 *out_mantissa = mantissa;
\r
6745 *out_exponent = exponent;
\r
6749 PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc)
\r
6751 // try special number conversion
\r
6752 const char_t* special = convert_number_to_string_special(value);
\r
6753 if (special) return xpath_string_const(special);
\r
6755 // get mantissa + exponent form
\r
6756 char mantissa_buffer[32];
\r
6760 convert_number_to_mantissa_exponent(value, mantissa_buffer, sizeof(mantissa_buffer), &mantissa, &exponent);
\r
6762 // allocate a buffer of suitable length for the number
\r
6763 size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4;
\r
6764 char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size));
\r
6767 // make the number!
\r
6768 char_t* s = result;
\r
6771 if (value < 0) *s++ = '-';
\r
6774 if (exponent <= 0)
\r
6780 while (exponent > 0)
\r
6782 assert(*mantissa == 0 || static_cast<unsigned int>(static_cast<unsigned int>(*mantissa) - '0') <= 9);
\r
6783 *s++ = *mantissa ? *mantissa++ : '0';
\r
6788 // fractional part
\r
6794 // extra zeroes from negative exponent
\r
6795 while (exponent < 0)
\r
6801 // extra mantissa digits
\r
6804 assert(static_cast<unsigned int>(*mantissa - '0') <= 9);
\r
6805 *s++ = *mantissa++;
\r
6810 assert(s < result + result_size);
\r
6813 return xpath_string(result, true);
\r
6816 PUGI__FN bool check_string_to_number_format(const char_t* string)
\r
6818 // parse leading whitespace
\r
6819 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
\r
6822 if (*string == '-') ++string;
\r
6824 if (!*string) return false;
\r
6826 // if there is no integer part, there should be a decimal part with at least one digit
\r
6827 if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false;
\r
6829 // parse integer part
\r
6830 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
\r
6832 // parse decimal part
\r
6833 if (*string == '.')
\r
6837 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
\r
6840 // parse trailing whitespace
\r
6841 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
\r
6843 return *string == 0;
\r
6846 PUGI__FN double convert_string_to_number(const char_t* string)
\r
6848 // check string format
\r
6849 if (!check_string_to_number_format(string)) return gen_nan();
\r
6852 #ifdef PUGIXML_WCHAR_MODE
\r
6853 return wcstod(string, 0);
\r
6855 return atof(string);
\r
6859 PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result)
\r
6861 size_t length = static_cast<size_t>(end - begin);
\r
6862 char_t* scratch = buffer;
\r
6864 if (length >= sizeof(buffer) / sizeof(buffer[0]))
\r
6866 // need to make dummy on-heap copy
\r
6867 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
\r
6868 if (!scratch) return false;
\r
6871 // copy string to zero-terminated buffer and perform conversion
\r
6872 memcpy(scratch, begin, length * sizeof(char_t));
\r
6873 scratch[length] = 0;
\r
6875 *out_result = convert_string_to_number(scratch);
\r
6877 // free dummy buffer
\r
6878 if (scratch != buffer) xml_memory::deallocate(scratch);
\r
6883 PUGI__FN double round_nearest(double value)
\r
6885 return floor(value + 0.5);
\r
6888 PUGI__FN double round_nearest_nzero(double value)
\r
6890 // same as round_nearest, but returns -0 for [-0.5, -0]
\r
6891 // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0)
\r
6892 return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5);
\r
6895 PUGI__FN const char_t* qualified_name(const xpath_node& node)
\r
6897 return node.attribute() ? node.attribute().name() : node.node().name();
\r
6900 PUGI__FN const char_t* local_name(const xpath_node& node)
\r
6902 const char_t* name = qualified_name(node);
\r
6903 const char_t* p = find_char(name, ':');
\r
6905 return p ? p + 1 : name;
\r
6908 struct namespace_uri_predicate
\r
6910 const char_t* prefix;
\r
6911 size_t prefix_length;
\r
6913 namespace_uri_predicate(const char_t* name)
\r
6915 const char_t* pos = find_char(name, ':');
\r
6917 prefix = pos ? name : 0;
\r
6918 prefix_length = pos ? static_cast<size_t>(pos - name) : 0;
\r
6921 bool operator()(const xml_attribute& a) const
\r
6923 const char_t* name = a.name();
\r
6925 if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
\r
6927 return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
\r
6931 PUGI__FN const char_t* namespace_uri(const xml_node& node)
\r
6933 namespace_uri_predicate pred = node.name();
\r
6935 xml_node p = node;
\r
6939 xml_attribute a = p.find_attribute(pred);
\r
6941 if (a) return a.value();
\r
6946 return PUGIXML_TEXT("");
\r
6949 PUGI__FN const char_t* namespace_uri(const xml_attribute& attr, const xml_node& parent)
\r
6951 namespace_uri_predicate pred = attr.name();
\r
6953 // Default namespace does not apply to attributes
\r
6954 if (!pred.prefix) return PUGIXML_TEXT("");
\r
6956 xml_node p = parent;
\r
6960 xml_attribute a = p.find_attribute(pred);
\r
6962 if (a) return a.value();
\r
6967 return PUGIXML_TEXT("");
\r
6970 PUGI__FN const char_t* namespace_uri(const xpath_node& node)
\r
6972 return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());
\r
6975 PUGI__FN void normalize_space(char_t* buffer)
\r
6977 char_t* write = buffer;
\r
6979 for (char_t* it = buffer; *it; )
\r
6981 char_t ch = *it++;
\r
6983 if (PUGI__IS_CHARTYPE(ch, ct_space))
\r
6985 // replace whitespace sequence with single space
\r
6986 while (PUGI__IS_CHARTYPE(*it, ct_space)) it++;
\r
6988 // avoid leading spaces
\r
6989 if (write != buffer) *write++ = ' ';
\r
6991 else *write++ = ch;
\r
6994 // remove trailing space
\r
6995 if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--;
\r
7001 PUGI__FN void translate(char_t* buffer, const char_t* from, const char_t* to)
\r
7003 size_t to_length = strlength(to);
\r
7005 char_t* write = buffer;
\r
7009 PUGI__DMC_VOLATILE char_t ch = *buffer++;
\r
7011 const char_t* pos = find_char(from, ch);
\r
7014 *write++ = ch; // do not process
\r
7015 else if (static_cast<size_t>(pos - from) < to_length)
\r
7016 *write++ = to[pos - from]; // replace
\r
7023 struct xpath_variable_boolean: xpath_variable
\r
7025 xpath_variable_boolean(): value(false)
\r
7033 struct xpath_variable_number: xpath_variable
\r
7035 xpath_variable_number(): value(0)
\r
7043 struct xpath_variable_string: xpath_variable
\r
7045 xpath_variable_string(): value(0)
\r
7049 ~xpath_variable_string()
\r
7051 if (value) xml_memory::deallocate(value);
\r
7058 struct xpath_variable_node_set: xpath_variable
\r
7060 xpath_node_set value;
\r
7064 static const xpath_node_set dummy_node_set;
\r
7066 PUGI__FN unsigned int hash_string(const char_t* str)
\r
7068 // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
\r
7069 unsigned int result = 0;
\r
7073 result += static_cast<unsigned int>(*str++);
\r
7074 result += result << 10;
\r
7075 result ^= result >> 6;
\r
7078 result += result << 3;
\r
7079 result ^= result >> 11;
\r
7080 result += result << 15;
\r
7085 template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name)
\r
7087 size_t length = strlength(name);
\r
7088 if (length == 0) return 0; // empty variable names are invalid
\r
7090 // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
\r
7091 void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t));
\r
7092 if (!memory) return 0;
\r
7094 T* result = new (memory) T();
\r
7096 memcpy(result->name, name, (length + 1) * sizeof(char_t));
\r
7101 PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
\r
7105 case xpath_type_node_set:
\r
7106 return new_xpath_variable<xpath_variable_node_set>(name);
\r
7108 case xpath_type_number:
\r
7109 return new_xpath_variable<xpath_variable_number>(name);
\r
7111 case xpath_type_string:
\r
7112 return new_xpath_variable<xpath_variable_string>(name);
\r
7114 case xpath_type_boolean:
\r
7115 return new_xpath_variable<xpath_variable_boolean>(name);
\r
7122 template <typename T> PUGI__FN void delete_xpath_variable(T* var)
\r
7125 xml_memory::deallocate(var);
\r
7128 PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
\r
7132 case xpath_type_node_set:
\r
7133 delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
\r
7136 case xpath_type_number:
\r
7137 delete_xpath_variable(static_cast<xpath_variable_number*>(var));
\r
7140 case xpath_type_string:
\r
7141 delete_xpath_variable(static_cast<xpath_variable_string*>(var));
\r
7144 case xpath_type_boolean:
\r
7145 delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
\r
7149 assert(!"Invalid variable type");
\r
7153 PUGI__FN xpath_variable* get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end)
\r
7155 size_t length = static_cast<size_t>(end - begin);
\r
7156 char_t* scratch = buffer;
\r
7158 if (length >= sizeof(buffer) / sizeof(buffer[0]))
\r
7160 // need to make dummy on-heap copy
\r
7161 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
\r
7162 if (!scratch) return 0;
\r
7165 // copy string to zero-terminated buffer and perform lookup
\r
7166 memcpy(scratch, begin, length * sizeof(char_t));
\r
7167 scratch[length] = 0;
\r
7169 xpath_variable* result = set->get(scratch);
\r
7171 // free dummy buffer
\r
7172 if (scratch != buffer) xml_memory::deallocate(scratch);
\r
7178 // Internal node set class
\r
7180 PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
\r
7182 xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
\r
7184 if (type == xpath_node_set::type_unsorted)
\r
7186 sort(begin, end, document_order_comparator());
\r
7188 type = xpath_node_set::type_sorted;
\r
7191 if (type != order) reverse(begin, end);
\r
7196 PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
\r
7198 if (begin == end) return xpath_node();
\r
7202 case xpath_node_set::type_sorted:
\r
7205 case xpath_node_set::type_sorted_reverse:
\r
7206 return *(end - 1);
\r
7208 case xpath_node_set::type_unsorted:
\r
7209 return *min_element(begin, end, document_order_comparator());
\r
7212 assert(!"Invalid node set type");
\r
7213 return xpath_node();
\r
7217 class xpath_node_set_raw
\r
7219 xpath_node_set::type_t _type;
\r
7221 xpath_node* _begin;
\r
7226 xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0)
\r
7230 xpath_node* begin() const
\r
7235 xpath_node* end() const
\r
7240 bool empty() const
\r
7242 return _begin == _end;
\r
7245 size_t size() const
\r
7247 return static_cast<size_t>(_end - _begin);
\r
7250 xpath_node first() const
\r
7252 return xpath_first(_begin, _end, _type);
\r
7255 void push_back(const xpath_node& node, xpath_allocator* alloc)
\r
7259 size_t capacity = static_cast<size_t>(_eos - _begin);
\r
7261 // get new capacity (1.5x rule)
\r
7262 size_t new_capacity = capacity + capacity / 2 + 1;
\r
7264 // reallocate the old array or allocate a new one
\r
7265 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node)));
\r
7270 _end = data + capacity;
\r
7271 _eos = data + new_capacity;
\r
7277 void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc)
\r
7279 size_t size_ = static_cast<size_t>(_end - _begin);
\r
7280 size_t capacity = static_cast<size_t>(_eos - _begin);
\r
7281 size_t count = static_cast<size_t>(end_ - begin_);
\r
7283 if (size_ + count > capacity)
\r
7285 // reallocate the old array or allocate a new one
\r
7286 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node)));
\r
7291 _end = data + size_;
\r
7292 _eos = data + size_ + count;
\r
7295 memcpy(_end, begin_, count * sizeof(xpath_node));
\r
7301 _type = xpath_sort(_begin, _end, _type, false);
\r
7304 void truncate(xpath_node* pos)
\r
7306 assert(_begin <= pos && pos <= _end);
\r
7311 void remove_duplicates()
\r
7313 if (_type == xpath_node_set::type_unsorted)
\r
7314 sort(_begin, _end, duplicate_comparator());
\r
7316 _end = unique(_begin, _end);
\r
7319 xpath_node_set::type_t type() const
\r
7324 void set_type(xpath_node_set::type_t value)
\r
7332 struct xpath_context
\r
7335 size_t position, size;
\r
7337 xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_)
\r
7349 lex_less_or_equal,
\r
7350 lex_greater_or_equal,
\r
7358 lex_quoted_string,
\r
7362 lex_open_square_brace,
\r
7363 lex_close_square_brace,
\r
7366 lex_axis_attribute,
\r
7373 struct xpath_lexer_string
\r
7375 const char_t* begin;
\r
7376 const char_t* end;
\r
7378 xpath_lexer_string(): begin(0), end(0)
\r
7382 bool operator==(const char_t* other) const
\r
7384 size_t length = static_cast<size_t>(end - begin);
\r
7386 return strequalrange(other, begin, length);
\r
7392 const char_t* _cur;
\r
7393 const char_t* _cur_lexeme_pos;
\r
7394 xpath_lexer_string _cur_lexeme_contents;
\r
7396 lexeme_t _cur_lexeme;
\r
7399 explicit xpath_lexer(const char_t* query): _cur(query)
\r
7404 const char_t* state() const
\r
7411 const char_t* cur = _cur;
\r
7413 while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur;
\r
7415 // save lexeme position for error reporting
\r
7416 _cur_lexeme_pos = cur;
\r
7421 _cur_lexeme = lex_eof;
\r
7425 if (*(cur+1) == '=')
\r
7428 _cur_lexeme = lex_greater_or_equal;
\r
7433 _cur_lexeme = lex_greater;
\r
7438 if (*(cur+1) == '=')
\r
7441 _cur_lexeme = lex_less_or_equal;
\r
7446 _cur_lexeme = lex_less;
\r
7451 if (*(cur+1) == '=')
\r
7454 _cur_lexeme = lex_not_equal;
\r
7458 _cur_lexeme = lex_none;
\r
7464 _cur_lexeme = lex_equal;
\r
7470 _cur_lexeme = lex_plus;
\r
7476 _cur_lexeme = lex_minus;
\r
7482 _cur_lexeme = lex_multiply;
\r
7488 _cur_lexeme = lex_union;
\r
7495 if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
\r
7497 _cur_lexeme_contents.begin = cur;
\r
7499 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
\r
7501 if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
\r
7505 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
\r
7508 _cur_lexeme_contents.end = cur;
\r
7510 _cur_lexeme = lex_var_ref;
\r
7514 _cur_lexeme = lex_none;
\r
7521 _cur_lexeme = lex_open_brace;
\r
7527 _cur_lexeme = lex_close_brace;
\r
7533 _cur_lexeme = lex_open_square_brace;
\r
7539 _cur_lexeme = lex_close_square_brace;
\r
7545 _cur_lexeme = lex_comma;
\r
7550 if (*(cur+1) == '/')
\r
7553 _cur_lexeme = lex_double_slash;
\r
7558 _cur_lexeme = lex_slash;
\r
7563 if (*(cur+1) == '.')
\r
7566 _cur_lexeme = lex_double_dot;
\r
7568 else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit))
\r
7570 _cur_lexeme_contents.begin = cur; // .
\r
7574 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
\r
7576 _cur_lexeme_contents.end = cur;
\r
7578 _cur_lexeme = lex_number;
\r
7583 _cur_lexeme = lex_dot;
\r
7589 _cur_lexeme = lex_axis_attribute;
\r
7596 char_t terminator = *cur;
\r
7600 _cur_lexeme_contents.begin = cur;
\r
7601 while (*cur && *cur != terminator) cur++;
\r
7602 _cur_lexeme_contents.end = cur;
\r
7605 _cur_lexeme = lex_none;
\r
7609 _cur_lexeme = lex_quoted_string;
\r
7616 if (*(cur+1) == ':')
\r
7619 _cur_lexeme = lex_double_colon;
\r
7623 _cur_lexeme = lex_none;
\r
7628 if (PUGI__IS_CHARTYPEX(*cur, ctx_digit))
\r
7630 _cur_lexeme_contents.begin = cur;
\r
7632 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
\r
7638 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
\r
7641 _cur_lexeme_contents.end = cur;
\r
7643 _cur_lexeme = lex_number;
\r
7645 else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
\r
7647 _cur_lexeme_contents.begin = cur;
\r
7649 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
\r
7651 if (cur[0] == ':')
\r
7653 if (cur[1] == '*') // namespace test ncname:*
\r
7657 else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
\r
7661 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
\r
7665 _cur_lexeme_contents.end = cur;
\r
7667 _cur_lexeme = lex_string;
\r
7671 _cur_lexeme = lex_none;
\r
7678 lexeme_t current() const
\r
7680 return _cur_lexeme;
\r
7683 const char_t* current_pos() const
\r
7685 return _cur_lexeme_pos;
\r
7688 const xpath_lexer_string& contents() const
\r
7690 assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);
\r
7692 return _cur_lexeme_contents;
\r
7699 ast_op_or, // left or right
\r
7700 ast_op_and, // left and right
\r
7701 ast_op_equal, // left = right
\r
7702 ast_op_not_equal, // left != right
\r
7703 ast_op_less, // left < right
\r
7704 ast_op_greater, // left > right
\r
7705 ast_op_less_or_equal, // left <= right
\r
7706 ast_op_greater_or_equal, // left >= right
\r
7707 ast_op_add, // left + right
\r
7708 ast_op_subtract, // left - right
\r
7709 ast_op_multiply, // left * right
\r
7710 ast_op_divide, // left / right
\r
7711 ast_op_mod, // left mod right
\r
7712 ast_op_negate, // - left (unary minus)
\r
7713 ast_op_union, // left | right
\r
7714 ast_predicate, // apply predicate to set; next points to next predicate
\r
7715 ast_filter, // select * from left where right
\r
7716 ast_filter_posinv, // select * from left where right; proximity position invariant
\r
7717 ast_string_constant, // string constant
\r
7718 ast_number_constant, // number constant
\r
7719 ast_variable, // variable
\r
7720 ast_func_last, // last()
\r
7721 ast_func_position, // position()
\r
7722 ast_func_count, // count(left)
\r
7723 ast_func_id, // id(left)
\r
7724 ast_func_local_name_0, // local-name()
\r
7725 ast_func_local_name_1, // local-name(left)
\r
7726 ast_func_namespace_uri_0, // namespace-uri()
\r
7727 ast_func_namespace_uri_1, // namespace-uri(left)
\r
7728 ast_func_name_0, // name()
\r
7729 ast_func_name_1, // name(left)
\r
7730 ast_func_string_0, // string()
\r
7731 ast_func_string_1, // string(left)
\r
7732 ast_func_concat, // concat(left, right, siblings)
\r
7733 ast_func_starts_with, // starts-with(left, right)
\r
7734 ast_func_contains, // contains(left, right)
\r
7735 ast_func_substring_before, // substring-before(left, right)
\r
7736 ast_func_substring_after, // substring-after(left, right)
\r
7737 ast_func_substring_2, // substring(left, right)
\r
7738 ast_func_substring_3, // substring(left, right, third)
\r
7739 ast_func_string_length_0, // string-length()
\r
7740 ast_func_string_length_1, // string-length(left)
\r
7741 ast_func_normalize_space_0, // normalize-space()
\r
7742 ast_func_normalize_space_1, // normalize-space(left)
\r
7743 ast_func_translate, // translate(left, right, third)
\r
7744 ast_func_boolean, // boolean(left)
\r
7745 ast_func_not, // not(left)
\r
7746 ast_func_true, // true()
\r
7747 ast_func_false, // false()
\r
7748 ast_func_lang, // lang(left)
\r
7749 ast_func_number_0, // number()
\r
7750 ast_func_number_1, // number(left)
\r
7751 ast_func_sum, // sum(left)
\r
7752 ast_func_floor, // floor(left)
\r
7753 ast_func_ceiling, // ceiling(left)
\r
7754 ast_func_round, // round(left)
\r
7755 ast_step, // process set left with step
\r
7756 ast_step_root // select root node
\r
7762 axis_ancestor_or_self,
\r
7766 axis_descendant_or_self,
\r
7768 axis_following_sibling,
\r
7772 axis_preceding_sibling,
\r
7780 nodetest_type_node,
\r
7781 nodetest_type_comment,
\r
7783 nodetest_type_text,
\r
7786 nodetest_all_in_namespace
\r
7789 template <axis_t N> struct axis_to_type
\r
7791 static const axis_t axis;
\r
7794 template <axis_t N> const axis_t axis_to_type<N>::axis = N;
\r
7796 class xpath_ast_node
\r
7803 // for ast_step / ast_predicate
\r
7807 // tree node structure
\r
7808 xpath_ast_node* _left;
\r
7809 xpath_ast_node* _right;
\r
7810 xpath_ast_node* _next;
\r
7814 // value for ast_string_constant
\r
7815 const char_t* string;
\r
7816 // value for ast_number_constant
\r
7818 // variable for ast_variable
\r
7819 xpath_variable* variable;
\r
7820 // node test for ast_step (node name/namespace/node type/pi target)
\r
7821 const char_t* nodetest;
\r
7824 xpath_ast_node(const xpath_ast_node&);
\r
7825 xpath_ast_node& operator=(const xpath_ast_node&);
\r
// Equality-style comparison of two sub-expressions; eval_boolean calls it with equal_to() and not_equal_to().
// The operand conversion is selected from the static return types of lhs and rhs, and comp is then
// invoked on booleans, numbers or strings.
// NOTE(review): short lines (braces, returns, some statements) are missing from this listing; the
// comments below describe only what the visible lines show.
7827 template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
\r
7829 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
\r
// Neither operand is a node set: compare as booleans if either side is boolean, otherwise as numbers
// if either side is a number, otherwise as strings.
7831 if (lt != xpath_type_node_set && rt != xpath_type_node_set)
\r
7833 if (lt == xpath_type_boolean || rt == xpath_type_boolean)
\r
7834 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
\r
7835 else if (lt == xpath_type_number || rt == xpath_type_number)
\r
7836 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
\r
7837 else if (lt == xpath_type_string || rt == xpath_type_string)
\r
7839 xpath_allocator_capture cr(stack.result);
\r
7841 xpath_string ls = lhs->eval_string(c, stack);
\r
7842 xpath_string rs = rhs->eval_string(c, stack);
\r
7844 return comp(ls, rs);
\r
// Both operands are node sets: comp is tested on the string values of every (left node, right node) pair.
7847 else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
\r
7849 xpath_allocator_capture cr(stack.result);
\r
7851 xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
\r
7852 xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
\r
7854 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
\r
7855 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
\r
// A separate capture is created per pair, around the two string_value() calls.
7857 xpath_allocator_capture cri(stack.result);
\r
7859 if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
\r
// Mixed case with the node set on the left. The body of this branch is not visible here; because the
// code below always evaluates rhs as the node set, the operands are presumably swapped -- confirm.
7867 if (lt == xpath_type_node_set)
\r
// Non-node-set operand is boolean: both sides are converted to boolean.
7873 if (lt == xpath_type_boolean)
\r
7874 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
\r
// Non-node-set operand is a number: compared against each node's string value converted to a number.
7875 else if (lt == xpath_type_number)
\r
7877 xpath_allocator_capture cr(stack.result);
\r
7879 double l = lhs->eval_number(c, stack);
\r
7880 xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
\r
7882 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
\r
7884 xpath_allocator_capture cri(stack.result);
\r
7886 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
\r
// Non-node-set operand is a string: compared against each node's string value.
7892 else if (lt == xpath_type_string)
\r
7894 xpath_allocator_capture cr(stack.result);
\r
7896 xpath_string l = lhs->eval_string(c, stack);
\r
7897 xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
\r
7899 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
\r
7901 xpath_allocator_capture cri(stack.result);
\r
7903 if (comp(l, string_value(*ri, stack.result)))
\r
// Reached only when no type combination above matched.
7911 assert(!"Wrong types");
\r
// Relational comparison of two sub-expressions; eval_boolean calls it with less() and less_equal(),
// passing the operands swapped for the "greater" operators.
// All visible branches compare numbers: non-node-set operands go through eval_number and node-set
// members through convert_string_to_number(string_value(...)).
// NOTE(review): short lines (braces, returns) are missing from this listing.
7915 template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
\r
7917 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
\r
// Neither operand is a node set: a single numeric comparison.
7919 if (lt != xpath_type_node_set && rt != xpath_type_node_set)
\r
7920 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
\r
// Both operands are node sets: comp is tested on every (left node, right node) pair.
7921 else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
\r
7923 xpath_allocator_capture cr(stack.result);
\r
7925 xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
\r
7926 xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
\r
7928 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
\r
7930 xpath_allocator_capture cri(stack.result);
\r
// The left node's numeric value is computed once per outer iteration and reused for the inner loop.
7932 double l = convert_string_to_number(string_value(*li, stack.result).c_str());
\r
7934 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
\r
7936 xpath_allocator_capture crii(stack.result);
\r
7938 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
\r
// Number on the left, node set on the right.
7945 else if (lt != xpath_type_node_set && rt == xpath_type_node_set)
\r
7947 xpath_allocator_capture cr(stack.result);
\r
7949 double l = lhs->eval_number(c, stack);
\r
7950 xpath_node_set_raw rs = rhs->eval_node_set(c, stack);
\r
7952 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
\r
7954 xpath_allocator_capture cri(stack.result);
\r
7956 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
\r
// Node set on the left, number on the right; operand order passed to comp is preserved.
7962 else if (lt == xpath_type_node_set && rt != xpath_type_node_set)
\r
7964 xpath_allocator_capture cr(stack.result);
\r
7966 xpath_node_set_raw ls = lhs->eval_node_set(c, stack);
\r
7967 double r = rhs->eval_number(c, stack);
\r
7969 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
\r
7971 xpath_allocator_capture cri(stack.result);
\r
7973 if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
\r
// The four branches above cover every type combination, so this is not expected to be reached.
7981 assert(!"Wrong types");
\r
// Filters the tail of ns (elements from index `first` onward) with a single predicate expression.
// Each candidate is evaluated in its own xpath_context built from the node, the counter i and the
// number of candidates; the set is finally truncated at `last`.
// NOTE(review): the declaration of i and the statements that keep a matching node are not visible in
// this listing; presumably i is the 1-based position and matching nodes are compacted towards `last`.
7986 void apply_predicate(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
\r
7988 assert(ns.size() >= first);
\r
// Context size: only the nodes added at or after `first` take part in this predicate.
7991 size_t size = ns.size() - first;
\r
7993 xpath_node* last = ns.begin() + first;
\r
7995 // remove_if... or well, sort of
\r
7996 for (xpath_node* it = last; it != ns.end(); ++it, ++i)
\r
7998 xpath_context c(*it, i, size);
\r
// A numeric predicate is compared against i; any other predicate is evaluated as a boolean.
8000 if (expr->rettype() == xpath_type_number)
\r
8002 if (expr->eval_number(c, stack) == i)
\r
8005 else if (expr->eval_boolean(c, stack))
\r
8009 ns.truncate(last);
\r
// Applies every predicate attached to this step to the nodes of ns starting at index `first`.
// Predicates are the nodes chained from _right through _next; the expression of each is its _left child.
8012 void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack)
\r
// Nothing was added since `first`, so there is nothing to filter.
8014 if (ns.size() == first) return;
\r
8016 for (xpath_ast_node* pred = _right; pred; pred = pred->_next)
\r
8018 apply_predicate(ns, first, pred->_left, stack);
\r
// Attribute overload: appends attribute a (paired with its parent node) to ns when it passes this
// step's node test. The case labels below are node-test kinds; the switch line itself is not visible
// in this listing (presumably it switches on _test).
8022 void step_push(xpath_node_set_raw& ns, const xml_attribute& a, const xml_node& parent, xpath_allocator* alloc)
\r
8026 const char_t* name = a.name();
\r
8028 // There are no attribute nodes corresponding to attributes that declare namespaces
\r
8029 // That is, "xmlns:..." or "xmlns"
\r
8030 if (starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')) return;
\r
// Name test: the attribute name must equal the stored node test string.
8034 case nodetest_name:
\r
8035 if (strequal(name, _data.nodetest)) ns.push_back(xpath_node(a, parent), alloc);
\r
// node() and "*" accept every attribute that reached this point.
8038 case nodetest_type_node:
\r
8039 case nodetest_all:
\r
8040 ns.push_back(xpath_node(a, parent), alloc);
\r
// Namespace wildcard: implemented as a prefix match of the attribute name against the stored string.
8043 case nodetest_all_in_namespace:
\r
8044 if (starts_with(name, _data.nodetest))
\r
8045 ns.push_back(xpath_node(a, parent), alloc);
\r
// Node overload: appends node n to ns when it passes this step's node test. The case labels below are
// node-test kinds; the switch line and some labels are not visible in this listing.
8053 void step_push(xpath_node_set_raw& ns, const xml_node& n, xpath_allocator* alloc)
\r
// Name test: element whose name equals the stored node test string.
8059 case nodetest_name:
\r
8060 if (n.type() == node_element && strequal(n.name(), _data.nodetest)) ns.push_back(n, alloc);
\r
// node(): accepts any node.
8063 case nodetest_type_node:
\r
8064 ns.push_back(n, alloc);
\r
8067 case nodetest_type_comment:
\r
8068 if (n.type() == node_comment)
\r
8069 ns.push_back(n, alloc);
\r
// text(): both PCDATA and CDATA nodes count as text.
8072 case nodetest_type_text:
\r
8073 if (n.type() == node_pcdata || n.type() == node_cdata)
\r
8074 ns.push_back(n, alloc);
\r
8077 case nodetest_type_pi:
\r
8078 if (n.type() == node_pi)
\r
8079 ns.push_back(n, alloc);
\r
// Processing instruction with a specific target (the case label for this branch is not visible here).
8083 if (n.type() == node_pi && strequal(n.name(), _data.nodetest))
\r
8084 ns.push_back(n, alloc);
\r
// "*": any element.
8087 case nodetest_all:
\r
8088 if (n.type() == node_element)
\r
8089 ns.push_back(n, alloc);
\r
// Namespace wildcard: element whose name starts with the stored string.
8092 case nodetest_all_in_namespace:
\r
8093 if (n.type() == node_element && starts_with(n.name(), _data.nodetest))
\r
8094 ns.push_back(n, alloc);
\r
// NOTE(review): the message says "axis" although every visible case here is a node test.
8098 assert(!"Unknown axis");
\r
// Node overload: pushes onto ns (through step_push, which applies the node test) the nodes reachable
// from n along the axis encoded in the tag type T.
// NOTE(review): many short lines (switch, braces, break, some case labels and declarations of `cur`)
// are missing from this listing; comments describe the visible statements only.
8102 template <class T> void step_fill(xpath_node_set_raw& ns, const xml_node& n, xpath_allocator* alloc, T)
\r
8104 const axis_t axis = T::axis;
\r
// attribute axis: every attribute of n.
8108 case axis_attribute:
\r
8110 for (xml_attribute a = n.first_attribute(); a; a = a.next_attribute())
\r
8111 step_push(ns, a, n, alloc);
\r
// Direct children of n (the case label for this branch is not visible here).
8118 for (xml_node c = n.first_child(); c; c = c.next_sibling())
\r
8119 step_push(ns, c, alloc);
\r
// descendant / descendant-or-self: traversal of the subtree below n, visiting a node before its children.
8124 case axis_descendant:
\r
8125 case axis_descendant_or_self:
\r
8127 if (axis == axis_descendant_or_self)
\r
8128 step_push(ns, n, alloc);
\r
8130 xml_node cur = n.first_child();
\r
// The traversal stops when it climbs back to n or runs out of nodes.
8132 while (cur && cur != n)
\r
8134 step_push(ns, cur, alloc);
\r
8136 if (cur.first_child())
\r
8137 cur = cur.first_child();
\r
8138 else if (cur.next_sibling())
\r
8139 cur = cur.next_sibling();
\r
// No child and no sibling: climb until an ancestor has a next sibling, but never above n.
8142 while (!cur.next_sibling() && cur != n)
\r
8143 cur = cur.parent();
\r
8145 if (cur != n) cur = cur.next_sibling();
\r
// following-sibling: siblings after n, in forward order.
8152 case axis_following_sibling:
\r
8154 for (xml_node c = n.next_sibling(); c; c = c.next_sibling())
\r
8155 step_push(ns, c, alloc);
\r
// preceding-sibling: siblings before n, walking backwards from n.
8160 case axis_preceding_sibling:
\r
8162 for (xml_node c = n.previous_sibling(); c; c = c.previous_sibling())
\r
8163 step_push(ns, c, alloc);
\r
8168 case axis_following:
\r
8172 // exit from this node so that we don't include descendants
\r
8173 while (cur && !cur.next_sibling()) cur = cur.parent();
\r
8174 cur = cur.next_sibling();
\r
// From here the walk visits each node before its children; there is no upper bound as in the
// descendant case above.
8178 step_push(ns, cur, alloc);
\r
8180 if (cur.first_child())
\r
8181 cur = cur.first_child();
\r
8182 else if (cur.next_sibling())
\r
8183 cur = cur.next_sibling();
\r
8186 while (cur && !cur.next_sibling()) cur = cur.parent();
\r
8187 cur = cur.next_sibling();
\r
// preceding: walks backwards from n; nodes for which node_is_ancestor(cur, n) holds are not pushed.
8196 case axis_preceding:
\r
8200 while (cur && !cur.previous_sibling()) cur = cur.parent();
\r
8201 cur = cur.previous_sibling();
\r
// Descend to the last child first, so that descendants are visited before the node that contains them.
8205 if (cur.last_child())
\r
8206 cur = cur.last_child();
\r
8209 // leaf node, can't be ancestor
\r
8210 step_push(ns, cur, alloc);
\r
8212 if (cur.previous_sibling())
\r
8213 cur = cur.previous_sibling();
\r
// No previous sibling: climb, pushing each parent on the way up unless it is an ancestor of n.
8218 cur = cur.parent();
\r
8221 if (!node_is_ancestor(cur, n)) step_push(ns, cur, alloc);
\r
8223 while (!cur.previous_sibling());
\r
8225 cur = cur.previous_sibling();
\r
// ancestor / ancestor-or-self: n itself (for -or-self) followed by the chain of parents.
8235 case axis_ancestor:
\r
8236 case axis_ancestor_or_self:
\r
8238 if (axis == axis_ancestor_or_self)
\r
8239 step_push(ns, n, alloc);
\r
8241 xml_node cur = n.parent();
\r
8245 step_push(ns, cur, alloc);
\r
8247 cur = cur.parent();
\r
// Pushes n itself (case label not visible in this listing).
8255 step_push(ns, n, alloc);
\r
// Pushes the parent of n when it exists (case label not visible in this listing).
8262 if (n.parent()) step_push(ns, n.parent(), alloc);
\r
8268 assert(!"Unimplemented axis");
\r
// Attribute overload: pushes onto ns the nodes reachable along axis T::axis when the context is
// attribute a whose owning element is p.
// NOTE(review): short lines (switch, braces, break, some case labels and declarations of `cur`) are
// missing from this listing; comments describe the visible statements only.
8272 template <class T> void step_fill(xpath_node_set_raw& ns, const xml_attribute& a, const xml_node& p, xpath_allocator* alloc, T v)
\r
8274 const axis_t axis = T::axis;
\r
8278 case axis_ancestor:
\r
8279 case axis_ancestor_or_self:
\r
// The attribute itself is pushed only for ancestor-or-self combined with the node() test.
8281 if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
\r
8282 step_push(ns, a, p, alloc);
\r
// Pushes cur and moves to its parent; the declaration of cur and the loop header are not visible here.
8288 step_push(ns, cur, alloc);
\r
8290 cur = cur.parent();
\r
8296 case axis_descendant_or_self:
\r
8299 if (_test == nodetest_type_node) // reject attributes based on principal node type test
\r
8300 step_push(ns, a, p, alloc);
\r
// following: advances cur (child first, then sibling, then up to an ancestor's sibling) and pushes it.
8305 case axis_following:
\r
8311 if (cur.first_child())
\r
8312 cur = cur.first_child();
\r
8313 else if (cur.next_sibling())
\r
8314 cur = cur.next_sibling();
\r
8317 while (cur && !cur.next_sibling()) cur = cur.parent();
\r
8318 cur = cur.next_sibling();
\r
8323 step_push(ns, cur, alloc);
\r
// Pushes the owning element p (case label not visible in this listing).
8331 step_push(ns, p, alloc);
\r
8336 case axis_preceding:
\r
8338 // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
\r
8339 step_fill(ns, p, alloc, v);
\r
8344 assert(!"Unimplemented axis");
\r
// Evaluates one location step for axis T::axis: gathers candidate nodes with step_fill, filters them
// with apply_predicates, and maintains the ordering type of the resulting set.
// Two paths are visible: one iterating over the node set produced by _left, and one using the context
// node c.n directly. The conditions selecting between them, and between node and attribute contexts,
// are on lines missing from this listing.
8348 template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, T v)
\r
8350 const axis_t axis = T::axis;
\r
// Axes for which the attribute overload of step_fill is used when the context is an attribute.
8351 bool attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self);
\r
8353 xpath_node_set_raw ns;
\r
// Ancestor and preceding style axes are marked as reverse-sorted, all others as sorted.
8354 ns.set_type((axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling) ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted);
\r
8358 xpath_node_set_raw s = _left->eval_node_set(c, stack);
\r
8360 // self axis preserves the original order
\r
8361 if (axis == axis_self) ns.set_type(s.type());
\r
8363 for (const xpath_node* it = s.begin(); it != s.end(); ++it)
\r
// Size before this input node's contribution; predicates are applied only to nodes added after it.
8365 size_t size = ns.size();
\r
8367 // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
\r
8368 if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);
\r
8371 step_fill(ns, it->node(), stack.result, v);
\r
8372 else if (attributes)
\r
8373 step_fill(ns, it->attribute(), it->parent(), stack.result, v);
\r
8375 apply_predicates(ns, size, stack);
\r
// Path that starts from the context node c.n; predicates apply to the whole set.
8381 step_fill(ns, c.n.node(), stack.result, v);
\r
8382 else if (attributes)
\r
8383 step_fill(ns, c.n.attribute(), c.n.parent(), stack.result, v);
\r
8385 apply_predicates(ns, 0, stack);
\r
8388 // child, attribute and self axes always generate unique set of nodes
\r
8389 // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
\r
8390 if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted)
\r
8391 ns.remove_duplicates();
\r
// Constructs a string constant node: no children, axis and test zeroed, and `value` stored as the payload.
// The pointer is stored as is; no copy of the string is made here.
8397 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value):
\r
8398 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
\r
// This overload is only valid for ast_string_constant.
8400 assert(type == ast_string_constant);
\r
8401 _data.string = value;
\r
// Constructs a number constant node: no children, axis and test zeroed, and `value` stored as the payload.
8404 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value):
\r
8405 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
\r
// This overload is only valid for ast_number_constant.
8407 assert(type == ast_number_constant);
\r
8408 _data.number = value;
\r
// Constructs a variable reference node: no children, axis and test zeroed, and the variable pointer
// stored as the payload (the node does not copy the variable).
8411 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value):
\r
8412 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
\r
// This overload is only valid for ast_variable.
8414 assert(type == ast_variable);
\r
8415 _data.variable = value;
\r
// Constructs an operator or function node with up to two children (both default to null); axis and
// test are zeroed and no payload is assigned in the visible lines.
8418 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
\r
8419 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0)
\r
// Constructs a step node: the return type is always a node set, `left` is the preceding expression,
// axis and test are stored narrowed to char, and `contents` becomes the node test string.
8423 xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):
\r
8424 _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0)
\r
8426 _data.nodetest = contents;
\r
// Setter taking the node to link as "next". NOTE(review): the body is not visible in this listing;
// presumably it assigns _next -- confirm.
8429 void set_next(xpath_ast_node* value)
\r
// Setter taking the node to link as the right child. NOTE(review): the body is not visible in this
// listing; presumably it assigns _right -- confirm.
8434 void set_right(xpath_ast_node* value)
\r
// Evaluates this subtree as a boolean in context c.
// Boolean operators and functions are handled directly; for the remaining node kinds the value is
// computed in its own type and converted (see the xpath_type_* cases at the end).
// NOTE(review): the switch lines, several case labels and short return statements are missing from
// this listing.
8439 bool eval_boolean(const xpath_context& c, const xpath_stack& stack)
\r
// Logical or / and (case labels not visible); C++ short-circuiting applies to the right operand.
8444 return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);
\r
8447 return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);
\r
8449 case ast_op_equal:
\r
8450 return compare_eq(_left, _right, c, stack, equal_to());
\r
8452 case ast_op_not_equal:
\r
8453 return compare_eq(_left, _right, c, stack, not_equal_to());
\r
8456 return compare_rel(_left, _right, c, stack, less());
\r
// "greater" comparisons reuse less / less_equal with the operands swapped.
8458 case ast_op_greater:
\r
8459 return compare_rel(_right, _left, c, stack, less());
\r
8461 case ast_op_less_or_equal:
\r
8462 return compare_rel(_left, _right, c, stack, less_equal());
\r
8464 case ast_op_greater_or_equal:
\r
8465 return compare_rel(_right, _left, c, stack, less_equal());
\r
8467 case ast_func_starts_with:
\r
8469 xpath_allocator_capture cr(stack.result);
\r
8471 xpath_string lr = _left->eval_string(c, stack);
\r
8472 xpath_string rr = _right->eval_string(c, stack);
\r
8474 return starts_with(lr.c_str(), rr.c_str());
\r
8477 case ast_func_contains:
\r
8479 xpath_allocator_capture cr(stack.result);
\r
8481 xpath_string lr = _left->eval_string(c, stack);
\r
8482 xpath_string rr = _right->eval_string(c, stack);
\r
8484 return find_substring(lr.c_str(), rr.c_str()) != 0;
\r
8487 case ast_func_boolean:
\r
8488 return _left->eval_boolean(c, stack);
\r
8490 case ast_func_not:
\r
8491 return !_left->eval_boolean(c, stack);
\r
8493 case ast_func_true:
\r
8496 case ast_func_false:
\r
// lang(): false for an attribute context; otherwise walks from the context node up through its
// parents looking at the xml:lang attribute.
8499 case ast_func_lang:
\r
8501 if (c.n.attribute()) return false;
\r
8503 xpath_allocator_capture cr(stack.result);
\r
8505 xpath_string lang = _left->eval_string(c, stack);
\r
8507 for (xml_node n = c.n.node(); n; n = n.parent())
\r
8509 xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));
\r
8513 const char_t* value = a.value();
\r
8515 // strnicmp / strncasecmp is not portable
\r
8516 for (const char_t* lit = lang.c_str(); *lit; ++lit)
\r
8518 if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
\r
// The argument matched as a prefix: accept when the attribute value ends here or continues with '-'.
8522 return *value == 0 || *value == '-';
\r
8529 case ast_variable:
\r
8531 assert(_rettype == _data.variable->type());
\r
8533 if (_rettype == xpath_type_boolean)
\r
8534 return _data.variable->get_boolean();
\r
8536 // fallthrough to type conversion
\r
// Conversion from the node's own return type: a number goes through convert_number_to_boolean,
// a string or node set is true when it is not empty.
8543 case xpath_type_number:
\r
8544 return convert_number_to_boolean(eval_number(c, stack));
\r
8546 case xpath_type_string:
\r
8548 xpath_allocator_capture cr(stack.result);
\r
8550 return !eval_string(c, stack).empty();
\r
8553 case xpath_type_node_set:
\r
8555 xpath_allocator_capture cr(stack.result);
\r
8557 return !eval_node_set(c, stack).empty();
\r
8561 assert(!"Wrong expression for return type boolean");
\r
// Evaluates this subtree as a number (double) in context c.
// Arithmetic operators and numeric functions are handled directly; for the remaining node kinds the
// value is computed in its own type and converted (see the xpath_type_* cases at the end).
// NOTE(review): the switch lines, several case labels and short statements are missing from this listing.
8568 double eval_number(const xpath_context& c, const xpath_stack& stack)
\r
// Addition (case label not visible).
8573 return _left->eval_number(c, stack) + _right->eval_number(c, stack);
\r
8575 case ast_op_subtract:
\r
8576 return _left->eval_number(c, stack) - _right->eval_number(c, stack);
\r
8578 case ast_op_multiply:
\r
8579 return _left->eval_number(c, stack) * _right->eval_number(c, stack);
\r
8581 case ast_op_divide:
\r
8582 return _left->eval_number(c, stack) / _right->eval_number(c, stack);
\r
// Remainder via fmod (case label not visible).
8585 return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));
\r
8587 case ast_op_negate:
\r
8588 return -_left->eval_number(c, stack);
\r
8590 case ast_number_constant:
\r
8591 return _data.number;
\r
// last() and position() read the context size and position directly.
8593 case ast_func_last:
\r
8594 return static_cast<double>(c.size);
\r
8596 case ast_func_position:
\r
8597 return static_cast<double>(c.position);
\r
8599 case ast_func_count:
\r
8601 xpath_allocator_capture cr(stack.result);
\r
8603 return static_cast<double>(_left->eval_node_set(c, stack).size());
\r
// string-length() with no argument uses the string value of the context node.
8606 case ast_func_string_length_0:
\r
8608 xpath_allocator_capture cr(stack.result);
\r
8610 return static_cast<double>(string_value(c.n, stack.result).length());
\r
8613 case ast_func_string_length_1:
\r
8615 xpath_allocator_capture cr(stack.result);
\r
8617 return static_cast<double>(_left->eval_string(c, stack).length());
\r
// number() with no argument converts the string value of the context node.
8620 case ast_func_number_0:
\r
8622 xpath_allocator_capture cr(stack.result);
\r
8624 return convert_string_to_number(string_value(c.n, stack.result).c_str());
\r
8627 case ast_func_number_1:
\r
8628 return _left->eval_number(c, stack);
\r
// sum(): adds the numeric value of each node's string value to r (the declaration and return of r
// are not visible in this listing).
8630 case ast_func_sum:
\r
8632 xpath_allocator_capture cr(stack.result);
\r
8636 xpath_node_set_raw ns = _left->eval_node_set(c, stack);
\r
8638 for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)
\r
8640 xpath_allocator_capture cri(stack.result);
\r
8642 r += convert_string_to_number(string_value(*it, stack.result).c_str());
\r
8648 case ast_func_floor:
\r
8650 double r = _left->eval_number(c, stack);
\r
// r == r is false only for NaN, which is returned unchanged instead of being passed to floor().
8652 return r == r ? floor(r) : r;
\r
8655 case ast_func_ceiling:
\r
8657 double r = _left->eval_number(c, stack);
\r
// Same NaN guard as for floor above.
8659 return r == r ? ceil(r) : r;
\r
8662 case ast_func_round:
\r
8663 return round_nearest_nzero(_left->eval_number(c, stack));
\r
8665 case ast_variable:
\r
8667 assert(_rettype == _data.variable->type());
\r
8669 if (_rettype == xpath_type_number)
\r
8670 return _data.variable->get_number();
\r
8672 // fallthrough to type conversion
\r
// Conversion from the node's own return type: boolean maps to 1 or 0; string and node set both go
// through eval_string and convert_string_to_number.
8679 case xpath_type_boolean:
\r
8680 return eval_boolean(c, stack) ? 1 : 0;
\r
8682 case xpath_type_string:
\r
8684 xpath_allocator_capture cr(stack.result);
\r
8686 return convert_string_to_number(eval_string(c, stack).c_str());
\r
8689 case xpath_type_node_set:
\r
8691 xpath_allocator_capture cr(stack.result);
\r
8693 return convert_string_to_number(eval_string(c, stack).c_str());
\r
8697 assert(!"Wrong expression for return type number");
\r
// Evaluates concat(): the first argument is _left, the remaining ones are chained from _right through
// _next. Argument strings are evaluated with the temp/result allocators swapped, and the concatenated
// text is allocated from stack.result.
// NOTE(review): declarations of `count` and `pos`, the copy statements and the terminator write are on
// lines missing from this listing.
8705 xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack)
\r
8707 assert(_type == ast_func_concat);
\r
8709 xpath_allocator_capture ct(stack.temp);
\r
8711 // count the string number
\r
8713 for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++;
\r
8715 // gather all strings
\r
// Up to four arguments use this fixed local array; a larger count allocates from stack.temp below.
8716 xpath_string static_buffer[4];
\r
8717 xpath_string* buffer = static_buffer;
\r
8719 // allocate on-heap for large concats
\r
8720 if (count > sizeof(static_buffer) / sizeof(static_buffer[0]))
\r
8722 buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));
\r
8726 // evaluate all strings to temporary stack
\r
// The initializer order is swapped relative to `stack`, so sub-evaluations that write to their
// "result" allocator are given stack.temp.
8727 xpath_stack swapped_stack = {stack.temp, stack.result};
\r
8729 buffer[0] = _left->eval_string(c, swapped_stack);
\r
8732 for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack);
\r
8733 assert(pos == count);
\r
8735 // get total length
\r
8736 size_t length = 0;
\r
8737 for (size_t i = 0; i < count; ++i) length += buffer[i].length();
\r
8739 // create final string
\r
// One extra element is reserved beyond the combined length (for the terminator).
8740 char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t)));
\r
8743 char_t* ri = result;
\r
8745 for (size_t j = 0; j < count; ++j)
\r
8746 for (const char_t* bi = buffer[j].c_str(); *bi; ++bi)
\r
8751 return xpath_string(result, true);
\r
// Evaluates this subtree as a string in context c.
// String functions are handled directly; for the remaining node kinds the value is computed in its own
// type and converted (see the xpath_type_* cases at the end).
// Several cases evaluate their operands with the temp and result allocators swapped (swapped_stack)
// and then build the final string from stack.result.
// NOTE(review): the switch lines and short statements (including some returns) are missing from this listing.
8754 xpath_string eval_string(const xpath_context& c, const xpath_stack& stack)
\r
8758 case ast_string_constant:
\r
8759 return xpath_string_const(_data.string);
\r
// local-name(), name() and namespace-uri(): the _0 forms use the context node, the _1 forms use the
// first node of the argument node set.
8761 case ast_func_local_name_0:
\r
8763 xpath_node na = c.n;
\r
8765 return xpath_string_const(local_name(na));
\r
8768 case ast_func_local_name_1:
\r
8770 xpath_allocator_capture cr(stack.result);
\r
8772 xpath_node_set_raw ns = _left->eval_node_set(c, stack);
\r
8773 xpath_node na = ns.first();
\r
8775 return xpath_string_const(local_name(na));
\r
8778 case ast_func_name_0:
\r
8780 xpath_node na = c.n;
\r
8782 return xpath_string_const(qualified_name(na));
\r
8785 case ast_func_name_1:
\r
8787 xpath_allocator_capture cr(stack.result);
\r
8789 xpath_node_set_raw ns = _left->eval_node_set(c, stack);
\r
8790 xpath_node na = ns.first();
\r
8792 return xpath_string_const(qualified_name(na));
\r
8795 case ast_func_namespace_uri_0:
\r
8797 xpath_node na = c.n;
\r
8799 return xpath_string_const(namespace_uri(na));
\r
8802 case ast_func_namespace_uri_1:
\r
8804 xpath_allocator_capture cr(stack.result);
\r
8806 xpath_node_set_raw ns = _left->eval_node_set(c, stack);
\r
8807 xpath_node na = ns.first();
\r
8809 return xpath_string_const(namespace_uri(na));
\r
8812 case ast_func_string_0:
\r
8813 return string_value(c.n, stack.result);
\r
8815 case ast_func_string_1:
\r
8816 return _left->eval_string(c, stack);
\r
8818 case ast_func_concat:
\r
8819 return eval_string_concat(c, stack);
\r
// substring-before(): the part of s before the first occurrence of p, or an empty string if p is not found.
8821 case ast_func_substring_before:
\r
8823 xpath_allocator_capture cr(stack.temp);
\r
8825 xpath_stack swapped_stack = {stack.temp, stack.result};
\r
8827 xpath_string s = _left->eval_string(c, swapped_stack);
\r
8828 xpath_string p = _right->eval_string(c, swapped_stack);
\r
8830 const char_t* pos = find_substring(s.c_str(), p.c_str());
\r
8832 return pos ? xpath_string(s.c_str(), pos, stack.result) : xpath_string();
\r
// substring-after(): the part of s after the first occurrence of p, or an empty string if p is not found.
8835 case ast_func_substring_after:
\r
8837 xpath_allocator_capture cr(stack.temp);
\r
8839 xpath_stack swapped_stack = {stack.temp, stack.result};
\r
8841 xpath_string s = _left->eval_string(c, swapped_stack);
\r
8842 xpath_string p = _right->eval_string(c, swapped_stack);
\r
8844 const char_t* pos = find_substring(s.c_str(), p.c_str());
\r
8845 if (!pos) return xpath_string();
\r
8847 const char_t* result = pos + p.length();
\r
// When s reports uses_heap() the tail is copied through stack.result; otherwise the existing
// characters are wrapped without copying.
8849 return s.uses_heap() ? xpath_string(result, stack.result) : xpath_string_const(result);
\r
// substring(s, first): 1-based start position, rounded; NaN or a start past the end gives an empty string.
8852 case ast_func_substring_2:
\r
8854 xpath_allocator_capture cr(stack.temp);
\r
8856 xpath_stack swapped_stack = {stack.temp, stack.result};
\r
8858 xpath_string s = _left->eval_string(c, swapped_stack);
\r
8859 size_t s_length = s.length();
\r
8861 double first = round_nearest(_right->eval_number(c, stack));
\r
8863 if (is_nan(first)) return xpath_string(); // NaN
\r
8864 else if (first >= s_length + 1) return xpath_string();
\r
// Start positions below 1 are clamped to 1.
8866 size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
\r
8867 assert(1 <= pos && pos <= s_length + 1);
\r
8869 const char_t* rbegin = s.c_str() + (pos - 1);
\r
8871 return s.uses_heap() ? xpath_string(rbegin, stack.result) : xpath_string_const(rbegin);
\r
// substring(s, first, length): `last` is the exclusive 1-based end, computed as first + rounded length.
8874 case ast_func_substring_3:
\r
8876 xpath_allocator_capture cr(stack.temp);
\r
8878 xpath_stack swapped_stack = {stack.temp, stack.result};
\r
8880 xpath_string s = _left->eval_string(c, swapped_stack);
\r
8881 size_t s_length = s.length();
\r
8883 double first = round_nearest(_right->eval_number(c, stack));
\r
8884 double last = first + round_nearest(_right->_next->eval_number(c, stack));
\r
// Empty result for NaN bounds, a start past the end, an empty or inverted range, or a range ending before position 1.
8886 if (is_nan(first) || is_nan(last)) return xpath_string();
\r
8887 else if (first >= s_length + 1) return xpath_string();
\r
8888 else if (first >= last) return xpath_string();
\r
8889 else if (last < 1) return xpath_string();
\r
// Both bounds are clamped to the valid range [1, s_length + 1].
8891 size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
\r
8892 size_t end = last >= s_length + 1 ? s_length + 1 : static_cast<size_t>(last);
\r
8894 assert(1 <= pos && pos <= end && end <= s_length + 1);
\r
8895 const char_t* rbegin = s.c_str() + (pos - 1);
\r
8896 const char_t* rend = s.c_str() + (end - 1);
\r
// A non-heap string whose range runs to the end is wrapped without copying; otherwise the range is
// copied through stack.result.
8898 return (end == s_length + 1 && !s.uses_heap()) ? xpath_string_const(rbegin) : xpath_string(rbegin, rend, stack.result);
\r
// normalize-space(): normalizes the buffer obtained from s.data(stack.result); the return statements
// are not visible in this listing.
8901 case ast_func_normalize_space_0:
\r
8903 xpath_string s = string_value(c.n, stack.result);
\r
8905 normalize_space(s.data(stack.result));
\r
8910 case ast_func_normalize_space_1:
\r
8912 xpath_string s = _left->eval_string(c, stack);
\r
8914 normalize_space(s.data(stack.result));
\r
// translate(): s is evaluated with the regular stack because it is modified in place and becomes the
// result; the from/to strings are temporaries evaluated with the swapped stack.
8919 case ast_func_translate:
\r
8921 xpath_allocator_capture cr(stack.temp);
\r
8923 xpath_stack swapped_stack = {stack.temp, stack.result};
\r
8925 xpath_string s = _left->eval_string(c, stack);
\r
8926 xpath_string from = _right->eval_string(c, swapped_stack);
\r
8927 xpath_string to = _right->_next->eval_string(c, swapped_stack);
\r
8929 translate(s.data(stack.result), from.c_str(), to.c_str());
\r
8934 case ast_variable:
\r
8936 assert(_rettype == _data.variable->type());
\r
8938 if (_rettype == xpath_type_string)
\r
8939 return xpath_string_const(_data.variable->get_string());
\r
8941 // fallthrough to type conversion
\r
// Conversion from the node's own return type.
8948 case xpath_type_boolean:
\r
8949 return xpath_string_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
\r
8951 case xpath_type_number:
\r
8952 return convert_number_to_string(eval_number(c, stack), stack.result);
\r
// Node set: string value of ns.first(), or an empty string for an empty set.
8954 case xpath_type_node_set:
\r
8956 xpath_allocator_capture cr(stack.temp);
\r
8958 xpath_stack swapped_stack = {stack.temp, stack.result};
\r
8960 xpath_node_set_raw ns = eval_node_set(c, swapped_stack);
\r
8961 return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
\r
8965 assert(!"Wrong expression for return type string");
\r
8966 return xpath_string();
\r
// Evaluates this subtree as a node set in context c: unions, filter expressions, location steps
// (dispatched per axis to step_do), the root step and node-set variables.
// NOTE(review): the switch lines, several case labels and short return statements are missing from
// this listing.
8972 xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack)
\r
// Union: the left set is evaluated with the swapped stack, the right set with the regular stack; the
// left nodes are appended to the right set and duplicates removed.
8976 case ast_op_union:
\r
8978 xpath_allocator_capture cr(stack.temp);
\r
8980 xpath_stack swapped_stack = {stack.temp, stack.result};
\r
8982 xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack);
\r
8983 xpath_node_set_raw rs = _right->eval_node_set(c, stack);
\r
8985 // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
\r
8986 rs.set_type(xpath_node_set::type_unsorted);
\r
8988 rs.append(ls.begin(), ls.end(), stack.result);
\r
8989 rs.remove_duplicates();
\r
// Filter expression: the set is sorted only for ast_filter (not for ast_filter_posinv) before the
// predicate in _right is applied to the whole set.
8995 case ast_filter_posinv:
\r
8997 xpath_node_set_raw set = _left->eval_node_set(c, stack);
\r
8999 // either expression is a number or it contains position() call; sort by document order
\r
9000 if (_type == ast_filter) set.sort_do();
\r
9002 apply_predicate(set, 0, _right, stack);
\r
// Returns an empty set (the case label for this branch is not visible in this listing).
9008 return xpath_node_set_raw();
\r
// Location step: each axis value is mapped to the matching step_do instantiation through axis_to_type.
9014 case axis_ancestor:
\r
9015 return step_do(c, stack, axis_to_type<axis_ancestor>());
\r
9017 case axis_ancestor_or_self:
\r
9018 return step_do(c, stack, axis_to_type<axis_ancestor_or_self>());
\r
9020 case axis_attribute:
\r
9021 return step_do(c, stack, axis_to_type<axis_attribute>());
\r
9024 return step_do(c, stack, axis_to_type<axis_child>());
\r
9026 case axis_descendant:
\r
9027 return step_do(c, stack, axis_to_type<axis_descendant>());
\r
9029 case axis_descendant_or_self:
\r
9030 return step_do(c, stack, axis_to_type<axis_descendant_or_self>());
\r
9032 case axis_following:
\r
9033 return step_do(c, stack, axis_to_type<axis_following>());
\r
9035 case axis_following_sibling:
\r
9036 return step_do(c, stack, axis_to_type<axis_following_sibling>());
\r
9038 case axis_namespace:
\r
9039 // namespaced axis is not supported
\r
9040 return xpath_node_set_raw();
\r
9043 return step_do(c, stack, axis_to_type<axis_parent>());
\r
9045 case axis_preceding:
\r
9046 return step_do(c, stack, axis_to_type<axis_preceding>());
\r
9048 case axis_preceding_sibling:
\r
9049 return step_do(c, stack, axis_to_type<axis_preceding_sibling>());
\r
9052 return step_do(c, stack, axis_to_type<axis_self>());
\r
9055 assert(!"Unknown axis");
\r
9056 return xpath_node_set_raw();
\r
// Root step: a set holding the root of the context node, or of the attribute's parent for an
// attribute context.
9060 case ast_step_root:
\r
9062 assert(!_right); // root step can't have any predicates
\r
9064 xpath_node_set_raw ns;
\r
9066 ns.set_type(xpath_node_set::type_sorted);
\r
9068 if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
\r
9069 else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result);
\r
// Node-set variable: the variable's nodes are copied into a new raw set that keeps the variable's
// ordering type.
9074 case ast_variable:
\r
9076 assert(_rettype == _data.variable->type());
\r
9078 if (_rettype == xpath_type_node_set)
\r
9080 const xpath_node_set& s = _data.variable->get_node_set();
\r
9082 xpath_node_set_raw ns;
\r
9084 ns.set_type(s.type());
\r
9085 ns.append(s.begin(), s.end(), stack.result);
\r
9090 // fallthrough to type conversion
\r
9094 assert(!"Wrong expression for return type node set");
\r
9095 return xpath_node_set_raw();
\r
// Body fragment of is_posinv() (named by the recursive calls below); the signature, the switch line
// and the return statements are on lines missing from this listing.
// NOTE(review): judging by the name and the ast_func_position case, this presumably reports whether
// the subtree's value is independent of the context position -- confirm against the full source.
9103 case ast_func_position:
\r
9106 case ast_string_constant:
\r
9107 case ast_number_constant:
\r
9108 case ast_variable:
\r
9112 case ast_step_root:
\r
9115 case ast_predicate:
\r
9117 case ast_filter_posinv:
\r
// Recursive part: the result is false as soon as the left child or any node chained from _right is
// not position-invariant.
9121 if (_left && !_left->is_posinv()) return false;
\r
9123 for (xpath_ast_node* n = _right; n; n = n->_next)
\r
9124 if (!n->is_posinv()) return false;
\r
// Returns the static result type of this node, converting the stored _rettype value back to xpath_value_type.
9130 xpath_value_type rettype() const
\r
9132 return static_cast<xpath_value_type>(_rettype);
\r
// Parser state for building xpath_ast_node trees from a query string.
// NOTE(review): only the data members and the first member functions are visible in this excerpt.
9136 struct xpath_parser
\r
// Allocator used by alloc_node() and alloc_string() below.
9138 xpath_allocator* _alloc;
\r
9139 xpath_lexer _lexer;
\r
// Start of the query text; throw_error() subtracts it from the lexer position to compute the error offset.
9141 const char_t* _query;
\r
9142 xpath_variable_set* _variables;
\r
// Receives the error message and offset written by throw_error().
9144 xpath_parse_result* _result;
\r
9146 char_t _scratch[32];
\r
// Without exception support, errors are reported by longjmp() to this buffer (see throw_error).
9148 #ifdef PUGIXML_NO_EXCEPTIONS
\r
9149 jmp_buf _error_handler;
\r
// Records a parse error (message and offset of the lexer position relative to the query start) in
// *_result and then leaves the parser: by longjmp() to _error_handler when PUGIXML_NO_EXCEPTIONS is
// defined, otherwise by throwing xpath_exception. The #else / #endif lines are missing from this
// listing. The message pointer is stored as is, not copied.
9152 void throw_error(const char* message)
\r
9154 _result->error = message;
\r
9155 _result->offset = _lexer.current_pos() - _query;
\r
9157 #ifdef PUGIXML_NO_EXCEPTIONS
\r
9158 longjmp(_error_handler, 1);
\r
9160 throw xpath_exception(*_result);
\r
// Reports an allocation failure: through throw_error("Out of memory") when PUGIXML_NO_EXCEPTIONS is
// defined, otherwise by throwing std::bad_alloc. The #else / #endif lines are missing from this listing.
9164 void throw_error_oom()
\r
9166 #ifdef PUGIXML_NO_EXCEPTIONS
\r
9167 throw_error("Out of memory");
\r
9169 throw std::bad_alloc();
\r
// Allocates raw storage for one xpath_ast_node from _alloc; callers construct the node in it with
// placement new. A null allocation result is reported through throw_error_oom(). The return statement
// is on a line missing from this listing.
9173 void* alloc_node()
\r
9175 void* result = _alloc->allocate_nothrow(sizeof(xpath_ast_node));
\r
9177 if (!result) throw_error_oom();
\r
// Copies the lexer string [value.begin, value.end) into storage obtained from _alloc.
// NOTE(review): the terminator write and the return statement are on lines missing from this listing,
// as is any handling of an empty input that may precede the length computation.
9182 const char_t* alloc_string(const xpath_lexer_string& value)
\r
9186 size_t length = static_cast<size_t>(value.end - value.begin);
\r
// One extra element is reserved beyond the copied characters.
9188 char_t* c = static_cast<char_t*>(_alloc->allocate_nothrow((length + 1) * sizeof(char_t)));
\r
9189 if (!c) throw_error_oom();
\r
9190 assert(c); // workaround for clang static analysis
\r
9192 memcpy(c, value.begin, length * sizeof(char_t));
\r
// Builds the AST node for a function that takes either no argument (type0) or a single node-set
// argument (type1) and returns a string; parse_function uses it for local-name, name and namespace-uri.
// A single argument that is not a node set is reported through throw_error().
9200 xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2])
\r
9202 assert(argc <= 1);
\r
9204 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
\r
// args[0] is passed as the left child in both cases; for argc == 0 its value depends on how the
// caller initialized the array (not visible here).
9206 return new (alloc_node()) xpath_ast_node(argc == 0 ? type0 : type1, xpath_type_string, args[0]);
\r
// Resolves a function call to an AST node from the function name and the number of parsed arguments.
// Dispatch is on the first character of the name, then on the exact name and argument count.
// When nothing matches, throw_error() reports an unrecognized function or wrong parameter count.
// Only args[0] and args[1] are handed to the node constructor here.
// NOTE(review): arguments past the second are presumably reached through the chain the caller builds with set_next() - confirm.
// NOTE(review): the case labels of the switch are not shown in this listing.
9209 xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2])
\r
9211 switch (name.begin[0])
\r
9214 if (name == PUGIXML_TEXT("boolean") && argc == 1)
\r
9215 return new (alloc_node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]);
\r
9220 if (name == PUGIXML_TEXT("count") && argc == 1)
\r
// count() is only accepted for a node-set argument
9222 if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
\r
9223 return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]);
\r
9225 else if (name == PUGIXML_TEXT("contains") && argc == 2)
\r
9226 return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_boolean, args[0], args[1]);
\r
// concat() accepts two or more arguments
9227 else if (name == PUGIXML_TEXT("concat") && argc >= 2)
\r
9228 return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]);
\r
9229 else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
\r
9230 return new (alloc_node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]);
\r
9235 if (name == PUGIXML_TEXT("false") && argc == 0)
\r
9236 return new (alloc_node()) xpath_ast_node(ast_func_false, xpath_type_boolean);
\r
9237 else if (name == PUGIXML_TEXT("floor") && argc == 1)
\r
9238 return new (alloc_node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]);
\r
9243 if (name == PUGIXML_TEXT("id") && argc == 1)
\r
9244 return new (alloc_node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]);
\r
9249 if (name == PUGIXML_TEXT("last") && argc == 0)
\r
9250 return new (alloc_node()) xpath_ast_node(ast_func_last, xpath_type_number);
\r
9251 else if (name == PUGIXML_TEXT("lang") && argc == 1)
\r
9252 return new (alloc_node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]);
\r
// local-name(), name() and namespace-uri() share the zero-or-one node-set argument helper
9253 else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
\r
9254 return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args);
\r
9259 if (name == PUGIXML_TEXT("name") && argc <= 1)
\r
9260 return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args);
\r
9261 else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
\r
9262 return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args);
\r
9263 else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
\r
9264 return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
\r
9265 else if (name == PUGIXML_TEXT("not") && argc == 1)
\r
9266 return new (alloc_node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]);
\r
9267 else if (name == PUGIXML_TEXT("number") && argc <= 1)
\r
9268 return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
\r
9273 if (name == PUGIXML_TEXT("position") && argc == 0)
\r
9274 return new (alloc_node()) xpath_ast_node(ast_func_position, xpath_type_number);
\r
9279 if (name == PUGIXML_TEXT("round") && argc == 1)
\r
9280 return new (alloc_node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]);
\r
9285 if (name == PUGIXML_TEXT("string") && argc <= 1)
\r
9286 return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
\r
9287 else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
\r
9288 return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]);
\r
9289 else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
\r
9290 return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
\r
9291 else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
\r
9292 return new (alloc_node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
\r
9293 else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
\r
9294 return new (alloc_node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
\r
// substring() has a two-argument and a three-argument form with distinct node types
9295 else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
\r
9296 return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
\r
9297 else if (name == PUGIXML_TEXT("sum") && argc == 1)
\r
// sum() is only accepted for a node-set argument
9299 if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
\r
9300 return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]);
\r
9306 if (name == PUGIXML_TEXT("translate") && argc == 3)
\r
9307 return new (alloc_node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]);
\r
9308 else if (name == PUGIXML_TEXT("true") && argc == 0)
\r
9309 return new (alloc_node()) xpath_ast_node(ast_func_true, xpath_type_boolean);
\r
// no name/argument-count combination matched
9317 throw_error("Unrecognized function or wrong parameter count");
\r
// Maps an axis name to its axis_t value, dispatching on the first character of the name.
// When no name matches, 'specified' is set to false and axis_child is returned.
// NOTE(review): the case labels, the line that sets 'specified' to true and the return for "self" are not shown in this listing.
9322 axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)
\r
9326 switch (name.begin[0])
\r
9329 if (name == PUGIXML_TEXT("ancestor"))
\r
9330 return axis_ancestor;
\r
9331 else if (name == PUGIXML_TEXT("ancestor-or-self"))
\r
9332 return axis_ancestor_or_self;
\r
9333 else if (name == PUGIXML_TEXT("attribute"))
\r
9334 return axis_attribute;
\r
9339 if (name == PUGIXML_TEXT("child"))
\r
9340 return axis_child;
\r
9345 if (name == PUGIXML_TEXT("descendant"))
\r
9346 return axis_descendant;
\r
9347 else if (name == PUGIXML_TEXT("descendant-or-self"))
\r
9348 return axis_descendant_or_self;
\r
9353 if (name == PUGIXML_TEXT("following"))
\r
9354 return axis_following;
\r
9355 else if (name == PUGIXML_TEXT("following-sibling"))
\r
9356 return axis_following_sibling;
\r
9361 if (name == PUGIXML_TEXT("namespace"))
\r
9362 return axis_namespace;
\r
9367 if (name == PUGIXML_TEXT("parent"))
\r
9368 return axis_parent;
\r
9369 else if (name == PUGIXML_TEXT("preceding"))
\r
9370 return axis_preceding;
\r
9371 else if (name == PUGIXML_TEXT("preceding-sibling"))
\r
9372 return axis_preceding_sibling;
\r
9377 if (name == PUGIXML_TEXT("self"))
\r
// fallback: report that no axis was recognized
9386 specified = false;
\r
9387 return axis_child;
\r
// Maps a node type name ("comment", "node", "processing-instruction", "text") to its nodetest_type_* value.
// Returns nodetest_none for any other name.
9390 nodetest_t parse_node_test_type(const xpath_lexer_string& name)
\r
9392 switch (name.begin[0])
\r
9395 if (name == PUGIXML_TEXT("comment"))
\r
9396 return nodetest_type_comment;
\r
9401 if (name == PUGIXML_TEXT("node"))
\r
9402 return nodetest_type_node;
\r
9407 if (name == PUGIXML_TEXT("processing-instruction"))
\r
9408 return nodetest_type_pi;
\r
9413 if (name == PUGIXML_TEXT("text"))
\r
9414 return nodetest_type_text;
\r
9422 return nodetest_none;
\r
9425 // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
\r
// Parses a primary expression, selected by the current lexeme: variable reference, parenthesized
// expression, string literal, number, or function call. Any other lexeme is reported through throw_error().
// NOTE(review): several case labels, lexer advances and return statements are not shown in this listing.
9426 xpath_ast_node* parse_primary_expression()
\r
9428 switch (_lexer.current())
\r
// variable reference: the name is looked up in the variable set
9432 xpath_lexer_string name = _lexer.contents();
\r
9435 throw_error("Unknown variable: variable set is not provided");
\r
9437 xpath_variable* var = get_variable_scratch(_scratch, _variables, name.begin, name.end);
\r
9440 throw_error("Unknown variable: variable set does not contain the given name");
\r
// the node's value type is taken from the variable itself
9444 return new (alloc_node()) xpath_ast_node(ast_variable, var->type(), var);
\r
// parenthesized expression
9447 case lex_open_brace:
\r
9451 xpath_ast_node* n = parse_expression();
\r
9453 if (_lexer.current() != lex_close_brace)
\r
9454 throw_error("Unmatched braces");
\r
// string literal: the lexer contents are copied with alloc_string()
9461 case lex_quoted_string:
\r
9463 const char_t* value = alloc_string(_lexer.contents());
\r
9465 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value);
\r
// number literal: a failed conversion is reported as out of memory
9475 if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value))
\r
9476 throw_error_oom();
\r
9478 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value);
\r
// function call: the first two arguments go into args[], later ones are linked with set_next()
9486 xpath_ast_node* args[2] = {0};
\r
9489 xpath_lexer_string function = _lexer.contents();
\r
9492 xpath_ast_node* last_arg = 0;
\r
9494 if (_lexer.current() != lex_open_brace)
\r
9495 throw_error("Unrecognized function call");
\r
9498 if (_lexer.current() != lex_close_brace)
\r
9499 args[argc++] = parse_expression();
\r
9501 while (_lexer.current() != lex_close_brace)
\r
9503 if (_lexer.current() != lex_comma)
\r
9504 throw_error("No comma between function arguments");
\r
9507 xpath_ast_node* n = parse_expression();
\r
9509 if (argc < 2) args[argc] = n;
\r
9510 else last_arg->set_next(n);
\r
9518 return parse_function(function, argc, args);
\r
9522 throw_error("Unrecognizable primary expression");
\r
9528 // FilterExpr ::= PrimaryExpr | FilterExpr Predicate
\r
9529 // Predicate ::= '[' PredicateExpr ']'
\r
9530 // PredicateExpr ::= Expr
\r
// Parses a primary expression followed by any number of '[' predicate ']' filters.
// Each predicate wraps the expression parsed so far in a new filter node.
9531 xpath_ast_node* parse_filter_expression()
\r
9533 xpath_ast_node* n = parse_primary_expression();
\r
9535 while (_lexer.current() == lex_open_square_brace)
\r
9539 xpath_ast_node* expr = parse_expression();
\r
// a predicate may only filter a node set
9541 if (n->rettype() != xpath_type_node_set) throw_error("Predicate has to be applied to node set");
\r
// ast_filter_posinv is chosen only for non-number predicates for which is_posinv() holds
9543 bool posinv = expr->rettype() != xpath_type_number && expr->is_posinv();
\r
9545 n = new (alloc_node()) xpath_ast_node(posinv ? ast_filter_posinv : ast_filter, xpath_type_node_set, n, expr);
\r
9547 if (_lexer.current() != lex_close_square_brace)
\r
9548 throw_error("Unmatched square brace");
\r
9556 // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
\r
9557 // AxisSpecifier ::= AxisName '::' | '@'?
\r
9558 // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
\r
9559 // NameTest ::= '*' | NCName ':' '*' | QName
\r
9560 // AbbreviatedStep ::= '.' | '..'
\r
// Parses one location step applied to 'set' (which may be null): an optional axis, a node test and
// any number of predicates. The abbreviated steps '.' and '..' return immediately with the
// self / parent axis and a node() test. Errors are reported through throw_error().
// NOTE(review): lexer advances and some closing/else lines are not shown in this listing.
9561 xpath_ast_node* parse_step(xpath_ast_node* set)
\r
9563 if (set && set->rettype() != xpath_type_node_set)
\r
9564 throw_error("Step has to be applied to node set");
\r
9566 bool axis_specified = false;
\r
9567 axis_t axis = axis_child; // implied child axis
\r
// '@' selects the attribute axis
9569 if (_lexer.current() == lex_axis_attribute)
\r
9571 axis = axis_attribute;
\r
9572 axis_specified = true;
\r
9576 else if (_lexer.current() == lex_dot)
\r
9580 return new (alloc_node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0);
\r
9582 else if (_lexer.current() == lex_double_dot)
\r
9586 return new (alloc_node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0);
\r
9589 nodetest_t nt_type = nodetest_none;
\r
9590 xpath_lexer_string nt_name;
\r
9592 if (_lexer.current() == lex_string)
\r
9595 nt_name = _lexer.contents();
\r
9598 // was it an axis name?
\r
9599 if (_lexer.current() == lex_double_colon)
\r
9601 // parse axis name
\r
9602 if (axis_specified) throw_error("Two axis specifiers in one step");
\r
9604 axis = parse_axis_name(nt_name, axis_specified);
\r
9606 if (!axis_specified) throw_error("Unknown axis");
\r
9608 // read actual node test
\r
9611 if (_lexer.current() == lex_multiply)
\r
9613 nt_type = nodetest_all;
\r
9614 nt_name = xpath_lexer_string();
\r
9617 else if (_lexer.current() == lex_string)
\r
9619 nt_name = _lexer.contents();
\r
9622 else throw_error("Unrecognized node test");
\r
// nt_type is still unset here when the test is given by name rather than '*'
9625 if (nt_type == nodetest_none)
\r
9627 // node type test or processing-instruction
\r
9628 if (_lexer.current() == lex_open_brace)
\r
9632 if (_lexer.current() == lex_close_brace)
\r
9636 nt_type = parse_node_test_type(nt_name);
\r
9638 if (nt_type == nodetest_none) throw_error("Unrecognized node type");
\r
9640 nt_name = xpath_lexer_string();
\r
// processing-instruction() takes its target as a quoted literal
9642 else if (nt_name == PUGIXML_TEXT("processing-instruction"))
\r
9644 if (_lexer.current() != lex_quoted_string)
\r
9645 throw_error("Only literals are allowed as arguments to processing-instruction()");
\r
9647 nt_type = nodetest_pi;
\r
9648 nt_name = _lexer.contents();
\r
9651 if (_lexer.current() != lex_close_brace)
\r
9652 throw_error("Unmatched brace near processing-instruction()");
\r
9656 throw_error("Unmatched brace near node type test");
\r
9659 // QName or NCName:*
\r
// a name ending in ":*" (with at least one character before it) selects every name in a namespace
9662 if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:*
\r
9664 nt_name.end--; // erase *
\r
9666 nt_type = nodetest_all_in_namespace;
\r
9668 else nt_type = nodetest_name;
\r
9672 else if (_lexer.current() == lex_multiply)
\r
9674 nt_type = nodetest_all;
\r
9677 else throw_error("Unrecognized node test");
\r
// the step node keeps its own copy of the test name
9679 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step, set, axis, nt_type, alloc_string(nt_name));
\r
9681 xpath_ast_node* last = 0;
\r
// predicates: the first becomes the step's right child, later ones are chained with set_next()
9683 while (_lexer.current() == lex_open_square_brace)
\r
9687 xpath_ast_node* expr = parse_expression();
\r
9689 xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, xpath_type_node_set, expr);
\r
9691 if (_lexer.current() != lex_close_square_brace)
\r
9692 throw_error("Unmatched square brace");
\r
9695 if (last) last->set_next(pred);
\r
9696 else n->set_right(pred);
\r
9704 // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
\r
// Parses a sequence of steps separated by '/' or '//', starting from 'set'.
// For '//' an extra descendant-or-self::node() step is inserted before the next step.
9705 xpath_ast_node* parse_relative_location_path(xpath_ast_node* set)
\r
9707 xpath_ast_node* n = parse_step(set);
\r
9709 while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
\r
// remember the separator
9711 lexeme_t l = _lexer.current();
\r
9714 if (l == lex_double_slash)
\r
9715 n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
\r
9717 n = parse_step(n);
\r
9723 // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
\r
9724 // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
\r
// Parses a location path. A leading '/' or '//' creates an ast_step_root node as the starting set;
// otherwise the relative path is parsed with a null starting set.
9725 xpath_ast_node* parse_location_path()
\r
9727 if (_lexer.current() == lex_slash)
\r
9731 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
\r
9733 // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
\r
9734 lexeme_t l = _lexer.current();
\r
9736 if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply)
\r
9737 return parse_relative_location_path(n);
\r
9741 else if (_lexer.current() == lex_double_slash)
\r
// '//' at the start: root followed by a descendant-or-self::node() step
9745 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
\r
9746 n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
\r
9748 return parse_relative_location_path(n);
\r
9751 // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1
\r
9752 return parse_relative_location_path(0);
\r
9755 // PathExpr ::= LocationPath
\r
9757 // | FilterExpr '/' RelativeLocationPath
\r
9758 // | FilterExpr '//' RelativeLocationPath
\r
9759 // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
\r
9760 // UnaryExpr ::= UnionExpr | '-' UnaryExpr
\r
// Parses a path expression or a unary minus expression.
// Lexemes that can start a primary expression lead to a filter expression (optionally followed by
// '/' or '//' and a relative path); a '-' leads to a negate node; everything else is a location path.
9761 xpath_ast_node* parse_path_or_unary_expression()
\r
9764 // PathExpr begins with either LocationPath or FilterExpr.
\r
9765 // FilterExpr begins with PrimaryExpr
\r
9766 // PrimaryExpr begins with '$' in case of it being a variable reference,
\r
9767 // '(' in case of it being an expression, string literal, number constant or
\r
9770 if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
\r
9771 _lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
\r
9772 _lexer.current() == lex_string)
\r
9774 if (_lexer.current() == lex_string)
\r
9776 // This is either a function call, or not - if not, we shall proceed with location path
\r
// look ahead in the raw input, skipping whitespace, for an opening parenthesis
9777 const char_t* state = _lexer.state();
\r
9779 while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state;
\r
9781 if (*state != '(') return parse_location_path();
\r
9783 // This looks like a function call; however this still can be a node-test. Check it.
\r
9784 if (parse_node_test_type(_lexer.contents()) != nodetest_none) return parse_location_path();
\r
9787 xpath_ast_node* n = parse_filter_expression();
\r
9789 if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
\r
9791 lexeme_t l = _lexer.current();
\r
9794 if (l == lex_double_slash)
\r
9796 if (n->rettype() != xpath_type_node_set) throw_error("Step has to be applied to node set");
\r
9798 n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
\r
9801 // select from location path
\r
9802 return parse_relative_location_path(n);
\r
9807 else if (_lexer.current() == lex_minus)
\r
9811 // precedence 7+ - only parses union expressions
\r
9812 xpath_ast_node* expr = parse_expression_rec(parse_path_or_unary_expression(), 7);
\r
9814 return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr);
\r
9817 return parse_location_path();
\r
// Describes a binary operator: its AST node type, its result type and its precedence.
// Precedences used by parse(): or = 1, and = 2, equality = 3, relational = 4, additive = 5,
// multiplicative (*, div, mod) = 6, union = 7. A default-constructed value (ast_unknown) means "no operator".
// NOTE(review): the precedence member declaration and several case labels are not shown in this listing.
9820 struct binary_op_t
\r
9822 ast_type_t asttype;
\r
9823 xpath_value_type rettype;
\r
9826 binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0)
\r
9830 binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_)
\r
// Classifies the lexer's current lexeme as a binary operator without consuming it in the lines shown here.
9834 static binary_op_t parse(xpath_lexer& lexer)
\r
9836 switch (lexer.current())
\r
// word operators are recognized by comparing the lexeme contents
9839 if (lexer.contents() == PUGIXML_TEXT("or"))
\r
9840 return binary_op_t(ast_op_or, xpath_type_boolean, 1);
\r
9841 else if (lexer.contents() == PUGIXML_TEXT("and"))
\r
9842 return binary_op_t(ast_op_and, xpath_type_boolean, 2);
\r
9843 else if (lexer.contents() == PUGIXML_TEXT("div"))
\r
9844 return binary_op_t(ast_op_divide, xpath_type_number, 6);
\r
9845 else if (lexer.contents() == PUGIXML_TEXT("mod"))
\r
9846 return binary_op_t(ast_op_mod, xpath_type_number, 6);
\r
9848 return binary_op_t();
\r
9851 return binary_op_t(ast_op_equal, xpath_type_boolean, 3);
\r
9853 case lex_not_equal:
\r
9854 return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3);
\r
9857 return binary_op_t(ast_op_less, xpath_type_boolean, 4);
\r
9860 return binary_op_t(ast_op_greater, xpath_type_boolean, 4);
\r
9862 case lex_less_or_equal:
\r
9863 return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4);
\r
9865 case lex_greater_or_equal:
\r
9866 return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4);
\r
9869 return binary_op_t(ast_op_add, xpath_type_number, 5);
\r
9872 return binary_op_t(ast_op_subtract, xpath_type_number, 5);
\r
9874 case lex_multiply:
\r
9875 return binary_op_t(ast_op_multiply, xpath_type_number, 6);
\r
9878 return binary_op_t(ast_op_union, xpath_type_node_set, 7);
\r
9881 return binary_op_t();
\r
// Parses a chain of binary operators whose precedence is at least 'limit', starting from the
// already parsed left-hand side 'lhs'. Operators that bind tighter than the current one are folded
// into the right-hand side by recursion before the current operator's node is built.
9886 xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit)
\r
9888 binary_op_t op = binary_op_t::parse(_lexer);
\r
9890 while (op.asttype != ast_unknown && op.precedence >= limit)
\r
9894 xpath_ast_node* rhs = parse_path_or_unary_expression();
\r
9896 binary_op_t nextop = binary_op_t::parse(_lexer);
\r
// strictly greater: operators of equal precedence are left to the outer loop
9898 while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence)
\r
9900 rhs = parse_expression_rec(rhs, nextop.precedence);
\r
9902 nextop = binary_op_t::parse(_lexer);
\r
// both operands of '|' must be node sets
9905 if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set))
\r
9906 throw_error("Union operator has to be applied to node sets");
\r
9908 lhs = new (alloc_node()) xpath_ast_node(op.asttype, op.rettype, lhs, rhs);
\r
9910 op = binary_op_t::parse(_lexer);
\r
9916 // Expr ::= OrExpr
\r
9917 // OrExpr ::= AndExpr | OrExpr 'or' AndExpr
\r
9918 // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
\r
9919 // EqualityExpr ::= RelationalExpr
\r
9920 // | EqualityExpr '=' RelationalExpr
\r
9921 // | EqualityExpr '!=' RelationalExpr
\r
9922 // RelationalExpr ::= AdditiveExpr
\r
9923 // | RelationalExpr '<' AdditiveExpr
\r
9924 // | RelationalExpr '>' AdditiveExpr
\r
9925 // | RelationalExpr '<=' AdditiveExpr
\r
9926 // | RelationalExpr '>=' AdditiveExpr
\r
9927 // AdditiveExpr ::= MultiplicativeExpr
\r
9928 // | AdditiveExpr '+' MultiplicativeExpr
\r
9929 // | AdditiveExpr '-' MultiplicativeExpr
\r
9930 // MultiplicativeExpr ::= UnaryExpr
\r
9931 // | MultiplicativeExpr '*' UnaryExpr
\r
9932 // | MultiplicativeExpr 'div' UnaryExpr
\r
9933 // | MultiplicativeExpr 'mod' UnaryExpr
\r
// Parses a full expression: one path/unary operand followed by binary operators of any precedence (limit 0).
9934 xpath_ast_node* parse_expression()
\r
9936 return parse_expression_rec(parse_path_or_unary_expression(), 0);
\r
// Stores the allocator, query text, variable set and result pointers and initializes the lexer from the query text.
9939 xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result)
\r
// Parses the whole query; input left over after the expression is reported as "Incorrect query".
9943 xpath_ast_node* parse()
\r
9945 xpath_ast_node* result = parse_expression();
\r
9947 if (_lexer.current() != lex_eof)
\r
9949 // there are still unparsed tokens left, error
\r
9950 throw_error("Incorrect query");
\r
// Entry point: constructs a parser for the query and runs it.
// With PUGIXML_NO_EXCEPTIONS the call is guarded by setjmp() on the parser's _error_handler and
// yields 0 when setjmp() returns a non-zero value.
// NOTE(review): the #else / #endif lines separating the two variants are not shown in this listing.
9956 static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result)
\r
9958 xpath_parser parser(query, variables, alloc, result);
\r
9960 #ifdef PUGIXML_NO_EXCEPTIONS
\r
9961 int error = setjmp(parser._error_handler);
\r
9963 return (error == 0) ? parser.parse() : 0;
\r
9965 return parser.parse();
\r
// Holds a compiled query: the AST root, the allocator used for the query and that allocator's first memory block.
9970 struct xpath_query_impl
\r
// Allocates storage through xml_memory::allocate and constructs the object in place.
// NOTE(review): a check for allocation failure is not shown in this listing.
9972 static xpath_query_impl* create()
\r
9974 void* memory = xml_memory::allocate(sizeof(xpath_query_impl));
\r
9976 return new (memory) xpath_query_impl();
\r
// Releases the allocator's pages, then the object's own storage.
9979 static void destroy(void* ptr)
\r
9983 // free all allocated pages
\r
9984 static_cast<xpath_query_impl*>(ptr)->alloc.release();
\r
9986 // free allocator memory (with the first page)
\r
9987 xml_memory::deallocate(ptr);
\r
// alloc is handed the address of the 'block' member, which is declared after it
9990 xpath_query_impl(): root(0), alloc(&block)
\r
9995 xpath_ast_node* root;
\r
9996 xpath_allocator alloc;
\r
9997 xpath_memory_block block;
\r
// Evaluates the query root as a string for context node n (position 1, size 1).
// Returns an empty xpath_string when impl is null, and also when setjmp() returns non-zero
// in PUGIXML_NO_EXCEPTIONS builds.
10000 PUGI__FN xpath_string evaluate_string_impl(xpath_query_impl* impl, const xpath_node& n, xpath_stack_data& sd)
\r
10002 if (!impl) return xpath_string();
\r
10004 #ifdef PUGIXML_NO_EXCEPTIONS
\r
10005 if (setjmp(sd.error_handler)) return xpath_string();
\r
10008 xpath_context c(n, 1, 1);
\r
10010 return impl->root->eval_string(c, sd.stack);
\r
10016 #ifndef PUGIXML_NO_EXCEPTIONS
\r
// Copies the parse result into the exception; the result is expected to carry an error message.
10017 PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_)
\r
10019 assert(_result.error);
\r
// Returns the error message stored in the parse result.
10022 PUGI__FN const char* xpath_exception::what() const throw()
\r
10024 return _result.error;
\r
// Accessor for the parse result carried by the exception.
// NOTE(review): the body is not shown in this listing; presumably it returns _result - confirm.
10027 PUGI__FN const xpath_parse_result& xpath_exception::result() const
\r
// Default constructor; no member initializers are given, so the members are default-constructed.
10033 PUGI__FN xpath_node::xpath_node()
\r
// Wraps an element node; the attribute member is left default-constructed.
10037 PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_)
\r
// Wraps an attribute together with its parent node; an empty attribute also discards the parent.
10041 PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_)
\r
// Returns the wrapped node, or an empty xml_node when this object refers to an attribute.
10045 PUGI__FN xml_node xpath_node::node() const
\r
10047 return _attribute ? xml_node() : _node;
\r
// Returns the wrapped attribute.
10050 PUGI__FN xml_attribute xpath_node::attribute() const
\r
10052 return _attribute;
\r
// For an attribute, _node already holds the owning node; for a node, its parent is returned.
10055 PUGI__FN xml_node xpath_node::parent() const
\r
10057 return _attribute ? _node : _node.parent();
\r
// Function whose address is used as the non-null result of xpath_node's unspecified_bool_type conversion.
10060 PUGI__FN static void unspecified_bool_xpath_node(xpath_node***)
\r
// Converts to a non-null value when either the node or the attribute is set, and to 0 otherwise.
10064 PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const
\r
10066 return (_node || _attribute) ? unspecified_bool_xpath_node : 0;
\r
// True when neither the node nor the attribute is set.
10069 PUGI__FN bool xpath_node::operator!() const
\r
10071 return !(_node || _attribute);
\r
// Two xpath_nodes are equal when both their node and their attribute members compare equal.
10074 PUGI__FN bool xpath_node::operator==(const xpath_node& n) const
\r
10076 return _node == n._node && _attribute == n._attribute;
\r
// Negation of operator==: true when the node or the attribute member differs.
10079 PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const
\r
10081 return _node != n._node || _attribute != n._attribute;
\r
10084 #ifdef __BORLANDC__
\r
// Borland-only overload (inside the __BORLANDC__ guard): combines the node's boolean value with rhs using &&.
10085 PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs)
\r
10087 return (bool)lhs && rhs;
\r
// Borland-only overload (inside the __BORLANDC__ guard): combines the node's boolean value with rhs using ||.
10090 PUGI__FN bool operator||(const xpath_node& lhs, bool rhs)
\r
10092 return (bool)lhs || rhs;
\r
// Replaces the set's contents with a copy of the range [begin_, end_).
// One path keeps the data in the inline _storage member; the other makes a heap copy through
// xml_memory::allocate and memcpy.
// NOTE(review): the condition choosing between the two paths, the allocation-failure check and the
// PUGIXML_NO_EXCEPTIONS branch body are not shown in this listing.
10096 PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_)
\r
10098 assert(begin_ <= end_);
\r
10100 size_t size_ = static_cast<size_t>(end_ - begin_);
\r
10104 // deallocate old buffer
\r
10105 if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
\r
10107 // use internal buffer
\r
10108 if (begin_ != end_) _storage = *begin_;
\r
10110 _begin = &_storage;
\r
10111 _end = &_storage + size_;
\r
10115 // make heap copy
\r
10116 xpath_node* storage = static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node)));
\r
10120 #ifdef PUGIXML_NO_EXCEPTIONS
\r
10123 throw std::bad_alloc();
\r
10127 memcpy(storage, begin_, size_ * sizeof(xpath_node));
\r
// the old buffer is released only after the new one has been allocated and filled
10129 // deallocate old buffer
\r
10130 if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
\r
10133 _begin = storage;
\r
10134 _end = storage + size_;
\r
// Constructs an empty, unsorted set whose range points at the inline _storage member.
10138 PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(&_storage), _end(&_storage)
\r
// Constructs a set of the given type holding a copy of [begin_, end_).
10142 PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_), _begin(&_storage), _end(&_storage)
\r
10144 _assign(begin_, end_);
\r
// Frees the buffer unless the set is using its inline _storage member.
10147 PUGI__FN xpath_node_set::~xpath_node_set()
\r
10149 if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
\r
// Copy constructor: starts empty on the inline storage, then copies the other set's range and type.
10152 PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(ns._type), _begin(&_storage), _end(&_storage)
\r
10154 _assign(ns._begin, ns._end);
\r
// Copy assignment; self-assignment returns immediately without touching the buffer.
10157 PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
\r
10159 if (this == &ns) return *this;
\r
10161 _type = ns._type;
\r
10162 _assign(ns._begin, ns._end);
\r
// Accessor for the set's ordering type.
// NOTE(review): the body is not shown in this listing; presumably it returns _type - confirm.
10167 PUGI__FN xpath_node_set::type_t xpath_node_set::type() const
\r
// Number of nodes in the set, computed as the distance between _begin and _end.
10172 PUGI__FN size_t xpath_node_set::size() const
\r
10174 return _end - _begin;
\r
// True when the set holds no nodes.
10177 PUGI__FN bool xpath_node_set::empty() const
\r
10179 return _begin == _end;
\r
// Returns the node at 'index'; the index is checked only by assert.
10182 PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const
\r
10184 assert(index < size());
\r
10185 return _begin[index];
\r
// Iterator to the first node.
// NOTE(review): the body is not shown in this listing; presumably it returns _begin - confirm.
10188 PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const
\r
// Iterator past the last node.
// NOTE(review): the body is not shown in this listing; presumably it returns _end - confirm.
10193 PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const
\r
// Sorts the set through impl::xpath_sort and records the resulting ordering type.
10198 PUGI__FN void xpath_node_set::sort(bool reverse)
\r
10200 _type = impl::xpath_sort(_begin, _end, _type, reverse);
\r
// Returns the node selected by impl::xpath_first for the current range and ordering type.
10203 PUGI__FN xpath_node xpath_node_set::first() const
\r
10205 return impl::xpath_first(_begin, _end, _type);
\r
// A fresh result starts in the failed state ("Internal error") at offset 0.
10208 PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0)
\r
// True when no error message is set.
10212 PUGI__FN xpath_parse_result::operator bool() const
\r
10214 return error == 0;
\r
// Returns the error message, or "No error" when none is set.
10217 PUGI__FN const char* xpath_parse_result::description() const
\r
10219 return error ? error : "No error";
\r
// Starts with no value type and no next variable in the chain.
10222 PUGI__FN xpath_variable::xpath_variable(): _type(xpath_type_none), _next(0)
\r
// Returns the variable's name, which is stored in the type-specific derived object;
// 'this' is downcast according to the value type. An unknown type triggers an assert.
// NOTE(review): the switch statement and default label are not shown in this listing.
10226 PUGI__FN const char_t* xpath_variable::name() const
\r
10230 case xpath_type_node_set:
\r
10231 return static_cast<const impl::xpath_variable_node_set*>(this)->name;
\r
10233 case xpath_type_number:
\r
10234 return static_cast<const impl::xpath_variable_number*>(this)->name;
\r
10236 case xpath_type_string:
\r
10237 return static_cast<const impl::xpath_variable_string*>(this)->name;
\r
10239 case xpath_type_boolean:
\r
10240 return static_cast<const impl::xpath_variable_boolean*>(this)->name;
\r
10243 assert(!"Invalid variable type");
\r
// Accessor for the variable's value type.
// NOTE(review): the body is not shown in this listing; presumably it returns _type - confirm.
10248 PUGI__FN xpath_value_type xpath_variable::type() const
\r
// Returns the stored boolean, or false when the variable is not of boolean type.
10253 PUGI__FN bool xpath_variable::get_boolean() const
\r
10255 return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false;
\r
// Returns the stored number, or the value of impl::gen_nan() when the variable is not of number type.
10258 PUGI__FN double xpath_variable::get_number() const
\r
10260 return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan();
\r
// Returns the stored string; a non-string variable or a null stored value yields an empty string, never null.
10263 PUGI__FN const char_t* xpath_variable::get_string() const
\r
10265 const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0;
\r
10266 return value ? value : PUGIXML_TEXT("");
\r
// Returns the stored node set, or impl::dummy_node_set when the variable is not of node-set type.
10269 PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const
\r
10271 return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set;
\r
// Stores a boolean value; returns false without changes when the variable is not of boolean type.
10274 PUGI__FN bool xpath_variable::set(bool value)
\r
10276 if (_type != xpath_type_boolean) return false;
\r
10278 static_cast<impl::xpath_variable_boolean*>(this)->value = value;
\r
// Stores a number value; returns false without changes when the variable is not of number type.
10282 PUGI__FN bool xpath_variable::set(double value)
\r
10284 if (_type != xpath_type_number) return false;
\r
10286 static_cast<impl::xpath_variable_number*>(this)->value = value;
\r
// Stores a copy of the given string; returns false when the variable is not of string type or
// when the copy cannot be allocated (the previous value is then kept).
10290 PUGI__FN bool xpath_variable::set(const char_t* value)
\r
10292 if (_type != xpath_type_string) return false;
\r
10294 impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this);
\r
10296 // duplicate string
\r
// size is in bytes and includes the terminating character
10297 size_t size = (impl::strlength(value) + 1) * sizeof(char_t);
\r
10299 char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size));
\r
10300 if (!copy) return false;
\r
10302 memcpy(copy, value, size);
\r
10304 // replace old string
\r
10305 if (var->value) impl::xml_memory::deallocate(var->value);
\r
10306 var->value = copy;
\r
// Stores a copy of the node set; returns false without changes when the variable is not of node-set type.
10311 PUGI__FN bool xpath_variable::set(const xpath_node_set& value)
\r
10313 if (_type != xpath_type_node_set) return false;
\r
10315 static_cast<impl::xpath_variable_node_set*>(this)->value = value;
\r
// Clears every bucket of the _data table.
10319 PUGI__FN xpath_variable_set::xpath_variable_set()
\r
10321 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) _data[i] = 0;
\r
// Walks every bucket chain and deletes each variable with impl::delete_xpath_variable.
// The next pointer is read before the variable is deleted.
// NOTE(review): the inner loop header and the advance to 'next' are not shown in this listing.
10324 PUGI__FN xpath_variable_set::~xpath_variable_set()
\r
10326 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
\r
10328 xpath_variable* var = _data[i];
\r
10332 xpath_variable* next = var->_next;
\r
10334 impl::delete_xpath_variable(var->_type, var);
\r
// Looks a variable up by name: hashes the name to a bucket and scans that bucket's chain with strequal.
// NOTE(review): the return statements are not shown in this listing.
10341 PUGI__FN xpath_variable* xpath_variable_set::find(const char_t* name) const
\r
10343 const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
\r
10344 size_t hash = impl::hash_string(name) % hash_size;
\r
10346 // look for existing variable
\r
10347 for (xpath_variable* var = _data[hash]; var; var = var->_next)
\r
10348 if (impl::strequal(var->name(), name))
\r
// Returns the variable with the given name, creating it when absent.
// An existing variable is returned only when its type matches 'type'; otherwise the result is 0.
// A new variable is linked at the head of its bucket chain.
// NOTE(review): the check of new_xpath_variable's result and the final return are not shown in this listing.
10354 PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)
\r
10356 const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
\r
10357 size_t hash = impl::hash_string(name) % hash_size;
\r
10359 // look for existing variable
\r
10360 for (xpath_variable* var = _data[hash]; var; var = var->_next)
\r
10361 if (impl::strequal(var->name(), name))
\r
10362 return var->type() == type ? var : 0;
\r
10364 // add new variable
\r
10365 xpath_variable* result = impl::new_xpath_variable(type, name);
\r
10369 result->_type = type;
\r
10370 result->_next = _data[hash];
\r
10372 _data[hash] = result;
\r
// Finds or creates a boolean variable and assigns it; false when add() yields no variable or the assignment fails.
10378 PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value)
\r
10380 xpath_variable* var = add(name, xpath_type_boolean);
\r
10381 return var ? var->set(value) : false;
\r
// Finds or creates a number variable and assigns it; false when add() yields no variable or the assignment fails.
10384 PUGI__FN bool xpath_variable_set::set(const char_t* name, double value)
\r
10386 xpath_variable* var = add(name, xpath_type_number);
\r
10387 return var ? var->set(value) : false;
\r
// Finds or creates a string variable and assigns it; false when add() yields no variable or the assignment fails.
10390 PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value)
\r
10392 xpath_variable* var = add(name, xpath_type_string);
\r
10393 return var ? var->set(value) : false;
\r
// Finds or creates a node-set variable and assigns it; false when add() yields no variable or the assignment fails.
10396 PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value)
\r
10398 xpath_variable* var = add(name, xpath_type_node_set);
\r
10399 return var ? var->set(value) : false;
\r
// Non-const lookup by name; forwards to find().
10402 PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name)
\r
10404 return find(name);
\r
// Const lookup by name; forwards to find().
10407 PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const
\r
10409 return find(name);
\r
// Compiles the query text. The implementation object is owned by a buffer_holder while parsing,
// and ownership moves to _impl (with the error cleared) only on the success path.
// Failure to create the implementation object sets "Out of memory" under PUGIXML_NO_EXCEPTIONS;
// the other visible variant throws std::bad_alloc.
// NOTE(review): the failure checks and the #else / #endif lines are not shown in this listing.
10412 PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)
\r
10414 impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create();
\r
10418 #ifdef PUGIXML_NO_EXCEPTIONS
\r
10419 _result.error = "Out of memory";
\r
10421 throw std::bad_alloc();
\r
10426 impl::buffer_holder impl_holder(qimpl, impl::xpath_query_impl::destroy);
\r
10428 qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result);
\r
10432 _impl = static_cast<impl::xpath_query_impl*>(impl_holder.release());
\r
10433 _result.error = 0;
\r
// Hands the implementation object to xpath_query_impl::destroy.
10438 PUGI__FN xpath_query::~xpath_query()
\r
10440 impl::xpath_query_impl::destroy(_impl);
\r
// Returns the value type of the query's root expression, or xpath_type_none when there is no compiled query.
10443 PUGI__FN xpath_value_type xpath_query::return_type() const
\r
10445 if (!_impl) return xpath_type_none;
\r
10447 return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype();
\r
// Evaluates the query as a boolean for context node n (position 1, size 1).
// Returns false when there is no compiled query, and also when setjmp() returns non-zero
// in PUGIXML_NO_EXCEPTIONS builds.
10450 PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const
\r
10452 if (!_impl) return false;
\r
10454 impl::xpath_context c(n, 1, 1);
\r
10455 impl::xpath_stack_data sd;
\r
10457 #ifdef PUGIXML_NO_EXCEPTIONS
\r
10458 if (setjmp(sd.error_handler)) return false;
\r
10461 return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);
\r
// Evaluates the query as a number for context node n (position 1, size 1).
// Returns impl::gen_nan() when there is no compiled query, and also when setjmp() returns non-zero
// in PUGIXML_NO_EXCEPTIONS builds.
10464 PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const
\r
10466 if (!_impl) return impl::gen_nan();
\r
10468 impl::xpath_context c(n, 1, 1);
\r
10469 impl::xpath_stack_data sd;
\r
10471 #ifdef PUGIXML_NO_EXCEPTIONS
\r
10472 if (setjmp(sd.error_handler)) return impl::gen_nan();
\r
10475 return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);
\r
10478 #ifndef PUGIXML_NO_STL
\r
// Evaluates the query as a string and returns it as a string_t built from the result's c_str().
10479 PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const
\r
10481 impl::xpath_stack_data sd;
\r
10483 return impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd).c_str();
\r
// Buffer overload: evaluates the query as a string with n as the context node and copies the
// result into a caller-supplied buffer of `capacity` characters.
// Returns the full size needed for the result including the terminating zero, regardless of
// how much was actually copied.
10487 PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
\r
10489 impl::xpath_stack_data sd;
\r
10491 impl::xpath_string r = impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd);
\r
// Result length plus one for the terminator.
10493 size_t full_size = r.length() + 1;
\r
// The buffer is written only when it has room for at least the terminator.
10495 if (capacity > 0)
\r
// Number of characters to write (terminator included): the smaller of full_size and capacity.
10497 size_t size = (full_size < capacity) ? full_size : capacity;
\r
10498 assert(size > 0);
\r
// Copy size - 1 characters, then zero-terminate; the output is truncated when capacity < full_size.
10500 memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t));
\r
10501 buffer[size - 1] = 0;
\r
10504 return full_size;
\r
// Evaluates the query as a node set with n as the context node.
// Returns an empty xpath_node_set when the query has no implementation object.
10507 PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
\r
10509 if (!_impl) return xpath_node_set();
\r
10511 impl::xpath_ast_node* root = static_cast<impl::xpath_query_impl*>(_impl)->root;
\r
// The expression must have node-set type. If it does not, no-exception builds return an empty
// set, and other builds throw xpath_exception carrying the message below (the #else/#endif
// separating the two paths is elided from this listing).
10513 if (root->rettype() != xpath_type_node_set)
\r
10515 #ifdef PUGIXML_NO_EXCEPTIONS
\r
10516 return xpath_node_set();
\r
10518 xpath_parse_result res;
\r
10519 res.error = "Expression does not evaluate to node set";
\r
10521 throw xpath_exception(res);
\r
// Context for n; the two 1s are presumably context position and size - TODO confirm.
10525 impl::xpath_context c(n, 1, 1);
\r
10526 impl::xpath_stack_data sd;
\r
// No-exception builds: a non-zero setjmp result (i.e. a jump back to sd.error_handler)
// makes the function return an empty set.
10528 #ifdef PUGIXML_NO_EXCEPTIONS
\r
10529 if (setjmp(sd.error_handler)) return xpath_node_set();
\r
10532 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack);
\r
// Build the returned set from the raw result's range and type.
10534 return xpath_node_set(r.begin(), r.end(), r.type());
\r
// Returns a const reference to an xpath_parse_result for this query.
// NOTE(review): the body is not visible in this listing; presumably it returns _result - confirm.
10537 PUGI__FN const xpath_parse_result& xpath_query::result() const
\r
// Static helper whose address serves as the non-null value returned by
// xpath_query::operator unspecified_bool_type().
// NOTE(review): the body is not visible in this listing.
10542 PUGI__FN static void unspecified_bool_xpath_query(xpath_query***)
\r
// Boolean-context conversion: yields unspecified_bool_xpath_query when the query has an
// implementation object, and 0 otherwise.
10546 PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const
\r
10548 return _impl ? unspecified_bool_xpath_query : 0;
\r
// Logical-not operator for the query object.
// NOTE(review): the body is not visible in this listing; presumably it is true when _impl is
// null, mirroring the bool conversion - confirm.
10551 PUGI__FN bool xpath_query::operator!() const
\r
// Convenience overload: compiles the expression string (with optional variables) into a
// temporary xpath_query and delegates to the xpath_query overload.
10556 PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const
\r
10558 xpath_query q(query, variables);
\r
10559 return select_single_node(q);
\r
// Evaluates the query as a node set with this node as context and returns s.first(),
// or a default-constructed xpath_node when the set is empty.
// Note that the whole node set is evaluated even though only one node is returned.
10562 PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const
\r
10564 xpath_node_set s = query.evaluate_node_set(*this);
\r
10565 return s.empty() ? xpath_node() : s.first();
\r
// Convenience overload: compiles the expression string (with optional variables) into a
// temporary xpath_query and delegates to the xpath_query overload.
10568 PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const
\r
10570 xpath_query q(query, variables);
\r
10571 return select_nodes(q);
\r
// Evaluates the given query as a node set, using this node as the context node.
10574 PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const
\r
10576 return query.evaluate_node_set(*this);
\r
// Restore the Borland option state; pairs with the "pragma option push" near the top of the file.
10582 #ifdef __BORLANDC__
\r
10583 # pragma option pop
\r
10586 // Intel C++ does not properly keep warning state for function templates,
\r
10587 // so popping warning state at the end of translation unit leads to warnings in the middle.
\r
// Hence the pop below is limited to MSVC builds that are not using the Intel compiler;
// it pairs with the "pragma warning(push)" near the top of the file.
10588 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
\r
10589 # pragma warning(pop)
\r
10592 // Undefine all local macros (makes sure we're not leaking macros in header-only mode)
\r
// Every name removed below carries the PUGI__ prefix, i.e. these are file-internal helpers
// rather than user-facing PUGIXML_ configuration macros.
10593 #undef PUGI__NO_INLINE
\r
10594 #undef PUGI__STATIC_ASSERT
\r
10595 #undef PUGI__DMC_VOLATILE
\r
10596 #undef PUGI__MSVC_CRT_VERSION
\r
10597 #undef PUGI__NS_BEGIN
\r
10598 #undef PUGI__NS_END
\r
10600 #undef PUGI__FN_NO_INLINE
\r
10601 #undef PUGI__IS_CHARTYPE_IMPL
\r
10602 #undef PUGI__IS_CHARTYPE
\r
10603 #undef PUGI__IS_CHARTYPEX
\r
10604 #undef PUGI__SKIPWS
\r
10605 #undef PUGI__OPTSET
\r
10606 #undef PUGI__PUSHNODE
\r
10607 #undef PUGI__POPNODE
\r
10608 #undef PUGI__SCANFOR
\r
10609 #undef PUGI__SCANWHILE
\r
10610 #undef PUGI__ENDSEG
\r
10611 #undef PUGI__THROW_ERROR
\r
10612 #undef PUGI__CHECK_ERROR
\r
10617 * Copyright (c) 2006-2014 Arseny Kapoulkine
\r
10619 * Permission is hereby granted, free of charge, to any person
\r
10620 * obtaining a copy of this software and associated documentation
\r
10621 * files (the "Software"), to deal in the Software without
\r
10622 * restriction, including without limitation the rights to use,
\r
10623 * copy, modify, merge, publish, distribute, sublicense, and/or sell
\r
10624 * copies of the Software, and to permit persons to whom the
\r
10625 * Software is furnished to do so, subject to the following
\r
10628 * The above copyright notice and this permission notice shall be
\r
10629 * included in all copies or substantial portions of the Software.
\r
10631 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
\r
10632 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
\r
10633 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
\r
10634 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
\r
10635 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
\r
10636 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
\r
10637 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
\r
10638 * OTHER DEALINGS IN THE SOFTWARE.
\r