Automatically generate quadtree children.
[ipdf/code.git] / contrib / pugixml-1.4 / src / pugixml.cpp
1 /**\r
2  * pugixml parser - version 1.4\r
3  * --------------------------------------------------------\r
4  * Copyright (C) 2006-2014, by Arseny Kapoulkine ([email protected])\r
5  * Report bugs and download new versions at http://pugixml.org/\r
6  *\r
7  * This library is distributed under the MIT License. See notice at the end\r
8  * of this file.\r
9  *\r
10  * This work is based on the pugxml parser, which is:\r
11  * Copyright (C) 2003, by Kristen Wegner ([email protected])\r
12  */\r
13 \r
14 #ifndef SOURCE_PUGIXML_CPP\r
15 #define SOURCE_PUGIXML_CPP\r
16 \r
17 #include "pugixml.hpp"\r
18 \r
19 #include <stdlib.h>\r
20 #include <stdio.h>\r
21 #include <string.h>\r
22 #include <assert.h>\r
23 \r
24 #ifdef PUGIXML_WCHAR_MODE\r
25 #       include <wchar.h>\r
26 #endif\r
27 \r
28 #ifndef PUGIXML_NO_XPATH\r
29 #       include <math.h>\r
30 #       include <float.h>\r
31 #       ifdef PUGIXML_NO_EXCEPTIONS\r
32 #               include <setjmp.h>\r
33 #       endif\r
34 #endif\r
35 \r
36 #ifndef PUGIXML_NO_STL\r
37 #       include <istream>\r
38 #       include <ostream>\r
39 #       include <string>\r
40 #endif\r
41 \r
42 // For placement new\r
43 #include <new>\r
44 \r
45 #ifdef _MSC_VER\r
46 #       pragma warning(push)\r
47 #       pragma warning(disable: 4127) // conditional expression is constant\r
48 #       pragma warning(disable: 4324) // structure was padded due to __declspec(align())\r
49 #       pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable\r
50 #       pragma warning(disable: 4702) // unreachable code\r
51 #       pragma warning(disable: 4996) // this function or variable may be unsafe\r
52 #       pragma warning(disable: 4793) // function compiled as native: presence of '_setjmp' makes a function unmanaged\r
53 #endif\r
54 \r
55 #ifdef __INTEL_COMPILER\r
56 #       pragma warning(disable: 177) // function was declared but never referenced \r
57 #       pragma warning(disable: 279) // controlling expression is constant\r
58 #       pragma warning(disable: 1478 1786) // function was declared "deprecated"\r
59 #       pragma warning(disable: 1684) // conversion from pointer to same-sized integral type\r
60 #endif\r
61 \r
62 #if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)\r
63 #       pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away\r
64 #endif\r
65 \r
66 #ifdef __BORLANDC__\r
67 #       pragma option push\r
68 #       pragma warn -8008 // condition is always false\r
69 #       pragma warn -8066 // unreachable code\r
70 #endif\r
71 \r
72 #ifdef __SNC__\r
73 // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug\r
74 #       pragma diag_suppress=178 // function was declared but never referenced\r
75 #       pragma diag_suppress=237 // controlling expression is constant\r
76 #endif\r
77 \r
78 // Inlining controls\r
79 #if defined(_MSC_VER) && _MSC_VER >= 1300\r
80 #       define PUGI__NO_INLINE __declspec(noinline)\r
81 #elif defined(__GNUC__)\r
82 #       define PUGI__NO_INLINE __attribute__((noinline))\r
83 #else\r
84 #       define PUGI__NO_INLINE \r
85 #endif\r
86 \r
87 // Simple static assertion\r
88 #define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }\r
89 \r
90 // Digital Mars C++ bug workaround for passing char loaded from memory via stack\r
91 #ifdef __DMC__\r
92 #       define PUGI__DMC_VOLATILE volatile\r
93 #else\r
94 #       define PUGI__DMC_VOLATILE\r
95 #endif\r
96 \r
97 // Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all)\r
98 #if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)\r
99 using std::memcpy;\r
100 using std::memmove;\r
101 #endif\r
102 \r
103 // In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features\r
104 #if defined(_MSC_VER) && !defined(__S3E__)\r
105 #       define PUGI__MSVC_CRT_VERSION _MSC_VER\r
106 #endif\r
107 \r
108 #ifdef PUGIXML_HEADER_ONLY\r
109 #       define PUGI__NS_BEGIN namespace pugi { namespace impl {\r
110 #       define PUGI__NS_END } }\r
111 #       define PUGI__FN inline\r
112 #       define PUGI__FN_NO_INLINE inline\r
113 #else\r
114 #       if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces\r
115 #               define PUGI__NS_BEGIN namespace pugi { namespace impl {\r
116 #               define PUGI__NS_END } }\r
117 #       else\r
118 #               define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace {\r
119 #               define PUGI__NS_END } } }\r
120 #       endif\r
121 #       define PUGI__FN\r
122 #       define PUGI__FN_NO_INLINE PUGI__NO_INLINE\r
123 #endif\r
124 \r
125 // uintptr_t\r
126 #if !defined(_MSC_VER) || _MSC_VER >= 1600\r
127 #       include <stdint.h>\r
128 #else\r
129 #       ifndef _UINTPTR_T_DEFINED\r
130 // No native uintptr_t in MSVC6 and in some WinCE versions\r
131 typedef size_t uintptr_t;\r
132 #define _UINTPTR_T_DEFINED\r
133 #       endif\r
134 PUGI__NS_BEGIN\r
135         typedef unsigned __int8 uint8_t;\r
136         typedef unsigned __int16 uint16_t;\r
137         typedef unsigned __int32 uint32_t;\r
138 PUGI__NS_END\r
139 #endif\r
140 \r
141 // Memory allocation\r
142 PUGI__NS_BEGIN\r
143         PUGI__FN void* default_allocate(size_t size)\r
144         {\r
145                 return malloc(size);\r
146         }\r
147 \r
148         PUGI__FN void default_deallocate(void* ptr)\r
149         {\r
150                 free(ptr);\r
151         }\r
152 \r
153         template <typename T>\r
154         struct xml_memory_management_function_storage\r
155         {\r
156                 static allocation_function allocate;\r
157                 static deallocation_function deallocate;\r
158         };\r
159 \r
160         template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate;\r
161         template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate;\r
162 \r
163         typedef xml_memory_management_function_storage<int> xml_memory;\r
164 PUGI__NS_END\r
165 \r
166 // String utilities\r
167 PUGI__NS_BEGIN\r
168         // Get string length\r
169         PUGI__FN size_t strlength(const char_t* s)\r
170         {\r
171                 assert(s);\r
172 \r
173         #ifdef PUGIXML_WCHAR_MODE\r
174                 return wcslen(s);\r
175         #else\r
176                 return strlen(s);\r
177         #endif\r
178         }\r
179 \r
180         // Compare two strings\r
181         PUGI__FN bool strequal(const char_t* src, const char_t* dst)\r
182         {\r
183                 assert(src && dst);\r
184 \r
185         #ifdef PUGIXML_WCHAR_MODE\r
186                 return wcscmp(src, dst) == 0;\r
187         #else\r
188                 return strcmp(src, dst) == 0;\r
189         #endif\r
190         }\r
191 \r
192         // Compare lhs with [rhs_begin, rhs_end)\r
193         PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)\r
194         {\r
195                 for (size_t i = 0; i < count; ++i)\r
196                         if (lhs[i] != rhs[i])\r
197                                 return false;\r
198         \r
199                 return lhs[count] == 0;\r
200         }\r
201 \r
202         // Get length of wide string, even if CRT lacks wide character support\r
203         PUGI__FN size_t strlength_wide(const wchar_t* s)\r
204         {\r
205                 assert(s);\r
206 \r
207         #ifdef PUGIXML_WCHAR_MODE\r
208                 return wcslen(s);\r
209         #else\r
210                 const wchar_t* end = s;\r
211                 while (*end) end++;\r
212                 return static_cast<size_t>(end - s);\r
213         #endif\r
214         }\r
215 \r
216 #ifdef PUGIXML_WCHAR_MODE\r
217         // Convert string to wide string, assuming all symbols are ASCII\r
218         PUGI__FN void widen_ascii(wchar_t* dest, const char* source)\r
219         {\r
220                 for (const char* i = source; *i; ++i) *dest++ = *i;\r
221                 *dest = 0;\r
222         }\r
223 #endif\r
224 PUGI__NS_END\r
225 \r
226 #if !defined(PUGIXML_NO_STL) || !defined(PUGIXML_NO_XPATH)\r
227 // auto_ptr-like buffer holder for exception recovery\r
228 PUGI__NS_BEGIN\r
229         struct buffer_holder\r
230         {\r
231                 void* data;\r
232                 void (*deleter)(void*);\r
233 \r
234                 buffer_holder(void* data_, void (*deleter_)(void*)): data(data_), deleter(deleter_)\r
235                 {\r
236                 }\r
237 \r
238                 ~buffer_holder()\r
239                 {\r
240                         if (data) deleter(data);\r
241                 }\r
242 \r
243                 void* release()\r
244                 {\r
245                         void* result = data;\r
246                         data = 0;\r
247                         return result;\r
248                 }\r
249         };\r
250 PUGI__NS_END\r
251 #endif\r
252 \r
253 PUGI__NS_BEGIN\r
254         static const size_t xml_memory_page_size =\r
255         #ifdef PUGIXML_MEMORY_PAGE_SIZE\r
256                 PUGIXML_MEMORY_PAGE_SIZE\r
257         #else\r
258                 32768\r
259         #endif\r
260                 ;\r
261 \r
262         static const uintptr_t xml_memory_page_alignment = 32;\r
263         static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1);\r
264         static const uintptr_t xml_memory_page_name_allocated_mask = 16;\r
265         static const uintptr_t xml_memory_page_value_allocated_mask = 8;\r
266         static const uintptr_t xml_memory_page_type_mask = 7;\r
267 \r
268         struct xml_allocator;\r
269 \r
270         struct xml_memory_page\r
271         {\r
272                 static xml_memory_page* construct(void* memory)\r
273                 {\r
274                         if (!memory) return 0; //$ redundant, left for performance\r
275 \r
276                         xml_memory_page* result = static_cast<xml_memory_page*>(memory);\r
277 \r
278                         result->allocator = 0;\r
279                         result->memory = 0;\r
280                         result->prev = 0;\r
281                         result->next = 0;\r
282                         result->busy_size = 0;\r
283                         result->freed_size = 0;\r
284 \r
285                         return result;\r
286                 }\r
287 \r
288                 xml_allocator* allocator;\r
289 \r
290                 void* memory;\r
291 \r
292                 xml_memory_page* prev;\r
293                 xml_memory_page* next;\r
294 \r
295                 size_t busy_size;\r
296                 size_t freed_size;\r
297 \r
298                 char data[1];\r
299         };\r
300 \r
301         struct xml_memory_string_header\r
302         {\r
303                 uint16_t page_offset; // offset from page->data\r
304                 uint16_t full_size; // 0 if string occupies whole page\r
305         };\r
306 \r
307         struct xml_allocator\r
308         {\r
309                 xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)\r
310                 {\r
311                 }\r
312 \r
313                 xml_memory_page* allocate_page(size_t data_size)\r
314                 {\r
315                         size_t size = offsetof(xml_memory_page, data) + data_size;\r
316 \r
317                         // allocate block with some alignment, leaving memory for worst-case padding\r
318                         void* memory = xml_memory::allocate(size + xml_memory_page_alignment);\r
319                         if (!memory) return 0;\r
320 \r
321                         // align upwards to page boundary\r
322                         void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1));\r
323 \r
324                         // prepare page structure\r
325                         xml_memory_page* page = xml_memory_page::construct(page_memory);\r
326                         assert(page);\r
327 \r
328                         page->memory = memory;\r
329                         page->allocator = _root->allocator;\r
330 \r
331                         return page;\r
332                 }\r
333 \r
334                 static void deallocate_page(xml_memory_page* page)\r
335                 {\r
336                         xml_memory::deallocate(page->memory);\r
337                 }\r
338 \r
339                 void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);\r
340 \r
341                 void* allocate_memory(size_t size, xml_memory_page*& out_page)\r
342                 {\r
343                         if (_busy_size + size > xml_memory_page_size) return allocate_memory_oob(size, out_page);\r
344 \r
345                         void* buf = _root->data + _busy_size;\r
346 \r
347                         _busy_size += size;\r
348 \r
349                         out_page = _root;\r
350 \r
351                         return buf;\r
352                 }\r
353 \r
354                 void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)\r
355                 {\r
356                         if (page == _root) page->busy_size = _busy_size;\r
357 \r
358                         assert(ptr >= page->data && ptr < page->data + page->busy_size);\r
359                         (void)!ptr;\r
360 \r
361                         page->freed_size += size;\r
362                         assert(page->freed_size <= page->busy_size);\r
363 \r
364                         if (page->freed_size == page->busy_size)\r
365                         {\r
366                                 if (page->next == 0)\r
367                                 {\r
368                                         assert(_root == page);\r
369 \r
370                                         // top page freed, just reset sizes\r
371                                         page->busy_size = page->freed_size = 0;\r
372                                         _busy_size = 0;\r
373                                 }\r
374                                 else\r
375                                 {\r
376                                         assert(_root != page);\r
377                                         assert(page->prev);\r
378 \r
379                                         // remove from the list\r
380                                         page->prev->next = page->next;\r
381                                         page->next->prev = page->prev;\r
382 \r
383                                         // deallocate\r
384                                         deallocate_page(page);\r
385                                 }\r
386                         }\r
387                 }\r
388 \r
389                 char_t* allocate_string(size_t length)\r
390                 {\r
391                         // allocate memory for string and header block\r
392                         size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);\r
393                         \r
394                         // round size up to pointer alignment boundary\r
395                         size_t full_size = (size + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1);\r
396 \r
397                         xml_memory_page* page;\r
398                         xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));\r
399 \r
400                         if (!header) return 0;\r
401 \r
402                         // setup header\r
403                         ptrdiff_t page_offset = reinterpret_cast<char*>(header) - page->data;\r
404 \r
405                         assert(page_offset >= 0 && page_offset < (1 << 16));\r
406                         header->page_offset = static_cast<uint16_t>(page_offset);\r
407 \r
408                         // full_size == 0 for large strings that occupy the whole page\r
409                         assert(full_size < (1 << 16) || (page->busy_size == full_size && page_offset == 0));\r
410                         header->full_size = static_cast<uint16_t>(full_size < (1 << 16) ? full_size : 0);\r
411 \r
412                         // round-trip through void* to avoid 'cast increases required alignment of target type' warning\r
413                         // header is guaranteed a pointer-sized alignment, which should be enough for char_t\r
414                         return static_cast<char_t*>(static_cast<void*>(header + 1));\r
415                 }\r
416 \r
417                 void deallocate_string(char_t* string)\r
418                 {\r
419                         // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings\r
420                         // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string\r
421 \r
422                         // get header\r
423                         xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;\r
424 \r
425                         // deallocate\r
426                         size_t page_offset = offsetof(xml_memory_page, data) + header->page_offset;\r
427                         xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));\r
428 \r
429                         // if full_size == 0 then this string occupies the whole page\r
430                         size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size;\r
431 \r
432                         deallocate_memory(header, full_size, page);\r
433                 }\r
434 \r
435                 xml_memory_page* _root;\r
436                 size_t _busy_size;\r
437         };\r
438 \r
439         PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)\r
440         {\r
441                 const size_t large_allocation_threshold = xml_memory_page_size / 4;\r
442 \r
443                 xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);\r
444                 out_page = page;\r
445 \r
446                 if (!page) return 0;\r
447 \r
448                 if (size <= large_allocation_threshold)\r
449                 {\r
450                         _root->busy_size = _busy_size;\r
451 \r
452                         // insert page at the end of linked list\r
453                         page->prev = _root;\r
454                         _root->next = page;\r
455                         _root = page;\r
456 \r
457                         _busy_size = size;\r
458                 }\r
459                 else\r
460                 {\r
461                         // insert page before the end of linked list, so that it is deleted as soon as possible\r
462                         // the last page is not deleted even if it's empty (see deallocate_memory)\r
463                         assert(_root->prev);\r
464 \r
465                         page->prev = _root->prev;\r
466                         page->next = _root;\r
467 \r
468                         _root->prev->next = page;\r
469                         _root->prev = page;\r
470                 }\r
471 \r
472                 // allocate inside page\r
473                 page->busy_size = size;\r
474 \r
475                 return page->data;\r
476         }\r
477 PUGI__NS_END\r
478 \r
479 namespace pugi\r
480 {\r
481         /// A 'name=value' XML attribute structure.\r
482         struct xml_attribute_struct\r
483         {\r
484                 /// Default ctor\r
485                 xml_attribute_struct(impl::xml_memory_page* page): header(reinterpret_cast<uintptr_t>(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0)\r
486                 {\r
487                 }\r
488 \r
489                 uintptr_t header;\r
490 \r
491                 char_t* name;   ///< Pointer to attribute name.\r
492                 char_t* value;  ///< Pointer to attribute value.\r
493 \r
494                 xml_attribute_struct* prev_attribute_c; ///< Previous attribute (cyclic list)\r
495                 xml_attribute_struct* next_attribute;   ///< Next attribute\r
496         };\r
497 \r
498         /// An XML document tree node.\r
499         struct xml_node_struct\r
500         {\r
501                 /// Default ctor\r
502                 /// \param type - node type\r
503                 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(reinterpret_cast<uintptr_t>(page) | (type - 1)), parent(0), name(0), value(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)\r
504                 {\r
505                 }\r
506 \r
507                 uintptr_t header;\r
508 \r
509                 xml_node_struct*                parent;                                 ///< Pointer to parent\r
510 \r
511                 char_t*                                 name;                                   ///< Pointer to element name.\r
512                 char_t*                                 value;                                  ///< Pointer to any associated string data.\r
513 \r
514                 xml_node_struct*                first_child;                    ///< First child\r
515                 \r
516                 xml_node_struct*                prev_sibling_c;                 ///< Left brother (cyclic list)\r
517                 xml_node_struct*                next_sibling;                   ///< Right brother\r
518                 \r
519                 xml_attribute_struct*   first_attribute;                ///< First attribute\r
520         };\r
521 }\r
522 \r
523 PUGI__NS_BEGIN\r
524         struct xml_extra_buffer\r
525         {\r
526                 char_t* buffer;\r
527                 xml_extra_buffer* next;\r
528         };\r
529 \r
530         struct xml_document_struct: public xml_node_struct, public xml_allocator\r
531         {\r
532                 xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0)\r
533                 {\r
534                 }\r
535 \r
536                 const char_t* buffer;\r
537 \r
538                 xml_extra_buffer* extra_buffers;\r
539         };\r
540 \r
541         inline xml_allocator& get_allocator(const xml_node_struct* node)\r
542         {\r
543                 assert(node);\r
544 \r
545                 return *reinterpret_cast<xml_memory_page*>(node->header & xml_memory_page_pointer_mask)->allocator;\r
546         }\r
547 PUGI__NS_END\r
548 \r
549 // Low-level DOM operations\r
550 PUGI__NS_BEGIN\r
551         inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)\r
552         {\r
553                 xml_memory_page* page;\r
554                 void* memory = alloc.allocate_memory(sizeof(xml_attribute_struct), page);\r
555 \r
556                 return new (memory) xml_attribute_struct(page);\r
557         }\r
558 \r
559         inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)\r
560         {\r
561                 xml_memory_page* page;\r
562                 void* memory = alloc.allocate_memory(sizeof(xml_node_struct), page);\r
563 \r
564                 return new (memory) xml_node_struct(page, type);\r
565         }\r
566 \r
567         inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)\r
568         {\r
569                 uintptr_t header = a->header;\r
570 \r
571                 if (header & impl::xml_memory_page_name_allocated_mask) alloc.deallocate_string(a->name);\r
572                 if (header & impl::xml_memory_page_value_allocated_mask) alloc.deallocate_string(a->value);\r
573 \r
574                 alloc.deallocate_memory(a, sizeof(xml_attribute_struct), reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask));\r
575         }\r
576 \r
577         inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)\r
578         {\r
579                 uintptr_t header = n->header;\r
580 \r
581                 if (header & impl::xml_memory_page_name_allocated_mask) alloc.deallocate_string(n->name);\r
582                 if (header & impl::xml_memory_page_value_allocated_mask) alloc.deallocate_string(n->value);\r
583 \r
584                 for (xml_attribute_struct* attr = n->first_attribute; attr; )\r
585                 {\r
586                         xml_attribute_struct* next = attr->next_attribute;\r
587 \r
588                         destroy_attribute(attr, alloc);\r
589 \r
590                         attr = next;\r
591                 }\r
592 \r
593                 for (xml_node_struct* child = n->first_child; child; )\r
594                 {\r
595                         xml_node_struct* next = child->next_sibling;\r
596 \r
597                         destroy_node(child, alloc);\r
598 \r
599                         child = next;\r
600                 }\r
601 \r
602                 alloc.deallocate_memory(n, sizeof(xml_node_struct), reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask));\r
603         }\r
604 \r
605         PUGI__FN_NO_INLINE xml_node_struct* append_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)\r
606         {\r
607                 xml_node_struct* child = allocate_node(alloc, type);\r
608                 if (!child) return 0;\r
609 \r
610                 child->parent = node;\r
611 \r
612                 xml_node_struct* first_child = node->first_child;\r
613                         \r
614                 if (first_child)\r
615                 {\r
616                         xml_node_struct* last_child = first_child->prev_sibling_c;\r
617 \r
618                         last_child->next_sibling = child;\r
619                         child->prev_sibling_c = last_child;\r
620                         first_child->prev_sibling_c = child;\r
621                 }\r
622                 else\r
623                 {\r
624                         node->first_child = child;\r
625                         child->prev_sibling_c = child;\r
626                 }\r
627                         \r
628                 return child;\r
629         }\r
630 \r
631         PUGI__FN_NO_INLINE xml_attribute_struct* append_attribute_ll(xml_node_struct* node, xml_allocator& alloc)\r
632         {\r
633                 xml_attribute_struct* a = allocate_attribute(alloc);\r
634                 if (!a) return 0;\r
635 \r
636                 xml_attribute_struct* first_attribute = node->first_attribute;\r
637 \r
638                 if (first_attribute)\r
639                 {\r
640                         xml_attribute_struct* last_attribute = first_attribute->prev_attribute_c;\r
641 \r
642                         last_attribute->next_attribute = a;\r
643                         a->prev_attribute_c = last_attribute;\r
644                         first_attribute->prev_attribute_c = a;\r
645                 }\r
646                 else\r
647                 {\r
648                         node->first_attribute = a;\r
649                         a->prev_attribute_c = a;\r
650                 }\r
651                         \r
652                 return a;\r
653         }\r
654 PUGI__NS_END\r
655 \r
656 // Helper classes for code generation\r
657 PUGI__NS_BEGIN\r
658         struct opt_false\r
659         {\r
660                 enum { value = 0 };\r
661         };\r
662 \r
663         struct opt_true\r
664         {\r
665                 enum { value = 1 };\r
666         };\r
667 PUGI__NS_END\r
668 \r
669 // Unicode utilities\r
670 PUGI__NS_BEGIN\r
671         inline uint16_t endian_swap(uint16_t value)\r
672         {\r
673                 return static_cast<uint16_t>(((value & 0xff) << 8) | (value >> 8));\r
674         }\r
675 \r
676         inline uint32_t endian_swap(uint32_t value)\r
677         {\r
678                 return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | (value >> 24);\r
679         }\r
680 \r
681         struct utf8_counter\r
682         {\r
683                 typedef size_t value_type;\r
684 \r
685                 static value_type low(value_type result, uint32_t ch)\r
686                 {\r
687                         // U+0000..U+007F\r
688                         if (ch < 0x80) return result + 1;\r
689                         // U+0080..U+07FF\r
690                         else if (ch < 0x800) return result + 2;\r
691                         // U+0800..U+FFFF\r
692                         else return result + 3;\r
693                 }\r
694 \r
695                 static value_type high(value_type result, uint32_t)\r
696                 {\r
697                         // U+10000..U+10FFFF\r
698                         return result + 4;\r
699                 }\r
700         };\r
701 \r
702         struct utf8_writer\r
703         {\r
704                 typedef uint8_t* value_type;\r
705 \r
706                 static value_type low(value_type result, uint32_t ch)\r
707                 {\r
708                         // U+0000..U+007F\r
709                         if (ch < 0x80)\r
710                         {\r
711                                 *result = static_cast<uint8_t>(ch);\r
712                                 return result + 1;\r
713                         }\r
714                         // U+0080..U+07FF\r
715                         else if (ch < 0x800)\r
716                         {\r
717                                 result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6));\r
718                                 result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F));\r
719                                 return result + 2;\r
720                         }\r
721                         // U+0800..U+FFFF\r
722                         else\r
723                         {\r
724                                 result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12));\r
725                                 result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));\r
726                                 result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F));\r
727                                 return result + 3;\r
728                         }\r
729                 }\r
730 \r
731                 static value_type high(value_type result, uint32_t ch)\r
732                 {\r
733                         // U+10000..U+10FFFF\r
734                         result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18));\r
735                         result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));\r
736                         result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));\r
737                         result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F));\r
738                         return result + 4;\r
739                 }\r
740 \r
741                 static value_type any(value_type result, uint32_t ch)\r
742                 {\r
743                         return (ch < 0x10000) ? low(result, ch) : high(result, ch);\r
744                 }\r
745         };\r
746 \r
747         struct utf16_counter\r
748         {\r
749                 typedef size_t value_type;\r
750 \r
751                 static value_type low(value_type result, uint32_t)\r
752                 {\r
753                         return result + 1;\r
754                 }\r
755 \r
756                 static value_type high(value_type result, uint32_t)\r
757                 {\r
758                         return result + 2;\r
759                 }\r
760         };\r
761 \r
762         struct utf16_writer\r
763         {\r
764                 typedef uint16_t* value_type;\r
765 \r
766                 static value_type low(value_type result, uint32_t ch)\r
767                 {\r
768                         *result = static_cast<uint16_t>(ch);\r
769 \r
770                         return result + 1;\r
771                 }\r
772 \r
773                 static value_type high(value_type result, uint32_t ch)\r
774                 {\r
775                         uint32_t msh = static_cast<uint32_t>(ch - 0x10000) >> 10;\r
776                         uint32_t lsh = static_cast<uint32_t>(ch - 0x10000) & 0x3ff;\r
777 \r
778                         result[0] = static_cast<uint16_t>(0xD800 + msh);\r
779                         result[1] = static_cast<uint16_t>(0xDC00 + lsh);\r
780 \r
781                         return result + 2;\r
782                 }\r
783 \r
784                 static value_type any(value_type result, uint32_t ch)\r
785                 {\r
786                         return (ch < 0x10000) ? low(result, ch) : high(result, ch);\r
787                 }\r
788         };\r
789 \r
790         struct utf32_counter\r
791         {\r
792                 typedef size_t value_type;\r
793 \r
794                 static value_type low(value_type result, uint32_t)\r
795                 {\r
796                         return result + 1;\r
797                 }\r
798 \r
799                 static value_type high(value_type result, uint32_t)\r
800                 {\r
801                         return result + 1;\r
802                 }\r
803         };\r
804 \r
805         struct utf32_writer\r
806         {\r
807                 typedef uint32_t* value_type;\r
808 \r
809                 static value_type low(value_type result, uint32_t ch)\r
810                 {\r
811                         *result = ch;\r
812 \r
813                         return result + 1;\r
814                 }\r
815 \r
816                 static value_type high(value_type result, uint32_t ch)\r
817                 {\r
818                         *result = ch;\r
819 \r
820                         return result + 1;\r
821                 }\r
822 \r
823                 static value_type any(value_type result, uint32_t ch)\r
824                 {\r
825                         *result = ch;\r
826 \r
827                         return result + 1;\r
828                 }\r
829         };\r
830 \r
831         struct latin1_writer\r
832         {\r
833                 typedef uint8_t* value_type;\r
834 \r
835                 static value_type low(value_type result, uint32_t ch)\r
836                 {\r
837                         *result = static_cast<uint8_t>(ch > 255 ? '?' : ch);\r
838 \r
839                         return result + 1;\r
840                 }\r
841 \r
842                 static value_type high(value_type result, uint32_t ch)\r
843                 {\r
844                         (void)ch;\r
845 \r
846                         *result = '?';\r
847 \r
848                         return result + 1;\r
849                 }\r
850         };\r
851 \r
852         template <size_t size> struct wchar_selector;\r
853 \r
854         template <> struct wchar_selector<2>\r
855         {\r
856                 typedef uint16_t type;\r
857                 typedef utf16_counter counter;\r
858                 typedef utf16_writer writer;\r
859         };\r
860 \r
861         template <> struct wchar_selector<4>\r
862         {\r
863                 typedef uint32_t type;\r
864                 typedef utf32_counter counter;\r
865                 typedef utf32_writer writer;\r
866         };\r
867 \r
868         typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;\r
869         typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;\r
870 \r
871         template <typename Traits, typename opt_swap = opt_false> struct utf_decoder\r
872         {\r
873                 static inline typename Traits::value_type decode_utf8_block(const uint8_t* data, size_t size, typename Traits::value_type result)\r
874                 {\r
875                         const uint8_t utf8_byte_mask = 0x3f;\r
876 \r
877                         while (size)\r
878                         {\r
879                                 uint8_t lead = *data;\r
880 \r
881                                 // 0xxxxxxx -> U+0000..U+007F\r
882                                 if (lead < 0x80)\r
883                                 {\r
884                                         result = Traits::low(result, lead);\r
885                                         data += 1;\r
886                                         size -= 1;\r
887 \r
888                                         // process aligned single-byte (ascii) blocks\r
889                                         if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)\r
890                                         {\r
891                                                 // round-trip through void* to silence 'cast increases required alignment of target type' warnings\r
892                                                 while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0)\r
893                                                 {\r
894                                                         result = Traits::low(result, data[0]);\r
895                                                         result = Traits::low(result, data[1]);\r
896                                                         result = Traits::low(result, data[2]);\r
897                                                         result = Traits::low(result, data[3]);\r
898                                                         data += 4;\r
899                                                         size -= 4;\r
900                                                 }\r
901                                         }\r
902                                 }\r
903                                 // 110xxxxx -> U+0080..U+07FF\r
904                                 else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)\r
905                                 {\r
906                                         result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));\r
907                                         data += 2;\r
908                                         size -= 2;\r
909                                 }\r
910                                 // 1110xxxx -> U+0800-U+FFFF\r
911                                 else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)\r
912                                 {\r
913                                         result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));\r
914                                         data += 3;\r
915                                         size -= 3;\r
916                                 }\r
917                                 // 11110xxx -> U+10000..U+10FFFF\r
918                                 else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)\r
919                                 {\r
920                                         result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));\r
921                                         data += 4;\r
922                                         size -= 4;\r
923                                 }\r
924                                 // 10xxxxxx or 11111xxx -> invalid\r
925                                 else\r
926                                 {\r
927                                         data += 1;\r
928                                         size -= 1;\r
929                                 }\r
930                         }\r
931 \r
932                         return result;\r
933                 }\r
934 \r
935                 static inline typename Traits::value_type decode_utf16_block(const uint16_t* data, size_t size, typename Traits::value_type result)\r
936                 {\r
937                         const uint16_t* end = data + size;\r
938 \r
939                         while (data < end)\r
940                         {\r
941                                 unsigned int lead = opt_swap::value ? endian_swap(*data) : *data;\r
942 \r
943                                 // U+0000..U+D7FF\r
944                                 if (lead < 0xD800)\r
945                                 {\r
946                                         result = Traits::low(result, lead);\r
947                                         data += 1;\r
948                                 }\r
949                                 // U+E000..U+FFFF\r
950                                 else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)\r
951                                 {\r
952                                         result = Traits::low(result, lead);\r
953                                         data += 1;\r
954                                 }\r
955                                 // surrogate pair lead\r
956                                 else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && data + 1 < end)\r
957                                 {\r
958                                         uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];\r
959 \r
960                                         if (static_cast<unsigned int>(next - 0xDC00) < 0x400)\r
961                                         {\r
962                                                 result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));\r
963                                                 data += 2;\r
964                                         }\r
965                                         else\r
966                                         {\r
967                                                 data += 1;\r
968                                         }\r
969                                 }\r
970                                 else\r
971                                 {\r
972                                         data += 1;\r
973                                 }\r
974                         }\r
975 \r
976                         return result;\r
977                 }\r
978 \r
979                 static inline typename Traits::value_type decode_utf32_block(const uint32_t* data, size_t size, typename Traits::value_type result)\r
980                 {\r
981                         const uint32_t* end = data + size;\r
982 \r
983                         while (data < end)\r
984                         {\r
985                                 uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;\r
986 \r
987                                 // U+0000..U+FFFF\r
988                                 if (lead < 0x10000)\r
989                                 {\r
990                                         result = Traits::low(result, lead);\r
991                                         data += 1;\r
992                                 }\r
993                                 // U+10000..U+10FFFF\r
994                                 else\r
995                                 {\r
996                                         result = Traits::high(result, lead);\r
997                                         data += 1;\r
998                                 }\r
999                         }\r
1000 \r
1001                         return result;\r
1002                 }\r
1003 \r
1004                 static inline typename Traits::value_type decode_latin1_block(const uint8_t* data, size_t size, typename Traits::value_type result)\r
1005                 {\r
1006                         for (size_t i = 0; i < size; ++i)\r
1007                         {\r
1008                                 result = Traits::low(result, data[i]);\r
1009                         }\r
1010 \r
1011                         return result;\r
1012                 }\r
1013 \r
1014                 static inline typename Traits::value_type decode_wchar_block_impl(const uint16_t* data, size_t size, typename Traits::value_type result)\r
1015                 {\r
1016                         return decode_utf16_block(data, size, result);\r
1017                 }\r
1018 \r
1019                 static inline typename Traits::value_type decode_wchar_block_impl(const uint32_t* data, size_t size, typename Traits::value_type result)\r
1020                 {\r
1021                         return decode_utf32_block(data, size, result);\r
1022                 }\r
1023 \r
1024                 static inline typename Traits::value_type decode_wchar_block(const wchar_t* data, size_t size, typename Traits::value_type result)\r
1025                 {\r
1026                         return decode_wchar_block_impl(reinterpret_cast<const wchar_selector<sizeof(wchar_t)>::type*>(data), size, result);\r
1027                 }\r
1028         };\r
1029 \r
1030         template <typename T> PUGI__FN void convert_utf_endian_swap(T* result, const T* data, size_t length)\r
1031         {\r
1032                 for (size_t i = 0; i < length; ++i) result[i] = endian_swap(data[i]);\r
1033         }\r
1034 \r
1035 #ifdef PUGIXML_WCHAR_MODE\r
1036         PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)\r
1037         {\r
1038                 for (size_t i = 0; i < length; ++i) result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i])));\r
1039         }\r
1040 #endif\r
1041 PUGI__NS_END\r
1042 \r
1043 PUGI__NS_BEGIN\r
1044         enum chartype_t\r
1045         {\r
1046                 ct_parse_pcdata = 1,    // \0, &, \r, <\r
1047                 ct_parse_attr = 2,              // \0, &, \r, ', "\r
1048                 ct_parse_attr_ws = 4,   // \0, &, \r, ', ", \n, tab\r
1049                 ct_space = 8,                   // \r, \n, space, tab\r
1050                 ct_parse_cdata = 16,    // \0, ], >, \r\r
1051                 ct_parse_comment = 32,  // \0, -, >, \r\r
1052                 ct_symbol = 64,                 // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .\r
1053                 ct_start_symbol = 128   // Any symbol > 127, a-z, A-Z, _, :\r
1054         };\r
1055 \r
1056         static const unsigned char chartype_table[256] =\r
1057         {\r
1058                 55,  0,   0,   0,   0,   0,   0,   0,      0,   12,  12,  0,   0,   63,  0,   0,   // 0-15\r
1059                 0,   0,   0,   0,   0,   0,   0,   0,      0,   0,   0,   0,   0,   0,   0,   0,   // 16-31\r
1060                 8,   0,   6,   0,   0,   0,   7,   6,      0,   0,   0,   0,   0,   96,  64,  0,   // 32-47\r
1061                 64,  64,  64,  64,  64,  64,  64,  64,     64,  64,  192, 0,   1,   0,   48,  0,   // 48-63\r
1062                 0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 64-79\r
1063                 192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0,   0,   16,  0,   192, // 80-95\r
1064                 0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 96-111\r
1065                 192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0, 0, 0, 0, 0,           // 112-127\r
1066 \r
1067                 192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 128+\r
1068                 192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,\r
1069                 192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,\r
1070                 192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,\r
1071                 192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,\r
1072                 192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,\r
1073                 192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,\r
1074                 192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192\r
1075         };\r
1076 \r
1077         enum chartypex_t\r
1078         {\r
1079                 ctx_special_pcdata = 1,   // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >\r
1080                 ctx_special_attr = 2,     // Any symbol >= 0 and < 32 (except \t), &, <, >, "\r
1081                 ctx_start_symbol = 4,     // Any symbol > 127, a-z, A-Z, _\r
1082                 ctx_digit = 8,                    // 0-9\r
1083                 ctx_symbol = 16                   // Any symbol > 127, a-z, A-Z, 0-9, _, -, .\r
1084         };\r
1085         \r
1086         static const unsigned char chartypex_table[256] =\r
1087         {\r
1088                 3,  3,  3,  3,  3,  3,  3,  3,     3,  0,  2,  3,  3,  2,  3,  3,     // 0-15\r
1089                 3,  3,  3,  3,  3,  3,  3,  3,     3,  3,  3,  3,  3,  3,  3,  3,     // 16-31\r
1090                 0,  0,  2,  0,  0,  0,  3,  0,     0,  0,  0,  0,  0, 16, 16,  0,     // 32-47\r
1091                 24, 24, 24, 24, 24, 24, 24, 24,    24, 24, 0,  0,  3,  0,  3,  0,     // 48-63\r
1092 \r
1093                 0,  20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 64-79\r
1094                 20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 0,  0,  0,  0,  20,    // 80-95\r
1095                 0,  20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 96-111\r
1096                 20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 0,  0,  0,  0,  0,     // 112-127\r
1097 \r
1098                 20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 128+\r
1099                 20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,\r
1100                 20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,\r
1101                 20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,\r
1102                 20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,\r
1103                 20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,\r
1104                 20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,\r
1105                 20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20\r
1106         };\r
1107         \r
1108 #ifdef PUGIXML_WCHAR_MODE\r
1109         #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))\r
1110 #else\r
1111         #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))\r
1112 #endif\r
1113 \r
1114         #define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)\r
1115         #define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)\r
1116 \r
1117         PUGI__FN bool is_little_endian()\r
1118         {\r
1119                 unsigned int ui = 1;\r
1120 \r
1121                 return *reinterpret_cast<unsigned char*>(&ui) == 1;\r
1122         }\r
1123 \r
1124         PUGI__FN xml_encoding get_wchar_encoding()\r
1125         {\r
1126                 PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);\r
1127 \r
1128                 if (sizeof(wchar_t) == 2)\r
1129                         return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;\r
1130                 else \r
1131                         return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;\r
1132         }\r
1133 \r
1134         PUGI__FN xml_encoding guess_buffer_encoding(uint8_t d0, uint8_t d1, uint8_t d2, uint8_t d3)\r
1135         {\r
1136                 // look for BOM in first few bytes\r
1137                 if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;\r
1138                 if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;\r
1139                 if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;\r
1140                 if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;\r
1141                 if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;\r
1142 \r
1143                 // look for <, <? or <?xm in various encodings\r
1144                 if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;\r
1145                 if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;\r
1146                 if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;\r
1147                 if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;\r
1148                 if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d) return encoding_utf8;\r
1149 \r
1150                 // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)\r
1151                 if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;\r
1152                 if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;\r
1153 \r
1154                 // no known BOM detected, assume utf8\r
1155                 return encoding_utf8;\r
1156         }\r
1157 \r
1158         PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)\r
1159         {\r
1160                 // replace wchar encoding with utf implementation\r
1161                 if (encoding == encoding_wchar) return get_wchar_encoding();\r
1162 \r
1163                 // replace utf16 encoding with utf16 with specific endianness\r
1164                 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;\r
1165 \r
1166                 // replace utf32 encoding with utf32 with specific endianness\r
1167                 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;\r
1168 \r
1169                 // only do autodetection if no explicit encoding is requested\r
1170                 if (encoding != encoding_auto) return encoding;\r
1171 \r
1172                 // skip encoding autodetection if input buffer is too small\r
1173                 if (size < 4) return encoding_utf8;\r
1174 \r
1175                 // try to guess encoding (based on XML specification, Appendix F.1)\r
1176                 const uint8_t* data = static_cast<const uint8_t*>(contents);\r
1177 \r
1178                 PUGI__DMC_VOLATILE uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];\r
1179 \r
1180                 return guess_buffer_encoding(d0, d1, d2, d3);\r
1181         }\r
1182 \r
1183         PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)\r
1184         {\r
1185                 size_t length = size / sizeof(char_t);\r
1186 \r
1187                 if (is_mutable)\r
1188                 {\r
1189                         out_buffer = static_cast<char_t*>(const_cast<void*>(contents));\r
1190                         out_length = length;\r
1191                 }\r
1192                 else\r
1193                 {\r
1194                         char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));\r
1195                         if (!buffer) return false;\r
1196 \r
1197                         memcpy(buffer, contents, length * sizeof(char_t));\r
1198                         buffer[length] = 0;\r
1199 \r
1200                         out_buffer = buffer;\r
1201                         out_length = length + 1;\r
1202                 }\r
1203 \r
1204                 return true;\r
1205         }\r
1206 \r
1207 #ifdef PUGIXML_WCHAR_MODE\r
1208         PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)\r
1209         {\r
1210                 return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||\r
1211                            (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);\r
1212         }\r
1213 \r
1214         PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)\r
1215         {\r
1216                 const char_t* data = static_cast<const char_t*>(contents);\r
1217                 size_t length = size / sizeof(char_t);\r
1218 \r
1219                 if (is_mutable)\r
1220                 {\r
1221                         char_t* buffer = const_cast<char_t*>(data);\r
1222 \r
1223                         convert_wchar_endian_swap(buffer, data, length);\r
1224 \r
1225                         out_buffer = buffer;\r
1226                         out_length = length;\r
1227                 }\r
1228                 else\r
1229                 {\r
1230                         char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));\r
1231                         if (!buffer) return false;\r
1232 \r
1233                         convert_wchar_endian_swap(buffer, data, length);\r
1234                         buffer[length] = 0;\r
1235 \r
1236                         out_buffer = buffer;\r
1237                         out_length = length + 1;\r
1238                 }\r
1239 \r
1240                 return true;\r
1241         }\r
1242 \r
1243         PUGI__FN bool convert_buffer_utf8(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)\r
1244         {\r
1245                 const uint8_t* data = static_cast<const uint8_t*>(contents);\r
1246                 size_t data_length = size;\r
1247 \r
1248                 // first pass: get length in wchar_t units\r
1249                 size_t length = utf_decoder<wchar_counter>::decode_utf8_block(data, data_length, 0);\r
1250 \r
1251                 // allocate buffer of suitable length\r
1252                 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));\r
1253                 if (!buffer) return false;\r
1254 \r
1255                 // second pass: convert utf8 input to wchar_t\r
1256                 wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);\r
1257                 wchar_writer::value_type oend = utf_decoder<wchar_writer>::decode_utf8_block(data, data_length, obegin);\r
1258 \r
1259                 assert(oend == obegin + length);\r
1260                 *oend = 0;\r
1261 \r
1262                 out_buffer = buffer;\r
1263                 out_length = length + 1;\r
1264 \r
1265                 return true;\r
1266         }\r
1267 \r
1268         template <typename opt_swap> PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)\r
1269         {\r
1270                 const uint16_t* data = static_cast<const uint16_t*>(contents);\r
1271                 size_t data_length = size / sizeof(uint16_t);\r
1272 \r
1273                 // first pass: get length in wchar_t units\r
1274                 size_t length = utf_decoder<wchar_counter, opt_swap>::decode_utf16_block(data, data_length, 0);\r
1275 \r
1276                 // allocate buffer of suitable length\r
1277                 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));\r
1278                 if (!buffer) return false;\r
1279 \r
1280                 // second pass: convert utf16 input to wchar_t\r
1281                 wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);\r
1282                 wchar_writer::value_type oend = utf_decoder<wchar_writer, opt_swap>::decode_utf16_block(data, data_length, obegin);\r
1283 \r
1284                 assert(oend == obegin + length);\r
1285                 *oend = 0;\r
1286 \r
1287                 out_buffer = buffer;\r
1288                 out_length = length + 1;\r
1289 \r
1290                 return true;\r
1291         }\r
1292 \r
1293         template <typename opt_swap> PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)\r
1294         {\r
1295                 const uint32_t* data = static_cast<const uint32_t*>(contents);\r
1296                 size_t data_length = size / sizeof(uint32_t);\r
1297 \r
1298                 // first pass: get length in wchar_t units\r
1299                 size_t length = utf_decoder<wchar_counter, opt_swap>::decode_utf32_block(data, data_length, 0);\r
1300 \r
1301                 // allocate buffer of suitable length\r
1302                 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));\r
1303                 if (!buffer) return false;\r
1304 \r
1305                 // second pass: convert utf32 input to wchar_t\r
1306                 wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);\r
1307                 wchar_writer::value_type oend = utf_decoder<wchar_writer, opt_swap>::decode_utf32_block(data, data_length, obegin);\r
1308 \r
1309                 assert(oend == obegin + length);\r
1310                 *oend = 0;\r
1311 \r
1312                 out_buffer = buffer;\r
1313                 out_length = length + 1;\r
1314 \r
1315                 return true;\r
1316         }\r
1317 \r
1318         PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)\r
1319         {\r
1320                 const uint8_t* data = static_cast<const uint8_t*>(contents);\r
1321                 size_t data_length = size;\r
1322 \r
1323                 // get length in wchar_t units\r
1324                 size_t length = data_length;\r
1325 \r
1326                 // allocate buffer of suitable length\r
1327                 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));\r
1328                 if (!buffer) return false;\r
1329 \r
1330                 // convert latin1 input to wchar_t\r
1331                 wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);\r
1332                 wchar_writer::value_type oend = utf_decoder<wchar_writer>::decode_latin1_block(data, data_length, obegin);\r
1333 \r
1334                 assert(oend == obegin + length);\r
1335                 *oend = 0;\r
1336 \r
1337                 out_buffer = buffer;\r
1338                 out_length = length + 1;\r
1339 \r
1340                 return true;\r
1341         }\r
1342 \r
1343         PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)\r
1344         {\r
1345                 // get native encoding\r
1346                 xml_encoding wchar_encoding = get_wchar_encoding();\r
1347 \r
1348                 // fast path: no conversion required\r
1349                 if (encoding == wchar_encoding) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);\r
1350 \r
1351                 // only endian-swapping is required\r
1352                 if (need_endian_swap_utf(encoding, wchar_encoding)) return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);\r
1353 \r
1354                 // source encoding is utf8\r
1355                 if (encoding == encoding_utf8) return convert_buffer_utf8(out_buffer, out_length, contents, size);\r
1356 \r
1357                 // source encoding is utf16\r
1358                 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)\r
1359                 {\r
1360                         xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;\r
1361 \r
1362                         return (native_encoding == encoding) ?\r
1363                                 convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) :\r
1364                                 convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());\r
1365                 }\r
1366 \r
1367                 // source encoding is utf32\r
1368                 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)\r
1369                 {\r
1370                         xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;\r
1371 \r
1372                         return (native_encoding == encoding) ?\r
1373                                 convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) :\r
1374                                 convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());\r
1375                 }\r
1376 \r
1377                 // source encoding is latin1\r
1378                 if (encoding == encoding_latin1) return convert_buffer_latin1(out_buffer, out_length, contents, size);\r
1379 \r
1380                 assert(!"Invalid encoding");\r
1381                 return false;\r
1382         }\r
1383 #else\r
1384         template <typename opt_swap> PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)\r
1385         {\r
1386                 const uint16_t* data = static_cast<const uint16_t*>(contents);\r
1387                 size_t data_length = size / sizeof(uint16_t);\r
1388 \r
1389                 // first pass: get length in utf8 units\r
1390                 size_t length = utf_decoder<utf8_counter, opt_swap>::decode_utf16_block(data, data_length, 0);\r
1391 \r
1392                 // allocate buffer of suitable length\r
1393                 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));\r
1394                 if (!buffer) return false;\r
1395 \r
1396                 // second pass: convert utf16 input to utf8\r
1397                 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);\r
1398                 uint8_t* oend = utf_decoder<utf8_writer, opt_swap>::decode_utf16_block(data, data_length, obegin);\r
1399 \r
1400                 assert(oend == obegin + length);\r
1401                 *oend = 0;\r
1402 \r
1403                 out_buffer = buffer;\r
1404                 out_length = length + 1;\r
1405 \r
1406                 return true;\r
1407         }\r
1408 \r
1409         template <typename opt_swap> PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)\r
1410         {\r
1411                 const uint32_t* data = static_cast<const uint32_t*>(contents);\r
1412                 size_t data_length = size / sizeof(uint32_t);\r
1413 \r
1414                 // first pass: get length in utf8 units\r
1415                 size_t length = utf_decoder<utf8_counter, opt_swap>::decode_utf32_block(data, data_length, 0);\r
1416 \r
1417                 // allocate buffer of suitable length\r
1418                 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));\r
1419                 if (!buffer) return false;\r
1420 \r
1421                 // second pass: convert utf32 input to utf8\r
1422                 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);\r
1423                 uint8_t* oend = utf_decoder<utf8_writer, opt_swap>::decode_utf32_block(data, data_length, obegin);\r
1424 \r
1425                 assert(oend == obegin + length);\r
1426                 *oend = 0;\r
1427 \r
1428                 out_buffer = buffer;\r
1429                 out_length = length + 1;\r
1430 \r
1431                 return true;\r
1432         }\r
1433 \r
1434         PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)\r
1435         {\r
1436                 for (size_t i = 0; i < size; ++i)\r
1437                         if (data[i] > 127)\r
1438                                 return i;\r
1439 \r
1440                 return size;\r
1441         }\r
1442 \r
1443         PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)\r
1444         {\r
1445                 const uint8_t* data = static_cast<const uint8_t*>(contents);\r
1446                 size_t data_length = size;\r
1447 \r
1448                 // get size of prefix that does not need utf8 conversion\r
1449                 size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length);\r
1450                 assert(prefix_length <= data_length);\r
1451 \r
1452                 const uint8_t* postfix = data + prefix_length;\r
1453                 size_t postfix_length = data_length - prefix_length;\r
1454 \r
1455                 // if no conversion is needed, just return the original buffer\r
1456                 if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);\r
1457 \r
1458                 // first pass: get length in utf8 units\r
1459                 size_t length = prefix_length + utf_decoder<utf8_counter>::decode_latin1_block(postfix, postfix_length, 0);\r
1460 \r
1461                 // allocate buffer of suitable length\r
1462                 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));\r
1463                 if (!buffer) return false;\r
1464 \r
1465                 // second pass: convert latin1 input to utf8\r
1466                 memcpy(buffer, data, prefix_length);\r
1467 \r
1468                 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);\r
1469                 uint8_t* oend = utf_decoder<utf8_writer>::decode_latin1_block(postfix, postfix_length, obegin + prefix_length);\r
1470 \r
1471                 assert(oend == obegin + length);\r
1472                 *oend = 0;\r
1473 \r
1474                 out_buffer = buffer;\r
1475                 out_length = length + 1;\r
1476 \r
1477                 return true;\r
1478         }\r
1479 \r
1480         PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)\r
1481         {\r
1482                 // fast path: no conversion required\r
1483                 if (encoding == encoding_utf8) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);\r
1484 \r
1485                 // source encoding is utf16\r
1486                 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)\r
1487                 {\r
1488                         xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;\r
1489 \r
1490                         return (native_encoding == encoding) ?\r
1491                                 convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) :\r
1492                                 convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());\r
1493                 }\r
1494 \r
1495                 // source encoding is utf32\r
1496                 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)\r
1497                 {\r
1498                         xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;\r
1499 \r
1500                         return (native_encoding == encoding) ?\r
1501                                 convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) :\r
1502                                 convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());\r
1503                 }\r
1504 \r
1505                 // source encoding is latin1\r
1506                 if (encoding == encoding_latin1) return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);\r
1507 \r
1508                 assert(!"Invalid encoding");\r
1509                 return false;\r
1510         }\r
1511 #endif\r
1512 \r
1513         PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)\r
1514         {\r
1515                 // get length in utf8 characters\r
1516                 return utf_decoder<utf8_counter>::decode_wchar_block(str, length, 0);\r
1517         }\r
1518 \r
1519         PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)\r
1520         {\r
1521                 // convert to utf8\r
1522                 uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);\r
1523                 uint8_t* end = utf_decoder<utf8_writer>::decode_wchar_block(str, length, begin);\r
1524         \r
1525                 assert(begin + size == end);\r
1526                 (void)!end;\r
1527 \r
1528                 // zero-terminate\r
1529                 buffer[size] = 0;\r
1530         }\r
1531         \r
1532 #ifndef PUGIXML_NO_STL\r
1533         PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)\r
1534         {\r
1535                 // first pass: get length in utf8 characters\r
1536                 size_t size = as_utf8_begin(str, length);\r
1537 \r
1538                 // allocate resulting string\r
1539                 std::string result;\r
1540                 result.resize(size);\r
1541 \r
1542                 // second pass: convert to utf8\r
1543                 if (size > 0) as_utf8_end(&result[0], size, str, length);\r
1544 \r
1545                 return result;\r
1546         }\r
1547 \r
1548         PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)\r
1549         {\r
1550                 const uint8_t* data = reinterpret_cast<const uint8_t*>(str);\r
1551 \r
1552                 // first pass: get length in wchar_t units\r
1553                 size_t length = utf_decoder<wchar_counter>::decode_utf8_block(data, size, 0);\r
1554 \r
1555                 // allocate resulting string\r
1556                 std::basic_string<wchar_t> result;\r
1557                 result.resize(length);\r
1558 \r
1559                 // second pass: convert to wchar_t\r
1560                 if (length > 0)\r
1561                 {\r
1562                         wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);\r
1563                         wchar_writer::value_type end = utf_decoder<wchar_writer>::decode_utf8_block(data, size, begin);\r
1564 \r
1565                         assert(begin + length == end);\r
1566                         (void)!end;\r
1567                 }\r
1568 \r
1569                 return result;\r
1570         }\r
1571 #endif\r
1572 \r
1573         inline bool strcpy_insitu_allow(size_t length, uintptr_t allocated, char_t* target)\r
1574         {\r
1575                 assert(target);\r
1576                 size_t target_length = strlength(target);\r
1577 \r
1578                 // always reuse document buffer memory if possible\r
1579                 if (!allocated) return target_length >= length;\r
1580 \r
1581                 // reuse heap memory if waste is not too great\r
1582                 const size_t reuse_threshold = 32;\r
1583 \r
1584                 return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);\r
1585         }\r
1586 \r
1587         PUGI__FN bool strcpy_insitu(char_t*& dest, uintptr_t& header, uintptr_t header_mask, const char_t* source)\r
1588         {\r
1589                 assert(header);\r
1590 \r
1591                 size_t source_length = strlength(source);\r
1592 \r
1593                 if (source_length == 0)\r
1594                 {\r
1595                         // empty string and null pointer are equivalent, so just deallocate old memory\r
1596                         xml_allocator* alloc = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator;\r
1597 \r
1598                         if (header & header_mask) alloc->deallocate_string(dest);\r
1599                         \r
1600                         // mark the string as not allocated\r
1601                         dest = 0;\r
1602                         header &= ~header_mask;\r
1603 \r
1604                         return true;\r
1605                 }\r
1606                 else if (dest && strcpy_insitu_allow(source_length, header & header_mask, dest))\r
1607                 {\r
1608                         // we can reuse old buffer, so just copy the new data (including zero terminator)\r
1609                         memcpy(dest, source, (source_length + 1) * sizeof(char_t));\r
1610                         \r
1611                         return true;\r
1612                 }\r
1613                 else\r
1614                 {\r
1615                         xml_allocator* alloc = reinterpret_cast<xml_memory_page*>(header & xml_memory_page_pointer_mask)->allocator;\r
1616 \r
1617                         // allocate new buffer\r
1618                         char_t* buf = alloc->allocate_string(source_length + 1);\r
1619                         if (!buf) return false;\r
1620 \r
1621                         // copy the string (including zero terminator)\r
1622                         memcpy(buf, source, (source_length + 1) * sizeof(char_t));\r
1623 \r
1624                         // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)\r
1625                         if (header & header_mask) alloc->deallocate_string(dest);\r
1626                         \r
1627                         // the string is now allocated, so set the flag\r
1628                         dest = buf;\r
1629                         header |= header_mask;\r
1630 \r
1631                         return true;\r
1632                 }\r
1633         }\r
1634 \r
1635         struct gap\r
1636         {\r
1637                 char_t* end;\r
1638                 size_t size;\r
1639                         \r
1640                 gap(): end(0), size(0)\r
1641                 {\r
1642                 }\r
1643                         \r
1644                 // Push new gap, move s count bytes further (skipping the gap).\r
1645                 // Collapse previous gap.\r
1646                 void push(char_t*& s, size_t count)\r
1647                 {\r
1648                         if (end) // there was a gap already; collapse it\r
1649                         {\r
1650                                 // Move [old_gap_end, new_gap_start) to [old_gap_start, ...)\r
1651                                 assert(s >= end);\r
1652                                 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));\r
1653                         }\r
1654                                 \r
1655                         s += count; // end of current gap\r
1656                                 \r
1657                         // "merge" two gaps\r
1658                         end = s;\r
1659                         size += count;\r
1660                 }\r
1661                         \r
1662                 // Collapse all gaps, return past-the-end pointer\r
1663                 char_t* flush(char_t* s)\r
1664                 {\r
1665                         if (end)\r
1666                         {\r
1667                                 // Move [old_gap_end, current_pos) to [old_gap_start, ...)\r
1668                                 assert(s >= end);\r
1669                                 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));\r
1670 \r
1671                                 return s - size;\r
1672                         }\r
1673                         else return s;\r
1674                 }\r
1675         };\r
1676         \r
1677         PUGI__FN char_t* strconv_escape(char_t* s, gap& g)\r
1678         {\r
1679                 char_t* stre = s + 1;\r
1680 \r
1681                 switch (*stre)\r
1682                 {\r
1683                         case '#':       // &#...\r
1684                         {\r
1685                                 unsigned int ucsc = 0;\r
1686 \r
1687                                 if (stre[1] == 'x') // &#x... (hex code)\r
1688                                 {\r
1689                                         stre += 2;\r
1690 \r
1691                                         char_t ch = *stre;\r
1692 \r
1693                                         if (ch == ';') return stre;\r
1694 \r
1695                                         for (;;)\r
1696                                         {\r
1697                                                 if (static_cast<unsigned int>(ch - '0') <= 9)\r
1698                                                         ucsc = 16 * ucsc + (ch - '0');\r
1699                                                 else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)\r
1700                                                         ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);\r
1701                                                 else if (ch == ';')\r
1702                                                         break;\r
1703                                                 else // cancel\r
1704                                                         return stre;\r
1705 \r
1706                                                 ch = *++stre;\r
1707                                         }\r
1708                                         \r
1709                                         ++stre;\r
1710                                 }\r
1711                                 else    // &#... (dec code)\r
1712                                 {\r
1713                                         char_t ch = *++stre;\r
1714 \r
1715                                         if (ch == ';') return stre;\r
1716 \r
1717                                         for (;;)\r
1718                                         {\r
1719                                                 if (static_cast<unsigned int>(static_cast<unsigned int>(ch) - '0') <= 9)\r
1720                                                         ucsc = 10 * ucsc + (ch - '0');\r
1721                                                 else if (ch == ';')\r
1722                                                         break;\r
1723                                                 else // cancel\r
1724                                                         return stre;\r
1725 \r
1726                                                 ch = *++stre;\r
1727                                         }\r
1728                                         \r
1729                                         ++stre;\r
1730                                 }\r
1731 \r
1732                         #ifdef PUGIXML_WCHAR_MODE\r
1733                                 s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));\r
1734                         #else\r
1735                                 s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));\r
1736                         #endif\r
1737                                         \r
1738                                 g.push(s, stre - s);\r
1739                                 return stre;\r
1740                         }\r
1741 \r
1742                         case 'a':       // &a\r
1743                         {\r
1744                                 ++stre;\r
1745 \r
1746                                 if (*stre == 'm') // &am\r
1747                                 {\r
1748                                         if (*++stre == 'p' && *++stre == ';') // &amp;\r
1749                                         {\r
1750                                                 *s++ = '&';\r
1751                                                 ++stre;\r
1752                                                         \r
1753                                                 g.push(s, stre - s);\r
1754                                                 return stre;\r
1755                                         }\r
1756                                 }\r
1757                                 else if (*stre == 'p') // &ap\r
1758                                 {\r
1759                                         if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // &apos;\r
1760                                         {\r
1761                                                 *s++ = '\'';\r
1762                                                 ++stre;\r
1763 \r
1764                                                 g.push(s, stre - s);\r
1765                                                 return stre;\r
1766                                         }\r
1767                                 }\r
1768                                 break;\r
1769                         }\r
1770 \r
1771                         case 'g': // &g\r
1772                         {\r
1773                                 if (*++stre == 't' && *++stre == ';') // &gt;\r
1774                                 {\r
1775                                         *s++ = '>';\r
1776                                         ++stre;\r
1777                                         \r
1778                                         g.push(s, stre - s);\r
1779                                         return stre;\r
1780                                 }\r
1781                                 break;\r
1782                         }\r
1783 \r
1784                         case 'l': // &l\r
1785                         {\r
1786                                 if (*++stre == 't' && *++stre == ';') // &lt;\r
1787                                 {\r
1788                                         *s++ = '<';\r
1789                                         ++stre;\r
1790                                                 \r
1791                                         g.push(s, stre - s);\r
1792                                         return stre;\r
1793                                 }\r
1794                                 break;\r
1795                         }\r
1796 \r
1797                         case 'q': // &q\r
1798                         {\r
1799                                 if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // &quot;\r
1800                                 {\r
1801                                         *s++ = '"';\r
1802                                         ++stre;\r
1803                                         \r
1804                                         g.push(s, stre - s);\r
1805                                         return stre;\r
1806                                 }\r
1807                                 break;\r
1808                         }\r
1809 \r
1810                         default:\r
1811                                 break;\r
1812                 }\r
1813                 \r
1814                 return stre;\r
1815         }\r
1816 \r
1817         // Utility macro for last character handling\r
1818         #define ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))\r
1819 \r
1820         PUGI__FN char_t* strconv_comment(char_t* s, char_t endch)\r
1821         {\r
1822                 gap g;\r
1823                 \r
1824                 while (true)\r
1825                 {\r
1826                         while (!PUGI__IS_CHARTYPE(*s, ct_parse_comment)) ++s;\r
1827                 \r
1828                         if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair\r
1829                         {\r
1830                                 *s++ = '\n'; // replace first one with 0x0a\r
1831                                 \r
1832                                 if (*s == '\n') g.push(s, 1);\r
1833                         }\r
1834                         else if (s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>')) // comment ends here\r
1835                         {\r
1836                                 *g.flush(s) = 0;\r
1837                                 \r
1838                                 return s + (s[2] == '>' ? 3 : 2);\r
1839                         }\r
1840                         else if (*s == 0)\r
1841                         {\r
1842                                 return 0;\r
1843                         }\r
1844                         else ++s;\r
1845                 }\r
1846         }\r
1847 \r
1848         PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch)\r
1849         {\r
1850                 gap g;\r
1851                         \r
1852                 while (true)\r
1853                 {\r
1854                         while (!PUGI__IS_CHARTYPE(*s, ct_parse_cdata)) ++s;\r
1855                         \r
1856                         if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair\r
1857                         {\r
1858                                 *s++ = '\n'; // replace first one with 0x0a\r
1859                                 \r
1860                                 if (*s == '\n') g.push(s, 1);\r
1861                         }\r
1862                         else if (s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>')) // CDATA ends here\r
1863                         {\r
1864                                 *g.flush(s) = 0;\r
1865                                 \r
1866                                 return s + 1;\r
1867                         }\r
1868                         else if (*s == 0)\r
1869                         {\r
1870                                 return 0;\r
1871                         }\r
1872                         else ++s;\r
1873                 }\r
1874         }\r
1875         \r
1876         typedef char_t* (*strconv_pcdata_t)(char_t*);\r
1877                 \r
1878         template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl\r
1879         {\r
1880                 static char_t* parse(char_t* s)\r
1881                 {\r
1882                         gap g;\r
1883 \r
1884                         char_t* begin = s;\r
1885 \r
1886                         while (true)\r
1887                         {\r
1888                                 while (!PUGI__IS_CHARTYPE(*s, ct_parse_pcdata)) ++s;\r
1889                                         \r
1890                                 if (*s == '<') // PCDATA ends here\r
1891                                 {\r
1892                                         char_t* end = g.flush(s);\r
1893 \r
1894                                         if (opt_trim::value)\r
1895                                                 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))\r
1896                                                         --end;\r
1897 \r
1898                                         *end = 0;\r
1899                                         \r
1900                                         return s + 1;\r
1901                                 }\r
1902                                 else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair\r
1903                                 {\r
1904                                         *s++ = '\n'; // replace first one with 0x0a\r
1905                                         \r
1906                                         if (*s == '\n') g.push(s, 1);\r
1907                                 }\r
1908                                 else if (opt_escape::value && *s == '&')\r
1909                                 {\r
1910                                         s = strconv_escape(s, g);\r
1911                                 }\r
1912                                 else if (*s == 0)\r
1913                                 {\r
1914                                         char_t* end = g.flush(s);\r
1915 \r
1916                                         if (opt_trim::value)\r
1917                                                 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))\r
1918                                                         --end;\r
1919 \r
1920                                         *end = 0;\r
1921 \r
1922                                         return s;\r
1923                                 }\r
1924                                 else ++s;\r
1925                         }\r
1926                 }\r
1927         };\r
1928         \r
1929         PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)\r
1930         {\r
1931                 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800);\r
1932 \r
1933                 switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (eol escapes trim)\r
1934                 {\r
1935                 case 0: return strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse;\r
1936                 case 1: return strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse;\r
1937                 case 2: return strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse;\r
1938                 case 3: return strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse;\r
1939                 case 4: return strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse;\r
1940                 case 5: return strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse;\r
1941                 case 6: return strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse;\r
1942                 case 7: return strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse;\r
1943                 default: assert(false); return 0; // should not get here\r
1944                 }\r
1945         }\r
1946 \r
1947         typedef char_t* (*strconv_attribute_t)(char_t*, char_t);\r
1948         \r
1949         template <typename opt_escape> struct strconv_attribute_impl\r
1950         {\r
1951                 static char_t* parse_wnorm(char_t* s, char_t end_quote)\r
1952                 {\r
1953                         gap g;\r
1954 \r
1955                         // trim leading whitespaces\r
1956                         if (PUGI__IS_CHARTYPE(*s, ct_space))\r
1957                         {\r
1958                                 char_t* str = s;\r
1959                                 \r
1960                                 do ++str;\r
1961                                 while (PUGI__IS_CHARTYPE(*str, ct_space));\r
1962                                 \r
1963                                 g.push(s, str - s);\r
1964                         }\r
1965 \r
1966                         while (true)\r
1967                         {\r
1968                                 while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr_ws | ct_space)) ++s;\r
1969                                 \r
1970                                 if (*s == end_quote)\r
1971                                 {\r
1972                                         char_t* str = g.flush(s);\r
1973                                         \r
1974                                         do *str-- = 0;\r
1975                                         while (PUGI__IS_CHARTYPE(*str, ct_space));\r
1976                                 \r
1977                                         return s + 1;\r
1978                                 }\r
1979                                 else if (PUGI__IS_CHARTYPE(*s, ct_space))\r
1980                                 {\r
1981                                         *s++ = ' ';\r
1982                 \r
1983                                         if (PUGI__IS_CHARTYPE(*s, ct_space))\r
1984                                         {\r
1985                                                 char_t* str = s + 1;\r
1986                                                 while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;\r
1987                                                 \r
1988                                                 g.push(s, str - s);\r
1989                                         }\r
1990                                 }\r
1991                                 else if (opt_escape::value && *s == '&')\r
1992                                 {\r
1993                                         s = strconv_escape(s, g);\r
1994                                 }\r
1995                                 else if (!*s)\r
1996                                 {\r
1997                                         return 0;\r
1998                                 }\r
1999                                 else ++s;\r
2000                         }\r
2001                 }\r
2002 \r
2003                 static char_t* parse_wconv(char_t* s, char_t end_quote)\r
2004                 {\r
2005                         gap g;\r
2006 \r
2007                         while (true)\r
2008                         {\r
2009                                 while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr_ws)) ++s;\r
2010                                 \r
2011                                 if (*s == end_quote)\r
2012                                 {\r
2013                                         *g.flush(s) = 0;\r
2014                                 \r
2015                                         return s + 1;\r
2016                                 }\r
2017                                 else if (PUGI__IS_CHARTYPE(*s, ct_space))\r
2018                                 {\r
2019                                         if (*s == '\r')\r
2020                                         {\r
2021                                                 *s++ = ' ';\r
2022                                 \r
2023                                                 if (*s == '\n') g.push(s, 1);\r
2024                                         }\r
2025                                         else *s++ = ' ';\r
2026                                 }\r
2027                                 else if (opt_escape::value && *s == '&')\r
2028                                 {\r
2029                                         s = strconv_escape(s, g);\r
2030                                 }\r
2031                                 else if (!*s)\r
2032                                 {\r
2033                                         return 0;\r
2034                                 }\r
2035                                 else ++s;\r
2036                         }\r
2037                 }\r
2038 \r
2039                 static char_t* parse_eol(char_t* s, char_t end_quote)\r
2040                 {\r
2041                         gap g;\r
2042 \r
2043                         while (true)\r
2044                         {\r
2045                                 while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr)) ++s;\r
2046                                 \r
2047                                 if (*s == end_quote)\r
2048                                 {\r
2049                                         *g.flush(s) = 0;\r
2050                                 \r
2051                                         return s + 1;\r
2052                                 }\r
2053                                 else if (*s == '\r')\r
2054                                 {\r
2055                                         *s++ = '\n';\r
2056                                         \r
2057                                         if (*s == '\n') g.push(s, 1);\r
2058                                 }\r
2059                                 else if (opt_escape::value && *s == '&')\r
2060                                 {\r
2061                                         s = strconv_escape(s, g);\r
2062                                 }\r
2063                                 else if (!*s)\r
2064                                 {\r
2065                                         return 0;\r
2066                                 }\r
2067                                 else ++s;\r
2068                         }\r
2069                 }\r
2070 \r
2071                 static char_t* parse_simple(char_t* s, char_t end_quote)\r
2072                 {\r
2073                         gap g;\r
2074 \r
2075                         while (true)\r
2076                         {\r
2077                                 while (!PUGI__IS_CHARTYPE(*s, ct_parse_attr)) ++s;\r
2078                                 \r
2079                                 if (*s == end_quote)\r
2080                                 {\r
2081                                         *g.flush(s) = 0;\r
2082                                 \r
2083                                         return s + 1;\r
2084                                 }\r
2085                                 else if (opt_escape::value && *s == '&')\r
2086                                 {\r
2087                                         s = strconv_escape(s, g);\r
2088                                 }\r
2089                                 else if (!*s)\r
2090                                 {\r
2091                                         return 0;\r
2092                                 }\r
2093                                 else ++s;\r
2094                         }\r
2095                 }\r
2096         };\r
2097 \r
2098         PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)\r
2099         {\r
2100                 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);\r
2101                 \r
2102                 switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes)\r
2103                 {\r
2104                 case 0:  return strconv_attribute_impl<opt_false>::parse_simple;\r
2105                 case 1:  return strconv_attribute_impl<opt_true>::parse_simple;\r
2106                 case 2:  return strconv_attribute_impl<opt_false>::parse_eol;\r
2107                 case 3:  return strconv_attribute_impl<opt_true>::parse_eol;\r
2108                 case 4:  return strconv_attribute_impl<opt_false>::parse_wconv;\r
2109                 case 5:  return strconv_attribute_impl<opt_true>::parse_wconv;\r
2110                 case 6:  return strconv_attribute_impl<opt_false>::parse_wconv;\r
2111                 case 7:  return strconv_attribute_impl<opt_true>::parse_wconv;\r
2112                 case 8:  return strconv_attribute_impl<opt_false>::parse_wnorm;\r
2113                 case 9:  return strconv_attribute_impl<opt_true>::parse_wnorm;\r
2114                 case 10: return strconv_attribute_impl<opt_false>::parse_wnorm;\r
2115                 case 11: return strconv_attribute_impl<opt_true>::parse_wnorm;\r
2116                 case 12: return strconv_attribute_impl<opt_false>::parse_wnorm;\r
2117                 case 13: return strconv_attribute_impl<opt_true>::parse_wnorm;\r
2118                 case 14: return strconv_attribute_impl<opt_false>::parse_wnorm;\r
2119                 case 15: return strconv_attribute_impl<opt_true>::parse_wnorm;\r
2120                 default: assert(false); return 0; // should not get here\r
2121                 }\r
2122         }\r
2123 \r
2124         inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)\r
2125         {\r
2126                 xml_parse_result result;\r
2127                 result.status = status;\r
2128                 result.offset = offset;\r
2129 \r
2130                 return result;\r
2131         }\r
2132 \r
2133         struct xml_parser\r
2134         {\r
2135                 xml_allocator alloc;\r
2136                 char_t* error_offset;\r
2137                 xml_parse_status error_status;\r
2138                 \r
2139                 // Parser utilities.\r
2140                 #define PUGI__SKIPWS()                  { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }\r
2141                 #define PUGI__OPTSET(OPT)                       ( optmsk & (OPT) )\r
2142                 #define PUGI__PUSHNODE(TYPE)            { cursor = append_node(cursor, alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }\r
2143                 #define PUGI__POPNODE()                 { cursor = cursor->parent; }\r
2144                 #define PUGI__SCANFOR(X)                        { while (*s != 0 && !(X)) ++s; }\r
2145                 #define PUGI__SCANWHILE(X)              { while ((X)) ++s; }\r
2146                 #define PUGI__ENDSEG()                  { ch = *s; *s = 0; ++s; }\r
2147                 #define PUGI__THROW_ERROR(err, m)       return error_offset = m, error_status = err, static_cast<char_t*>(0)\r
2148                 #define PUGI__CHECK_ERROR(err, m)       { if (*s == 0) PUGI__THROW_ERROR(err, m); }\r
2149                 \r
2150                 xml_parser(const xml_allocator& alloc_): alloc(alloc_), error_offset(0), error_status(status_ok)\r
2151                 {\r
2152                 }\r
2153 \r
2154                 // DOCTYPE consists of nested sections of the following possible types:\r
2155                 // <!-- ... -->, <? ... ?>, "...", '...'\r
2156                 // <![...]]>\r
2157                 // <!...>\r
2158                 // First group can not contain nested groups\r
2159                 // Second group can contain nested groups of the same type\r
2160                 // Third group can contain all other groups\r
2161                 char_t* parse_doctype_primitive(char_t* s)\r
2162                 {\r
2163                         if (*s == '"' || *s == '\'')\r
2164                         {\r
2165                                 // quoted string\r
2166                                 char_t ch = *s++;\r
2167                                 PUGI__SCANFOR(*s == ch);\r
2168                                 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);\r
2169 \r
2170                                 s++;\r
2171                         }\r
2172                         else if (s[0] == '<' && s[1] == '?')\r
2173                         {\r
2174                                 // <? ... ?>\r
2175                                 s += 2;\r
2176                                 PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype\r
2177                                 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);\r
2178 \r
2179                                 s += 2;\r
2180                         }\r
2181                         else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')\r
2182                         {\r
2183                                 s += 4;\r
2184                                 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype\r
2185                                 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);\r
2186 \r
2187                                 s += 4;\r
2188                         }\r
2189                         else PUGI__THROW_ERROR(status_bad_doctype, s);\r
2190 \r
2191                         return s;\r
2192                 }\r
2193 \r
2194                 char_t* parse_doctype_ignore(char_t* s)\r
2195                 {\r
2196                         assert(s[0] == '<' && s[1] == '!' && s[2] == '[');\r
2197                         s++;\r
2198 \r
2199                         while (*s)\r
2200                         {\r
2201                                 if (s[0] == '<' && s[1] == '!' && s[2] == '[')\r
2202                                 {\r
2203                                         // nested ignore section\r
2204                                         s = parse_doctype_ignore(s);\r
2205                                         if (!s) return s;\r
2206                                 }\r
2207                                 else if (s[0] == ']' && s[1] == ']' && s[2] == '>')\r
2208                                 {\r
2209                                         // ignore section end\r
2210                                         s += 3;\r
2211 \r
2212                                         return s;\r
2213                                 }\r
2214                                 else s++;\r
2215                         }\r
2216 \r
2217                         PUGI__THROW_ERROR(status_bad_doctype, s);\r
2218                 }\r
2219 \r
2220                 char_t* parse_doctype_group(char_t* s, char_t endch, bool toplevel)\r
2221                 {\r
2222                         assert((s[0] == '<' || s[0] == 0) && s[1] == '!');\r
2223                         s++;\r
2224 \r
2225                         while (*s)\r
2226                         {\r
2227                                 if (s[0] == '<' && s[1] == '!' && s[2] != '-')\r
2228                                 {\r
2229                                         if (s[2] == '[')\r
2230                                         {\r
2231                                                 // ignore\r
2232                                                 s = parse_doctype_ignore(s);\r
2233                                                 if (!s) return s;\r
2234                                         }\r
2235                                         else\r
2236                                         {\r
2237                                                 // some control group\r
2238                                                 s = parse_doctype_group(s, endch, false);\r
2239                                                 if (!s) return s;\r
2240 \r
2241                                                 // skip >\r
2242                                                 assert(*s == '>');\r
2243                                                 s++;\r
2244                                         }\r
2245                                 }\r
2246                                 else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')\r
2247                                 {\r
2248                                         // unknown tag (forbidden), or some primitive group\r
2249                                         s = parse_doctype_primitive(s);\r
2250                                         if (!s) return s;\r
2251                                 }\r
2252                                 else if (*s == '>')\r
2253                                 {\r
2254                                         return s;\r
2255                                 }\r
2256                                 else s++;\r
2257                         }\r
2258 \r
2259                         if (!toplevel || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);\r
2260 \r
2261                         return s;\r
2262                 }\r
2263 \r
2264                 char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)\r
2265                 {\r
2266                         // parse node contents, starting with exclamation mark\r
2267                         ++s;\r
2268 \r
2269                         if (*s == '-') // '<!-...'\r
2270                         {\r
2271                                 ++s;\r
2272 \r
2273                                 if (*s == '-') // '<!--...'\r
2274                                 {\r
2275                                         ++s;\r
2276 \r
2277                                         if (PUGI__OPTSET(parse_comments))\r
2278                                         {\r
2279                                                 PUGI__PUSHNODE(node_comment); // Append a new node on the tree.\r
2280                                                 cursor->value = s; // Save the offset.\r
2281                                         }\r
2282 \r
2283                                         if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments))\r
2284                                         {\r
2285                                                 s = strconv_comment(s, endch);\r
2286 \r
2287                                                 if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);\r
2288                                         }\r
2289                                         else\r
2290                                         {\r
2291                                                 // Scan for terminating '-->'.\r
2292                                                 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && ENDSWITH(s[2], '>'));\r
2293                                                 PUGI__CHECK_ERROR(status_bad_comment, s);\r
2294 \r
2295                                                 if (PUGI__OPTSET(parse_comments))\r
2296                                                         *s = 0; // Zero-terminate this segment at the first terminating '-'.\r
2297 \r
2298                                                 s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.\r
2299                                         }\r
2300                                 }\r
2301                                 else PUGI__THROW_ERROR(status_bad_comment, s);\r
2302                         }\r
2303                         else if (*s == '[')\r
2304                         {\r
2305                                 // '<![CDATA[...'\r
2306                                 if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')\r
2307                                 {\r
2308                                         ++s;\r
2309 \r
2310                                         if (PUGI__OPTSET(parse_cdata))\r
2311                                         {\r
2312                                                 PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.\r
2313                                                 cursor->value = s; // Save the offset.\r
2314 \r
2315                                                 if (PUGI__OPTSET(parse_eol))\r
2316                                                 {\r
2317                                                         s = strconv_cdata(s, endch);\r
2318 \r
2319                                                         if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);\r
2320                                                 }\r
2321                                                 else\r
2322                                                 {\r
2323                                                         // Scan for terminating ']]>'.\r
2324                                                         PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>'));\r
2325                                                         PUGI__CHECK_ERROR(status_bad_cdata, s);\r
2326 \r
2327                                                         *s++ = 0; // Zero-terminate this segment.\r
2328                                                 }\r
2329                                         }\r
2330                                         else // Flagged for discard, but we still have to scan for the terminator.\r
2331                                         {\r
2332                                                 // Scan for terminating ']]>'.\r
2333                                                 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && ENDSWITH(s[2], '>'));\r
2334                                                 PUGI__CHECK_ERROR(status_bad_cdata, s);\r
2335 \r
2336                                                 ++s;\r
2337                                         }\r
2338 \r
2339                                         s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.\r
2340                                 }\r
2341                                 else PUGI__THROW_ERROR(status_bad_cdata, s);\r
2342                         }\r
2343                         else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && ENDSWITH(s[6], 'E'))\r
2344                         {\r
2345                                 s -= 2;\r
2346 \r
2347                                 if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);\r
2348 \r
2349                                 char_t* mark = s + 9;\r
2350 \r
2351                                 s = parse_doctype_group(s, endch, true);\r
2352                                 if (!s) return s;\r
2353 \r
2354                                 assert((*s == 0 && endch == '>') || *s == '>');\r
2355                                 if (*s) *s++ = 0;\r
2356 \r
2357                                 if (PUGI__OPTSET(parse_doctype))\r
2358                                 {\r
2359                                         while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;\r
2360 \r
2361                                         PUGI__PUSHNODE(node_doctype);\r
2362 \r
2363                                         cursor->value = mark;\r
2364 \r
2365                                         PUGI__POPNODE();\r
2366                                 }\r
2367                         }\r
2368                         else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);\r
2369                         else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);\r
2370                         else PUGI__THROW_ERROR(status_unrecognized_tag, s);\r
2371 \r
2372                         return s;\r
2373                 }\r
2374 \r
2375                 char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)\r
2376                 {\r
2377                         // load into registers\r
2378                         xml_node_struct* cursor = ref_cursor;\r
2379                         char_t ch = 0;\r
2380 \r
2381                         // parse node contents, starting with question mark\r
2382                         ++s;\r
2383 \r
2384                         // read PI target\r
2385                         char_t* target = s;\r
2386 \r
2387                         if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);\r
2388 \r
2389                         PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));\r
2390                         PUGI__CHECK_ERROR(status_bad_pi, s);\r
2391 \r
2392                         // determine node type; stricmp / strcasecmp is not portable\r
2393                         bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;\r
2394 \r
2395                         if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))\r
2396                         {\r
2397                                 if (declaration)\r
2398                                 {\r
2399                                         // disallow non top-level declarations\r
2400                                         if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);\r
2401 \r
2402                                         PUGI__PUSHNODE(node_declaration);\r
2403                                 }\r
2404                                 else\r
2405                                 {\r
2406                                         PUGI__PUSHNODE(node_pi);\r
2407                                 }\r
2408 \r
2409                                 cursor->name = target;\r
2410 \r
2411                                 PUGI__ENDSEG();\r
2412 \r
2413                                 // parse value/attributes\r
2414                                 if (ch == '?')\r
2415                                 {\r
2416                                         // empty node\r
2417                                         if (!ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);\r
2418                                         s += (*s == '>');\r
2419 \r
2420                                         PUGI__POPNODE();\r
2421                                 }\r
2422                                 else if (PUGI__IS_CHARTYPE(ch, ct_space))\r
2423                                 {\r
2424                                         PUGI__SKIPWS();\r
2425 \r
2426                                         // scan for tag end\r
2427                                         char_t* value = s;\r
2428 \r
2429                                         PUGI__SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));\r
2430                                         PUGI__CHECK_ERROR(status_bad_pi, s);\r
2431 \r
2432                                         if (declaration)\r
2433                                         {\r
2434                                                 // replace ending ? with / so that 'element' terminates properly\r
2435                                                 *s = '/';\r
2436 \r
2437                                                 // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES\r
2438                                                 s = value;\r
2439                                         }\r
2440                                         else\r
2441                                         {\r
2442                                                 // store value and step over >\r
2443                                                 cursor->value = value;\r
2444                                                 PUGI__POPNODE();\r
2445 \r
2446                                                 PUGI__ENDSEG();\r
2447 \r
2448                                                 s += (*s == '>');\r
2449                                         }\r
2450                                 }\r
2451                                 else PUGI__THROW_ERROR(status_bad_pi, s);\r
2452                         }\r
2453                         else\r
2454                         {\r
2455                                 // scan for tag end\r
2456                                 PUGI__SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));\r
2457                                 PUGI__CHECK_ERROR(status_bad_pi, s);\r
2458 \r
2459                                 s += (s[1] == '>' ? 2 : 1);\r
2460                         }\r
2461 \r
2462                         // store from registers\r
2463                         ref_cursor = cursor;\r
2464 \r
2465                         return s;\r
2466                 }\r
2467 \r
2468                 char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch)\r
2469                 {\r
2470                         strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);\r
2471                         strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);\r
2472                         \r
2473                         char_t ch = 0;\r
2474                         xml_node_struct* cursor = root;\r
2475                         char_t* mark = s;\r
2476 \r
2477                         while (*s != 0)\r
2478                         {\r
2479                                 if (*s == '<')\r
2480                                 {\r
2481                                         ++s;\r
2482 \r
2483                                 LOC_TAG:\r
2484                                         if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'\r
2485                                         {\r
2486                                                 PUGI__PUSHNODE(node_element); // Append a new node to the tree.\r
2487 \r
2488                                                 cursor->name = s;\r
2489 \r
2490                                                 PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.\r
2491                                                 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.\r
2492 \r
2493                                                 if (ch == '>')\r
2494                                                 {\r
2495                                                         // end of tag\r
2496                                                 }\r
2497                                                 else if (PUGI__IS_CHARTYPE(ch, ct_space))\r
2498                                                 {\r
2499                                                 LOC_ATTRIBUTES:\r
2500                                                         while (true)\r
2501                                                         {\r
2502                                                                 PUGI__SKIPWS(); // Eat any whitespace.\r
2503                                                 \r
2504                                                                 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...\r
2505                                                                 {\r
2506                                                                         xml_attribute_struct* a = append_attribute_ll(cursor, alloc); // Make space for this attribute.\r
2507                                                                         if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);\r
2508 \r
2509                                                                         a->name = s; // Save the offset.\r
2510 \r
2511                                                                         PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator.\r
2512                                                                         PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance\r
2513 \r
2514                                                                         PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.\r
2515                                                                         PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance\r
2516 \r
2517                                                                         if (PUGI__IS_CHARTYPE(ch, ct_space))\r
2518                                                                         {\r
2519                                                                                 PUGI__SKIPWS(); // Eat any whitespace.\r
2520                                                                                 PUGI__CHECK_ERROR(status_bad_attribute, s); //$ redundant, left for performance\r
2521 \r
2522                                                                                 ch = *s;\r
2523                                                                                 ++s;\r
2524                                                                         }\r
2525                                                                         \r
2526                                                                         if (ch == '=') // '<... #=...'\r
2527                                                                         {\r
2528                                                                                 PUGI__SKIPWS(); // Eat any whitespace.\r
2529 \r
2530                                                                                 if (*s == '"' || *s == '\'') // '<... #="...'\r
2531                                                                                 {\r
2532                                                                                         ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.\r
2533                                                                                         ++s; // Step over the quote.\r
2534                                                                                         a->value = s; // Save the offset.\r
2535 \r
2536                                                                                         s = strconv_attribute(s, ch);\r
2537                                                                                 \r
2538                                                                                         if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);\r
2539 \r
2540                                                                                         // After this line the loop continues from the start;\r
2541                                                                                         // Whitespaces, / and > are ok, symbols and EOF are wrong,\r
2542                                                                                         // everything else will be detected\r
2543                                                                                         if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);\r
2544                                                                                 }\r
2545                                                                                 else PUGI__THROW_ERROR(status_bad_attribute, s);\r
2546                                                                         }\r
2547                                                                         else PUGI__THROW_ERROR(status_bad_attribute, s);\r
2548                                                                 }\r
2549                                                                 else if (*s == '/')\r
2550                                                                 {\r
2551                                                                         ++s;\r
2552                                                                         \r
2553                                                                         if (*s == '>')\r
2554                                                                         {\r
2555                                                                                 PUGI__POPNODE();\r
2556                                                                                 s++;\r
2557                                                                                 break;\r
2558                                                                         }\r
2559                                                                         else if (*s == 0 && endch == '>')\r
2560                                                                         {\r
2561                                                                                 PUGI__POPNODE();\r
2562                                                                                 break;\r
2563                                                                         }\r
2564                                                                         else PUGI__THROW_ERROR(status_bad_start_element, s);\r
2565                                                                 }\r
2566                                                                 else if (*s == '>')\r
2567                                                                 {\r
2568                                                                         ++s;\r
2569 \r
2570                                                                         break;\r
2571                                                                 }\r
2572                                                                 else if (*s == 0 && endch == '>')\r
2573                                                                 {\r
2574                                                                         break;\r
2575                                                                 }\r
2576                                                                 else PUGI__THROW_ERROR(status_bad_start_element, s);\r
2577                                                         }\r
2578 \r
2579                                                         // !!!\r
2580                                                 }\r
2581                                                 else if (ch == '/') // '<#.../'\r
2582                                                 {\r
2583                                                         if (!ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);\r
2584 \r
2585                                                         PUGI__POPNODE(); // Pop.\r
2586 \r
2587                                                         s += (*s == '>');\r
2588                                                 }\r
2589                                                 else if (ch == 0)\r
2590                                                 {\r
2591                                                         // we stepped over null terminator, backtrack & handle closing tag\r
2592                                                         --s;\r
2593                                                         \r
2594                                                         if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);\r
2595                                                 }\r
2596                                                 else PUGI__THROW_ERROR(status_bad_start_element, s);\r
2597                                         }\r
2598                                         else if (*s == '/')\r
2599                                         {\r
2600                                                 ++s;\r
2601 \r
2602                                                 char_t* name = cursor->name;\r
2603                                                 if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, s);\r
2604                                                 \r
2605                                                 while (PUGI__IS_CHARTYPE(*s, ct_symbol))\r
2606                                                 {\r
2607                                                         if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, s);\r
2608                                                 }\r
2609 \r
2610                                                 if (*name)\r
2611                                                 {\r
2612                                                         if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);\r
2613                                                         else PUGI__THROW_ERROR(status_end_element_mismatch, s);\r
2614                                                 }\r
2615                                                         \r
2616                                                 PUGI__POPNODE(); // Pop.\r
2617 \r
2618                                                 PUGI__SKIPWS();\r
2619 \r
2620                                                 if (*s == 0)\r
2621                                                 {\r
2622                                                         if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);\r
2623                                                 }\r
2624                                                 else\r
2625                                                 {\r
2626                                                         if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);\r
2627                                                         ++s;\r
2628                                                 }\r
2629                                         }\r
2630                                         else if (*s == '?') // '<?...'\r
2631                                         {\r
2632                                                 s = parse_question(s, cursor, optmsk, endch);\r
2633                                                 if (!s) return s;\r
2634 \r
2635                                                 assert(cursor);\r
2636                                                 if ((cursor->header & xml_memory_page_type_mask) + 1 == node_declaration) goto LOC_ATTRIBUTES;\r
2637                                         }\r
2638                                         else if (*s == '!') // '<!...'\r
2639                                         {\r
2640                                                 s = parse_exclamation(s, cursor, optmsk, endch);\r
2641                                                 if (!s) return s;\r
2642                                         }\r
2643                                         else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);\r
2644                                         else PUGI__THROW_ERROR(status_unrecognized_tag, s);\r
2645                                 }\r
2646                                 else\r
2647                                 {\r
2648                                         mark = s; // Save this offset while searching for a terminator.\r
2649 \r
2650                                         PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.\r
2651 \r
2652                                         if (*s == '<' || !*s)\r
2653                                         {\r
2654                                                 // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one\r
2655                                                 assert(mark != s);\r
2656 \r
2657                                                 if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata))\r
2658                                                 {\r
2659                                                         continue;\r
2660                                                 }\r
2661                                                 else if (PUGI__OPTSET(parse_ws_pcdata_single))\r
2662                                                 {\r
2663                                                         if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;\r
2664                                                 }\r
2665                                         }\r
2666 \r
2667                                         if (!PUGI__OPTSET(parse_trim_pcdata))\r
2668                                                 s = mark;\r
2669                                                         \r
2670                                         if (cursor->parent || PUGI__OPTSET(parse_fragment))\r
2671                                         {\r
2672                                                 PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.\r
2673                                                 cursor->value = s; // Save the offset.\r
2674 \r
2675                                                 s = strconv_pcdata(s);\r
2676                                                                 \r
2677                                                 PUGI__POPNODE(); // Pop since this is a standalone.\r
2678                                                 \r
2679                                                 if (!*s) break;\r
2680                                         }\r
2681                                         else\r
2682                                         {\r
2683                                                 PUGI__SCANFOR(*s == '<'); // '...<'\r
2684                                                 if (!*s) break;\r
2685                                                 \r
2686                                                 ++s;\r
2687                                         }\r
2688 \r
2689                                         // We're after '<'\r
2690                                         goto LOC_TAG;\r
2691                                 }\r
2692                         }\r
2693 \r
2694                         // check that last tag is closed\r
2695                         if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);\r
2696 \r
2697                         return s;\r
2698                 }\r
2699 \r
2700         #ifdef PUGIXML_WCHAR_MODE\r
2701                 static char_t* parse_skip_bom(char_t* s)\r
2702                 {\r
2703                         unsigned int bom = 0xfeff;\r
2704                         return (s[0] == static_cast<wchar_t>(bom)) ? s + 1 : s;\r
2705                 }\r
2706         #else\r
2707                 static char_t* parse_skip_bom(char_t* s)\r
2708                 {\r
2709                         return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s;\r
2710                 }\r
2711         #endif\r
2712 \r
2713                 static bool has_element_node_siblings(xml_node_struct* node)\r
2714                 {\r
2715                         while (node)\r
2716                         {\r
2717                                 xml_node_type type = static_cast<xml_node_type>((node->header & impl::xml_memory_page_type_mask) + 1);\r
2718                                 if (type == node_element) return true;\r
2719 \r
2720                                 node = node->next_sibling;\r
2721                         }\r
2722 \r
2723                         return false;\r
2724                 }\r
2725 \r
2726                 static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk)\r
2727                 {\r
2728                         // allocator object is a part of document object\r
2729                         xml_allocator& alloc = *static_cast<xml_allocator*>(xmldoc);\r
2730 \r
2731                         // early-out for empty documents\r
2732                         if (length == 0)\r
2733                                 return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element);\r
2734 \r
2735                         // get last child of the root before parsing\r
2736                         xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c : 0;\r
2737         \r
2738                         // create parser on stack\r
2739                         xml_parser parser(alloc);\r
2740 \r
2741                         // save last character and make buffer zero-terminated (speeds up parsing)\r
2742                         char_t endch = buffer[length - 1];\r
2743                         buffer[length - 1] = 0;\r
2744                         \r
2745                         // skip BOM to make sure it does not end up as part of parse output\r
2746                         char_t* buffer_data = parse_skip_bom(buffer);\r
2747 \r
2748                         // perform actual parsing\r
2749                         parser.parse_tree(buffer_data, root, optmsk, endch);\r
2750 \r
2751                         // update allocator state\r
2752                         alloc = parser.alloc;\r
2753 \r
2754                         xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);\r
2755                         assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);\r
2756 \r
2757                         if (result)\r
2758                         {\r
2759                                 // since we removed last character, we have to handle the only possible false positive (stray <)\r
2760                                 if (endch == '<')\r
2761                                         return make_parse_result(status_unrecognized_tag, length - 1);\r
2762 \r
2763                                 // check if there are any element nodes parsed\r
2764                                 xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling : root->first_child;\r
2765 \r
2766                                 if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))\r
2767                                         return make_parse_result(status_no_document_element, length - 1);\r
2768                         }\r
2769                         else\r
2770                         {\r
2771                                 // roll back offset if it occurs on a null terminator in the source buffer\r
2772                                 if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0)\r
2773                                         result.offset--;\r
2774                         }\r
2775 \r
2776                         return result;\r
2777                 }\r
2778         };\r
2779 \r
2780         // Output facilities\r
2781         PUGI__FN xml_encoding get_write_native_encoding()\r
2782         {\r
2783         #ifdef PUGIXML_WCHAR_MODE\r
2784                 return get_wchar_encoding();\r
2785         #else\r
2786                 return encoding_utf8;\r
2787         #endif\r
2788         }\r
2789 \r
2790         PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)\r
2791         {\r
2792                 // replace wchar encoding with utf implementation\r
2793                 if (encoding == encoding_wchar) return get_wchar_encoding();\r
2794 \r
2795                 // replace utf16 encoding with utf16 with specific endianness\r
2796                 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;\r
2797 \r
2798                 // replace utf32 encoding with utf32 with specific endianness\r
2799                 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;\r
2800 \r
2801                 // only do autodetection if no explicit encoding is requested\r
2802                 if (encoding != encoding_auto) return encoding;\r
2803 \r
2804                 // assume utf8 encoding\r
2805                 return encoding_utf8;\r
2806         }\r
2807 \r
2808 #ifdef PUGIXML_WCHAR_MODE\r
2809         PUGI__FN size_t get_valid_length(const char_t* data, size_t length)\r
2810         {\r
2811                 assert(length > 0);\r
2812 \r
2813                 // discard last character if it's the lead of a surrogate pair \r
2814                 return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;\r
2815         }\r
2816 \r
2817         PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)\r
2818         {\r
2819                 // only endian-swapping is required\r
2820                 if (need_endian_swap_utf(encoding, get_wchar_encoding()))\r
2821                 {\r
2822                         convert_wchar_endian_swap(r_char, data, length);\r
2823 \r
2824                         return length * sizeof(char_t);\r
2825                 }\r
2826         \r
2827                 // convert to utf8\r
2828                 if (encoding == encoding_utf8)\r
2829                 {\r
2830                         uint8_t* dest = r_u8;\r
2831                         uint8_t* end = utf_decoder<utf8_writer>::decode_wchar_block(data, length, dest);\r
2832 \r
2833                         return static_cast<size_t>(end - dest);\r
2834                 }\r
2835 \r
2836                 // convert to utf16\r
2837                 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)\r
2838                 {\r
2839                         uint16_t* dest = r_u16;\r
2840 \r
2841                         // convert to native utf16\r
2842                         uint16_t* end = utf_decoder<utf16_writer>::decode_wchar_block(data, length, dest);\r
2843 \r
2844                         // swap if necessary\r
2845                         xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;\r
2846 \r
2847                         if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));\r
2848 \r
2849                         return static_cast<size_t>(end - dest) * sizeof(uint16_t);\r
2850                 }\r
2851 \r
2852                 // convert to utf32\r
2853                 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)\r
2854                 {\r
2855                         uint32_t* dest = r_u32;\r
2856 \r
2857                         // convert to native utf32\r
2858                         uint32_t* end = utf_decoder<utf32_writer>::decode_wchar_block(data, length, dest);\r
2859 \r
2860                         // swap if necessary\r
2861                         xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;\r
2862 \r
2863                         if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));\r
2864 \r
2865                         return static_cast<size_t>(end - dest) * sizeof(uint32_t);\r
2866                 }\r
2867 \r
2868                 // convert to latin1\r
2869                 if (encoding == encoding_latin1)\r
2870                 {\r
2871                         uint8_t* dest = r_u8;\r
2872                         uint8_t* end = utf_decoder<latin1_writer>::decode_wchar_block(data, length, dest);\r
2873 \r
2874                         return static_cast<size_t>(end - dest);\r
2875                 }\r
2876 \r
2877                 assert(!"Invalid encoding");\r
2878                 return 0;\r
2879         }\r
2880 #else\r
2881         PUGI__FN size_t get_valid_length(const char_t* data, size_t length)\r
2882         {\r
2883                 assert(length > 4);\r
2884 \r
2885                 for (size_t i = 1; i <= 4; ++i)\r
2886                 {\r
2887                         uint8_t ch = static_cast<uint8_t>(data[length - i]);\r
2888 \r
2889                         // either a standalone character or a leading one\r
2890                         if ((ch & 0xc0) != 0x80) return length - i;\r
2891                 }\r
2892 \r
2893                 // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk\r
2894                 return length;\r
2895         }\r
2896 \r
2897         PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)\r
2898         {\r
2899                 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)\r
2900                 {\r
2901                         uint16_t* dest = r_u16;\r
2902 \r
2903                         // convert to native utf16\r
2904                         uint16_t* end = utf_decoder<utf16_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);\r
2905 \r
2906                         // swap if necessary\r
2907                         xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;\r
2908 \r
2909                         if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));\r
2910 \r
2911                         return static_cast<size_t>(end - dest) * sizeof(uint16_t);\r
2912                 }\r
2913 \r
2914                 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)\r
2915                 {\r
2916                         uint32_t* dest = r_u32;\r
2917 \r
2918                         // convert to native utf32\r
2919                         uint32_t* end = utf_decoder<utf32_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);\r
2920 \r
2921                         // swap if necessary\r
2922                         xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;\r
2923 \r
2924                         if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));\r
2925 \r
2926                         return static_cast<size_t>(end - dest) * sizeof(uint32_t);\r
2927                 }\r
2928 \r
2929                 if (encoding == encoding_latin1)\r
2930                 {\r
2931                         uint8_t* dest = r_u8;\r
2932                         uint8_t* end = utf_decoder<latin1_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);\r
2933 \r
2934                         return static_cast<size_t>(end - dest);\r
2935                 }\r
2936 \r
2937                 assert(!"Invalid encoding");\r
2938                 return 0;\r
2939         }\r
2940 #endif\r
2941 \r
2942         class xml_buffered_writer\r
2943         {\r
2944                 xml_buffered_writer(const xml_buffered_writer&);\r
2945                 xml_buffered_writer& operator=(const xml_buffered_writer&);\r
2946 \r
2947         public:\r
2948                 xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding))\r
2949                 {\r
2950                         PUGI__STATIC_ASSERT(bufcapacity >= 8);\r
2951                 }\r
2952 \r
2953                 ~xml_buffered_writer()\r
2954                 {\r
2955                         flush();\r
2956                 }\r
2957 \r
2958                 void flush()\r
2959                 {\r
2960                         flush(buffer, bufsize);\r
2961                         bufsize = 0;\r
2962                 }\r
2963 \r
2964                 void flush(const char_t* data, size_t size)\r
2965                 {\r
2966                         if (size == 0) return;\r
2967 \r
2968                         // fast path, just write data\r
2969                         if (encoding == get_write_native_encoding())\r
2970                                 writer.write(data, size * sizeof(char_t));\r
2971                         else\r
2972                         {\r
2973                                 // convert chunk\r
2974                                 size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);\r
2975                                 assert(result <= sizeof(scratch));\r
2976 \r
2977                                 // write data\r
2978                                 writer.write(scratch.data_u8, result);\r
2979                         }\r
2980                 }\r
2981 \r
2982                 void write(const char_t* data, size_t length)\r
2983                 {\r
2984                         if (bufsize + length > bufcapacity)\r
2985                         {\r
2986                                 // flush the remaining buffer contents\r
2987                                 flush();\r
2988 \r
2989                                 // handle large chunks\r
2990                                 if (length > bufcapacity)\r
2991                                 {\r
2992                                         if (encoding == get_write_native_encoding())\r
2993                                         {\r
2994                                                 // fast path, can just write data chunk\r
2995                                                 writer.write(data, length * sizeof(char_t));\r
2996                                                 return;\r
2997                                         }\r
2998 \r
2999                                         // need to convert in suitable chunks\r
3000                                         while (length > bufcapacity)\r
3001                                         {\r
3002                                                 // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer\r
3003                                                 // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)\r
3004                                                 size_t chunk_size = get_valid_length(data, bufcapacity);\r
3005 \r
3006                                                 // convert chunk and write\r
3007                                                 flush(data, chunk_size);\r
3008 \r
3009                                                 // iterate\r
3010                                                 data += chunk_size;\r
3011                                                 length -= chunk_size;\r
3012                                         }\r
3013 \r
3014                                         // small tail is copied below\r
3015                                         bufsize = 0;\r
3016                                 }\r
3017                         }\r
3018 \r
3019                         memcpy(buffer + bufsize, data, length * sizeof(char_t));\r
3020                         bufsize += length;\r
3021                 }\r
3022 \r
3023                 void write(const char_t* data)\r
3024                 {\r
3025                         write(data, strlength(data));\r
3026                 }\r
3027 \r
3028                 void write(char_t d0)\r
3029                 {\r
3030                         if (bufsize + 1 > bufcapacity) flush();\r
3031 \r
3032                         buffer[bufsize + 0] = d0;\r
3033                         bufsize += 1;\r
3034                 }\r
3035 \r
3036                 void write(char_t d0, char_t d1)\r
3037                 {\r
3038                         if (bufsize + 2 > bufcapacity) flush();\r
3039 \r
3040                         buffer[bufsize + 0] = d0;\r
3041                         buffer[bufsize + 1] = d1;\r
3042                         bufsize += 2;\r
3043                 }\r
3044 \r
3045                 void write(char_t d0, char_t d1, char_t d2)\r
3046                 {\r
3047                         if (bufsize + 3 > bufcapacity) flush();\r
3048 \r
3049                         buffer[bufsize + 0] = d0;\r
3050                         buffer[bufsize + 1] = d1;\r
3051                         buffer[bufsize + 2] = d2;\r
3052                         bufsize += 3;\r
3053                 }\r
3054 \r
3055                 void write(char_t d0, char_t d1, char_t d2, char_t d3)\r
3056                 {\r
3057                         if (bufsize + 4 > bufcapacity) flush();\r
3058 \r
3059                         buffer[bufsize + 0] = d0;\r
3060                         buffer[bufsize + 1] = d1;\r
3061                         buffer[bufsize + 2] = d2;\r
3062                         buffer[bufsize + 3] = d3;\r
3063                         bufsize += 4;\r
3064                 }\r
3065 \r
3066                 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)\r
3067                 {\r
3068                         if (bufsize + 5 > bufcapacity) flush();\r
3069 \r
3070                         buffer[bufsize + 0] = d0;\r
3071                         buffer[bufsize + 1] = d1;\r
3072                         buffer[bufsize + 2] = d2;\r
3073                         buffer[bufsize + 3] = d3;\r
3074                         buffer[bufsize + 4] = d4;\r
3075                         bufsize += 5;\r
3076                 }\r
3077 \r
3078                 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)\r
3079                 {\r
3080                         if (bufsize + 6 > bufcapacity) flush();\r
3081 \r
3082                         buffer[bufsize + 0] = d0;\r
3083                         buffer[bufsize + 1] = d1;\r
3084                         buffer[bufsize + 2] = d2;\r
3085                         buffer[bufsize + 3] = d3;\r
3086                         buffer[bufsize + 4] = d4;\r
3087                         buffer[bufsize + 5] = d5;\r
3088                         bufsize += 6;\r
3089                 }\r
3090 \r
3091                 // utf8 maximum expansion: x4 (-> utf32)\r
3092                 // utf16 maximum expansion: x2 (-> utf32)\r
3093                 // utf32 maximum expansion: x1\r
3094                 enum\r
3095                 {\r
3096                         bufcapacitybytes =\r
3097                         #ifdef PUGIXML_MEMORY_OUTPUT_STACK\r
3098                                 PUGIXML_MEMORY_OUTPUT_STACK\r
3099                         #else\r
3100                                 10240\r
3101                         #endif\r
3102                         ,\r
3103                         bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)\r
3104                 };\r
3105 \r
3106                 char_t buffer[bufcapacity];\r
3107 \r
3108                 union\r
3109                 {\r
3110                         uint8_t data_u8[4 * bufcapacity];\r
3111                         uint16_t data_u16[2 * bufcapacity];\r
3112                         uint32_t data_u32[bufcapacity];\r
3113                         char_t data_char[bufcapacity];\r
3114                 } scratch;\r
3115 \r
3116                 xml_writer& writer;\r
3117                 size_t bufsize;\r
3118                 xml_encoding encoding;\r
3119         };\r
3120 \r
3121         PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type)\r
3122         {\r
3123                 while (*s)\r
3124                 {\r
3125                         const char_t* prev = s;\r
3126                         \r
3127                         // While *s is a usual symbol\r
3128                         while (!PUGI__IS_CHARTYPEX(*s, type)) ++s;\r
3129                 \r
3130                         writer.write(prev, static_cast<size_t>(s - prev));\r
3131 \r
3132                         switch (*s)\r
3133                         {\r
3134                                 case 0: break;\r
3135                                 case '&':\r
3136                                         writer.write('&', 'a', 'm', 'p', ';');\r
3137                                         ++s;\r
3138                                         break;\r
3139                                 case '<':\r
3140                                         writer.write('&', 'l', 't', ';');\r
3141                                         ++s;\r
3142                                         break;\r
3143                                 case '>':\r
3144                                         writer.write('&', 'g', 't', ';');\r
3145                                         ++s;\r
3146                                         break;\r
3147                                 case '"':\r
3148                                         writer.write('&', 'q', 'u', 'o', 't', ';');\r
3149                                         ++s;\r
3150                                         break;\r
3151                                 default: // s is not a usual symbol\r
3152                                 {\r
3153                                         unsigned int ch = static_cast<unsigned int>(*s++);\r
3154                                         assert(ch < 32);\r
3155 \r
3156                                         writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');\r
3157                                 }\r
3158                         }\r
3159                 }\r
3160         }\r
3161 \r
3162         PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)\r
3163         {\r
3164                 if (flags & format_no_escapes)\r
3165                         writer.write(s);\r
3166                 else\r
3167                         text_output_escaped(writer, s, type);\r
3168         }\r
3169 \r
3170         PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)\r
3171         {\r
3172                 do\r
3173                 {\r
3174                         writer.write('<', '!', '[', 'C', 'D');\r
3175                         writer.write('A', 'T', 'A', '[');\r
3176 \r
3177                         const char_t* prev = s;\r
3178 \r
3179                         // look for ]]> sequence - we can't output it as is since it terminates CDATA\r
3180                         while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;\r
3181 \r
3182                         // skip ]] if we stopped at ]]>, > will go to the next CDATA section\r
3183                         if (*s) s += 2;\r
3184 \r
3185                         writer.write(prev, static_cast<size_t>(s - prev));\r
3186 \r
3187                         writer.write(']', ']', '>');\r
3188                 }\r
3189                 while (*s);\r
3190         }\r
3191 \r
3192         PUGI__FN void node_output_attributes(xml_buffered_writer& writer, const xml_node& node, unsigned int flags)\r
3193         {\r
3194                 const char_t* default_name = PUGIXML_TEXT(":anonymous");\r
3195 \r
3196                 for (xml_attribute a = node.first_attribute(); a; a = a.next_attribute())\r
3197                 {\r
3198                         writer.write(' ');\r
3199                         writer.write(a.name()[0] ? a.name() : default_name);\r
3200                         writer.write('=', '"');\r
3201 \r
3202                         text_output(writer, a.value(), ctx_special_attr, flags);\r
3203 \r
3204                         writer.write('"');\r
3205                 }\r
3206         }\r
3207 \r
3208         PUGI__FN void node_output(xml_buffered_writer& writer, const xml_node& node, const char_t* indent, unsigned int flags, unsigned int depth)\r
3209         {\r
3210                 const char_t* default_name = PUGIXML_TEXT(":anonymous");\r
3211 \r
3212                 if ((flags & format_indent) != 0 && (flags & format_raw) == 0)\r
3213                         for (unsigned int i = 0; i < depth; ++i) writer.write(indent);\r
3214 \r
3215                 switch (node.type())\r
3216                 {\r
3217                 case node_document:\r
3218                 {\r
3219                         for (xml_node n = node.first_child(); n; n = n.next_sibling())\r
3220                                 node_output(writer, n, indent, flags, depth);\r
3221                         break;\r
3222                 }\r
3223                         \r
3224                 case node_element:\r
3225                 {\r
3226                         const char_t* name = node.name()[0] ? node.name() : default_name;\r
3227 \r
3228                         writer.write('<');\r
3229                         writer.write(name);\r
3230 \r
3231                         node_output_attributes(writer, node, flags);\r
3232 \r
3233                         if (flags & format_raw)\r
3234                         {\r
3235                                 if (!node.first_child())\r
3236                                         writer.write(' ', '/', '>');\r
3237                                 else\r
3238                                 {\r
3239                                         writer.write('>');\r
3240 \r
3241                                         for (xml_node n = node.first_child(); n; n = n.next_sibling())\r
3242                                                 node_output(writer, n, indent, flags, depth + 1);\r
3243 \r
3244                                         writer.write('<', '/');\r
3245                                         writer.write(name);\r
3246                                         writer.write('>');\r
3247                                 }\r
3248                         }\r
3249                         else if (!node.first_child())\r
3250                                 writer.write(' ', '/', '>', '\n');\r
3251                         else if (node.first_child() == node.last_child() && (node.first_child().type() == node_pcdata || node.first_child().type() == node_cdata))\r
3252                         {\r
3253                                 writer.write('>');\r
3254 \r
3255                                 if (node.first_child().type() == node_pcdata)\r
3256                                         text_output(writer, node.first_child().value(), ctx_special_pcdata, flags);\r
3257                                 else\r
3258                                         text_output_cdata(writer, node.first_child().value());\r
3259 \r
3260                                 writer.write('<', '/');\r
3261                                 writer.write(name);\r
3262                                 writer.write('>', '\n');\r
3263                         }\r
3264                         else\r
3265                         {\r
3266                                 writer.write('>', '\n');\r
3267                                 \r
3268                                 for (xml_node n = node.first_child(); n; n = n.next_sibling())\r
3269                                         node_output(writer, n, indent, flags, depth + 1);\r
3270 \r
3271                                 if ((flags & format_indent) != 0 && (flags & format_raw) == 0)\r
3272                                         for (unsigned int i = 0; i < depth; ++i) writer.write(indent);\r
3273                                 \r
3274                                 writer.write('<', '/');\r
3275                                 writer.write(name);\r
3276                                 writer.write('>', '\n');\r
3277                         }\r
3278 \r
3279                         break;\r
3280                 }\r
3281                 \r
3282                 case node_pcdata:\r
3283                         text_output(writer, node.value(), ctx_special_pcdata, flags);\r
3284                         if ((flags & format_raw) == 0) writer.write('\n');\r
3285                         break;\r
3286 \r
3287                 case node_cdata:\r
3288                         text_output_cdata(writer, node.value());\r
3289                         if ((flags & format_raw) == 0) writer.write('\n');\r
3290                         break;\r
3291 \r
3292                 case node_comment:\r
3293                         writer.write('<', '!', '-', '-');\r
3294                         writer.write(node.value());\r
3295                         writer.write('-', '-', '>');\r
3296                         if ((flags & format_raw) == 0) writer.write('\n');\r
3297                         break;\r
3298 \r
3299                 case node_pi:\r
3300                 case node_declaration:\r
3301                         writer.write('<', '?');\r
3302                         writer.write(node.name()[0] ? node.name() : default_name);\r
3303 \r
3304                         if (node.type() == node_declaration)\r
3305                         {\r
3306                                 node_output_attributes(writer, node, flags);\r
3307                         }\r
3308                         else if (node.value()[0])\r
3309                         {\r
3310                                 writer.write(' ');\r
3311                                 writer.write(node.value());\r
3312                         }\r
3313 \r
3314                         writer.write('?', '>');\r
3315                         if ((flags & format_raw) == 0) writer.write('\n');\r
3316                         break;\r
3317 \r
3318                 case node_doctype:\r
3319                         writer.write('<', '!', 'D', 'O', 'C');\r
3320                         writer.write('T', 'Y', 'P', 'E');\r
3321 \r
3322                         if (node.value()[0])\r
3323                         {\r
3324                                 writer.write(' ');\r
3325                                 writer.write(node.value());\r
3326                         }\r
3327 \r
3328                         writer.write('>');\r
3329                         if ((flags & format_raw) == 0) writer.write('\n');\r
3330                         break;\r
3331 \r
3332                 default:\r
3333                         assert(!"Invalid node type");\r
3334                 }\r
3335         }\r
3336 \r
3337         inline bool has_declaration(const xml_node& node)\r
3338         {\r
3339                 for (xml_node child = node.first_child(); child; child = child.next_sibling())\r
3340                 {\r
3341                         xml_node_type type = child.type();\r
3342 \r
3343                         if (type == node_declaration) return true;\r
3344                         if (type == node_element) return false;\r
3345                 }\r
3346 \r
3347                 return false;\r
3348         }\r
3349 \r
3350         inline bool allow_insert_child(xml_node_type parent, xml_node_type child)\r
3351         {\r
3352                 if (parent != node_document && parent != node_element) return false;\r
3353                 if (child == node_document || child == node_null) return false;\r
3354                 if (parent != node_document && (child == node_declaration || child == node_doctype)) return false;\r
3355 \r
3356                 return true;\r
3357         }\r
3358 \r
3359         PUGI__FN void recursive_copy_skip(xml_node& dest, const xml_node& source, const xml_node& skip)\r
3360         {\r
3361                 assert(dest.type() == source.type());\r
3362 \r
3363                 switch (source.type())\r
3364                 {\r
3365                 case node_element:\r
3366                 {\r
3367                         dest.set_name(source.name());\r
3368 \r
3369                         for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute())\r
3370                                 dest.append_attribute(a.name()).set_value(a.value());\r
3371 \r
3372                         for (xml_node c = source.first_child(); c; c = c.next_sibling())\r
3373                         {\r
3374                                 if (c == skip) continue;\r
3375 \r
3376                                 xml_node cc = dest.append_child(c.type());\r
3377                                 assert(cc);\r
3378 \r
3379                                 recursive_copy_skip(cc, c, skip);\r
3380                         }\r
3381 \r
3382                         break;\r
3383                 }\r
3384 \r
3385                 case node_pcdata:\r
3386                 case node_cdata:\r
3387                 case node_comment:\r
3388                 case node_doctype:\r
3389                         dest.set_value(source.value());\r
3390                         break;\r
3391 \r
3392                 case node_pi:\r
3393                         dest.set_name(source.name());\r
3394                         dest.set_value(source.value());\r
3395                         break;\r
3396 \r
3397                 case node_declaration:\r
3398                 {\r
3399                         dest.set_name(source.name());\r
3400 \r
3401                         for (xml_attribute a = source.first_attribute(); a; a = a.next_attribute())\r
3402                                 dest.append_attribute(a.name()).set_value(a.value());\r
3403 \r
3404                         break;\r
3405                 }\r
3406 \r
3407                 default:\r
3408                         assert(!"Invalid node type");\r
3409                 }\r
3410         }\r
3411 \r
3412         inline bool is_text_node(xml_node_struct* node)\r
3413         {\r
3414                 xml_node_type type = static_cast<xml_node_type>((node->header & impl::xml_memory_page_type_mask) + 1);\r
3415 \r
3416                 return type == node_pcdata || type == node_cdata;\r
3417         }\r
3418 \r
3419         // get value with conversion functions\r
3420         PUGI__FN int get_integer_base(const char_t* value)\r
3421         {\r
3422                 const char_t* s = value;\r
3423 \r
3424                 while (PUGI__IS_CHARTYPE(*s, ct_space))\r
3425                         s++;\r
3426 \r
3427                 if (*s == '-')\r
3428                         s++;\r
3429 \r
3430                 return (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) ? 16 : 10;\r
3431         }\r
3432 \r
3433         PUGI__FN int get_value_int(const char_t* value, int def)\r
3434         {\r
3435                 if (!value) return def;\r
3436 \r
3437                 int base = get_integer_base(value);\r
3438 \r
3439         #ifdef PUGIXML_WCHAR_MODE\r
3440                 return static_cast<int>(wcstol(value, 0, base));\r
3441         #else\r
3442                 return static_cast<int>(strtol(value, 0, base));\r
3443         #endif\r
3444         }\r
3445 \r
3446         PUGI__FN unsigned int get_value_uint(const char_t* value, unsigned int def)\r
3447         {\r
3448                 if (!value) return def;\r
3449 \r
3450                 int base = get_integer_base(value);\r
3451 \r
3452         #ifdef PUGIXML_WCHAR_MODE\r
3453                 return static_cast<unsigned int>(wcstoul(value, 0, base));\r
3454         #else\r
3455                 return static_cast<unsigned int>(strtoul(value, 0, base));\r
3456         #endif\r
3457         }\r
3458 \r
3459         PUGI__FN double get_value_double(const char_t* value, double def)\r
3460         {\r
3461                 if (!value) return def;\r
3462 \r
3463         #ifdef PUGIXML_WCHAR_MODE\r
3464                 return wcstod(value, 0);\r
3465         #else\r
3466                 return strtod(value, 0);\r
3467         #endif\r
3468         }\r
3469 \r
3470         PUGI__FN float get_value_float(const char_t* value, float def)\r
3471         {\r
3472                 if (!value) return def;\r
3473 \r
3474         #ifdef PUGIXML_WCHAR_MODE\r
3475                 return static_cast<float>(wcstod(value, 0));\r
3476         #else\r
3477                 return static_cast<float>(strtod(value, 0));\r
3478         #endif\r
3479         }\r
3480 \r
3481         PUGI__FN bool get_value_bool(const char_t* value, bool def)\r
3482         {\r
3483                 if (!value) return def;\r
3484 \r
3485                 // only look at first char\r
3486                 char_t first = *value;\r
3487 \r
3488                 // 1*, t* (true), T* (True), y* (yes), Y* (YES)\r
3489                 return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');\r
3490         }\r
3491 \r
3492 #ifdef PUGIXML_HAS_LONG_LONG\r
3493         PUGI__FN long long get_value_llong(const char_t* value, long long def)\r
3494         {\r
3495                 if (!value) return def;\r
3496 \r
3497                 int base = get_integer_base(value);\r
3498 \r
3499         #ifdef PUGIXML_WCHAR_MODE\r
3500                 #ifdef PUGI__MSVC_CRT_VERSION\r
3501                         return _wcstoi64(value, 0, base);\r
3502                 #else\r
3503                         return wcstoll(value, 0, base);\r
3504                 #endif\r
3505         #else\r
3506                 #ifdef PUGI__MSVC_CRT_VERSION\r
3507                         return _strtoi64(value, 0, base);\r
3508                 #else\r
3509                         return strtoll(value, 0, base);\r
3510                 #endif\r
3511         #endif\r
3512         }\r
3513 \r
3514         PUGI__FN unsigned long long get_value_ullong(const char_t* value, unsigned long long def)\r
3515         {\r
3516                 if (!value) return def;\r
3517 \r
3518                 int base = get_integer_base(value);\r
3519 \r
3520         #ifdef PUGIXML_WCHAR_MODE\r
3521                 #ifdef PUGI__MSVC_CRT_VERSION\r
3522                         return _wcstoui64(value, 0, base);\r
3523                 #else\r
3524                         return wcstoull(value, 0, base);\r
3525                 #endif\r
3526         #else\r
3527                 #ifdef PUGI__MSVC_CRT_VERSION\r
3528                         return _strtoui64(value, 0, base);\r
3529                 #else\r
3530                         return strtoull(value, 0, base);\r
3531                 #endif\r
3532         #endif\r
3533         }\r
3534 #endif\r
3535 \r
3536         // set value with conversion functions\r
3537         PUGI__FN bool set_value_buffer(char_t*& dest, uintptr_t& header, uintptr_t header_mask, char (&buf)[128])\r
3538         {\r
3539         #ifdef PUGIXML_WCHAR_MODE\r
3540                 char_t wbuf[128];\r
3541                 impl::widen_ascii(wbuf, buf);\r
3542 \r
3543                 return strcpy_insitu(dest, header, header_mask, wbuf);\r
3544         #else\r
3545                 return strcpy_insitu(dest, header, header_mask, buf);\r
3546         #endif\r
3547         }\r
3548 \r
3549         PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, int value)\r
3550         {\r
3551                 char buf[128];\r
3552                 sprintf(buf, "%d", value);\r
3553         \r
3554                 return set_value_buffer(dest, header, header_mask, buf);\r
3555         }\r
3556 \r
3557         PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, unsigned int value)\r
3558         {\r
3559                 char buf[128];\r
3560                 sprintf(buf, "%u", value);\r
3561 \r
3562                 return set_value_buffer(dest, header, header_mask, buf);\r
3563         }\r
3564 \r
3565         PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, double value)\r
3566         {\r
3567                 char buf[128];\r
3568                 sprintf(buf, "%g", value);\r
3569 \r
3570                 return set_value_buffer(dest, header, header_mask, buf);\r
3571         }\r
3572         \r
3573         PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, bool value)\r
3574         {\r
3575                 return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));\r
3576         }\r
3577 \r
3578 #ifdef PUGIXML_HAS_LONG_LONG\r
3579         PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, long long value)\r
3580         {\r
3581                 char buf[128];\r
3582                 sprintf(buf, "%lld", value);\r
3583         \r
3584                 return set_value_buffer(dest, header, header_mask, buf);\r
3585         }\r
3586 \r
3587         PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, unsigned long long value)\r
3588         {\r
3589                 char buf[128];\r
3590                 sprintf(buf, "%llu", value);\r
3591         \r
3592                 return set_value_buffer(dest, header, header_mask, buf);\r
3593         }\r
3594 #endif\r
3595 \r
3596         // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick\r
3597         PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)\r
3598         {\r
3599         #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)\r
3600                 // there are 64-bit versions of fseek/ftell, let's use them\r
3601                 typedef __int64 length_type;\r
3602 \r
3603                 _fseeki64(file, 0, SEEK_END);\r
3604                 length_type length = _ftelli64(file);\r
3605                 _fseeki64(file, 0, SEEK_SET);\r
3606         #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && !defined(__STRICT_ANSI__)\r
3607                 // there are 64-bit versions of fseek/ftell, let's use them\r
3608                 typedef off64_t length_type;\r
3609 \r
3610                 fseeko64(file, 0, SEEK_END);\r
3611                 length_type length = ftello64(file);\r
3612                 fseeko64(file, 0, SEEK_SET);\r
3613         #else\r
3614                 // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.\r
3615                 typedef long length_type;\r
3616 \r
3617                 fseek(file, 0, SEEK_END);\r
3618                 length_type length = ftell(file);\r
3619                 fseek(file, 0, SEEK_SET);\r
3620         #endif\r
3621 \r
3622                 // check for I/O errors\r
3623                 if (length < 0) return status_io_error;\r
3624                 \r
3625                 // check for overflow\r
3626                 size_t result = static_cast<size_t>(length);\r
3627 \r
3628                 if (static_cast<length_type>(result) != length) return status_out_of_memory;\r
3629 \r
3630                 // finalize\r
3631                 out_result = result;\r
3632 \r
3633                 return status_ok;\r
3634         }\r
3635 \r
3636         PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding) \r
3637         {\r
3638                 // We only need to zero-terminate if encoding conversion does not do it for us\r
3639         #ifdef PUGIXML_WCHAR_MODE\r
3640                 xml_encoding wchar_encoding = get_wchar_encoding();\r
3641 \r
3642                 if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding))\r
3643                 {\r
3644                         size_t length = size / sizeof(char_t);\r
3645 \r
3646                         static_cast<char_t*>(buffer)[length] = 0;\r
3647                         return (length + 1) * sizeof(char_t);\r
3648                 }\r
3649         #else\r
3650                 if (encoding == encoding_utf8)\r
3651                 {\r
3652                         static_cast<char*>(buffer)[size] = 0;\r
3653                         return size + 1;\r
3654                 }\r
3655         #endif\r
3656 \r
3657                 return size;\r
3658         }\r
3659 \r
3660         PUGI__FN xml_parse_result load_file_impl(xml_document& doc, FILE* file, unsigned int options, xml_encoding encoding)\r
3661         {\r
3662                 if (!file) return make_parse_result(status_file_not_found);\r
3663 \r
3664                 // get file size (can result in I/O errors)\r
3665                 size_t size = 0;\r
3666                 xml_parse_status size_status = get_file_size(file, size);\r
3667 \r
3668                 if (size_status != status_ok)\r
3669                 {\r
3670                         fclose(file);\r
3671                         return make_parse_result(size_status);\r
3672                 }\r
3673                 \r
3674                 size_t max_suffix_size = sizeof(char_t);\r
3675 \r
3676                 // allocate buffer for the whole file\r
3677                 char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size));\r
3678 \r
3679                 if (!contents)\r
3680                 {\r
3681                         fclose(file);\r
3682                         return make_parse_result(status_out_of_memory);\r
3683                 }\r
3684 \r
3685                 // read file in memory\r
3686                 size_t read_size = fread(contents, 1, size, file);\r
3687                 fclose(file);\r
3688 \r
3689                 if (read_size != size)\r
3690                 {\r
3691                         xml_memory::deallocate(contents);\r
3692                         return make_parse_result(status_io_error);\r
3693                 }\r
3694 \r
3695                 xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size);\r
3696                 \r
3697                 return doc.load_buffer_inplace_own(contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding);\r
3698         }\r
3699 \r
3700 #ifndef PUGIXML_NO_STL\r
3701         template <typename T> struct xml_stream_chunk\r
3702         {\r
3703                 static xml_stream_chunk* create()\r
3704                 {\r
3705                         void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));\r
3706                         \r
3707                         return new (memory) xml_stream_chunk();\r
3708                 }\r
3709 \r
3710                 static void destroy(void* ptr)\r
3711                 {\r
3712                         xml_stream_chunk* chunk = static_cast<xml_stream_chunk*>(ptr);\r
3713 \r
3714                         // free chunk chain\r
3715                         while (chunk)\r
3716                         {\r
3717                                 xml_stream_chunk* next = chunk->next;\r
3718                                 xml_memory::deallocate(chunk);\r
3719                                 chunk = next;\r
3720                         }\r
3721                 }\r
3722 \r
3723                 xml_stream_chunk(): next(0), size(0)\r
3724                 {\r
3725                 }\r
3726 \r
3727                 xml_stream_chunk* next;\r
3728                 size_t size;\r
3729 \r
3730                 T data[xml_memory_page_size / sizeof(T)];\r
3731         };\r
3732 \r
3733         template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)\r
3734         {\r
3735                 buffer_holder chunks(0, xml_stream_chunk<T>::destroy);\r
3736 \r
3737                 // read file to a chunk list\r
3738                 size_t total = 0;\r
3739                 xml_stream_chunk<T>* last = 0;\r
3740 \r
3741                 while (!stream.eof())\r
3742                 {\r
3743                         // allocate new chunk\r
3744                         xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create();\r
3745                         if (!chunk) return status_out_of_memory;\r
3746 \r
3747                         // append chunk to list\r
3748                         if (last) last = last->next = chunk;\r
3749                         else chunks.data = last = chunk;\r
3750 \r
3751                         // read data to chunk\r
3752                         stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));\r
3753                         chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);\r
3754 \r
3755                         // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors\r
3756                         if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;\r
3757 \r
3758                         // guard against huge files (chunk size is small enough to make this overflow check work)\r
3759                         if (total + chunk->size < total) return status_out_of_memory;\r
3760                         total += chunk->size;\r
3761                 }\r
3762 \r
3763                 size_t max_suffix_size = sizeof(char_t);\r
3764 \r
3765                 // copy chunk list to a contiguous buffer\r
3766                 char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size));\r
3767                 if (!buffer) return status_out_of_memory;\r
3768 \r
3769                 char* write = buffer;\r
3770 \r
3771                 for (xml_stream_chunk<T>* chunk = static_cast<xml_stream_chunk<T>*>(chunks.data); chunk; chunk = chunk->next)\r
3772                 {\r
3773                         assert(write + chunk->size <= buffer + total);\r
3774                         memcpy(write, chunk->data, chunk->size);\r
3775                         write += chunk->size;\r
3776                 }\r
3777 \r
3778                 assert(write == buffer + total);\r
3779 \r
3780                 // return buffer\r
3781                 *out_buffer = buffer;\r
3782                 *out_size = total;\r
3783 \r
3784                 return status_ok;\r
3785         }\r
3786 \r
3787         template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)\r
3788         {\r
3789                 // get length of remaining data in stream\r
3790                 typename std::basic_istream<T>::pos_type pos = stream.tellg();\r
3791                 stream.seekg(0, std::ios::end);\r
3792                 std::streamoff length = stream.tellg() - pos;\r
3793                 stream.seekg(pos);\r
3794 \r
3795                 if (stream.fail() || pos < 0) return status_io_error;\r
3796 \r
3797                 // guard against huge files\r
3798                 size_t read_length = static_cast<size_t>(length);\r
3799 \r
3800                 if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;\r
3801 \r
3802                 size_t max_suffix_size = sizeof(char_t);\r
3803 \r
3804                 // read stream data into memory (guard against stream exceptions with buffer holder)\r
3805                 buffer_holder buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate);\r
3806                 if (!buffer.data) return status_out_of_memory;\r
3807 \r
3808                 stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));\r
3809 \r
3810                 // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors\r
3811                 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;\r
3812 \r
3813                 // return buffer\r
3814                 size_t actual_length = static_cast<size_t>(stream.gcount());\r
3815                 assert(actual_length <= read_length);\r
3816                 \r
3817                 *out_buffer = buffer.release();\r
3818                 *out_size = actual_length * sizeof(T);\r
3819 \r
3820                 return status_ok;\r
3821         }\r
3822 \r
3823         template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document& doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding)\r
3824         {\r
3825                 void* buffer = 0;\r
3826                 size_t size = 0;\r
3827                 xml_parse_status status = status_ok;\r
3828 \r
3829                 // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits)\r
3830                 if (stream.fail()) return make_parse_result(status_io_error);\r
3831 \r
3832                 // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)\r
3833                 if (stream.tellg() < 0)\r
3834                 {\r
3835                         stream.clear(); // clear error flags that could be set by a failing tellg\r
3836                         status = load_stream_data_noseek(stream, &buffer, &size);\r
3837                 }\r
3838                 else\r
3839                         status = load_stream_data_seek(stream, &buffer, &size);\r
3840 \r
3841                 if (status != status_ok) return make_parse_result(status);\r
3842 \r
3843                 xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size);\r
3844                 \r
3845                 return doc.load_buffer_inplace_own(buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding);\r
3846         }\r
3847 #endif\r
3848 \r
3849 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && !defined(__STRICT_ANSI__))\r
3850         PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)\r
3851         {\r
3852                 return _wfopen(path, mode);\r
3853         }\r
3854 #else\r
3855         PUGI__FN char* convert_path_heap(const wchar_t* str)\r
3856         {\r
3857                 assert(str);\r
3858 \r
3859                 // first pass: get length in utf8 characters\r
3860                 size_t length = strlength_wide(str);\r
3861                 size_t size = as_utf8_begin(str, length);\r
3862 \r
3863                 // allocate resulting string\r
3864                 char* result = static_cast<char*>(xml_memory::allocate(size + 1));\r
3865                 if (!result) return 0;\r
3866 \r
3867                 // second pass: convert to utf8\r
3868                 as_utf8_end(result, size, str, length);\r
3869 \r
3870                 return result;\r
3871         }\r
3872 \r
3873         PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)\r
3874         {\r
3875                 // there is no standard function to open wide paths, so our best bet is to try utf8 path\r
3876                 char* path_utf8 = convert_path_heap(path);\r
3877                 if (!path_utf8) return 0;\r
3878 \r
3879                 // convert mode to ASCII (we mirror _wfopen interface)\r
3880                 char mode_ascii[4] = {0};\r
3881                 for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);\r
3882 \r
3883                 // try to open the utf8 path\r
3884                 FILE* result = fopen(path_utf8, mode_ascii);\r
3885 \r
3886                 // free dummy buffer\r
3887                 xml_memory::deallocate(path_utf8);\r
3888 \r
3889                 return result;\r
3890         }\r
3891 #endif\r
3892 \r
3893         PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)\r
3894         {\r
3895                 if (!file) return false;\r
3896 \r
3897                 xml_writer_file writer(file);\r
3898                 doc.save(writer, indent, flags, encoding);\r
3899 \r
3900                 int result = ferror(file);\r
3901 \r
3902                 fclose(file);\r
3903 \r
3904                 return result == 0;\r
3905         }\r
3906 \r
3907         PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer)\r
3908         {\r
3909                 // check input buffer\r
3910                 assert(contents || size == 0);\r
3911 \r
3912                 // get actual encoding\r
3913                 xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);\r
3914 \r
3915                 // get private buffer\r
3916                 char_t* buffer = 0;\r
3917                 size_t length = 0;\r
3918 \r
3919                 if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);\r
3920                 \r
3921                 // delete original buffer if we performed a conversion\r
3922                 if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);\r
3923 \r
3924                 // store buffer for offset_debug\r
3925                 doc->buffer = buffer;\r
3926 \r
3927                 // parse\r
3928                 xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options);\r
3929 \r
3930                 // remember encoding\r
3931                 res.encoding = buffer_encoding;\r
3932 \r
3933                 // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself\r
3934                 if (own || buffer != contents) *out_buffer = buffer;\r
3935 \r
3936                 return res;\r
3937         }\r
3938 PUGI__NS_END\r
3939 \r
3940 namespace pugi\r
3941 {\r
3942         PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_)\r
3943         {\r
3944         }\r
3945 \r
3946         PUGI__FN void xml_writer_file::write(const void* data, size_t size)\r
3947         {\r
3948                 size_t result = fwrite(data, 1, size, static_cast<FILE*>(file));\r
3949                 (void)!result; // unfortunately we can't do proper error handling here\r
3950         }\r
3951 \r
3952 #ifndef PUGIXML_NO_STL\r
3953         PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)\r
3954         {\r
3955         }\r
3956 \r
3957         PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)\r
3958         {\r
3959         }\r
3960 \r
3961         PUGI__FN void xml_writer_stream::write(const void* data, size_t size)\r
3962         {\r
3963                 if (narrow_stream)\r
3964                 {\r
3965                         assert(!wide_stream);\r
3966                         narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));\r
3967                 }\r
3968                 else\r
3969                 {\r
3970                         assert(wide_stream);\r
3971                         assert(size % sizeof(wchar_t) == 0);\r
3972 \r
3973                         wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));\r
3974                 }\r
3975         }\r
3976 #endif\r
3977 \r
3978         PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0)\r
3979         {\r
3980         }\r
3981         \r
3982         PUGI__FN xml_tree_walker::~xml_tree_walker()\r
3983         {\r
3984         }\r
3985 \r
3986         PUGI__FN int xml_tree_walker::depth() const\r
3987         {\r
3988                 return _depth;\r
3989         }\r
3990 \r
3991         PUGI__FN bool xml_tree_walker::begin(xml_node&)\r
3992         {\r
3993                 return true;\r
3994         }\r
3995 \r
3996         PUGI__FN bool xml_tree_walker::end(xml_node&)\r
3997         {\r
3998                 return true;\r
3999         }\r
4000 \r
4001         PUGI__FN xml_attribute::xml_attribute(): _attr(0)\r
4002         {\r
4003         }\r
4004 \r
4005         PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)\r
4006         {\r
4007         }\r
4008 \r
4009         PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***)\r
4010         {\r
4011         }\r
4012 \r
4013         PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const\r
4014         {\r
4015                 return _attr ? unspecified_bool_xml_attribute : 0;\r
4016         }\r
4017 \r
4018         PUGI__FN bool xml_attribute::operator!() const\r
4019         {\r
4020                 return !_attr;\r
4021         }\r
4022 \r
4023         PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const\r
4024         {\r
4025                 return (_attr == r._attr);\r
4026         }\r
4027         \r
4028         PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const\r
4029         {\r
4030                 return (_attr != r._attr);\r
4031         }\r
4032 \r
4033         PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const\r
4034         {\r
4035                 return (_attr < r._attr);\r
4036         }\r
4037         \r
4038         PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const\r
4039         {\r
4040                 return (_attr > r._attr);\r
4041         }\r
4042         \r
4043         PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const\r
4044         {\r
4045                 return (_attr <= r._attr);\r
4046         }\r
4047         \r
4048         PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const\r
4049         {\r
4050                 return (_attr >= r._attr);\r
4051         }\r
4052 \r
4053         PUGI__FN xml_attribute xml_attribute::next_attribute() const\r
4054         {\r
4055                 return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute();\r
4056         }\r
4057 \r
4058         PUGI__FN xml_attribute xml_attribute::previous_attribute() const\r
4059         {\r
4060                 return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute();\r
4061         }\r
4062 \r
4063         PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const\r
4064         {\r
4065                 return (_attr && _attr->value) ? _attr->value : def;\r
4066         }\r
4067 \r
4068         PUGI__FN int xml_attribute::as_int(int def) const\r
4069         {\r
4070                 return impl::get_value_int(_attr ? _attr->value : 0, def);\r
4071         }\r
4072 \r
4073         PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const\r
4074         {\r
4075                 return impl::get_value_uint(_attr ? _attr->value : 0, def);\r
4076         }\r
4077 \r
4078         PUGI__FN double xml_attribute::as_double(double def) const\r
4079         {\r
4080                 return impl::get_value_double(_attr ? _attr->value : 0, def);\r
4081         }\r
4082 \r
4083         PUGI__FN float xml_attribute::as_float(float def) const\r
4084         {\r
4085                 return impl::get_value_float(_attr ? _attr->value : 0, def);\r
4086         }\r
4087 \r
4088         PUGI__FN bool xml_attribute::as_bool(bool def) const\r
4089         {\r
4090                 return impl::get_value_bool(_attr ? _attr->value : 0, def);\r
4091         }\r
4092 \r
4093 #ifdef PUGIXML_HAS_LONG_LONG\r
4094         PUGI__FN long long xml_attribute::as_llong(long long def) const\r
4095         {\r
4096                 return impl::get_value_llong(_attr ? _attr->value : 0, def);\r
4097         }\r
4098 \r
4099         PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const\r
4100         {\r
4101                 return impl::get_value_ullong(_attr ? _attr->value : 0, def);\r
4102         }\r
4103 #endif\r
4104 \r
4105         PUGI__FN bool xml_attribute::empty() const\r
4106         {\r
4107                 return !_attr;\r
4108         }\r
4109 \r
4110         PUGI__FN const char_t* xml_attribute::name() const\r
4111         {\r
4112                 return (_attr && _attr->name) ? _attr->name : PUGIXML_TEXT("");\r
4113         }\r
4114 \r
4115         PUGI__FN const char_t* xml_attribute::value() const\r
4116         {\r
4117                 return (_attr && _attr->value) ? _attr->value : PUGIXML_TEXT("");\r
4118         }\r
4119 \r
4120         PUGI__FN size_t xml_attribute::hash_value() const\r
4121         {\r
4122                 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct));\r
4123         }\r
4124 \r
4125         PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const\r
4126         {\r
4127                 return _attr;\r
4128         }\r
4129 \r
4130         PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs)\r
4131         {\r
4132                 set_value(rhs);\r
4133                 return *this;\r
4134         }\r
4135         \r
4136         PUGI__FN xml_attribute& xml_attribute::operator=(int rhs)\r
4137         {\r
4138                 set_value(rhs);\r
4139                 return *this;\r
4140         }\r
4141 \r
4142         PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs)\r
4143         {\r
4144                 set_value(rhs);\r
4145                 return *this;\r
4146         }\r
4147 \r
4148         PUGI__FN xml_attribute& xml_attribute::operator=(double rhs)\r
4149         {\r
4150                 set_value(rhs);\r
4151                 return *this;\r
4152         }\r
4153         \r
4154         PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs)\r
4155         {\r
4156                 set_value(rhs);\r
4157                 return *this;\r
4158         }\r
4159 \r
4160 #ifdef PUGIXML_HAS_LONG_LONG\r
4161         PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs)\r
4162         {\r
4163                 set_value(rhs);\r
4164                 return *this;\r
4165         }\r
4166 \r
4167         PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs)\r
4168         {\r
4169                 set_value(rhs);\r
4170                 return *this;\r
4171         }\r
4172 #endif\r
4173 \r
4174         PUGI__FN bool xml_attribute::set_name(const char_t* rhs)\r
4175         {\r
4176                 if (!_attr) return false;\r
4177                 \r
4178                 return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs);\r
4179         }\r
4180                 \r
4181         PUGI__FN bool xml_attribute::set_value(const char_t* rhs)\r
4182         {\r
4183                 if (!_attr) return false;\r
4184 \r
4185                 return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);\r
4186         }\r
4187 \r
4188         PUGI__FN bool xml_attribute::set_value(int rhs)\r
4189         {\r
4190                 if (!_attr) return false;\r
4191 \r
4192                 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);\r
4193         }\r
4194 \r
4195         PUGI__FN bool xml_attribute::set_value(unsigned int rhs)\r
4196         {\r
4197                 if (!_attr) return false;\r
4198 \r
4199                 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);\r
4200         }\r
4201 \r
4202         PUGI__FN bool xml_attribute::set_value(double rhs)\r
4203         {\r
4204                 if (!_attr) return false;\r
4205 \r
4206                 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);\r
4207         }\r
4208         \r
4209         PUGI__FN bool xml_attribute::set_value(bool rhs)\r
4210         {\r
4211                 if (!_attr) return false;\r
4212 \r
4213                 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);\r
4214         }\r
4215 \r
4216 #ifdef PUGIXML_HAS_LONG_LONG\r
4217         PUGI__FN bool xml_attribute::set_value(long long rhs)\r
4218         {\r
4219                 if (!_attr) return false;\r
4220 \r
4221                 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);\r
4222         }\r
4223 \r
4224         PUGI__FN bool xml_attribute::set_value(unsigned long long rhs)\r
4225         {\r
4226                 if (!_attr) return false;\r
4227 \r
4228                 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);\r
4229         }\r
4230 #endif\r
4231 \r
4232 #ifdef __BORLANDC__\r
4233         PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs)\r
4234         {\r
4235                 return (bool)lhs && rhs;\r
4236         }\r
4237 \r
4238         PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs)\r
4239         {\r
4240                 return (bool)lhs || rhs;\r
4241         }\r
4242 #endif\r
4243 \r
4244         PUGI__FN xml_node::xml_node(): _root(0)\r
4245         {\r
4246         }\r
4247 \r
4248         PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p)\r
4249         {\r
4250         }\r
4251         \r
4252         PUGI__FN static void unspecified_bool_xml_node(xml_node***)\r
4253         {\r
4254         }\r
4255 \r
4256         PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const\r
4257         {\r
4258                 return _root ? unspecified_bool_xml_node : 0;\r
4259         }\r
4260 \r
4261         PUGI__FN bool xml_node::operator!() const\r
4262         {\r
4263                 return !_root;\r
4264         }\r
4265 \r
4266         PUGI__FN xml_node::iterator xml_node::begin() const\r
4267         {\r
4268                 return iterator(_root ? _root->first_child : 0, _root);\r
4269         }\r
4270 \r
4271         PUGI__FN xml_node::iterator xml_node::end() const\r
4272         {\r
4273                 return iterator(0, _root);\r
4274         }\r
4275         \r
4276         PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const\r
4277         {\r
4278                 return attribute_iterator(_root ? _root->first_attribute : 0, _root);\r
4279         }\r
4280 \r
4281         PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const\r
4282         {\r
4283                 return attribute_iterator(0, _root);\r
4284         }\r
4285         \r
4286         PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const\r
4287         {\r
4288                 return xml_object_range<xml_node_iterator>(begin(), end());\r
4289         }\r
4290 \r
4291         PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const\r
4292         {\r
4293                 return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_));\r
4294         }\r
4295 \r
4296         PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const\r
4297         {\r
4298                 return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end());\r
4299         }\r
4300 \r
4301         PUGI__FN bool xml_node::operator==(const xml_node& r) const\r
4302         {\r
4303                 return (_root == r._root);\r
4304         }\r
4305 \r
4306         PUGI__FN bool xml_node::operator!=(const xml_node& r) const\r
4307         {\r
4308                 return (_root != r._root);\r
4309         }\r
4310 \r
4311         PUGI__FN bool xml_node::operator<(const xml_node& r) const\r
4312         {\r
4313                 return (_root < r._root);\r
4314         }\r
4315         \r
4316         PUGI__FN bool xml_node::operator>(const xml_node& r) const\r
4317         {\r
4318                 return (_root > r._root);\r
4319         }\r
4320         \r
4321         PUGI__FN bool xml_node::operator<=(const xml_node& r) const\r
4322         {\r
4323                 return (_root <= r._root);\r
4324         }\r
4325         \r
4326         PUGI__FN bool xml_node::operator>=(const xml_node& r) const\r
4327         {\r
4328                 return (_root >= r._root);\r
4329         }\r
4330 \r
4331         PUGI__FN bool xml_node::empty() const\r
4332         {\r
4333                 return !_root;\r
4334         }\r
4335         \r
4336         PUGI__FN const char_t* xml_node::name() const\r
4337         {\r
4338                 return (_root && _root->name) ? _root->name : PUGIXML_TEXT("");\r
4339         }\r
4340 \r
4341         PUGI__FN xml_node_type xml_node::type() const\r
4342         {\r
4343                 return _root ? static_cast<xml_node_type>((_root->header & impl::xml_memory_page_type_mask) + 1) : node_null;\r
4344         }\r
4345         \r
4346         PUGI__FN const char_t* xml_node::value() const\r
4347         {\r
4348                 return (_root && _root->value) ? _root->value : PUGIXML_TEXT("");\r
4349         }\r
4350         \r
4351         PUGI__FN xml_node xml_node::child(const char_t* name_) const\r
4352         {\r
4353                 if (!_root) return xml_node();\r
4354 \r
4355                 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)\r
4356                         if (i->name && impl::strequal(name_, i->name)) return xml_node(i);\r
4357 \r
4358                 return xml_node();\r
4359         }\r
4360 \r
4361         PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const\r
4362         {\r
4363                 if (!_root) return xml_attribute();\r
4364 \r
4365                 for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute)\r
4366                         if (i->name && impl::strequal(name_, i->name))\r
4367                                 return xml_attribute(i);\r
4368                 \r
4369                 return xml_attribute();\r
4370         }\r
4371         \r
4372         PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const\r
4373         {\r
4374                 if (!_root) return xml_node();\r
4375                 \r
4376                 for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling)\r
4377                         if (i->name && impl::strequal(name_, i->name)) return xml_node(i);\r
4378 \r
4379                 return xml_node();\r
4380         }\r
4381 \r
4382         PUGI__FN xml_node xml_node::next_sibling() const\r
4383         {\r
4384                 if (!_root) return xml_node();\r
4385                 \r
4386                 if (_root->next_sibling) return xml_node(_root->next_sibling);\r
4387                 else return xml_node();\r
4388         }\r
4389 \r
4390         PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const\r
4391         {\r
4392                 if (!_root) return xml_node();\r
4393                 \r
4394                 for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c)\r
4395                         if (i->name && impl::strequal(name_, i->name)) return xml_node(i);\r
4396 \r
4397                 return xml_node();\r
4398         }\r
4399 \r
4400         PUGI__FN xml_node xml_node::previous_sibling() const\r
4401         {\r
4402                 if (!_root) return xml_node();\r
4403                 \r
4404                 if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c);\r
4405                 else return xml_node();\r
4406         }\r
4407 \r
4408         PUGI__FN xml_node xml_node::parent() const\r
4409         {\r
4410                 return _root ? xml_node(_root->parent) : xml_node();\r
4411         }\r
4412 \r
4413         PUGI__FN xml_node xml_node::root() const\r
4414         {\r
4415                 if (!_root) return xml_node();\r
4416 \r
4417                 impl::xml_memory_page* page = reinterpret_cast<impl::xml_memory_page*>(_root->header & impl::xml_memory_page_pointer_mask);\r
4418 \r
4419                 return xml_node(static_cast<impl::xml_document_struct*>(page->allocator));\r
4420         }\r
4421 \r
4422         PUGI__FN xml_text xml_node::text() const\r
4423         {\r
4424                 return xml_text(_root);\r
4425         }\r
4426 \r
4427         PUGI__FN const char_t* xml_node::child_value() const\r
4428         {\r
4429                 if (!_root) return PUGIXML_TEXT("");\r
4430                 \r
4431                 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)\r
4432                         if (i->value && impl::is_text_node(i))\r
4433                                 return i->value;\r
4434 \r
4435                 return PUGIXML_TEXT("");\r
4436         }\r
4437 \r
4438         PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const\r
4439         {\r
4440                 return child(name_).child_value();\r
4441         }\r
4442 \r
4443         PUGI__FN xml_attribute xml_node::first_attribute() const\r
4444         {\r
4445                 return _root ? xml_attribute(_root->first_attribute) : xml_attribute();\r
4446         }\r
4447 \r
4448         PUGI__FN xml_attribute xml_node::last_attribute() const\r
4449         {\r
4450                 return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute();\r
4451         }\r
4452 \r
4453         PUGI__FN xml_node xml_node::first_child() const\r
4454         {\r
4455                 return _root ? xml_node(_root->first_child) : xml_node();\r
4456         }\r
4457 \r
4458         PUGI__FN xml_node xml_node::last_child() const\r
4459         {\r
4460                 return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node();\r
4461         }\r
4462 \r
4463         PUGI__FN bool xml_node::set_name(const char_t* rhs)\r
4464         {\r
4465                 switch (type())\r
4466                 {\r
4467                 case node_pi:\r
4468                 case node_declaration:\r
4469                 case node_element:\r
4470                         return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs);\r
4471 \r
4472                 default:\r
4473                         return false;\r
4474                 }\r
4475         }\r
4476                 \r
4477         PUGI__FN bool xml_node::set_value(const char_t* rhs)\r
4478         {\r
4479                 switch (type())\r
4480                 {\r
4481                 case node_pi:\r
4482                 case node_cdata:\r
4483                 case node_pcdata:\r
4484                 case node_comment:\r
4485                 case node_doctype:\r
4486                         return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs);\r
4487 \r
4488                 default:\r
4489                         return false;\r
4490                 }\r
4491         }\r
4492 \r
4493         PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_)\r
4494         {\r
4495                 if (type() != node_element && type() != node_declaration) return xml_attribute();\r
4496                 \r
4497                 xml_attribute a(impl::append_attribute_ll(_root, impl::get_allocator(_root)));\r
4498                 a.set_name(name_);\r
4499                 \r
4500                 return a;\r
4501         }\r
4502 \r
4503         PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_)\r
4504         {\r
4505                 if (type() != node_element && type() != node_declaration) return xml_attribute();\r
4506                 \r
4507                 xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root)));\r
4508                 if (!a) return xml_attribute();\r
4509 \r
4510                 a.set_name(name_);\r
4511                 \r
4512                 xml_attribute_struct* head = _root->first_attribute;\r
4513 \r
4514                 if (head)\r
4515                 {\r
4516                         a._attr->prev_attribute_c = head->prev_attribute_c;\r
4517                         head->prev_attribute_c = a._attr;\r
4518                 }\r
4519                 else\r
4520                         a._attr->prev_attribute_c = a._attr;\r
4521                 \r
4522                 a._attr->next_attribute = head;\r
4523                 _root->first_attribute = a._attr;\r
4524                                 \r
4525                 return a;\r
4526         }\r
4527 \r
4528         PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr)\r
4529         {\r
4530                 if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute();\r
4531                 \r
4532                 // check that attribute belongs to *this\r
4533                 xml_attribute_struct* cur = attr._attr;\r
4534 \r
4535                 while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c;\r
4536 \r
4537                 if (cur != _root->first_attribute) return xml_attribute();\r
4538 \r
4539                 xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root)));\r
4540                 if (!a) return xml_attribute();\r
4541 \r
4542                 a.set_name(name_);\r
4543 \r
4544                 if (attr._attr->prev_attribute_c->next_attribute)\r
4545                         attr._attr->prev_attribute_c->next_attribute = a._attr;\r
4546                 else\r
4547                         _root->first_attribute = a._attr;\r
4548                 \r
4549                 a._attr->prev_attribute_c = attr._attr->prev_attribute_c;\r
4550                 a._attr->next_attribute = attr._attr;\r
4551                 attr._attr->prev_attribute_c = a._attr;\r
4552                                 \r
4553                 return a;\r
4554         }\r
4555 \r
4556         PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr)\r
4557         {\r
4558                 if ((type() != node_element && type() != node_declaration) || attr.empty()) return xml_attribute();\r
4559                 \r
4560                 // check that attribute belongs to *this\r
4561                 xml_attribute_struct* cur = attr._attr;\r
4562 \r
4563                 while (cur->prev_attribute_c->next_attribute) cur = cur->prev_attribute_c;\r
4564 \r
4565                 if (cur != _root->first_attribute) return xml_attribute();\r
4566 \r
4567                 xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root)));\r
4568                 if (!a) return xml_attribute();\r
4569 \r
4570                 a.set_name(name_);\r
4571 \r
4572                 if (attr._attr->next_attribute)\r
4573                         attr._attr->next_attribute->prev_attribute_c = a._attr;\r
4574                 else\r
4575                         _root->first_attribute->prev_attribute_c = a._attr;\r
4576                 \r
4577                 a._attr->next_attribute = attr._attr->next_attribute;\r
4578                 a._attr->prev_attribute_c = attr._attr;\r
4579                 attr._attr->next_attribute = a._attr;\r
4580 \r
4581                 return a;\r
4582         }\r
4583 \r
4584         PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto)\r
4585         {\r
4586                 if (!proto) return xml_attribute();\r
4587 \r
4588                 xml_attribute result = append_attribute(proto.name());\r
4589                 result.set_value(proto.value());\r
4590 \r
4591                 return result;\r
4592         }\r
4593 \r
4594         PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto)\r
4595         {\r
4596                 if (!proto) return xml_attribute();\r
4597 \r
4598                 xml_attribute result = prepend_attribute(proto.name());\r
4599                 result.set_value(proto.value());\r
4600 \r
4601                 return result;\r
4602         }\r
4603 \r
4604         PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)\r
4605         {\r
4606                 if (!proto) return xml_attribute();\r
4607 \r
4608                 xml_attribute result = insert_attribute_after(proto.name(), attr);\r
4609                 result.set_value(proto.value());\r
4610 \r
4611                 return result;\r
4612         }\r
4613 \r
4614         PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)\r
4615         {\r
4616                 if (!proto) return xml_attribute();\r
4617 \r
4618                 xml_attribute result = insert_attribute_before(proto.name(), attr);\r
4619                 result.set_value(proto.value());\r
4620 \r
4621                 return result;\r
4622         }\r
4623 \r
4624         PUGI__FN xml_node xml_node::append_child(xml_node_type type_)\r
4625         {\r
4626                 if (!impl::allow_insert_child(this->type(), type_)) return xml_node();\r
4627                 \r
4628                 xml_node n(impl::append_node(_root, impl::get_allocator(_root), type_));\r
4629 \r
4630                 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));\r
4631 \r
4632                 return n;\r
4633         }\r
4634 \r
4635         PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_)\r
4636         {\r
4637                 if (!impl::allow_insert_child(this->type(), type_)) return xml_node();\r
4638                 \r
4639                 xml_node n(impl::allocate_node(impl::get_allocator(_root), type_));\r
4640                 if (!n) return xml_node();\r
4641 \r
4642                 n._root->parent = _root;\r
4643 \r
4644                 xml_node_struct* head = _root->first_child;\r
4645 \r
4646                 if (head)\r
4647                 {\r
4648                         n._root->prev_sibling_c = head->prev_sibling_c;\r
4649                         head->prev_sibling_c = n._root;\r
4650                 }\r
4651                 else\r
4652                         n._root->prev_sibling_c = n._root;\r
4653                 \r
4654                 n._root->next_sibling = head;\r
4655                 _root->first_child = n._root;\r
4656                                 \r
4657                 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));\r
4658 \r
4659                 return n;\r
4660         }\r
4661 \r
4662         PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node)\r
4663         {\r
4664                 if (!impl::allow_insert_child(this->type(), type_)) return xml_node();\r
4665                 if (!node._root || node._root->parent != _root) return xml_node();\r
4666         \r
4667                 xml_node n(impl::allocate_node(impl::get_allocator(_root), type_));\r
4668                 if (!n) return xml_node();\r
4669 \r
4670                 n._root->parent = _root;\r
4671                 \r
4672                 if (node._root->prev_sibling_c->next_sibling)\r
4673                         node._root->prev_sibling_c->next_sibling = n._root;\r
4674                 else\r
4675                         _root->first_child = n._root;\r
4676                 \r
4677                 n._root->prev_sibling_c = node._root->prev_sibling_c;\r
4678                 n._root->next_sibling = node._root;\r
4679                 node._root->prev_sibling_c = n._root;\r
4680 \r
4681                 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));\r
4682 \r
4683                 return n;\r
4684         }\r
4685 \r
4686         PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node)\r
4687         {\r
4688                 if (!impl::allow_insert_child(this->type(), type_)) return xml_node();\r
4689                 if (!node._root || node._root->parent != _root) return xml_node();\r
4690         \r
4691                 xml_node n(impl::allocate_node(impl::get_allocator(_root), type_));\r
4692                 if (!n) return xml_node();\r
4693 \r
4694                 n._root->parent = _root;\r
4695         \r
4696                 if (node._root->next_sibling)\r
4697                         node._root->next_sibling->prev_sibling_c = n._root;\r
4698                 else\r
4699                         _root->first_child->prev_sibling_c = n._root;\r
4700                 \r
4701                 n._root->next_sibling = node._root->next_sibling;\r
4702                 n._root->prev_sibling_c = node._root;\r
4703                 node._root->next_sibling = n._root;\r
4704 \r
4705                 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));\r
4706 \r
4707                 return n;\r
4708         }\r
4709 \r
4710         PUGI__FN xml_node xml_node::append_child(const char_t* name_)\r
4711         {\r
4712                 xml_node result = append_child(node_element);\r
4713 \r
4714                 result.set_name(name_);\r
4715 \r
4716                 return result;\r
4717         }\r
4718 \r
4719         PUGI__FN xml_node xml_node::prepend_child(const char_t* name_)\r
4720         {\r
4721                 xml_node result = prepend_child(node_element);\r
4722 \r
4723                 result.set_name(name_);\r
4724 \r
4725                 return result;\r
4726         }\r
4727 \r
4728         PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node)\r
4729         {\r
4730                 xml_node result = insert_child_after(node_element, node);\r
4731 \r
4732                 result.set_name(name_);\r
4733 \r
4734                 return result;\r
4735         }\r
4736 \r
4737         PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node)\r
4738         {\r
4739                 xml_node result = insert_child_before(node_element, node);\r
4740 \r
4741                 result.set_name(name_);\r
4742 \r
4743                 return result;\r
4744         }\r
4745 \r
4746         PUGI__FN xml_node xml_node::append_copy(const xml_node& proto)\r
4747         {\r
4748                 xml_node result = append_child(proto.type());\r
4749 \r
4750                 if (result) impl::recursive_copy_skip(result, proto, result);\r
4751 \r
4752                 return result;\r
4753         }\r
4754 \r
4755         PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto)\r
4756         {\r
4757                 xml_node result = prepend_child(proto.type());\r
4758 \r
4759                 if (result) impl::recursive_copy_skip(result, proto, result);\r
4760 \r
4761                 return result;\r
4762         }\r
4763 \r
4764         PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)\r
4765         {\r
4766                 xml_node result = insert_child_after(proto.type(), node);\r
4767 \r
4768                 if (result) impl::recursive_copy_skip(result, proto, result);\r
4769 \r
4770                 return result;\r
4771         }\r
4772 \r
4773         PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)\r
4774         {\r
4775                 xml_node result = insert_child_before(proto.type(), node);\r
4776 \r
4777                 if (result) impl::recursive_copy_skip(result, proto, result);\r
4778 \r
4779                 return result;\r
4780         }\r
4781 \r
4782         PUGI__FN bool xml_node::remove_attribute(const char_t* name_)\r
4783         {\r
4784                 return remove_attribute(attribute(name_));\r
4785         }\r
4786 \r
4787         PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a)\r
4788         {\r
4789                 if (!_root || !a._attr) return false;\r
4790 \r
4791                 // check that attribute belongs to *this\r
4792                 xml_attribute_struct* attr = a._attr;\r
4793 \r
4794                 while (attr->prev_attribute_c->next_attribute) attr = attr->prev_attribute_c;\r
4795 \r
4796                 if (attr != _root->first_attribute) return false;\r
4797 \r
4798                 if (a._attr->next_attribute) a._attr->next_attribute->prev_attribute_c = a._attr->prev_attribute_c;\r
4799                 else if (_root->first_attribute) _root->first_attribute->prev_attribute_c = a._attr->prev_attribute_c;\r
4800                 \r
4801                 if (a._attr->prev_attribute_c->next_attribute) a._attr->prev_attribute_c->next_attribute = a._attr->next_attribute;\r
4802                 else _root->first_attribute = a._attr->next_attribute;\r
4803 \r
4804                 impl::destroy_attribute(a._attr, impl::get_allocator(_root));\r
4805 \r
4806                 return true;\r
4807         }\r
4808 \r
4809         PUGI__FN bool xml_node::remove_child(const char_t* name_)\r
4810         {\r
4811                 return remove_child(child(name_));\r
4812         }\r
4813 \r
4814         PUGI__FN bool xml_node::remove_child(const xml_node& n)\r
4815         {\r
4816                 if (!_root || !n._root || n._root->parent != _root) return false;\r
4817 \r
4818                 if (n._root->next_sibling) n._root->next_sibling->prev_sibling_c = n._root->prev_sibling_c;\r
4819                 else if (_root->first_child) _root->first_child->prev_sibling_c = n._root->prev_sibling_c;\r
4820                 \r
4821                 if (n._root->prev_sibling_c->next_sibling) n._root->prev_sibling_c->next_sibling = n._root->next_sibling;\r
4822                 else _root->first_child = n._root->next_sibling;\r
4823                 \r
4824                 impl::destroy_node(n._root, impl::get_allocator(_root));\r
4825 \r
4826                 return true;\r
4827         }\r
4828 \r
4829         PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)\r
4830         {\r
4831                 // append_buffer is only valid for elements/documents\r
4832                 if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root);\r
4833 \r
4834                 // get document node\r
4835                 impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(root()._root);\r
4836                 assert(doc);\r
4837                 \r
4838                 // get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later)\r
4839                 impl::xml_memory_page* page = 0;\r
4840                 impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer), page));\r
4841                 (void)page;\r
4842 \r
4843                 if (!extra) return impl::make_parse_result(status_out_of_memory);\r
4844 \r
4845                 // save name; name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level\r
4846                 char_t* rootname = _root->name;\r
4847                 _root->name = 0;\r
4848 \r
4849                 // parse\r
4850                 char_t* buffer = 0;\r
4851                 xml_parse_result res = impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &buffer);\r
4852 \r
4853                 // restore name\r
4854                 _root->name = rootname;\r
4855 \r
4856                 // add extra buffer to the list\r
4857                 extra->buffer = buffer;\r
4858                 extra->next = doc->extra_buffers;\r
4859                 doc->extra_buffers = extra;\r
4860 \r
4861                 return res;\r
4862         }\r
4863 \r
4864         PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const\r
4865         {\r
4866                 if (!_root) return xml_node();\r
4867                 \r
4868                 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)\r
4869                         if (i->name && impl::strequal(name_, i->name))\r
4870                         {\r
4871                                 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)\r
4872                                         if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value : PUGIXML_TEXT("")))\r
4873                                                 return xml_node(i);\r
4874                         }\r
4875 \r
4876                 return xml_node();\r
4877         }\r
4878 \r
4879         PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const\r
4880         {\r
4881                 if (!_root) return xml_node();\r
4882                 \r
4883                 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)\r
4884                         for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)\r
4885                                 if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value : PUGIXML_TEXT("")))\r
4886                                         return xml_node(i);\r
4887 \r
4888                 return xml_node();\r
4889         }\r
4890 \r
4891 #ifndef PUGIXML_NO_STL\r
4892         PUGI__FN string_t xml_node::path(char_t delimiter) const\r
4893         {\r
4894                 xml_node cursor = *this; // Make a copy.\r
4895                 \r
4896                 string_t result = cursor.name();\r
4897 \r
4898                 while (cursor.parent())\r
4899                 {\r
4900                         cursor = cursor.parent();\r
4901                         \r
4902                         string_t temp = cursor.name();\r
4903                         temp += delimiter;\r
4904                         temp += result;\r
4905                         result.swap(temp);\r
4906                 }\r
4907 \r
4908                 return result;\r
4909         }\r
4910 #endif\r
4911 \r
4912         PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const\r
4913         {\r
4914                 xml_node found = *this; // Current search context.\r
4915 \r
4916                 if (!_root || !path_ || !path_[0]) return found;\r
4917 \r
4918                 if (path_[0] == delimiter)\r
4919                 {\r
4920                         // Absolute path; e.g. '/foo/bar'\r
4921                         found = found.root();\r
4922                         ++path_;\r
4923                 }\r
4924 \r
4925                 const char_t* path_segment = path_;\r
4926 \r
4927                 while (*path_segment == delimiter) ++path_segment;\r
4928 \r
4929                 const char_t* path_segment_end = path_segment;\r
4930 \r
4931                 while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;\r
4932 \r
4933                 if (path_segment == path_segment_end) return found;\r
4934 \r
4935                 const char_t* next_segment = path_segment_end;\r
4936 \r
4937                 while (*next_segment == delimiter) ++next_segment;\r
4938 \r
4939                 if (*path_segment == '.' && path_segment + 1 == path_segment_end)\r
4940                         return found.first_element_by_path(next_segment, delimiter);\r
4941                 else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)\r
4942                         return found.parent().first_element_by_path(next_segment, delimiter);\r
4943                 else\r
4944                 {\r
4945                         for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling)\r
4946                         {\r
4947                                 if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment)))\r
4948                                 {\r
4949                                         xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);\r
4950 \r
4951                                         if (subsearch) return subsearch;\r
4952                                 }\r
4953                         }\r
4954 \r
4955                         return xml_node();\r
4956                 }\r
4957         }\r
4958 \r
4959         PUGI__FN bool xml_node::traverse(xml_tree_walker& walker)\r
4960         {\r
4961                 walker._depth = -1;\r
4962                 \r
4963                 xml_node arg_begin = *this;\r
4964                 if (!walker.begin(arg_begin)) return false;\r
4965 \r
4966                 xml_node cur = first_child();\r
4967                                 \r
4968                 if (cur)\r
4969                 {\r
4970                         ++walker._depth;\r
4971 \r
4972                         do \r
4973                         {\r
4974                                 xml_node arg_for_each = cur;\r
4975                                 if (!walker.for_each(arg_for_each))\r
4976                                         return false;\r
4977                                                 \r
4978                                 if (cur.first_child())\r
4979                                 {\r
4980                                         ++walker._depth;\r
4981                                         cur = cur.first_child();\r
4982                                 }\r
4983                                 else if (cur.next_sibling())\r
4984                                         cur = cur.next_sibling();\r
4985                                 else\r
4986                                 {\r
4987                                         // Borland C++ workaround\r
4988                                         while (!cur.next_sibling() && cur != *this && !cur.parent().empty())\r
4989                                         {\r
4990                                                 --walker._depth;\r
4991                                                 cur = cur.parent();\r
4992                                         }\r
4993                                                 \r
4994                                         if (cur != *this)\r
4995                                                 cur = cur.next_sibling();\r
4996                                 }\r
4997                         }\r
4998                         while (cur && cur != *this);\r
4999                 }\r
5000 \r
5001                 assert(walker._depth == -1);\r
5002 \r
5003                 xml_node arg_end = *this;\r
5004                 return walker.end(arg_end);\r
5005         }\r
5006 \r
5007         PUGI__FN size_t xml_node::hash_value() const\r
5008         {\r
5009                 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct));\r
5010         }\r
5011 \r
5012         PUGI__FN xml_node_struct* xml_node::internal_object() const\r
5013         {\r
5014                 return _root;\r
5015         }\r
5016 \r
5017         PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const\r
5018         {\r
5019                 if (!_root) return;\r
5020 \r
5021                 impl::xml_buffered_writer buffered_writer(writer, encoding);\r
5022 \r
5023                 impl::node_output(buffered_writer, *this, indent, flags, depth);\r
5024         }\r
5025 \r
5026 #ifndef PUGIXML_NO_STL\r
5027         PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const\r
5028         {\r
5029                 xml_writer_stream writer(stream);\r
5030 \r
5031                 print(writer, indent, flags, encoding, depth);\r
5032         }\r
5033 \r
5034         PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const\r
5035         {\r
5036                 xml_writer_stream writer(stream);\r
5037 \r
5038                 print(writer, indent, flags, encoding_wchar, depth);\r
5039         }\r
5040 #endif\r
5041 \r
5042         PUGI__FN ptrdiff_t xml_node::offset_debug() const\r
5043         {\r
5044                 xml_node_struct* r = root()._root;\r
5045 \r
5046                 if (!r) return -1;\r
5047 \r
5048                 const char_t* buffer = static_cast<impl::xml_document_struct*>(r)->buffer;\r
5049 \r
5050                 if (!buffer) return -1;\r
5051 \r
5052                 switch (type())\r
5053                 {\r
5054                 case node_document:\r
5055                         return 0;\r
5056 \r
5057                 case node_element:\r
5058                 case node_declaration:\r
5059                 case node_pi:\r
5060                         return (_root->header & impl::xml_memory_page_name_allocated_mask) ? -1 : _root->name - buffer;\r
5061 \r
5062                 case node_pcdata:\r
5063                 case node_cdata:\r
5064                 case node_comment:\r
5065                 case node_doctype:\r
5066                         return (_root->header & impl::xml_memory_page_value_allocated_mask) ? -1 : _root->value - buffer;\r
5067 \r
5068                 default:\r
5069                         return -1;\r
5070                 }\r
5071         }\r
5072 \r
5073 #ifdef __BORLANDC__\r
5074         PUGI__FN bool operator&&(const xml_node& lhs, bool rhs)\r
5075         {\r
5076                 return (bool)lhs && rhs;\r
5077         }\r
5078 \r
5079         PUGI__FN bool operator||(const xml_node& lhs, bool rhs)\r
5080         {\r
5081                 return (bool)lhs || rhs;\r
5082         }\r
5083 #endif\r
5084 \r
5085         PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root)\r
5086         {\r
5087         }\r
5088 \r
5089         PUGI__FN xml_node_struct* xml_text::_data() const\r
5090         {\r
5091                 if (!_root || impl::is_text_node(_root)) return _root;\r
5092 \r
5093                 for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling)\r
5094                         if (impl::is_text_node(node))\r
5095                                 return node;\r
5096 \r
5097                 return 0;\r
5098         }\r
5099 \r
5100         PUGI__FN xml_node_struct* xml_text::_data_new()\r
5101         {\r
5102                 xml_node_struct* d = _data();\r
5103                 if (d) return d;\r
5104 \r
5105                 return xml_node(_root).append_child(node_pcdata).internal_object();\r
5106         }\r
5107 \r
5108         PUGI__FN xml_text::xml_text(): _root(0)\r
5109         {\r
5110         }\r
5111 \r
5112         PUGI__FN static void unspecified_bool_xml_text(xml_text***)\r
5113         {\r
5114         }\r
5115 \r
5116         PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const\r
5117         {\r
5118                 return _data() ? unspecified_bool_xml_text : 0;\r
5119         }\r
5120 \r
5121         PUGI__FN bool xml_text::operator!() const\r
5122         {\r
5123                 return !_data();\r
5124         }\r
5125 \r
5126         PUGI__FN bool xml_text::empty() const\r
5127         {\r
5128                 return _data() == 0;\r
5129         }\r
5130 \r
5131         PUGI__FN const char_t* xml_text::get() const\r
5132         {\r
5133                 xml_node_struct* d = _data();\r
5134 \r
5135                 return (d && d->value) ? d->value : PUGIXML_TEXT("");\r
5136         }\r
5137 \r
5138         PUGI__FN const char_t* xml_text::as_string(const char_t* def) const\r
5139         {\r
5140                 xml_node_struct* d = _data();\r
5141 \r
5142                 return (d && d->value) ? d->value : def;\r
5143         }\r
5144 \r
5145         PUGI__FN int xml_text::as_int(int def) const\r
5146         {\r
5147                 xml_node_struct* d = _data();\r
5148 \r
5149                 return impl::get_value_int(d ? d->value : 0, def);\r
5150         }\r
5151 \r
5152         PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const\r
5153         {\r
5154                 xml_node_struct* d = _data();\r
5155 \r
5156                 return impl::get_value_uint(d ? d->value : 0, def);\r
5157         }\r
5158 \r
5159         PUGI__FN double xml_text::as_double(double def) const\r
5160         {\r
5161                 xml_node_struct* d = _data();\r
5162 \r
5163                 return impl::get_value_double(d ? d->value : 0, def);\r
5164         }\r
5165 \r
5166         PUGI__FN float xml_text::as_float(float def) const\r
5167         {\r
5168                 xml_node_struct* d = _data();\r
5169 \r
5170                 return impl::get_value_float(d ? d->value : 0, def);\r
5171         }\r
5172 \r
5173         PUGI__FN bool xml_text::as_bool(bool def) const\r
5174         {\r
5175                 xml_node_struct* d = _data();\r
5176 \r
5177                 return impl::get_value_bool(d ? d->value : 0, def);\r
5178         }\r
5179 \r
5180 #ifdef PUGIXML_HAS_LONG_LONG\r
5181         PUGI__FN long long xml_text::as_llong(long long def) const\r
5182         {\r
5183                 xml_node_struct* d = _data();\r
5184 \r
5185                 return impl::get_value_llong(d ? d->value : 0, def);\r
5186         }\r
5187 \r
5188         PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const\r
5189         {\r
5190                 xml_node_struct* d = _data();\r
5191 \r
5192                 return impl::get_value_ullong(d ? d->value : 0, def);\r
5193         }\r
5194 #endif\r
5195 \r
5196         PUGI__FN bool xml_text::set(const char_t* rhs)\r
5197         {\r
5198                 xml_node_struct* dn = _data_new();\r
5199 \r
5200                 return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;\r
5201         }\r
5202 \r
5203         PUGI__FN bool xml_text::set(int rhs)\r
5204         {\r
5205                 xml_node_struct* dn = _data_new();\r
5206 \r
5207                 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;\r
5208         }\r
5209 \r
5210         PUGI__FN bool xml_text::set(unsigned int rhs)\r
5211         {\r
5212                 xml_node_struct* dn = _data_new();\r
5213 \r
5214                 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;\r
5215         }\r
5216 \r
5217         PUGI__FN bool xml_text::set(double rhs)\r
5218         {\r
5219                 xml_node_struct* dn = _data_new();\r
5220 \r
5221                 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;\r
5222         }\r
5223 \r
5224         PUGI__FN bool xml_text::set(bool rhs)\r
5225         {\r
5226                 xml_node_struct* dn = _data_new();\r
5227 \r
5228                 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;\r
5229         }\r
5230 \r
5231 #ifdef PUGIXML_HAS_LONG_LONG\r
5232         PUGI__FN bool xml_text::set(long long rhs)\r
5233         {\r
5234                 xml_node_struct* dn = _data_new();\r
5235 \r
5236                 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;\r
5237         }\r
5238 \r
5239         PUGI__FN bool xml_text::set(unsigned long long rhs)\r
5240         {\r
5241                 xml_node_struct* dn = _data_new();\r
5242 \r
5243                 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;\r
5244         }\r
5245 #endif\r
5246 \r
5247         PUGI__FN xml_text& xml_text::operator=(const char_t* rhs)\r
5248         {\r
5249                 set(rhs);\r
5250                 return *this;\r
5251         }\r
5252 \r
5253         PUGI__FN xml_text& xml_text::operator=(int rhs)\r
5254         {\r
5255                 set(rhs);\r
5256                 return *this;\r
5257         }\r
5258 \r
5259         PUGI__FN xml_text& xml_text::operator=(unsigned int rhs)\r
5260         {\r
5261                 set(rhs);\r
5262                 return *this;\r
5263         }\r
5264 \r
5265         PUGI__FN xml_text& xml_text::operator=(double rhs)\r
5266         {\r
5267                 set(rhs);\r
5268                 return *this;\r
5269         }\r
5270 \r
5271         PUGI__FN xml_text& xml_text::operator=(bool rhs)\r
5272         {\r
5273                 set(rhs);\r
5274                 return *this;\r
5275         }\r
5276 \r
5277 #ifdef PUGIXML_HAS_LONG_LONG\r
5278         PUGI__FN xml_text& xml_text::operator=(long long rhs)\r
5279         {\r
5280                 set(rhs);\r
5281                 return *this;\r
5282         }\r
5283 \r
5284         PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs)\r
5285         {\r
5286                 set(rhs);\r
5287                 return *this;\r
5288         }\r
5289 #endif\r
5290 \r
5291         PUGI__FN xml_node xml_text::data() const\r
5292         {\r
5293                 return xml_node(_data());\r
5294         }\r
5295 \r
5296 #ifdef __BORLANDC__\r
5297         PUGI__FN bool operator&&(const xml_text& lhs, bool rhs)\r
5298         {\r
5299                 return (bool)lhs && rhs;\r
5300         }\r
5301 \r
5302         PUGI__FN bool operator||(const xml_text& lhs, bool rhs)\r
5303         {\r
5304                 return (bool)lhs || rhs;\r
5305         }\r
5306 #endif\r
5307 \r
5308         PUGI__FN xml_node_iterator::xml_node_iterator()\r
5309         {\r
5310         }\r
5311 \r
5312         PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())\r
5313         {\r
5314         }\r
5315 \r
5316         PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)\r
5317         {\r
5318         }\r
5319 \r
5320         PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const\r
5321         {\r
5322                 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;\r
5323         }\r
5324         \r
5325         PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const\r
5326         {\r
5327                 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;\r
5328         }\r
5329 \r
5330         PUGI__FN xml_node& xml_node_iterator::operator*() const\r
5331         {\r
5332                 assert(_wrap._root);\r
5333                 return _wrap;\r
5334         }\r
5335 \r
5336         PUGI__FN xml_node* xml_node_iterator::operator->() const\r
5337         {\r
5338                 assert(_wrap._root);\r
5339                 return const_cast<xml_node*>(&_wrap); // BCC32 workaround\r
5340         }\r
5341 \r
5342         PUGI__FN const xml_node_iterator& xml_node_iterator::operator++()\r
5343         {\r
5344                 assert(_wrap._root);\r
5345                 _wrap._root = _wrap._root->next_sibling;\r
5346                 return *this;\r
5347         }\r
5348 \r
5349         PUGI__FN xml_node_iterator xml_node_iterator::operator++(int)\r
5350         {\r
5351                 xml_node_iterator temp = *this;\r
5352                 ++*this;\r
5353                 return temp;\r
5354         }\r
5355 \r
5356         PUGI__FN const xml_node_iterator& xml_node_iterator::operator--()\r
5357         {\r
5358                 _wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child();\r
5359                 return *this;\r
5360         }\r
5361 \r
5362         PUGI__FN xml_node_iterator xml_node_iterator::operator--(int)\r
5363         {\r
5364                 xml_node_iterator temp = *this;\r
5365                 --*this;\r
5366                 return temp;\r
5367         }\r
5368 \r
5369         PUGI__FN xml_attribute_iterator::xml_attribute_iterator()\r
5370         {\r
5371         }\r
5372 \r
5373         PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)\r
5374         {\r
5375         }\r
5376 \r
5377         PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)\r
5378         {\r
5379         }\r
5380 \r
5381         PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const\r
5382         {\r
5383                 return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root;\r
5384         }\r
5385         \r
5386         PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const\r
5387         {\r
5388                 return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root;\r
5389         }\r
5390 \r
5391         PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const\r
5392         {\r
5393                 assert(_wrap._attr);\r
5394                 return _wrap;\r
5395         }\r
5396 \r
5397         PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const\r
5398         {\r
5399                 assert(_wrap._attr);\r
5400                 return const_cast<xml_attribute*>(&_wrap); // BCC32 workaround\r
5401         }\r
5402 \r
5403         PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator++()\r
5404         {\r
5405                 assert(_wrap._attr);\r
5406                 _wrap._attr = _wrap._attr->next_attribute;\r
5407                 return *this;\r
5408         }\r
5409 \r
5410         PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int)\r
5411         {\r
5412                 xml_attribute_iterator temp = *this;\r
5413                 ++*this;\r
5414                 return temp;\r
5415         }\r
5416 \r
5417         PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator--()\r
5418         {\r
5419                 _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute();\r
5420                 return *this;\r
5421         }\r
5422 \r
5423         PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int)\r
5424         {\r
5425                 xml_attribute_iterator temp = *this;\r
5426                 --*this;\r
5427                 return temp;\r
5428         }\r
5429 \r
5430         PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0)\r
5431         {\r
5432         }\r
5433 \r
5434         PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name)\r
5435         {\r
5436         }\r
5437 \r
5438         PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name)\r
5439         {\r
5440         }\r
5441 \r
5442         PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const\r
5443         {\r
5444                 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;\r
5445         }\r
5446 \r
5447         PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const\r
5448         {\r
5449                 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;\r
5450         }\r
5451 \r
5452         PUGI__FN xml_node& xml_named_node_iterator::operator*() const\r
5453         {\r
5454                 assert(_wrap._root);\r
5455                 return _wrap;\r
5456         }\r
5457 \r
5458         PUGI__FN xml_node* xml_named_node_iterator::operator->() const\r
5459         {\r
5460                 assert(_wrap._root);\r
5461                 return const_cast<xml_node*>(&_wrap); // BCC32 workaround\r
5462         }\r
5463 \r
5464         PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator++()\r
5465         {\r
5466                 assert(_wrap._root);\r
5467                 _wrap = _wrap.next_sibling(_name);\r
5468                 return *this;\r
5469         }\r
5470 \r
5471         PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int)\r
5472         {\r
5473                 xml_named_node_iterator temp = *this;\r
5474                 ++*this;\r
5475                 return temp;\r
5476         }\r
5477 \r
5478         PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator--()\r
5479         {\r
5480                 if (_wrap._root)\r
5481                         _wrap = _wrap.previous_sibling(_name);\r
5482                 else\r
5483                 {\r
5484                         _wrap = _parent.last_child();\r
5485 \r
5486                         if (!impl::strequal(_wrap.name(), _name))\r
5487                                 _wrap = _wrap.previous_sibling(_name);\r
5488                 }\r
5489 \r
5490                 return *this;\r
5491         }\r
5492 \r
5493         PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int)\r
5494         {\r
5495                 xml_named_node_iterator temp = *this;\r
5496                 --*this;\r
5497                 return temp;\r
5498         }\r
5499 \r
5500         PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)\r
5501         {\r
5502         }\r
5503 \r
5504         PUGI__FN xml_parse_result::operator bool() const\r
5505         {\r
5506                 return status == status_ok;\r
5507         }\r
5508 \r
5509         PUGI__FN const char* xml_parse_result::description() const\r
5510         {\r
5511                 switch (status)\r
5512                 {\r
5513                 case status_ok: return "No error";\r
5514 \r
5515                 case status_file_not_found: return "File was not found";\r
5516                 case status_io_error: return "Error reading from file/stream";\r
5517                 case status_out_of_memory: return "Could not allocate memory";\r
5518                 case status_internal_error: return "Internal error occurred";\r
5519 \r
5520                 case status_unrecognized_tag: return "Could not determine tag type";\r
5521 \r
5522                 case status_bad_pi: return "Error parsing document declaration/processing instruction";\r
5523                 case status_bad_comment: return "Error parsing comment";\r
5524                 case status_bad_cdata: return "Error parsing CDATA section";\r
5525                 case status_bad_doctype: return "Error parsing document type declaration";\r
5526                 case status_bad_pcdata: return "Error parsing PCDATA section";\r
5527                 case status_bad_start_element: return "Error parsing start element tag";\r
5528                 case status_bad_attribute: return "Error parsing element attribute";\r
5529                 case status_bad_end_element: return "Error parsing end element tag";\r
5530                 case status_end_element_mismatch: return "Start-end tags mismatch";\r
5531 \r
5532                 case status_append_invalid_root: return "Unable to append nodes: root is not an element or document";\r
5533 \r
5534                 case status_no_document_element: return "No document element found";\r
5535 \r
5536                 default: return "Unknown error";\r
5537                 }\r
5538         }\r
5539 \r
5540         PUGI__FN xml_document::xml_document(): _buffer(0)\r
5541         {\r
5542                 create();\r
5543         }\r
5544 \r
5545         PUGI__FN xml_document::~xml_document()\r
5546         {\r
5547                 destroy();\r
5548         }\r
5549 \r
5550         PUGI__FN void xml_document::reset()\r
5551         {\r
5552                 destroy();\r
5553                 create();\r
5554         }\r
5555 \r
5556         PUGI__FN void xml_document::reset(const xml_document& proto)\r
5557         {\r
5558                 reset();\r
5559 \r
5560                 for (xml_node cur = proto.first_child(); cur; cur = cur.next_sibling())\r
5561                         append_copy(cur);\r
5562         }\r
5563 \r
5564         PUGI__FN void xml_document::create()\r
5565         {\r
5566         assert(!_root);\r
5567 \r
5568                 // initialize sentinel page\r
5569                 PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + impl::xml_memory_page_alignment <= sizeof(_memory));\r
5570 \r
5571                 // align upwards to page boundary\r
5572                 void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(_memory) + (impl::xml_memory_page_alignment - 1)) & ~(impl::xml_memory_page_alignment - 1));\r
5573 \r
5574                 // prepare page structure\r
5575                 impl::xml_memory_page* page = impl::xml_memory_page::construct(page_memory);\r
5576                 assert(page);\r
5577 \r
5578                 page->busy_size = impl::xml_memory_page_size;\r
5579 \r
5580                 // allocate new root\r
5581                 _root = new (page->data) impl::xml_document_struct(page);\r
5582                 _root->prev_sibling_c = _root;\r
5583 \r
5584                 // setup sentinel page\r
5585                 page->allocator = static_cast<impl::xml_document_struct*>(_root);\r
5586         }\r
5587 \r
5588         PUGI__FN void xml_document::destroy()\r
5589         {\r
5590         assert(_root);\r
5591 \r
5592                 // destroy static storage\r
5593                 if (_buffer)\r
5594                 {\r
5595                         impl::xml_memory::deallocate(_buffer);\r
5596                         _buffer = 0;\r
5597                 }\r
5598 \r
5599                 // destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator)\r
5600                 for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next)\r
5601                 {\r
5602                         if (extra->buffer) impl::xml_memory::deallocate(extra->buffer);\r
5603                 }\r
5604 \r
5605                 // destroy dynamic storage, leave sentinel page (it's in static memory)\r
5606         impl::xml_memory_page* root_page = reinterpret_cast<impl::xml_memory_page*>(_root->header & impl::xml_memory_page_pointer_mask);\r
5607         assert(root_page && !root_page->prev && !root_page->memory);\r
5608 \r
5609         for (impl::xml_memory_page* page = root_page->next; page; )\r
5610         {\r
5611             impl::xml_memory_page* next = page->next;\r
5612 \r
5613             impl::xml_allocator::deallocate_page(page);\r
5614 \r
5615             page = next;\r
5616         }\r
5617 \r
5618         _root = 0;\r
5619         }\r
5620 \r
5621 #ifndef PUGIXML_NO_STL\r
5622         PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)\r
5623         {\r
5624                 reset();\r
5625 \r
5626                 return impl::load_stream_impl(*this, stream, options, encoding);\r
5627         }\r
5628 \r
5629         PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)\r
5630         {\r
5631                 reset();\r
5632 \r
5633                 return impl::load_stream_impl(*this, stream, options, encoding_wchar);\r
5634         }\r
5635 #endif\r
5636 \r
5637         PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options)\r
5638         {\r
5639                 // Force native encoding (skip autodetection)\r
5640         #ifdef PUGIXML_WCHAR_MODE\r
5641                 xml_encoding encoding = encoding_wchar;\r
5642         #else\r
5643                 xml_encoding encoding = encoding_utf8;\r
5644         #endif\r
5645 \r
5646                 return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding);\r
5647         }\r
5648 \r
5649         PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding)\r
5650         {\r
5651                 reset();\r
5652 \r
5653                 FILE* file = fopen(path_, "rb");\r
5654 \r
5655                 return impl::load_file_impl(*this, file, options, encoding);\r
5656         }\r
5657 \r
5658         PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding)\r
5659         {\r
5660                 reset();\r
5661 \r
5662                 FILE* file = impl::open_file_wide(path_, L"rb");\r
5663 \r
5664                 return impl::load_file_impl(*this, file, options, encoding);\r
5665         }\r
5666 \r
5667         PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)\r
5668         {\r
5669                 reset();\r
5670 \r
5671                 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer);\r
5672         }\r
5673 \r
5674         PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)\r
5675         {\r
5676                 reset();\r
5677 \r
5678                 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer);\r
5679         }\r
5680                 \r
5681         PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)\r
5682         {\r
5683                 reset();\r
5684 \r
5685                 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer);\r
5686         }\r
5687 \r
5688         PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const\r
5689         {\r
5690                 impl::xml_buffered_writer buffered_writer(writer, encoding);\r
5691 \r
5692                 if ((flags & format_write_bom) && encoding != encoding_latin1)\r
5693                 {\r
5694                         // BOM always represents the codepoint U+FEFF, so just write it in native encoding\r
5695                 #ifdef PUGIXML_WCHAR_MODE\r
5696                         unsigned int bom = 0xfeff;\r
5697                         buffered_writer.write(static_cast<wchar_t>(bom));\r
5698                 #else\r
5699                         buffered_writer.write('\xef', '\xbb', '\xbf');\r
5700                 #endif\r
5701                 }\r
5702 \r
5703                 if (!(flags & format_no_declaration) && !impl::has_declaration(*this))\r
5704                 {\r
5705                         buffered_writer.write(PUGIXML_TEXT("<?xml version=\"1.0\""));\r
5706                         if (encoding == encoding_latin1) buffered_writer.write(PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));\r
5707                         buffered_writer.write('?', '>');\r
5708                         if (!(flags & format_raw)) buffered_writer.write('\n');\r
5709                 }\r
5710 \r
5711                 impl::node_output(buffered_writer, *this, indent, flags, 0);\r
5712         }\r
5713 \r
5714 #ifndef PUGIXML_NO_STL\r
5715         PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const\r
5716         {\r
5717                 xml_writer_stream writer(stream);\r
5718 \r
5719                 save(writer, indent, flags, encoding);\r
5720         }\r
5721 \r
5722         PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const\r
5723         {\r
5724                 xml_writer_stream writer(stream);\r
5725 \r
5726                 save(writer, indent, flags, encoding_wchar);\r
5727         }\r
5728 #endif\r
5729 \r
5730         PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const\r
5731         {\r
5732                 FILE* file = fopen(path_, (flags & format_save_file_text) ? "w" : "wb");\r
5733                 return impl::save_file_impl(*this, file, indent, flags, encoding);\r
5734         }\r
5735 \r
5736         PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const\r
5737         {\r
5738                 FILE* file = impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb");\r
5739                 return impl::save_file_impl(*this, file, indent, flags, encoding);\r
5740         }\r
5741 \r
5742         PUGI__FN xml_node xml_document::document_element() const\r
5743         {\r
5744         assert(_root);\r
5745 \r
5746                 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)\r
5747                         if ((i->header & impl::xml_memory_page_type_mask) + 1 == node_element)\r
5748                                 return xml_node(i);\r
5749 \r
5750                 return xml_node();\r
5751         }\r
5752 \r
5753 #ifndef PUGIXML_NO_STL\r
5754         PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)\r
5755         {\r
5756                 assert(str);\r
5757 \r
5758                 return impl::as_utf8_impl(str, impl::strlength_wide(str));\r
5759         }\r
5760 \r
5761         PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)\r
5762         {\r
5763                 return impl::as_utf8_impl(str.c_str(), str.size());\r
5764         }\r
5765         \r
5766         PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str)\r
5767         {\r
5768                 assert(str);\r
5769 \r
5770                 return impl::as_wide_impl(str, strlen(str));\r
5771         }\r
5772         \r
5773         PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str)\r
5774         {\r
5775                 return impl::as_wide_impl(str.c_str(), str.size());\r
5776         }\r
5777 #endif\r
5778 \r
5779         PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)\r
5780         {\r
5781                 impl::xml_memory::allocate = allocate;\r
5782                 impl::xml_memory::deallocate = deallocate;\r
5783         }\r
5784 \r
5785         PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function()\r
5786         {\r
5787                 return impl::xml_memory::allocate;\r
5788         }\r
5789 \r
5790         PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()\r
5791         {\r
5792                 return impl::xml_memory::deallocate;\r
5793         }\r
5794 }\r
5795 \r
5796 #if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))\r
5797 namespace std\r
5798 {\r
5799         // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)\r
5800         PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&)\r
5801         {\r
5802                 return std::bidirectional_iterator_tag();\r
5803         }\r
5804 \r
5805         PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&)\r
5806         {\r
5807                 return std::bidirectional_iterator_tag();\r
5808         }\r
5809 \r
5810         PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&)\r
5811         {\r
5812                 return std::bidirectional_iterator_tag();\r
5813         }\r
5814 }\r
5815 #endif\r
5816 \r
5817 #if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)\r
5818 namespace std\r
5819 {\r
5820         // Workarounds for (non-standard) iterator category detection\r
5821         PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&)\r
5822         {\r
5823                 return std::bidirectional_iterator_tag();\r
5824         }\r
5825 \r
5826         PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&)\r
5827         {\r
5828                 return std::bidirectional_iterator_tag();\r
5829         }\r
5830 \r
5831         PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&)\r
5832         {\r
5833                 return std::bidirectional_iterator_tag();\r
5834         }\r
5835 }\r
5836 #endif\r
5837 \r
5838 #ifndef PUGIXML_NO_XPATH\r
5839 \r
5840 // STL replacements\r
5841 PUGI__NS_BEGIN\r
5842         struct equal_to\r
5843         {\r
5844                 template <typename T> bool operator()(const T& lhs, const T& rhs) const\r
5845                 {\r
5846                         return lhs == rhs;\r
5847                 }\r
5848         };\r
5849 \r
5850         struct not_equal_to\r
5851         {\r
5852                 template <typename T> bool operator()(const T& lhs, const T& rhs) const\r
5853                 {\r
5854                         return lhs != rhs;\r
5855                 }\r
5856         };\r
5857 \r
5858         struct less\r
5859         {\r
5860                 template <typename T> bool operator()(const T& lhs, const T& rhs) const\r
5861                 {\r
5862                         return lhs < rhs;\r
5863                 }\r
5864         };\r
5865 \r
5866         struct less_equal\r
5867         {\r
5868                 template <typename T> bool operator()(const T& lhs, const T& rhs) const\r
5869                 {\r
5870                         return lhs <= rhs;\r
5871                 }\r
5872         };\r
5873 \r
5874         template <typename T> void swap(T& lhs, T& rhs)\r
5875         {\r
5876                 T temp = lhs;\r
5877                 lhs = rhs;\r
5878                 rhs = temp;\r
5879         }\r
5880 \r
5881         template <typename I, typename Pred> I min_element(I begin, I end, const Pred& pred)\r
5882         {\r
5883                 I result = begin;\r
5884 \r
5885                 for (I it = begin + 1; it != end; ++it)\r
5886                         if (pred(*it, *result))\r
5887                                 result = it;\r
5888 \r
5889                 return result;\r
5890         }\r
5891 \r
5892         template <typename I> void reverse(I begin, I end)\r
5893         {\r
5894                 while (end - begin > 1) swap(*begin++, *--end);\r
5895         }\r
5896 \r
5897         template <typename I> I unique(I begin, I end)\r
5898         {\r
5899                 // fast skip head\r
5900                 while (end - begin > 1 && *begin != *(begin + 1)) begin++;\r
5901 \r
5902                 if (begin == end) return begin;\r
5903 \r
5904                 // last written element\r
5905                 I write = begin++; \r
5906 \r
5907                 // merge unique elements\r
5908                 while (begin != end)\r
5909                 {\r
5910                         if (*begin != *write)\r
5911                                 *++write = *begin++;\r
5912                         else\r
5913                                 begin++;\r
5914                 }\r
5915 \r
5916                 // past-the-end (write points to live element)\r
5917                 return write + 1;\r
5918         }\r
5919 \r
5920         template <typename I> void copy_backwards(I begin, I end, I target)\r
5921         {\r
5922                 while (begin != end) *--target = *--end;\r
5923         }\r
5924 \r
5925         template <typename I, typename Pred, typename T> void insertion_sort(I begin, I end, const Pred& pred, T*)\r
5926         {\r
5927                 assert(begin != end);\r
5928 \r
5929                 for (I it = begin + 1; it != end; ++it)\r
5930                 {\r
5931                         T val = *it;\r
5932 \r
5933                         if (pred(val, *begin))\r
5934                         {\r
5935                                 // move to front\r
5936                                 copy_backwards(begin, it, it + 1);\r
5937                                 *begin = val;\r
5938                         }\r
5939                         else\r
5940                         {\r
5941                                 I hole = it;\r
5942 \r
5943                                 // move hole backwards\r
5944                                 while (pred(val, *(hole - 1)))\r
5945                                 {\r
5946                                         *hole = *(hole - 1);\r
5947                                         hole--;\r
5948                                 }\r
5949 \r
5950                                 // fill hole with element\r
5951                                 *hole = val;\r
5952                         }\r
5953                 }\r
5954         }\r
5955 \r
5956         // std variant for elements with ==\r
5957         template <typename I, typename Pred> void partition(I begin, I middle, I end, const Pred& pred, I* out_eqbeg, I* out_eqend)\r
5958         {\r
5959                 I eqbeg = middle, eqend = middle + 1;\r
5960 \r
5961                 // expand equal range\r
5962                 while (eqbeg != begin && *(eqbeg - 1) == *eqbeg) --eqbeg;\r
5963                 while (eqend != end && *eqend == *eqbeg) ++eqend;\r
5964 \r
5965                 // process outer elements\r
5966                 I ltend = eqbeg, gtbeg = eqend;\r
5967 \r
5968                 for (;;)\r
5969                 {\r
5970                         // find the element from the right side that belongs to the left one\r
5971                         for (; gtbeg != end; ++gtbeg)\r
5972                                 if (!pred(*eqbeg, *gtbeg))\r
5973                                 {\r
5974                                         if (*gtbeg == *eqbeg) swap(*gtbeg, *eqend++);\r
5975                                         else break;\r
5976                                 }\r
5977 \r
5978                         // find the element from the left side that belongs to the right one\r
5979                         for (; ltend != begin; --ltend)\r
5980                                 if (!pred(*(ltend - 1), *eqbeg))\r
5981                                 {\r
5982                                         if (*eqbeg == *(ltend - 1)) swap(*(ltend - 1), *--eqbeg);\r
5983                                         else break;\r
5984                                 }\r
5985 \r
5986                         // scanned all elements\r
5987                         if (gtbeg == end && ltend == begin)\r
5988                         {\r
5989                                 *out_eqbeg = eqbeg;\r
5990                                 *out_eqend = eqend;\r
5991                                 return;\r
5992                         }\r
5993 \r
5994                         // make room for elements by moving equal area\r
5995                         if (gtbeg == end)\r
5996                         {\r
5997                                 if (--ltend != --eqbeg) swap(*ltend, *eqbeg);\r
5998                                 swap(*eqbeg, *--eqend);\r
5999                         }\r
6000                         else if (ltend == begin)\r
6001                         {\r
6002                                 if (eqend != gtbeg) swap(*eqbeg, *eqend);\r
6003                                 ++eqend;\r
6004                                 swap(*gtbeg++, *eqbeg++);\r
6005                         }\r
6006                         else swap(*gtbeg++, *--ltend);\r
6007                 }\r
6008         }\r
6009 \r
6010         template <typename I, typename Pred> void median3(I first, I middle, I last, const Pred& pred)\r
6011         {\r
6012                 if (pred(*middle, *first)) swap(*middle, *first);\r
6013                 if (pred(*last, *middle)) swap(*last, *middle);\r
6014                 if (pred(*middle, *first)) swap(*middle, *first);\r
6015         }\r
6016 \r
6017         template <typename I, typename Pred> void median(I first, I middle, I last, const Pred& pred)\r
6018         {\r
6019                 if (last - first <= 40)\r
6020                 {\r
6021                         // median of three for small chunks\r
6022                         median3(first, middle, last, pred);\r
6023                 }\r
6024                 else\r
6025                 {\r
6026                         // median of nine\r
6027                         size_t step = (last - first + 1) / 8;\r
6028 \r
6029                         median3(first, first + step, first + 2 * step, pred);\r
6030                         median3(middle - step, middle, middle + step, pred);\r
6031                         median3(last - 2 * step, last - step, last, pred);\r
6032                         median3(first + step, middle, last - step, pred);\r
6033                 }\r
6034         }\r
6035 \r
6036         template <typename I, typename Pred> void sort(I begin, I end, const Pred& pred)\r
6037         {\r
6038                 // sort large chunks\r
6039                 while (end - begin > 32)\r
6040                 {\r
6041                         // find median element\r
6042                         I middle = begin + (end - begin) / 2;\r
6043                         median(begin, middle, end - 1, pred);\r
6044 \r
6045                         // partition in three chunks (< = >)\r
6046                         I eqbeg, eqend;\r
6047                         partition(begin, middle, end, pred, &eqbeg, &eqend);\r
6048 \r
6049                         // loop on larger half\r
6050                         if (eqbeg - begin > end - eqend)\r
6051                         {\r
6052                                 sort(eqend, end, pred);\r
6053                                 end = eqbeg;\r
6054                         }\r
6055                         else\r
6056                         {\r
6057                                 sort(begin, eqbeg, pred);\r
6058                                 begin = eqend;\r
6059                         }\r
6060                 }\r
6061 \r
6062                 // insertion sort small chunk\r
6063                 if (begin != end) insertion_sort(begin, end, pred, &*begin);\r
6064         }\r
6065 PUGI__NS_END\r
6066 \r
6067 // Allocator used for AST and evaluation stacks\r
6068 PUGI__NS_BEGIN\r
6069         struct xpath_memory_block\r
6070         {       \r
6071                 xpath_memory_block* next;\r
6072 \r
6073                 char data[\r
6074         #ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE\r
6075                         PUGIXML_MEMORY_XPATH_PAGE_SIZE\r
6076         #else\r
6077                         4096\r
6078         #endif\r
6079                 ];\r
6080         };\r
6081                 \r
6082         class xpath_allocator\r
6083         {\r
6084                 xpath_memory_block* _root;\r
6085                 size_t _root_size;\r
6086 \r
6087         public:\r
6088         #ifdef PUGIXML_NO_EXCEPTIONS\r
6089                 jmp_buf* error_handler;\r
6090         #endif\r
6091 \r
6092                 xpath_allocator(xpath_memory_block* root, size_t root_size = 0): _root(root), _root_size(root_size)\r
6093                 {\r
6094                 #ifdef PUGIXML_NO_EXCEPTIONS\r
6095                         error_handler = 0;\r
6096                 #endif\r
6097                 }\r
6098                 \r
6099                 void* allocate_nothrow(size_t size)\r
6100                 {\r
6101                         const size_t block_capacity = sizeof(_root->data);\r
6102 \r
6103                         // align size so that we're able to store pointers in subsequent blocks\r
6104                         size = (size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);\r
6105 \r
6106                         if (_root_size + size <= block_capacity)\r
6107                         {\r
6108                                 void* buf = _root->data + _root_size;\r
6109                                 _root_size += size;\r
6110                                 return buf;\r
6111                         }\r
6112                         else\r
6113                         {\r
6114                                 size_t block_data_size = (size > block_capacity) ? size : block_capacity;\r
6115                                 size_t block_size = block_data_size + offsetof(xpath_memory_block, data);\r
6116 \r
6117                                 xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size));\r
6118                                 if (!block) return 0;\r
6119                                 \r
6120                                 block->next = _root;\r
6121                                 \r
6122                                 _root = block;\r
6123                                 _root_size = size;\r
6124                                 \r
6125                                 return block->data;\r
6126                         }\r
6127                 }\r
6128 \r
6129                 void* allocate(size_t size)\r
6130                 {\r
6131                         void* result = allocate_nothrow(size);\r
6132 \r
6133                         if (!result)\r
6134                         {\r
6135                         #ifdef PUGIXML_NO_EXCEPTIONS\r
6136                                 assert(error_handler);\r
6137                                 longjmp(*error_handler, 1);\r
6138                         #else\r
6139                                 throw std::bad_alloc();\r
6140                         #endif\r
6141                         }\r
6142 \r
6143                         return result;\r
6144                 }\r
6145 \r
6146                 void* reallocate(void* ptr, size_t old_size, size_t new_size)\r
6147                 {\r
6148                         // align size so that we're able to store pointers in subsequent blocks\r
6149                         old_size = (old_size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);\r
6150                         new_size = (new_size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);\r
6151 \r
6152                         // we can only reallocate the last object\r
6153                         assert(ptr == 0 || static_cast<char*>(ptr) + old_size == _root->data + _root_size);\r
6154 \r
6155                         // adjust root size so that we have not allocated the object at all\r
6156                         bool only_object = (_root_size == old_size);\r
6157 \r
6158                         if (ptr) _root_size -= old_size;\r
6159 \r
6160                         // allocate a new version (this will obviously reuse the memory if possible)\r
6161                         void* result = allocate(new_size);\r
6162                         assert(result);\r
6163 \r
6164                         // we have a new block\r
6165                         if (result != ptr && ptr)\r
6166                         {\r
6167                                 // copy old data\r
6168                                 assert(new_size >= old_size);\r
6169                                 memcpy(result, ptr, old_size);\r
6170 \r
6171                                 // free the previous page if it had no other objects\r
6172                                 if (only_object)\r
6173                                 {\r
6174                                         assert(_root->data == result);\r
6175                                         assert(_root->next);\r
6176 \r
6177                                         xpath_memory_block* next = _root->next->next;\r
6178 \r
6179                                         if (next)\r
6180                                         {\r
6181                                                 // deallocate the whole page, unless it was the first one\r
6182                                                 xml_memory::deallocate(_root->next);\r
6183                                                 _root->next = next;\r
6184                                         }\r
6185                                 }\r
6186                         }\r
6187 \r
6188                         return result;\r
6189                 }\r
6190 \r
6191                 void revert(const xpath_allocator& state)\r
6192                 {\r
6193                         // free all new pages\r
6194                         xpath_memory_block* cur = _root;\r
6195 \r
6196                         while (cur != state._root)\r
6197                         {\r
6198                                 xpath_memory_block* next = cur->next;\r
6199 \r
6200                                 xml_memory::deallocate(cur);\r
6201 \r
6202                                 cur = next;\r
6203                         }\r
6204 \r
6205                         // restore state\r
6206                         _root = state._root;\r
6207                         _root_size = state._root_size;\r
6208                 }\r
6209 \r
6210                 void release()\r
6211                 {\r
6212                         xpath_memory_block* cur = _root;\r
6213                         assert(cur);\r
6214 \r
6215                         while (cur->next)\r
6216                         {\r
6217                                 xpath_memory_block* next = cur->next;\r
6218 \r
6219                                 xml_memory::deallocate(cur);\r
6220 \r
6221                                 cur = next;\r
6222                         }\r
6223                 }\r
6224         };\r
6225 \r
6226         struct xpath_allocator_capture\r
6227         {\r
6228                 xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc)\r
6229                 {\r
6230                 }\r
6231 \r
6232                 ~xpath_allocator_capture()\r
6233                 {\r
6234                         _target->revert(_state);\r
6235                 }\r
6236 \r
6237                 xpath_allocator* _target;\r
6238                 xpath_allocator _state;\r
6239         };\r
6240 \r
6241         struct xpath_stack\r
6242         {\r
6243                 xpath_allocator* result;\r
6244                 xpath_allocator* temp;\r
6245         };\r
6246 \r
6247         struct xpath_stack_data\r
6248         {\r
6249                 xpath_memory_block blocks[2];\r
6250                 xpath_allocator result;\r
6251                 xpath_allocator temp;\r
6252                 xpath_stack stack;\r
6253 \r
6254         #ifdef PUGIXML_NO_EXCEPTIONS\r
6255                 jmp_buf error_handler;\r
6256         #endif\r
6257 \r
6258                 xpath_stack_data(): result(blocks + 0), temp(blocks + 1)\r
6259                 {\r
6260                         blocks[0].next = blocks[1].next = 0;\r
6261 \r
6262                         stack.result = &result;\r
6263                         stack.temp = &temp;\r
6264 \r
6265                 #ifdef PUGIXML_NO_EXCEPTIONS\r
6266                         result.error_handler = temp.error_handler = &error_handler;\r
6267                 #endif\r
6268                 }\r
6269 \r
6270                 ~xpath_stack_data()\r
6271                 {\r
6272                         result.release();\r
6273                         temp.release();\r
6274                 }\r
6275         };\r
6276 PUGI__NS_END\r
6277 \r
6278 // String class\r
6279 PUGI__NS_BEGIN\r
6280         class xpath_string\r
6281         {\r
6282                 const char_t* _buffer;\r
6283                 bool _uses_heap;\r
6284 \r
6285                 static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc)\r
6286                 {\r
6287                         char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));\r
6288                         assert(result);\r
6289 \r
6290                         memcpy(result, string, length * sizeof(char_t));\r
6291                         result[length] = 0;\r
6292 \r
6293                         return result;\r
6294                 }\r
6295 \r
6296                 static char_t* duplicate_string(const char_t* string, xpath_allocator* alloc)\r
6297                 {\r
6298                         return duplicate_string(string, strlength(string), alloc);\r
6299                 }\r
6300 \r
6301         public:\r
6302                 xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false)\r
6303                 {\r
6304                 }\r
6305 \r
6306                 explicit xpath_string(const char_t* str, xpath_allocator* alloc)\r
6307                 {\r
6308                         bool empty_ = (*str == 0);\r
6309 \r
6310                         _buffer = empty_ ? PUGIXML_TEXT("") : duplicate_string(str, alloc);\r
6311                         _uses_heap = !empty_;\r
6312                 }\r
6313 \r
6314                 explicit xpath_string(const char_t* str, bool use_heap): _buffer(str), _uses_heap(use_heap)\r
6315                 {\r
6316                 }\r
6317 \r
6318                 xpath_string(const char_t* begin, const char_t* end, xpath_allocator* alloc)\r
6319                 {\r
6320                         assert(begin <= end);\r
6321 \r
6322                         bool empty_ = (begin == end);\r
6323 \r
6324                         _buffer = empty_ ? PUGIXML_TEXT("") : duplicate_string(begin, static_cast<size_t>(end - begin), alloc);\r
6325                         _uses_heap = !empty_;\r
6326                 }\r
6327 \r
6328                 void append(const xpath_string& o, xpath_allocator* alloc)\r
6329                 {\r
6330                         // skip empty sources\r
6331                         if (!*o._buffer) return;\r
6332 \r
6333                         // fast append for constant empty target and constant source\r
6334                         if (!*_buffer && !_uses_heap && !o._uses_heap)\r
6335                         {\r
6336                                 _buffer = o._buffer;\r
6337                         }\r
6338                         else\r
6339                         {\r
6340                                 // need to make heap copy\r
6341                                 size_t target_length = strlength(_buffer);\r
6342                                 size_t source_length = strlength(o._buffer);\r
6343                                 size_t result_length = target_length + source_length;\r
6344 \r
6345                                 // allocate new buffer\r
6346                                 char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t)));\r
6347                                 assert(result);\r
6348 \r
6349                                 // append first string to the new buffer in case there was no reallocation\r
6350                                 if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));\r
6351 \r
6352                                 // append second string to the new buffer\r
6353                                 memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));\r
6354                                 result[result_length] = 0;\r
6355 \r
6356                                 // finalize\r
6357                                 _buffer = result;\r
6358                                 _uses_heap = true;\r
6359                         }\r
6360                 }\r
6361 \r
6362                 const char_t* c_str() const\r
6363                 {\r
6364                         return _buffer;\r
6365                 }\r
6366 \r
6367                 size_t length() const\r
6368                 {\r
6369                         return strlength(_buffer);\r
6370                 }\r
6371                 \r
6372                 char_t* data(xpath_allocator* alloc)\r
6373                 {\r
6374                         // make private heap copy\r
6375                         if (!_uses_heap)\r
6376                         {\r
6377                                 _buffer = duplicate_string(_buffer, alloc);\r
6378                                 _uses_heap = true;\r
6379                         }\r
6380 \r
6381                         return const_cast<char_t*>(_buffer);\r
6382                 }\r
6383 \r
6384                 bool empty() const\r
6385                 {\r
6386                         return *_buffer == 0;\r
6387                 }\r
6388 \r
6389                 bool operator==(const xpath_string& o) const\r
6390                 {\r
6391                         return strequal(_buffer, o._buffer);\r
6392                 }\r
6393 \r
6394                 bool operator!=(const xpath_string& o) const\r
6395                 {\r
6396                         return !strequal(_buffer, o._buffer);\r
6397                 }\r
6398 \r
6399                 bool uses_heap() const\r
6400                 {\r
6401                         return _uses_heap;\r
6402                 }\r
6403         };\r
6404 \r
6405         PUGI__FN xpath_string xpath_string_const(const char_t* str)\r
6406         {\r
6407                 return xpath_string(str, false);\r
6408         }\r
6409 PUGI__NS_END\r
6410 \r
6411 PUGI__NS_BEGIN\r
6412         PUGI__FN bool starts_with(const char_t* string, const char_t* pattern)\r
6413         {\r
6414                 while (*pattern && *string == *pattern)\r
6415                 {\r
6416                         string++;\r
6417                         pattern++;\r
6418                 }\r
6419 \r
6420                 return *pattern == 0;\r
6421         }\r
6422 \r
6423         PUGI__FN const char_t* find_char(const char_t* s, char_t c)\r
6424         {\r
6425         #ifdef PUGIXML_WCHAR_MODE\r
6426                 return wcschr(s, c);\r
6427         #else\r
6428                 return strchr(s, c);\r
6429         #endif\r
6430         }\r
6431 \r
6432         PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p)\r
6433         {\r
6434         #ifdef PUGIXML_WCHAR_MODE\r
6435                 // MSVC6 wcsstr bug workaround (if s is empty it always returns 0)\r
6436                 return (*p == 0) ? s : wcsstr(s, p);\r
6437         #else\r
6438                 return strstr(s, p);\r
6439         #endif\r
6440         }\r
6441 \r
6442         // Converts symbol to lower case, if it is an ASCII one\r
6443         PUGI__FN char_t tolower_ascii(char_t ch)\r
6444         {\r
6445                 return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch;\r
6446         }\r
6447 \r
6448         PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc)\r
6449         {\r
6450                 if (na.attribute())\r
6451                         return xpath_string_const(na.attribute().value());\r
6452                 else\r
6453                 {\r
6454                         const xml_node& n = na.node();\r
6455 \r
6456                         switch (n.type())\r
6457                         {\r
6458                         case node_pcdata:\r
6459                         case node_cdata:\r
6460                         case node_comment:\r
6461                         case node_pi:\r
6462                                 return xpath_string_const(n.value());\r
6463                         \r
6464                         case node_document:\r
6465                         case node_element:\r
6466                         {\r
6467                                 xpath_string result;\r
6468 \r
6469                                 xml_node cur = n.first_child();\r
6470                                 \r
6471                                 while (cur && cur != n)\r
6472                                 {\r
6473                                         if (cur.type() == node_pcdata || cur.type() == node_cdata)\r
6474                                                 result.append(xpath_string_const(cur.value()), alloc);\r
6475 \r
6476                                         if (cur.first_child())\r
6477                                                 cur = cur.first_child();\r
6478                                         else if (cur.next_sibling())\r
6479                                                 cur = cur.next_sibling();\r
6480                                         else\r
6481                                         {\r
6482                                                 while (!cur.next_sibling() && cur != n)\r
6483                                                         cur = cur.parent();\r
6484 \r
6485                                                 if (cur != n) cur = cur.next_sibling();\r
6486                                         }\r
6487                                 }\r
6488                                 \r
6489                                 return result;\r
6490                         }\r
6491                         \r
6492                         default:\r
6493                                 return xpath_string();\r
6494                         }\r
6495                 }\r
6496         }\r
6497         \r
6498         PUGI__FN unsigned int node_height(xml_node n)\r
6499         {\r
6500                 unsigned int result = 0;\r
6501                 \r
6502                 while (n)\r
6503                 {\r
6504                         ++result;\r
6505                         n = n.parent();\r
6506                 }\r
6507                 \r
6508                 return result;\r
6509         }\r
6510         \r
6511         PUGI__FN bool node_is_before(xml_node ln, unsigned int lh, xml_node rn, unsigned int rh)\r
6512         {\r
6513                 // normalize heights\r
6514                 for (unsigned int i = rh; i < lh; i++) ln = ln.parent();\r
6515                 for (unsigned int j = lh; j < rh; j++) rn = rn.parent();\r
6516                 \r
6517                 // one node is the ancestor of the other\r
6518                 if (ln == rn) return lh < rh;\r
6519                 \r
6520                 // find common ancestor\r
6521                 while (ln.parent() != rn.parent())\r
6522                 {\r
6523                         ln = ln.parent();\r
6524                         rn = rn.parent();\r
6525                 }\r
6526 \r
6527                 // there is no common ancestor (the shared parent is null), nodes are from different documents\r
6528                 if (!ln.parent()) return ln < rn;\r
6529 \r
6530                 // determine sibling order\r
6531                 for (; ln; ln = ln.next_sibling())\r
6532                         if (ln == rn)\r
6533                                 return true;\r
6534                                 \r
6535                 return false;\r
6536         }\r
6537 \r
6538         PUGI__FN bool node_is_ancestor(xml_node parent, xml_node node)\r
6539         {\r
6540                 while (node && node != parent) node = node.parent();\r
6541 \r
6542                 return parent && node == parent;\r
6543         }\r
6544 \r
6545         PUGI__FN const void* document_order(const xpath_node& xnode)\r
6546         {\r
6547                 xml_node_struct* node = xnode.node().internal_object();\r
6548 \r
6549                 if (node)\r
6550                 {\r
6551                         if (node->name && (node->header & xml_memory_page_name_allocated_mask) == 0) return node->name;\r
6552                         if (node->value && (node->header & xml_memory_page_value_allocated_mask) == 0) return node->value;\r
6553                         return 0;\r
6554                 }\r
6555 \r
6556                 xml_attribute_struct* attr = xnode.attribute().internal_object();\r
6557 \r
6558                 if (attr)\r
6559                 {\r
6560                         if ((attr->header & xml_memory_page_name_allocated_mask) == 0) return attr->name;\r
6561                         if ((attr->header & xml_memory_page_value_allocated_mask) == 0) return attr->value;\r
6562                         return 0;\r
6563                 }\r
6564 \r
6565                 return 0;\r
6566         }\r
6567         \r
6568         struct document_order_comparator\r
6569         {\r
6570                 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const\r
6571                 {\r
6572                         // optimized document order based check\r
6573                         const void* lo = document_order(lhs);\r
6574                         const void* ro = document_order(rhs);\r
6575 \r
6576                         if (lo && ro) return lo < ro;\r
6577 \r
6578                         // slow comparison\r
6579                         xml_node ln = lhs.node(), rn = rhs.node();\r
6580 \r
6581                         // compare attributes\r
6582                         if (lhs.attribute() && rhs.attribute())\r
6583                         {\r
6584                                 // shared parent\r
6585                                 if (lhs.parent() == rhs.parent())\r
6586                                 {\r
6587                                         // determine sibling order\r
6588                                         for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())\r
6589                                                 if (a == rhs.attribute())\r
6590                                                         return true;\r
6591                                         \r
6592                                         return false;\r
6593                                 }\r
6594                                 \r
6595                                 // compare attribute parents\r
6596                                 ln = lhs.parent();\r
6597                                 rn = rhs.parent();\r
6598                         }\r
6599                         else if (lhs.attribute())\r
6600                         {\r
6601                                 // attributes go after the parent element\r
6602                                 if (lhs.parent() == rhs.node()) return false;\r
6603                                 \r
6604                                 ln = lhs.parent();\r
6605                         }\r
6606                         else if (rhs.attribute())\r
6607                         {\r
6608                                 // attributes go after the parent element\r
6609                                 if (rhs.parent() == lhs.node()) return true;\r
6610                                 \r
6611                                 rn = rhs.parent();\r
6612                         }\r
6613 \r
6614                         if (ln == rn) return false;\r
6615                         \r
6616                         unsigned int lh = node_height(ln);\r
6617                         unsigned int rh = node_height(rn);\r
6618                         \r
6619                         return node_is_before(ln, lh, rn, rh);\r
6620                 }\r
6621         };\r
6622 \r
6623         struct duplicate_comparator\r
6624         {\r
6625                 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const\r
6626                 {\r
6627                         if (lhs.attribute()) return rhs.attribute() ? lhs.attribute() < rhs.attribute() : true;\r
6628                         else return rhs.attribute() ? false : lhs.node() < rhs.node();\r
6629                 }\r
6630         };\r
6631         \r
6632         PUGI__FN double gen_nan()\r
6633         {\r
6634         #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))\r
6635                 union { float f; uint32_t i; } u[sizeof(float) == sizeof(uint32_t) ? 1 : -1];\r
6636                 u[0].i = 0x7fc00000;\r
6637                 return u[0].f;\r
6638         #else\r
6639                 // fallback\r
6640                 const volatile double zero = 0.0;\r
6641                 return zero / zero;\r
6642         #endif\r
6643         }\r
6644         \r
6645         PUGI__FN bool is_nan(double value)\r
6646         {\r
6647         #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)\r
6648                 return !!_isnan(value);\r
6649         #elif defined(fpclassify) && defined(FP_NAN)\r
6650                 return fpclassify(value) == FP_NAN;\r
6651         #else\r
6652                 // fallback\r
6653                 const volatile double v = value;\r
6654                 return v != v;\r
6655         #endif\r
6656         }\r
6657         \r
6658         PUGI__FN const char_t* convert_number_to_string_special(double value)\r
6659         {\r
6660         #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)\r
6661                 if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;\r
6662                 if (_isnan(value)) return PUGIXML_TEXT("NaN");\r
6663                 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");\r
6664         #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)\r
6665                 switch (fpclassify(value))\r
6666                 {\r
6667                 case FP_NAN:\r
6668                         return PUGIXML_TEXT("NaN");\r
6669 \r
6670                 case FP_INFINITE:\r
6671                         return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");\r
6672 \r
6673                 case FP_ZERO:\r
6674                         return PUGIXML_TEXT("0");\r
6675 \r
6676                 default:\r
6677                         return 0;\r
6678                 }\r
6679         #else\r
6680                 // fallback\r
6681                 const volatile double v = value;\r
6682 \r
6683                 if (v == 0) return PUGIXML_TEXT("0");\r
6684                 if (v != v) return PUGIXML_TEXT("NaN");\r
6685                 if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");\r
6686                 return 0;\r
6687         #endif\r
6688         }\r
6689         \r
6690         PUGI__FN bool convert_number_to_boolean(double value)\r
6691         {\r
6692                 return (value != 0 && !is_nan(value));\r
6693         }\r
6694         \r
6695         PUGI__FN void truncate_zeros(char* begin, char* end)\r
6696         {\r
6697                 while (begin != end && end[-1] == '0') end--;\r
6698 \r
6699                 *end = 0;\r
6700         }\r
6701 \r
6702         // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent\r
6703 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)\r
6704         PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)\r
6705         {\r
6706                 // get base values\r
6707                 int sign, exponent;\r
6708                 _ecvt_s(buffer, buffer_size, value, DBL_DIG + 1, &exponent, &sign);\r
6709 \r
6710                 // truncate redundant zeros\r
6711                 truncate_zeros(buffer, buffer + strlen(buffer));\r
6712 \r
6713                 // fill results\r
6714                 *out_mantissa = buffer;\r
6715                 *out_exponent = exponent;\r
6716         }\r
6717 #else\r
6718         PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)\r
6719         {\r
6720                 // get a scientific notation value with IEEE DBL_DIG decimals\r
6721                 sprintf(buffer, "%.*e", DBL_DIG, value);\r
6722                 assert(strlen(buffer) < buffer_size);\r
6723                 (void)!buffer_size;\r
6724 \r
6725                 // get the exponent (possibly negative)\r
6726                 char* exponent_string = strchr(buffer, 'e');\r
6727                 assert(exponent_string);\r
6728 \r
6729                 int exponent = atoi(exponent_string + 1);\r
6730 \r
6731                 // extract mantissa string: skip sign\r
6732                 char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;\r
6733                 assert(mantissa[0] != '0' && mantissa[1] == '.');\r
6734 \r
6735                 // divide mantissa by 10 to eliminate integer part\r
6736                 mantissa[1] = mantissa[0];\r
6737                 mantissa++;\r
6738                 exponent++;\r
6739 \r
6740                 // remove extra mantissa digits and zero-terminate mantissa\r
6741                 truncate_zeros(mantissa, exponent_string);\r
6742 \r
6743                 // fill results\r
6744                 *out_mantissa = mantissa;\r
6745                 *out_exponent = exponent;\r
6746         }\r
6747 #endif\r
6748 \r
6749         PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc)\r
6750         {\r
6751                 // try special number conversion\r
6752                 const char_t* special = convert_number_to_string_special(value);\r
6753                 if (special) return xpath_string_const(special);\r
6754 \r
6755                 // get mantissa + exponent form\r
6756                 char mantissa_buffer[32];\r
6757 \r
6758                 char* mantissa;\r
6759                 int exponent;\r
6760                 convert_number_to_mantissa_exponent(value, mantissa_buffer, sizeof(mantissa_buffer), &mantissa, &exponent);\r
6761 \r
6762                 // allocate a buffer of suitable length for the number\r
6763                 size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4;\r
6764                 char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size));\r
6765                 assert(result);\r
6766 \r
6767                 // make the number!\r
6768                 char_t* s = result;\r
6769 \r
6770                 // sign\r
6771                 if (value < 0) *s++ = '-';\r
6772 \r
6773                 // integer part\r
6774                 if (exponent <= 0)\r
6775                 {\r
6776                         *s++ = '0';\r
6777                 }\r
6778                 else\r
6779                 {\r
6780                         while (exponent > 0)\r
6781                         {\r
6782                                 assert(*mantissa == 0 || static_cast<unsigned int>(static_cast<unsigned int>(*mantissa) - '0') <= 9);\r
6783                                 *s++ = *mantissa ? *mantissa++ : '0';\r
6784                                 exponent--;\r
6785                         }\r
6786                 }\r
6787 \r
6788                 // fractional part\r
6789                 if (*mantissa)\r
6790                 {\r
6791                         // decimal point\r
6792                         *s++ = '.';\r
6793 \r
6794                         // extra zeroes from negative exponent\r
6795                         while (exponent < 0)\r
6796                         {\r
6797                                 *s++ = '0';\r
6798                                 exponent++;\r
6799                         }\r
6800 \r
6801                         // extra mantissa digits\r
6802                         while (*mantissa)\r
6803                         {\r
6804                                 assert(static_cast<unsigned int>(*mantissa - '0') <= 9);\r
6805                                 *s++ = *mantissa++;\r
6806                         }\r
6807                 }\r
6808 \r
6809                 // zero-terminate\r
6810                 assert(s < result + result_size);\r
6811                 *s = 0;\r
6812 \r
6813                 return xpath_string(result, true);\r
6814         }\r
6815         \r
6816         PUGI__FN bool check_string_to_number_format(const char_t* string)\r
6817         {\r
6818                 // parse leading whitespace\r
6819                 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;\r
6820 \r
6821                 // parse sign\r
6822                 if (*string == '-') ++string;\r
6823 \r
6824                 if (!*string) return false;\r
6825 \r
6826                 // if there is no integer part, there should be a decimal part with at least one digit\r
6827                 if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false;\r
6828 \r
6829                 // parse integer part\r
6830                 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;\r
6831 \r
6832                 // parse decimal part\r
6833                 if (*string == '.')\r
6834                 {\r
6835                         ++string;\r
6836 \r
6837                         while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;\r
6838                 }\r
6839 \r
6840                 // parse trailing whitespace\r
6841                 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;\r
6842 \r
6843                 return *string == 0;\r
6844         }\r
6845 \r
6846         PUGI__FN double convert_string_to_number(const char_t* string)\r
6847         {\r
6848                 // check string format\r
6849                 if (!check_string_to_number_format(string)) return gen_nan();\r
6850 \r
6851                 // parse string\r
6852         #ifdef PUGIXML_WCHAR_MODE\r
6853                 return wcstod(string, 0);\r
6854         #else\r
6855                 return atof(string);\r
6856         #endif\r
6857         }\r
6858 \r
6859         PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result)\r
6860         {\r
6861                 size_t length = static_cast<size_t>(end - begin);\r
6862                 char_t* scratch = buffer;\r
6863 \r
6864                 if (length >= sizeof(buffer) / sizeof(buffer[0]))\r
6865                 {\r
6866                         // need to make dummy on-heap copy\r
6867                         scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));\r
6868                         if (!scratch) return false;\r
6869                 }\r
6870 \r
6871                 // copy string to zero-terminated buffer and perform conversion\r
6872                 memcpy(scratch, begin, length * sizeof(char_t));\r
6873                 scratch[length] = 0;\r
6874 \r
6875                 *out_result = convert_string_to_number(scratch);\r
6876 \r
6877                 // free dummy buffer\r
6878                 if (scratch != buffer) xml_memory::deallocate(scratch);\r
6879 \r
6880                 return true;\r
6881         }\r
6882         \r
6883         PUGI__FN double round_nearest(double value)\r
6884         {\r
6885                 return floor(value + 0.5);\r
6886         }\r
6887 \r
6888         PUGI__FN double round_nearest_nzero(double value)\r
6889         {\r
6890                 // same as round_nearest, but returns -0 for [-0.5, -0]\r
6891                 // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0)\r
6892                 return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5);\r
6893         }\r
6894         \r
6895         PUGI__FN const char_t* qualified_name(const xpath_node& node)\r
6896         {\r
6897                 return node.attribute() ? node.attribute().name() : node.node().name();\r
6898         }\r
6899         \r
6900         PUGI__FN const char_t* local_name(const xpath_node& node)\r
6901         {\r
6902                 const char_t* name = qualified_name(node);\r
6903                 const char_t* p = find_char(name, ':');\r
6904                 \r
6905                 return p ? p + 1 : name;\r
6906         }\r
6907 \r
6908         struct namespace_uri_predicate\r
6909         {\r
6910                 const char_t* prefix;\r
6911                 size_t prefix_length;\r
6912 \r
6913                 namespace_uri_predicate(const char_t* name)\r
6914                 {\r
6915                         const char_t* pos = find_char(name, ':');\r
6916 \r
6917                         prefix = pos ? name : 0;\r
6918                         prefix_length = pos ? static_cast<size_t>(pos - name) : 0;\r
6919                 }\r
6920 \r
6921                 bool operator()(const xml_attribute& a) const\r
6922                 {\r
6923                         const char_t* name = a.name();\r
6924 \r
6925                         if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;\r
6926 \r
6927                         return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;\r
6928                 }\r
6929         };\r
6930 \r
6931         PUGI__FN const char_t* namespace_uri(const xml_node& node)\r
6932         {\r
6933                 namespace_uri_predicate pred = node.name();\r
6934                 \r
6935                 xml_node p = node;\r
6936                 \r
6937                 while (p)\r
6938                 {\r
6939                         xml_attribute a = p.find_attribute(pred);\r
6940                         \r
6941                         if (a) return a.value();\r
6942                         \r
6943                         p = p.parent();\r
6944                 }\r
6945                 \r
6946                 return PUGIXML_TEXT("");\r
6947         }\r
6948 \r
6949         PUGI__FN const char_t* namespace_uri(const xml_attribute& attr, const xml_node& parent)\r
6950         {\r
6951                 namespace_uri_predicate pred = attr.name();\r
6952                 \r
6953                 // Default namespace does not apply to attributes\r
6954                 if (!pred.prefix) return PUGIXML_TEXT("");\r
6955                 \r
6956                 xml_node p = parent;\r
6957                 \r
6958                 while (p)\r
6959                 {\r
6960                         xml_attribute a = p.find_attribute(pred);\r
6961                         \r
6962                         if (a) return a.value();\r
6963                         \r
6964                         p = p.parent();\r
6965                 }\r
6966                 \r
6967                 return PUGIXML_TEXT("");\r
6968         }\r
6969 \r
6970         PUGI__FN const char_t* namespace_uri(const xpath_node& node)\r
6971         {\r
6972                 return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());\r
6973         }\r
6974 \r
6975         PUGI__FN void normalize_space(char_t* buffer)\r
6976         {\r
6977                 char_t* write = buffer;\r
6978 \r
6979                 for (char_t* it = buffer; *it; )\r
6980                 {\r
6981                         char_t ch = *it++;\r
6982 \r
6983                         if (PUGI__IS_CHARTYPE(ch, ct_space))\r
6984                         {\r
6985                                 // replace whitespace sequence with single space\r
6986                                 while (PUGI__IS_CHARTYPE(*it, ct_space)) it++;\r
6987 \r
6988                                 // avoid leading spaces\r
6989                                 if (write != buffer) *write++ = ' ';\r
6990                         }\r
6991                         else *write++ = ch;\r
6992                 }\r
6993 \r
6994                 // remove trailing space\r
6995                 if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--;\r
6996 \r
6997                 // zero-terminate\r
6998                 *write = 0;\r
6999         }\r
7000 \r
7001         PUGI__FN void translate(char_t* buffer, const char_t* from, const char_t* to)\r
7002         {\r
7003                 size_t to_length = strlength(to);\r
7004 \r
7005                 char_t* write = buffer;\r
7006 \r
7007                 while (*buffer)\r
7008                 {\r
7009                         PUGI__DMC_VOLATILE char_t ch = *buffer++;\r
7010 \r
7011                         const char_t* pos = find_char(from, ch);\r
7012 \r
7013                         if (!pos)\r
7014                                 *write++ = ch; // do not process\r
7015                         else if (static_cast<size_t>(pos - from) < to_length)\r
7016                                 *write++ = to[pos - from]; // replace\r
7017                 }\r
7018 \r
7019                 // zero-terminate\r
7020                 *write = 0;\r
7021         }\r
7022 \r
7023         struct xpath_variable_boolean: xpath_variable\r
7024         {\r
7025                 xpath_variable_boolean(): value(false)\r
7026                 {\r
7027                 }\r
7028 \r
7029                 bool value;\r
7030                 char_t name[1];\r
7031         };\r
7032 \r
7033         struct xpath_variable_number: xpath_variable\r
7034         {\r
7035                 xpath_variable_number(): value(0)\r
7036                 {\r
7037                 }\r
7038 \r
7039                 double value;\r
7040                 char_t name[1];\r
7041         };\r
7042 \r
7043         struct xpath_variable_string: xpath_variable\r
7044         {\r
7045                 xpath_variable_string(): value(0)\r
7046                 {\r
7047                 }\r
7048 \r
7049                 ~xpath_variable_string()\r
7050                 {\r
7051                         if (value) xml_memory::deallocate(value);\r
7052                 }\r
7053 \r
7054                 char_t* value;\r
7055                 char_t name[1];\r
7056         };\r
7057 \r
7058         struct xpath_variable_node_set: xpath_variable\r
7059         {\r
7060                 xpath_node_set value;\r
7061                 char_t name[1];\r
7062         };\r
7063 \r
7064         static const xpath_node_set dummy_node_set;\r
7065 \r
7066         PUGI__FN unsigned int hash_string(const char_t* str)\r
7067         {\r
7068                 // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)\r
7069                 unsigned int result = 0;\r
7070 \r
7071                 while (*str)\r
7072                 {\r
7073                         result += static_cast<unsigned int>(*str++);\r
7074                         result += result << 10;\r
7075                         result ^= result >> 6;\r
7076                 }\r
7077         \r
7078                 result += result << 3;\r
7079                 result ^= result >> 11;\r
7080                 result += result << 15;\r
7081         \r
7082                 return result;\r
7083         }\r
7084 \r
7085         template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name)\r
7086         {\r
7087                 size_t length = strlength(name);\r
7088                 if (length == 0) return 0; // empty variable names are invalid\r
7089 \r
7090                 // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters\r
7091                 void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t));\r
7092                 if (!memory) return 0;\r
7093 \r
7094                 T* result = new (memory) T();\r
7095 \r
7096                 memcpy(result->name, name, (length + 1) * sizeof(char_t));\r
7097 \r
7098                 return result;\r
7099         }\r
7100 \r
7101         PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)\r
7102         {\r
7103                 switch (type)\r
7104                 {\r
7105                 case xpath_type_node_set:\r
7106                         return new_xpath_variable<xpath_variable_node_set>(name);\r
7107 \r
7108                 case xpath_type_number:\r
7109                         return new_xpath_variable<xpath_variable_number>(name);\r
7110 \r
7111                 case xpath_type_string:\r
7112                         return new_xpath_variable<xpath_variable_string>(name);\r
7113 \r
7114                 case xpath_type_boolean:\r
7115                         return new_xpath_variable<xpath_variable_boolean>(name);\r
7116 \r
7117                 default:\r
7118                         return 0;\r
7119                 }\r
7120         }\r
7121 \r
7122         template <typename T> PUGI__FN void delete_xpath_variable(T* var)\r
7123         {\r
7124                 var->~T();\r
7125                 xml_memory::deallocate(var);\r
7126         }\r
7127 \r
7128         PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var)\r
7129         {\r
7130                 switch (type)\r
7131                 {\r
7132                 case xpath_type_node_set:\r
7133                         delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));\r
7134                         break;\r
7135 \r
7136                 case xpath_type_number:\r
7137                         delete_xpath_variable(static_cast<xpath_variable_number*>(var));\r
7138                         break;\r
7139 \r
7140                 case xpath_type_string:\r
7141                         delete_xpath_variable(static_cast<xpath_variable_string*>(var));\r
7142                         break;\r
7143 \r
7144                 case xpath_type_boolean:\r
7145                         delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));\r
7146                         break;\r
7147 \r
7148                 default:\r
7149                         assert(!"Invalid variable type");\r
7150                 }\r
7151         }\r
7152 \r
7153         PUGI__FN xpath_variable* get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end)\r
7154         {\r
7155                 size_t length = static_cast<size_t>(end - begin);\r
7156                 char_t* scratch = buffer;\r
7157 \r
7158                 if (length >= sizeof(buffer) / sizeof(buffer[0]))\r
7159                 {\r
7160                         // need to make dummy on-heap copy\r
7161                         scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));\r
7162                         if (!scratch) return 0;\r
7163                 }\r
7164 \r
7165                 // copy string to zero-terminated buffer and perform lookup\r
7166                 memcpy(scratch, begin, length * sizeof(char_t));\r
7167                 scratch[length] = 0;\r
7168 \r
7169                 xpath_variable* result = set->get(scratch);\r
7170 \r
7171                 // free dummy buffer\r
7172                 if (scratch != buffer) xml_memory::deallocate(scratch);\r
7173 \r
7174                 return result;\r
7175         }\r
7176 PUGI__NS_END\r
7177 \r
7178 // Internal node set class\r
7179 PUGI__NS_BEGIN\r
7180         PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)\r
7181         {\r
7182                 xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;\r
7183 \r
7184                 if (type == xpath_node_set::type_unsorted)\r
7185                 {\r
7186                         sort(begin, end, document_order_comparator());\r
7187 \r
7188                         type = xpath_node_set::type_sorted;\r
7189                 }\r
7190                 \r
7191                 if (type != order) reverse(begin, end);\r
7192                         \r
7193                 return order;\r
7194         }\r
7195 \r
7196         PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)\r
7197         {\r
7198                 if (begin == end) return xpath_node();\r
7199 \r
7200                 switch (type)\r
7201                 {\r
7202                 case xpath_node_set::type_sorted:\r
7203                         return *begin;\r
7204 \r
7205                 case xpath_node_set::type_sorted_reverse:\r
7206                         return *(end - 1);\r
7207 \r
7208                 case xpath_node_set::type_unsorted:\r
7209                         return *min_element(begin, end, document_order_comparator());\r
7210 \r
7211                 default:\r
7212                         assert(!"Invalid node set type");\r
7213                         return xpath_node();\r
7214                 }\r
7215         }\r
7216 \r
7217         class xpath_node_set_raw\r
7218         {\r
7219                 xpath_node_set::type_t _type;\r
7220 \r
7221                 xpath_node* _begin;\r
7222                 xpath_node* _end;\r
7223                 xpath_node* _eos;\r
7224 \r
7225         public:\r
7226                 xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0)\r
7227                 {\r
7228                 }\r
7229 \r
7230                 xpath_node* begin() const\r
7231                 {\r
7232                         return _begin;\r
7233                 }\r
7234 \r
7235                 xpath_node* end() const\r
7236                 {\r
7237                         return _end;\r
7238                 }\r
7239 \r
7240                 bool empty() const\r
7241                 {\r
7242                         return _begin == _end;\r
7243                 }\r
7244 \r
7245                 size_t size() const\r
7246                 {\r
7247                         return static_cast<size_t>(_end - _begin);\r
7248                 }\r
7249 \r
7250                 xpath_node first() const\r
7251                 {\r
7252                         return xpath_first(_begin, _end, _type);\r
7253                 }\r
7254 \r
7255                 void push_back(const xpath_node& node, xpath_allocator* alloc)\r
7256                 {\r
7257                         if (_end == _eos)\r
7258                         {\r
7259                                 size_t capacity = static_cast<size_t>(_eos - _begin);\r
7260 \r
7261                                 // get new capacity (1.5x rule)\r
7262                                 size_t new_capacity = capacity + capacity / 2 + 1;\r
7263 \r
7264                                 // reallocate the old array or allocate a new one\r
7265                                 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node)));\r
7266                                 assert(data);\r
7267 \r
7268                                 // finalize\r
7269                                 _begin = data;\r
7270                                 _end = data + capacity;\r
7271                                 _eos = data + new_capacity;\r
7272                         }\r
7273 \r
7274                         *_end++ = node;\r
7275                 }\r
7276 \r
7277                 void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc)\r
7278                 {\r
7279                         size_t size_ = static_cast<size_t>(_end - _begin);\r
7280                         size_t capacity = static_cast<size_t>(_eos - _begin);\r
7281                         size_t count = static_cast<size_t>(end_ - begin_);\r
7282 \r
7283                         if (size_ + count > capacity)\r
7284                         {\r
7285                                 // reallocate the old array or allocate a new one\r
7286                                 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node)));\r
7287                                 assert(data);\r
7288 \r
7289                                 // finalize\r
7290                                 _begin = data;\r
7291                                 _end = data + size_;\r
7292                                 _eos = data + size_ + count;\r
7293                         }\r
7294 \r
7295                         memcpy(_end, begin_, count * sizeof(xpath_node));\r
7296                         _end += count;\r
7297                 }\r
7298 \r
7299                 void sort_do()\r
7300                 {\r
7301                         _type = xpath_sort(_begin, _end, _type, false);\r
7302                 }\r
7303 \r
7304                 void truncate(xpath_node* pos)\r
7305                 {\r
7306                         assert(_begin <= pos && pos <= _end);\r
7307 \r
7308                         _end = pos;\r
7309                 }\r
7310 \r
7311                 void remove_duplicates()\r
7312                 {\r
7313                         if (_type == xpath_node_set::type_unsorted)\r
7314                                 sort(_begin, _end, duplicate_comparator());\r
7315                 \r
7316                         _end = unique(_begin, _end);\r
7317                 }\r
7318 \r
7319                 xpath_node_set::type_t type() const\r
7320                 {\r
7321                         return _type;\r
7322                 }\r
7323 \r
7324                 void set_type(xpath_node_set::type_t value)\r
7325                 {\r
7326                         _type = value;\r
7327                 }\r
7328         };\r
7329 PUGI__NS_END\r
7330 \r
7331 PUGI__NS_BEGIN\r
7332         struct xpath_context\r
7333         {\r
7334                 xpath_node n;\r
7335                 size_t position, size;\r
7336 \r
7337                 xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_)\r
7338                 {\r
7339                 }\r
7340         };\r
7341 \r
7342         enum lexeme_t\r
7343         {\r
7344                 lex_none = 0,\r
7345                 lex_equal,\r
7346                 lex_not_equal,\r
7347                 lex_less,\r
7348                 lex_greater,\r
7349                 lex_less_or_equal,\r
7350                 lex_greater_or_equal,\r
7351                 lex_plus,\r
7352                 lex_minus,\r
7353                 lex_multiply,\r
7354                 lex_union,\r
7355                 lex_var_ref,\r
7356                 lex_open_brace,\r
7357                 lex_close_brace,\r
7358                 lex_quoted_string,\r
7359                 lex_number,\r
7360                 lex_slash,\r
7361                 lex_double_slash,\r
7362                 lex_open_square_brace,\r
7363                 lex_close_square_brace,\r
7364                 lex_string,\r
7365                 lex_comma,\r
7366                 lex_axis_attribute,\r
7367                 lex_dot,\r
7368                 lex_double_dot,\r
7369                 lex_double_colon,\r
7370                 lex_eof\r
7371         };\r
7372 \r
7373         struct xpath_lexer_string\r
7374         {\r
7375                 const char_t* begin;\r
7376                 const char_t* end;\r
7377 \r
7378                 xpath_lexer_string(): begin(0), end(0)\r
7379                 {\r
7380                 }\r
7381 \r
7382                 bool operator==(const char_t* other) const\r
7383                 {\r
7384                         size_t length = static_cast<size_t>(end - begin);\r
7385 \r
7386                         return strequalrange(other, begin, length);\r
7387                 }\r
7388         };\r
7389 \r
7390         class xpath_lexer\r
7391         {\r
7392                 const char_t* _cur;\r
7393                 const char_t* _cur_lexeme_pos;\r
7394                 xpath_lexer_string _cur_lexeme_contents;\r
7395 \r
7396                 lexeme_t _cur_lexeme;\r
7397 \r
7398         public:\r
7399                 explicit xpath_lexer(const char_t* query): _cur(query)\r
7400                 {\r
7401                         next();\r
7402                 }\r
7403                 \r
7404                 const char_t* state() const\r
7405                 {\r
7406                         return _cur;\r
7407                 }\r
7408                 \r
7409                 void next()\r
7410                 {\r
7411                         const char_t* cur = _cur;\r
7412 \r
7413                         while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur;\r
7414 \r
7415                         // save lexeme position for error reporting\r
7416                         _cur_lexeme_pos = cur;\r
7417 \r
7418                         switch (*cur)\r
7419                         {\r
7420                         case 0:\r
7421                                 _cur_lexeme = lex_eof;\r
7422                                 break;\r
7423                         \r
7424                         case '>':\r
7425                                 if (*(cur+1) == '=')\r
7426                                 {\r
7427                                         cur += 2;\r
7428                                         _cur_lexeme = lex_greater_or_equal;\r
7429                                 }\r
7430                                 else\r
7431                                 {\r
7432                                         cur += 1;\r
7433                                         _cur_lexeme = lex_greater;\r
7434                                 }\r
7435                                 break;\r
7436 \r
7437                         case '<':\r
7438                                 if (*(cur+1) == '=')\r
7439                                 {\r
7440                                         cur += 2;\r
7441                                         _cur_lexeme = lex_less_or_equal;\r
7442                                 }\r
7443                                 else\r
7444                                 {\r
7445                                         cur += 1;\r
7446                                         _cur_lexeme = lex_less;\r
7447                                 }\r
7448                                 break;\r
7449 \r
7450                         case '!':\r
7451                                 if (*(cur+1) == '=')\r
7452                                 {\r
7453                                         cur += 2;\r
7454                                         _cur_lexeme = lex_not_equal;\r
7455                                 }\r
7456                                 else\r
7457                                 {\r
7458                                         _cur_lexeme = lex_none;\r
7459                                 }\r
7460                                 break;\r
7461 \r
7462                         case '=':\r
7463                                 cur += 1;\r
7464                                 _cur_lexeme = lex_equal;\r
7465 \r
7466                                 break;\r
7467                         \r
7468                         case '+':\r
7469                                 cur += 1;\r
7470                                 _cur_lexeme = lex_plus;\r
7471 \r
7472                                 break;\r
7473 \r
7474                         case '-':\r
7475                                 cur += 1;\r
7476                                 _cur_lexeme = lex_minus;\r
7477 \r
7478                                 break;\r
7479 \r
7480                         case '*':\r
7481                                 cur += 1;\r
7482                                 _cur_lexeme = lex_multiply;\r
7483 \r
7484                                 break;\r
7485 \r
7486                         case '|':\r
7487                                 cur += 1;\r
7488                                 _cur_lexeme = lex_union;\r
7489 \r
7490                                 break;\r
7491                         \r
7492                         case '$':\r
7493                                 cur += 1;\r
7494 \r
7495                                 if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))\r
7496                                 {\r
7497                                         _cur_lexeme_contents.begin = cur;\r
7498 \r
7499                                         while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;\r
7500 \r
7501                                         if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname\r
7502                                         {\r
7503                                                 cur++; // :\r
7504 \r
7505                                                 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;\r
7506                                         }\r
7507 \r
7508                                         _cur_lexeme_contents.end = cur;\r
7509                                 \r
7510                                         _cur_lexeme = lex_var_ref;\r
7511                                 }\r
7512                                 else\r
7513                                 {\r
7514                                         _cur_lexeme = lex_none;\r
7515                                 }\r
7516 \r
7517                                 break;\r
7518 \r
7519                         case '(':\r
7520                                 cur += 1;\r
7521                                 _cur_lexeme = lex_open_brace;\r
7522 \r
7523                                 break;\r
7524 \r
7525                         case ')':\r
7526                                 cur += 1;\r
7527                                 _cur_lexeme = lex_close_brace;\r
7528 \r
7529                                 break;\r
7530                         \r
7531                         case '[':\r
7532                                 cur += 1;\r
7533                                 _cur_lexeme = lex_open_square_brace;\r
7534 \r
7535                                 break;\r
7536 \r
7537                         case ']':\r
7538                                 cur += 1;\r
7539                                 _cur_lexeme = lex_close_square_brace;\r
7540 \r
7541                                 break;\r
7542 \r
7543                         case ',':\r
7544                                 cur += 1;\r
7545                                 _cur_lexeme = lex_comma;\r
7546 \r
7547                                 break;\r
7548 \r
7549                         case '/':\r
7550                                 if (*(cur+1) == '/')\r
7551                                 {\r
7552                                         cur += 2;\r
7553                                         _cur_lexeme = lex_double_slash;\r
7554                                 }\r
7555                                 else\r
7556                                 {\r
7557                                         cur += 1;\r
7558                                         _cur_lexeme = lex_slash;\r
7559                                 }\r
7560                                 break;\r
7561                 \r
7562                         case '.':\r
7563                                 if (*(cur+1) == '.')\r
7564                                 {\r
7565                                         cur += 2;\r
7566                                         _cur_lexeme = lex_double_dot;\r
7567                                 }\r
7568                                 else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit))\r
7569                                 {\r
7570                                         _cur_lexeme_contents.begin = cur; // .\r
7571 \r
7572                                         ++cur;\r
7573 \r
7574                                         while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;\r
7575 \r
7576                                         _cur_lexeme_contents.end = cur;\r
7577                                         \r
7578                                         _cur_lexeme = lex_number;\r
7579                                 }\r
7580                                 else\r
7581                                 {\r
7582                                         cur += 1;\r
7583                                         _cur_lexeme = lex_dot;\r
7584                                 }\r
7585                                 break;\r
7586 \r
7587                         case '@':\r
7588                                 cur += 1;\r
7589                                 _cur_lexeme = lex_axis_attribute;\r
7590 \r
7591                                 break;\r
7592 \r
7593                         case '"':\r
7594                         case '\'':\r
7595                         {\r
7596                                 char_t terminator = *cur;\r
7597 \r
7598                                 ++cur;\r
7599 \r
7600                                 _cur_lexeme_contents.begin = cur;\r
7601                                 while (*cur && *cur != terminator) cur++;\r
7602                                 _cur_lexeme_contents.end = cur;\r
7603                                 \r
7604                                 if (!*cur)\r
7605                                         _cur_lexeme = lex_none;\r
7606                                 else\r
7607                                 {\r
7608                                         cur += 1;\r
7609                                         _cur_lexeme = lex_quoted_string;\r
7610                                 }\r
7611 \r
7612                                 break;\r
7613                         }\r
7614 \r
7615                         case ':':\r
7616                                 if (*(cur+1) == ':')\r
7617                                 {\r
7618                                         cur += 2;\r
7619                                         _cur_lexeme = lex_double_colon;\r
7620                                 }\r
7621                                 else\r
7622                                 {\r
7623                                         _cur_lexeme = lex_none;\r
7624                                 }\r
7625                                 break;\r
7626 \r
7627                         default:\r
7628                                 if (PUGI__IS_CHARTYPEX(*cur, ctx_digit))\r
7629                                 {\r
7630                                         _cur_lexeme_contents.begin = cur;\r
7631 \r
7632                                         while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;\r
7633                                 \r
7634                                         if (*cur == '.')\r
7635                                         {\r
7636                                                 cur++;\r
7637 \r
7638                                                 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;\r
7639                                         }\r
7640 \r
7641                                         _cur_lexeme_contents.end = cur;\r
7642 \r
7643                                         _cur_lexeme = lex_number;\r
7644                                 }\r
7645                                 else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))\r
7646                                 {\r
7647                                         _cur_lexeme_contents.begin = cur;\r
7648 \r
7649                                         while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;\r
7650 \r
7651                                         if (cur[0] == ':')\r
7652                                         {\r
7653                                                 if (cur[1] == '*') // namespace test ncname:*\r
7654                                                 {\r
7655                                                         cur += 2; // :*\r
7656                                                 }\r
7657                                                 else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname\r
7658                                                 {\r
7659                                                         cur++; // :\r
7660 \r
7661                                                         while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;\r
7662                                                 }\r
7663                                         }\r
7664 \r
7665                                         _cur_lexeme_contents.end = cur;\r
7666                                 \r
7667                                         _cur_lexeme = lex_string;\r
7668                                 }\r
7669                                 else\r
7670                                 {\r
7671                                         _cur_lexeme = lex_none;\r
7672                                 }\r
7673                         }\r
7674 \r
7675                         _cur = cur;\r
7676                 }\r
7677 \r
7678                 lexeme_t current() const\r
7679                 {\r
7680                         return _cur_lexeme;\r
7681                 }\r
7682 \r
7683                 const char_t* current_pos() const\r
7684                 {\r
7685                         return _cur_lexeme_pos;\r
7686                 }\r
7687 \r
7688                 const xpath_lexer_string& contents() const\r
7689                 {\r
7690                         assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);\r
7691 \r
7692                         return _cur_lexeme_contents;\r
7693                 }\r
7694         };\r
7695 \r
7696         enum ast_type_t\r
7697         {\r
7698                 ast_unknown,\r
7699                 ast_op_or,                                              // left or right\r
7700                 ast_op_and,                                             // left and right\r
7701                 ast_op_equal,                                   // left = right\r
7702                 ast_op_not_equal,                               // left != right\r
7703                 ast_op_less,                                    // left < right\r
7704                 ast_op_greater,                                 // left > right\r
7705                 ast_op_less_or_equal,                   // left <= right\r
7706                 ast_op_greater_or_equal,                // left >= right\r
7707                 ast_op_add,                                             // left + right\r
7708                 ast_op_subtract,                                // left - right\r
7709                 ast_op_multiply,                                // left * right\r
7710                 ast_op_divide,                                  // left / right\r
7711                 ast_op_mod,                                             // left % right\r
7712                 ast_op_negate,                                  // left - right\r
7713                 ast_op_union,                                   // left | right\r
7714                 ast_predicate,                                  // apply predicate to set; next points to next predicate\r
7715                 ast_filter,                                             // select * from left where right\r
7716                 ast_filter_posinv,                              // select * from left where right; proximity position invariant\r
7717                 ast_string_constant,                    // string constant\r
7718                 ast_number_constant,                    // number constant\r
7719                 ast_variable,                                   // variable\r
7720                 ast_func_last,                                  // last()\r
7721                 ast_func_position,                              // position()\r
7722                 ast_func_count,                                 // count(left)\r
7723                 ast_func_id,                                    // id(left)\r
7724                 ast_func_local_name_0,                  // local-name()\r
7725                 ast_func_local_name_1,                  // local-name(left)\r
7726                 ast_func_namespace_uri_0,               // namespace-uri()\r
7727                 ast_func_namespace_uri_1,               // namespace-uri(left)\r
7728                 ast_func_name_0,                                // name()\r
7729                 ast_func_name_1,                                // name(left)\r
7730                 ast_func_string_0,                              // string()\r
7731                 ast_func_string_1,                              // string(left)\r
7732                 ast_func_concat,                                // concat(left, right, siblings)\r
7733                 ast_func_starts_with,                   // starts_with(left, right)\r
7734                 ast_func_contains,                              // contains(left, right)\r
7735                 ast_func_substring_before,              // substring-before(left, right)\r
7736                 ast_func_substring_after,               // substring-after(left, right)\r
7737                 ast_func_substring_2,                   // substring(left, right)\r
7738                 ast_func_substring_3,                   // substring(left, right, third)\r
7739                 ast_func_string_length_0,               // string-length()\r
7740                 ast_func_string_length_1,               // string-length(left)\r
7741                 ast_func_normalize_space_0,             // normalize-space()\r
7742                 ast_func_normalize_space_1,             // normalize-space(left)\r
7743                 ast_func_translate,                             // translate(left, right, third)\r
7744                 ast_func_boolean,                               // boolean(left)\r
7745                 ast_func_not,                                   // not(left)\r
7746                 ast_func_true,                                  // true()\r
7747                 ast_func_false,                                 // false()\r
7748                 ast_func_lang,                                  // lang(left)\r
7749                 ast_func_number_0,                              // number()\r
7750                 ast_func_number_1,                              // number(left)\r
7751                 ast_func_sum,                                   // sum(left)\r
7752                 ast_func_floor,                                 // floor(left)\r
7753                 ast_func_ceiling,                               // ceiling(left)\r
7754                 ast_func_round,                                 // round(left)\r
7755                 ast_step,                                               // process set left with step\r
7756                 ast_step_root                                   // select root node\r
7757         };\r
7758 \r
7759         enum axis_t\r
7760         {\r
7761                 axis_ancestor,\r
7762                 axis_ancestor_or_self,\r
7763                 axis_attribute,\r
7764                 axis_child,\r
7765                 axis_descendant,\r
7766                 axis_descendant_or_self,\r
7767                 axis_following,\r
7768                 axis_following_sibling,\r
7769                 axis_namespace,\r
7770                 axis_parent,\r
7771                 axis_preceding,\r
7772                 axis_preceding_sibling,\r
7773                 axis_self\r
7774         };\r
7775         \r
7776         enum nodetest_t\r
7777         {\r
7778                 nodetest_none,\r
7779                 nodetest_name,\r
7780                 nodetest_type_node,\r
7781                 nodetest_type_comment,\r
7782                 nodetest_type_pi,\r
7783                 nodetest_type_text,\r
7784                 nodetest_pi,\r
7785                 nodetest_all,\r
7786                 nodetest_all_in_namespace\r
7787         };\r
7788 \r
7789         template <axis_t N> struct axis_to_type\r
7790         {\r
7791                 static const axis_t axis;\r
7792         };\r
7793 \r
7794         template <axis_t N> const axis_t axis_to_type<N>::axis = N;\r
7795                 \r
7796         class xpath_ast_node\r
7797         {\r
7798         private:\r
7799                 // node type\r
7800                 char _type;\r
7801                 char _rettype;\r
7802 \r
7803                 // for ast_step / ast_predicate\r
7804                 char _axis;\r
7805                 char _test;\r
7806 \r
7807                 // tree node structure\r
7808                 xpath_ast_node* _left;\r
7809                 xpath_ast_node* _right;\r
7810                 xpath_ast_node* _next;\r
7811 \r
7812                 union\r
7813                 {\r
7814                         // value for ast_string_constant\r
7815                         const char_t* string;\r
7816                         // value for ast_number_constant\r
7817                         double number;\r
7818                         // variable for ast_variable\r
7819                         xpath_variable* variable;\r
7820                         // node test for ast_step (node name/namespace/node type/pi target)\r
7821                         const char_t* nodetest;\r
7822                 } _data;\r
7823 \r
7824                 xpath_ast_node(const xpath_ast_node&);\r
7825                 xpath_ast_node& operator=(const xpath_ast_node&);\r
7826 \r
7827                 template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)\r
7828                 {\r
7829                         xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();\r
7830 \r
7831                         if (lt != xpath_type_node_set && rt != xpath_type_node_set)\r
7832                         {\r
7833                                 if (lt == xpath_type_boolean || rt == xpath_type_boolean)\r
7834                                         return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));\r
7835                                 else if (lt == xpath_type_number || rt == xpath_type_number)\r
7836                                         return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));\r
7837                                 else if (lt == xpath_type_string || rt == xpath_type_string)\r
7838                                 {\r
7839                                         xpath_allocator_capture cr(stack.result);\r
7840 \r
7841                                         xpath_string ls = lhs->eval_string(c, stack);\r
7842                                         xpath_string rs = rhs->eval_string(c, stack);\r
7843 \r
7844                                         return comp(ls, rs);\r
7845                                 }\r
7846                         }\r
7847                         else if (lt == xpath_type_node_set && rt == xpath_type_node_set)\r
7848                         {\r
7849                                 xpath_allocator_capture cr(stack.result);\r
7850 \r
7851                                 xpath_node_set_raw ls = lhs->eval_node_set(c, stack);\r
7852                                 xpath_node_set_raw rs = rhs->eval_node_set(c, stack);\r
7853 \r
7854                                 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)\r
7855                                         for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)\r
7856                                         {\r
7857                                                 xpath_allocator_capture cri(stack.result);\r
7858 \r
7859                                                 if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))\r
7860                                                         return true;\r
7861                                         }\r
7862 \r
7863                                 return false;\r
7864                         }\r
7865                         else\r
7866                         {\r
7867                                 if (lt == xpath_type_node_set)\r
7868                                 {\r
7869                                         swap(lhs, rhs);\r
7870                                         swap(lt, rt);\r
7871                                 }\r
7872 \r
7873                                 if (lt == xpath_type_boolean)\r
7874                                         return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));\r
7875                                 else if (lt == xpath_type_number)\r
7876                                 {\r
7877                                         xpath_allocator_capture cr(stack.result);\r
7878 \r
7879                                         double l = lhs->eval_number(c, stack);\r
7880                                         xpath_node_set_raw rs = rhs->eval_node_set(c, stack);\r
7881 \r
7882                                         for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)\r
7883                                         {\r
7884                                                 xpath_allocator_capture cri(stack.result);\r
7885 \r
7886                                                 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))\r
7887                                                         return true;\r
7888                                         }\r
7889 \r
7890                                         return false;\r
7891                                 }\r
7892                                 else if (lt == xpath_type_string)\r
7893                                 {\r
7894                                         xpath_allocator_capture cr(stack.result);\r
7895 \r
7896                                         xpath_string l = lhs->eval_string(c, stack);\r
7897                                         xpath_node_set_raw rs = rhs->eval_node_set(c, stack);\r
7898 \r
7899                                         for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)\r
7900                                         {\r
7901                                                 xpath_allocator_capture cri(stack.result);\r
7902 \r
7903                                                 if (comp(l, string_value(*ri, stack.result)))\r
7904                                                         return true;\r
7905                                         }\r
7906 \r
7907                                         return false;\r
7908                                 }\r
7909                         }\r
7910 \r
7911                         assert(!"Wrong types");\r
7912                         return false;\r
7913                 }\r
7914 \r
7915                 template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)\r
7916                 {\r
7917                         xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();\r
7918 \r
7919                         if (lt != xpath_type_node_set && rt != xpath_type_node_set)\r
7920                                 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));\r
7921                         else if (lt == xpath_type_node_set && rt == xpath_type_node_set)\r
7922                         {\r
7923                                 xpath_allocator_capture cr(stack.result);\r
7924 \r
7925                                 xpath_node_set_raw ls = lhs->eval_node_set(c, stack);\r
7926                                 xpath_node_set_raw rs = rhs->eval_node_set(c, stack);\r
7927 \r
7928                                 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)\r
7929                                 {\r
7930                                         xpath_allocator_capture cri(stack.result);\r
7931 \r
7932                                         double l = convert_string_to_number(string_value(*li, stack.result).c_str());\r
7933 \r
7934                                         for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)\r
7935                                         {\r
7936                                                 xpath_allocator_capture crii(stack.result);\r
7937 \r
7938                                                 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))\r
7939                                                         return true;\r
7940                                         }\r
7941                                 }\r
7942 \r
7943                                 return false;\r
7944                         }\r
7945                         else if (lt != xpath_type_node_set && rt == xpath_type_node_set)\r
7946                         {\r
7947                                 xpath_allocator_capture cr(stack.result);\r
7948 \r
7949                                 double l = lhs->eval_number(c, stack);\r
7950                                 xpath_node_set_raw rs = rhs->eval_node_set(c, stack);\r
7951 \r
7952                                 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)\r
7953                                 {\r
7954                                         xpath_allocator_capture cri(stack.result);\r
7955 \r
7956                                         if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))\r
7957                                                 return true;\r
7958                                 }\r
7959 \r
7960                                 return false;\r
7961                         }\r
7962                         else if (lt == xpath_type_node_set && rt != xpath_type_node_set)\r
7963                         {\r
7964                                 xpath_allocator_capture cr(stack.result);\r
7965 \r
7966                                 xpath_node_set_raw ls = lhs->eval_node_set(c, stack);\r
7967                                 double r = rhs->eval_number(c, stack);\r
7968 \r
7969                                 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)\r
7970                                 {\r
7971                                         xpath_allocator_capture cri(stack.result);\r
7972 \r
7973                                         if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))\r
7974                                                 return true;\r
7975                                 }\r
7976 \r
7977                                 return false;\r
7978                         }\r
7979                         else\r
7980                         {\r
7981                                 assert(!"Wrong types");\r
7982                                 return false;\r
7983                         }\r
7984                 }\r
7985 \r
7986                 void apply_predicate(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)\r
7987                 {\r
7988                         assert(ns.size() >= first);\r
7989 \r
7990                         size_t i = 1;\r
7991                         size_t size = ns.size() - first;\r
7992                                 \r
7993                         xpath_node* last = ns.begin() + first;\r
7994                                 \r
7995                         // remove_if... or well, sort of\r
7996                         for (xpath_node* it = last; it != ns.end(); ++it, ++i)\r
7997                         {\r
7998                                 xpath_context c(*it, i, size);\r
7999                         \r
8000                                 if (expr->rettype() == xpath_type_number)\r
8001                                 {\r
8002                                         if (expr->eval_number(c, stack) == i)\r
8003                                                 *last++ = *it;\r
8004                                 }\r
8005                                 else if (expr->eval_boolean(c, stack))\r
8006                                         *last++ = *it;\r
8007                         }\r
8008                         \r
8009                         ns.truncate(last);\r
8010                 }\r
8011 \r
8012                 void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack)\r
8013                 {\r
8014                         if (ns.size() == first) return;\r
8015                         \r
8016                         for (xpath_ast_node* pred = _right; pred; pred = pred->_next)\r
8017                         {\r
8018                                 apply_predicate(ns, first, pred->_left, stack);\r
8019                         }\r
8020                 }\r
8021 \r
8022                 void step_push(xpath_node_set_raw& ns, const xml_attribute& a, const xml_node& parent, xpath_allocator* alloc)\r
8023                 {\r
8024                         if (!a) return;\r
8025 \r
8026                         const char_t* name = a.name();\r
8027 \r
8028                         // There are no attribute nodes corresponding to attributes that declare namespaces\r
8029                         // That is, "xmlns:..." or "xmlns"\r
8030                         if (starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')) return;\r
8031                         \r
8032                         switch (_test)\r
8033                         {\r
8034                         case nodetest_name:\r
8035                                 if (strequal(name, _data.nodetest)) ns.push_back(xpath_node(a, parent), alloc);\r
8036                                 break;\r
8037                                 \r
8038                         case nodetest_type_node:\r
8039                         case nodetest_all:\r
8040                                 ns.push_back(xpath_node(a, parent), alloc);\r
8041                                 break;\r
8042                                 \r
8043                         case nodetest_all_in_namespace:\r
8044                                 if (starts_with(name, _data.nodetest))\r
8045                                         ns.push_back(xpath_node(a, parent), alloc);\r
8046                                 break;\r
8047                         \r
8048                         default:\r
8049                                 ;\r
8050                         }\r
8051                 }\r
8052                 \r
8053                 void step_push(xpath_node_set_raw& ns, const xml_node& n, xpath_allocator* alloc)\r
8054                 {\r
8055                         if (!n) return;\r
8056 \r
8057                         switch (_test)\r
8058                         {\r
8059                         case nodetest_name:\r
8060                                 if (n.type() == node_element && strequal(n.name(), _data.nodetest)) ns.push_back(n, alloc);\r
8061                                 break;\r
8062                                 \r
8063                         case nodetest_type_node:\r
8064                                 ns.push_back(n, alloc);\r
8065                                 break;\r
8066                                 \r
8067                         case nodetest_type_comment:\r
8068                                 if (n.type() == node_comment)\r
8069                                         ns.push_back(n, alloc);\r
8070                                 break;\r
8071                                 \r
8072                         case nodetest_type_text:\r
8073                                 if (n.type() == node_pcdata || n.type() == node_cdata)\r
8074                                         ns.push_back(n, alloc);\r
8075                                 break;\r
8076                                 \r
8077                         case nodetest_type_pi:\r
8078                                 if (n.type() == node_pi)\r
8079                                         ns.push_back(n, alloc);\r
8080                                 break;\r
8081                                                                         \r
8082                         case nodetest_pi:\r
8083                                 if (n.type() == node_pi && strequal(n.name(), _data.nodetest))\r
8084                                         ns.push_back(n, alloc);\r
8085                                 break;\r
8086                                 \r
8087                         case nodetest_all:\r
8088                                 if (n.type() == node_element)\r
8089                                         ns.push_back(n, alloc);\r
8090                                 break;\r
8091                                 \r
8092                         case nodetest_all_in_namespace:\r
8093                                 if (n.type() == node_element && starts_with(n.name(), _data.nodetest))\r
8094                                         ns.push_back(n, alloc);\r
8095                                 break;\r
8096 \r
8097                         default:\r
8098                                 assert(!"Unknown axis");\r
8099                         } \r
8100                 }\r
8101 \r
8102                 template <class T> void step_fill(xpath_node_set_raw& ns, const xml_node& n, xpath_allocator* alloc, T)\r
8103                 {\r
8104                         const axis_t axis = T::axis;\r
8105 \r
8106                         switch (axis)\r
8107                         {\r
8108                         case axis_attribute:\r
8109                         {\r
8110                                 for (xml_attribute a = n.first_attribute(); a; a = a.next_attribute())\r
8111                                         step_push(ns, a, n, alloc);\r
8112                                 \r
8113                                 break;\r
8114                         }\r
8115                         \r
8116                         case axis_child:\r
8117                         {\r
8118                                 for (xml_node c = n.first_child(); c; c = c.next_sibling())\r
8119                                         step_push(ns, c, alloc);\r
8120                                         \r
8121                                 break;\r
8122                         }\r
8123                         \r
8124                         case axis_descendant:\r
8125                         case axis_descendant_or_self:\r
8126                         {\r
8127                                 if (axis == axis_descendant_or_self)\r
8128                                         step_push(ns, n, alloc);\r
8129                                         \r
8130                                 xml_node cur = n.first_child();\r
8131                                 \r
8132                                 while (cur && cur != n)\r
8133                                 {\r
8134                                         step_push(ns, cur, alloc);\r
8135                                         \r
8136                                         if (cur.first_child())\r
8137                                                 cur = cur.first_child();\r
8138                                         else if (cur.next_sibling())\r
8139                                                 cur = cur.next_sibling();\r
8140                                         else\r
8141                                         {\r
8142                                                 while (!cur.next_sibling() && cur != n)\r
8143                                                         cur = cur.parent();\r
8144                                         \r
8145                                                 if (cur != n) cur = cur.next_sibling();\r
8146                                         }\r
8147                                 }\r
8148                                 \r
8149                                 break;\r
8150                         }\r
8151                         \r
8152                         case axis_following_sibling:\r
8153                         {\r
8154                                 for (xml_node c = n.next_sibling(); c; c = c.next_sibling())\r
8155                                         step_push(ns, c, alloc);\r
8156                                 \r
8157                                 break;\r
8158                         }\r
8159                         \r
8160                         case axis_preceding_sibling:\r
8161                         {\r
8162                                 for (xml_node c = n.previous_sibling(); c; c = c.previous_sibling())\r
8163                                         step_push(ns, c, alloc);\r
8164                                 \r
8165                                 break;\r
8166                         }\r
8167                         \r
8168                         case axis_following:\r
8169                         {\r
8170                                 xml_node cur = n;\r
8171 \r
8172                                 // exit from this node so that we don't include descendants\r
8173                                 while (cur && !cur.next_sibling()) cur = cur.parent();\r
8174                                 cur = cur.next_sibling();\r
8175 \r
8176                                 for (;;)\r
8177                                 {\r
8178                                         step_push(ns, cur, alloc);\r
8179 \r
8180                                         if (cur.first_child())\r
8181                                                 cur = cur.first_child();\r
8182                                         else if (cur.next_sibling())\r
8183                                                 cur = cur.next_sibling();\r
8184                                         else\r
8185                                         {\r
8186                                                 while (cur && !cur.next_sibling()) cur = cur.parent();\r
8187                                                 cur = cur.next_sibling();\r
8188 \r
8189                                                 if (!cur) break;\r
8190                                         }\r
8191                                 }\r
8192 \r
8193                                 break;\r
8194                         }\r
8195 \r
8196                         case axis_preceding:\r
8197                         {\r
8198                                 xml_node cur = n;\r
8199 \r
8200                                 while (cur && !cur.previous_sibling()) cur = cur.parent();\r
8201                                 cur = cur.previous_sibling();\r
8202 \r
8203                                 for (;;)\r
8204                                 {\r
8205                                         if (cur.last_child())\r
8206                                                 cur = cur.last_child();\r
8207                                         else\r
8208                                         {\r
8209                                                 // leaf node, can't be ancestor\r
8210                                                 step_push(ns, cur, alloc);\r
8211 \r
8212                                                 if (cur.previous_sibling())\r
8213                                                         cur = cur.previous_sibling();\r
8214                                                 else\r
8215                                                 {\r
8216                                                         do \r
8217                                                         {\r
8218                                                                 cur = cur.parent();\r
8219                                                                 if (!cur) break;\r
8220 \r
8221                                                                 if (!node_is_ancestor(cur, n)) step_push(ns, cur, alloc);\r
8222                                                         }\r
8223                                                         while (!cur.previous_sibling());\r
8224 \r
8225                                                         cur = cur.previous_sibling();\r
8226 \r
8227                                                         if (!cur) break;\r
8228                                                 }\r
8229                                         }\r
8230                                 }\r
8231 \r
8232                                 break;\r
8233                         }\r
8234                         \r
8235                         case axis_ancestor:\r
8236                         case axis_ancestor_or_self:\r
8237                         {\r
8238                                 if (axis == axis_ancestor_or_self)\r
8239                                         step_push(ns, n, alloc);\r
8240 \r
8241                                 xml_node cur = n.parent();\r
8242                                 \r
8243                                 while (cur)\r
8244                                 {\r
8245                                         step_push(ns, cur, alloc);\r
8246                                         \r
8247                                         cur = cur.parent();\r
8248                                 }\r
8249                                 \r
8250                                 break;\r
8251                         }\r
8252 \r
8253                         case axis_self:\r
8254                         {\r
8255                                 step_push(ns, n, alloc);\r
8256 \r
8257                                 break;\r
8258                         }\r
8259 \r
8260                         case axis_parent:\r
8261                         {\r
8262                                 if (n.parent()) step_push(ns, n.parent(), alloc);\r
8263 \r
8264                                 break;\r
8265                         }\r
8266                                 \r
8267                         default:\r
8268                                 assert(!"Unimplemented axis");\r
8269                         }\r
8270                 }\r
8271                 \r
8272                 template <class T> void step_fill(xpath_node_set_raw& ns, const xml_attribute& a, const xml_node& p, xpath_allocator* alloc, T v)\r
8273                 {\r
8274                         const axis_t axis = T::axis;\r
8275 \r
8276                         switch (axis)\r
8277                         {\r
8278                         case axis_ancestor:\r
8279                         case axis_ancestor_or_self:\r
8280                         {\r
8281                                 if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test\r
8282                                         step_push(ns, a, p, alloc);\r
8283 \r
8284                                 xml_node cur = p;\r
8285                                 \r
8286                                 while (cur)\r
8287                                 {\r
8288                                         step_push(ns, cur, alloc);\r
8289                                         \r
8290                                         cur = cur.parent();\r
8291                                 }\r
8292                                 \r
8293                                 break;\r
8294                         }\r
8295 \r
8296                         case axis_descendant_or_self:\r
8297                         case axis_self:\r
8298                         {\r
8299                                 if (_test == nodetest_type_node) // reject attributes based on principal node type test\r
8300                                         step_push(ns, a, p, alloc);\r
8301 \r
8302                                 break;\r
8303                         }\r
8304 \r
8305                         case axis_following:\r
8306                         {\r
8307                                 xml_node cur = p;\r
8308                                 \r
8309                                 for (;;)\r
8310                                 {\r
8311                                         if (cur.first_child())\r
8312                                                 cur = cur.first_child();\r
8313                                         else if (cur.next_sibling())\r
8314                                                 cur = cur.next_sibling();\r
8315                                         else\r
8316                                         {\r
8317                                                 while (cur && !cur.next_sibling()) cur = cur.parent();\r
8318                                                 cur = cur.next_sibling();\r
8319                                                 \r
8320                                                 if (!cur) break;\r
8321                                         }\r
8322 \r
8323                                         step_push(ns, cur, alloc);\r
8324                                 }\r
8325 \r
8326                                 break;\r
8327                         }\r
8328 \r
8329                         case axis_parent:\r
8330                         {\r
8331                                 step_push(ns, p, alloc);\r
8332 \r
8333                                 break;\r
8334                         }\r
8335 \r
8336                         case axis_preceding:\r
8337                         {\r
8338                                 // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding\r
8339                                 step_fill(ns, p, alloc, v);\r
8340                                 break;\r
8341                         }\r
8342                         \r
8343                         default:\r
8344                                 assert(!"Unimplemented axis");\r
8345                         }\r
8346                 }\r
8347                 \r
8348                 template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, T v)\r
8349                 {\r
8350                         const axis_t axis = T::axis;\r
8351                         bool attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self);\r
8352 \r
8353                         xpath_node_set_raw ns;\r
8354                         ns.set_type((axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling) ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted);\r
8355 \r
8356                         if (_left)\r
8357                         {\r
8358                                 xpath_node_set_raw s = _left->eval_node_set(c, stack);\r
8359 \r
8360                                 // self axis preserves the original order\r
8361                                 if (axis == axis_self) ns.set_type(s.type());\r
8362 \r
8363                                 for (const xpath_node* it = s.begin(); it != s.end(); ++it)\r
8364                                 {\r
8365                                         size_t size = ns.size();\r
8366 \r
8367                                         // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes\r
8368                                         if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);\r
8369                                         \r
8370                                         if (it->node())\r
8371                                                 step_fill(ns, it->node(), stack.result, v);\r
8372                                         else if (attributes)\r
8373                                                 step_fill(ns, it->attribute(), it->parent(), stack.result, v);\r
8374                                                 \r
8375                                         apply_predicates(ns, size, stack);\r
8376                                 }\r
8377                         }\r
8378                         else\r
8379                         {\r
8380                                 if (c.n.node())\r
8381                                         step_fill(ns, c.n.node(), stack.result, v);\r
8382                                 else if (attributes)\r
8383                                         step_fill(ns, c.n.attribute(), c.n.parent(), stack.result, v);\r
8384                                 \r
8385                                 apply_predicates(ns, 0, stack);\r
8386                         }\r
8387 \r
8388                         // child, attribute and self axes always generate unique set of nodes\r
8389                         // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice\r
8390                         if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted)\r
8391                                 ns.remove_duplicates();\r
8392 \r
8393                         return ns;\r
8394                 }\r
8395                 \r
8396         public:\r
8397                 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value):\r
8398                         _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)\r
8399                 {\r
8400                         assert(type == ast_string_constant);\r
8401                         _data.string = value;\r
8402                 }\r
8403 \r
8404                 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value):\r
8405                         _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)\r
8406                 {\r
8407                         assert(type == ast_number_constant);\r
8408                         _data.number = value;\r
8409                 }\r
8410                 \r
8411                 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value):\r
8412                         _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)\r
8413                 {\r
8414                         assert(type == ast_variable);\r
8415                         _data.variable = value;\r
8416                 }\r
8417                 \r
8418                 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0):\r
8419                         _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0)\r
8420                 {\r
8421                 }\r
8422 \r
8423                 xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):\r
8424                         _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0)\r
8425                 {\r
8426                         _data.nodetest = contents;\r
8427                 }\r
8428 \r
8429                 void set_next(xpath_ast_node* value)\r
8430                 {\r
8431                         _next = value;\r
8432                 }\r
8433 \r
8434                 void set_right(xpath_ast_node* value)\r
8435                 {\r
8436                         _right = value;\r
8437                 }\r
8438 \r
8439                 bool eval_boolean(const xpath_context& c, const xpath_stack& stack)\r
8440                 {\r
8441                         switch (_type)\r
8442                         {\r
8443                         case ast_op_or:\r
8444                                 return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);\r
8445                                 \r
8446                         case ast_op_and:\r
8447                                 return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);\r
8448                                 \r
8449                         case ast_op_equal:\r
8450                                 return compare_eq(_left, _right, c, stack, equal_to());\r
8451 \r
8452                         case ast_op_not_equal:\r
8453                                 return compare_eq(_left, _right, c, stack, not_equal_to());\r
8454         \r
8455                         case ast_op_less:\r
8456                                 return compare_rel(_left, _right, c, stack, less());\r
8457                         \r
8458                         case ast_op_greater:\r
8459                                 return compare_rel(_right, _left, c, stack, less());\r
8460 \r
8461                         case ast_op_less_or_equal:\r
8462                                 return compare_rel(_left, _right, c, stack, less_equal());\r
8463                         \r
8464                         case ast_op_greater_or_equal:\r
8465                                 return compare_rel(_right, _left, c, stack, less_equal());\r
8466 \r
8467                         case ast_func_starts_with:\r
8468                         {\r
8469                                 xpath_allocator_capture cr(stack.result);\r
8470 \r
8471                                 xpath_string lr = _left->eval_string(c, stack);\r
8472                                 xpath_string rr = _right->eval_string(c, stack);\r
8473 \r
8474                                 return starts_with(lr.c_str(), rr.c_str());\r
8475                         }\r
8476 \r
8477                         case ast_func_contains:\r
8478                         {\r
8479                                 xpath_allocator_capture cr(stack.result);\r
8480 \r
8481                                 xpath_string lr = _left->eval_string(c, stack);\r
8482                                 xpath_string rr = _right->eval_string(c, stack);\r
8483 \r
8484                                 return find_substring(lr.c_str(), rr.c_str()) != 0;\r
8485                         }\r
8486 \r
8487                         case ast_func_boolean:\r
8488                                 return _left->eval_boolean(c, stack);\r
8489                                 \r
8490                         case ast_func_not:\r
8491                                 return !_left->eval_boolean(c, stack);\r
8492                                 \r
8493                         case ast_func_true:\r
8494                                 return true;\r
8495                                 \r
8496                         case ast_func_false:\r
8497                                 return false;\r
8498 \r
8499                         case ast_func_lang:\r
8500                         {\r
8501                                 if (c.n.attribute()) return false;\r
8502                                 \r
8503                                 xpath_allocator_capture cr(stack.result);\r
8504 \r
8505                                 xpath_string lang = _left->eval_string(c, stack);\r
8506                                 \r
8507                                 for (xml_node n = c.n.node(); n; n = n.parent())\r
8508                                 {\r
8509                                         xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));\r
8510                                         \r
8511                                         if (a)\r
8512                                         {\r
8513                                                 const char_t* value = a.value();\r
8514                                                 \r
8515                                                 // strnicmp / strncasecmp is not portable\r
8516                                                 for (const char_t* lit = lang.c_str(); *lit; ++lit)\r
8517                                                 {\r
8518                                                         if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;\r
8519                                                         ++value;\r
8520                                                 }\r
8521                                                 \r
8522                                                 return *value == 0 || *value == '-';\r
8523                                         }\r
8524                                 }\r
8525                                 \r
8526                                 return false;\r
8527                         }\r
8528 \r
8529                         case ast_variable:\r
8530                         {\r
8531                                 assert(_rettype == _data.variable->type());\r
8532 \r
8533                                 if (_rettype == xpath_type_boolean)\r
8534                                         return _data.variable->get_boolean();\r
8535 \r
8536                                 // fallthrough to type conversion\r
8537                         }\r
8538 \r
8539                         default:\r
8540                         {\r
8541                                 switch (_rettype)\r
8542                                 {\r
8543                                 case xpath_type_number:\r
8544                                         return convert_number_to_boolean(eval_number(c, stack));\r
8545                                         \r
8546                                 case xpath_type_string:\r
8547                                 {\r
8548                                         xpath_allocator_capture cr(stack.result);\r
8549 \r
8550                                         return !eval_string(c, stack).empty();\r
8551                                 }\r
8552                                         \r
8553                                 case xpath_type_node_set:                               \r
8554                                 {\r
8555                                         xpath_allocator_capture cr(stack.result);\r
8556 \r
8557                                         return !eval_node_set(c, stack).empty();\r
8558                                 }\r
8559 \r
8560                                 default:\r
8561                                         assert(!"Wrong expression for return type boolean");\r
8562                                         return false;\r
8563                                 }\r
8564                         }\r
8565                         }\r
8566                 }\r
8567 \r
8568                 double eval_number(const xpath_context& c, const xpath_stack& stack)\r
8569                 {\r
8570                         switch (_type)\r
8571                         {\r
8572                         case ast_op_add:\r
8573                                 return _left->eval_number(c, stack) + _right->eval_number(c, stack);\r
8574                                 \r
8575                         case ast_op_subtract:\r
8576                                 return _left->eval_number(c, stack) - _right->eval_number(c, stack);\r
8577 \r
8578                         case ast_op_multiply:\r
8579                                 return _left->eval_number(c, stack) * _right->eval_number(c, stack);\r
8580 \r
8581                         case ast_op_divide:\r
8582                                 return _left->eval_number(c, stack) / _right->eval_number(c, stack);\r
8583 \r
8584                         case ast_op_mod:\r
8585                                 return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));\r
8586 \r
8587                         case ast_op_negate:\r
8588                                 return -_left->eval_number(c, stack);\r
8589 \r
8590                         case ast_number_constant:\r
8591                                 return _data.number;\r
8592 \r
8593                         case ast_func_last:\r
8594                                 return static_cast<double>(c.size);\r
8595                         \r
8596                         case ast_func_position:\r
8597                                 return static_cast<double>(c.position);\r
8598 \r
8599                         case ast_func_count:\r
8600                         {\r
8601                                 xpath_allocator_capture cr(stack.result);\r
8602 \r
8603                                 return static_cast<double>(_left->eval_node_set(c, stack).size());\r
8604                         }\r
8605                         \r
8606                         case ast_func_string_length_0:\r
8607                         {\r
8608                                 xpath_allocator_capture cr(stack.result);\r
8609 \r
8610                                 return static_cast<double>(string_value(c.n, stack.result).length());\r
8611                         }\r
8612                         \r
8613                         case ast_func_string_length_1:\r
8614                         {\r
8615                                 xpath_allocator_capture cr(stack.result);\r
8616 \r
8617                                 return static_cast<double>(_left->eval_string(c, stack).length());\r
8618                         }\r
8619                         \r
8620                         case ast_func_number_0:\r
8621                         {\r
8622                                 xpath_allocator_capture cr(stack.result);\r
8623 \r
8624                                 return convert_string_to_number(string_value(c.n, stack.result).c_str());\r
8625                         }\r
8626                         \r
8627                         case ast_func_number_1:\r
8628                                 return _left->eval_number(c, stack);\r
8629 \r
8630                         case ast_func_sum:\r
8631                         {\r
8632                                 xpath_allocator_capture cr(stack.result);\r
8633 \r
8634                                 double r = 0;\r
8635                                 \r
8636                                 xpath_node_set_raw ns = _left->eval_node_set(c, stack);\r
8637                                 \r
8638                                 for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)\r
8639                                 {\r
8640                                         xpath_allocator_capture cri(stack.result);\r
8641 \r
8642                                         r += convert_string_to_number(string_value(*it, stack.result).c_str());\r
8643                                 }\r
8644                         \r
8645                                 return r;\r
8646                         }\r
8647 \r
8648                         case ast_func_floor:\r
8649                         {\r
8650                                 double r = _left->eval_number(c, stack);\r
8651                                 \r
8652                                 return r == r ? floor(r) : r;\r
8653                         }\r
8654 \r
8655                         case ast_func_ceiling:\r
8656                         {\r
8657                                 double r = _left->eval_number(c, stack);\r
8658                                 \r
8659                                 return r == r ? ceil(r) : r;\r
8660                         }\r
8661 \r
8662                         case ast_func_round:\r
8663                                 return round_nearest_nzero(_left->eval_number(c, stack));\r
8664                         \r
8665                         case ast_variable:\r
8666                         {\r
8667                                 assert(_rettype == _data.variable->type());\r
8668 \r
8669                                 if (_rettype == xpath_type_number)\r
8670                                         return _data.variable->get_number();\r
8671 \r
8672                                 // fallthrough to type conversion\r
8673                         }\r
8674 \r
8675                         default:\r
8676                         {\r
8677                                 switch (_rettype)\r
8678                                 {\r
8679                                 case xpath_type_boolean:\r
8680                                         return eval_boolean(c, stack) ? 1 : 0;\r
8681                                         \r
8682                                 case xpath_type_string:\r
8683                                 {\r
8684                                         xpath_allocator_capture cr(stack.result);\r
8685 \r
8686                                         return convert_string_to_number(eval_string(c, stack).c_str());\r
8687                                 }\r
8688                                         \r
8689                                 case xpath_type_node_set:\r
8690                                 {\r
8691                                         xpath_allocator_capture cr(stack.result);\r
8692 \r
8693                                         return convert_string_to_number(eval_string(c, stack).c_str());\r
8694                                 }\r
8695                                         \r
8696                                 default:\r
8697                                         assert(!"Wrong expression for return type number");\r
8698                                         return 0;\r
8699                                 }\r
8700                                 \r
8701                         }\r
8702                         }\r
8703                 }\r
8704                 \r
8705                 xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack)\r
8706                 {\r
8707                         assert(_type == ast_func_concat);\r
8708 \r
8709                         xpath_allocator_capture ct(stack.temp);\r
8710 \r
8711                         // count the string number\r
8712                         size_t count = 1;\r
8713                         for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++;\r
8714 \r
8715                         // gather all strings\r
8716                         xpath_string static_buffer[4];\r
8717                         xpath_string* buffer = static_buffer;\r
8718 \r
8719                         // allocate on-heap for large concats\r
8720                         if (count > sizeof(static_buffer) / sizeof(static_buffer[0]))\r
8721                         {\r
8722                                 buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));\r
8723                                 assert(buffer);\r
8724                         }\r
8725 \r
8726                         // evaluate all strings to temporary stack\r
8727                         xpath_stack swapped_stack = {stack.temp, stack.result};\r
8728 \r
8729                         buffer[0] = _left->eval_string(c, swapped_stack);\r
8730 \r
8731                         size_t pos = 1;\r
8732                         for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack);\r
8733                         assert(pos == count);\r
8734 \r
8735                         // get total length\r
8736                         size_t length = 0;\r
8737                         for (size_t i = 0; i < count; ++i) length += buffer[i].length();\r
8738 \r
8739                         // create final string\r
8740                         char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t)));\r
8741                         assert(result);\r
8742 \r
8743                         char_t* ri = result;\r
8744 \r
8745                         for (size_t j = 0; j < count; ++j)\r
8746                                 for (const char_t* bi = buffer[j].c_str(); *bi; ++bi)\r
8747                                         *ri++ = *bi;\r
8748 \r
8749                         *ri = 0;\r
8750 \r
8751                         return xpath_string(result, true);\r
8752                 }\r
8753 \r
8754                 xpath_string eval_string(const xpath_context& c, const xpath_stack& stack)\r
8755                 {\r
8756                         switch (_type)\r
8757                         {\r
8758                         case ast_string_constant:\r
8759                                 return xpath_string_const(_data.string);\r
8760                         \r
8761                         case ast_func_local_name_0:\r
8762                         {\r
8763                                 xpath_node na = c.n;\r
8764                                 \r
8765                                 return xpath_string_const(local_name(na));\r
8766                         }\r
8767 \r
8768                         case ast_func_local_name_1:\r
8769                         {\r
8770                                 xpath_allocator_capture cr(stack.result);\r
8771 \r
8772                                 xpath_node_set_raw ns = _left->eval_node_set(c, stack);\r
8773                                 xpath_node na = ns.first();\r
8774                                 \r
8775                                 return xpath_string_const(local_name(na));\r
8776                         }\r
8777 \r
8778                         case ast_func_name_0:\r
8779                         {\r
8780                                 xpath_node na = c.n;\r
8781                                 \r
8782                                 return xpath_string_const(qualified_name(na));\r
8783                         }\r
8784 \r
8785                         case ast_func_name_1:\r
8786                         {\r
8787                                 xpath_allocator_capture cr(stack.result);\r
8788 \r
8789                                 xpath_node_set_raw ns = _left->eval_node_set(c, stack);\r
8790                                 xpath_node na = ns.first();\r
8791                                 \r
8792                                 return xpath_string_const(qualified_name(na));\r
8793                         }\r
8794 \r
8795                         case ast_func_namespace_uri_0:\r
8796                         {\r
8797                                 xpath_node na = c.n;\r
8798                                 \r
8799                                 return xpath_string_const(namespace_uri(na));\r
8800                         }\r
8801 \r
8802                         case ast_func_namespace_uri_1:\r
8803                         {\r
8804                                 xpath_allocator_capture cr(stack.result);\r
8805 \r
8806                                 xpath_node_set_raw ns = _left->eval_node_set(c, stack);\r
8807                                 xpath_node na = ns.first();\r
8808                                 \r
8809                                 return xpath_string_const(namespace_uri(na));\r
8810                         }\r
8811 \r
8812                         case ast_func_string_0:\r
8813                                 return string_value(c.n, stack.result);\r
8814 \r
8815                         case ast_func_string_1:\r
8816                                 return _left->eval_string(c, stack);\r
8817 \r
8818                         case ast_func_concat:\r
8819                                 return eval_string_concat(c, stack);\r
8820 \r
8821                         case ast_func_substring_before:\r
8822                         {\r
8823                                 xpath_allocator_capture cr(stack.temp);\r
8824 \r
8825                                 xpath_stack swapped_stack = {stack.temp, stack.result};\r
8826 \r
8827                                 xpath_string s = _left->eval_string(c, swapped_stack);\r
8828                                 xpath_string p = _right->eval_string(c, swapped_stack);\r
8829 \r
8830                                 const char_t* pos = find_substring(s.c_str(), p.c_str());\r
8831                                 \r
8832                                 return pos ? xpath_string(s.c_str(), pos, stack.result) : xpath_string();\r
8833                         }\r
8834                         \r
8835                         case ast_func_substring_after:\r
8836                         {\r
8837                                 xpath_allocator_capture cr(stack.temp);\r
8838 \r
8839                                 xpath_stack swapped_stack = {stack.temp, stack.result};\r
8840 \r
8841                                 xpath_string s = _left->eval_string(c, swapped_stack);\r
8842                                 xpath_string p = _right->eval_string(c, swapped_stack);\r
8843                                 \r
8844                                 const char_t* pos = find_substring(s.c_str(), p.c_str());\r
8845                                 if (!pos) return xpath_string();\r
8846 \r
8847                                 const char_t* result = pos + p.length();\r
8848 \r
8849                                 return s.uses_heap() ? xpath_string(result, stack.result) : xpath_string_const(result);\r
8850                         }\r
8851 \r
8852                         case ast_func_substring_2:\r
8853                         {\r
8854                                 xpath_allocator_capture cr(stack.temp);\r
8855 \r
8856                                 xpath_stack swapped_stack = {stack.temp, stack.result};\r
8857 \r
8858                                 xpath_string s = _left->eval_string(c, swapped_stack);\r
8859                                 size_t s_length = s.length();\r
8860 \r
8861                                 double first = round_nearest(_right->eval_number(c, stack));\r
8862                                 \r
8863                                 if (is_nan(first)) return xpath_string(); // NaN\r
8864                                 else if (first >= s_length + 1) return xpath_string();\r
8865                                 \r
8866                                 size_t pos = first < 1 ? 1 : static_cast<size_t>(first);\r
8867                                 assert(1 <= pos && pos <= s_length + 1);\r
8868 \r
8869                                 const char_t* rbegin = s.c_str() + (pos - 1);\r
8870                                 \r
8871                                 return s.uses_heap() ? xpath_string(rbegin, stack.result) : xpath_string_const(rbegin);\r
8872                         }\r
8873                         \r
8874                         case ast_func_substring_3:\r
8875                         {\r
8876                                 xpath_allocator_capture cr(stack.temp);\r
8877 \r
8878                                 xpath_stack swapped_stack = {stack.temp, stack.result};\r
8879 \r
8880                                 xpath_string s = _left->eval_string(c, swapped_stack);\r
8881                                 size_t s_length = s.length();\r
8882 \r
8883                                 double first = round_nearest(_right->eval_number(c, stack));\r
8884                                 double last = first + round_nearest(_right->_next->eval_number(c, stack));\r
8885                                 \r
8886                                 if (is_nan(first) || is_nan(last)) return xpath_string();\r
8887                                 else if (first >= s_length + 1) return xpath_string();\r
8888                                 else if (first >= last) return xpath_string();\r
8889                                 else if (last < 1) return xpath_string();\r
8890                                 \r
8891                                 size_t pos = first < 1 ? 1 : static_cast<size_t>(first);\r
8892                                 size_t end = last >= s_length + 1 ? s_length + 1 : static_cast<size_t>(last);\r
8893 \r
8894                                 assert(1 <= pos && pos <= end && end <= s_length + 1);\r
8895                                 const char_t* rbegin = s.c_str() + (pos - 1);\r
8896                                 const char_t* rend = s.c_str() + (end - 1);\r
8897 \r
8898                                 return (end == s_length + 1 && !s.uses_heap()) ? xpath_string_const(rbegin) : xpath_string(rbegin, rend, stack.result);\r
8899                         }\r
8900 \r
8901                         case ast_func_normalize_space_0:\r
8902                         {\r
8903                                 xpath_string s = string_value(c.n, stack.result);\r
8904 \r
8905                                 normalize_space(s.data(stack.result));\r
8906 \r
8907                                 return s;\r
8908                         }\r
8909 \r
8910                         case ast_func_normalize_space_1:\r
8911                         {\r
8912                                 xpath_string s = _left->eval_string(c, stack);\r
8913 \r
8914                                 normalize_space(s.data(stack.result));\r
8915                         \r
8916                                 return s;\r
8917                         }\r
8918 \r
8919                         case ast_func_translate:\r
8920                         {\r
8921                                 xpath_allocator_capture cr(stack.temp);\r
8922 \r
8923                                 xpath_stack swapped_stack = {stack.temp, stack.result};\r
8924 \r
8925                                 xpath_string s = _left->eval_string(c, stack);\r
8926                                 xpath_string from = _right->eval_string(c, swapped_stack);\r
8927                                 xpath_string to = _right->_next->eval_string(c, swapped_stack);\r
8928 \r
8929                                 translate(s.data(stack.result), from.c_str(), to.c_str());\r
8930 \r
8931                                 return s;\r
8932                         }\r
8933 \r
8934                         case ast_variable:\r
8935                         {\r
8936                                 assert(_rettype == _data.variable->type());\r
8937 \r
8938                                 if (_rettype == xpath_type_string)\r
8939                                         return xpath_string_const(_data.variable->get_string());\r
8940 \r
8941                                 // fallthrough to type conversion\r
8942                         }\r
8943 \r
8944                         default:\r
8945                         {\r
8946                                 switch (_rettype)\r
8947                                 {\r
8948                                 case xpath_type_boolean:\r
8949                                         return xpath_string_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));\r
8950                                         \r
8951                                 case xpath_type_number:\r
8952                                         return convert_number_to_string(eval_number(c, stack), stack.result);\r
8953                                         \r
8954                                 case xpath_type_node_set:\r
8955                                 {\r
8956                                         xpath_allocator_capture cr(stack.temp);\r
8957 \r
8958                                         xpath_stack swapped_stack = {stack.temp, stack.result};\r
8959 \r
8960                                         xpath_node_set_raw ns = eval_node_set(c, swapped_stack);\r
8961                                         return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);\r
8962                                 }\r
8963                                 \r
8964                                 default:\r
8965                                         assert(!"Wrong expression for return type string");\r
8966                                         return xpath_string();\r
8967                                 }\r
8968                         }\r
8969                         }\r
8970                 }\r
8971 \r
8972                 xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack)\r
8973                 {\r
8974                         switch (_type)\r
8975                         {\r
8976                         case ast_op_union:\r
8977                         {\r
8978                                 xpath_allocator_capture cr(stack.temp);\r
8979 \r
8980                                 xpath_stack swapped_stack = {stack.temp, stack.result};\r
8981 \r
8982                                 xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack);\r
8983                                 xpath_node_set_raw rs = _right->eval_node_set(c, stack);\r
8984                                 \r
8985                                 // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother\r
8986                                 rs.set_type(xpath_node_set::type_unsorted);\r
8987 \r
8988                                 rs.append(ls.begin(), ls.end(), stack.result);\r
8989                                 rs.remove_duplicates();\r
8990                                 \r
8991                                 return rs;\r
8992                         }\r
8993 \r
8994                         case ast_filter:\r
8995                         case ast_filter_posinv:\r
8996                         {\r
8997                                 xpath_node_set_raw set = _left->eval_node_set(c, stack);\r
8998 \r
8999                                 // either expression is a number or it contains position() call; sort by document order\r
9000                                 if (_type == ast_filter) set.sort_do();\r
9001 \r
9002                                 apply_predicate(set, 0, _right, stack);\r
9003                         \r
9004                                 return set;\r
9005                         }\r
9006                         \r
9007                         case ast_func_id:\r
9008                                 return xpath_node_set_raw();\r
9009                         \r
9010                         case ast_step:\r
9011                         {\r
9012                                 switch (_axis)\r
9013                                 {\r
9014                                 case axis_ancestor:\r
9015                                         return step_do(c, stack, axis_to_type<axis_ancestor>());\r
9016                                         \r
9017                                 case axis_ancestor_or_self:\r
9018                                         return step_do(c, stack, axis_to_type<axis_ancestor_or_self>());\r
9019 \r
9020                                 case axis_attribute:\r
9021                                         return step_do(c, stack, axis_to_type<axis_attribute>());\r
9022 \r
9023                                 case axis_child:\r
9024                                         return step_do(c, stack, axis_to_type<axis_child>());\r
9025                                 \r
9026                                 case axis_descendant:\r
9027                                         return step_do(c, stack, axis_to_type<axis_descendant>());\r
9028 \r
9029                                 case axis_descendant_or_self:\r
9030                                         return step_do(c, stack, axis_to_type<axis_descendant_or_self>());\r
9031 \r
9032                                 case axis_following:\r
9033                                         return step_do(c, stack, axis_to_type<axis_following>());\r
9034                                 \r
9035                                 case axis_following_sibling:\r
9036                                         return step_do(c, stack, axis_to_type<axis_following_sibling>());\r
9037                                 \r
9038                                 case axis_namespace:\r
9039                                         // namespaced axis is not supported\r
9040                                         return xpath_node_set_raw();\r
9041                                 \r
9042                                 case axis_parent:\r
9043                                         return step_do(c, stack, axis_to_type<axis_parent>());\r
9044                                 \r
9045                                 case axis_preceding:\r
9046                                         return step_do(c, stack, axis_to_type<axis_preceding>());\r
9047 \r
9048                                 case axis_preceding_sibling:\r
9049                                         return step_do(c, stack, axis_to_type<axis_preceding_sibling>());\r
9050                                 \r
9051                                 case axis_self:\r
9052                                         return step_do(c, stack, axis_to_type<axis_self>());\r
9053 \r
9054                                 default:\r
9055                                         assert(!"Unknown axis");\r
9056                                         return xpath_node_set_raw();\r
9057                                 }\r
9058                         }\r
9059 \r
9060                         case ast_step_root:\r
9061                         {\r
9062                                 assert(!_right); // root step can't have any predicates\r
9063 \r
9064                                 xpath_node_set_raw ns;\r
9065 \r
9066                                 ns.set_type(xpath_node_set::type_sorted);\r
9067 \r
9068                                 if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);\r
9069                                 else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result);\r
9070 \r
9071                                 return ns;\r
9072                         }\r
9073 \r
9074                         case ast_variable:\r
9075                         {\r
9076                                 assert(_rettype == _data.variable->type());\r
9077 \r
9078                                 if (_rettype == xpath_type_node_set)\r
9079                                 {\r
9080                                         const xpath_node_set& s = _data.variable->get_node_set();\r
9081 \r
9082                                         xpath_node_set_raw ns;\r
9083 \r
9084                                         ns.set_type(s.type());\r
9085                                         ns.append(s.begin(), s.end(), stack.result);\r
9086 \r
9087                                         return ns;\r
9088                                 }\r
9089 \r
9090                                 // fallthrough to type conversion\r
9091                         }\r
9092 \r
9093                         default:\r
9094                                 assert(!"Wrong expression for return type node set");\r
9095                                 return xpath_node_set_raw();\r
9096                         }\r
9097                 }\r
9098                 \r
9099                 bool is_posinv()\r
9100                 {\r
9101                         switch (_type)\r
9102                         {\r
9103                         case ast_func_position:\r
9104                                 return false;\r
9105 \r
9106                         case ast_string_constant:\r
9107                         case ast_number_constant:\r
9108                         case ast_variable:\r
9109                                 return true;\r
9110 \r
9111                         case ast_step:\r
9112                         case ast_step_root:\r
9113                                 return true;\r
9114 \r
9115                         case ast_predicate:\r
9116                         case ast_filter:\r
9117                         case ast_filter_posinv:\r
9118                                 return true;\r
9119 \r
9120                         default:\r
9121                                 if (_left && !_left->is_posinv()) return false;\r
9122                                 \r
9123                                 for (xpath_ast_node* n = _right; n; n = n->_next)\r
9124                                         if (!n->is_posinv()) return false;\r
9125                                         \r
9126                                 return true;\r
9127                         }\r
9128                 }\r
9129 \r
9130                 xpath_value_type rettype() const\r
9131                 {\r
9132                         return static_cast<xpath_value_type>(_rettype);\r
9133                 }\r
9134         };\r
9135 \r
9136         struct xpath_parser\r
9137         {\r
9138                 xpath_allocator* _alloc;\r
9139                 xpath_lexer _lexer;\r
9140 \r
9141                 const char_t* _query;\r
9142                 xpath_variable_set* _variables;\r
9143 \r
9144                 xpath_parse_result* _result;\r
9145 \r
9146                 char_t _scratch[32];\r
9147 \r
9148         #ifdef PUGIXML_NO_EXCEPTIONS\r
9149                 jmp_buf _error_handler;\r
9150         #endif\r
9151 \r
9152                 void throw_error(const char* message)\r
9153                 {\r
9154                         _result->error = message;\r
9155                         _result->offset = _lexer.current_pos() - _query;\r
9156 \r
9157                 #ifdef PUGIXML_NO_EXCEPTIONS\r
9158                         longjmp(_error_handler, 1);\r
9159                 #else\r
9160                         throw xpath_exception(*_result);\r
9161                 #endif\r
9162                 }\r
9163 \r
9164                 void throw_error_oom()\r
9165                 {\r
9166                 #ifdef PUGIXML_NO_EXCEPTIONS\r
9167                         throw_error("Out of memory");\r
9168                 #else\r
9169                         throw std::bad_alloc();\r
9170                 #endif\r
9171                 }\r
9172 \r
9173                 void* alloc_node()\r
9174                 {\r
9175                         void* result = _alloc->allocate_nothrow(sizeof(xpath_ast_node));\r
9176 \r
9177                         if (!result) throw_error_oom();\r
9178 \r
9179                         return result;\r
9180                 }\r
9181 \r
9182                 const char_t* alloc_string(const xpath_lexer_string& value)\r
9183                 {\r
9184                         if (value.begin)\r
9185                         {\r
9186                                 size_t length = static_cast<size_t>(value.end - value.begin);\r
9187 \r
9188                                 char_t* c = static_cast<char_t*>(_alloc->allocate_nothrow((length + 1) * sizeof(char_t)));\r
9189                                 if (!c) throw_error_oom();\r
9190                                 assert(c); // workaround for clang static analysis\r
9191 \r
9192                                 memcpy(c, value.begin, length * sizeof(char_t));\r
9193                                 c[length] = 0;\r
9194 \r
9195                                 return c;\r
9196                         }\r
9197                         else return 0;\r
9198                 }\r
9199 \r
9200                 xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2])\r
9201                 {\r
9202                         assert(argc <= 1);\r
9203 \r
9204                         if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");\r
9205 \r
9206                         return new (alloc_node()) xpath_ast_node(argc == 0 ? type0 : type1, xpath_type_string, args[0]);\r
9207                 }\r
9208 \r
9209                 xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2])\r
9210                 {\r
9211                         switch (name.begin[0])\r
9212                         {\r
9213                         case 'b':\r
9214                                 if (name == PUGIXML_TEXT("boolean") && argc == 1)\r
9215                                         return new (alloc_node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]);\r
9216                                         \r
9217                                 break;\r
9218                         \r
9219                         case 'c':\r
9220                                 if (name == PUGIXML_TEXT("count") && argc == 1)\r
9221                                 {\r
9222                                         if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");\r
9223                                         return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]);\r
9224                                 }\r
9225                                 else if (name == PUGIXML_TEXT("contains") && argc == 2)\r
9226                                         return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_boolean, args[0], args[1]);\r
9227                                 else if (name == PUGIXML_TEXT("concat") && argc >= 2)\r
9228                                         return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]);\r
9229                                 else if (name == PUGIXML_TEXT("ceiling") && argc == 1)\r
9230                                         return new (alloc_node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]);\r
9231                                         \r
9232                                 break;\r
9233                         \r
9234                         case 'f':\r
9235                                 if (name == PUGIXML_TEXT("false") && argc == 0)\r
9236                                         return new (alloc_node()) xpath_ast_node(ast_func_false, xpath_type_boolean);\r
9237                                 else if (name == PUGIXML_TEXT("floor") && argc == 1)\r
9238                                         return new (alloc_node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]);\r
9239                                         \r
9240                                 break;\r
9241                         \r
9242                         case 'i':\r
9243                                 if (name == PUGIXML_TEXT("id") && argc == 1)\r
9244                                         return new (alloc_node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]);\r
9245                                         \r
9246                                 break;\r
9247                         \r
9248                         case 'l':\r
9249                                 if (name == PUGIXML_TEXT("last") && argc == 0)\r
9250                                         return new (alloc_node()) xpath_ast_node(ast_func_last, xpath_type_number);\r
9251                                 else if (name == PUGIXML_TEXT("lang") && argc == 1)\r
9252                                         return new (alloc_node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]);\r
9253                                 else if (name == PUGIXML_TEXT("local-name") && argc <= 1)\r
9254                                         return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args);\r
9255                         \r
9256                                 break;\r
9257                         \r
9258                         case 'n':\r
9259                                 if (name == PUGIXML_TEXT("name") && argc <= 1)\r
9260                                         return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args);\r
9261                                 else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)\r
9262                                         return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args);\r
9263                                 else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)\r
9264                                         return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);\r
9265                                 else if (name == PUGIXML_TEXT("not") && argc == 1)\r
9266                                         return new (alloc_node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]);\r
9267                                 else if (name == PUGIXML_TEXT("number") && argc <= 1)\r
9268                                         return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);\r
9269                         \r
9270                                 break;\r
9271                         \r
9272                         case 'p':\r
9273                                 if (name == PUGIXML_TEXT("position") && argc == 0)\r
9274                                         return new (alloc_node()) xpath_ast_node(ast_func_position, xpath_type_number);\r
9275                                 \r
9276                                 break;\r
9277                         \r
9278                         case 'r':\r
9279                                 if (name == PUGIXML_TEXT("round") && argc == 1)\r
9280                                         return new (alloc_node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]);\r
9281 \r
9282                                 break;\r
9283                         \r
9284                         case 's':\r
9285                                 if (name == PUGIXML_TEXT("string") && argc <= 1)\r
9286                                         return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);\r
9287                                 else if (name == PUGIXML_TEXT("string-length") && argc <= 1)\r
9288                                         return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]);\r
9289                                 else if (name == PUGIXML_TEXT("starts-with") && argc == 2)\r
9290                                         return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);\r
9291                                 else if (name == PUGIXML_TEXT("substring-before") && argc == 2)\r
9292                                         return new (alloc_node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);\r
9293                                 else if (name == PUGIXML_TEXT("substring-after") && argc == 2)\r
9294                                         return new (alloc_node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);\r
9295                                 else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))\r
9296                                         return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);\r
9297                                 else if (name == PUGIXML_TEXT("sum") && argc == 1)\r
9298                                 {\r
9299                                         if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");\r
9300                                         return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]);\r
9301                                 }\r
9302 \r
9303                                 break;\r
9304                         \r
9305                         case 't':\r
9306                                 if (name == PUGIXML_TEXT("translate") && argc == 3)\r
9307                                         return new (alloc_node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]);\r
9308                                 else if (name == PUGIXML_TEXT("true") && argc == 0)\r
9309                                         return new (alloc_node()) xpath_ast_node(ast_func_true, xpath_type_boolean);\r
9310                                         \r
9311                                 break;\r
9312 \r
9313                         default:\r
9314                                 break;\r
9315                         }\r
9316 \r
9317                         throw_error("Unrecognized function or wrong parameter count");\r
9318 \r
9319                         return 0;\r
9320                 }\r
9321 \r
9322                 axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)\r
9323                 {\r
9324                         specified = true;\r
9325 \r
9326                         switch (name.begin[0])\r
9327                         {\r
9328                         case 'a':\r
9329                                 if (name == PUGIXML_TEXT("ancestor"))\r
9330                                         return axis_ancestor;\r
9331                                 else if (name == PUGIXML_TEXT("ancestor-or-self"))\r
9332                                         return axis_ancestor_or_self;\r
9333                                 else if (name == PUGIXML_TEXT("attribute"))\r
9334                                         return axis_attribute;\r
9335                                 \r
9336                                 break;\r
9337                         \r
9338                         case 'c':\r
9339                                 if (name == PUGIXML_TEXT("child"))\r
9340                                         return axis_child;\r
9341                                 \r
9342                                 break;\r
9343                         \r
9344                         case 'd':\r
9345                                 if (name == PUGIXML_TEXT("descendant"))\r
9346                                         return axis_descendant;\r
9347                                 else if (name == PUGIXML_TEXT("descendant-or-self"))\r
9348                                         return axis_descendant_or_self;\r
9349                                 \r
9350                                 break;\r
9351                         \r
9352                         case 'f':\r
9353                                 if (name == PUGIXML_TEXT("following"))\r
9354                                         return axis_following;\r
9355                                 else if (name == PUGIXML_TEXT("following-sibling"))\r
9356                                         return axis_following_sibling;\r
9357                                 \r
9358                                 break;\r
9359                         \r
9360                         case 'n':\r
9361                                 if (name == PUGIXML_TEXT("namespace"))\r
9362                                         return axis_namespace;\r
9363                                 \r
9364                                 break;\r
9365                         \r
9366                         case 'p':\r
9367                                 if (name == PUGIXML_TEXT("parent"))\r
9368                                         return axis_parent;\r
9369                                 else if (name == PUGIXML_TEXT("preceding"))\r
9370                                         return axis_preceding;\r
9371                                 else if (name == PUGIXML_TEXT("preceding-sibling"))\r
9372                                         return axis_preceding_sibling;\r
9373                                 \r
9374                                 break;\r
9375                         \r
9376                         case 's':\r
9377                                 if (name == PUGIXML_TEXT("self"))\r
9378                                         return axis_self;\r
9379                                 \r
9380                                 break;\r
9381 \r
9382                         default:\r
9383                                 break;\r
9384                         }\r
9385 \r
9386                         specified = false;\r
9387                         return axis_child;\r
9388                 }\r
9389 \r
9390                 nodetest_t parse_node_test_type(const xpath_lexer_string& name)\r
9391                 {\r
9392                         switch (name.begin[0])\r
9393                         {\r
9394                         case 'c':\r
9395                                 if (name == PUGIXML_TEXT("comment"))\r
9396                                         return nodetest_type_comment;\r
9397 \r
9398                                 break;\r
9399 \r
9400                         case 'n':\r
9401                                 if (name == PUGIXML_TEXT("node"))\r
9402                                         return nodetest_type_node;\r
9403 \r
9404                                 break;\r
9405 \r
9406                         case 'p':\r
9407                                 if (name == PUGIXML_TEXT("processing-instruction"))\r
9408                                         return nodetest_type_pi;\r
9409 \r
9410                                 break;\r
9411 \r
9412                         case 't':\r
9413                                 if (name == PUGIXML_TEXT("text"))\r
9414                                         return nodetest_type_text;\r
9415 \r
9416                                 break;\r
9417                         \r
9418                         default:\r
9419                                 break;\r
9420                         }\r
9421 \r
9422                         return nodetest_none;\r
9423                 }\r
9424 \r
9425                 // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall\r
9426                 xpath_ast_node* parse_primary_expression()\r
9427                 {\r
9428                         switch (_lexer.current())\r
9429                         {\r
9430                         case lex_var_ref:\r
9431                         {\r
9432                                 xpath_lexer_string name = _lexer.contents();\r
9433 \r
9434                                 if (!_variables)\r
9435                                         throw_error("Unknown variable: variable set is not provided");\r
9436 \r
9437                                 xpath_variable* var = get_variable_scratch(_scratch, _variables, name.begin, name.end);\r
9438 \r
9439                                 if (!var)\r
9440                                         throw_error("Unknown variable: variable set does not contain the given name");\r
9441 \r
9442                                 _lexer.next();\r
9443 \r
9444                                 return new (alloc_node()) xpath_ast_node(ast_variable, var->type(), var);\r
9445                         }\r
9446 \r
9447                         case lex_open_brace:\r
9448                         {\r
9449                                 _lexer.next();\r
9450 \r
9451                                 xpath_ast_node* n = parse_expression();\r
9452 \r
9453                                 if (_lexer.current() != lex_close_brace)\r
9454                                         throw_error("Unmatched braces");\r
9455 \r
9456                                 _lexer.next();\r
9457 \r
9458                                 return n;\r
9459                         }\r
9460 \r
9461                         case lex_quoted_string:\r
9462                         {\r
9463                                 const char_t* value = alloc_string(_lexer.contents());\r
9464 \r
9465                                 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value);\r
9466                                 _lexer.next();\r
9467 \r
9468                                 return n;\r
9469                         }\r
9470 \r
9471                         case lex_number:\r
9472                         {\r
9473                                 double value = 0;\r
9474 \r
9475                                 if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value))\r
9476                                         throw_error_oom();\r
9477 \r
9478                                 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value);\r
9479                                 _lexer.next();\r
9480 \r
9481                                 return n;\r
9482                         }\r
9483 \r
9484                         case lex_string:\r
9485                         {\r
9486                                 xpath_ast_node* args[2] = {0};\r
9487                                 size_t argc = 0;\r
9488                                 \r
9489                                 xpath_lexer_string function = _lexer.contents();\r
9490                                 _lexer.next();\r
9491                                 \r
9492                                 xpath_ast_node* last_arg = 0;\r
9493                                 \r
9494                                 if (_lexer.current() != lex_open_brace)\r
9495                                         throw_error("Unrecognized function call");\r
9496                                 _lexer.next();\r
9497 \r
9498                                 if (_lexer.current() != lex_close_brace)\r
9499                                         args[argc++] = parse_expression();\r
9500 \r
9501                                 while (_lexer.current() != lex_close_brace)\r
9502                                 {\r
9503                                         if (_lexer.current() != lex_comma)\r
9504                                                 throw_error("No comma between function arguments");\r
9505                                         _lexer.next();\r
9506                                         \r
9507                                         xpath_ast_node* n = parse_expression();\r
9508                                         \r
9509                                         if (argc < 2) args[argc] = n;\r
9510                                         else last_arg->set_next(n);\r
9511 \r
9512                                         argc++;\r
9513                                         last_arg = n;\r
9514                                 }\r
9515                                 \r
9516                                 _lexer.next();\r
9517 \r
9518                                 return parse_function(function, argc, args);\r
9519                         }\r
9520 \r
9521                         default:\r
9522                                 throw_error("Unrecognizable primary expression");\r
9523 \r
9524                                 return 0;\r
9525                         }\r
9526                 }\r
9527                 \r
9528                 // FilterExpr ::= PrimaryExpr | FilterExpr Predicate\r
9529                 // Predicate ::= '[' PredicateExpr ']'\r
9530                 // PredicateExpr ::= Expr\r
9531                 xpath_ast_node* parse_filter_expression()\r
9532                 {\r
9533                         xpath_ast_node* n = parse_primary_expression();\r
9534 \r
9535                         while (_lexer.current() == lex_open_square_brace)\r
9536                         {\r
9537                                 _lexer.next();\r
9538 \r
9539                                 xpath_ast_node* expr = parse_expression();\r
9540 \r
9541                                 if (n->rettype() != xpath_type_node_set) throw_error("Predicate has to be applied to node set");\r
9542 \r
9543                                 bool posinv = expr->rettype() != xpath_type_number && expr->is_posinv();\r
9544 \r
9545                                 n = new (alloc_node()) xpath_ast_node(posinv ? ast_filter_posinv : ast_filter, xpath_type_node_set, n, expr);\r
9546 \r
9547                                 if (_lexer.current() != lex_close_square_brace)\r
9548                                         throw_error("Unmatched square brace");\r
9549                         \r
9550                                 _lexer.next();\r
9551                         }\r
9552                         \r
9553                         return n;\r
9554                 }\r
9555                 \r
9556                 // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep\r
9557                 // AxisSpecifier ::= AxisName '::' | '@'?\r
9558                 // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'\r
9559                 // NameTest ::= '*' | NCName ':' '*' | QName\r
9560                 // AbbreviatedStep ::= '.' | '..'\r
9561                 xpath_ast_node* parse_step(xpath_ast_node* set)\r
9562                 {\r
9563                         if (set && set->rettype() != xpath_type_node_set)\r
9564                                 throw_error("Step has to be applied to node set");\r
9565 \r
9566                         bool axis_specified = false;\r
9567                         axis_t axis = axis_child; // implied child axis\r
9568 \r
9569                         if (_lexer.current() == lex_axis_attribute)\r
9570                         {\r
9571                                 axis = axis_attribute;\r
9572                                 axis_specified = true;\r
9573                                 \r
9574                                 _lexer.next();\r
9575                         }\r
9576                         else if (_lexer.current() == lex_dot)\r
9577                         {\r
9578                                 _lexer.next();\r
9579                                 \r
9580                                 return new (alloc_node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0);\r
9581                         }\r
9582                         else if (_lexer.current() == lex_double_dot)\r
9583                         {\r
9584                                 _lexer.next();\r
9585                                 \r
9586                                 return new (alloc_node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0);\r
9587                         }\r
9588                 \r
9589                         nodetest_t nt_type = nodetest_none;\r
9590                         xpath_lexer_string nt_name;\r
9591                         \r
9592                         if (_lexer.current() == lex_string)\r
9593                         {\r
9594                                 // node name test\r
9595                                 nt_name = _lexer.contents();\r
9596                                 _lexer.next();\r
9597 \r
9598                                 // was it an axis name?\r
9599                                 if (_lexer.current() == lex_double_colon)\r
9600                                 {\r
9601                                         // parse axis name\r
9602                                         if (axis_specified) throw_error("Two axis specifiers in one step");\r
9603 \r
9604                                         axis = parse_axis_name(nt_name, axis_specified);\r
9605 \r
9606                                         if (!axis_specified) throw_error("Unknown axis");\r
9607 \r
9608                                         // read actual node test\r
9609                                         _lexer.next();\r
9610 \r
9611                                         if (_lexer.current() == lex_multiply)\r
9612                                         {\r
9613                                                 nt_type = nodetest_all;\r
9614                                                 nt_name = xpath_lexer_string();\r
9615                                                 _lexer.next();\r
9616                                         }\r
9617                                         else if (_lexer.current() == lex_string)\r
9618                                         {\r
9619                                                 nt_name = _lexer.contents();\r
9620                                                 _lexer.next();\r
9621                                         }\r
9622                                         else throw_error("Unrecognized node test");\r
9623                                 }\r
9624                                 \r
9625                                 if (nt_type == nodetest_none)\r
9626                                 {\r
9627                                         // node type test or processing-instruction\r
9628                                         if (_lexer.current() == lex_open_brace)\r
9629                                         {\r
9630                                                 _lexer.next();\r
9631                                                 \r
9632                                                 if (_lexer.current() == lex_close_brace)\r
9633                                                 {\r
9634                                                         _lexer.next();\r
9635 \r
9636                                                         nt_type = parse_node_test_type(nt_name);\r
9637 \r
9638                                                         if (nt_type == nodetest_none) throw_error("Unrecognized node type");\r
9639                                                         \r
9640                                                         nt_name = xpath_lexer_string();\r
9641                                                 }\r
9642                                                 else if (nt_name == PUGIXML_TEXT("processing-instruction"))\r
9643                                                 {\r
9644                                                         if (_lexer.current() != lex_quoted_string)\r
9645                                                                 throw_error("Only literals are allowed as arguments to processing-instruction()");\r
9646                                                 \r
9647                                                         nt_type = nodetest_pi;\r
9648                                                         nt_name = _lexer.contents();\r
9649                                                         _lexer.next();\r
9650                                                         \r
9651                                                         if (_lexer.current() != lex_close_brace)\r
9652                                                                 throw_error("Unmatched brace near processing-instruction()");\r
9653                                                         _lexer.next();\r
9654                                                 }\r
9655                                                 else\r
9656                                                         throw_error("Unmatched brace near node type test");\r
9657 \r
9658                                         }\r
9659                                         // QName or NCName:*\r
9660                                         else\r
9661                                         {\r
9662                                                 if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:*\r
9663                                                 {\r
9664                                                         nt_name.end--; // erase *\r
9665                                                         \r
9666                                                         nt_type = nodetest_all_in_namespace;\r
9667                                                 }\r
9668                                                 else nt_type = nodetest_name;\r
9669                                         }\r
9670                                 }\r
9671                         }\r
9672                         else if (_lexer.current() == lex_multiply)\r
9673                         {\r
9674                                 nt_type = nodetest_all;\r
9675                                 _lexer.next();\r
9676                         }\r
9677                         else throw_error("Unrecognized node test");\r
9678                         \r
9679                         xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step, set, axis, nt_type, alloc_string(nt_name));\r
9680                         \r
9681                         xpath_ast_node* last = 0;\r
9682                         \r
9683                         while (_lexer.current() == lex_open_square_brace)\r
9684                         {\r
9685                                 _lexer.next();\r
9686                                 \r
9687                                 xpath_ast_node* expr = parse_expression();\r
9688 \r
9689                                 xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, xpath_type_node_set, expr);\r
9690                                 \r
9691                                 if (_lexer.current() != lex_close_square_brace)\r
9692                                         throw_error("Unmatched square brace");\r
9693                                 _lexer.next();\r
9694                                 \r
9695                                 if (last) last->set_next(pred);\r
9696                                 else n->set_right(pred);\r
9697                                 \r
9698                                 last = pred;\r
9699                         }\r
9700                         \r
9701                         return n;\r
9702                 }\r
9703                 \r
9704                 // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step\r
9705                 xpath_ast_node* parse_relative_location_path(xpath_ast_node* set)\r
9706                 {\r
9707                         xpath_ast_node* n = parse_step(set);\r
9708                         \r
9709                         while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)\r
9710                         {\r
9711                                 lexeme_t l = _lexer.current();\r
9712                                 _lexer.next();\r
9713 \r
9714                                 if (l == lex_double_slash)\r
9715                                         n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);\r
9716                                 \r
9717                                 n = parse_step(n);\r
9718                         }\r
9719                         \r
9720                         return n;\r
9721                 }\r
9722                 \r
9723                 // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath\r
9724                 // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath\r
9725                 xpath_ast_node* parse_location_path()\r
9726                 {\r
9727                         if (_lexer.current() == lex_slash)\r
9728                         {\r
9729                                 _lexer.next();\r
9730                                 \r
9731                                 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);\r
9732 \r
9733                                 // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path\r
9734                                 lexeme_t l = _lexer.current();\r
9735 \r
9736                                 if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply)\r
9737                                         return parse_relative_location_path(n);\r
9738                                 else\r
9739                                         return n;\r
9740                         }\r
9741                         else if (_lexer.current() == lex_double_slash)\r
9742                         {\r
9743                                 _lexer.next();\r
9744                                 \r
9745                                 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);\r
9746                                 n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);\r
9747                                 \r
9748                                 return parse_relative_location_path(n);\r
9749                         }\r
9750 \r
9751                         // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1\r
9752                         return parse_relative_location_path(0);\r
9753                 }\r
9754                 \r
9755                 // PathExpr ::= LocationPath\r
9756                 //                              | FilterExpr\r
9757                 //                              | FilterExpr '/' RelativeLocationPath\r
9758                 //                              | FilterExpr '//' RelativeLocationPath\r
9759                 // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr\r
9760                 // UnaryExpr ::= UnionExpr | '-' UnaryExpr\r
9761                 xpath_ast_node* parse_path_or_unary_expression()\r
9762                 {\r
9763                         // Clarification.\r
9764                         // PathExpr begins with either LocationPath or FilterExpr.\r
9765                         // FilterExpr begins with PrimaryExpr\r
9766                         // PrimaryExpr begins with '$' in case of it being a variable reference,\r
9767                         // '(' in case of it being an expression, string literal, number constant or\r
9768                         // function call.\r
9769 \r
9770                         if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace || \r
9771                                 _lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||\r
9772                                 _lexer.current() == lex_string)\r
9773                         {\r
9774                                 if (_lexer.current() == lex_string)\r
9775                                 {\r
9776                                         // This is either a function call, or not - if not, we shall proceed with location path\r
9777                                         const char_t* state = _lexer.state();\r
9778                                         \r
9779                                         while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state;\r
9780                                         \r
9781                                         if (*state != '(') return parse_location_path();\r
9782 \r
9783                                         // This looks like a function call; however this still can be a node-test. Check it.\r
9784                                         if (parse_node_test_type(_lexer.contents()) != nodetest_none) return parse_location_path();\r
9785                                 }\r
9786                                 \r
9787                                 xpath_ast_node* n = parse_filter_expression();\r
9788 \r
9789                                 if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)\r
9790                                 {\r
9791                                         lexeme_t l = _lexer.current();\r
9792                                         _lexer.next();\r
9793                                         \r
9794                                         if (l == lex_double_slash)\r
9795                                         {\r
9796                                                 if (n->rettype() != xpath_type_node_set) throw_error("Step has to be applied to node set");\r
9797 \r
9798                                                 n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);\r
9799                                         }\r
9800         \r
9801                                         // select from location path\r
9802                                         return parse_relative_location_path(n);\r
9803                                 }\r
9804 \r
9805                                 return n;\r
9806                         }\r
9807                         else if (_lexer.current() == lex_minus)\r
9808                         {\r
9809                                 _lexer.next();\r
9810 \r
9811                                 // precedence 7+ - only parses union expressions\r
9812                                 xpath_ast_node* expr = parse_expression_rec(parse_path_or_unary_expression(), 7);\r
9813 \r
9814                                 return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr);\r
9815                         }\r
9816                         else\r
9817                                 return parse_location_path();\r
9818                 }\r
9819 \r
9820                 struct binary_op_t\r
9821                 {\r
9822                         ast_type_t asttype;\r
9823                         xpath_value_type rettype;\r
9824                         int precedence;\r
9825 \r
9826                         binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0)\r
9827                         {\r
9828                         }\r
9829 \r
9830                         binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_)\r
9831                         {\r
9832                         }\r
9833 \r
9834                         static binary_op_t parse(xpath_lexer& lexer)\r
9835                         {\r
9836                                 switch (lexer.current())\r
9837                                 {\r
9838                                 case lex_string:\r
9839                                         if (lexer.contents() == PUGIXML_TEXT("or"))\r
9840                                                 return binary_op_t(ast_op_or, xpath_type_boolean, 1);\r
9841                                         else if (lexer.contents() == PUGIXML_TEXT("and"))\r
9842                                                 return binary_op_t(ast_op_and, xpath_type_boolean, 2);\r
9843                                         else if (lexer.contents() == PUGIXML_TEXT("div"))\r
9844                                                 return binary_op_t(ast_op_divide, xpath_type_number, 6);\r
9845                                         else if (lexer.contents() == PUGIXML_TEXT("mod"))\r
9846                                                 return binary_op_t(ast_op_mod, xpath_type_number, 6);\r
9847                                         else\r
9848                                                 return binary_op_t();\r
9849 \r
9850                                 case lex_equal:\r
9851                                         return binary_op_t(ast_op_equal, xpath_type_boolean, 3);\r
9852 \r
9853                                 case lex_not_equal:\r
9854                                         return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3);\r
9855 \r
9856                                 case lex_less:\r
9857                                         return binary_op_t(ast_op_less, xpath_type_boolean, 4);\r
9858 \r
9859                                 case lex_greater:\r
9860                                         return binary_op_t(ast_op_greater, xpath_type_boolean, 4);\r
9861 \r
9862                                 case lex_less_or_equal:\r
9863                                         return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4);\r
9864 \r
9865                                 case lex_greater_or_equal:\r
9866                                         return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4);\r
9867 \r
9868                                 case lex_plus:\r
9869                                         return binary_op_t(ast_op_add, xpath_type_number, 5);\r
9870 \r
9871                                 case lex_minus:\r
9872                                         return binary_op_t(ast_op_subtract, xpath_type_number, 5);\r
9873 \r
9874                                 case lex_multiply:\r
9875                                         return binary_op_t(ast_op_multiply, xpath_type_number, 6);\r
9876 \r
9877                                 case lex_union:\r
9878                                         return binary_op_t(ast_op_union, xpath_type_node_set, 7);\r
9879 \r
9880                                 default:\r
9881                                         return binary_op_t();\r
9882                                 }\r
9883                         }\r
9884                 };\r
9885 \r
9886                 xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit)\r
9887                 {\r
9888                         binary_op_t op = binary_op_t::parse(_lexer);\r
9889 \r
9890                         while (op.asttype != ast_unknown && op.precedence >= limit)\r
9891                         {\r
9892                                 _lexer.next();\r
9893 \r
9894                                 xpath_ast_node* rhs = parse_path_or_unary_expression();\r
9895 \r
9896                                 binary_op_t nextop = binary_op_t::parse(_lexer);\r
9897 \r
9898                                 while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence)\r
9899                                 {\r
9900                                         rhs = parse_expression_rec(rhs, nextop.precedence);\r
9901 \r
9902                                         nextop = binary_op_t::parse(_lexer);\r
9903                                 }\r
9904 \r
9905                                 if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set))\r
9906                                         throw_error("Union operator has to be applied to node sets");\r
9907 \r
9908                                 lhs = new (alloc_node()) xpath_ast_node(op.asttype, op.rettype, lhs, rhs);\r
9909 \r
9910                                 op = binary_op_t::parse(_lexer);\r
9911                         }\r
9912 \r
9913                         return lhs;\r
9914                 }\r
9915 \r
9916                 // Expr ::= OrExpr\r
9917                 // OrExpr ::= AndExpr | OrExpr 'or' AndExpr\r
9918                 // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr\r
9919                 // EqualityExpr ::= RelationalExpr\r
9920                 //                                      | EqualityExpr '=' RelationalExpr\r
9921                 //                                      | EqualityExpr '!=' RelationalExpr\r
9922                 // RelationalExpr ::= AdditiveExpr\r
9923                 //                                        | RelationalExpr '<' AdditiveExpr\r
9924                 //                                        | RelationalExpr '>' AdditiveExpr\r
9925                 //                                        | RelationalExpr '<=' AdditiveExpr\r
9926                 //                                        | RelationalExpr '>=' AdditiveExpr\r
9927                 // AdditiveExpr ::= MultiplicativeExpr\r
9928                 //                                      | AdditiveExpr '+' MultiplicativeExpr\r
9929                 //                                      | AdditiveExpr '-' MultiplicativeExpr\r
9930                 // MultiplicativeExpr ::= UnaryExpr\r
9931                 //                                                | MultiplicativeExpr '*' UnaryExpr\r
9932                 //                                                | MultiplicativeExpr 'div' UnaryExpr\r
9933                 //                                                | MultiplicativeExpr 'mod' UnaryExpr\r
9934                 xpath_ast_node* parse_expression()\r
9935                 {\r
9936                         return parse_expression_rec(parse_path_or_unary_expression(), 0);\r
9937                 }\r
9938 \r
9939                 xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result)\r
9940                 {\r
9941                 }\r
9942 \r
9943                 xpath_ast_node* parse()\r
9944                 {\r
9945                         xpath_ast_node* result = parse_expression();\r
9946                         \r
9947                         if (_lexer.current() != lex_eof)\r
9948                         {\r
9949                                 // there are still unparsed tokens left, error\r
9950                                 throw_error("Incorrect query");\r
9951                         }\r
9952                         \r
9953                         return result;\r
9954                 }\r
9955 \r
9956                 static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result)\r
9957                 {\r
9958                         xpath_parser parser(query, variables, alloc, result);\r
9959 \r
9960                 #ifdef PUGIXML_NO_EXCEPTIONS\r
9961                         int error = setjmp(parser._error_handler);\r
9962 \r
9963                         return (error == 0) ? parser.parse() : 0;\r
9964                 #else\r
9965                         return parser.parse();\r
9966                 #endif\r
9967                 }\r
9968         };\r
9969 \r
9970         struct xpath_query_impl\r
9971         {\r
9972                 static xpath_query_impl* create()\r
9973                 {\r
9974                         void* memory = xml_memory::allocate(sizeof(xpath_query_impl));\r
9975 \r
9976                         return new (memory) xpath_query_impl();\r
9977                 }\r
9978 \r
9979                 static void destroy(void* ptr)\r
9980                 {\r
9981                         if (!ptr) return;\r
9982                         \r
9983                         // free all allocated pages\r
9984                         static_cast<xpath_query_impl*>(ptr)->alloc.release();\r
9985 \r
9986                         // free allocator memory (with the first page)\r
9987                         xml_memory::deallocate(ptr);\r
9988                 }\r
9989 \r
9990                 xpath_query_impl(): root(0), alloc(&block)\r
9991                 {\r
9992                         block.next = 0;\r
9993                 }\r
9994 \r
9995                 xpath_ast_node* root;\r
9996                 xpath_allocator alloc;\r
9997                 xpath_memory_block block;\r
9998         };\r
9999 \r
10000         PUGI__FN xpath_string evaluate_string_impl(xpath_query_impl* impl, const xpath_node& n, xpath_stack_data& sd)\r
10001         {\r
10002                 if (!impl) return xpath_string();\r
10003 \r
10004         #ifdef PUGIXML_NO_EXCEPTIONS\r
10005                 if (setjmp(sd.error_handler)) return xpath_string();\r
10006         #endif\r
10007 \r
10008                 xpath_context c(n, 1, 1);\r
10009 \r
10010                 return impl->root->eval_string(c, sd.stack);\r
10011         }\r
10012 PUGI__NS_END\r
10013 \r
10014 namespace pugi\r
10015 {\r
10016 #ifndef PUGIXML_NO_EXCEPTIONS\r
10017         PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_)\r
10018         {\r
10019                 assert(_result.error);\r
10020         }\r
10021         \r
10022         PUGI__FN const char* xpath_exception::what() const throw()\r
10023         {\r
10024                 return _result.error;\r
10025         }\r
10026 \r
10027         PUGI__FN const xpath_parse_result& xpath_exception::result() const\r
10028         {\r
10029                 return _result;\r
10030         }\r
10031 #endif\r
10032         \r
10033         PUGI__FN xpath_node::xpath_node()\r
10034         {\r
10035         }\r
10036                 \r
10037         PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_)\r
10038         {\r
10039         }\r
10040                 \r
10041         PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_)\r
10042         {\r
10043         }\r
10044 \r
10045         PUGI__FN xml_node xpath_node::node() const\r
10046         {\r
10047                 return _attribute ? xml_node() : _node;\r
10048         }\r
10049                 \r
10050         PUGI__FN xml_attribute xpath_node::attribute() const\r
10051         {\r
10052                 return _attribute;\r
10053         }\r
10054         \r
10055         PUGI__FN xml_node xpath_node::parent() const\r
10056         {\r
10057                 return _attribute ? _node : _node.parent();\r
10058         }\r
10059 \r
10060         PUGI__FN static void unspecified_bool_xpath_node(xpath_node***)\r
10061         {\r
10062         }\r
10063 \r
10064         PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const\r
10065         {\r
10066                 return (_node || _attribute) ? unspecified_bool_xpath_node : 0;\r
10067         }\r
10068         \r
10069         PUGI__FN bool xpath_node::operator!() const\r
10070         {\r
10071                 return !(_node || _attribute);\r
10072         }\r
10073 \r
10074         PUGI__FN bool xpath_node::operator==(const xpath_node& n) const\r
10075         {\r
10076                 return _node == n._node && _attribute == n._attribute;\r
10077         }\r
10078         \r
10079         PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const\r
10080         {\r
10081                 return _node != n._node || _attribute != n._attribute;\r
10082         }\r
10083 \r
10084 #ifdef __BORLANDC__\r
10085         PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs)\r
10086         {\r
10087                 return (bool)lhs && rhs;\r
10088         }\r
10089 \r
10090         PUGI__FN bool operator||(const xpath_node& lhs, bool rhs)\r
10091         {\r
10092                 return (bool)lhs || rhs;\r
10093         }\r
10094 #endif\r
10095 \r
10096         PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_)\r
10097         {\r
10098                 assert(begin_ <= end_);\r
10099 \r
10100                 size_t size_ = static_cast<size_t>(end_ - begin_);\r
10101 \r
10102                 if (size_ <= 1)\r
10103                 {\r
10104                         // deallocate old buffer\r
10105                         if (_begin != &_storage) impl::xml_memory::deallocate(_begin);\r
10106 \r
10107                         // use internal buffer\r
10108                         if (begin_ != end_) _storage = *begin_;\r
10109 \r
10110                         _begin = &_storage;\r
10111                         _end = &_storage + size_;\r
10112                 }\r
10113                 else\r
10114                 {\r
10115                         // make heap copy\r
10116                         xpath_node* storage = static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node)));\r
10117 \r
10118                         if (!storage)\r
10119                         {\r
10120                         #ifdef PUGIXML_NO_EXCEPTIONS\r
10121                                 return;\r
10122                         #else\r
10123                                 throw std::bad_alloc();\r
10124                         #endif\r
10125                         }\r
10126 \r
10127                         memcpy(storage, begin_, size_ * sizeof(xpath_node));\r
10128                         \r
10129                         // deallocate old buffer\r
10130                         if (_begin != &_storage) impl::xml_memory::deallocate(_begin);\r
10131 \r
10132                         // finalize\r
10133                         _begin = storage;\r
10134                         _end = storage + size_;\r
10135                 }\r
10136         }\r
10137 \r
10138         PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(&_storage), _end(&_storage)\r
10139         {\r
10140         }\r
10141 \r
10142         PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_), _begin(&_storage), _end(&_storage)\r
10143         {\r
10144                 _assign(begin_, end_);\r
10145         }\r
10146 \r
10147         PUGI__FN xpath_node_set::~xpath_node_set()\r
10148         {\r
10149                 if (_begin != &_storage) impl::xml_memory::deallocate(_begin);\r
10150         }\r
10151                 \r
10152         PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(ns._type), _begin(&_storage), _end(&_storage)\r
10153         {\r
10154                 _assign(ns._begin, ns._end);\r
10155         }\r
10156         \r
10157         PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)\r
10158         {\r
10159                 if (this == &ns) return *this;\r
10160                 \r
10161                 _type = ns._type;\r
10162                 _assign(ns._begin, ns._end);\r
10163 \r
10164                 return *this;\r
10165         }\r
10166 \r
10167         PUGI__FN xpath_node_set::type_t xpath_node_set::type() const\r
10168         {\r
10169                 return _type;\r
10170         }\r
10171                 \r
10172         PUGI__FN size_t xpath_node_set::size() const\r
10173         {\r
10174                 return _end - _begin;\r
10175         }\r
10176                 \r
10177         PUGI__FN bool xpath_node_set::empty() const\r
10178         {\r
10179                 return _begin == _end;\r
10180         }\r
10181                 \r
10182         PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const\r
10183         {\r
10184                 assert(index < size());\r
10185                 return _begin[index];\r
10186         }\r
10187 \r
10188         PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const\r
10189         {\r
10190                 return _begin;\r
10191         }\r
10192                 \r
10193         PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const\r
10194         {\r
10195                 return _end;\r
10196         }\r
10197         \r
10198         PUGI__FN void xpath_node_set::sort(bool reverse)\r
10199         {\r
10200                 _type = impl::xpath_sort(_begin, _end, _type, reverse);\r
10201         }\r
10202 \r
10203         PUGI__FN xpath_node xpath_node_set::first() const\r
10204         {\r
10205                 return impl::xpath_first(_begin, _end, _type);\r
10206         }\r
10207 \r
10208         PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0)\r
10209         {\r
10210         }\r
10211 \r
10212         PUGI__FN xpath_parse_result::operator bool() const\r
10213         {\r
10214                 return error == 0;\r
10215         }\r
10216 \r
10217         PUGI__FN const char* xpath_parse_result::description() const\r
10218         {\r
10219                 return error ? error : "No error";\r
10220         }\r
10221 \r
10222         PUGI__FN xpath_variable::xpath_variable(): _type(xpath_type_none), _next(0)\r
10223         {\r
10224         }\r
10225 \r
10226         PUGI__FN const char_t* xpath_variable::name() const\r
10227         {\r
10228                 switch (_type)\r
10229                 {\r
10230                 case xpath_type_node_set:\r
10231                         return static_cast<const impl::xpath_variable_node_set*>(this)->name;\r
10232 \r
10233                 case xpath_type_number:\r
10234                         return static_cast<const impl::xpath_variable_number*>(this)->name;\r
10235 \r
10236                 case xpath_type_string:\r
10237                         return static_cast<const impl::xpath_variable_string*>(this)->name;\r
10238 \r
10239                 case xpath_type_boolean:\r
10240                         return static_cast<const impl::xpath_variable_boolean*>(this)->name;\r
10241 \r
10242                 default:\r
10243                         assert(!"Invalid variable type");\r
10244                         return 0;\r
10245                 }\r
10246         }\r
10247 \r
10248         PUGI__FN xpath_value_type xpath_variable::type() const\r
10249         {\r
10250                 return _type;\r
10251         }\r
10252 \r
10253         PUGI__FN bool xpath_variable::get_boolean() const\r
10254         {\r
10255                 return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false;\r
10256         }\r
10257 \r
10258         PUGI__FN double xpath_variable::get_number() const\r
10259         {\r
10260                 return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan();\r
10261         }\r
10262 \r
10263         PUGI__FN const char_t* xpath_variable::get_string() const\r
10264         {\r
10265                 const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0;\r
10266                 return value ? value : PUGIXML_TEXT("");\r
10267         }\r
10268 \r
10269         PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const\r
10270         {\r
10271                 return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set;\r
10272         }\r
10273 \r
10274         PUGI__FN bool xpath_variable::set(bool value)\r
10275         {\r
10276                 if (_type != xpath_type_boolean) return false;\r
10277 \r
10278                 static_cast<impl::xpath_variable_boolean*>(this)->value = value;\r
10279                 return true;\r
10280         }\r
10281 \r
10282         PUGI__FN bool xpath_variable::set(double value)\r
10283         {\r
10284                 if (_type != xpath_type_number) return false;\r
10285 \r
10286                 static_cast<impl::xpath_variable_number*>(this)->value = value;\r
10287                 return true;\r
10288         }\r
10289 \r
10290         PUGI__FN bool xpath_variable::set(const char_t* value)\r
10291         {\r
10292                 if (_type != xpath_type_string) return false;\r
10293 \r
10294                 impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this);\r
10295 \r
10296                 // duplicate string\r
10297                 size_t size = (impl::strlength(value) + 1) * sizeof(char_t);\r
10298 \r
10299                 char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size));\r
10300                 if (!copy) return false;\r
10301 \r
10302                 memcpy(copy, value, size);\r
10303 \r
10304                 // replace old string\r
10305                 if (var->value) impl::xml_memory::deallocate(var->value);\r
10306                 var->value = copy;\r
10307 \r
10308                 return true;\r
10309         }\r
10310 \r
10311         PUGI__FN bool xpath_variable::set(const xpath_node_set& value)\r
10312         {\r
10313                 if (_type != xpath_type_node_set) return false;\r
10314 \r
10315                 static_cast<impl::xpath_variable_node_set*>(this)->value = value;\r
10316                 return true;\r
10317         }\r
10318 \r
10319         PUGI__FN xpath_variable_set::xpath_variable_set()\r
10320         {\r
10321                 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) _data[i] = 0;\r
10322         }\r
10323 \r
10324         PUGI__FN xpath_variable_set::~xpath_variable_set()\r
10325         {\r
10326                 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)\r
10327                 {\r
10328                         xpath_variable* var = _data[i];\r
10329 \r
10330                         while (var)\r
10331                         {\r
10332                                 xpath_variable* next = var->_next;\r
10333 \r
10334                                 impl::delete_xpath_variable(var->_type, var);\r
10335 \r
10336                                 var = next;\r
10337                         }\r
10338                 }\r
10339         }\r
10340 \r
10341         PUGI__FN xpath_variable* xpath_variable_set::find(const char_t* name) const\r
10342         {\r
10343                 const size_t hash_size = sizeof(_data) / sizeof(_data[0]);\r
10344                 size_t hash = impl::hash_string(name) % hash_size;\r
10345 \r
10346                 // look for existing variable\r
10347                 for (xpath_variable* var = _data[hash]; var; var = var->_next)\r
10348                         if (impl::strequal(var->name(), name))\r
10349                                 return var;\r
10350 \r
10351                 return 0;\r
10352         }\r
10353 \r
10354         PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)\r
10355         {\r
10356                 const size_t hash_size = sizeof(_data) / sizeof(_data[0]);\r
10357                 size_t hash = impl::hash_string(name) % hash_size;\r
10358 \r
10359                 // look for existing variable\r
10360                 for (xpath_variable* var = _data[hash]; var; var = var->_next)\r
10361                         if (impl::strequal(var->name(), name))\r
10362                                 return var->type() == type ? var : 0;\r
10363 \r
10364                 // add new variable\r
10365                 xpath_variable* result = impl::new_xpath_variable(type, name);\r
10366 \r
10367                 if (result)\r
10368                 {\r
10369                         result->_type = type;\r
10370                         result->_next = _data[hash];\r
10371 \r
10372                         _data[hash] = result;\r
10373                 }\r
10374 \r
10375                 return result;\r
10376         }\r
10377 \r
10378         PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value)\r
10379         {\r
10380                 xpath_variable* var = add(name, xpath_type_boolean);\r
10381                 return var ? var->set(value) : false;\r
10382         }\r
10383 \r
10384         PUGI__FN bool xpath_variable_set::set(const char_t* name, double value)\r
10385         {\r
10386                 xpath_variable* var = add(name, xpath_type_number);\r
10387                 return var ? var->set(value) : false;\r
10388         }\r
10389 \r
10390         PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value)\r
10391         {\r
10392                 xpath_variable* var = add(name, xpath_type_string);\r
10393                 return var ? var->set(value) : false;\r
10394         }\r
10395 \r
10396         PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value)\r
10397         {\r
10398                 xpath_variable* var = add(name, xpath_type_node_set);\r
10399                 return var ? var->set(value) : false;\r
10400         }\r
10401 \r
10402         PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name)\r
10403         {\r
10404                 return find(name);\r
10405         }\r
10406 \r
10407         PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const\r
10408         {\r
10409                 return find(name);\r
10410         }\r
10411 \r
10412         PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)\r
10413         {\r
10414                 impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create();\r
10415 \r
10416                 if (!qimpl)\r
10417                 {\r
10418                 #ifdef PUGIXML_NO_EXCEPTIONS\r
10419                         _result.error = "Out of memory";\r
10420                 #else\r
10421                         throw std::bad_alloc();\r
10422                 #endif\r
10423                 }\r
10424                 else\r
10425                 {\r
10426                         impl::buffer_holder impl_holder(qimpl, impl::xpath_query_impl::destroy);\r
10427 \r
10428                         qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result);\r
10429 \r
10430                         if (qimpl->root)\r
10431                         {\r
10432                                 _impl = static_cast<impl::xpath_query_impl*>(impl_holder.release());\r
10433                                 _result.error = 0;\r
10434                         }\r
10435                 }\r
10436         }\r
10437 \r
10438         PUGI__FN xpath_query::~xpath_query()\r
10439         {\r
10440                 impl::xpath_query_impl::destroy(_impl);\r
10441         }\r
10442 \r
10443         PUGI__FN xpath_value_type xpath_query::return_type() const\r
10444         {\r
10445                 if (!_impl) return xpath_type_none;\r
10446 \r
10447                 return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype();\r
10448         }\r
10449 \r
10450         PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const\r
10451         {\r
10452                 if (!_impl) return false;\r
10453                 \r
10454                 impl::xpath_context c(n, 1, 1);\r
10455                 impl::xpath_stack_data sd;\r
10456 \r
10457         #ifdef PUGIXML_NO_EXCEPTIONS\r
10458                 if (setjmp(sd.error_handler)) return false;\r
10459         #endif\r
10460                 \r
10461                 return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);\r
10462         }\r
10463         \r
10464         PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const\r
10465         {\r
10466                 if (!_impl) return impl::gen_nan();\r
10467                 \r
10468                 impl::xpath_context c(n, 1, 1);\r
10469                 impl::xpath_stack_data sd;\r
10470 \r
10471         #ifdef PUGIXML_NO_EXCEPTIONS\r
10472                 if (setjmp(sd.error_handler)) return impl::gen_nan();\r
10473         #endif\r
10474 \r
10475                 return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);\r
10476         }\r
10477 \r
10478 #ifndef PUGIXML_NO_STL\r
10479         PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const\r
10480         {\r
10481                 impl::xpath_stack_data sd;\r
10482 \r
10483                 return impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd).c_str();\r
10484         }\r
10485 #endif\r
10486 \r
10487         PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const\r
10488         {\r
10489                 impl::xpath_stack_data sd;\r
10490 \r
10491                 impl::xpath_string r = impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd);\r
10492 \r
10493                 size_t full_size = r.length() + 1;\r
10494                 \r
10495                 if (capacity > 0)\r
10496                 {\r
10497                         size_t size = (full_size < capacity) ? full_size : capacity;\r
10498                         assert(size > 0);\r
10499 \r
10500                         memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t));\r
10501                         buffer[size - 1] = 0;\r
10502                 }\r
10503                 \r
10504                 return full_size;\r
10505         }\r
10506 \r
10507         PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const\r
10508         {\r
10509                 if (!_impl) return xpath_node_set();\r
10510 \r
10511                 impl::xpath_ast_node* root = static_cast<impl::xpath_query_impl*>(_impl)->root;\r
10512 \r
10513                 if (root->rettype() != xpath_type_node_set)\r
10514                 {\r
10515                 #ifdef PUGIXML_NO_EXCEPTIONS\r
10516                         return xpath_node_set();\r
10517                 #else\r
10518                         xpath_parse_result res;\r
10519                         res.error = "Expression does not evaluate to node set";\r
10520 \r
10521                         throw xpath_exception(res);\r
10522                 #endif\r
10523                 }\r
10524                 \r
10525                 impl::xpath_context c(n, 1, 1);\r
10526                 impl::xpath_stack_data sd;\r
10527 \r
10528         #ifdef PUGIXML_NO_EXCEPTIONS\r
10529                 if (setjmp(sd.error_handler)) return xpath_node_set();\r
10530         #endif\r
10531 \r
10532                 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack);\r
10533 \r
10534                 return xpath_node_set(r.begin(), r.end(), r.type());\r
10535         }\r
10536 \r
10537         PUGI__FN const xpath_parse_result& xpath_query::result() const\r
10538         {\r
10539                 return _result;\r
10540         }\r
10541 \r
10542         PUGI__FN static void unspecified_bool_xpath_query(xpath_query***)\r
10543         {\r
10544         }\r
10545 \r
10546         PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const\r
10547         {\r
10548                 return _impl ? unspecified_bool_xpath_query : 0;\r
10549         }\r
10550 \r
10551         PUGI__FN bool xpath_query::operator!() const\r
10552         {\r
10553                 return !_impl;\r
10554         }\r
10555 \r
10556         PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const\r
10557         {\r
10558                 xpath_query q(query, variables);\r
10559                 return select_single_node(q);\r
10560         }\r
10561 \r
10562         PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const\r
10563         {\r
10564                 xpath_node_set s = query.evaluate_node_set(*this);\r
10565                 return s.empty() ? xpath_node() : s.first();\r
10566         }\r
10567 \r
10568         PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const\r
10569         {\r
10570                 xpath_query q(query, variables);\r
10571                 return select_nodes(q);\r
10572         }\r
10573 \r
10574         PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const\r
10575         {\r
10576                 return query.evaluate_node_set(*this);\r
10577         }\r
10578 }\r
10579 \r
10580 #endif\r
10581 \r
10582 #ifdef __BORLANDC__\r
10583 #       pragma option pop\r
10584 #endif\r
10585 \r
10586 // Intel C++ does not properly keep warning state for function templates,\r
10587 // so popping warning state at the end of translation unit leads to warnings in the middle.\r
10588 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)\r
10589 #       pragma warning(pop)\r
10590 #endif\r
10591 \r
10592 // Undefine all local macros (makes sure we're not leaking macros in header-only mode)\r
10593 #undef PUGI__NO_INLINE\r
10594 #undef PUGI__STATIC_ASSERT\r
10595 #undef PUGI__DMC_VOLATILE\r
10596 #undef PUGI__MSVC_CRT_VERSION\r
10597 #undef PUGI__NS_BEGIN\r
10598 #undef PUGI__NS_END\r
10599 #undef PUGI__FN\r
10600 #undef PUGI__FN_NO_INLINE\r
10601 #undef PUGI__IS_CHARTYPE_IMPL\r
10602 #undef PUGI__IS_CHARTYPE\r
10603 #undef PUGI__IS_CHARTYPEX\r
10604 #undef PUGI__SKIPWS\r
10605 #undef PUGI__OPTSET\r
10606 #undef PUGI__PUSHNODE\r
10607 #undef PUGI__POPNODE\r
10608 #undef PUGI__SCANFOR\r
10609 #undef PUGI__SCANWHILE\r
10610 #undef PUGI__ENDSEG\r
10611 #undef PUGI__THROW_ERROR\r
10612 #undef PUGI__CHECK_ERROR\r
10613 \r
10614 #endif\r
10615 \r
10616 /**\r
10617  * Copyright (c) 2006-2014 Arseny Kapoulkine\r
10618  *\r
10619  * Permission is hereby granted, free of charge, to any person\r
10620  * obtaining a copy of this software and associated documentation\r
10621  * files (the "Software"), to deal in the Software without\r
10622  * restriction, including without limitation the rights to use,\r
10623  * copy, modify, merge, publish, distribute, sublicense, and/or sell\r
10624  * copies of the Software, and to permit persons to whom the\r
10625  * Software is furnished to do so, subject to the following\r
10626  * conditions:\r
10627  *\r
10628  * The above copyright notice and this permission notice shall be\r
10629  * included in all copies or substantial portions of the Software.\r
10630  * \r
10631  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\r
10632  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES\r
10633  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\r
10634  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT\r
10635  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,\r
10636  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\r
10637  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR\r
10638  * OTHER DEALINGS IN THE SOFTWARE.\r
10639  */\r

UCC git Repository :: git.ucc.asn.au