feat(deps): add raylib and raytmx dependencies

author: Leander Scherer <leander@schererleander.de> 2026-03-08 20:04:08 +0100
committer: Leander Scherer <leander@schererleander.de> 2026-03-08 20:07:58 +0100
commit: c5455ef0fbfc4203a4aa8ad185dfa43bdadc0b82 (patch)
tree: 731fffe1d9591587a366895f4a46859d2c79436f /include/hoxml.h
parent: 7bdc10ae6de645812f4e57185067f0a83ca5655f (diff)
1 files changed, 1497 insertions, 0 deletions
diff --git a/include/hoxml.h b/include/hoxml.h
new file mode 100644
index 0000000..d7addec
--- /dev/null
+++ b/include/hoxml.h
@@ -0,0 +1,1497 @@
+/*
+Copyright (c) 2024-2025 Luke Philipsen
+
+Permission to use, copy, modify, and/or distribute this software for
+any purpose with or without fee is hereby granted.
+
+THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL
+WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
+OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE
+FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY
+DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+/* Usage
+
+  Do this:
+    #define HOXML_IMPLEMENTATION
+  before you include this file in *one* C or C++ file to create the implementation.
+
+  You can define HOXML_DECL with
+    #define HOXML_DECL static
+  or
+    #define HOXML_DECL extern
+  to specify hoxml function declarations as static or extern, respectively.
+  The default specifier is extern.
+*/
+
+#ifndef HOXML_H
+    #define HOXML_H
+
+#include <stdlib.h> /* strtoul() */
+#include <string.h> /* memcpy(), memset(), NULL, size_t */
+
+#ifndef HOXML_DECL
+    #define HOXML_DECL
+#endif /* HOXML_DECL */
+
+#ifdef __cplusplus
+    extern "C" {
+#endif /* __cpluspus */
+
+/***************/
+/* Definitions */
+
+/**
+ * Error and token codes returned after parsing.
+ */
+typedef enum {
+    HOXML_ERROR_INVALID_INPUT = -9, /**< One or more parameter passed to hoxml was unacceptable. */
+    HOXML_ERROR_INTERNAL = -8, /**< There's a bug in hoxml and parsing must halt. */
+    HOXML_ERROR_INSUFFICIENT_MEMORY = -7, /**< Initialization or continued parsing require more memory. */
+    HOXML_ERROR_UNEXPECTED_EOF = -6, /**< Reached the end of the XML content before the end of the document. */
+    HOXML_ERROR_SYNTAX = -5, /**< Syntax error (e.g. "<element<"). */
+    HOXML_ERROR_ENCODING = -4, /**< Character encoding error or contradiction. */
+    HOXML_ERROR_TAG_MISMATCH = -3, /**< Close tag does not match the open tag (e.g. "<tag>" followed by "</tga>"). */
+    HOXML_ERROR_INVALID_DOCUMENT_TYPE_DECLARATION = -2, /**< <!DOCTYPE> declaration not before the root element. */
+    HOXML_ERROR_INVALID_DOCUMENT_DECLARATION = -1, /**< <?xml?> declaration not before the root element. */
+    HOXML_END_OF_DOCUMENT = 0, /**< The root element has been closed, parsing is done. */
+    HOXML_ELEMENT_BEGIN, /**< A new element/tag began and its name is available. */
+    HOXML_ELEMENT_END, /**< An element was closed, </tag> or <tag/>, and its name and content are available. */
+    HOXML_ATTRIBUTE, /**< An attribute's value, its name, and its element are available. */
+    HOXML_PROCESSING_INSTRUCTION_BEGIN, /**< A processing instruction began and its target is available. */
+    HOXML_PROCESSING_INSTRUCTION_END /**< A processing instruction ended and its content is available. */
+} hoxml_code_t;
+
+/**
+ * Holds context and state information needed by hoxml. Some of this information is public and holds the data parsed
+ * from XML content (element names, attribute names and values, etc.) but some is private and only makes sense to hoxml.
+ */
+typedef struct {
+    /* Public */
+    char* tag; /**< Holds the name of the open or just-closed tag, or processing instruction target. */
+    char* attribute; /**< Holds the current attribute's name. */
+    char* value; /**< Holds the current attribute's value. */
+    char* content; /**< Holds the current element's content. This means all character data found, including spaces. */
+    int line; /**< The line currently being parsed. Lines are determined by line feeds and carriage returns. */
+    int column; /**< The column, on the current line, of the character last parsed. */
+    int depth; /**< The nested level of elements. Assigned with the level in which the element was found. */
+
+    /* Private (for internal use) */
+    int is_initialized; /* Set to 1, or true, by hoxml_init() and indicates this context is safe to use */
+    const char* xml; /* XML content to be parsed */
+    size_t xml_length; /* Length of the XML content to parse */
+    int encoding; /* Character encoding of the XML content */
+    const char* iterator; /* Pointer to the character in the XML content being parsed */
+    char* buffer; /* Memory allocated for hoxml to use */
+    size_t buffer_length; /* Amount of memory allocated for hoxml */
+    char* reference_start; /* Pointer to a location on the stack where a reference entity string (e.g "&lt;") began */
+    char* stack; /* Pointer to the current node in the stack-like structure of elements */
+    int state; /* Current parsing state, determines which characters are acceptable and when to return */
+    int post_state; /* When not "none" this indicates a post-state that has a cleanup step */
+    int return_state; /* State to return to after the processing of a comment or reference has finished */
+    int error_return_state; /* State to return to after recovering from an error */
+    unsigned long stream; /* Holds the current character, whole or partial. May contain bytes from different strings. */
+    size_t stream_length; /* Length of the 'stream' variable in bytes */
+    unsigned newline_character; /* The character used to increment the 'line' variable, \r or \n */
+} hoxml_context_t;
+
+/**
+ * Sets up the hoxml context object to begin parsing. Following this, call hoxml_parse() until
+ * HOXML_END_OF_DOCUMENT or one of the error values is returned.
+ *
+ * @param context Pointer to an allocated hoxml context object. This instance will be modified.
+ * @param buffer A pointer to some contiguous block of memory for hoxml to use. This will also be modified, frequently.
+ * @param buffer_length The length, in bytes, of the buffer handed to hoxml as the 'buffer' parameter.
+ */
+HOXML_DECL void hoxml_init(hoxml_context_t* context, void* buffer, size_t buffer_length);
+
+/**
+ * Instruct hoxml to use a new buffer. This maintains the current state of parsing meaning that the next call to
+ * hoxml_parse() will continue none the wiser.
+ * The buffer must have a length greater than the current buffer and both buffers must be allocated at the time this
+ * function is called. Once it returns, the original buffer may and should be freed.
+ *
+ * @param context An initialized hoxml context object.
+ * @param buffer A pointer to a new, contiguous block of memory for hoxml to use.
+ * @param buffer_length The length, in bytes, of the buffer handed to hoxml as the 'buffer' parameter.
+ */
+HOXML_DECL void hoxml_realloc(hoxml_context_t* context, void* buffer, size_t buffer_length);
+
+/**
+ * Begin or continue parsing the given XML content string.
+ * The XML content string does not need to contain the content in its entirety. If hoxml finds a null terminator or
+ * parses up to the indicated length of the content, HOXML_ERROR_UNEXPECTED_EOF is returned and parsing will cease.
+ * However, this error is recoverable and parsing will continue if the next call to hoxml_parse() passes a new XML
+ * content string, using the same pointer or not.
+ *
+ * @param context An initialized hoxml context object. This should be treated as read-only until parsing is done.
+ * @param xml XML content as an encoded string. Supported character encodings include ASCII, UTF-8, and UTF-16(BE|LE).
+ * @param xml_length Length of the XML content in bytes.
+ * @return A code indicating what information from the XML content is available or an error.
+ */
+HOXML_DECL hoxml_code_t hoxml_parse(hoxml_context_t* context, const char* xml, size_t xml_length);
+
+#ifdef __cplusplus
+    }
+#endif /* __cplusplus */
+
+#ifdef HOXML_IMPLEMENTATION
+
+/******************/
+/* Implementation */
+
+enum {
+    /* Current parser states */
+    HOXML_STATE_ERROR_INTERNAL = -8,
+    HOXML_STATE_ERROR_INSUFFICIENT_MEMORY = -7,
+    HOXML_STATE_ERROR_UNEXPECTED_EOF = -6,
+    HOXML_STATE_ERROR_SYNTAX = -5,
+    HOXML_STATE_ERROR_ENCODING = -4,
+    HOXML_STATE_ERROR_TAG_MISMATCH = -3,
+    HOXML_STATE_ERROR_INVALID_DOCUMENT_TYPE_DECLARATION = -2,
+    HOXML_STATE_ERROR_INVALID_DOCUMENT_DECLARATION = -1,
+    HOXML_STATE_NONE = 0,
+    HOXML_STATE_UTF8_BOM1,
+    HOXML_STATE_UTF8_BOM2,
+    HOXML_STATE_UTF16BE_BOM,
+    HOXML_STATE_UTF16LE_BOM,
+    HOXML_STATE_TAG_BEGIN,
+    HOXML_STATE_TAG_END,
+    HOXML_STATE_ELEMENT_NAME1,
+    HOXML_STATE_ELEMENT_NAME2,
+    HOXML_STATE_ATTRIBUTE_NAME1,
+    HOXML_STATE_ATTRIBUTE_NAME2,
+    HOXML_STATE_ATTRIBUTE_ASSIGNMENT,
+    HOXML_STATE_ATTRIBUTE_VALUE,
+    HOXML_STATE_OPEN_TAG,
+    HOXML_STATE_COMMENT_CDATA_OR_DTD_BEGIN,
+    HOXML_STATE_COMMENT_BEGIN,
+    HOXML_STATE_COMMENT,
+    HOXML_STATE_COMMENT_END1,
+    HOXML_STATE_COMMENT_END2,
+    HOXML_STATE_CDATA_BEGIN1,
+    HOXML_STATE_CDATA_BEGIN2,
+    HOXML_STATE_CDATA_BEGIN3,
+    HOXML_STATE_CDATA_BEGIN4,
+    HOXML_STATE_CDATA_BEGIN5,
+    HOXML_STATE_CDATA_BEGIN6,
+    HOXML_STATE_CDATA_CONTENT,
+    HOXML_STATE_CDATA_END1,
+    HOXML_STATE_CDATA_END2,
+    HOXML_STATE_REFERENCE_BEGIN,
+    HOXML_STATE_REFERENCE_ENTITY,
+    HOXML_STATE_REFERENCE_NUMERIC,
+    HOXML_STATE_REFERENCE_HEX,
+    HOXML_STATE_PROCESSING_INSTRUCTION_BEGIN,
+    HOXML_STATE_PROCESSING_INSTRUCTION_TARGET1,
+    HOXML_STATE_PROCESSING_INSTRUCTION_TARGET2,
+    HOXML_STATE_PROCESSING_INSTRUCTION_CONTENT,
+    HOXML_STATE_PROCESSING_INSTRUCTION_END,
+    HOXML_STATE_DTD_BEGIN1,
+    HOXML_STATE_DTD_BEGIN2,
+    HOXML_STATE_DTD_BEGIN3,
+    HOXML_STATE_DTD_BEGIN4,
+    HOXML_STATE_DTD_BEGIN5,
+    HOXML_STATE_DTD_BEGIN6,
+    HOXML_STATE_DTD_BEGIN7,
+    HOXML_STATE_DTD_BEGIN8,
+    HOXML_STATE_DTD_NAME,
+    HOXML_STATE_DTD_CONTENT,
+    HOXML_STATE_DTD_OPEN_BRACKET,
+    HOXML_STATE_DONE,
+    /* Post (i.e. after) parser states indicating actions to take on the next call to hoxml_parse() */
+    HOXML_POST_STATE_TAG_END,
+    HOXML_POST_STATE_ATTRIBUTE_END,
+};
+
+enum {
+    HOXML_FLAG_END_TAG = 1, /* The node is a dedicated end tag (not an empty element) */
+    HOXML_FLAG_EMPTY_ELEMENT = 2, /* The node is an empty element */
+    HOXML_FLAG_PROCESSING_INSTRUCTION = 4, /* The node is a processing instruction */
+    HOXML_FLAG_DOUBLE_QUOTE = 8, /* The value string being parsed was opened with a double quote (") */
+    HOXML_FLAG_TERMINATED = 16, /* The node's current string (tag, attribute, etc.) is null terminated */
+    HOXML_FLAG_BEGUN = 32, /* The "element begun" code was already returned for this node */
+    HOXML_FLAG_INCREMENT_DEPTH = 64, /* Context object's depth value should increase by one next hoxml_parse() */
+    HOXML_FLAG_DECREMENT_DEPTH = 128 /* Context object's depth value should decrease by one next hoxml_parse() */
+};
+
+enum {
+    HOXML_ENC_UNKNOWN = 0, /* The character encoding is unknown. UTF-8 is assumed. */
+    HOXML_ENC_UTF_8, /* Variable-length encoding (8, 16, 24, or 32 bits) compatible with ASCII */
+    HOXML_ENC_UTF_16_LE, /* Variable-length encoding (16 or 32 bits), little-endian variant */
+    HOXML_ENC_UTF_16_BE /* Variable-lenght encoding (16 or 32 bits), big-endian variant */
+};
+
+enum {
+    HOXML_CASE_SENSITIVE = 0, /* Cases must match. 'A' == 'a' -> false. */
+    HOXML_CASE_INSENSITIVE /* Cases need not match. 'A' == 'a' -> true. */
+};
+
+enum {
+    HOXML_REF_TYPE_ENTITY = 0, /* Predefined strings representing known, problematic characters (e.g. '<') */
+    HOXML_REF_TYPE_NUMERIC, /* A value of a character given as a decimal number */
+    HOXML_REF_TYPE_HEX /* A value of a character given as a hexadecimal number */
+};
+
+struct _hoxml_node_t;
+typedef struct _hoxml_node_t {
+    struct _hoxml_node_t* parent; /* Points to the parent node, or NULL if this is the root */
+    char* end; /* Points to the last byte of this node's data */
+    int flags; /* May contain any number of the flags defined in hoxml_node_flags */
+    char tag; /* Where the tag string will be stored in the buffer, must be defined last */
+} hoxml_node_t;
+
+typedef struct {
+    unsigned encoded; /* Character as it appeared in the content. In other words, the original, encoded character. */
+    unsigned codepoint; /* Unicode codepoint of the character. In other words, the decoded character. */
+    size_t bytes; /* Number of eight-bit bytes of the encoded character, in the [1, 4] range */
+} hoxml_character_t;
+
+#ifndef UINT32_MAX /* Defined in stdint.h with later revisions of C and C++ but not for some earlier ones */
+    #define UINT32_MAX (0xffffffff)
+#endif
+#define HOXML_STACK ((hoxml_node_t*)context->stack)
+#define HOXML_TO_LOWER(c) (c >= 'A' && c <= 'Z' ? c + 32 : c)
+#define HOXML_IS_NEW_LINE(c) (c == 0x0A || c == 0x0D)
+#define HOXML_IS_WHITESPACE(c) (c == 0x20 || c == 0x09 || HOXML_IS_NEW_LINE(c))
+#define HOXML_IS_ASCII_CHAR(c) (c >= 0x21 && c <= 0x7F)
+#define HOXML_IS_CHAR_DATA(c) (c != '<' && c != '&')
+#define HOXML_IS_ALPHA(c) ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
+#define HOXML_IS_NUMERIC(c) (c >= '0' && c <= '9')
+#define HOXML_IS_NAME_START_CHAR(c) (HOXML_IS_ALPHA(c) || c == ':' || c == '_' || (c >= 0xC0 && c <= 0xD6) || \
+    (c >= 0xD8 && c <= 0xF6) || c >= 0xF8)
+#define HOXML_IS_NAME_CHAR(c) (HOXML_IS_NAME_START_CHAR(c) || c == '-' || c == '.'|| HOXML_IS_NUMERIC(c))
+#define HOXML_IS_HEX_CHAR(c) (HOXML_IS_NUMERIC(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
+#define HOXML_IS_VALUE_CHAR_DATA(f, c) (HOXML_IS_CHAR_DATA(c) && ((f & HOXML_FLAG_DOUBLE_QUOTE && c != '"') || \
+    c != '\''))
+
+void hoxml_push_stack(hoxml_context_t* context);
+void hoxml_pop_stack(hoxml_context_t* context);
+void hoxml_append_character(hoxml_context_t* context, hoxml_character_t c);
+void hoxml_append_terminator(hoxml_context_t* context);
+void hoxml_end_reference(hoxml_context_t* context, int type);
+void hoxml_begin_tag(hoxml_context_t* context);
+hoxml_code_t hoxml_end_tag(hoxml_context_t* context);
+int hoxml_post_state_cleanup(hoxml_context_t* context);
+hoxml_character_t hoxml_decode_character(const char* str, size_t str_length, int encoding);
+hoxml_character_t hoxml_encode_character(unsigned codepoint, int encoding);
+char* hoxml_to_ascii(const char* str, int encoding);
+size_t hoxml_strlen(const char* str, int encoding);
+int hoxml_strcmp(const char* str1, int encoding1, const char* str2, int encoding2, int sensitivity);
+const char* hoxml_strstr(const char* haystack, int haystack_encoding, const char* needle, int needle_encoding,
+    int sensitivity);
+#ifdef HOXML_DEBUG
+    #include <stdio.h> /* printf() */
+    #define HOXML_LOG_STATE(s) printf("%s\n", s);
+#else
+    #define HOXML_LOG_STATE(s)
+#endif
+
+HOXML_DECL void hoxml_init(hoxml_context_t* context, void* buffer, size_t buffer_length) {
+    if (context == NULL || buffer == NULL || buffer_length <= 0)
+        return;
+
+    memset(context, 0, sizeof(hoxml_context_t)); /* Assign all values of the context to zero */
+    context->buffer = (char*)buffer; /* Use the provided buffer */
+    context->buffer_length = buffer_length; /* Remember the length of the provided buffer */
+    context->line = 1; /* This is meant to be human-readable and humans begin counting at one */
+    context->is_initialized = 1;
+    memset(buffer, 0, buffer_length); /* Fill the buffer with zeroes */
+}
+
+HOXML_DECL void hoxml_realloc(hoxml_context_t* context, void* buffer, size_t buffer_length) {
+    if (context == NULL || context->is_initialized == 0 || buffer == NULL || buffer_length <= context->buffer_length)
+        return;
+
+    /* Reassign the end and parent pointers of each node, beginning at the tail and iterate to the head */
+    hoxml_node_t* node = HOXML_STACK;
+    while (node != NULL) {
+        hoxml_node_t* parent = node->parent;
+        node->end = (char*)buffer + (node->end - context->buffer);
+        if (node->parent != NULL)
+            node->parent = (hoxml_node_t*)((char*)buffer + ((char*)node->parent - context->buffer));
+        node = parent;
+    }
+
+    /* Use offsets from the original buffer pointer to reassign pointers such that they now point to the new buffer */
+    if (context->tag != NULL)
+        context->tag = (char*)buffer + (context->tag - context->buffer);
+    if (context->attribute != NULL)
+        context->attribute = (char*)buffer + (context->attribute - context->buffer);
+    if (context->value != NULL)
+        context->value = (char*)buffer + (context->value - context->buffer);
+    if (context->content != NULL)
+        context->content = (char*)buffer + (context->content - context->buffer);
+    if (context->reference_start != NULL)
+        context->reference_start = (char*)buffer + (context->reference_start - context->buffer);
+    if (context->stack != NULL)
+        context->stack = (char*)buffer + (context->stack - context->buffer);
+
+    memset(buffer, 0, buffer_length); /* Fill the new buffer with zeroes */
+    memcpy(buffer, context->buffer, context->buffer_length); /* Copy the entire, current buffer to the new buffer */
+    context->buffer = (char*)buffer;
+    context->buffer_length = buffer_length;
+
+    if (context->state == HOXML_STATE_ERROR_INSUFFICIENT_MEMORY) {
+        context->state = context->error_return_state;
+        context->error_return_state = HOXML_STATE_NONE;
+    }
+}
+
+HOXML_DECL hoxml_code_t hoxml_parse(hoxml_context_t* context, const char* xml, const size_t xml_length) {
+    if (context == NULL || context->is_initialized == 0 || xml == NULL || xml_length == 0)
+        return HOXML_ERROR_INVALID_INPUT;
+
+    if (HOXML_STACK != NULL) {
+        if (HOXML_STACK->flags & HOXML_FLAG_INCREMENT_DEPTH) { /* If an element began, increasing nesting */
+            context->depth += 1;
+            HOXML_STACK->flags &= ~HOXML_FLAG_INCREMENT_DEPTH; /* Clear the flag */
+        }
+
+        if (HOXML_STACK->flags & HOXML_FLAG_DECREMENT_DEPTH) { /* If an element ended, decreasing nesting */
+            context->depth -= 1;
+            HOXML_STACK->flags &= ~HOXML_FLAG_DECREMENT_DEPTH; /* Clear the flag */
+        }
+    }
+
+    switch (context->state) {
+        /* Two errors are recoverable: HOXML_ERROR_INSUFFICIENT_MEMORY and HOXML_ERROR_UNEXPECTED_EOF. The former can */
+        /* be recovered by assigning a new buffer with hoxml_realloc(). The latter can be recovered by passing a new */
+        /* XML content string to hoxml_parse() so we'll check for one before concluding we're still in error. */
+        case HOXML_STATE_ERROR_UNEXPECTED_EOF: {
+            /* Try to decode a character, or remainder of a character, at the beginning of this hopefully-new string */
+            unsigned long stream = context->stream;
+            /* Calculate the number of bytes to copy into the 'stream' variable from the hopefully-new string. We */
+            /* want 4 bytes, or whatever is available. */
+            size_t bytes_to_copy = 4;
+            if (bytes_to_copy > xml_length)
+                bytes_to_copy = xml_length;
+            if (context->stream_length > 0) {
+                /* Adjust the number of bytes to copy to account for possible bytes from a previous string */
+                bytes_to_copy -= context->stream_length;
+                /* Append the new bytes to the previous one(s) */
+                memcpy((char*)&stream + context->stream_length, xml, bytes_to_copy);
+            }
+            else {
+                /* Copy to the 'stream' under the assumption that all of it can be overwritten */
+                memcpy(&stream, xml, bytes_to_copy);
+            }
+            hoxml_character_t c = hoxml_decode_character((const char*)&stream, xml_length, context->encoding);
+            /* If the character is the equivalent of a null terminator or there was not enough data */
+            if (c.codepoint == 0 || c.codepoint == UINT32_MAX)
+                return HOXML_ERROR_UNEXPECTED_EOF;
+            context->state = context->error_return_state;
+            context->error_return_state = HOXML_STATE_NONE;
+            /* Note: there is a check for a change in the input pointer a little further down */
+        } break;
+        case HOXML_STATE_DONE: return HOXML_END_OF_DOCUMENT;
+        case HOXML_STATE_ERROR_INTERNAL: return HOXML_ERROR_INTERNAL;
+        case HOXML_STATE_ERROR_INSUFFICIENT_MEMORY: return HOXML_ERROR_INSUFFICIENT_MEMORY;
+        case HOXML_STATE_ERROR_SYNTAX: return HOXML_ERROR_SYNTAX;
+        case HOXML_STATE_ERROR_ENCODING: return HOXML_ERROR_ENCODING;
+        case HOXML_STATE_ERROR_TAG_MISMATCH: return HOXML_ERROR_TAG_MISMATCH;
+        case HOXML_STATE_ERROR_INVALID_DOCUMENT_DECLARATION: return HOXML_ERROR_INVALID_DOCUMENT_DECLARATION;
+    }
+
+    /* A handful of cases leave the context in an intermediary state. This allows the caller to have access to things */
+    /* like the tag's name, an attribute's value, etc. but that old data may now need to be cleaned up. */
+    if (hoxml_post_state_cleanup(context)) /* If the cleanup process found the document ended */
+        return HOXML_END_OF_DOCUMENT;
+
+    /* If the pointer to the XML content string has changed */
+    if (context->xml != xml) {
+        /* A few variables are now invalid: the pointer to the content, its length, and the iterator */
+        context->xml = xml;
+        context->xml_length = xml_length;
+        context->iterator = xml;
+    }
+
+    /* Remember some context variables in case we hit an unexpected EoF and need to undo an iteration */
+    const char* previous_iterator = context->iterator;
+    size_t previous_stream_length = context->stream_length;
+    while (context->state >= HOXML_STATE_NONE && context->state <= HOXML_STATE_DONE) {
+        /* About half of the parsing states assume the stack is non-null. */
+        /* If parsing is currently in one of those states and the stack (head) pointer is null. */
+        if (((context->state >= HOXML_STATE_TAG_BEGIN && context->state <= HOXML_STATE_OPEN_TAG) ||
+                (context->state >= HOXML_STATE_REFERENCE_BEGIN && context->state <= HOXML_STATE_REFERENCE_HEX)) &&
+                context->stack == NULL) {
+            /* Some unforseen bug has led us to a state in which continuing would cause an illegal memory access. */
+            /* Parsing must halt. There is no way to recover. */
+            context->state = HOXML_STATE_ERROR_INTERNAL;
+            return HOXML_ERROR_INTERNAL;
+        }
+
+        /* Calculate the number of bytes remaining in the current XML content string */
+        size_t bytes_remaining = (size_t)(context->xml_length - (context->iterator - context->xml));
+        /* Calculate the number of bytes to copy into the 'stream' variable. We want 4 bytes, or whatever is left. */
+        size_t bytes_to_copy = 4;
+        if (bytes_to_copy > bytes_remaining)
+            bytes_to_copy = bytes_remaining;
+        if (context->stream_length > 0) {
+            /* Adjust the number of bytes to copy to account for possible bytes from a previous XML content string. */
+            /* This will be non-zero in the rare case where content is being given in parts. */
+            bytes_to_copy -= context->stream_length;
+            /* Append the new bytes to the previous one(s) */
+            memcpy((char*)&(context->stream) + context->stream_length, context->iterator, bytes_to_copy);
+        }
+        else {
+            /* Copy to the 'stream' under the assumption that all of it can be overwritten */
+            memcpy(&(context->stream), context->iterator, bytes_to_copy);
+        }
+        hoxml_character_t c = hoxml_decode_character((const char*)&(context->stream), bytes_remaining,
+            context->encoding);
+
+        /* If the character is the equivalent of a null terminator or there was not enough data to decode the value */
+        if (c.codepoint == 0 || c.codepoint == UINT32_MAX) {
+            context->stream_length = bytes_to_copy;
+            context->error_return_state = context->state;
+            context->state = HOXML_STATE_ERROR_UNEXPECTED_EOF;
+            return HOXML_ERROR_UNEXPECTED_EOF;
+        } else if (HOXML_IS_NEW_LINE(c.codepoint)) {
+            if (context->newline_character == 0) /* If this is the first newline */
+                context->newline_character = c.codepoint; /* Remember this as the character to use for increments */
+            if (c.codepoint == context->newline_character) /* Avoid incrementing twice for files with \r\n endings */
+                context->line++;
+            context->column = 0;
+        } else
+            context->column++;
+
+        /* Iterate up to four bytes into the XML content string. The idea is to jump forward by the number of bytes */
+        /* that were just decoded as a single character. The number of bytes varies from one to four bytes depending */
+        /* on the character encoding and character's codepoint. We also need to consider the case in which some of */
+        /* this character's bytes were carried over from a previous XML content string. Those bytes would have been */
+        /* stashed in the context's 'stream' variable where 'stream_length' tells us the number of said bytes. */
+        previous_iterator = context->iterator;
+        previous_stream_length = context->stream_length;
+        context->iterator += c.bytes - context->stream_length;
+        context->stream_length = 0;
+
+        #ifdef HOXML_DEBUG
+            char debugCodepoint = HOXML_IS_NEW_LINE(c.codepoint) ? ' ' : c.codepoint;
+            printf("  %c [%08X] [L%02dC%02d] -> ", debugCodepoint, c.codepoint, context->line, context->column);
+        #endif
+
+        switch(context->state) {
+        case HOXML_STATE_NONE: /* The first state immediately following initialization, or a document declaration */
+            HOXML_LOG_STATE("HOXML_STATE_NONE")
+            if (c.codepoint == '<')
+                hoxml_begin_tag(context);
+            else if (c.encoded == 0xEF) { /* UTF-8 Byte Order Marker (BOM) is [EF] BB BF, as hex bytes */
+                context->state = HOXML_STATE_UTF8_BOM1;
+                context->column--; /* Don't count this as a column */
+            } else if (c.encoded == 0xFE) { /* UTF-16BE BOM is [FE] FF, as hex bytes */
+                context->state = HOXML_STATE_UTF16BE_BOM;
+                context->column--; /* Don't count this as a column */
+            } else if (c.encoded == 0xFF) { /* UTF-16LE BOM is [FF] FE, as hex bytes */
+                context->state = HOXML_STATE_UTF16LE_BOM;
+                context->column--; /* Don't count this as a column */
+            } else if (!HOXML_IS_WHITESPACE(c.codepoint))
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_UTF8_BOM1: /* The first byte of a UTF-8 byte order marker was found */
+            HOXML_LOG_STATE("HOXML_STATE_UTF8_BOM1")
+            context->column--; /* Don't count this as a column */
+            if (c.encoded == 0xBB) /* UTF-8 BOM is EF [BB] BF, as hex bytes */
+                context->state = HOXML_STATE_UTF8_BOM2;
+            else
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_UTF8_BOM2: /* The second byte of a UTF-8 byte order marker was found */
+            HOXML_LOG_STATE("HOXML_STATE_UTF8_BOM2")
+            context->column--; /* Don't count this as a column */
+            if (c.encoded == 0xBF) { /* UTF-8 BOM is EF BB [BF], as hex bytes */
+                context->state = HOXML_STATE_NONE;
+                context->encoding = HOXML_ENC_UTF_8;
+            } else
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_UTF16BE_BOM: /* The first byte of a UTF-16BE byte order marker was found */
+            HOXML_LOG_STATE("HOXML_STATE_UTF16BE_BOM")
+            context->column--; /* Don't count this as a column */
+            if (c.encoded == 0xFF) { /* UTF-16BE BOM is FE [FF], as hex bytes */
+                context->state = HOXML_STATE_NONE;
+                context->encoding = HOXML_ENC_UTF_16_BE;
+            } else
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_UTF16LE_BOM: /* The first byte of a UTF-16LE byte order marker was found */
+            HOXML_LOG_STATE("HOXML_STATE_UTF16LE_BOM")
+            context->column--; /* Don't count this as a column */
+            if (c.encoded == 0xFE) { /* UTF-16LE BOM is FF [FE], as hex bytes */
+                context->state = HOXML_STATE_NONE;
+                context->encoding = HOXML_ENC_UTF_16_LE;
+            } else
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_TAG_BEGIN: /* A new tag was started (a '<' was found) and a new node has been pushed */
+            HOXML_LOG_STATE("HOXML_STATE_TAG_BEGIN")
+            if (c.codepoint == '?') { /* "<?" begins a processing instruction */
+                context->state = HOXML_STATE_PROCESSING_INSTRUCTION_BEGIN;
+                HOXML_STACK->flags |= HOXML_FLAG_PROCESSING_INSTRUCTION; /* Apply the PI flag to this node */
+            } else if (c.codepoint == '/') /* "</" begins an end tag */
+                HOXML_STACK->flags |= HOXML_FLAG_END_TAG; /* Apply the end tag flag to this node */
+            else if (c.codepoint == '!') /* "<!--" = comment, "<![CDATA[" = CDATA, and "<!DOCTYPE" = DTD */
+                context->state = HOXML_STATE_COMMENT_CDATA_OR_DTD_BEGIN;
+            else if (HOXML_IS_NAME_START_CHAR(c.codepoint)) {
+                hoxml_append_character(context, c);
+                if (context->state >= HOXML_STATE_NONE) { /* If appending the character was successful */
+                    context->state = HOXML_STATE_ELEMENT_NAME1;
+                    context->tag = &(HOXML_STACK->tag); /* The tag's name string will begin here */
+                }
+            } else
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_ELEMENT_NAME1: /* A name start character was found after '<' (e.g. the 't' in "<tag>") */
+            HOXML_LOG_STATE("HOXML_STATE_ELEMENT_NAME1")
+            if (c.codepoint == '>') {
+                hoxml_append_terminator(context);
+                if (context->state >= HOXML_STATE_NONE) /* If appending the terminator was successful */
+                    return hoxml_end_tag(context);
+            } else if (c.codepoint == '/') { /* The tag is an empty element, AKA self-closed tag (e.g. "<tag/>") */
+                if (HOXML_STACK->flags & HOXML_FLAG_END_TAG) /* If it's also a regular close tag (e.g. "</tag/>") */
+                    context->state = HOXML_STATE_ERROR_SYNTAX;
+                else {
+                    hoxml_append_terminator(context);
+                    if (context->state >= HOXML_STATE_NONE) { /* If appending the terminator was successful */
+                        HOXML_STACK->flags |= HOXML_FLAG_EMPTY_ELEMENT; /* Apply the empty element flag */
+                        return HOXML_ELEMENT_BEGIN;
+                    }
+                }
+            } else if (HOXML_IS_WHITESPACE(c.codepoint)) { /* If whitespace ended the element name (e.g. "<tag ") */
+                hoxml_append_terminator(context);
+                if (context->state >= HOXML_STATE_NONE) { /* If appending the terminator was successful */
+                    context->state = HOXML_STATE_ELEMENT_NAME2;
+                    HOXML_STACK->flags |= HOXML_FLAG_BEGUN; /* Indicate "element begun" has already been returned */
+                    return HOXML_ELEMENT_BEGIN;
+                }
+            } else if (HOXML_IS_NAME_CHAR(c.codepoint))
+                hoxml_append_character(context, c);
+            else
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_ELEMENT_NAME2: /* Whitespace was found after a tag name (e.g. "<tag    >") */
+            HOXML_LOG_STATE("HOXML_STATE_ELEMENT_NAME2")
+            if (c.codepoint == '>') {
+                hoxml_append_terminator(context);
+                if (context->state >= HOXML_STATE_NONE) { /* If appending the terminator was successful */
+                    /* If "element begun" has already been returned for this element and this element is not */
+                    /* self-closing (i.e. the element has the form "<tag ... />" where "..." may be attributes) */
+                    if (HOXML_STACK->flags & HOXML_FLAG_BEGUN &&
+                            !(HOXML_STACK->flags & HOXML_FLAG_EMPTY_ELEMENT)) {
+                        hoxml_end_tag(context); /* Do not return, "element begun" was returned when the name ended */
+                        hoxml_post_state_cleanup(context); /* Because hoxml_parse() won't be called, clean up now */
+                    } else
+                        return hoxml_end_tag(context);
+                }
+            } else if (c.codepoint == '/') { /* The tag is an empty element, AKA self-closed tag (e.g. "<tag/>") */
+                if (HOXML_STACK->flags & HOXML_FLAG_END_TAG) /* If it's also a regular close tag (e.g. "</tag/>") */
+                    context->state = HOXML_STATE_ERROR_SYNTAX;
+                else
+                    HOXML_STACK->flags |= HOXML_FLAG_EMPTY_ELEMENT; /* Apply the empty element flag to this node */
+            } else if (HOXML_IS_NAME_START_CHAR(c.codepoint)) { /* First letter of an attribute name */
+                hoxml_append_character(context, c);
+                if (context->state >= HOXML_STATE_NONE) { /* If appending the character was successful */
+                    context->state = HOXML_STATE_ATTRIBUTE_NAME1;
+                    context->attribute = HOXML_STACK->end; /* The attribute's name string began here */
+                }
+            } else if (!HOXML_IS_WHITESPACE(c.codepoint))
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_ATTRIBUTE_NAME1: /* A name start character was found inside a tag after whitespace */
+            HOXML_LOG_STATE("HOXML_STATE_ATTRIBUTE_NAME1")
+            if (c.codepoint == '=') { /* The name was immediately followed by '=' */
+                hoxml_append_terminator(context);
+                if (context->state >= HOXML_STATE_NONE) /* If appending the terminator was successful */
+                    context->state = HOXML_STATE_ATTRIBUTE_ASSIGNMENT;
+            } else if (HOXML_IS_NAME_CHAR(c.codepoint))
+                hoxml_append_character(context, c);
+            else if (HOXML_IS_WHITESPACE(c.codepoint)) { /* Whitespace after the name, only '=' is allowed next */
+                hoxml_append_terminator(context);
+                if (context->state >= HOXML_STATE_NONE) /* If appending the terminator was successful */
+                    context->state = HOXML_STATE_ATTRIBUTE_NAME2;
+            } else
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_ATTRIBUTE_NAME2: /* Whitespace was found after an attribute name, look for '=' */
+            HOXML_LOG_STATE("HOXML_STATE_ATTRIBUTE_NAME2")
+            if (c.codepoint == '=')
+                context->state = HOXML_STATE_ATTRIBUTE_ASSIGNMENT;
+            else if (!HOXML_IS_WHITESPACE(c.codepoint)) /* Only '=' and whitespace are allowed after an attribute name */
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_ATTRIBUTE_ASSIGNMENT: /* Found a ' =' after an attribute name, look for quotes or whitespace */
+            HOXML_LOG_STATE("HOXML_STATE_ATTRIBUTE_ASSIGNMENT")
+            if (c.codepoint == '"' || c.codepoint == '\'') {
+                context->state = HOXML_STATE_ATTRIBUTE_VALUE;
+                if (c.codepoint == '"')
+                    HOXML_STACK->flags |= HOXML_FLAG_DOUBLE_QUOTE; /* Apply the double quote flag to this node */
+                else
+                    HOXML_STACK->flags &= ~HOXML_FLAG_DOUBLE_QUOTE; /* Remove the double quote flag from this node */
+                context->value = HOXML_STACK->end + 1; /* The attribute's value string will begin here */
+            }
+            else if (!HOXML_IS_WHITESPACE(c.codepoint))
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_ATTRIBUTE_VALUE: /* A quotation, single or double, was found after an attribute name and '=' */
+            HOXML_LOG_STATE("HOXML_STATE_ATTRIBUTE_VALUE")
+            if ((HOXML_STACK->flags & HOXML_FLAG_DOUBLE_QUOTE && c.codepoint == '"') || (!(HOXML_STACK->flags &
+                    HOXML_FLAG_DOUBLE_QUOTE) && c.codepoint == '\'')) { /* The quotation marks match, value is done */
+                hoxml_append_terminator(context);
+                if (context->state >= HOXML_STATE_NONE) { /* If appending the terminator was successful */
+                    context->state = HOXML_STATE_ELEMENT_NAME2;
+                    context->post_state = HOXML_POST_STATE_ATTRIBUTE_END; /* Clean up some attribute things next call */
+                    return HOXML_ATTRIBUTE;
+                }
+            } else if (c.codepoint == '&') {
+                context->state = HOXML_STATE_REFERENCE_BEGIN;
+                context->return_state = HOXML_STATE_ATTRIBUTE_VALUE; /* Return to this attribute value state later */
+            } else if (HOXML_IS_VALUE_CHAR_DATA(HOXML_STACK->flags, c.codepoint))
+                hoxml_append_character(context, c);
+            else
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_OPEN_TAG: /* Found a '>' and now inside an open tag, looking for multiple things */
+            HOXML_LOG_STATE("HOXML_STATE_OPEN_TAG")
+            if (c.codepoint == '<')
+                hoxml_begin_tag(context);
+            else if (c.codepoint == '&') {
+                context->state = HOXML_STATE_REFERENCE_BEGIN;
+                context->return_state = HOXML_STATE_OPEN_TAG; /* Return to this open tag state later */
+            } else if (HOXML_IS_CHAR_DATA(c.codepoint))
+                hoxml_append_character(context, c);
+            else
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_COMMENT_CDATA_OR_DTD_BEGIN: /* Found "<!", looking for a '-', '[', or 'D' */
+            HOXML_LOG_STATE("HOXML_STATE_COMMENT_CDATA_OR_DTD_BEGIN")
+            if (c.codepoint == '-') /* Possible beginning of a comment (i.e. "<!--") */
+                context->state = HOXML_STATE_COMMENT_BEGIN;
+            else if (c.codepoint == '[') /* Possible beginning of a CDATA section (i.e. "<![CDATA[") */
+                context->state = HOXML_STATE_CDATA_BEGIN1;
+            else if (c.codepoint == 'D') { /* Possible beginning of a DTD (i.e. "<!DOCTYPE") */
+                if (context->return_state != HOXML_STATE_NONE) { /* If this DTD was found after a root element */
+                    context->state = HOXML_STATE_ERROR_INVALID_DOCUMENT_TYPE_DECLARATION;
+                    return HOXML_ERROR_INVALID_DOCUMENT_TYPE_DECLARATION;
+                } else
+                    context->state = HOXML_STATE_DTD_BEGIN1;
+            } else
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_COMMENT_BEGIN: /* Found a '-' was found after "<!", looking for a '-' beginning a comment */
+            HOXML_LOG_STATE("HOXML_STATE_COMMENT_BEGIN")
+            hoxml_pop_stack(context); /* The preceeding '<' triggered a new node. Undo it. */
+            if (c.codepoint == '-')
+                context->state = HOXML_STATE_COMMENT;
+            else
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_COMMENT: /* Found a second '-' and now in a comment, looking for '-' */
+            HOXML_LOG_STATE("HOXML_STATE_COMMENT")
+            if (c.codepoint == '-')
+                context->state = HOXML_STATE_COMMENT_END1;
+            else
+                context->state = HOXML_STATE_COMMENT;
+            break;
+        case HOXML_STATE_COMMENT_END1: /* Found a '-' while in a comment, looking for a second '-' */
+            HOXML_LOG_STATE("HOXML_STATE_COMMENT_END1")
+            if (c.codepoint == '-')
+                context->state = HOXML_STATE_COMMENT_END2;
+            else
+                context->state = HOXML_STATE_COMMENT;
+            break;
+        case HOXML_STATE_COMMENT_END2: /* Found a second '-' while in a comment, looking for '>' to end the comment */
+            HOXML_LOG_STATE("HOXML_STATE_COMMENT_END2")
+            if (c.codepoint == '>')
+                context->state = context->return_state; /* Return to the original state at the time '<' was found */
+            else
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_CDATA_BEGIN1: /* Found a '[' after "<!", looking for 'C' */
+            HOXML_LOG_STATE("HOXML_STATE_CDATA_BEGIN1")
+            hoxml_pop_stack(context); /* The preceeding '<' triggered a new node. Undo it. */
+            if (c.codepoint == 'C')
+                context->state = HOXML_STATE_CDATA_BEGIN2;
+            else
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_CDATA_BEGIN2: /* Found a 'C' after "<![", looking for 'D' */
+            HOXML_LOG_STATE("HOXML_STATE_CDATA_BEGIN2")
+            if (c.codepoint == 'D')
+                context->state = HOXML_STATE_CDATA_BEGIN3;
+            else
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_CDATA_BEGIN3: /* Found a 'D' after "<![C", looking for 'A' */
+            HOXML_LOG_STATE("HOXML_STATE_CDATA_BEGIN3")
+            if (c.codepoint == 'A')
+                context->state = HOXML_STATE_CDATA_BEGIN4;
+            else
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_CDATA_BEGIN4: /* Found an 'A' after "<![CD", looking for 'T' */
+            HOXML_LOG_STATE("HOXML_STATE_CDATA_BEGIN4")
+            if (c.codepoint == 'T')
+                context->state = HOXML_STATE_CDATA_BEGIN5;
+            else
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_CDATA_BEGIN5: /* Found a 'T' after "<![CDA", looking for 'A' */
+            HOXML_LOG_STATE("HOXML_STATE_CDATA_BEGIN5")
+            if (c.codepoint == 'A')
+                context->state = HOXML_STATE_CDATA_BEGIN6;
+            else
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_CDATA_BEGIN6: /* Found an 'A' after "<![CDAT", looking for '[' */
+            HOXML_LOG_STATE("HOXML_STATE_CDATA_BEGIN6")
+            if (c.codepoint == '[')
+                context->state = HOXML_STATE_CDATA_CONTENT;
+            else
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_CDATA_CONTENT: /* Found a '[' after "<![CDATA" and now in a CDATA section, looking for ']' */
+            HOXML_LOG_STATE("HOXML_STATE_CDATA_CONTENT")
+            hoxml_append_character(context, c);
+            if (context->state >= HOXML_STATE_NONE) { /* If appending the character was successful */
+                if (c.codepoint == ']')
+                    context->state = HOXML_STATE_CDATA_END1;
+            } break;
+        case HOXML_STATE_CDATA_END1: /* Found a ']' while in a CDATA section, looking for a second ']' */
+            HOXML_LOG_STATE("HOXML_STATE_CDATA_END1")
+            hoxml_append_character(context, c);
+            if (context->state >= HOXML_STATE_NONE) { /* If appending the character was successful */
+                if (c.codepoint == ']')
+                    context->state = HOXML_STATE_CDATA_END2;
+                else
+                    context->state = HOXML_STATE_CDATA_CONTENT;
+            } break;
+        case HOXML_STATE_CDATA_END2: /* Found a second ']' while in a CDATA section, looking for '>' */
+            HOXML_LOG_STATE("HOXML_STATE_CDATA_END2")
+            if (c.codepoint == '>') {
+                context->state = HOXML_STATE_OPEN_TAG;
+                /* We couldn't be sure the CDATA section had ended until now so two ']' characters were appended. */
+                /* If the document is encoded with UTF-16, four bytes need to be removed. Two bytes otherwise. */
+                size_t bytes = context->encoding >= HOXML_ENC_UTF_16_BE ? 4 : 2;
+                /* The 'end' pointer is currently pointing at the last byte, the second ']' or its latter half if */
+                /* using UTF-16. To remove the "]]" we replace them with zeroes. */
+                memset(HOXML_STACK->end - bytes + 1, 0, bytes);
+                HOXML_STACK->end -= bytes;
+            } else {
+                hoxml_append_character(context, c);
+                if (context->state >= HOXML_STATE_NONE) /* If appending the character was successful */
+                    context->state = HOXML_STATE_CDATA_CONTENT;
+            } break;
+        case HOXML_STATE_REFERENCE_BEGIN: /* Found an '&' in content or a value, looking for '#', ';', or characters */
+            HOXML_LOG_STATE("HOXML_STATE_REFERENCE_BEGIN")
+            context->reference_start = HOXML_STACK->end + 1; /* Point to the first byte for comparisons later */
+            if (c.codepoint == '#')
+                context->state = HOXML_STATE_REFERENCE_NUMERIC;
+            /* The predefined entities are "amp", "lt", "gt", "quot", and "apos". Check for just their first letters. */
+            else if (c.codepoint == 'a' || c.codepoint == 'g' || c.codepoint == 'l' || c.codepoint == 'q') {
+                hoxml_append_character(context, c);
+                if (context->state >= HOXML_STATE_NONE) /* If appending the character was successful */
+                    context->state = HOXML_STATE_REFERENCE_ENTITY;
+            } else
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_REFERENCE_ENTITY: /* Looking for "[a]mp", "[l]t", "[g]t", "[q]uot", or "apos" */
+            HOXML_LOG_STATE("HOXML_STATE_REFERENCE_ENTITY")
+            if (c.codepoint == ';')
+                hoxml_end_reference(context, HOXML_REF_TYPE_ENTITY);
+            /* Predefined escapes only use a subset of lower case English characters. For now, we'll check for ASCII. */
+            else if (HOXML_IS_ASCII_CHAR(c.codepoint))
+                hoxml_append_character(context, c);
+            else
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_REFERENCE_NUMERIC: /* Found a '#' in a reference, looking for 'x', ';', or chars */
+            HOXML_LOG_STATE("HOXML_STATE_REFERENCE_NUMERIC")
+            if (c.codepoint == 'x')
+                context->state = HOXML_STATE_REFERENCE_HEX;
+            else if (c.codepoint == ';')
+                hoxml_end_reference(context, HOXML_REF_TYPE_NUMERIC);
+            else if (HOXML_IS_NUMERIC(c.codepoint))
+                hoxml_append_character(context, c);
+            else
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_REFERENCE_HEX: /* Found an 'x' in a reference after '#', looking for chars or ';' */
+            HOXML_LOG_STATE("HOXML_STATE_REFERENCE_HEX")
+            if (c.codepoint == ';')
+                hoxml_end_reference(context, HOXML_REF_TYPE_HEX);
+            else if (HOXML_IS_HEX_CHAR(c.codepoint))
+                hoxml_append_character(context, c);
+            else
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_PROCESSING_INSTRUCTION_BEGIN: /* Found a '?' after a '<' and now in a processing instruction */
+            HOXML_LOG_STATE("HOXML_STATE_PROCESSING_INSTRUCTION_BEGIN")
+            if (HOXML_IS_NAME_START_CHAR(c.codepoint)) {
+                hoxml_append_character(context, c);
+                if (context->state >= HOXML_STATE_NONE) { /* If appending the character was successful */
+                    context->state = HOXML_STATE_PROCESSING_INSTRUCTION_TARGET1;
+                    context->tag = &(HOXML_STACK->tag); /* The processing instruction's target string began here */
+                }
+            } else
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_PROCESSING_INSTRUCTION_TARGET1: /* Found a name char after "<?", looking for more name chars */
+            HOXML_LOG_STATE("HOXML_STATE_PROCESSING_INSTRUCTION_TARGET1")
+            if (HOXML_IS_WHITESPACE(c.codepoint)) { /* A whitespace marks an end of a target and beginning of content */
+                if (hoxml_strcmp(&(HOXML_STACK->tag), context->encoding, "xml", HOXML_ENC_UNKNOWN,
+                        HOXML_CASE_INSENSITIVE) && HOXML_STACK->parent != NULL) {
+                    /* The document declaration (e.g. <?xml encoding="UTF-8"?>) must come before the first element */
+                    context->state = HOXML_STATE_ERROR_INVALID_DOCUMENT_DECLARATION;
+                    return HOXML_ERROR_INVALID_DOCUMENT_DECLARATION;
+                }
+                hoxml_append_terminator(context);
+                if (context->state >= HOXML_STATE_NONE) { /* If appending the terminator was successful */
+                    context->state = HOXML_STATE_PROCESSING_INSTRUCTION_CONTENT;
+                    return HOXML_PROCESSING_INSTRUCTION_BEGIN;
+                }
+            } else if (c.codepoint == '?') { /* A '?' (or "?>") marks the end of the target and PI */
+                hoxml_append_terminator(context);
+                if (context->state >= HOXML_STATE_NONE) /* If appending the terminator was successful */
+                    context->state = HOXML_STATE_PROCESSING_INSTRUCTION_END;
+            } else if (HOXML_IS_NAME_CHAR(c.codepoint))
+                hoxml_append_character(context, c);
+            else
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_PROCESSING_INSTRUCTION_CONTENT: /* Found space after a PI name, looking for '?' or chars */
+            HOXML_LOG_STATE("HOXML_STATE_PROCESSING_INSTRUCTION_CONTENT")
+            if (c.codepoint == '?') { /* "?>" marks the end of a processing instruction */
+                const char* declaration;
+                if ((declaration = hoxml_strstr(context->content, context->encoding, "encoding=", HOXML_ENC_UNKNOWN,
+                        HOXML_CASE_SENSITIVE)) != NULL) {
+                    const char* encoding;
+                    if ((encoding = hoxml_strstr(declaration, context->encoding, "\"", HOXML_ENC_UNKNOWN,
+                            HOXML_CASE_SENSITIVE)) != NULL || (encoding = hoxml_strstr(declaration, context->encoding,
+                            "'", HOXML_ENC_UNKNOWN, HOXML_CASE_SENSITIVE)) != NULL) {
+                        switch (context->encoding) {
+                        case HOXML_ENC_UNKNOWN: /* The document did not begin with a byte order marker (BOM) */
+                            if (hoxml_strcmp(encoding, context->encoding, "\"UTF-8\"", HOXML_ENC_UNKNOWN,
+                                    HOXML_CASE_INSENSITIVE) != 0 || hoxml_strcmp(encoding, context->encoding,
+                                    "'UTF-8'", HOXML_ENC_UNKNOWN, HOXML_CASE_INSENSITIVE) != 0) {
+                                context->encoding = HOXML_ENC_UTF_8;
+                            } else if (hoxml_strcmp(encoding, context->encoding, "\"UTF-16\"", HOXML_ENC_UNKNOWN,
+                                    HOXML_CASE_INSENSITIVE) != 0 || hoxml_strcmp(encoding, context->encoding,
+                                    "'UTF-16'", HOXML_ENC_UNKNOWN, HOXML_CASE_INSENSITIVE) != 0) {
+                                /* UTF-16 encoded documents require one of the UTF-16 BOMs so this is an error */
+                                context->state = HOXML_STATE_ERROR_ENCODING;
+                                return HOXML_ERROR_ENCODING;
+                            } break;
+                        case HOXML_ENC_UTF_8: /* The UTF-8 BOM was found at the beginning of the document */
+                            if (hoxml_strcmp(encoding, context->encoding, "\"UTF-8\"", HOXML_ENC_UNKNOWN,
+                                    HOXML_CASE_INSENSITIVE) == 0 && hoxml_strcmp(encoding, context->encoding,
+                                    "'UTF-8'", HOXML_ENC_UNKNOWN, HOXML_CASE_INSENSITIVE) == 0) {
+                                /* If the UTF-8 BOM was found but the encoding declaration was not "UTF-8" then we */
+                                /* have a contradiction and, therefore, an error */
+                                context->state = HOXML_STATE_ERROR_ENCODING;
+                                return HOXML_ERROR_ENCODING;
+                            } break;
+                        case HOXML_ENC_UTF_16_LE: /* The UTF-16LE BOM was found at the beginning of the document */
+                        case HOXML_ENC_UTF_16_BE: /* The UTF-16BE BOM was found at the beginning of the document */
+                            if (hoxml_strcmp(encoding, context->encoding, "\"UTF-16\"", HOXML_ENC_UNKNOWN,
+                                    HOXML_CASE_INSENSITIVE) == 0 && hoxml_strcmp(encoding, context->encoding,
+                                    "'UTF-16'", HOXML_ENC_UNKNOWN, HOXML_CASE_INSENSITIVE) == 0)
+                                return HOXML_ERROR_ENCODING;
+                            break;
+                        }
+                    }
+                }
+                hoxml_append_terminator(context);
+                if (context->state >= HOXML_STATE_NONE) /* If appending the terminator was successful */
+                    context->state = HOXML_STATE_PROCESSING_INSTRUCTION_END;
+            } else {
+                if (context->content == NULL) /* If this is the first character of the PI's content */
+                    context->content = HOXML_STACK->end + 1; /* The PI's content string will begin here */
+                hoxml_append_character(context, c);
+            } break;
+        case HOXML_STATE_DTD_BEGIN1: /* Found a 'D' after "<!", looking for 'O' */
+            HOXML_LOG_STATE("HOXML_STATE_DTD_BEGIN1")
+            hoxml_pop_stack(context); /* The preceeding '<' triggered a new node. Undo it. */
+            if (c.codepoint == 'O')
+                context->state = HOXML_STATE_DTD_BEGIN2;
+            else
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_DTD_BEGIN2: /* Found an 'O' after "<!D", looking for 'C' */
+            HOXML_LOG_STATE("HOXML_STATE_DTD_BEGIN2")
+            if (c.codepoint == 'C')
+                context->state = HOXML_STATE_DTD_BEGIN3;
+            else
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_DTD_BEGIN3: /* Found a 'C' after "<!DO", looking for 'T' */
+            HOXML_LOG_STATE("HOXML_STATE_DTD_BEGIN3")
+            if (c.codepoint == 'T')
+                context->state = HOXML_STATE_DTD_BEGIN4;
+            else
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_DTD_BEGIN4: /* Found a 'T' after "<!DOC", looking for 'Y' */
+            HOXML_LOG_STATE("HOXML_STATE_DTD_BEGIN4")
+            if (c.codepoint == 'Y')
+                context->state = HOXML_STATE_DTD_BEGIN5;
+            else
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_DTD_BEGIN5: /* Found a 'Y' after "<!DOCT", looking for 'P' */
+            HOXML_LOG_STATE("HOXML_STATE_DTD_BEGIN5")
+            if (c.codepoint == 'P')
+                context->state = HOXML_STATE_DTD_BEGIN6;
+            else
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_DTD_BEGIN6: /* Found a 'P' after "<!DOCTY", looking for 'E' */
+            HOXML_LOG_STATE("HOXML_STATE_DTD_BEGIN6")
+            if (c.codepoint == 'E')
+                context->state = HOXML_STATE_DTD_BEGIN7;
+            else
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_DTD_BEGIN7: /* Found an 'E' after "<!DOCTYP", looking for whitespace */
+            HOXML_LOG_STATE("HOXML_STATE_DTD_BEGIN7")
+            if (HOXML_IS_WHITESPACE(c.codepoint))
+                context->state = HOXML_STATE_DTD_BEGIN8;
+            else
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_DTD_BEGIN8: /* Found space after "<!DOCTYPE", looking for more or a name start character */
+            HOXML_LOG_STATE("HOXML_STATE_DTD_BEGIN8")
+            if (HOXML_IS_NAME_START_CHAR(c.codepoint))
+                context->state = HOXML_STATE_DTD_NAME;
+            else if (!HOXML_IS_WHITESPACE(c.codepoint))
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_DTD_NAME: /* Found a name start character, looking for whitespace or name characters */
+            HOXML_LOG_STATE("HOXML_STATE_DTD_NAME")
+            if (HOXML_IS_WHITESPACE(c.codepoint))
+                context->state = HOXML_STATE_DTD_CONTENT;
+            else if (!HOXML_IS_NAME_CHAR(c.codepoint))
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_DTD_CONTENT: /* Found a DTD name and now looking for mostly anything but mainly '[' or '>' */
+            HOXML_LOG_STATE("HOXML_STATE_DTD_CONTENT")
+            /* We support Document Type Declarations (DTDs) insofar as they do not cause problems and DTD names may */
+            /* be recognized as invalid. Beyond that, the content is ignored just as comments are. That said, some */
+            /* checks are done here and the "open bracket" state because they're easy. */
+            if (c.codepoint == '[')
+                context->state = HOXML_STATE_DTD_OPEN_BRACKET;
+            else if (c.codepoint == '>')
+                context->state = HOXML_STATE_NONE; /* Return to the "before root element" state, the only one allowed */
+            else if (!HOXML_IS_CHAR_DATA(c.codepoint))
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        case HOXML_STATE_DTD_OPEN_BRACKET: /* Found a '[' within DTD content, looking for a closing ']' */
+            HOXML_LOG_STATE("HOXML_STATE_DTD_OPEN_BRACKET")
+            /* Some additional characters are allowed between '[' and ']' brackets, namely markup declaration */
+            /* characters like '<' and '>'. We'll just allow anything to keep things simple. */
+            if (c.codepoint == ']')
+                context->state = HOXML_STATE_DTD_CONTENT;
+            break;
+        case HOXML_STATE_PROCESSING_INSTRUCTION_END: /* Found a '?' after PI content, looking for '>' */
+            HOXML_LOG_STATE("HOXML_STATE_PROCESSING_INSTRUCTION_END")
+            if (c.codepoint == '>')
+                return hoxml_end_tag(context);
+            else
+                context->state = HOXML_STATE_ERROR_SYNTAX;
+            break;
+        }
+    } /* while (context->state >= HOXML_STATE_NONE && context->state <= HOXML_STATE_DONE) */
+
+    /* A dozen or so states may try to add data to the buffer, new nodes, characters, or terminators. It's possible */
+    /* there was not enough space left in the buffer for this putting us into an error state where parsing cannot */
+    /* continue. In these cases, the state will have been set appropriately. */
+    if (context->state == HOXML_STATE_ERROR_INSUFFICIENT_MEMORY) {
+        /* Because the character leading to this error state could not be used, we'll undo the iteration in the hopes */
+        /* that we recover from this error (one of two errors that can be recovered, by hoxml_realloc() in this case) */
+        /* and parsing can continue on the next call to hoxml_parse() */
+        context->iterator = previous_iterator;
+        context->stream_length = previous_stream_length;
+        context->column--; /* If recovered, parsing will continue with the same character so don't count this one */
+        return HOXML_ERROR_INSUFFICIENT_MEMORY;
+    }
+
+    /* Any other error case not yet covered by previous checks is due to incorrect syntax in the document */
+    return HOXML_ERROR_SYNTAX;
+}
+
+/* Attempt to push a new node to the stack as a child of the current head node */
+void hoxml_push_stack(hoxml_context_t* context) {
+    /* If "allocating" a new node would overflow the buffer */
+    if ((context->stack == NULL && sizeof(hoxml_node_t) >= context->buffer_length) || (context->stack != NULL &&
+            HOXML_STACK->end + 1 + sizeof(hoxml_node_t) >= context->buffer + context->buffer_length)) {
+        context->error_return_state = context->state;
+        context->state = HOXML_STATE_ERROR_INSUFFICIENT_MEMORY;
+        return;
+    }
+
+    hoxml_node_t* node;
+    if (context->stack == NULL)/* If pushing the root node */
+        node = (hoxml_node_t*) context->buffer; /* Place the new node at the beginning of the buffer */
+    else
+        node = (hoxml_node_t*)(HOXML_STACK->end + 1);
+    if (node != NULL) {
+        /* Assign initial values to the node */
+        node->parent = HOXML_STACK; /* This new node's parent is the previous stack node */
+        node->end = &(node->tag) - 1; /* Point to the last byte of the node, -1 because no tag has been copied yet */
+    }
+    context->stack = (char*)node;
+}
+
+/* Pop the head node from the stack */
+void hoxml_pop_stack(hoxml_context_t* context) {
+    if (context->stack == NULL)
+        return;
+
+    /* Reassign the stack (head) pointer so that it now points to the parent of the node about to be popped */
+    hoxml_node_t* popped_node = HOXML_STACK;
+    context->stack = (char*)popped_node->parent;
+
+    /* Overwrite the memory used by this node with zeroes */
+    context->tag = context->attribute = context->value = context->content = NULL; /* TODO: move somewhere else */
+    memset(popped_node, 0, popped_node->end - (char*)popped_node + 1);
+}
+
+/* Attempt to add the given character to the end of the stack's current head node */
+void hoxml_append_character(hoxml_context_t* context, hoxml_character_t c) {
+    HOXML_STACK->flags &= ~HOXML_FLAG_TERMINATED;
+
+    if (HOXML_STACK->end + c.bytes >= context->buffer + context->buffer_length) {
+        context->error_return_state = context->state;
+        context->state = HOXML_STATE_ERROR_INSUFFICIENT_MEMORY;
+        return;
+    }
+
+    memcpy(HOXML_STACK->end + 1, &(c.encoded), c.bytes); /* Copy the character to the stack */
+    HOXML_STACK->end += c.bytes; /* Redirect the end pointer to the new end just after the appended character */
+}
+
+/* Attempt to add a null terminator to the end of the stack's current head node */
+void hoxml_append_terminator(hoxml_context_t* context) {
+    if (HOXML_STACK->flags & HOXML_FLAG_TERMINATED) /* If the node's current string is already terminated */
+        return; /* To avoid adding additional terminators and using more bytes than expected, do nothing */
+    HOXML_STACK->flags |= HOXML_FLAG_TERMINATED;
+
+    /* If the document is encoded with UTF-16, two bytes will be appended. One byte otherwise. */
+    size_t bytes = context->encoding >= HOXML_ENC_UTF_16_BE ? 2 : 1;
+    if (HOXML_STACK->end + bytes >= context->buffer + context->buffer_length) {
+        context->error_return_state = context->state;
+        context->state = HOXML_STATE_ERROR_INSUFFICIENT_MEMORY;
+        return;
+    }
+
+    memset(HOXML_STACK->end + 1, '\0', bytes); /* Copy the terminator to the stack */
+    HOXML_STACK->end += bytes; /* Redirect the end pointer to the new end just after the appended terminator */
+}
+
+/* Perform the steps needed to decode and clean up after a character or entity reference given the context obect and */
+/* the type of reference. There are three types defined in an enumeration. */
+void hoxml_end_reference(hoxml_context_t* context, int type) {
+    hoxml_character_t c;
+    c.codepoint = c.encoded = 0;
+    c.bytes = 0;
+    unsigned long value; /* Integer value of numeric or hexadecimal reference */
+
+    switch (type) {
+    case HOXML_REF_TYPE_ENTITY:
+        if (hoxml_strcmp(context->reference_start, context->encoding, "lt",
+                HOXML_ENC_UNKNOWN, HOXML_CASE_SENSITIVE) != 0) {
+            c = hoxml_encode_character('<', context->encoding);
+        } else if (hoxml_strcmp(context->reference_start, context->encoding, "gt",
+                HOXML_ENC_UNKNOWN, HOXML_CASE_SENSITIVE) != 0) {
+            c = hoxml_encode_character('>', context->encoding);
+        } else if (hoxml_strcmp(context->reference_start, context->encoding, "amp", HOXML_ENC_UNKNOWN,
+                HOXML_CASE_SENSITIVE) != 0) {
+            c = hoxml_encode_character('&', context->encoding);
+        } else if (hoxml_strcmp(context->reference_start, context->encoding, "apos", HOXML_ENC_UNKNOWN,
+                HOXML_CASE_SENSITIVE) != 0) {
+            c = hoxml_encode_character('\'', context->encoding);
+        } else if (hoxml_strcmp(context->reference_start, context->encoding, "quot", HOXML_ENC_UNKNOWN,
+                HOXML_CASE_SENSITIVE) != 0) {
+            c = hoxml_encode_character('"', context->encoding);
+        } else
+            context->state = HOXML_STATE_ERROR_SYNTAX;
+        break;
+    case HOXML_REF_TYPE_NUMERIC:
+        value = strtoul(hoxml_to_ascii(context->reference_start, context->encoding), NULL, 10);
+        if (value != 0) /* If the reference string could be converted as a base-ten integer */
+            c = hoxml_encode_character(value, context->encoding);
+        else
+            context->state = HOXML_STATE_ERROR_SYNTAX;
+        break;
+    case HOXML_REF_TYPE_HEX:
+        value = strtoul(hoxml_to_ascii(context->reference_start, context->encoding), NULL, 16);
+        if (value != 0) /* If the reference string could be converted as a base-16 integer */
+            c = hoxml_encode_character(value, context->encoding);
+        else
+            context->state = HOXML_STATE_ERROR_SYNTAX;
+        break;
+    }
+
+    /* If the reference could not be turned into a character */
+    if (c.bytes == 0)
+        return;
+
+    /* Remove the reference's string from the buffer. For example, "&lt;" would result in "lt" being stored so it */
+    /* could be parsed here. It should now be removed from the buffer. */
+    memset(context->reference_start, 0, HOXML_STACK->end - context->reference_start + 1);
+    HOXML_STACK->end = context->reference_start - 1;
+    context->reference_start = NULL;
+    hoxml_append_character(context, c); /* Append the character being referenced */
+    /* No need for any checks against the buffer length. In all cases, more bytes were removed just now than added. */
+    context->state = context->return_state; /* Either HOXML_STATE_OPEN_TAG or HOXML_STATE_ATTRIBUTE_VALUE */
+    context->return_state = HOXML_STATE_NONE;
+}
+
+void hoxml_begin_tag(hoxml_context_t* context) {
+    hoxml_push_stack(context);
+    if (context->state >= HOXML_STATE_NONE) { /* If pushing a new node was successful */
+        context->return_state = context->state; /* For comments and references, so we know which state to return to */
+        context->state = HOXML_STATE_TAG_BEGIN;
+    }
+}
+
+hoxml_code_t hoxml_end_tag(hoxml_context_t* context) {
+    context->state = HOXML_STATE_OPEN_TAG;
+    context->post_state = HOXML_POST_STATE_TAG_END; /* Common to three of the four possible cases */
+    hoxml_node_t* node = HOXML_STACK;
+    hoxml_node_t* parent = node->parent;
+    if (node->flags & HOXML_FLAG_END_TAG) { /* True for e.g. </tag> but not <tag/> */
+        if (parent == NULL || hoxml_strcmp(&(node->tag), context->encoding, &(parent->tag), context->encoding,
+                HOXML_CASE_SENSITIVE) == 0) { /* If there was preceeding open tag or there is but it doesn't match */
+            context->state = HOXML_STATE_ERROR_TAG_MISMATCH;
+            return HOXML_ERROR_TAG_MISMATCH;
+        } else { /* If an element successfully closed a matching open tag */
+            hoxml_pop_stack(context); /* Pop the end tag (e.g. "</tag>") */
+            context->tag = &(parent->tag);
+            /* Element content is placed, in memory, after the tag and its terminator... */
+            context->content = context->tag + hoxml_strlen(context->tag, context->encoding);
+            /* ...which may be either one or two bytes, depending on encoding */
+            context->content += (context->encoding >= HOXML_ENC_UTF_16_BE ? 2 : 1);
+             /* Closing an element means one less level of nesting so decrement the depth after returning */
+            HOXML_STACK->flags |= HOXML_FLAG_DECREMENT_DEPTH;
+            return HOXML_ELEMENT_END;
+        }
+    } else if (node->flags & HOXML_FLAG_EMPTY_ELEMENT) /* Self-closing/empty element (e.g. "<tag/>") */
+        return HOXML_ELEMENT_END;
+    else if (node->flags & HOXML_FLAG_PROCESSING_INSTRUCTION) /* Processing instruction (e.g. "<?xml?>") */
+        return HOXML_PROCESSING_INSTRUCTION_END;
+    /* The only remaining case is an open tag (e.g. "<tag>") and we expect a matching close tag later */
+    context->post_state = HOXML_STATE_NONE; /* For this fourth case, of four possible, there is no clean up */
+    /* Opening an element means one more level of nesting so increment the depth after returning */
+    HOXML_STACK->flags |= HOXML_FLAG_INCREMENT_DEPTH;
+    return HOXML_ELEMENT_BEGIN;
+}
+
+int hoxml_post_state_cleanup(hoxml_context_t* context) {
+    if (context->post_state != HOXML_STATE_NONE) {
+        switch (context->post_state) {
+        case HOXML_POST_STATE_TAG_END: { /* Clean up after a close tag, empty element, or processing instruction */
+            int was_document_or_document_type_declaration = 0;
+            /* If the processing instruction flag is applied (i.e. this is a PI) and the PI's target is the reserved */
+            /* "xml" target, or some other case variant of it */
+            if (HOXML_STACK->flags & HOXML_FLAG_PROCESSING_INSTRUCTION && hoxml_strcmp(&(HOXML_STACK->tag),
+                    context->encoding, "xml", HOXML_ENC_UNKNOWN, HOXML_CASE_INSENSITIVE)) {
+                context->state = HOXML_STATE_NONE; /* Return to the initial state as if nothing happened */
+                was_document_or_document_type_declaration = 1;
+            }
+            hoxml_pop_stack(context); /* Pop a start or self-closed tag (<tag> or <tag/> or <?pi?>)*/
+            if (context->stack == NULL && was_document_or_document_type_declaration == 0)
+                return 1; /* hoxml_parse() should return HOXML_END_OF_DOCUMENT */
+            break;
+        } case HOXML_POST_STATE_ATTRIBUTE_END: /* Remove the most recent attribute and value strings from the buffer */
+            /* Zero the memory from the end pointer to the byte at which the attribute's name begins */
+            memset(context->attribute, 0, HOXML_STACK->end - (char*)context->attribute + 1);
+            HOXML_STACK->end = context->attribute - 1;
+            /* With these public properties now pointing to zeroes, nullify them so there's no confusion */
+            context->attribute = context->value = NULL;
+            break;
+        }
+        context->post_state = HOXML_STATE_NONE;
+    }
+
+    return 0; /* hoxml_parse() should not return */
+}
+
+/* Decode the given character with the given encoding to the its equivalent value */
+hoxml_character_t hoxml_decode_character(const char* str, size_t str_length, int encoding) {
+    hoxml_character_t c;
+    c.encoded = c.codepoint = 0; /* These default values are not valid so pausing will cease if returned */
+    c.bytes = 0;
+
+    switch (encoding) {
+    case HOXML_ENC_UNKNOWN:
+        c.bytes = 1;
+        break;
+    case HOXML_ENC_UTF_8:
+        /* The first byte of a UTF-8 character can can begin with one of four bit patterns, each indicating the */
+        /* number of remaining bytes: 0XXXXXXX = 1 byte, 110XXXXX = 2 bytes, 1110XXXX = 3 bytes, 11110XXX = 4 bytes. */
+        /* NOTE: UTF-8 is *big* endian. */
+        if (((str[0] >> 7) & 0x01) == 0x00)
+            c.bytes = 1;
+        else if (((str[0] >> 5) & 0x07) == 0x06)
+            c.bytes = 2;
+        else if (((str[0] >> 4) & 0x0F) == 0x0E)
+            c.bytes = 3;
+        else if (((str[0] >> 3) & 0x1F) == 0x1E)
+            c.bytes = 4;
+        break;
+    case HOXML_ENC_UTF_16_BE:
+        /* UTF-16 characters are either two bytes or four bytes where the four-byte characters are encoded such that */
+        /* the first two bytes begin with 110110XX and the second with 110111XX. The rest are two-byte characters. */
+        if (((str[0] >> 2) & 0x3F) == 0x36 && ((str[2] >> 2) & 0x3F) == 0x37)
+            c.bytes = 4;
+        else
+            c.bytes = 2;
+        break;
+    case HOXML_ENC_UTF_16_LE:
+        /* UTF-16LE (Little Endian) is just like UTF-16BE (Big Endian) but the most and least significant bytes in */
+        /* any 16-bit sequence are swapped. (Technically, a byte isn't defined as eight bits but it is in practice.) */
+        if (((str[1] >> 2) & 0x3F) == 0x36 && ((str[3] >> 2) & 0x3F) == 0x37)
+            c.bytes = 4;
+        else
+            c.bytes = 2;
+        break;
+    }
+
+    /* If the string doesn't have enough bytes in it to decode this character */
+    if (c.bytes > str_length) {
+        /* Set the decoded value to the maximum possible to indicate a failure, zero the rest, and return early */
+        c.codepoint = UINT32_MAX;
+        c.encoded = 0;
+        c.bytes = 0;
+        return c;
+    }
+
+    switch (encoding) {
+    case HOXML_ENC_UNKNOWN:
+        c.codepoint = str[0];
+        break;
+    case HOXML_ENC_UTF_8:
+        if (c.bytes == 1) {
+            /* One-byte UTF-8 characters are encoded as 0XXXXXXX where the Xs represent the bits of the character's */
+            /* value. For all decoding, we want to grab only those bits and transform them into an integer. */
+            /* The method here takes one byte from the string, uses a mask to zero out any bit that is not part of */
+            /* resulting value, casts the masked byte to an unsigned 32-bit integer, shifts those bits to the left to */
+            /* place them at the indexes they're expected in the value, and then bitwise ORs these components into a */
+            /* single unsigned 32-bit integer. This one-byte case does not need any shift but the remaining cases do. */
+            c.codepoint = (unsigned)(str[0] & 0x7F);
+        } else if (c.bytes == 2) {
+            /* Two-byte UTF-8 characters are encoded as 110XXXXX 10XXXXXX */
+            c.codepoint = ((unsigned)(str[0] & 0x1F) << 6) | (unsigned)(str[1] & 0x3F);
+        } else if (c.bytes == 3) {
+            /* Three-byte UTF-8 characters are encoded as 1110XXXX 10XXXXXX 10XXXXXX */
+            c.codepoint = ((unsigned)(str[0] & 0x0F) << 12) | ((unsigned)(str[1] & 0x3F) << 6) |
+                          ((unsigned)(str[2] & 0x3F) << 0);
+        } else if (c.bytes == 4) {
+            /* Four-byte UTF-8 characters are encoded as 11110XXX 10XXXXXX 10XXXXXX 10XXXXXX */
+            c.codepoint = ((unsigned)(str[0] & 0x07) << 18) | ((unsigned)(str[1] & 0x3F) << 12) |
+                          ((unsigned)(str[2] & 0x3F) << 6)  |  (unsigned)(str[3] & 0x3F);
+        } break;
+    case HOXML_ENC_UTF_16_BE:
+        if (c.bytes == 2) {
+            /* Concatenate the two bytes together to retrieve the original value */
+            c.codepoint = ((unsigned)str[0] << 8) | ((unsigned)str[1] << 0);
+        } else if (c.bytes == 4) {
+            /* Four-byte UTF-16 characters are encoded as 110110XX XXXXXXXX 110111XX XXXXXXXX after first subtracting */
+            /* 0x00010000 from the value. Here, that subtracted value is reconstructed and 0x00010000 is added back. */
+            c.codepoint = (((unsigned)(str[0] & 0x03) << 18) | ((unsigned)str[1] << 16) |
+                           ((unsigned)(str[2] & 0x03) << 8)  | ((unsigned)str[3] << 0)) + 0x00010000;
+        }
+        break;
+    case HOXML_ENC_UTF_16_LE:
+        if (c.bytes == 2)
+            c.codepoint = ((unsigned)str[1] << 8) | ((unsigned)str[0] << 0);
+        else if (c.bytes == 4) {
+            c.codepoint = (((unsigned)(str[1] & 0x03) << 18) | ((unsigned)str[0] << 16) |
+                           ((unsigned)(str[3] & 0x03) << 8)  | ((unsigned)str[2] << 0)) + 0x00010000;
+        }
+        break;
+    }
+
+    memcpy(&(c.encoded), str, c.bytes); /* Copy the bytes of the character from the pointed-to string into c.encoded */
+
+    return c;
+}
+
+/* Encode the given character codepoint to the given character encoding */
+hoxml_character_t hoxml_encode_character(unsigned codepoint, int encoding) {
+    hoxml_character_t c;
+    c.codepoint = codepoint;
+    c.encoded = 0;
+    c.bytes = 0;
+
+    /* This variable will make it easier to assign values to 'c.endoded' without difficult-to-read casts */
+    char* str = (char*)&(c.encoded);
+
+    switch (encoding) {
+    case HOXML_ENC_UNKNOWN: /* If the encoding is somehow not specified, assume UTF-8 */
+    case HOXML_ENC_UTF_8:
+        if (codepoint <= 0x0000007F) { /* If the codepoint will fit into one byte */
+            c.encoded = codepoint;
+            c.bytes = 1;
+        } else if (codepoint >= 0x000080 && codepoint <= 0x000007FF) { /* If the codepoint will fit into two bytes */
+            /* For codepoints with bits XXXXXAAA AABBBBBB, we want to transform them to the form 110AAAAA 10BBBBBB. */
+            /* The method here treats c.encoded as an array of unsigned, eight-bit integers. This is done to assign */
+            /* bytes individually for the sake of endianness where UTF-8 is big endian. The codepoint is masked in */
+            /* order to zero any bits that are not used in the byte being assigned, then shifted all the way to the */
+            /* right. The prefixed "0xC0" and "0x80" bitwise ORs prepend the UTF-8 markers 110 and 10, respectively. */
+            str[0] = 0xC0 | (char)((codepoint & 0x0000007C0) >> 6); /* 110AAAAAA */
+            str[1] = 0x80 | (char)((codepoint & 0x0000000FF) >> 0); /* 10BBBBBB */
+            c.bytes = 2;
+        } else if ((codepoint >= 0x00000800 && codepoint <= 0x0000D7FF) ||
+                (codepoint >= 0x0000E000 && codepoint <= 0x0000FFFF)) {
+            /* For a codepoint with bits AAAABBBB BBCCCCCC we want 1110AAAA 10BBBBBB 10CCCCCC */
+            str[0] = 0xE0 | (char)((codepoint & 0x0000F000) >> 12); /* 1110AAAA */
+            str[1] = 0x80 | (char)((codepoint & 0x00000FC0) >> 6);  /* 10BBBBBB */
+            str[2] = 0x80 | (char)((codepoint & 0x0000003F) >> 0);  /* 10CCCCCC */
+            c.bytes = 3;
+        } else if (codepoint >= 0x00010000 && codepoint <= 0x0010FFFF) {
+            /* For a codepoint with bits XXXAAABB BBBBCCCC CCDDDDDD we want 11110AAA 10BBBBBB 10CCCCCC 10DDDDDD */
+            str[0] = 0xF0 | (char)((codepoint & 0x001C0000) >> 18); /* 11110AAA */
+            str[1] = 0x80 | (char)((codepoint & 0x0003F000) >> 12); /* 10BBBBBB */
+            str[2] = 0x80 | (char)((codepoint & 0x00000FC0) >> 6);  /* 10CCCCCC */
+            str[3] = 0x80 | (char)((codepoint & 0x0000003F) >> 0);  /* 10DDDDDD */
+            c.bytes = 4;
+        } else /* If the codepoint is not valid */
+            c.bytes = 0; /* Don't even try */
+        break;
+    case HOXML_ENC_UTF_16_BE:
+        if (codepoint <= 0x0000D7FF || (codepoint >= 0x0000E000 && codepoint <= 0x0000FFFF)) { /* Fits in two bytes */
+            str[0] = (char)((codepoint & 0x0000FF00) >> 8);
+            str[1] = (char) (codepoint & 0x000000FF);
+            c.bytes = 2;
+        } else if (codepoint >= 0x00010000 && codepoint <= 0x0010FFFF) { /* If the codepoint fits in four bytes */
+            /* For codepoint - 0x00010000 with bits XXXXXXXX XXXXAABB BBBBBBCC DDDDDDDD we want to transform the bits */
+            /* to the form 110110AA BBBBBBBB 110111CC DDDDDDDD. When decoded, as per UTF-16, 0x00010000 is added. */
+            /* The prefixed "0xD8" and "0xDC" bitwise ORs prepend the UTF-16 markers 110110 and 110111, respectively. */
+            codepoint -= 0x00010000;
+            str[0] = 0xD8 | (char)((codepoint & 0x000C0000) >> 20); /* 110110AA */
+            str[1] =        (char)((codepoint & 0x0003FC00) >> 18); /* BBBBBBBB */
+            str[2] = 0xDC | (char)((codepoint & 0x00000300) >> 8);  /* 110111CC */
+            str[3] =        (char)((codepoint & 0x000000FF) >> 0);  /* DDDDDDDD */
+            c.bytes = 4;
+        } else /* If the codepoint is not valid */
+            c.bytes = 0; /* Don't even try */
+        break;
+    case HOXML_ENC_UTF_16_LE:
+        /* UTF-16LE (Little Endian) is just like UTF-16BE (Big Endian) with the reverse endianness meaning that the */
+        /* operations here are identical to those above but the indexes have been changed to reflect endianness */
+        if (codepoint <= 0x0000D7FF || (codepoint >= 0x0000E000 && codepoint <= 0x0000FFFF)) {
+            str[1] = (char)((codepoint & 0x0000FF00) >> 8);
+            str[0] = (char)((codepoint & 0x000000FF) >> 0);
+            c.bytes = 2;
+        } else if (codepoint >= 0x00010000 && codepoint <= 0x0010FFFF) {
+            codepoint -= 0x00010000;
+            str[3] = 0xD8 | (char)((codepoint & 0x000C0000) >> 20); /* 110110AA */
+            str[2] =        (char)((codepoint & 0x0003FC00) >> 18); /* BBBBBBBB */
+            str[1] = 0xDC | (char)((codepoint & 0x00000300) >> 8);  /* 110111CC */
+            str[0] =        (char)((codepoint & 0x000000FF) >> 0);  /* DDDDDDDD */
+            c.bytes = 4;
+        } else
+            c.bytes = 0;
+        break;
+    }
+
+    return c;
+}
+
+/* Given a reference string with the given encoding, return an equivalent string encoding using ASCII */
+char* hoxml_to_ascii(const char* str, int encoding) {
+    /* This is only for references which have a maximum length so this static array will always be large enough */
+    static char ascii[16];
+    memset(ascii, 0, sizeof(ascii));
+
+    size_t ascii_index = 0; /* Current index in the ASCII string */
+    const char* it = str;
+    hoxml_character_t c = hoxml_decode_character(it, 65535, encoding);
+
+    /* While we haven't iterated up to a null terminator AND not beyond the size of the result array */
+    while (c.codepoint != '\0' && ascii_index < sizeof(ascii)) {
+        /* Assign the decoded value of the source to the equivalent index in the result array. In practice, this is */
+        /* effectively only necessary for UTF-16 content where the encoded values differ from decoded ones for */
+        /* characters also included in ASCII. For UTF-8, this whole function call could be skipped. */
+        ascii[ascii_index++] = c.codepoint;
+
+        /* Iterate to the next character in the string */
+        it += c.bytes;
+        c = hoxml_decode_character(it, 65535, encoding);
+    }
+
+    return ascii;
+}
+
+/* Get the length, in bytes not characters, of the given string with the given encoding */
+size_t hoxml_strlen(const char* str, int encoding) {
+    size_t length = 0;
+    const char* it = str;
+    hoxml_character_t c = hoxml_decode_character(it++, 65535, encoding);
+
+    while (c.codepoint != '\0') { /* While we haven't iterated to a null terminator */
+        length++;
+        c = hoxml_decode_character(it++, 65535, encoding);
+    }
+
+    return length;
+}
+
+/* Compare the given strings with the given encodings for equality with an additional parameter for case sensitivity. */
+/* The return value is 0 if the strings are not equal. All other return values mean the strings are equal. */
+int hoxml_strcmp(const char* str1, int encoding1, const char* str2, const int encoding2, int sensitivity) {
+    const char* it1 = str1;
+    const char* it2 = str2;
+    hoxml_character_t c1 = hoxml_decode_character(it1, 65535, encoding1);
+    hoxml_character_t c2 = hoxml_decode_character(it2, 65535, encoding2);
+
+    while (c1.codepoint != '\0' && c2.codepoint != '\0') { /* While neither iterator has reached a null terminator */
+        /* If, accounting for sensitivity, the charcters are not equal */
+        if ((sensitivity == HOXML_CASE_INSENSITIVE && HOXML_TO_LOWER(c1.codepoint) != HOXML_TO_LOWER(c2.codepoint)) ||
+                (sensitivity == HOXML_CASE_SENSITIVE && c1.codepoint != c2.codepoint)) {
+            return 0;
+        }
+
+        /* Continue iterating through both strings one character at a time */
+        it1 += c1.bytes;
+        c1 = hoxml_decode_character(it1, 65535, encoding1);
+        it2 += c2.bytes;
+        c2 = hoxml_decode_character(it2, 65535, encoding2);
+    }
+
+    return c2.codepoint == '\0';
+}
+
+/* Search for a given string, needle, within another string, haystack. The return value is a pointer to the byte at */
+/* which the string, needle, first appears or NULL if it cannot be found. */
+const char* hoxml_strstr(const char* haystack, int haystack_encoding, const char* needle, int needle_encoding,
+        int sensitivity) {
+    const char* it_haystack = haystack;
+    const char* it_needle = needle;
+    hoxml_character_t c_haystack = hoxml_decode_character(it_haystack, 65535, haystack_encoding);
+    hoxml_character_t c_needle = hoxml_decode_character(it_needle, 65535, needle_encoding);
+
+    while (c_haystack.codepoint != '\0') { /* While we haven't iterated to a null terminator */
+        /* If the current character in the haystack equals the first character in the needle AND the whole needle */
+        /* string follows this character */
+        if (c_haystack.codepoint == c_needle.codepoint &&
+                hoxml_strcmp(it_haystack, haystack_encoding, it_needle, needle_encoding, sensitivity) != 0) {
+            /* Return a pointer to the location the needle (first) appeared at */
+            return it_haystack;
+        }
+
+        /* Continue iterating through characters in the haystack string */
+        it_haystack += c_haystack.bytes;
+        c_haystack = hoxml_decode_character(it_haystack, 65535, haystack_encoding);
+    }
+
+    return NULL; /* The needle was not found in the haystack */
+}
+
+#endif /* HOXML_IMPLEMENTATION */
+
+#endif /* HOXML_H */
author	Leander Scherer <leander@schererleander.de>	2026-03-08 20:04:08 +0100
committer	Leander Scherer <leander@schererleander.de>	2026-03-08 20:07:58 +0100
commit	c5455ef0fbfc4203a4aa8ad185dfa43bdadc0b82 (patch)
tree	731fffe1d9591587a366895f4a46859d2c79436f /include/hoxml.h
parent	7bdc10ae6de645812f4e57185067f0a83ca5655f (diff)