From c5455ef0fbfc4203a4aa8ad185dfa43bdadc0b82 Mon Sep 17 00:00:00 2001 From: Leander Scherer Date: Sun, 8 Mar 2026 20:04:08 +0100 Subject: feat(deps): add raylib and raytmx dependencies --- include/hoxml.h | 1497 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1497 insertions(+) create mode 100644 include/hoxml.h (limited to 'include/hoxml.h') diff --git a/include/hoxml.h b/include/hoxml.h new file mode 100644 index 0000000..d7addec --- /dev/null +++ b/include/hoxml.h @@ -0,0 +1,1497 @@ +/* +Copyright (c) 2024-2025 Luke Philipsen + +Permission to use, copy, modify, and/or distribute this software for +any purpose with or without fee is hereby granted. + +THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL +WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE +FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY +DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN +AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT +OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +/* Usage + + Do this: + #define HOXML_IMPLEMENTATION + before you include this file in *one* C or C++ file to create the implementation. + + You can define HOXML_DECL with + #define HOXML_DECL static + or + #define HOXML_DECL extern + to specify hoxml function declarations as static or extern, respectively. + The default specifier is extern. +*/ + +#ifndef HOXML_H + #define HOXML_H + +#include /* strtoul() */ +#include /* memcpy(), memset(), NULL, size_t */ + +#ifndef HOXML_DECL + #define HOXML_DECL +#endif /* HOXML_DECL */ + +#ifdef __cplusplus + extern "C" { +#endif /* __cpluspus */ + +/***************/ +/* Definitions */ + +/** + * Error and token codes returned after parsing. + */ +typedef enum { + HOXML_ERROR_INVALID_INPUT = -9, /**< One or more parameter passed to hoxml was unacceptable. */ + HOXML_ERROR_INTERNAL = -8, /**< There's a bug in hoxml and parsing must halt. */ + HOXML_ERROR_INSUFFICIENT_MEMORY = -7, /**< Initialization or continued parsing require more memory. */ + HOXML_ERROR_UNEXPECTED_EOF = -6, /**< Reached the end of the XML content before the end of the document. */ + HOXML_ERROR_SYNTAX = -5, /**< Syntax error (e.g. "" followed by ""). */ + HOXML_ERROR_INVALID_DOCUMENT_TYPE_DECLARATION = -2, /**< declaration not before the root element. */ + HOXML_ERROR_INVALID_DOCUMENT_DECLARATION = -1, /**< declaration not before the root element. */ + HOXML_END_OF_DOCUMENT = 0, /**< The root element has been closed, parsing is done. */ + HOXML_ELEMENT_BEGIN, /**< A new element/tag began and its name is available. */ + HOXML_ELEMENT_END, /**< An element was closed, or , and its name and content are available. */ + HOXML_ATTRIBUTE, /**< An attribute's value, its name, and its element are available. */ + HOXML_PROCESSING_INSTRUCTION_BEGIN, /**< A processing instruction began and its target is available. */ + HOXML_PROCESSING_INSTRUCTION_END /**< A processing instruction ended and its content is available. */ +} hoxml_code_t; + +/** + * Holds context and state information needed by hoxml. Some of this information is public and holds the data parsed + * from XML content (element names, attribute names and values, etc.) but some is private and only makes sense to hoxml. + */ +typedef struct { + /* Public */ + char* tag; /**< Holds the name of the open or just-closed tag, or processing instruction target. */ + char* attribute; /**< Holds the current attribute's name. */ + char* value; /**< Holds the current attribute's value. */ + char* content; /**< Holds the current element's content. This means all character data found, including spaces. */ + int line; /**< The line currently being parsed. Lines are determined by line feeds and carriage returns. */ + int column; /**< The column, on the current line, of the character last parsed. */ + int depth; /**< The nested level of elements. Assigned with the level in which the element was found. */ + + /* Private (for internal use) */ + int is_initialized; /* Set to 1, or true, by hoxml_init() and indicates this context is safe to use */ + const char* xml; /* XML content to be parsed */ + size_t xml_length; /* Length of the XML content to parse */ + int encoding; /* Character encoding of the XML content */ + const char* iterator; /* Pointer to the character in the XML content being parsed */ + char* buffer; /* Memory allocated for hoxml to use */ + size_t buffer_length; /* Amount of memory allocated for hoxml */ + char* reference_start; /* Pointer to a location on the stack where a reference entity string (e.g "<") began */ + char* stack; /* Pointer to the current node in the stack-like structure of elements */ + int state; /* Current parsing state, determines which characters are acceptable and when to return */ + int post_state; /* When not "none" this indicates a post-state that has a cleanup step */ + int return_state; /* State to return to after the processing of a comment or reference has finished */ + int error_return_state; /* State to return to after recovering from an error */ + unsigned long stream; /* Holds the current character, whole or partial. May contain bytes from different strings. */ + size_t stream_length; /* Length of the 'stream' variable in bytes */ + unsigned newline_character; /* The character used to increment the 'line' variable, \r or \n */ +} hoxml_context_t; + +/** + * Sets up the hoxml context object to begin parsing. Following this, call hoxml_parse() until + * HOXML_END_OF_DOCUMENT or one of the error values is returned. + * + * @param context Pointer to an allocated hoxml context object. This instance will be modified. + * @param buffer A pointer to some contiguous block of memory for hoxml to use. This will also be modified, frequently. + * @param buffer_length The length, in bytes, of the buffer handed to hoxml as the 'buffer' parameter. + */ +HOXML_DECL void hoxml_init(hoxml_context_t* context, void* buffer, size_t buffer_length); + +/** + * Instruct hoxml to use a new buffer. This maintains the current state of parsing meaning that the next call to + * hoxml_parse() will continue none the wiser. + * The buffer must have a length greater than the current buffer and both buffers must be allocated at the time this + * function is called. Once it returns, the original buffer may and should be freed. + * + * @param context An initialized hoxml context object. + * @param buffer A pointer to a new, contiguous block of memory for hoxml to use. + * @param buffer_length The length, in bytes, of the buffer handed to hoxml as the 'buffer' parameter. + */ +HOXML_DECL void hoxml_realloc(hoxml_context_t* context, void* buffer, size_t buffer_length); + +/** + * Begin or continue parsing the given XML content string. + * The XML content string does not need to contain the content in its entirety. If hoxml finds a null terminator or + * parses up to the indicated length of the content, HOXML_ERROR_UNEXPECTED_EOF is returned and parsing will cease. + * However, this error is recoverable and parsing will continue if the next call to hoxml_parse() passes a new XML + * content string, using the same pointer or not. + * + * @param context An initialized hoxml context object. This should be treated as read-only until parsing is done. + * @param xml XML content as an encoded string. Supported character encodings include ASCII, UTF-8, and UTF-16(BE|LE). + * @param xml_length Length of the XML content in bytes. + * @return A code indicating what information from the XML content is available or an error. + */ +HOXML_DECL hoxml_code_t hoxml_parse(hoxml_context_t* context, const char* xml, size_t xml_length); + +#ifdef __cplusplus + } +#endif /* __cplusplus */ + +#ifdef HOXML_IMPLEMENTATION + +/******************/ +/* Implementation */ + +enum { + /* Current parser states */ + HOXML_STATE_ERROR_INTERNAL = -8, + HOXML_STATE_ERROR_INSUFFICIENT_MEMORY = -7, + HOXML_STATE_ERROR_UNEXPECTED_EOF = -6, + HOXML_STATE_ERROR_SYNTAX = -5, + HOXML_STATE_ERROR_ENCODING = -4, + HOXML_STATE_ERROR_TAG_MISMATCH = -3, + HOXML_STATE_ERROR_INVALID_DOCUMENT_TYPE_DECLARATION = -2, + HOXML_STATE_ERROR_INVALID_DOCUMENT_DECLARATION = -1, + HOXML_STATE_NONE = 0, + HOXML_STATE_UTF8_BOM1, + HOXML_STATE_UTF8_BOM2, + HOXML_STATE_UTF16BE_BOM, + HOXML_STATE_UTF16LE_BOM, + HOXML_STATE_TAG_BEGIN, + HOXML_STATE_TAG_END, + HOXML_STATE_ELEMENT_NAME1, + HOXML_STATE_ELEMENT_NAME2, + HOXML_STATE_ATTRIBUTE_NAME1, + HOXML_STATE_ATTRIBUTE_NAME2, + HOXML_STATE_ATTRIBUTE_ASSIGNMENT, + HOXML_STATE_ATTRIBUTE_VALUE, + HOXML_STATE_OPEN_TAG, + HOXML_STATE_COMMENT_CDATA_OR_DTD_BEGIN, + HOXML_STATE_COMMENT_BEGIN, + HOXML_STATE_COMMENT, + HOXML_STATE_COMMENT_END1, + HOXML_STATE_COMMENT_END2, + HOXML_STATE_CDATA_BEGIN1, + HOXML_STATE_CDATA_BEGIN2, + HOXML_STATE_CDATA_BEGIN3, + HOXML_STATE_CDATA_BEGIN4, + HOXML_STATE_CDATA_BEGIN5, + HOXML_STATE_CDATA_BEGIN6, + HOXML_STATE_CDATA_CONTENT, + HOXML_STATE_CDATA_END1, + HOXML_STATE_CDATA_END2, + HOXML_STATE_REFERENCE_BEGIN, + HOXML_STATE_REFERENCE_ENTITY, + HOXML_STATE_REFERENCE_NUMERIC, + HOXML_STATE_REFERENCE_HEX, + HOXML_STATE_PROCESSING_INSTRUCTION_BEGIN, + HOXML_STATE_PROCESSING_INSTRUCTION_TARGET1, + HOXML_STATE_PROCESSING_INSTRUCTION_TARGET2, + HOXML_STATE_PROCESSING_INSTRUCTION_CONTENT, + HOXML_STATE_PROCESSING_INSTRUCTION_END, + HOXML_STATE_DTD_BEGIN1, + HOXML_STATE_DTD_BEGIN2, + HOXML_STATE_DTD_BEGIN3, + HOXML_STATE_DTD_BEGIN4, + HOXML_STATE_DTD_BEGIN5, + HOXML_STATE_DTD_BEGIN6, + HOXML_STATE_DTD_BEGIN7, + HOXML_STATE_DTD_BEGIN8, + HOXML_STATE_DTD_NAME, + HOXML_STATE_DTD_CONTENT, + HOXML_STATE_DTD_OPEN_BRACKET, + HOXML_STATE_DONE, + /* Post (i.e. after) parser states indicating actions to take on the next call to hoxml_parse() */ + HOXML_POST_STATE_TAG_END, + HOXML_POST_STATE_ATTRIBUTE_END, +}; + +enum { + HOXML_FLAG_END_TAG = 1, /* The node is a dedicated end tag (not an empty element) */ + HOXML_FLAG_EMPTY_ELEMENT = 2, /* The node is an empty element */ + HOXML_FLAG_PROCESSING_INSTRUCTION = 4, /* The node is a processing instruction */ + HOXML_FLAG_DOUBLE_QUOTE = 8, /* The value string being parsed was opened with a double quote (") */ + HOXML_FLAG_TERMINATED = 16, /* The node's current string (tag, attribute, etc.) is null terminated */ + HOXML_FLAG_BEGUN = 32, /* The "element begun" code was already returned for this node */ + HOXML_FLAG_INCREMENT_DEPTH = 64, /* Context object's depth value should increase by one next hoxml_parse() */ + HOXML_FLAG_DECREMENT_DEPTH = 128 /* Context object's depth value should decrease by one next hoxml_parse() */ +}; + +enum { + HOXML_ENC_UNKNOWN = 0, /* The character encoding is unknown. UTF-8 is assumed. */ + HOXML_ENC_UTF_8, /* Variable-length encoding (8, 16, 24, or 32 bits) compatible with ASCII */ + HOXML_ENC_UTF_16_LE, /* Variable-length encoding (16 or 32 bits), little-endian variant */ + HOXML_ENC_UTF_16_BE /* Variable-lenght encoding (16 or 32 bits), big-endian variant */ +}; + +enum { + HOXML_CASE_SENSITIVE = 0, /* Cases must match. 'A' == 'a' -> false. */ + HOXML_CASE_INSENSITIVE /* Cases need not match. 'A' == 'a' -> true. */ +}; + +enum { + HOXML_REF_TYPE_ENTITY = 0, /* Predefined strings representing known, problematic characters (e.g. '<') */ + HOXML_REF_TYPE_NUMERIC, /* A value of a character given as a decimal number */ + HOXML_REF_TYPE_HEX /* A value of a character given as a hexadecimal number */ +}; + +struct _hoxml_node_t; +typedef struct _hoxml_node_t { + struct _hoxml_node_t* parent; /* Points to the parent node, or NULL if this is the root */ + char* end; /* Points to the last byte of this node's data */ + int flags; /* May contain any number of the flags defined in hoxml_node_flags */ + char tag; /* Where the tag string will be stored in the buffer, must be defined last */ +} hoxml_node_t; + +typedef struct { + unsigned encoded; /* Character as it appeared in the content. In other words, the original, encoded character. */ + unsigned codepoint; /* Unicode codepoint of the character. In other words, the decoded character. */ + size_t bytes; /* Number of eight-bit bytes of the encoded character, in the [1, 4] range */ +} hoxml_character_t; + +#ifndef UINT32_MAX /* Defined in stdint.h with later revisions of C and C++ but not for some earlier ones */ + #define UINT32_MAX (0xffffffff) +#endif +#define HOXML_STACK ((hoxml_node_t*)context->stack) +#define HOXML_TO_LOWER(c) (c >= 'A' && c <= 'Z' ? c + 32 : c) +#define HOXML_IS_NEW_LINE(c) (c == 0x0A || c == 0x0D) +#define HOXML_IS_WHITESPACE(c) (c == 0x20 || c == 0x09 || HOXML_IS_NEW_LINE(c)) +#define HOXML_IS_ASCII_CHAR(c) (c >= 0x21 && c <= 0x7F) +#define HOXML_IS_CHAR_DATA(c) (c != '<' && c != '&') +#define HOXML_IS_ALPHA(c) ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) +#define HOXML_IS_NUMERIC(c) (c >= '0' && c <= '9') +#define HOXML_IS_NAME_START_CHAR(c) (HOXML_IS_ALPHA(c) || c == ':' || c == '_' || (c >= 0xC0 && c <= 0xD6) || \ + (c >= 0xD8 && c <= 0xF6) || c >= 0xF8) +#define HOXML_IS_NAME_CHAR(c) (HOXML_IS_NAME_START_CHAR(c) || c == '-' || c == '.'|| HOXML_IS_NUMERIC(c)) +#define HOXML_IS_HEX_CHAR(c) (HOXML_IS_NUMERIC(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) +#define HOXML_IS_VALUE_CHAR_DATA(f, c) (HOXML_IS_CHAR_DATA(c) && ((f & HOXML_FLAG_DOUBLE_QUOTE && c != '"') || \ + c != '\'')) + +void hoxml_push_stack(hoxml_context_t* context); +void hoxml_pop_stack(hoxml_context_t* context); +void hoxml_append_character(hoxml_context_t* context, hoxml_character_t c); +void hoxml_append_terminator(hoxml_context_t* context); +void hoxml_end_reference(hoxml_context_t* context, int type); +void hoxml_begin_tag(hoxml_context_t* context); +hoxml_code_t hoxml_end_tag(hoxml_context_t* context); +int hoxml_post_state_cleanup(hoxml_context_t* context); +hoxml_character_t hoxml_decode_character(const char* str, size_t str_length, int encoding); +hoxml_character_t hoxml_encode_character(unsigned codepoint, int encoding); +char* hoxml_to_ascii(const char* str, int encoding); +size_t hoxml_strlen(const char* str, int encoding); +int hoxml_strcmp(const char* str1, int encoding1, const char* str2, int encoding2, int sensitivity); +const char* hoxml_strstr(const char* haystack, int haystack_encoding, const char* needle, int needle_encoding, + int sensitivity); +#ifdef HOXML_DEBUG + #include /* printf() */ + #define HOXML_LOG_STATE(s) printf("%s\n", s); +#else + #define HOXML_LOG_STATE(s) +#endif + +HOXML_DECL void hoxml_init(hoxml_context_t* context, void* buffer, size_t buffer_length) { + if (context == NULL || buffer == NULL || buffer_length <= 0) + return; + + memset(context, 0, sizeof(hoxml_context_t)); /* Assign all values of the context to zero */ + context->buffer = (char*)buffer; /* Use the provided buffer */ + context->buffer_length = buffer_length; /* Remember the length of the provided buffer */ + context->line = 1; /* This is meant to be human-readable and humans begin counting at one */ + context->is_initialized = 1; + memset(buffer, 0, buffer_length); /* Fill the buffer with zeroes */ +} + +HOXML_DECL void hoxml_realloc(hoxml_context_t* context, void* buffer, size_t buffer_length) { + if (context == NULL || context->is_initialized == 0 || buffer == NULL || buffer_length <= context->buffer_length) + return; + + /* Reassign the end and parent pointers of each node, beginning at the tail and iterate to the head */ + hoxml_node_t* node = HOXML_STACK; + while (node != NULL) { + hoxml_node_t* parent = node->parent; + node->end = (char*)buffer + (node->end - context->buffer); + if (node->parent != NULL) + node->parent = (hoxml_node_t*)((char*)buffer + ((char*)node->parent - context->buffer)); + node = parent; + } + + /* Use offsets from the original buffer pointer to reassign pointers such that they now point to the new buffer */ + if (context->tag != NULL) + context->tag = (char*)buffer + (context->tag - context->buffer); + if (context->attribute != NULL) + context->attribute = (char*)buffer + (context->attribute - context->buffer); + if (context->value != NULL) + context->value = (char*)buffer + (context->value - context->buffer); + if (context->content != NULL) + context->content = (char*)buffer + (context->content - context->buffer); + if (context->reference_start != NULL) + context->reference_start = (char*)buffer + (context->reference_start - context->buffer); + if (context->stack != NULL) + context->stack = (char*)buffer + (context->stack - context->buffer); + + memset(buffer, 0, buffer_length); /* Fill the new buffer with zeroes */ + memcpy(buffer, context->buffer, context->buffer_length); /* Copy the entire, current buffer to the new buffer */ + context->buffer = (char*)buffer; + context->buffer_length = buffer_length; + + if (context->state == HOXML_STATE_ERROR_INSUFFICIENT_MEMORY) { + context->state = context->error_return_state; + context->error_return_state = HOXML_STATE_NONE; + } +} + +HOXML_DECL hoxml_code_t hoxml_parse(hoxml_context_t* context, const char* xml, const size_t xml_length) { + if (context == NULL || context->is_initialized == 0 || xml == NULL || xml_length == 0) + return HOXML_ERROR_INVALID_INPUT; + + if (HOXML_STACK != NULL) { + if (HOXML_STACK->flags & HOXML_FLAG_INCREMENT_DEPTH) { /* If an element began, increasing nesting */ + context->depth += 1; + HOXML_STACK->flags &= ~HOXML_FLAG_INCREMENT_DEPTH; /* Clear the flag */ + } + + if (HOXML_STACK->flags & HOXML_FLAG_DECREMENT_DEPTH) { /* If an element ended, decreasing nesting */ + context->depth -= 1; + HOXML_STACK->flags &= ~HOXML_FLAG_DECREMENT_DEPTH; /* Clear the flag */ + } + } + + switch (context->state) { + /* Two errors are recoverable: HOXML_ERROR_INSUFFICIENT_MEMORY and HOXML_ERROR_UNEXPECTED_EOF. The former can */ + /* be recovered by assigning a new buffer with hoxml_realloc(). The latter can be recovered by passing a new */ + /* XML content string to hoxml_parse() so we'll check for one before concluding we're still in error. */ + case HOXML_STATE_ERROR_UNEXPECTED_EOF: { + /* Try to decode a character, or remainder of a character, at the beginning of this hopefully-new string */ + unsigned long stream = context->stream; + /* Calculate the number of bytes to copy into the 'stream' variable from the hopefully-new string. We */ + /* want 4 bytes, or whatever is available. */ + size_t bytes_to_copy = 4; + if (bytes_to_copy > xml_length) + bytes_to_copy = xml_length; + if (context->stream_length > 0) { + /* Adjust the number of bytes to copy to account for possible bytes from a previous string */ + bytes_to_copy -= context->stream_length; + /* Append the new bytes to the previous one(s) */ + memcpy((char*)&stream + context->stream_length, xml, bytes_to_copy); + } + else { + /* Copy to the 'stream' under the assumption that all of it can be overwritten */ + memcpy(&stream, xml, bytes_to_copy); + } + hoxml_character_t c = hoxml_decode_character((const char*)&stream, xml_length, context->encoding); + /* If the character is the equivalent of a null terminator or there was not enough data */ + if (c.codepoint == 0 || c.codepoint == UINT32_MAX) + return HOXML_ERROR_UNEXPECTED_EOF; + context->state = context->error_return_state; + context->error_return_state = HOXML_STATE_NONE; + /* Note: there is a check for a change in the input pointer a little further down */ + } break; + case HOXML_STATE_DONE: return HOXML_END_OF_DOCUMENT; + case HOXML_STATE_ERROR_INTERNAL: return HOXML_ERROR_INTERNAL; + case HOXML_STATE_ERROR_INSUFFICIENT_MEMORY: return HOXML_ERROR_INSUFFICIENT_MEMORY; + case HOXML_STATE_ERROR_SYNTAX: return HOXML_ERROR_SYNTAX; + case HOXML_STATE_ERROR_ENCODING: return HOXML_ERROR_ENCODING; + case HOXML_STATE_ERROR_TAG_MISMATCH: return HOXML_ERROR_TAG_MISMATCH; + case HOXML_STATE_ERROR_INVALID_DOCUMENT_DECLARATION: return HOXML_ERROR_INVALID_DOCUMENT_DECLARATION; + } + + /* A handful of cases leave the context in an intermediary state. This allows the caller to have access to things */ + /* like the tag's name, an attribute's value, etc. but that old data may now need to be cleaned up. */ + if (hoxml_post_state_cleanup(context)) /* If the cleanup process found the document ended */ + return HOXML_END_OF_DOCUMENT; + + /* If the pointer to the XML content string has changed */ + if (context->xml != xml) { + /* A few variables are now invalid: the pointer to the content, its length, and the iterator */ + context->xml = xml; + context->xml_length = xml_length; + context->iterator = xml; + } + + /* Remember some context variables in case we hit an unexpected EoF and need to undo an iteration */ + const char* previous_iterator = context->iterator; + size_t previous_stream_length = context->stream_length; + while (context->state >= HOXML_STATE_NONE && context->state <= HOXML_STATE_DONE) { + /* About half of the parsing states assume the stack is non-null. */ + /* If parsing is currently in one of those states and the stack (head) pointer is null. */ + if (((context->state >= HOXML_STATE_TAG_BEGIN && context->state <= HOXML_STATE_OPEN_TAG) || + (context->state >= HOXML_STATE_REFERENCE_BEGIN && context->state <= HOXML_STATE_REFERENCE_HEX)) && + context->stack == NULL) { + /* Some unforseen bug has led us to a state in which continuing would cause an illegal memory access. */ + /* Parsing must halt. There is no way to recover. */ + context->state = HOXML_STATE_ERROR_INTERNAL; + return HOXML_ERROR_INTERNAL; + } + + /* Calculate the number of bytes remaining in the current XML content string */ + size_t bytes_remaining = (size_t)(context->xml_length - (context->iterator - context->xml)); + /* Calculate the number of bytes to copy into the 'stream' variable. We want 4 bytes, or whatever is left. */ + size_t bytes_to_copy = 4; + if (bytes_to_copy > bytes_remaining) + bytes_to_copy = bytes_remaining; + if (context->stream_length > 0) { + /* Adjust the number of bytes to copy to account for possible bytes from a previous XML content string. */ + /* This will be non-zero in the rare case where content is being given in parts. */ + bytes_to_copy -= context->stream_length; + /* Append the new bytes to the previous one(s) */ + memcpy((char*)&(context->stream) + context->stream_length, context->iterator, bytes_to_copy); + } + else { + /* Copy to the 'stream' under the assumption that all of it can be overwritten */ + memcpy(&(context->stream), context->iterator, bytes_to_copy); + } + hoxml_character_t c = hoxml_decode_character((const char*)&(context->stream), bytes_remaining, + context->encoding); + + /* If the character is the equivalent of a null terminator or there was not enough data to decode the value */ + if (c.codepoint == 0 || c.codepoint == UINT32_MAX) { + context->stream_length = bytes_to_copy; + context->error_return_state = context->state; + context->state = HOXML_STATE_ERROR_UNEXPECTED_EOF; + return HOXML_ERROR_UNEXPECTED_EOF; + } else if (HOXML_IS_NEW_LINE(c.codepoint)) { + if (context->newline_character == 0) /* If this is the first newline */ + context->newline_character = c.codepoint; /* Remember this as the character to use for increments */ + if (c.codepoint == context->newline_character) /* Avoid incrementing twice for files with \r\n endings */ + context->line++; + context->column = 0; + } else + context->column++; + + /* Iterate up to four bytes into the XML content string. The idea is to jump forward by the number of bytes */ + /* that were just decoded as a single character. The number of bytes varies from one to four bytes depending */ + /* on the character encoding and character's codepoint. We also need to consider the case in which some of */ + /* this character's bytes were carried over from a previous XML content string. Those bytes would have been */ + /* stashed in the context's 'stream' variable where 'stream_length' tells us the number of said bytes. */ + previous_iterator = context->iterator; + previous_stream_length = context->stream_length; + context->iterator += c.bytes - context->stream_length; + context->stream_length = 0; + + #ifdef HOXML_DEBUG + char debugCodepoint = HOXML_IS_NEW_LINE(c.codepoint) ? ' ' : c.codepoint; + printf(" %c [%08X] [L%02dC%02d] -> ", debugCodepoint, c.codepoint, context->line, context->column); + #endif + + switch(context->state) { + case HOXML_STATE_NONE: /* The first state immediately following initialization, or a document declaration */ + HOXML_LOG_STATE("HOXML_STATE_NONE") + if (c.codepoint == '<') + hoxml_begin_tag(context); + else if (c.encoded == 0xEF) { /* UTF-8 Byte Order Marker (BOM) is [EF] BB BF, as hex bytes */ + context->state = HOXML_STATE_UTF8_BOM1; + context->column--; /* Don't count this as a column */ + } else if (c.encoded == 0xFE) { /* UTF-16BE BOM is [FE] FF, as hex bytes */ + context->state = HOXML_STATE_UTF16BE_BOM; + context->column--; /* Don't count this as a column */ + } else if (c.encoded == 0xFF) { /* UTF-16LE BOM is [FF] FE, as hex bytes */ + context->state = HOXML_STATE_UTF16LE_BOM; + context->column--; /* Don't count this as a column */ + } else if (!HOXML_IS_WHITESPACE(c.codepoint)) + context->state = HOXML_STATE_ERROR_SYNTAX; + break; + case HOXML_STATE_UTF8_BOM1: /* The first byte of a UTF-8 byte order marker was found */ + HOXML_LOG_STATE("HOXML_STATE_UTF8_BOM1") + context->column--; /* Don't count this as a column */ + if (c.encoded == 0xBB) /* UTF-8 BOM is EF [BB] BF, as hex bytes */ + context->state = HOXML_STATE_UTF8_BOM2; + else + context->state = HOXML_STATE_ERROR_SYNTAX; + break; + case HOXML_STATE_UTF8_BOM2: /* The second byte of a UTF-8 byte order marker was found */ + HOXML_LOG_STATE("HOXML_STATE_UTF8_BOM2") + context->column--; /* Don't count this as a column */ + if (c.encoded == 0xBF) { /* UTF-8 BOM is EF BB [BF], as hex bytes */ + context->state = HOXML_STATE_NONE; + context->encoding = HOXML_ENC_UTF_8; + } else + context->state = HOXML_STATE_ERROR_SYNTAX; + break; + case HOXML_STATE_UTF16BE_BOM: /* The first byte of a UTF-16BE byte order marker was found */ + HOXML_LOG_STATE("HOXML_STATE_UTF16BE_BOM") + context->column--; /* Don't count this as a column */ + if (c.encoded == 0xFF) { /* UTF-16BE BOM is FE [FF], as hex bytes */ + context->state = HOXML_STATE_NONE; + context->encoding = HOXML_ENC_UTF_16_BE; + } else + context->state = HOXML_STATE_ERROR_SYNTAX; + break; + case HOXML_STATE_UTF16LE_BOM: /* The first byte of a UTF-16LE byte order marker was found */ + HOXML_LOG_STATE("HOXML_STATE_UTF16LE_BOM") + context->column--; /* Don't count this as a column */ + if (c.encoded == 0xFE) { /* UTF-16LE BOM is FF [FE], as hex bytes */ + context->state = HOXML_STATE_NONE; + context->encoding = HOXML_ENC_UTF_16_LE; + } else + context->state = HOXML_STATE_ERROR_SYNTAX; + break; + case HOXML_STATE_TAG_BEGIN: /* A new tag was started (a '<' was found) and a new node has been pushed */ + HOXML_LOG_STATE("HOXML_STATE_TAG_BEGIN") + if (c.codepoint == '?') { /* "state = HOXML_STATE_PROCESSING_INSTRUCTION_BEGIN; + HOXML_STACK->flags |= HOXML_FLAG_PROCESSING_INSTRUCTION; /* Apply the PI flag to this node */ + } else if (c.codepoint == '/') /* "flags |= HOXML_FLAG_END_TAG; /* Apply the end tag flag to this node */ + else if (c.codepoint == '!') /* "