@@ -1267,6 +1267,11 @@
XML_STATUS_ERROR
otherwise. The possible error codes
are:
+ XML_ERROR_NOT_STARTED
+ -
+ when stopping or suspending a parser before it has started,
+ added in Expat 2.6.4.
+
XML_ERROR_SUSPENDED
- when suspending an already suspended parser.
XML_ERROR_FINISHED
--- contrib/expat/doc/xmlwf.1.orig
+++ contrib/expat/doc/xmlwf.1
@@ -5,7 +5,7 @@
\\$2 \(la\\$1\(ra\\$3
..
.if \n(.g .mso www.tmac
-.TH XMLWF 1 "November 6, 2024" "" ""
+.TH XMLWF 1 "March 27, 2025" "" ""
.SH NAME
xmlwf \- Determines if an XML document is well-formed
.SH SYNOPSIS
--- contrib/expat/doc/xmlwf.xml.orig
+++ contrib/expat/doc/xmlwf.xml
@@ -9,7 +9,7 @@
Copyright (c) 2001 Scott Bronson
Copyright (c) 2002-2003 Fred L. Drake, Jr.
Copyright (c) 2009 Karl Waclawek
- Copyright (c) 2016-2024 Sebastian Pipping
+ Copyright (c) 2016-2025 Sebastian Pipping
Copyright (c) 2016 Ardo van Rangelrooij
Copyright (c) 2017 Rhodri James
Copyright (c) 2020 Joe Orton
@@ -21,7 +21,7 @@
"http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" [
Scott">
Bronson">
- November 6, 2024">
+ March 27, 2025">
1">
bronson@rinspin.com">
--- /dev/null
+++ contrib/expat/fuzz/xml_lpm_fuzzer.cpp
@@ -0,0 +1,464 @@
+/*
+ __ __ _
+ ___\ \/ /_ __ __ _| |_
+ / _ \\ /| '_ \ / _` | __|
+ | __// \| |_) | (_| | |_
+ \___/_/\_\ .__/ \__,_|\__|
+ |_| XML parser
+
+ Copyright (c) 2022 Mark Brand
+ Copyright (c) 2025 Sebastian Pipping
+ Licensed under the MIT license:
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to permit
+ persons to whom the Software is furnished to do so, subject to the
+ following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
+ NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+ DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#if defined(NDEBUG)
+# undef NDEBUG // because checks below rely on assert(...)
+#endif
+
+#include <assert.h>
+#include <stdlib.h>
+#include <vector>
+
+#include "expat.h"
+#include "xml_lpm_fuzzer.pb.h"
+#include "src/libfuzzer/libfuzzer_macro.h"
+
+static const char *g_encoding = nullptr; // written by SetEncoding()
+static const char *g_external_entity = nullptr; // read by ExternalEntityRefHandler
+static size_t g_external_entity_size = 0; // length that goes with g_external_entity
+
+// Stores in g_encoding the encoding name that belongs to the requested
+// testcase encoding: NONE yields NULL, anything unlisted yields "UNKNOWN".
+void
+SetEncoding(const xml_lpm_fuzzer::Encoding &e) {
+  if (e == xml_lpm_fuzzer::Encoding::UTF8) {
+    g_encoding = "UTF-8";
+    return;
+  }
+  if (e == xml_lpm_fuzzer::Encoding::UTF16) {
+    g_encoding = "UTF-16";
+    return;
+  }
+  if (e == xml_lpm_fuzzer::Encoding::ISO88591) {
+    g_encoding = "ISO-8859-1";
+    return;
+  }
+  if (e == xml_lpm_fuzzer::Encoding::ASCII) {
+    g_encoding = "US-ASCII";
+    return;
+  }
+  if (e == xml_lpm_fuzzer::Encoding::NONE) {
+    g_encoding = NULL;
+    return;
+  }
+
+  // No dedicated name exists for the remaining values.
+  g_encoding = "UNKNOWN";
+}
+
+static int g_allocation_count = 0; // allocation requests seen so far
+static std::vector<int> g_fail_allocations = {}; // ordinals that must fail
+
+void *
+MallocHook(size_t size) {
+  const int ordinal = ++g_allocation_count; // counts mallocs and reallocs
+  for (size_t k = 0; k < g_fail_allocations.size(); ++k) {
+    if (g_fail_allocations[k] == ordinal) {
+      return NULL; // this allocation is scheduled to fail
+    }
+  }
+  return malloc(size);
+}
+
+void *
+ReallocHook(void *ptr, size_t size) {
+  const int ordinal = ++g_allocation_count; // counter shared with MallocHook
+  for (size_t k = 0; k < g_fail_allocations.size(); ++k) {
+    if (g_fail_allocations[k] == ordinal) {
+      return NULL; // scheduled failure; ptr is left untouched
+    }
+  }
+  return realloc(ptr, size);
+}
+
+void
+FreeHook(void *ptr) {
+ free(ptr); // plain pass-through; frees do not advance the counter
+}
+
+XML_Memory_Handling_Suite memory_handling_suite
+ = {MallocHook, ReallocHook, FreeHook}; // handed to XML_ParserCreate_MM
+
+void InitializeParser(XML_Parser parser); // defined further down
+
+// Parse wrapper that resumes right away whenever the parser gets suspended,
+// so that the suspend/resume code is covered as well.
+enum XML_Status
+Parse(XML_Parser parser, const char *input, int input_len, int is_final) {
+  enum XML_Status result;
+  for (result = XML_Parse(parser, input, input_len, is_final);
+       result == XML_STATUS_SUSPENDED; result = XML_ResumeParser(parser)) {
+  }
+  return result;
+}
+
+// When the fuzzer is compiled with instrumentation such as ASan, the accesses
+// in TouchString will fault if they touch invalid memory (i.e. they detect
+// either a use-after-free or a buffer overflow). By calling TouchString in each
+// of the callbacks, we can check that the arguments meet the API specifications
+// in terms of length/null-termination. no_optimize is used to ensure that the
+// compiler has to emit actual memory reads, instead of removing them.
+static volatile size_t no_optimize = 0;
+static void
+TouchString(const XML_Char *ptr, int len = -1) {
+  if (ptr == NULL) {
+    return; // nothing to read
+  }
+  if (len != -1) { // counted string: read exactly len characters
+    for (int i = 0; i < len; ++i) {
+      no_optimize += ptr[i];
+    }
+    return;
+  }
+  // NUL-terminated string: read up to and including the terminator.
+  while (XML_Char value = *ptr++) {
+    no_optimize += value;
+  }
+}
+
+static void
+TouchNodeAndRecurse(XML_Content *content) {
+ switch (content->type) { // each node type has its own invariants
+ case XML_CTYPE_EMPTY:
+ case XML_CTYPE_ANY: // EMPTY/ANY: bare node without name or children
+ assert(content->quant == XML_CQUANT_NONE);
+ assert(content->name == NULL);
+ assert(content->numchildren == 0);
+ assert(content->children == NULL);
+ break;
+
+ case XML_CTYPE_MIXED: // every child must be a plain NAME leaf
+ assert(content->quant == XML_CQUANT_NONE
+ || content->quant == XML_CQUANT_REP);
+ assert(content->name == NULL);
+ for (unsigned int i = 0; i < content->numchildren; ++i) {
+ assert(content->children[i].type == XML_CTYPE_NAME);
+ assert(content->children[i].quant == XML_CQUANT_NONE);
+ assert(content->children[i].numchildren == 0);
+ assert(content->children[i].children == NULL);
+ TouchString(content->children[i].name); // read the child's element name
+ }
+ break;
+
+ case XML_CTYPE_NAME: // leaf carrying an element name
+ assert((content->quant == XML_CQUANT_NONE)
+ || (content->quant == XML_CQUANT_OPT)
+ || (content->quant == XML_CQUANT_REP)
+ || (content->quant == XML_CQUANT_PLUS));
+ assert(content->numchildren == 0);
+ assert(content->children == NULL);
+ TouchString(content->name);
+ break;
+
+ case XML_CTYPE_CHOICE:
+ case XML_CTYPE_SEQ: // group node: nameless, recurse into every child
+ assert((content->quant == XML_CQUANT_NONE)
+ || (content->quant == XML_CQUANT_OPT)
+ || (content->quant == XML_CQUANT_REP)
+ || (content->quant == XML_CQUANT_PLUS));
+ assert(content->name == NULL);
+ for (unsigned int i = 0; i < content->numchildren; ++i) {
+ TouchNodeAndRecurse(&content->children[i]);
+ }
+ break;
+
+ default:
+ assert(false); // a node type not listed above is treated as a failure
+ }
+}
+
+static void XMLCALL
+ElementDeclHandler(void *userData, const XML_Char *name, XML_Content *model) {
+  TouchString(name);          // element name, scanned to its terminator
+  TouchNodeAndRecurse(model); // check and read the whole content model
+  XML_FreeContentModel(static_cast<XML_Parser>(userData), model);
+}
+
+// Attribute-list declaration callback: reads all four strings it is given.
+static void XMLCALL
+AttlistDeclHandler(void *userData, const XML_Char *elname,
+                   const XML_Char *attname, const XML_Char *atttype,
+                   const XML_Char *dflt, int isrequired) {
+  (void)userData, (void)isrequired;
+  TouchString(elname);
+  TouchString(attname);
+  TouchString(atttype);
+  TouchString(dflt);
+}
+
+// XML declaration callback: reads the version and encoding strings.
+static void XMLCALL
+XmlDeclHandler(void *userData, const XML_Char *version,
+               const XML_Char *encoding, int standalone) {
+  (void)userData, (void)standalone;
+  TouchString(version);
+  TouchString(encoding);
+}
+
+static void XMLCALL
+StartElementHandler(void *userData, const XML_Char *name,
+                    const XML_Char **atts) {
+  (void)userData;
+  TouchString(name);
+  for (const XML_Char **entry = atts; *entry != NULL; ++entry) {
+    TouchString(*entry); // the walk over atts ends at its first NULL entry
+  }
+}
+
+static void XMLCALL
+EndElementHandler(void *userData, const XML_Char *name) {
+  TouchString(name); // scanned up to its NUL terminator
+  (void)userData;    // not needed here
+}
+
+static void XMLCALL
+CharacterDataHandler(void *userData, const XML_Char *s, int len) {
+  TouchString(s, len); // counted data: exactly len characters are read
+  (void)userData;      // not needed here
+}
+
+static void XMLCALL
+ProcessingInstructionHandler(void *userData, const XML_Char *target,
+                             const XML_Char *data) {
+  TouchString(target); // both strings are scanned up to their terminator
+  TouchString(data);
+  (void)userData;
+}
+
+static void XMLCALL
+CommentHandler(void *userData, const XML_Char *data) {
+  TouchString(data);
+  // Every comment requests a resumable stop (XML_TRUE) of the parser passed
+  // as userData, so that the suspend code gets coverage.
+  XML_StopParser(static_cast<XML_Parser>(userData), XML_TRUE);
+}
+
+static void XMLCALL
+StartCdataSectionHandler(void *userData) {
+ (void)userData; // nothing to inspect; registering the handler is the point
+}
+
+static void XMLCALL
+EndCdataSectionHandler(void *userData) {
+ (void)userData; // nothing to inspect; registering the handler is the point
+}
+
+static void XMLCALL
+DefaultHandler(void *userData, const XML_Char *s, int len) {
+  TouchString(s, len); // counted data: exactly len characters are read
+  (void)userData;      // not needed here
+}
+
+// Doctype start callback: reads the three strings it is handed.
+static void XMLCALL
+StartDoctypeDeclHandler(void *userData, const XML_Char *doctypeName,
+                        const XML_Char *sysid, const XML_Char *pubid,
+                        int has_internal_subset) {
+  (void)userData, (void)has_internal_subset;
+  TouchString(doctypeName);
+  TouchString(sysid);
+  TouchString(pubid);
+}
+
+static void XMLCALL
+EndDoctypeDeclHandler(void *userData) {
+ (void)userData; // nothing to inspect; registering the handler is the point
+}
+
+// Entity declaration callback: value is read as counted data, the rest to NUL.
+static void XMLCALL
+EntityDeclHandler(void *userData, const XML_Char *entityName,
+                  int is_parameter_entity, const XML_Char *value,
+                  int value_length, const XML_Char *base,
+                  const XML_Char *systemId, const XML_Char *publicId,
+                  const XML_Char *notationName) {
+  (void)userData, (void)is_parameter_entity;
+  TouchString(entityName);
+  TouchString(value, value_length);
+  TouchString(base);
+  TouchString(systemId);
+  TouchString(publicId);
+  TouchString(notationName);
+}
+
+static void XMLCALL
+NotationDeclHandler(void *userData, const XML_Char *notationName,
+                    const XML_Char *base, const XML_Char *systemId,
+                    const XML_Char *publicId) {
+  TouchString(notationName); // each string is scanned to its terminator
+  TouchString(base);
+  TouchString(systemId);
+  TouchString(publicId);
+  (void)userData;
+}
+
+static void XMLCALL
+StartNamespaceDeclHandler(void *userData, const XML_Char *prefix,
+                          const XML_Char *uri) {
+  TouchString(prefix); // TouchString tolerates a NULL pointer
+  TouchString(uri);
+  (void)userData;
+}
+
+static void XMLCALL
+EndNamespaceDeclHandler(void *userData, const XML_Char *prefix) {
+  TouchString(prefix); // TouchString tolerates a NULL pointer
+  (void)userData;      // not needed here
+}
+
+static int XMLCALL
+NotStandaloneHandler(void *userData) {
+ (void)userData;
+ return XML_STATUS_OK; // unconditional; this handler never reports an error
+}
+
+// Serves the testcase-provided external entity (if any) through a child
+// parser; returns the child's parse status, or XML_STATUS_ERROR without one.
+static int XMLCALL
+ExternalEntityRefHandler(XML_Parser parser, const XML_Char *context,
+                         const XML_Char *base, const XML_Char *systemId,
+                         const XML_Char *publicId) {
+  TouchString(context);
+  TouchString(base);
+  TouchString(systemId);
+  TouchString(publicId);
+  if (! g_external_entity) {
+    return XML_STATUS_ERROR;
+  }
+  XML_Parser child
+      = XML_ExternalEntityParserCreate(parser, context, g_encoding);
+  const int rc = Parse(child, g_external_entity, g_external_entity_size, 1);
+  XML_ParserFree(child);
+  return rc;
+}
+
+// Skipped-entity callback: reads the entity name.
+static void XMLCALL
+SkippedEntityHandler(void *userData, const XML_Char *entityName,
+                     int is_parameter_entity) {
+  (void)userData, (void)is_parameter_entity;
+  TouchString(entityName);
+}
+
+// Unknown-encoding callback: reads the name, never fills in info, always fails.
+static int XMLCALL
+UnknownEncodingHandler(void *encodingHandlerData, const XML_Char *name,
+                       XML_Encoding *info) {
+  (void)encodingHandlerData, (void)info;
+  TouchString(name);
+  return XML_STATUS_ERROR;
+}
+
+void
+InitializeParser(XML_Parser parser) { // (re)applies settings and all handlers
+ XML_SetUserData(parser, (void *)parser); // handlers cast userData back
+ XML_SetHashSalt(parser, 0x41414141); // fixed salt; presumably for reproducibility
+ XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
+
+ XML_SetElementDeclHandler(parser, ElementDeclHandler);
+ XML_SetAttlistDeclHandler(parser, AttlistDeclHandler);
+ XML_SetXmlDeclHandler(parser, XmlDeclHandler);
+ XML_SetElementHandler(parser, StartElementHandler, EndElementHandler);
+ XML_SetCharacterDataHandler(parser, CharacterDataHandler);
+ XML_SetProcessingInstructionHandler(parser, ProcessingInstructionHandler);
+ XML_SetCommentHandler(parser, CommentHandler); // this one suspends the parser
+ XML_SetCdataSectionHandler(parser, StartCdataSectionHandler,
+ EndCdataSectionHandler);
+ // XML_SetDefaultHandler disables entity expansion
+ XML_SetDefaultHandlerExpand(parser, DefaultHandler);
+ XML_SetDoctypeDeclHandler(parser, StartDoctypeDeclHandler,
+ EndDoctypeDeclHandler);
+ // Note: This is mutually exclusive with XML_SetUnparsedEntityDeclHandler,
+ // and there isn't any significant code change between the two.
+ XML_SetEntityDeclHandler(parser, EntityDeclHandler);
+ XML_SetNotationDeclHandler(parser, NotationDeclHandler);
+ XML_SetNamespaceDeclHandler(parser, StartNamespaceDeclHandler,
+ EndNamespaceDeclHandler);
+ XML_SetNotStandaloneHandler(parser, NotStandaloneHandler);
+ XML_SetExternalEntityRefHandler(parser, ExternalEntityRefHandler);
+ XML_SetSkippedEntityHandler(parser, SkippedEntityHandler);
+ XML_SetUnknownEncodingHandler(parser, UnknownEncodingHandler, (void *)parser);
+}
+
+// Fuzzer entry point: replays the testcase's actions against one parser that
+// was created with the failure-injecting allocator hooks.
+DEFINE_TEXT_PROTO_FUZZER(const xml_lpm_fuzzer::Testcase &testcase) {
+  g_external_entity = nullptr;
+
+  const int action_count = testcase.actions_size();
+  if (action_count == 0) {
+    return;
+  }
+
+  // Arm the allocation hooks with this testcase's failure schedule.
+  g_allocation_count = 0;
+  g_fail_allocations.clear();
+  const int failure_count = testcase.fail_allocations_size();
+  for (int k = 0; k < failure_count; ++k) {
+    g_fail_allocations.push_back(testcase.fail_allocations(k));
+  }
+
+  SetEncoding(testcase.encoding());
+  XML_Parser parser
+      = XML_ParserCreate_MM(g_encoding, &memory_handling_suite, "|");
+  InitializeParser(parser);
+
+  // Every reset is followed by a fresh round of parser initialization.
+  const auto start_over = [parser]() {
+    XML_ParserReset(parser, g_encoding);
+    InitializeParser(parser);
+  };
+
+  // Replay the actions in the order given by the testcase.
+  for (int k = 0; k < action_count; ++k) {
+    const auto &action = testcase.actions(k);
+    const auto what = action.action_case();
+    if (what == xml_lpm_fuzzer::Action::kChunk) {
+      const auto &chunk = action.chunk();
+      if (Parse(parser, chunk.data(), chunk.size(), 0) == XML_STATUS_ERROR) {
+        start_over(); // force a reset after a parse error
+      }
+    } else if (what == xml_lpm_fuzzer::Action::kLastChunk) {
+      const auto &last = action.last_chunk();
+      Parse(parser, last.data(), last.size(), 1);
+      start_over();
+    } else if (what == xml_lpm_fuzzer::Action::kReset) {
+      start_over();
+    } else if (what == xml_lpm_fuzzer::Action::kExternalEntity) {
+      // Only remembered here; ExternalEntityRefHandler feeds it to a parser.
+      g_external_entity = action.external_entity().data();
+      g_external_entity_size = action.external_entity().size();
+    }
+  }
+
+  XML_ParserFree(parser);
+}
--- /dev/null
+++ contrib/expat/fuzz/xml_lpm_fuzzer.proto
@@ -0,0 +1,58 @@
+/*
+ __ __ _
+ ___\ \/ /_ __ __ _| |_
+ / _ \\ /| '_ \ / _` | __|
+ | __// \| |_) | (_| | |_
+ \___/_/\_\ .__/ \__,_|\__|
+ |_| XML parser
+
+ Copyright (c) 2022 Mark Brand
+ Copyright (c) 2025 Sebastian Pipping
+ Licensed under the MIT license:
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to permit
+ persons to whom the Software is furnished to do so, subject to the
+ following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
+ NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+ DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+syntax = "proto2";
+package xml_lpm_fuzzer;
+
+enum Encoding { // selects the encoding name the fuzzer passes to the parser
+ UTF8 = 0; // "UTF-8"
+ UTF16 = 1; // "UTF-16"
+ ISO88591 = 2; // "ISO-8859-1"
+ ASCII = 3; // "US-ASCII"
+ UNKNOWN = 4; // no dedicated case in SetEncoding; ends up as "UNKNOWN"
+ NONE = 5; // no encoding name at all (NULL)
+}
+
+message Action {
+ oneof action {
+ string chunk = 1; // fed to the parser as non-final input
+ string last_chunk = 2; // fed as final input, then the parser is reset
+ bool reset = 3; // resets the parser; the fuzzer does not read the value
+ string external_entity = 4; // content for later external entity references
+ }
+}
+
+message Testcase {
+ required Encoding encoding = 1; // encoding name used for every parser
+ repeated Action actions = 2; // replayed in order; the fuzzer skips empty lists
+ repeated int32 fail_allocations = 3; // 1-based allocation ordinals forced to fail
+}
--- contrib/expat/fuzz/xml_parse_fuzzer.c.orig
+++ contrib/expat/fuzz/xml_parse_fuzzer.c
@@ -5,7 +5,7 @@
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
--- contrib/expat/fuzz/xml_parsebuffer_fuzzer.c.orig
+++ contrib/expat/fuzz/xml_parsebuffer_fuzzer.c
@@ -5,7 +5,7 @@
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
--- contrib/expat/lib/expat.h.orig
+++ contrib/expat/lib/expat.h
@@ -11,7 +11,7 @@
Copyright (c) 2000-2005 Fred L. Drake, Jr.
Copyright (c) 2001-2002 Greg Stein
Copyright (c) 2002-2016 Karl Waclawek
- Copyright (c) 2016-2024 Sebastian Pipping
+ Copyright (c) 2016-2025 Sebastian Pipping
Copyright (c) 2016 Cristian Rodríguez
Copyright (c) 2016 Thomas Beutlich
Copyright (c) 2017 Rhodri James
@@ -1067,8 +1067,8 @@
See https://semver.org
*/
#define XML_MAJOR_VERSION 2
-#define XML_MINOR_VERSION 6
-#define XML_MICRO_VERSION 4
+#define XML_MINOR_VERSION 7
+#define XML_MICRO_VERSION 1
#ifdef __cplusplus
}
--- contrib/expat/lib/internal.h.orig
+++ contrib/expat/lib/internal.h
@@ -28,7 +28,7 @@
Copyright (c) 2002-2003 Fred L. Drake, Jr.
Copyright (c) 2002-2006 Karl Waclawek
Copyright (c) 2003 Greg Stein
- Copyright (c) 2016-2024 Sebastian Pipping
+ Copyright (c) 2016-2025 Sebastian Pipping
Copyright (c) 2018 Yury Gribov
Copyright (c) 2019 David Loffredo
Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow
@@ -127,6 +127,9 @@
# elif ULONG_MAX == 18446744073709551615u // 2^64-1
# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "ld"
# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "lu"
+# elif defined(EMSCRIPTEN) // 32bit mode Emscripten
+# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "ld"
+# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "zu"
# else
# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "d"
# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "u"
--- contrib/expat/lib/xmlparse.c.orig
+++ contrib/expat/lib/xmlparse.c
@@ -1,4 +1,4 @@
-/* c5625880f4bf417c1463deee4eb92d86ff413f802048621c57e25fe483eb59e4 (2.6.4+)
+/* d19ae032c224863c1527ba44d228cc34b99192c3a4c5a27af1f4e054d45ee031 (2.7.1+)
__ __ _
___\ \/ /_ __ __ _| |_
/ _ \\ /| '_ \ / _` | __|
@@ -13,7 +13,7 @@
Copyright (c) 2002-2016 Karl Waclawek
Copyright (c) 2005-2009 Steven Solie
Copyright (c) 2016 Eric Rahm
- Copyright (c) 2016-2024 Sebastian Pipping
+ Copyright (c) 2016-2025 Sebastian Pipping
Copyright (c) 2016 Gaurav
Copyright (c) 2016 Thomas Beutlich
Copyright (c) 2016 Gustavo Grieco
@@ -39,7 +39,7 @@
Copyright (c) 2022 Sean McBride
Copyright (c) 2023 Owain Davies
Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow
- Copyright (c) 2024 Berkay Eren Ürün
+ Copyright (c) 2024-2025 Berkay Eren Ürün
Copyright (c) 2024 Hanno Böck
Licensed under the MIT license:
@@ -325,6 +325,10 @@
const XML_Char *publicId;
const XML_Char *notation;
XML_Bool open;
+ XML_Bool hasMore; /* true if entity has not been completely processed */
+ /* An entity can be open while being already completely processed (hasMore ==
+ XML_FALSE). The reason is the delayed closing of entities until their inner
+ entities are processed and closed */
XML_Bool is_param;
XML_Bool is_internal; /* true if declared in internal subset outside PE */
} ENTITY;
@@ -415,6 +419,12 @@
int *scaffIndex;
} DTD;
+enum EntityType {
+ ENTITY_INTERNAL, /* processEntity installs internalEntityProcessor for it */
+ ENTITY_ATTRIBUTE, /* presumably tracked via m_openAttributeEntities; confirm */
+ ENTITY_VALUE, /* presumably tracked via m_openValueEntities; confirm */
+};
+
typedef struct open_internal_entity {
const char *internalEventPtr;
const char *internalEventEndPtr;
@@ -422,6 +432,7 @@
ENTITY *entity;
int startTagLevel;
XML_Bool betweenDecl; /* WFC: PE Between Declarations */
+ enum EntityType type;
} OPEN_INTERNAL_ENTITY;
enum XML_Account {
@@ -481,8 +492,8 @@
const char *next, const char **nextPtr,
XML_Bool haveMore, XML_Bool allowClosingDoctype,
enum XML_Account account);
-static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity,
- XML_Bool betweenDecl);
+static enum XML_Error processEntity(XML_Parser parser, ENTITY *entity,
+ XML_Bool betweenDecl, enum EntityType type);
static enum XML_Error doContent(XML_Parser parser, int startTagLevel,
const ENCODING *enc, const char *start,
const char *end, const char **endPtr,
@@ -513,18 +524,22 @@
const char *ptr, const char *end,
STRING_POOL *pool,
enum XML_Account account);
-static enum XML_Error appendAttributeValue(XML_Parser parser,
- const ENCODING *enc,
- XML_Bool isCdata, const char *ptr,
- const char *end, STRING_POOL *pool,
- enum XML_Account account);
+static enum XML_Error
+appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
+ const char *ptr, const char *end, STRING_POOL *pool,
+ enum XML_Account account, const char **nextPtr);
static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc,
const char *start, const char *end);
static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType);
#if XML_GE == 1
static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc,
const char *start, const char *end,
- enum XML_Account account);
+ enum XML_Account account,
+ const char **nextPtr);
+static enum XML_Error callStoreEntityValue(XML_Parser parser,
+ const ENCODING *enc,
+ const char *start, const char *end,
+ enum XML_Account account);
#else
static enum XML_Error storeSelfEntityValue(XML_Parser parser, ENTITY *entity);
#endif
@@ -709,6 +724,10 @@
const char *m_positionPtr;
OPEN_INTERNAL_ENTITY *m_openInternalEntities;
OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
+ OPEN_INTERNAL_ENTITY *m_openAttributeEntities;
+ OPEN_INTERNAL_ENTITY *m_freeAttributeEntities;
+ OPEN_INTERNAL_ENTITY *m_openValueEntities;
+ OPEN_INTERNAL_ENTITY *m_freeValueEntities;
XML_Bool m_defaultExpandInternalEntities;
int m_tagLevel;
ENTITY *m_declEntity;
@@ -756,6 +775,7 @@
ACCOUNTING m_accounting;
ENTITY_STATS m_entity_stats;
#endif
+ XML_Bool m_reenter;
};
#define MALLOC(parser, s) (parser->m_mem.malloc_fcn((s)))
@@ -1028,7 +1048,29 @@
#if defined(XML_TESTING)
g_bytesScanned += (unsigned)have_now;
#endif
- const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr);
+ // Run in a loop to eliminate dangerous recursion depths
+ enum XML_Error ret;
+ *endPtr = start;
+ while (1) {
+ // Use endPtr as the new start in each iteration, since it will
+ // be set to the next start point by m_processor.
+ ret = parser->m_processor(parser, *endPtr, end, endPtr);
+
+ // Make parsing status (and in particular XML_SUSPENDED) take
+ // precedence over re-enter flag when they disagree
+ if (parser->m_parsingStatus.parsing != XML_PARSING) {
+ parser->m_reenter = XML_FALSE;
+ }
+
+ if (! parser->m_reenter) {
+ break;
+ }
+
+ parser->m_reenter = XML_FALSE;
+ if (ret != XML_ERROR_NONE)
+ return ret;
+ }
+
if (ret == XML_ERROR_NONE) {
// if we consumed nothing, remember what we had on this parse attempt.
if (*endPtr == start) {
@@ -1139,6 +1181,8 @@
parser->m_freeBindingList = NULL;
parser->m_freeTagList = NULL;
parser->m_freeInternalEntities = NULL;
+ parser->m_freeAttributeEntities = NULL;
+ parser->m_freeValueEntities = NULL;
parser->m_groupSize = 0;
parser->m_groupConnector = NULL;
@@ -1241,6 +1285,8 @@
parser->m_eventEndPtr = NULL;
parser->m_positionPtr = NULL;
parser->m_openInternalEntities = NULL;
+ parser->m_openAttributeEntities = NULL;
+ parser->m_openValueEntities = NULL;
parser->m_defaultExpandInternalEntities = XML_TRUE;
parser->m_tagLevel = 0;
parser->m_tagStack = NULL;
@@ -1251,6 +1297,8 @@
parser->m_unknownEncodingData = NULL;
parser->m_parentParser = NULL;
parser->m_parsingStatus.parsing = XML_INITIALIZED;
+ // Reentry can only be triggered inside m_processor calls
+ parser->m_reenter = XML_FALSE;
#ifdef XML_DTD
parser->m_isParamEntity = XML_FALSE;
parser->m_useForeignDTD = XML_FALSE;
@@ -1310,6 +1358,24 @@
openEntity->next = parser->m_freeInternalEntities;
parser->m_freeInternalEntities = openEntity;
}
+ /* move m_openAttributeEntities to m_freeAttributeEntities (i.e. same task but
+ * for attributes) */
+ openEntityList = parser->m_openAttributeEntities;
+ while (openEntityList) {
+ OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
+ openEntityList = openEntity->next;
+ openEntity->next = parser->m_freeAttributeEntities;
+ parser->m_freeAttributeEntities = openEntity;
+ }
+ /* move m_openValueEntities to m_freeValueEntities (i.e. same task but
+ * for value entities) */
+ openEntityList = parser->m_openValueEntities;
+ while (openEntityList) {
+ OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
+ openEntityList = openEntity->next;
+ openEntity->next = parser->m_freeValueEntities;
+ parser->m_freeValueEntities = openEntity;
+ }
moveToFreeBindingList(parser, parser->m_inheritedBindings);
FREE(parser, parser->m_unknownEncodingMem);
if (parser->m_unknownEncodingRelease)
@@ -1323,6 +1389,19 @@
return XML_TRUE;
}
+/* Tells whether parsing is under way: XML_TRUE while the parser is running
+   or suspended, XML_FALSE in every other state.  Setters use this to refuse
+   changes once parsing has begun. */
+static XML_Bool
+parserBusy(XML_Parser parser) {
+  if (parser->m_parsingStatus.parsing == XML_PARSING
+      || parser->m_parsingStatus.parsing == XML_SUSPENDED) {
+    return XML_TRUE;
+  }
+
+  return XML_FALSE;
+}
+
enum XML_Status XMLCALL
XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) {
if (parser == NULL)
@@ -1331,8 +1410,7 @@
XXX There's no way for the caller to determine which of the
XXX possible error cases caused the XML_STATUS_ERROR return.
*/
- if (parser->m_parsingStatus.parsing == XML_PARSING
- || parser->m_parsingStatus.parsing == XML_SUSPENDED)
+ if (parserBusy(parser))
return XML_STATUS_ERROR;
/* Get rid of any previous encoding name */
@@ -1569,7 +1647,34 @@
entityList = entityList->next;
FREE(parser, openEntity);
}
-
+ /* free m_openAttributeEntities and m_freeAttributeEntities */
+ entityList = parser->m_openAttributeEntities;
+ for (;;) {
+ OPEN_INTERNAL_ENTITY *openEntity;
+ if (entityList == NULL) {
+ if (parser->m_freeAttributeEntities == NULL)
+ break;
+ entityList = parser->m_freeAttributeEntities;
+ parser->m_freeAttributeEntities = NULL;
+ }
+ openEntity = entityList;
+ entityList = entityList->next;
+ FREE(parser, openEntity);
+ }
+ /* free m_openValueEntities and m_freeValueEntities */
+ entityList = parser->m_openValueEntities;
+ for (;;) {
+ OPEN_INTERNAL_ENTITY *openEntity;
+ if (entityList == NULL) {
+ if (parser->m_freeValueEntities == NULL)
+ break;
+ entityList = parser->m_freeValueEntities;
+ parser->m_freeValueEntities = NULL;
+ }
+ openEntity = entityList;
+ entityList = entityList->next;
+ FREE(parser, openEntity);
+ }
destroyBindings(parser->m_freeBindingList, parser);
destroyBindings(parser->m_inheritedBindings, parser);
poolDestroy(&parser->m_tempPool);
@@ -1611,8 +1716,7 @@
return XML_ERROR_INVALID_ARGUMENT;
#ifdef XML_DTD
/* block after XML_Parse()/XML_ParseBuffer() has been called */
- if (parser->m_parsingStatus.parsing == XML_PARSING
- || parser->m_parsingStatus.parsing == XML_SUSPENDED)
+ if (parserBusy(parser))
return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
parser->m_useForeignDTD = useDTD;
return XML_ERROR_NONE;
@@ -1627,8 +1731,7 @@
if (parser == NULL)
return;
/* block after XML_Parse()/XML_ParseBuffer() has been called */
- if (parser->m_parsingStatus.parsing == XML_PARSING
- || parser->m_parsingStatus.parsing == XML_SUSPENDED)
+ if (parserBusy(parser))
return;
parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
}
@@ -1897,8 +2000,7 @@
if (parser == NULL)
return 0;
/* block after XML_Parse()/XML_ParseBuffer() has been called */
- if (parser->m_parsingStatus.parsing == XML_PARSING
- || parser->m_parsingStatus.parsing == XML_SUSPENDED)
+ if (parserBusy(parser))
return 0;
#ifdef XML_DTD
parser->m_paramEntityParsing = peParsing;
@@ -1915,8 +2017,7 @@
if (parser->m_parentParser)
return XML_SetHashSalt(parser->m_parentParser, hash_salt);
/* block after XML_Parse()/XML_ParseBuffer() has been called */
- if (parser->m_parsingStatus.parsing == XML_PARSING
- || parser->m_parsingStatus.parsing == XML_SUSPENDED)
+ if (parserBusy(parser))
return 0;
parser->m_hash_secret_salt = hash_salt;
return 1;
@@ -2230,6 +2331,11 @@
return parser->m_bufferEnd;
}
+static void
+triggerReenter(XML_Parser parser) {
+ parser->m_reenter = XML_TRUE; /* processor loop will call m_processor again */
+}
+
enum XML_Status XMLCALL
XML_StopParser(XML_Parser parser, XML_Bool resumable) {
if (parser == NULL)
@@ -2704,8 +2810,9 @@
contentProcessor(XML_Parser parser, const char *start, const char *end,
const char **endPtr) {
enum XML_Error result = doContent(
- parser, 0, parser->m_encoding, start, end, endPtr,
- (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
+ parser, parser->m_parentParser ? 1 : 0, parser->m_encoding, start, end,
+ endPtr, (XML_Bool)! parser->m_parsingStatus.finalBuffer,
+ XML_ACCOUNT_DIRECT);
if (result == XML_ERROR_NONE) {
if (! storeRawNames(parser))
return XML_ERROR_NO_MEMORY;
@@ -2793,6 +2900,11 @@
return XML_ERROR_NONE;
case XML_FINISHED:
return XML_ERROR_ABORTED;
+ case XML_PARSING:
+ if (parser->m_reenter) {
+ return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
+ }
+ /* Fall through */
default:
start = next;
}
@@ -2966,7 +3078,7 @@
reportDefault(parser, enc, s, next);
break;
}
- result = processInternalEntity(parser, entity, XML_FALSE);
+ result = processEntity(parser, entity, XML_FALSE, ENTITY_INTERNAL);
if (result != XML_ERROR_NONE)
return result;
} else if (parser->m_externalEntityRefHandler) {
@@ -3092,7 +3204,9 @@
}
if ((parser->m_tagLevel == 0)
&& (parser->m_parsingStatus.parsing != XML_FINISHED)) {
- if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
+ if (parser->m_parsingStatus.parsing == XML_SUSPENDED
+ || (parser->m_parsingStatus.parsing == XML_PARSING
+ && parser->m_reenter))
parser->m_processor = epilogProcessor;
else
return epilogProcessor(parser, next, end, nextPtr);
@@ -3153,7 +3267,9 @@
}
if ((parser->m_tagLevel == 0)
&& (parser->m_parsingStatus.parsing != XML_FINISHED)) {
- if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
+ if (parser->m_parsingStatus.parsing == XML_SUSPENDED
+ || (parser->m_parsingStatus.parsing == XML_PARSING
+ && parser->m_reenter))
parser->m_processor = epilogProcessor;
else
return epilogProcessor(parser, next, end, nextPtr);
@@ -3286,14 +3402,22 @@
break;
/* LCOV_EXCL_STOP */
}
- *eventPP = s = next;
switch (parser->m_parsingStatus.parsing) {
case XML_SUSPENDED:
+ *eventPP = next;
*nextPtr = next;
return XML_ERROR_NONE;
case XML_FINISHED:
+ *eventPP = next;
return XML_ERROR_ABORTED;
+ case XML_PARSING:
+ if (parser->m_reenter) {
+ *nextPtr = next;
+ return XML_ERROR_NONE;
+ }
+ /* Fall through */
default:;
+ *eventPP = s = next;
}
}
/* not reached */
@@ -4210,14 +4334,21 @@
/* LCOV_EXCL_STOP */
}
- *eventPP = s = next;
switch (parser->m_parsingStatus.parsing) {
case XML_SUSPENDED:
+ *eventPP = next;
*nextPtr = next;
return XML_ERROR_NONE;
case XML_FINISHED:
+ *eventPP = next;
return XML_ERROR_ABORTED;
+ case XML_PARSING:
+ if (parser->m_reenter) {
+ return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
+ }
+ /* Fall through */
default:;
+ *eventPP = s = next;
}
}
/* not reached */
@@ -4549,7 +4680,7 @@
}
/* found end of entity value - can store it now */
return storeEntityValue(parser, parser->m_encoding, s, end,
- XML_ACCOUNT_DIRECT);
+ XML_ACCOUNT_DIRECT, NULL);
} else if (tok == XML_TOK_XML_DECL) {
enum XML_Error result;
result = processXmlDecl(parser, 0, start, next);
@@ -4676,7 +4807,7 @@
break;
}
/* found end of entity value - can store it now */
- return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT);
+ return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT, NULL);
}
start = next;
}
@@ -5119,9 +5250,9 @@
#if XML_GE == 1
// This will store the given replacement text in
// parser->m_declEntity->textPtr.
- enum XML_Error result
- = storeEntityValue(parser, enc, s + enc->minBytesPerChar,
- next - enc->minBytesPerChar, XML_ACCOUNT_NONE);
+ enum XML_Error result = callStoreEntityValue(
+ parser, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar,
+ XML_ACCOUNT_NONE);
if (parser->m_declEntity) {
parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
parser->m_declEntity->textLen
@@ -5546,7 +5677,7 @@
enum XML_Error result;
XML_Bool betweenDecl
= (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
- result = processInternalEntity(parser, entity, betweenDecl);
+ result = processEntity(parser, entity, betweenDecl, ENTITY_INTERNAL);
if (result != XML_ERROR_NONE)
return result;
handleDefault = XML_FALSE;
@@ -5751,6 +5882,12 @@
return XML_ERROR_NONE;
case XML_FINISHED:
return XML_ERROR_ABORTED;
+ case XML_PARSING:
+ if (parser->m_reenter) {
+ *nextPtr = next;
+ return XML_ERROR_NONE;
+ }
+ /* Fall through */
default:
s = next;
tok = XmlPrologTok(enc, s, end, &next);
@@ -5818,28 +5955,58 @@
default:
return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
}
- parser->m_eventPtr = s = next;
switch (parser->m_parsingStatus.parsing) {
case XML_SUSPENDED:
+ parser->m_eventPtr = next;
*nextPtr = next;
return XML_ERROR_NONE;
case XML_FINISHED:
+ parser->m_eventPtr = next;
return XML_ERROR_ABORTED;
+ case XML_PARSING:
+ if (parser->m_reenter) {
+ return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
+ }
+ /* Fall through */
default:;
+ parser->m_eventPtr = s = next;
}
}
}
static enum XML_Error
-processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) {
- const char *textStart, *textEnd;
- const char *next;
- enum XML_Error result;
- OPEN_INTERNAL_ENTITY *openEntity;
+processEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl,
+ enum EntityType type) {
+ OPEN_INTERNAL_ENTITY *openEntity, **openEntityList, **freeEntityList;
+ switch (type) {
+ case ENTITY_INTERNAL:
+ parser->m_processor = internalEntityProcessor;
+ openEntityList = &parser->m_openInternalEntities;
+ freeEntityList = &parser->m_freeInternalEntities;
+ break;
+ case ENTITY_ATTRIBUTE:
+ openEntityList = &parser->m_openAttributeEntities;
+ freeEntityList = &parser->m_freeAttributeEntities;
+ break;
+ case ENTITY_VALUE:
+ openEntityList = &parser->m_openValueEntities;
+ freeEntityList = &parser->m_freeValueEntities;
+ break;
+ /* default case serves merely as a safety net in case of a
+ * wrong entityType. Therefore we exclude the following lines
+ * from the test coverage.
+ *
+ * LCOV_EXCL_START
+ */
+ default:
+ // Should not reach here
+ assert(0);
+ /* LCOV_EXCL_STOP */
+ }
- if (parser->m_freeInternalEntities) {
- openEntity = parser->m_freeInternalEntities;
- parser->m_freeInternalEntities = openEntity->next;
+ if (*freeEntityList) {
+ openEntity = *freeEntityList;
+ *freeEntityList = openEntity->next;
} else {
openEntity
= (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
@@ -5847,55 +6014,34 @@
return XML_ERROR_NO_MEMORY;
}
entity->open = XML_TRUE;
+ entity->hasMore = XML_TRUE;
#if XML_GE == 1
entityTrackingOnOpen(parser, entity, __LINE__);
#endif
entity->processed = 0;
- openEntity->next = parser->m_openInternalEntities;
- parser->m_openInternalEntities = openEntity;
+ openEntity->next = *openEntityList;
+ *openEntityList = openEntity;
openEntity->entity = entity;
+ openEntity->type = type;
openEntity->startTagLevel = parser->m_tagLevel;
openEntity->betweenDecl = betweenDecl;
openEntity->internalEventPtr = NULL;
openEntity->internalEventEndPtr = NULL;
- textStart = (const char *)entity->textPtr;
- textEnd = (const char *)(entity->textPtr + entity->textLen);
- /* Set a safe default value in case 'next' does not get set */
- next = textStart;
-
- if (entity->is_param) {
- int tok
- = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
- result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
- tok, next, &next, XML_FALSE, XML_FALSE,
- XML_ACCOUNT_ENTITY_EXPANSION);
- } else {
- result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding,
- textStart, textEnd, &next, XML_FALSE,
- XML_ACCOUNT_ENTITY_EXPANSION);
- }
- if (result == XML_ERROR_NONE) {
- if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
- entity->processed = (int)(next - textStart);
- parser->m_processor = internalEntityProcessor;
- } else if (parser->m_openInternalEntities->entity == entity) {
-#if XML_GE == 1
- entityTrackingOnClose(parser, entity, __LINE__);
-#endif /* XML_GE == 1 */
- entity->open = XML_FALSE;
- parser->m_openInternalEntities = openEntity->next;
- /* put openEntity back in list of free instances */
- openEntity->next = parser->m_freeInternalEntities;
- parser->m_freeInternalEntities = openEntity;
- }
+ // Only internal entities make use of the reenter flag,
+ // so there is no need to set it for other entity types.
+ if (type == ENTITY_INTERNAL) {
+ triggerReenter(parser);
}
- return result;
+ return XML_ERROR_NONE;
}
static enum XML_Error PTRCALL
internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
const char **nextPtr) {
+ UNUSED_P(s);
+ UNUSED_P(end);
+ UNUSED_P(nextPtr);
ENTITY *entity;
const char *textStart, *textEnd;
const char *next;
@@ -5905,68 +6051,67 @@
return XML_ERROR_UNEXPECTED_STATE;
entity = openEntity->entity;
- textStart = ((const char *)entity->textPtr) + entity->processed;
- textEnd = (const char *)(entity->textPtr + entity->textLen);
- /* Set a safe default value in case 'next' does not get set */
- next = textStart;
-
- if (entity->is_param) {
- int tok
- = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
- result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
- tok, next, &next, XML_FALSE, XML_TRUE,
- XML_ACCOUNT_ENTITY_EXPANSION);
- } else {
- result = doContent(parser, openEntity->startTagLevel,
- parser->m_internalEncoding, textStart, textEnd, &next,
- XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
- }
- if (result != XML_ERROR_NONE)
- return result;
+ // Every path through this "if" block returns to the caller
+ if (entity->hasMore) {
+ textStart = ((const char *)entity->textPtr) + entity->processed;
+ textEnd = (const char *)(entity->textPtr + entity->textLen);
+ /* Set a safe default value in case 'next' does not get set */
+ next = textStart;
+
+ if (entity->is_param) {
+ int tok
+ = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
+ result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
+ tok, next, &next, XML_FALSE, XML_FALSE,
+ XML_ACCOUNT_ENTITY_EXPANSION);
+ } else {
+ result = doContent(parser, openEntity->startTagLevel,
+ parser->m_internalEncoding, textStart, textEnd, &next,
+ XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
+ }
- if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
- entity->processed = (int)(next - (const char *)entity->textPtr);
+ if (result != XML_ERROR_NONE)
+ return result;
+ // Check if entity is complete, if not, mark down how much of it is
+ // processed
+ if (textEnd != next
+ && (parser->m_parsingStatus.parsing == XML_SUSPENDED
+ || (parser->m_parsingStatus.parsing == XML_PARSING
+ && parser->m_reenter))) {
+ entity->processed = (int)(next - (const char *)entity->textPtr);
+ return result;
+ }
+
+ // Entity is complete. We cannot close it here since we need to first
+ // process its possible inner entities (which are added to the
+ // m_openInternalEntities during doProlog or doContent calls above)
+ entity->hasMore = XML_FALSE;
+ triggerReenter(parser);
return result;
- }
+ } // End of entity processing, "if" block will return here
+ // Remove fully processed openEntity from open entity list.
#if XML_GE == 1
entityTrackingOnClose(parser, entity, __LINE__);
#endif
+ // openEntity is m_openInternalEntities' head, as we set it at the start of
+ // this function and we skipped doProlog and doContent calls with hasMore set
+ // to false. This means we can directly remove the head of
+ // m_openInternalEntities
+ assert(parser->m_openInternalEntities == openEntity);
entity->open = XML_FALSE;
- parser->m_openInternalEntities = openEntity->next;
+ parser->m_openInternalEntities = parser->m_openInternalEntities->next;
+
/* put openEntity back in list of free instances */
openEntity->next = parser->m_freeInternalEntities;
parser->m_freeInternalEntities = openEntity;
- // If there are more open entities we want to stop right here and have the
- // upcoming call to XML_ResumeParser continue with entity content, or it would
- // be ignored altogether.
- if (parser->m_openInternalEntities != NULL
- && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
- return XML_ERROR_NONE;
- }
-
- if (entity->is_param) {
- int tok;
- parser->m_processor = prologProcessor;
- tok = XmlPrologTok(parser->m_encoding, s, end, &next);
- return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
- (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
- XML_ACCOUNT_DIRECT);
- } else {
- parser->m_processor = contentProcessor;
- /* see externalEntityContentProcessor vs contentProcessor */
- result = doContent(parser, parser->m_parentParser ? 1 : 0,
- parser->m_encoding, s, end, nextPtr,
- (XML_Bool)! parser->m_parsingStatus.finalBuffer,
- XML_ACCOUNT_DIRECT);
- if (result == XML_ERROR_NONE) {
- if (! storeRawNames(parser))
- return XML_ERROR_NO_MEMORY;
- }
- return result;
+ if (parser->m_openInternalEntities == NULL) {
+ parser->m_processor = entity->is_param ? prologProcessor : contentProcessor;
}
+ triggerReenter(parser);
+ return XML_ERROR_NONE;
}
static enum XML_Error PTRCALL
@@ -5982,8 +6127,70 @@
storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
const char *ptr, const char *end, STRING_POOL *pool,
enum XML_Account account) {
- enum XML_Error result
- = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account);
+ const char *next = ptr;
+ enum XML_Error result = XML_ERROR_NONE;
+
+ while (1) {
+ if (! parser->m_openAttributeEntities) {
+ result = appendAttributeValue(parser, enc, isCdata, next, end, pool,
+ account, &next);
+ } else {
+ OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openAttributeEntities;
+ if (! openEntity)
+ return XML_ERROR_UNEXPECTED_STATE;
+
+ ENTITY *const entity = openEntity->entity;
+ const char *const textStart
+ = ((const char *)entity->textPtr) + entity->processed;
+ const char *const textEnd
+ = (const char *)(entity->textPtr + entity->textLen);
+ /* Set a safe default value in case 'next' does not get set */
+ const char *nextInEntity = textStart;
+ if (entity->hasMore) {
+ result = appendAttributeValue(
+ parser, parser->m_internalEncoding, isCdata, textStart, textEnd,
+ pool, XML_ACCOUNT_ENTITY_EXPANSION, &nextInEntity);
+ if (result != XML_ERROR_NONE)
+ break;
+ // Check if entity is complete, if not, mark down how much of it is
+ // processed. An XML_SUSPENDED check here is not required as
+ // appendAttributeValue will never suspend the parser.
+ if (textEnd != nextInEntity) {
+ entity->processed
+ = (int)(nextInEntity - (const char *)entity->textPtr);
+ continue;
+ }
+
+ // Entity is complete. We cannot close it here since we need to first
+ // process its possible inner entities (which are added to the
+ // m_openAttributeEntities during appendAttributeValue)
+ entity->hasMore = XML_FALSE;
+ continue;
+ } // End of entity processing, "if" block skips the rest
+
+ // Remove fully processed openEntity from open entity list.
+#if XML_GE == 1
+ entityTrackingOnClose(parser, entity, __LINE__);
+#endif
+ // openEntity is m_openAttributeEntities' head, since we set it at the
+ // start of this loop iteration and skipped the appendAttributeValue call
+ // because hasMore is false. This means we can directly remove the head
+ // of m_openAttributeEntities.
+ assert(parser->m_openAttributeEntities == openEntity);
+ entity->open = XML_FALSE;
+ parser->m_openAttributeEntities = parser->m_openAttributeEntities->next;
+
+ /* put openEntity back in list of free instances */
+ openEntity->next = parser->m_freeAttributeEntities;
+ parser->m_freeAttributeEntities = openEntity;
+ }
+
+ // Break if an error occurred or there is nothing left to process
+ if (result || (parser->m_openAttributeEntities == NULL && end == next)) {
+ break;
+ }
+ }
+
if (result)
return result;
if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
@@ -5996,7 +6203,7 @@
static enum XML_Error
appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
const char *ptr, const char *end, STRING_POOL *pool,
- enum XML_Account account) {
+ enum XML_Account account, const char **nextPtr) {
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
#ifndef XML_DTD
UNUSED_P(account);
@@ -6014,6 +6221,9 @@
#endif
switch (tok) {
case XML_TOK_NONE:
+ if (nextPtr) {
+ *nextPtr = next;
+ }
return XML_ERROR_NONE;
case XML_TOK_INVALID:
if (enc == parser->m_encoding)
@@ -6154,21 +6364,11 @@
return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
} else {
enum XML_Error result;
- const XML_Char *textEnd = entity->textPtr + entity->textLen;
- entity->open = XML_TRUE;
-#if XML_GE == 1
- entityTrackingOnOpen(parser, entity, __LINE__);
-#endif
- result = appendAttributeValue(parser, parser->m_internalEncoding,
- isCdata, (const char *)entity->textPtr,
- (const char *)textEnd, pool,
- XML_ACCOUNT_ENTITY_EXPANSION);
-#if XML_GE == 1
- entityTrackingOnClose(parser, entity, __LINE__);
-#endif
- entity->open = XML_FALSE;
- if (result)
- return result;
+ result = processEntity(parser, entity, XML_FALSE, ENTITY_ATTRIBUTE);
+ if ((result == XML_ERROR_NONE) && (nextPtr != NULL)) {
+ *nextPtr = next;
+ }
+ return result;
}
} break;
default:
@@ -6197,7 +6397,7 @@
static enum XML_Error
storeEntityValue(XML_Parser parser, const ENCODING *enc,
const char *entityTextPtr, const char *entityTextEnd,
- enum XML_Account account) {
+ enum XML_Account account, const char **nextPtr) {
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
STRING_POOL *pool = &(dtd->entityValuePool);
enum XML_Error result = XML_ERROR_NONE;
@@ -6215,8 +6415,9 @@
return XML_ERROR_NO_MEMORY;
}
+ const char *next;
for (;;) {
- const char *next
+ next
= entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */
int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
@@ -6278,16 +6479,8 @@
} else
dtd->keepProcessing = dtd->standalone;
} else {
- entity->open = XML_TRUE;
- entityTrackingOnOpen(parser, entity, __LINE__);
- result = storeEntityValue(
- parser, parser->m_internalEncoding, (const char *)entity->textPtr,
- (const char *)(entity->textPtr + entity->textLen),
- XML_ACCOUNT_ENTITY_EXPANSION);
- entityTrackingOnClose(parser, entity, __LINE__);
- entity->open = XML_FALSE;
- if (result)
- goto endEntityValue;
+ result = processEntity(parser, entity, XML_FALSE, ENTITY_VALUE);
+ goto endEntityValue;
}
break;
}
@@ -6375,6 +6568,81 @@
# ifdef XML_DTD
parser->m_prologState.inEntityValue = oldInEntityValue;
# endif /* XML_DTD */
+ // If 'nextPtr' is given, it should be updated during the processing
+ if (nextPtr != NULL) {
+ *nextPtr = next;
+ }
+ return result;
+}
+
+static enum XML_Error
+callStoreEntityValue(XML_Parser parser, const ENCODING *enc,
+ const char *entityTextPtr, const char *entityTextEnd,
+ enum XML_Account account) {
+ const char *next = entityTextPtr;
+ enum XML_Error result = XML_ERROR_NONE;
+ while (1) {
+ if (! parser->m_openValueEntities) {
+ result
+ = storeEntityValue(parser, enc, next, entityTextEnd, account, &next);
+ } else {
+ OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openValueEntities;
+ if (! openEntity)
+ return XML_ERROR_UNEXPECTED_STATE;
+
+ ENTITY *const entity = openEntity->entity;
+ const char *const textStart
+ = ((const char *)entity->textPtr) + entity->processed;
+ const char *const textEnd
+ = (const char *)(entity->textPtr + entity->textLen);
+ /* Set a safe default value in case 'next' does not get set */
+ const char *nextInEntity = textStart;
+ if (entity->hasMore) {
+ result = storeEntityValue(parser, parser->m_internalEncoding, textStart,
+ textEnd, XML_ACCOUNT_ENTITY_EXPANSION,
+ &nextInEntity);
+ if (result != XML_ERROR_NONE)
+ break;
+ // Check if entity is complete, if not, mark down how much of it is
+ // processed. An XML_SUSPENDED check here is not required as
+ // storeEntityValue will never suspend the parser.
+ if (textEnd != nextInEntity) {
+ entity->processed
+ = (int)(nextInEntity - (const char *)entity->textPtr);
+ continue;
+ }
+
+ // Entity is complete. We cannot close it here since we need to first
+ // process its possible inner entities (which are added to the
+ // m_openValueEntities during storeEntityValue)
+ entity->hasMore = XML_FALSE;
+ continue;
+ } // End of entity processing, "if" block skips the rest
+
+ // Remove fully processed openEntity from open entity list.
+# if XML_GE == 1
+ entityTrackingOnClose(parser, entity, __LINE__);
+# endif
+ // openEntity is m_openValueEntities' head, since we set it at the
+ // start of this loop iteration and skipped the storeEntityValue call
+ // because hasMore is false. This means we can directly remove the head
+ // of m_openValueEntities.
+ assert(parser->m_openValueEntities == openEntity);
+ entity->open = XML_FALSE;
+ parser->m_openValueEntities = parser->m_openValueEntities->next;
+
+ /* put openEntity back in list of free instances */
+ openEntity->next = parser->m_freeValueEntities;
+ parser->m_freeValueEntities = openEntity;
+ }
+
+ // Break if an error occurred or there is nothing left to process
+ if (result
+ || (parser->m_openValueEntities == NULL && entityTextEnd == next)) {
+ break;
+ }
+ }
+
return result;
}
@@ -7983,7 +8251,7 @@
(void *)rootParser, rootParser->m_entity_stats.countEverOpened,
rootParser->m_entity_stats.currentDepth,
rootParser->m_entity_stats.maximumDepthSeen,
- (rootParser->m_entity_stats.currentDepth - 1) * 2, "",
+ ((int)rootParser->m_entity_stats.currentDepth - 1) * 2, "",
entity->is_param ? "%" : "&", entityName, action, entity->textLen,
sourceLine);
}
@@ -8542,11 +8810,13 @@
return "\\xFE";
case 255:
return "\\xFF";
+ // LCOV_EXCL_START
default:
assert(0); /* never gets here */
return "dead code";
}
assert(0); /* never gets here */
+ // LCOV_EXCL_STOP
}
#endif /* XML_GE == 1 */
--- contrib/expat/tests/acc_tests.c.orig
+++ contrib/expat/tests/acc_tests.c
@@ -360,13 +360,16 @@
START_TEST(test_helper_unsigned_char_to_printable) {
// Smoke test
unsigned char uc = 0;
- for (; uc < (unsigned char)-1; uc++) {
+ for (;; uc++) {
set_subtest("char %u", (unsigned)uc);
const char *const printable = unsignedCharToPrintable(uc);
if (printable == NULL)
fail("unsignedCharToPrintable returned NULL");
else if (strlen(printable) < (size_t)1)
fail("unsignedCharToPrintable returned empty string");
+ if (uc == (unsigned char)-1) {
+ break;
+ }
}
// Two concrete samples
--- contrib/expat/tests/alloc_tests.c.orig
+++ contrib/expat/tests/alloc_tests.c
@@ -19,6 +19,7 @@
Copyright (c) 2020 Tim Gates
Copyright (c) 2021 Donghee Na
Copyright (c) 2023 Sony Corporation / Snild Dolkow
+ Copyright (c) 2025 Berkay Eren Ürün
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -450,6 +451,31 @@
}
END_TEST
+START_TEST(test_alloc_parameter_entity) {
+ const char *text = "\">"
+ "%param1;"
+ "]> &internal;content";
+ int i;
+ const int alloc_test_max_repeats = 30;
+
+ for (i = 0; i < alloc_test_max_repeats; i++) {
+ g_allocation_count = i;
+ XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
+ if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
+ != XML_STATUS_ERROR)
+ break;
+ alloc_teardown();
+ alloc_setup();
+ }
+ g_allocation_count = -1;
+ if (i == 0)
+ fail("Parameter entity processed despite duff allocator");
+ if (i == alloc_test_max_repeats)
+ fail("Parameter entity not processed at max allocation count");
+}
+END_TEST
+
/* Test the robustness against allocation failure of element handling
* Based on test_dtd_default_handling().
*/
@@ -2079,6 +2105,7 @@
tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_external_entity);
tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_ext_entity_set_encoding);
tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_internal_entity);
+ tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_parameter_entity);
tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_dtd_default_handling);
tcase_add_test(tc_alloc, test_alloc_explicit_encoding);
tcase_add_test(tc_alloc, test_alloc_set_base);
--- contrib/expat/tests/basic_tests.c.orig
+++ contrib/expat/tests/basic_tests.c
@@ -10,7 +10,7 @@
Copyright (c) 2003 Greg Stein
Copyright (c) 2005-2007 Steven Solie
Copyright (c) 2005-2012 Karl Waclawek
- Copyright (c) 2016-2024 Sebastian Pipping
+ Copyright (c) 2016-2025 Sebastian Pipping
Copyright (c) 2017-2022 Rhodri James
Copyright (c) 2017 Joe Orton
Copyright (c) 2017 José Gutiérrez de la Concha
@@ -19,6 +19,7 @@
Copyright (c) 2020 Tim Gates
Copyright (c) 2021 Donghee Na
Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow
+ Copyright (c) 2024-2025 Berkay Eren Ürün
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -1191,6 +1192,22 @@
}
END_TEST
+START_TEST(test_entity_start_tag_level_greater_than_one) {
+ const char *const text = "\n"
+ "]>\n"
+ "\n"
+ " &e1;\n"
+ "\n";
+
+ XML_Parser parser = XML_ParserCreate(NULL);
+ assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text),
+ /*isFinal*/ XML_TRUE)
+ == XML_STATUS_OK);
+ XML_ParserFree(parser);
+}
+END_TEST
+
START_TEST(test_wfc_no_recursive_entity_refs) {
const char *text = "\n"
@@ -1202,6 +1219,93 @@
}
END_TEST
+START_TEST(test_no_indirectly_recursive_entity_refs) {
+ struct TestCase {
+ const char *doc;
+ bool usesParameterEntities;
+ };
+
+ const struct TestCase cases[] = {
+ // general entity + character data
+ {"\n"
+ " \n"
+ "]>&e2;\n",
+ false},
+
+ // general entity + attribute value
+ {"\n"
+ " \n"
+ "]>\n",
+ false},
+
+ // parameter entity
+ {"\n"
+ " \n"
+ " \">\n"
+ " %define_g;\n"
+ "]>\n"
+ "\n",
+ true},
+ };
+ const XML_Bool reset_or_not[] = {XML_TRUE, XML_FALSE};
+
+ for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
+ for (size_t j = 0; j < sizeof(reset_or_not) / sizeof(reset_or_not[0]);
+ j++) {
+ const XML_Bool reset_wanted = reset_or_not[j];
+ const char *const doc = cases[i].doc;
+ const bool usesParameterEntities = cases[i].usesParameterEntities;
+
+ set_subtest("[%i,reset=%i] %s", (int)i, (int)j, doc);
+
+#ifdef XML_DTD // both GE and DTD
+ const bool rejection_expected = true;
+#elif XML_GE == 1 // GE but not DTD
+ const bool rejection_expected = ! usesParameterEntities;
+#else // neither DTD nor GE
+ const bool rejection_expected = false;
+#endif
+
+ XML_Parser parser = XML_ParserCreate(NULL);
+
+#ifdef XML_DTD
+ if (usesParameterEntities) {
+ assert_true(
+ XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS)
+ == 1);
+ }
+#else
+ UNUSED_P(usesParameterEntities);
+#endif // XML_DTD
+
+ const enum XML_Status status
+ = _XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc),
+ /*isFinal*/ XML_TRUE);
+
+ if (rejection_expected) {
+ assert_true(status == XML_STATUS_ERROR);
+ assert_true(XML_GetErrorCode(parser) == XML_ERROR_RECURSIVE_ENTITY_REF);
+ } else {
+ assert_true(status == XML_STATUS_OK);
+ }
+
+ if (reset_wanted) {
+ // This covers free'ing of (eventually) all three open entity lists by
+ // XML_ParserReset.
+ XML_ParserReset(parser, NULL);
+ }
+
+ // This covers free'ing of (eventually) all three open entity lists by
+ // XML_ParserFree (unless XML_ParserReset has already done that above).
+ XML_ParserFree(parser);
+ }
+ }
+}
+END_TEST
+
START_TEST(test_recursive_external_parameter_entity_2) {
struct TestCase {
const char *doc;
@@ -1417,7 +1521,9 @@
XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
g_resumable = XML_TRUE;
- if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
+ // can't use SINGLE_BYTES here, because it'll return early on suspension, and
+ // we won't know exactly how much input we actually managed to give Expat.
+ if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
!= XML_STATUS_SUSPENDED)
xml_failure(g_parser);
if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
@@ -1446,7 +1552,9 @@
XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
g_resumable = XML_TRUE;
g_abortable = XML_FALSE;
- if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
+ // can't use SINGLE_BYTES here, because it'll return early on suspension, and
+ // we won't know exactly how much input we actually managed to give Expat.
+ if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
!= XML_STATUS_SUSPENDED)
fail("Failed to double-suspend parser");
@@ -1830,12 +1938,19 @@
/* Test suspending the parser in cdata handler */
START_TEST(test_suspend_parser_between_cdata_calls) {
+ if (g_chunkSize != 0) {
+ // this test does not use SINGLE_BYTES, because of suspension
+ return;
+ }
+
const char *text = long_cdata_text;
enum XML_Status result;
XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
g_resumable = XML_TRUE;
- result = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
+ // can't use SINGLE_BYTES here, because it'll return early on suspension, and
+ // we won't know exactly how much input we actually managed to give Expat.
+ result = XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE);
if (result != XML_STATUS_SUSPENDED) {
if (result == XML_STATUS_ERROR)
xml_failure(g_parser);
@@ -2378,6 +2493,11 @@
* entity. Exercises some obscure code in XML_ParserReset().
*/
START_TEST(test_reset_in_entity) {
+ if (g_chunkSize != 0) {
+ // this test does not use SINGLE_BYTES, because of suspension
+ return;
+ }
+
const char *text = "\n"
"\n"
@@ -2387,7 +2507,9 @@
g_resumable = XML_TRUE;
XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
- if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
+ // can't use SINGLE_BYTES here, because it'll return early on suspension, and
+ // we won't know exactly how much input we actually managed to give Expat.
+ if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
== XML_STATUS_ERROR)
xml_failure(g_parser);
XML_GetParsingStatus(g_parser, &status);
@@ -3634,7 +3756,9 @@
XML_SetXmlDeclHandler(g_parser, entity_suspending_xdecl_handler);
XML_SetUserData(g_parser, g_parser);
g_resumable = XML_TRUE;
- if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
+ // can't use SINGLE_BYTES here, because it'll return early on suspension, and
+ // we won't know exactly how much input we actually managed to give Expat.
+ if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
!= XML_STATUS_SUSPENDED)
xml_failure(g_parser);
if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
@@ -3830,13 +3954,20 @@
/* Test syntax error is caught at parse resumption */
START_TEST(test_resume_entity_with_syntax_error) {
+ if (g_chunkSize != 0) {
+ // this test does not use SINGLE_BYTES, because of suspension
+ return;
+ }
+
const char *text = "Hi'>\n"
"]>\n"
"&foo;\n";
XML_SetStartElementHandler(g_parser, start_element_suspender);
- if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
+ // can't use SINGLE_BYTES here, because it'll return early on suspension, and
+ // we won't know exactly how much input we actually managed to give Expat.
+ if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
!= XML_STATUS_SUSPENDED)
xml_failure(g_parser);
if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR)
@@ -3960,7 +4091,7 @@
= {"\n"
"\n"
"%pe2;\n",
- external_entity_null_loader};
+ external_entity_null_loader, NULL};
XML_SetUserData(g_parser, &test_data);
XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
@@ -3978,7 +4109,7 @@
= {"\n"
"\n"
"%pe2;\n",
- NULL};
+ NULL, NULL};
XML_SetUserData(g_parser, &test_data);
XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
@@ -5278,6 +5409,151 @@
}
END_TEST
+/* Test a possible early return location in internalEntityProcessor */
+START_TEST(test_entity_ref_no_elements) {
+ const char *const text = "\n"
+ "]> &e1;"; // intentionally missing newline
+
+ XML_Parser parser = XML_ParserCreate(NULL);
+ assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
+ == XML_STATUS_ERROR);
+ assert_true(XML_GetErrorCode(parser) == XML_ERROR_NO_ELEMENTS);
+ XML_ParserFree(parser);
+}
+END_TEST
+
+/* Tests if chained entity references lead to unbounded recursion */
+START_TEST(test_deep_nested_entity) {
+ const size_t N_LINES = 60000;
+ const size_t SIZE_PER_LINE = 50;
+
+ char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE);
+ if (text == NULL) {
+ fail("malloc failed");
+ }
+
+ char *textPtr = text;
+
+ // Create the XML
+ textPtr += snprintf(textPtr, SIZE_PER_LINE,
+ "\n");
+
+ for (size_t i = 1; i < N_LINES; ++i) {
+ textPtr += snprintf(textPtr, SIZE_PER_LINE, " \n",
+ (long unsigned)i, (long unsigned)(i - 1));
+ }
+
+ snprintf(textPtr, SIZE_PER_LINE, "]> &s%lu;\n",
+ (long unsigned)(N_LINES - 1));
+
+ const XML_Char *const expected = XCS("deepText");
+
+ CharData storage;
+ CharData_Init(&storage);
+
+ XML_Parser parser = XML_ParserCreate(NULL);
+
+ XML_SetCharacterDataHandler(parser, accumulate_characters);
+ XML_SetUserData(parser, &storage);
+
+ if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
+ == XML_STATUS_ERROR)
+ xml_failure(parser);
+
+ CharData_CheckXMLChars(&storage, expected);
+ XML_ParserFree(parser);
+ free(text);
+}
+END_TEST
+
+/* Tests if chained entity references in attributes
+lead to unbounded recursion */
+START_TEST(test_deep_nested_attribute_entity) {
+ const size_t N_LINES = 60000;
+ const size_t SIZE_PER_LINE = 100;
+
+ char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE);
+ if (text == NULL) {
+ fail("malloc failed");
+ }
+
+ char *textPtr = text;
+
+ // Create the XML
+ textPtr += snprintf(textPtr, SIZE_PER_LINE,
+ "\n");
+
+ for (size_t i = 1; i < N_LINES; ++i) {
+ textPtr += snprintf(textPtr, SIZE_PER_LINE, " \n",
+ (long unsigned)i, (long unsigned)(i - 1));
+ }
+
+ snprintf(textPtr, SIZE_PER_LINE, "]> mainText\n",
+ (long unsigned)(N_LINES - 1));
+
+ AttrInfo doc_info[] = {{XCS("name"), XCS("deepText")}, {NULL, NULL}};
+ ElementInfo info[] = {{XCS("foo"), 1, NULL, NULL}, {NULL, 0, NULL, NULL}};
+ info[0].attributes = doc_info;
+
+ XML_Parser parser = XML_ParserCreate(NULL);
+ ParserAndElementInfo parserPlusElemenInfo = {parser, info};
+
+ XML_SetStartElementHandler(parser, counting_start_element_handler);
+ XML_SetUserData(parser, &parserPlusElemenInfo);
+
+ if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
+ == XML_STATUS_ERROR)
+ xml_failure(parser);
+
+ XML_ParserFree(parser);
+ free(text);
+}
+END_TEST
+
+START_TEST(test_deep_nested_entity_delayed_interpretation) {
+ const size_t N_LINES = 70000;
+ const size_t SIZE_PER_LINE = 100;
+
+ char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE);
+ if (text == NULL) {
+ fail("malloc failed");
+ }
+
+ char *textPtr = text;
+
+ // Create the XML
+ textPtr += snprintf(textPtr, SIZE_PER_LINE,
+ "\n");
+
+ for (size_t i = 1; i < N_LINES; ++i) {
+ textPtr += snprintf(textPtr, SIZE_PER_LINE,
+ " \n", (long unsigned)i,
+ (long unsigned)(i - 1));
+ }
+
+ snprintf(textPtr, SIZE_PER_LINE,
+ " \">\n"
+ " %%define_g;\n"
+ "]>\n"
+ "\n",
+ (long unsigned)(N_LINES - 1));
+
+ XML_Parser parser = XML_ParserCreate(NULL);
+
+ XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
+ if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
+ == XML_STATUS_ERROR)
+ xml_failure(parser);
+
+ XML_ParserFree(parser);
+ free(text);
+}
+END_TEST
+
START_TEST(test_nested_entity_suspend) {
const char *const text = "'>\n"
@@ -5308,6 +5584,35 @@
}
END_TEST
+START_TEST(test_nested_entity_suspend_2) {
+ const char *const text = "\n"
+ " \n"
+ " \n"
+ "]>\n"
+ "&ge3;";
+ const XML_Char *const expected = XCS("head3") XCS("head2") XCS("head1")
+ XCS("Z") XCS("tail1") XCS("tail2") XCS("tail3");
+ CharData storage;
+ CharData_Init(&storage);
+ XML_Parser parser = XML_ParserCreate(NULL);
+ ParserPlusStorage parserPlusStorage = {parser, &storage};
+ // The handler gets both the parser (to suspend it) and the storage (to append)
+ XML_SetCharacterDataHandler(parser, accumulate_char_data_and_suspend);
+ XML_SetUserData(parser, &parserPlusStorage);
+ // Parse everything as final input, then resume for as long as we are suspended
+ enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
+ while (status == XML_STATUS_SUSPENDED) {
+ status = XML_ResumeParser(parser);
+ }
+ if (status != XML_STATUS_OK)
+ xml_failure(parser);
+ // The collected character data must equal the expected concatenation
+ CharData_CheckXMLChars(&storage, expected);
+ XML_ParserFree(parser);
+}
+END_TEST
+
/* Regression test for quadratic parsing on large tokens */
START_TEST(test_big_tokens_scale_linearly) {
const struct {
@@ -5968,7 +6273,9 @@
tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset);
tcase_add_test(tc_basic, test_not_standalone_handler_reject);
tcase_add_test(tc_basic, test_not_standalone_handler_accept);
+ tcase_add_test(tc_basic, test_entity_start_tag_level_greater_than_one);
tcase_add_test__if_xml_ge(tc_basic, test_wfc_no_recursive_entity_refs);
+ tcase_add_test(tc_basic, test_no_indirectly_recursive_entity_refs);
tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_parse);
tcase_add_test__if_xml_ge(tc_basic, test_dtd_default_handling);
tcase_add_test(tc_basic, test_dtd_attr_handling);
@@ -6147,7 +6454,13 @@
tcase_add_test(tc_basic, test_empty_element_abort);
tcase_add_test__ifdef_xml_dtd(tc_basic,
test_pool_integrity_with_unfinished_attr);
+ tcase_add_test__if_xml_ge(tc_basic, test_entity_ref_no_elements);
+ tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_entity);
+ tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_attribute_entity);
+ tcase_add_test__if_xml_ge(tc_basic,
+ test_deep_nested_entity_delayed_interpretation);
tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend);
+ tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend_2);
tcase_add_test(tc_basic, test_big_tokens_scale_linearly);
tcase_add_test(tc_basic, test_set_reparse_deferral);
tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
--- contrib/expat/tests/benchmark/benchmark.c.orig
+++ contrib/expat/tests/benchmark/benchmark.c
@@ -8,7 +8,7 @@
Copyright (c) 2003-2006 Karl Waclawek
Copyright (c) 2005-2007 Steven Solie
- Copyright (c) 2017-2023 Sebastian Pipping
+ Copyright (c) 2017-2025 Sebastian Pipping
Copyright (c) 2017 Rhodri James
Licensed under the MIT license:
@@ -32,10 +32,18 @@
USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
+#define _POSIX_C_SOURCE 1 // fdopen
+
+#if defined(_MSC_VER)
+# include // _open, _close
+#else
+# include // close
+#endif
+
+#include // open
#include
#include
#include // ptrdiff_t
-#include
#include
#include
#include "expat.h"
@@ -52,17 +60,18 @@
# define XML_FMT_STR "s"
#endif
-static void
+static int
usage(const char *prog, int rc) {
fprintf(stderr, "usage: %s [-n] filename bufferSize nr_of_loops\n", prog);
- exit(rc);
+ return rc;
}
int
main(int argc, char *argv[]) {
XML_Parser parser;
char *XMLBuf, *XMLBufEnd, *XMLBufPtr;
- FILE *fd;
+ int fd;
+ FILE *file;
struct stat fileAttr;
int nrOfLoops, bufferSize, i, isFinal;
size_t fileSize;
@@ -76,34 +85,48 @@
ns = 1;
j = 1;
} else
- usage(argv[0], 1);
+ return usage(argv[0], 1);
}
}
if (argc != j + 4)
- usage(argv[0], 1);
+ return usage(argv[0], 1);
- if (stat(argv[j + 1], &fileAttr) != 0) {
- fprintf(stderr, "could not access file '%s'\n", argv[j + 1]);
+ fd = open(argv[j + 1], O_RDONLY);
+ if (fd == -1) {
+ fprintf(stderr, "could not open file '%s'\n", argv[j + 1]);
return 2;
}
- fd = fopen(argv[j + 1], "r");
- if (! fd) {
- fprintf(stderr, "could not open file '%s'\n", argv[j + 1]);
- exit(2);
+ if (fstat(fd, &fileAttr) != 0) {
+ close(fd);
+ fprintf(stderr, "could not fstat file '%s'\n", argv[j + 1]);
+ return 2;
+ }
+
+ file = fdopen(fd, "r");
+ if (! file) {
+ close(fd);
+ fprintf(stderr, "could not fdopen file '%s'\n", argv[j + 1]);
+ return 2;
}
bufferSize = atoi(argv[j + 2]);
nrOfLoops = atoi(argv[j + 3]);
if (bufferSize <= 0 || nrOfLoops <= 0) {
+ fclose(file); // NOTE: this closes fd as well
fprintf(stderr, "buffer size and nr of loops must be greater than zero.\n");
- exit(3);
+ return 3;
}
XMLBuf = malloc(fileAttr.st_size);
- fileSize = fread(XMLBuf, sizeof(char), fileAttr.st_size, fd);
- fclose(fd);
+ if (XMLBuf == NULL) {
+ fclose(file); // NOTE: this closes fd as well
+ fprintf(stderr, "out of memory.\n");
+ return 5; // allocation failure; the other error paths here return 1 to 4
+ }
+ fileSize = fread(XMLBuf, sizeof(char), fileAttr.st_size, file);
+ fclose(file); // NOTE: this closes fd as well
if (ns)
parser = XML_ParserCreateNS(NULL, '!');
@@ -132,7 +155,7 @@
XML_GetCurrentColumnNumber(parser));
free(XMLBuf);
XML_ParserFree(parser);
- exit(4);
+ return 4;
}
XMLBufPtr += bufferSize;
} while (! isFinal);
--- contrib/expat/tests/common.c.orig
+++ contrib/expat/tests/common.c
@@ -10,7 +10,7 @@
Copyright (c) 2003 Greg Stein
Copyright (c) 2005-2007 Steven Solie
Copyright (c) 2005-2012 Karl Waclawek
- Copyright (c) 2016-2024 Sebastian Pipping
+ Copyright (c) 2016-2025 Sebastian Pipping
Copyright (c) 2017-2022 Rhodri James
Copyright (c) 2017 Joe Orton
Copyright (c) 2017 José Gutiérrez de la Concha
@@ -42,6 +42,8 @@
*/
#include
+#include
+#include // for SIZE_MAX
#include
#include
@@ -202,6 +204,12 @@
for (; len > chunksize; len -= chunksize, s += chunksize) {
enum XML_Status res = XML_Parse(parser, s, chunksize, XML_FALSE);
if (res != XML_STATUS_OK) {
+ if ((res == XML_STATUS_SUSPENDED) && (len > chunksize)) {
+ fail("Use of function _XML_Parse_SINGLE_BYTES with a chunk size "
+ "greater than 0 (from g_chunkSize) does not work well with "
+ "suspension. Please consider use of plain XML_Parse at this "
+ "place in your test, instead.");
+ }
return res;
}
}
@@ -294,3 +302,26 @@
g_reallocation_count--;
return realloc(ptr, size);
}
+
+// Portable remake of strndup(3) for C99; does not care about space efficiency.
+// Returns a malloc'ed, NUL-terminated copy of at most n bytes of s (caller
+// frees), or NULL with errno set to EINVAL (bad argument) or ENOMEM.
+char *
+portable_strndup(const char *s, size_t n) {
+  if ((s == NULL) || (n == SIZE_MAX)) { // SIZE_MAX would overflow n + 1
+    errno = EINVAL;
+    return NULL;
+  }
+  char *const buffer = (char *)malloc(n + 1);
+  if (buffer == NULL) {
+    errno = ENOMEM;
+    return NULL;
+  }
+  errno = 0;
+  size_t len = 0; // like strndup(3), never read s beyond its terminator
+  while ((len < n) && (s[len] != '\0'))
+    len++;
+  memcpy(buffer, s, len);
+  buffer[len] = '\0';
+  return buffer;
+}
--- contrib/expat/tests/common.h.orig
+++ contrib/expat/tests/common.h
@@ -10,7 +10,7 @@
Copyright (c) 2003 Greg Stein
Copyright (c) 2005-2007 Steven Solie
Copyright (c) 2005-2012 Karl Waclawek
- Copyright (c) 2016-2024 Sebastian Pipping
+ Copyright (c) 2016-2025 Sebastian Pipping
Copyright (c) 2017-2022 Rhodri James
Copyright (c) 2017 Joe Orton
Copyright (c) 2017 José Gutiérrez de la Concha
@@ -146,6 +146,8 @@
extern void *duff_reallocator(void *ptr, size_t size);
+extern char *portable_strndup(const char *s, size_t n);
+
#endif /* XML_COMMON_H */
#ifdef __cplusplus
--- contrib/expat/tests/handlers.c.orig
+++ contrib/expat/tests/handlers.c
@@ -1842,6 +1842,15 @@
XML_FreeContentModel(g_parser, model);
}
+void XMLCALL
+suspend_after_element_declaration(void *userData, const XML_Char *name,
+ XML_Content *model) { // element declaration handler that suspends the parser
+ UNUSED_P(name);
+ XML_Parser parser = (XML_Parser)userData; // user data is the parser itself
+ assert_true(XML_StopParser(parser, /*resumable*/ XML_TRUE) == XML_STATUS_OK);
+ XML_FreeContentModel(parser, model); // release the model handed to this handler
+}
+
void XMLCALL
accumulate_pi_characters(void *userData, const XML_Char *target,
const XML_Char *data) {
@@ -1882,6 +1891,20 @@
CharData_AppendXMLChars(storage, XCS("\n"), 1);
}
+/* Character data handler: records the text, then suspends at the first 'Z' */
+void XMLCALL
+accumulate_char_data_and_suspend(void *userData, const XML_Char *s, int len) {
+  ParserPlusStorage *const ctx = (ParserPlusStorage *)userData;
+  int pos = 0;
+
+  CharData_AppendXMLChars(ctx->storage, s, len);
+
+  while ((pos < len) && (s[pos] != 'Z'))
+    pos++;
+  if (pos < len) /* a 'Z' is in this chunk: stop, resumable */
+    XML_StopParser(ctx->parser, XML_TRUE);
+}
+
void XMLCALL
accumulate_start_element(void *userData, const XML_Char *name,
const XML_Char **atts) {
--- contrib/expat/tests/handlers.h.orig
+++ contrib/expat/tests/handlers.h
@@ -325,6 +325,7 @@
typedef struct ext_hdlr_data {
const char *parse_text;
XML_ExternalEntityRefHandler handler;
+ CharData *storage;
} ExtHdlrData;
extern int XMLCALL external_entity_oneshot_loader(XML_Parser parser,
@@ -557,6 +558,10 @@
extern void XMLCALL element_decl_suspender(void *userData, const XML_Char *name,
XML_Content *model);
+extern void XMLCALL suspend_after_element_declaration(void *userData,
+ const XML_Char *name,
+ XML_Content *model);
+
extern void XMLCALL accumulate_pi_characters(void *userData,
const XML_Char *target,
const XML_Char *data);
@@ -569,6 +574,10 @@
const XML_Char *systemId, const XML_Char *publicId,
const XML_Char *notationName);
+extern void XMLCALL accumulate_char_data_and_suspend(void *userData,
+ const XML_Char *s,
+ int len);
+
extern void XMLCALL accumulate_start_element(void *userData,
const XML_Char *name,
const XML_Char **atts);
--- contrib/expat/tests/minicheck.h.orig
+++ contrib/expat/tests/minicheck.h
@@ -14,7 +14,7 @@
Copyright (c) 2004-2006 Fred L. Drake, Jr.
Copyright (c) 2006-2012 Karl Waclawek
- Copyright (c) 2016-2024 Sebastian Pipping
+ Copyright (c) 2016-2025 Sebastian Pipping
Copyright (c) 2022 Rhodri James
Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow
Licensed under the MIT license:
@@ -129,8 +129,10 @@
* Prototypes for the actual implementation.
*/
-# if defined(__GNUC__)
+# if defined(__has_attribute)
+# if __has_attribute(noreturn)
__attribute__((noreturn))
+# endif
# endif
void
_fail(const char *file, int line, const char *msg);
--- contrib/expat/tests/misc_tests.c.orig
+++ contrib/expat/tests/misc_tests.c
@@ -10,7 +10,7 @@
Copyright (c) 2003 Greg Stein
Copyright (c) 2005-2007 Steven Solie
Copyright (c) 2005-2012 Karl Waclawek
- Copyright (c) 2016-2024 Sebastian Pipping
+ Copyright (c) 2016-2025 Sebastian Pipping
Copyright (c) 2017-2022 Rhodri James
Copyright (c) 2017 Joe Orton
Copyright (c) 2017 José Gutiérrez de la Concha
@@ -59,6 +59,9 @@
#include "handlers.h"
#include "misc_tests.h"
+void XMLCALL accumulate_characters_ext_handler(void *userData,
+ const XML_Char *s, int len);
+
/* Test that a failure to allocate the parser structure fails gracefully */
START_TEST(test_misc_alloc_create_parser) {
XML_Memory_Handling_Suite memsuite = {duff_allocator, realloc, free};
@@ -208,7 +211,7 @@
if (! versions_equal(&read_version, &parsed_version))
fail("Version mismatch");
- if (xcstrcmp(version_text, XCS("expat_2.6.4"))) /* needs bump on releases */
+ if (xcstrcmp(version_text, XCS("expat_2.7.1"))) /* needs bump on releases */
fail("XML_*_VERSION in expat.h out of sync?\n");
}
END_TEST
@@ -294,6 +297,7 @@
parser = XML_ParserCreate(NULL);
XML_SetElementHandler(parser, start_element_issue_240, end_element_issue_240);
mydata = (DataIssue240 *)malloc(sizeof(DataIssue240));
+ assert_true(mydata != NULL);
mydata->parser = parser;
mydata->deep = 0;
XML_SetUserData(parser, mydata);
@@ -315,6 +319,7 @@
parser = XML_ParserCreate(NULL);
XML_SetElementHandler(parser, start_element_issue_240, end_element_issue_240);
mydata = (DataIssue240 *)malloc(sizeof(DataIssue240));
+ assert_true(mydata != NULL);
mydata->parser = parser;
mydata->deep = 0;
XML_SetUserData(parser, mydata);
@@ -328,64 +333,119 @@
END_TEST
START_TEST(test_misc_deny_internal_entity_closing_doctype_issue_317) {
- const char *const inputOne = "'>\n"
- "\n"
- "%e;";
+ const char *const inputOne
+ = "'>\n"
+ "%element_d;\n"
+ "'>\n"
+ "\n"
+ "%e;";
const char *const inputTwo
= "'>\n"
+ "%element_d;\n"
"'>\n"
"\n"
"%e2;";
- const char *const inputThree = "\n"
- "\n"
- "%e;/>";
- const char *const inputIssue317 = "\n"
- "Hell'>\n"
- "%foo;\n"
- "]>\n"
- "Hello, world";
+ const char *const inputThree
+ = "'>\n"
+ "%element_d;\n"
+ "\n"
+ "\n"
+ "%e;/>";
+ const char *const inputIssue317
+ = "'>\n"
+ "%element_doc;\n"
+ "\n"
+ "Hell'>\n"
+ "%foo;\n"
+ "]>\n"
+ "Hello, world";
const char *const inputs[] = {inputOne, inputTwo, inputThree, inputIssue317};
+ const XML_Bool suspendOrNot[] = {XML_FALSE, XML_TRUE};
size_t inputIndex = 0;
for (; inputIndex < sizeof(inputs) / sizeof(inputs[0]); inputIndex++) {
- set_subtest("%s", inputs[inputIndex]);
- XML_Parser parser;
- enum XML_Status parseResult;
- int setParamEntityResult;
- XML_Size lineNumber;
- XML_Size columnNumber;
- const char *const input = inputs[inputIndex];
-
- parser = XML_ParserCreate(NULL);
- setParamEntityResult
- = XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
- if (setParamEntityResult != 1)
- fail("Failed to set XML_PARAM_ENTITY_PARSING_ALWAYS.");
-
- parseResult = _XML_Parse_SINGLE_BYTES(parser, input, (int)strlen(input), 0);
- if (parseResult != XML_STATUS_ERROR) {
- parseResult = _XML_Parse_SINGLE_BYTES(parser, "", 0, 1);
+ for (size_t suspendOrNotIndex = 0;
+ suspendOrNotIndex < sizeof(suspendOrNot) / sizeof(suspendOrNot[0]);
+ suspendOrNotIndex++) {
+ const char *const input = inputs[inputIndex];
+ const XML_Bool suspend = suspendOrNot[suspendOrNotIndex];
+ if (suspend && (g_chunkSize > 0)) {
+ // We cannot use _XML_Parse_SINGLE_BYTES below due to suspension, and
+ // so chunk sizes >0 would only repeat the very same test
+ // due to use of plain XML_Parse; we are saving upon that runtime:
+ return;
+ }
+
+ set_subtest("[input=%d suspend=%s] %s", (int)inputIndex,
+ suspend ? "true" : "false", input);
+ XML_Parser parser;
+ enum XML_Status parseResult;
+ int setParamEntityResult;
+ XML_Size lineNumber;
+ XML_Size columnNumber;
+
+ parser = XML_ParserCreate(NULL);
+ setParamEntityResult
+ = XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
+ if (setParamEntityResult != 1)
+ fail("Failed to set XML_PARAM_ENTITY_PARSING_ALWAYS.");
+
+ if (suspend) {
+ XML_SetUserData(parser, parser);
+ XML_SetElementDeclHandler(parser, suspend_after_element_declaration);
+ }
+
+ if (suspend) {
+ // can't use SINGLE_BYTES here, because it'll return early on
+ // suspension, and we won't know exactly how much input we actually
+ // managed to give Expat.
+ parseResult = XML_Parse(parser, input, (int)strlen(input), 0);
+
+ while (parseResult == XML_STATUS_SUSPENDED) {
+ parseResult = XML_ResumeParser(parser);
+ }
+
+ if (parseResult != XML_STATUS_ERROR) {
+ // can't use SINGLE_BYTES here, because it'll return early on
+ // suspension, and we won't know exactly how much input we actually
+ // managed to give Expat.
+ parseResult = XML_Parse(parser, "", 0, 1);
+ }
+
+ while (parseResult == XML_STATUS_SUSPENDED) {
+ parseResult = XML_ResumeParser(parser);
+ }
+ } else {
+ parseResult
+ = _XML_Parse_SINGLE_BYTES(parser, input, (int)strlen(input), 0);
+
+ if (parseResult != XML_STATUS_ERROR) {
+ parseResult = _XML_Parse_SINGLE_BYTES(parser, "", 0, 1);
+ }
+ }
+
if (parseResult != XML_STATUS_ERROR) {
fail("Parsing was expected to fail but succeeded.");
}
- }
- if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)
- fail("Error code does not match XML_ERROR_INVALID_TOKEN");
+ if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)
+ fail("Error code does not match XML_ERROR_INVALID_TOKEN");
- lineNumber = XML_GetCurrentLineNumber(parser);
- if (lineNumber != 4)
- fail("XML_GetCurrentLineNumber does not work as expected.");
+ lineNumber = XML_GetCurrentLineNumber(parser);
+ if (lineNumber != 6)
+ fail("XML_GetCurrentLineNumber does not work as expected.");
- columnNumber = XML_GetCurrentColumnNumber(parser);
- if (columnNumber != 0)
- fail("XML_GetCurrentColumnNumber does not work as expected.");
+ columnNumber = XML_GetCurrentColumnNumber(parser);
+ if (columnNumber != 0)
+ fail("XML_GetCurrentColumnNumber does not work as expected.");
- XML_ParserFree(parser);
+ XML_ParserFree(parser);
+ }
}
}
END_TEST
@@ -519,6 +579,105 @@
}
END_TEST
+/* Variant of accumulate_characters whose user data is an ExtHdlrData; used by
+ * test_renter_loop_finite_content below */
+void XMLCALL
+accumulate_characters_ext_handler(void *userData, const XML_Char *s, int len) {
+  CharData *const sink = ((ExtHdlrData *)userData)->storage;
+  CharData_AppendXMLChars(sink, s, len);
+}
+
+/* Test that internalEntityProcessor does not re-enter forever;
+ * based on files tests/xmlconf/xmltest/valid/ext-sa/012.{xml,ent} */
+START_TEST(test_renter_loop_finite_content) {
+ CharData storage;
+ CharData_Init(&storage);
+ const char *const text = "\n"
+ "\n"
+ "\n"
+ "\n"
+ "\n"
+ "\n"
+ "]>\n"
+ "&e1;\n";
+ ExtHdlrData test_data = {"&e4;\n", external_entity_null_loader, &storage};
+ const XML_Char *const expected = XCS("(e5)\n");
+ // test_data initializes parse_text, handler and storage, in that order
+ XML_Parser parser = XML_ParserCreate(NULL);
+ assert_true(parser != NULL);
+ XML_SetUserData(parser, &test_data);
+ XML_SetExternalEntityRefHandler(parser, external_entity_oneshot_loader);
+ XML_SetCharacterDataHandler(parser, accumulate_characters_ext_handler);
+ if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
+ == XML_STATUS_ERROR)
+ xml_failure(parser);
+ // The character data handler appended to storage; compare it with expected
+ CharData_CheckXMLChars(&storage, expected);
+ XML_ParserFree(parser);
+}
+END_TEST
+
+// Inspired by function XML_OriginalString of Perl's XML::Parser
+static char *
+dup_original_string(XML_Parser parser) {
+ const int byte_count = XML_GetCurrentByteCount(parser);
+ // Returns a malloc'ed copy that the caller frees, or NULL without context bytes
+ assert_true(byte_count >= 0);
+ // -1 sentinels; the XML_CONTEXT_BYTES > 0 branch asserts they became >= 0
+ int offset = -1;
+ int size = -1;
+ // context + offset is taken as the start of the byte_count bytes to copy
+ const char *const context = XML_GetInputContext(parser, &offset, &size);
+ // With XML_CONTEXT_BYTES == 0 no context is expected, so nothing is copied
+#if XML_CONTEXT_BYTES > 0
+ assert_true(context != NULL);
+ assert_true(offset >= 0);
+ assert_true(size >= 0);
+ return portable_strndup(context + offset, byte_count);
+#else
+ assert_true(context == NULL);
+ return NULL;
+#endif
+}
+
+static void
+on_characters_issue_980(void *userData, const XML_Char *s, int len) {
+ (void)s;
+ (void)len;
+ XML_Parser parser = (XML_Parser)userData;
+ // Copy the input text behind the current event instead of using s/len
+ char *const original_string = dup_original_string(parser);
+ // With context bytes, that text must be the entity reference as written
+#if XML_CONTEXT_BYTES > 0
+ assert_true(original_string != NULL);
+ assert_true(strcmp(original_string, "&draft.day;") == 0);
+ free(original_string);
+#else
+ assert_true(original_string == NULL);
+#endif
+}
+
+START_TEST(test_misc_expected_event_ptr_issue_980) {
+ // NOTE: This is a tiny subset of sample "REC-xml-19980210.xml"
+ // from Perl's XML::Parser
+ const char *const doc = "\n"
+ "]>\n"
+ "&draft.day;\n";
+ // The parser doubles as user data so the character handler can query it
+ XML_Parser parser = XML_ParserCreate(NULL);
+ XML_SetUserData(parser, parser);
+ XML_SetCharacterDataHandler(parser, on_characters_issue_980);
+ // Feed the whole document as final input; anything but XML_STATUS_OK fails
+ assert_true(_XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc),
+ /*isFinal=*/XML_TRUE)
+ == XML_STATUS_OK);
+
+ XML_ParserFree(parser);
+}
+END_TEST
+
void
make_miscellaneous_test_case(Suite *s) {
TCase *tc_misc = tcase_create("miscellaneous tests");
@@ -545,4 +704,6 @@
tcase_add_test(tc_misc, test_misc_char_handler_stop_without_leak);
tcase_add_test(tc_misc, test_misc_resumeparser_not_crashing);
tcase_add_test(tc_misc, test_misc_stopparser_rejects_unstarted_parser);
+ tcase_add_test__if_xml_ge(tc_misc, test_renter_loop_finite_content);
+ tcase_add_test(tc_misc, test_misc_expected_event_ptr_issue_980);
}
--- contrib/expat/tests/xmltest.sh.orig
+++ contrib/expat/tests/xmltest.sh
@@ -2,8 +2,8 @@
# EXPAT TEST SCRIPT FOR W3C XML TEST SUITE
#
# This script can be used to exercise Expat against the
-# w3c.org xml test suite, available from
-# http://www.w3.org/XML/Test/xmlts20020606.zip.
+# w3c.org xml test suite, available from:
+# https://www.w3.org/XML/Test/xmlts20020606.zip
#
# To run this script, first set XMLWF below so that xmlwf can be
# found, then set the output directory with OUTPUT.
@@ -30,6 +30,7 @@
# Copyright (c) 2002 Karl Waclawek
# Copyright (c) 2008-2019 Sebastian Pipping
# Copyright (c) 2017 Rhodri James
+# Copyright (c) 2025 Hanno Böck
# Licensed under the MIT license:
#
# Permission is hereby granted, free of charge, to any person obtaining
--- contrib/expat/xmlwf/readfilemap.c.orig
+++ contrib/expat/xmlwf/readfilemap.c
@@ -14,6 +14,7 @@
Copyright (c) 2017 Rhodri James
Copyright (c) 2017 Franek Korta
Copyright (c) 2022 Sean McBride
+ Copyright (c) 2025 Hanno Böck
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -55,7 +56,7 @@
# define EXPAT_read_count_t int
# define EXPAT_read_req_t unsigned int
#else /* POSIX */
-/* http://pubs.opengroup.org/onlinepubs/009695399/functions/read.html */
+/* https://pubs.opengroup.org/onlinepubs/009695399/functions/read.html */
# define EXPAT_read read
# define EXPAT_read_count_t ssize_t
# define EXPAT_read_req_t size_t
--- lib/libexpat/expat_config.h.orig
+++ lib/libexpat/expat_config.h
@@ -89,7 +89,7 @@
#define PACKAGE_NAME "expat"
/* Define to the full name and version of this package. */
-#define PACKAGE_STRING "expat 2.6.4"
+#define PACKAGE_STRING "expat 2.7.1"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "expat"
@@ -98,7 +98,7 @@
#define PACKAGE_URL ""
/* Define to the version of this package. */
-#define PACKAGE_VERSION "2.6.4"
+#define PACKAGE_VERSION "2.7.1"
/* Define to 1 if all of the C90 standard headers exist (not just the ones
required in a freestanding environment). This macro is provided for
@@ -106,7 +106,7 @@
#define STDC_HEADERS 1
/* Version number of package */
-#define VERSION "2.6.4"
+#define VERSION "2.7.1"
/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
significant byte first (like Motorola and SPARC, unlike Intel). */
--- lib/libexpat/libbsdxml.3.orig
+++ lib/libexpat/libbsdxml.3
@@ -23,7 +23,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"/
-.Dd December 8, 2024
+.Dd April 7, 2025
.Dt LIBBSDXML 3
.Os
.Sh NAME
@@ -34,7 +34,7 @@
.Sh DESCRIPTION
The
.Nm
-library is a verbatim copy of the eXpat XML library version 2.6.4.
+library is a verbatim copy of the eXpat XML library version 2.7.1.
.Pp
The
.Nm