@@ -319,7 +319,7 @@
Developer Studio installed,
you can use CMake to generate a
.sln
file, e.g.
-cmake -G"Visual Studio 15 2017" -DCMAKE_BUILD_TYPE=RelWithDebInfo .
+cmake -G"Visual Studio 16 2019" -DCMAKE_BUILD_TYPE=RelWithDebInfo .
, and build Expat using
msbuild /m expat.sln
after.
Alternatively, you may download the Win32 binary package that
@@ -356,10 +356,7 @@
Configuring Expat Using the Pre-Processor
Expat's feature set can be configured using a small number of
-pre-processor definitions. The definition of this symbols does not
-affect the set of entry points for Expat, only the behavior of the API
-and the definition of character types in the case of
-XML_UNICODE_WCHAR_T
. The symbols are:
+pre-processor definitions. The symbols are:
- XML_GE
@@ -1138,7 +1135,9 @@
that are part of the document is indicated by len
. This means
that s
doesn't have to be null-terminated. It also means that
if len
is larger than the number of bytes in the block of
-memory that s
points at, then a memory fault is likely. The
+memory that s
points at, then a memory fault is likely.
+Negative values for len
are rejected since Expat 2.2.1.
+The
isFinal
parameter informs the parser that this is the last
piece of the document. Frequently, the last piece is empty (i.e.
len
is zero.)
@@ -1186,11 +1185,17 @@
int isFinal);
+
This is just like XML_Parse
,
except in this case Expat provides the buffer. By obtaining the
buffer from Expat with the XML_GetBuffer
function, the application can avoid double
copying of the input.
+
+
+
+Negative values for len
are rejected since Expat 2.6.3.
+
XML_GetBuffer
@@ -1262,6 +1267,11 @@
XML_STATUS_ERROR
otherwise. The possible error codes
are:
+ XML_ERROR_NOT_STARTED
+ -
+ when stopping or suspending a parser before it has started,
+ added in Expat 2.6.4.
+
XML_ERROR_SUSPENDED
- when suspending an already suspended parser.
XML_ERROR_FINISHED
--- contrib/expat/doc/xmlwf.1.orig
+++ contrib/expat/doc/xmlwf.1
@@ -5,7 +5,7 @@
\\$2 \(la\\$1\(ra\\$3
..
.if \n(.g .mso www.tmac
-.TH XMLWF 1 "February 6, 2024" "" ""
+.TH XMLWF 1 "March 27, 2025" "" ""
.SH NAME
xmlwf \- Determines if an XML document is well-formed
.SH SYNOPSIS
--- contrib/expat/doc/xmlwf.xml.orig
+++ contrib/expat/doc/xmlwf.xml
@@ -9,7 +9,7 @@
Copyright (c) 2001 Scott Bronson
Copyright (c) 2002-2003 Fred L. Drake, Jr.
Copyright (c) 2009 Karl Waclawek
- Copyright (c) 2016-2024 Sebastian Pipping
+ Copyright (c) 2016-2025 Sebastian Pipping
Copyright (c) 2016 Ardo van Rangelrooij
Copyright (c) 2017 Rhodri James
Copyright (c) 2020 Joe Orton
@@ -21,7 +21,7 @@
"http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" [
Scott">
Bronson">
- February 6, 2024">
+ March 27, 2025">
1">
bronson@rinspin.com">
--- contrib/expat/examples/Makefile.in.orig
+++ contrib/expat/examples/Makefile.in
@@ -313,6 +313,7 @@
SED = @SED@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
+SIZEOF_VOID_P = @SIZEOF_VOID_P@
SO_MAJOR = @SO_MAJOR@
SO_MINOR = @SO_MINOR@
SO_PATCH = @SO_PATCH@
@@ -326,7 +327,6 @@
ac_ct_CC = @ac_ct_CC@
ac_ct_CXX = @ac_ct_CXX@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-ac_cv_sizeof_void_p = @ac_cv_sizeof_void_p@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
--- contrib/expat/examples/element_declarations.c.orig
+++ contrib/expat/examples/element_declarations.c
@@ -15,6 +15,7 @@
Copyright (c) 2016-2024 Sebastian Pipping
Copyright (c) 2017 Rhodri James
Copyright (c) 2019 Zhongyuan Zhou
+ Copyright (c) 2024 Hanno Böck
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -127,15 +128,15 @@
}
// Node
- printf("[%u] type=%s(%d), quant=%s(%d)", (unsigned)(model - root),
- contentTypeName(model->type), model->type,
- contentQuantName(model->quant), model->quant);
+ printf("[%u] type=%s(%u), quant=%s(%u)", (unsigned)(model - root),
+ contentTypeName(model->type), (unsigned int)model->type,
+ contentQuantName(model->quant), (unsigned int)model->quant);
if (model->name) {
printf(", name=\"%" XML_FMT_STR "\"", model->name);
} else {
printf(", name=NULL");
}
- printf(", numchildren=%d", model->numchildren);
+ printf(", numchildren=%u", model->numchildren);
printf("\n");
}
--- contrib/expat/expat_config.h.in.orig
+++ contrib/expat/expat_config.h.in
@@ -139,7 +139,4 @@
/* Define to `long int' if <sys/types.h> does not define. */
#undef off_t
-/* Define to `unsigned int' if <sys/types.h> does not define. */
-#undef size_t
-
#endif // ndef EXPAT_CONFIG_H
--- contrib/expat/fix-xmltest-log.sh.orig
+++ contrib/expat/fix-xmltest-log.sh
@@ -7,6 +7,7 @@
# |_| XML parser
#
# Copyright (c) 2019-2022 Sebastian Pipping
+# Copyright (c) 2024 Dag-Erling Smørgrav
# Licensed under the MIT license:
#
# Permission is hereby granted, free of charge, to any person obtaining
@@ -32,10 +33,10 @@
filename="${1:-tests/xmltest.log}"
-dos2unix "${filename}"
-
-tempfile="$(mktemp)"
-sed \
+sed -i.bak \
+ -e '# convert DOS line endings to Unix without resorting to dos2unix' \
+ -e $'s/\r//' \
+ \
-e 's/^wine: Call .* msvcrt\.dll\._wperror, aborting$/ibm49i02.dtd: No such file or directory/' \
\
-e '/^wine: /d' \
@@ -46,5 +47,4 @@
-e '/^wine client error:/d' \
-e '/^In ibm\/invalid\/P49\/: Unhandled exception: unimplemented .\+/d' \
\
- "${filename}" > "${tempfile}"
-mv "${tempfile}" "${filename}"
+ "${filename}"
--- /dev/null
+++ contrib/expat/fuzz/xml_lpm_fuzzer.cpp
@@ -0,0 +1,464 @@
+/*
+ __ __ _
+ ___\ \/ /_ __ __ _| |_
+ / _ \\ /| '_ \ / _` | __|
+ | __// \| |_) | (_| | |_
+ \___/_/\_\ .__/ \__,_|\__|
+ |_| XML parser
+
+ Copyright (c) 2022 Mark Brand
+ Copyright (c) 2025 Sebastian Pipping
+ Licensed under the MIT license:
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to permit
+ persons to whom the Software is furnished to do so, subject to the
+ following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
+ NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+ DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#if defined(NDEBUG)
+# undef NDEBUG // because checks below rely on assert(...)
+#endif
+
+#include <assert.h>
+#include <stdint.h>
+#include <vector>
+
+#include "expat.h"
+#include "xml_lpm_fuzzer.pb.h"
+#include "src/libfuzzer/libfuzzer_macro.h"
+
+static const char *g_encoding = nullptr;
+static const char *g_external_entity = nullptr;
+static size_t g_external_entity_size = 0;
+// Map the proto Encoding to the name given to Expat (default: "UNKNOWN").
+void
+SetEncoding(const xml_lpm_fuzzer::Encoding &e) {
+ switch (e) {
+ case xml_lpm_fuzzer::Encoding::UTF8:
+ g_encoding = "UTF-8";
+ break;
+
+ case xml_lpm_fuzzer::Encoding::UTF16:
+ g_encoding = "UTF-16";
+ break;
+
+ case xml_lpm_fuzzer::Encoding::ISO88591:
+ g_encoding = "ISO-8859-1";
+ break;
+
+ case xml_lpm_fuzzer::Encoding::ASCII:
+ g_encoding = "US-ASCII";
+ break;
+
+ case xml_lpm_fuzzer::Encoding::NONE:
+ g_encoding = NULL;
+ break;
+
+ default:
+ g_encoding = "UNKNOWN";
+ break;
+ }
+}
+
+static int g_allocation_count = 0; // malloc/realloc calls seen in this testcase
+static std::vector<int> g_fail_allocations = {}; // 1-based call indices to fail
+// Allocator hook: returns NULL for allocation indices in g_fail_allocations.
+void *
+MallocHook(size_t size) {
+  const int current = ++g_allocation_count; // counts malloc and realloc calls
+  for (const auto failing : g_fail_allocations) {
+    if (failing == current) {
+      return NULL;
+    }
+  }
+  return malloc(size);
+}
+// Reallocator hook: returns NULL (leaving ptr untouched) for failing indices.
+void *
+ReallocHook(void *ptr, size_t size) {
+  const int current = ++g_allocation_count; // shared counter with MallocHook
+  for (const auto failing : g_fail_allocations) {
+    if (failing == current) {
+      return NULL;
+    }
+  }
+  return realloc(ptr, size);
+}
+// Deallocation hook: forwards to free() unconditionally.
+void
+FreeHook(void *ptr) {
+ free(ptr);
+}
+// Allocator suite passed to XML_ParserCreate_MM to enable failure injection.
+XML_Memory_Handling_Suite memory_handling_suite
+ = {MallocHook, ReallocHook, FreeHook};
+
+void InitializeParser(XML_Parser parser);
+
+// Parse one chunk, resuming for as long as the parser reports being suspended
+// (CommentHandler suspends it), so that the suspend/resume code gets covered.
+enum XML_Status
+Parse(XML_Parser parser, const char *input, int input_len, int is_final) {
+ enum XML_Status status = XML_Parse(parser, input, input_len, is_final);
+ while (status == XML_STATUS_SUSPENDED) {
+ status = XML_ResumeParser(parser);
+ }
+ return status;
+}
+
+// Reads every XML_Char of a string so that instrumented builds (e.g. ASan)
+// fault on use-after-free or buffer overflow in callback arguments. With the
+// default len of -1 the string is read up to its null terminator; otherwise
+// exactly len characters are read. A NULL ptr is ignored. The reads are summed
+// into the volatile no_optimize so the compiler cannot remove them; calling
+// this from each callback checks the API's length/null-termination promises.
+static volatile size_t no_optimize = 0;
+static void
+TouchString(const XML_Char *ptr, int len = -1) {
+ if (! ptr) {
+ return;
+ }
+
+ if (len == -1) {
+ for (XML_Char value = *ptr++; value; value = *ptr++) {
+ no_optimize += value;
+ }
+ } else {
+ for (int i = 0; i < len; ++i) {
+ no_optimize += ptr[i];
+ }
+ }
+}
+// Assert per-type invariants of a content model node and touch its names.
+static void
+TouchNodeAndRecurse(XML_Content *content) {
+ switch (content->type) {
+ case XML_CTYPE_EMPTY:
+ case XML_CTYPE_ANY:
+ assert(content->quant == XML_CQUANT_NONE);
+ assert(content->name == NULL);
+ assert(content->numchildren == 0);
+ assert(content->children == NULL);
+ break;
+
+ case XML_CTYPE_MIXED:
+ assert(content->quant == XML_CQUANT_NONE
+ || content->quant == XML_CQUANT_REP);
+ assert(content->name == NULL);
+ for (unsigned int i = 0; i < content->numchildren; ++i) {
+ assert(content->children[i].type == XML_CTYPE_NAME);
+ assert(content->children[i].quant == XML_CQUANT_NONE);
+ assert(content->children[i].numchildren == 0);
+ assert(content->children[i].children == NULL);
+ TouchString(content->children[i].name);
+ }
+ break;
+
+ case XML_CTYPE_NAME:
+ assert((content->quant == XML_CQUANT_NONE)
+ || (content->quant == XML_CQUANT_OPT)
+ || (content->quant == XML_CQUANT_REP)
+ || (content->quant == XML_CQUANT_PLUS));
+ assert(content->numchildren == 0);
+ assert(content->children == NULL);
+ TouchString(content->name);
+ break;
+
+ case XML_CTYPE_CHOICE:
+ case XML_CTYPE_SEQ:
+ assert((content->quant == XML_CQUANT_NONE)
+ || (content->quant == XML_CQUANT_OPT)
+ || (content->quant == XML_CQUANT_REP)
+ || (content->quant == XML_CQUANT_PLUS));
+ assert(content->name == NULL);
+ for (unsigned int i = 0; i < content->numchildren; ++i) {
+ TouchNodeAndRecurse(&content->children[i]);
+ }
+ break;
+
+ default:
+ assert(false);
+ }
+}
+// Validate and touch the model, then free it (userData is the parser).
+static void XMLCALL
+ElementDeclHandler(void *userData, const XML_Char *name, XML_Content *model) {
+ TouchString(name);
+ TouchNodeAndRecurse(model);
+ XML_FreeContentModel((XML_Parser)userData, model);
+}
+// Touch every string argument of an attribute-list declaration.
+static void XMLCALL
+AttlistDeclHandler(void *userData, const XML_Char *elname,
+ const XML_Char *attname, const XML_Char *atttype,
+ const XML_Char *dflt, int isrequired) {
+ (void)userData;
+ TouchString(elname);
+ TouchString(attname);
+ TouchString(atttype);
+ TouchString(dflt);
+ (void)isrequired;
+}
+// Touch the version and encoding strings of an XML declaration.
+static void XMLCALL
+XmlDeclHandler(void *userData, const XML_Char *version,
+ const XML_Char *encoding, int standalone) {
+ (void)userData;
+ TouchString(version);
+ TouchString(encoding);
+ (void)standalone;
+}
+// Touch the element name and each entry of the NULL-terminated atts array.
+static void XMLCALL
+StartElementHandler(void *userData, const XML_Char *name,
+ const XML_Char **atts) {
+ (void)userData;
+ TouchString(name);
+ for (size_t i = 0; atts[i] != NULL; ++i) {
+ TouchString(atts[i]);
+ }
+}
+// Touch the name passed to the end-element callback.
+static void XMLCALL
+EndElementHandler(void *userData, const XML_Char *name) {
+ (void)userData;
+ TouchString(name);
+}
+// Touch exactly len characters of s (no null terminator is relied upon).
+static void XMLCALL
+CharacterDataHandler(void *userData, const XML_Char *s, int len) {
+ (void)userData;
+ TouchString(s, len);
+}
+// Touch the target and data strings of a processing instruction.
+static void XMLCALL
+ProcessingInstructionHandler(void *userData, const XML_Char *target,
+ const XML_Char *data) {
+ (void)userData;
+ TouchString(target);
+ TouchString(data);
+}
+// Touch the comment text, then ask the parser in userData to suspend.
+static void XMLCALL
+CommentHandler(void *userData, const XML_Char *data) {
+ TouchString(data);
+ // Use the comment handler to trigger parser suspend, so that we can get
+ // coverage of that code.
+ XML_StopParser((XML_Parser)userData, XML_TRUE);
+}
+// Intentionally empty handler for the start of a CDATA section.
+static void XMLCALL
+StartCdataSectionHandler(void *userData) {
+ (void)userData;
+}
+// Intentionally empty handler for the end of a CDATA section.
+static void XMLCALL
+EndCdataSectionHandler(void *userData) {
+ (void)userData;
+}
+// Touch exactly len characters of the data passed to the default handler.
+static void XMLCALL
+DefaultHandler(void *userData, const XML_Char *s, int len) {
+ (void)userData;
+ TouchString(s, len);
+}
+// Touch the doctype name, system id and public id (NULL ones are skipped).
+static void XMLCALL
+StartDoctypeDeclHandler(void *userData, const XML_Char *doctypeName,
+ const XML_Char *sysid, const XML_Char *pubid,
+ int has_internal_subset) {
+ (void)userData;
+ TouchString(doctypeName);
+ TouchString(sysid);
+ TouchString(pubid);
+ (void)has_internal_subset;
+}
+// Intentionally empty handler for the end of a doctype declaration.
+static void XMLCALL
+EndDoctypeDeclHandler(void *userData) {
+ (void)userData;
+}
+// Touch all strings of an entity declaration; value is read by value_length.
+static void XMLCALL
+EntityDeclHandler(void *userData, const XML_Char *entityName,
+ int is_parameter_entity, const XML_Char *value,
+ int value_length, const XML_Char *base,
+ const XML_Char *systemId, const XML_Char *publicId,
+ const XML_Char *notationName) {
+ (void)userData;
+ TouchString(entityName);
+ (void)is_parameter_entity;
+ TouchString(value, value_length);
+ TouchString(base);
+ TouchString(systemId);
+ TouchString(publicId);
+ TouchString(notationName);
+}
+// Touch the name, base, system id and public id of a notation declaration.
+static void XMLCALL
+NotationDeclHandler(void *userData, const XML_Char *notationName,
+ const XML_Char *base, const XML_Char *systemId,
+ const XML_Char *publicId) {
+ (void)userData;
+ TouchString(notationName);
+ TouchString(base);
+ TouchString(systemId);
+ TouchString(publicId);
+}
+// Touch the prefix and URI of a namespace declaration.
+static void XMLCALL
+StartNamespaceDeclHandler(void *userData, const XML_Char *prefix,
+ const XML_Char *uri) {
+ (void)userData;
+ TouchString(prefix);
+ TouchString(uri);
+}
+// Touch the prefix passed to the end-namespace-declaration callback.
+static void XMLCALL
+EndNamespaceDeclHandler(void *userData, const XML_Char *prefix) {
+ (void)userData;
+ TouchString(prefix);
+}
+// Not-standalone callback that always returns XML_STATUS_OK.
+static int XMLCALL
+NotStandaloneHandler(void *userData) {
+ (void)userData;
+ return XML_STATUS_OK;
+}
+// Parse g_external_entity (if set) in a child parser; else XML_STATUS_ERROR.
+static int XMLCALL
+ExternalEntityRefHandler(XML_Parser parser, const XML_Char *context,
+ const XML_Char *base, const XML_Char *systemId,
+ const XML_Char *publicId) {
+ int rc = XML_STATUS_ERROR;
+ TouchString(context);
+ TouchString(base);
+ TouchString(systemId);
+ TouchString(publicId);
+
+ if (g_external_entity) {
+ XML_Parser ext_parser
+ = XML_ExternalEntityParserCreate(parser, context, g_encoding);
+ rc = Parse(ext_parser, g_external_entity, g_external_entity_size, 1);
+ XML_ParserFree(ext_parser);
+ }
+
+ return rc;
+}
+// Touch the name of a skipped entity.
+static void XMLCALL
+SkippedEntityHandler(void *userData, const XML_Char *entityName,
+ int is_parameter_entity) {
+ (void)userData;
+ TouchString(entityName);
+ (void)is_parameter_entity;
+}
+// Touch the encoding name, then return XML_STATUS_ERROR without filling info.
+static int XMLCALL
+UnknownEncodingHandler(void *encodingHandlerData, const XML_Char *name,
+ XML_Encoding *info) {
+ (void)encodingHandlerData;
+ TouchString(name);
+ (void)info;
+ return XML_STATUS_ERROR;
+}
+// Set user data (the parser itself), a fixed hash salt, and all handlers.
+void
+InitializeParser(XML_Parser parser) {
+ XML_SetUserData(parser, (void *)parser);
+ XML_SetHashSalt(parser, 0x41414141);
+ XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
+
+ XML_SetElementDeclHandler(parser, ElementDeclHandler);
+ XML_SetAttlistDeclHandler(parser, AttlistDeclHandler);
+ XML_SetXmlDeclHandler(parser, XmlDeclHandler);
+ XML_SetElementHandler(parser, StartElementHandler, EndElementHandler);
+ XML_SetCharacterDataHandler(parser, CharacterDataHandler);
+ XML_SetProcessingInstructionHandler(parser, ProcessingInstructionHandler);
+ XML_SetCommentHandler(parser, CommentHandler);
+ XML_SetCdataSectionHandler(parser, StartCdataSectionHandler,
+ EndCdataSectionHandler);
+ // XML_SetDefaultHandler disables entity expansion
+ XML_SetDefaultHandlerExpand(parser, DefaultHandler);
+ XML_SetDoctypeDeclHandler(parser, StartDoctypeDeclHandler,
+ EndDoctypeDeclHandler);
+ // Note: This is mutually exclusive with XML_SetUnparsedEntityDeclHandler,
+ // and there isn't any significant code change between the two.
+ XML_SetEntityDeclHandler(parser, EntityDeclHandler);
+ XML_SetNotationDeclHandler(parser, NotationDeclHandler);
+ XML_SetNamespaceDeclHandler(parser, StartNamespaceDeclHandler,
+ EndNamespaceDeclHandler);
+ XML_SetNotStandaloneHandler(parser, NotStandaloneHandler);
+ XML_SetExternalEntityRefHandler(parser, ExternalEntityRefHandler);
+ XML_SetSkippedEntityHandler(parser, SkippedEntityHandler);
+ XML_SetUnknownEncodingHandler(parser, UnknownEncodingHandler, (void *)parser);
+}
+// Fuzz entry point: replay the testcase's actions against a single parser.
+DEFINE_TEXT_PROTO_FUZZER(const xml_lpm_fuzzer::Testcase &testcase) {
+ g_external_entity = nullptr;
+
+ if (! testcase.actions_size()) {
+ return;
+ }
+
+ g_allocation_count = 0;
+ g_fail_allocations.clear();
+ for (int i = 0; i < testcase.fail_allocations_size(); ++i) {
+ g_fail_allocations.push_back(testcase.fail_allocations(i));
+ }
+
+ SetEncoding(testcase.encoding());
+ XML_Parser parser
+ = XML_ParserCreate_MM(g_encoding, &memory_handling_suite, "|");
+ InitializeParser(parser);
+
+ for (int i = 0; i < testcase.actions_size(); ++i) {
+ const auto &action = testcase.actions(i);
+ switch (action.action_case()) {
+ case xml_lpm_fuzzer::Action::kChunk:
+ if (XML_STATUS_ERROR
+ == Parse(parser, action.chunk().data(), action.chunk().size(), 0)) {
+ // Force a reset after parse error.
+ XML_ParserReset(parser, g_encoding);
+ InitializeParser(parser);
+ }
+ break;
+
+ case xml_lpm_fuzzer::Action::kLastChunk:
+ Parse(parser, action.last_chunk().data(), action.last_chunk().size(), 1);
+ XML_ParserReset(parser, g_encoding);
+ InitializeParser(parser);
+ break;
+
+ case xml_lpm_fuzzer::Action::kReset:
+ XML_ParserReset(parser, g_encoding);
+ InitializeParser(parser);
+ break;
+
+ case xml_lpm_fuzzer::Action::kExternalEntity:
+ g_external_entity = action.external_entity().data();
+ g_external_entity_size = action.external_entity().size();
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ XML_ParserFree(parser);
+}
--- /dev/null
+++ contrib/expat/fuzz/xml_lpm_fuzzer.proto
@@ -0,0 +1,58 @@
+/*
+ __ __ _
+ ___\ \/ /_ __ __ _| |_
+ / _ \\ /| '_ \ / _` | __|
+ | __// \| |_) | (_| | |_
+ \___/_/\_\ .__/ \__,_|\__|
+ |_| XML parser
+
+ Copyright (c) 2022 Mark Brand
+ Copyright (c) 2025 Sebastian Pipping
+ Licensed under the MIT license:
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to permit
+ persons to whom the Software is furnished to do so, subject to the
+ following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
+ NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+ DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+syntax = "proto2";
+package xml_lpm_fuzzer;
+// Encoding choices for a testcase.
+enum Encoding {
+ UTF8 = 0;
+ UTF16 = 1;
+ ISO88591 = 2;
+ ASCII = 3;
+ UNKNOWN = 4;
+ NONE = 5;
+}
+// One step of a testcase; at most one of the oneof fields is set.
+message Action {
+ oneof action {
+ string chunk = 1;
+ string last_chunk = 2;
+ bool reset = 3;
+ string external_entity = 4;
+ }
+}
+// A fuzz input: an encoding, a list of actions, and a list of int32 indices.
+message Testcase {
+ required Encoding encoding = 1;
+ repeated Action actions = 2;
+ repeated int32 fail_allocations = 3;
+}
--- contrib/expat/fuzz/xml_parse_fuzzer.c.orig
+++ contrib/expat/fuzz/xml_parse_fuzzer.c
@@ -5,7 +5,7 @@
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
--- contrib/expat/fuzz/xml_parsebuffer_fuzzer.c.orig
+++ contrib/expat/fuzz/xml_parsebuffer_fuzzer.c
@@ -5,7 +5,7 @@
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
--- contrib/expat/lib/Makefile.am.orig
+++ contrib/expat/lib/Makefile.am
@@ -6,7 +6,7 @@
# \___/_/\_\ .__/ \__,_|\__|
# |_| XML parser
#
-# Copyright (c) 2017-2022 Sebastian Pipping
+# Copyright (c) 2017-2024 Sebastian Pipping
# Copyright (c) 2017 Tomasz Kłoczko
# Copyright (c) 2019 David Loffredo
# Licensed under the MIT license:
@@ -36,7 +36,9 @@
expat_external.h
lib_LTLIBRARIES = libexpat.la
-noinst_LTLIBRARIES = libexpatinternal.la
+if WITH_TESTS
+noinst_LTLIBRARIES = libtestpat.la
+endif
libexpat_la_LDFLAGS = \
@AM_LDFLAGS@ \
@@ -44,17 +46,16 @@
-no-undefined \
-version-info @LIBCURRENT@:@LIBREVISION@:@LIBAGE@
-libexpat_la_SOURCES =
-
-# This layer of indirection allows
-# the test suite to access internal symbols
-# despite compiling with -fvisibility=hidden
-libexpatinternal_la_SOURCES = \
+libexpat_la_SOURCES = \
xmlparse.c \
xmltok.c \
xmlrole.c
-libexpat_la_LIBADD = libexpatinternal.la
+if WITH_TESTS
+libtestpat_la_CPPFLAGS = -DXML_TESTING
+
+libtestpat_la_SOURCES = $(libexpat_la_SOURCES)
+endif
doc_DATA = \
../AUTHORS \
--- contrib/expat/lib/Makefile.in.orig
+++ contrib/expat/lib/Makefile.in
@@ -22,7 +22,7 @@
# \___/_/\_\ .__/ \__,_|\__|
# |_| XML parser
#
-# Copyright (c) 2017-2022 Sebastian Pipping
+# Copyright (c) 2017-2024 Sebastian Pipping
# Copyright (c) 2017 Tomasz Kłoczko
# Copyright (c) 2019 David Loffredo
# Licensed under the MIT license:
@@ -176,8 +176,8 @@
am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(docdir)" \
"$(DESTDIR)$(includedir)"
LTLIBRARIES = $(lib_LTLIBRARIES) $(noinst_LTLIBRARIES)
-libexpat_la_DEPENDENCIES = libexpatinternal.la
-am_libexpat_la_OBJECTS =
+libexpat_la_LIBADD =
+am_libexpat_la_OBJECTS = xmlparse.lo xmltok.lo xmlrole.lo
libexpat_la_OBJECTS = $(am_libexpat_la_OBJECTS)
AM_V_lt = $(am__v_lt_@AM_V@)
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
@@ -186,9 +186,13 @@
libexpat_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(libexpat_la_LDFLAGS) $(LDFLAGS) -o $@
-libexpatinternal_la_LIBADD =
-am_libexpatinternal_la_OBJECTS = xmlparse.lo xmltok.lo xmlrole.lo
-libexpatinternal_la_OBJECTS = $(am_libexpatinternal_la_OBJECTS)
+libtestpat_la_LIBADD =
+am__libtestpat_la_SOURCES_DIST = xmlparse.c xmltok.c xmlrole.c
+am__objects_1 = libtestpat_la-xmlparse.lo libtestpat_la-xmltok.lo \
+ libtestpat_la-xmlrole.lo
+@WITH_TESTS_TRUE@am_libtestpat_la_OBJECTS = $(am__objects_1)
+libtestpat_la_OBJECTS = $(am_libtestpat_la_OBJECTS)
+@WITH_TESTS_TRUE@am_libtestpat_la_rpath =
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
@@ -204,8 +208,10 @@
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
depcomp = $(SHELL) $(top_srcdir)/conftools/depcomp
am__maybe_remake_depfiles = depfiles
-am__depfiles_remade = ./$(DEPDIR)/xmlparse.Plo ./$(DEPDIR)/xmlrole.Plo \
- ./$(DEPDIR)/xmltok.Plo
+am__depfiles_remade = ./$(DEPDIR)/libtestpat_la-xmlparse.Plo \
+ ./$(DEPDIR)/libtestpat_la-xmlrole.Plo \
+ ./$(DEPDIR)/libtestpat_la-xmltok.Plo ./$(DEPDIR)/xmlparse.Plo \
+ ./$(DEPDIR)/xmlrole.Plo ./$(DEPDIR)/xmltok.Plo
am__mv = mv -f
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
@@ -225,8 +231,9 @@
am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
am__v_CCLD_0 = @echo " CCLD " $@;
am__v_CCLD_1 =
-SOURCES = $(libexpat_la_SOURCES) $(libexpatinternal_la_SOURCES)
-DIST_SOURCES = $(libexpat_la_SOURCES) $(libexpatinternal_la_SOURCES)
+SOURCES = $(libexpat_la_SOURCES) $(libtestpat_la_SOURCES)
+DIST_SOURCES = $(libexpat_la_SOURCES) \
+ $(am__libtestpat_la_SOURCES_DIST)
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
@@ -344,6 +351,7 @@
SED = @SED@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
+SIZEOF_VOID_P = @SIZEOF_VOID_P@
SO_MAJOR = @SO_MAJOR@
SO_MINOR = @SO_MINOR@
SO_PATCH = @SO_PATCH@
@@ -357,7 +365,6 @@
ac_ct_CC = @ac_ct_CC@
ac_ct_CXX = @ac_ct_CXX@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-ac_cv_sizeof_void_p = @ac_cv_sizeof_void_p@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
@@ -410,24 +417,20 @@
expat_external.h
lib_LTLIBRARIES = libexpat.la
-noinst_LTLIBRARIES = libexpatinternal.la
+@WITH_TESTS_TRUE@noinst_LTLIBRARIES = libtestpat.la
libexpat_la_LDFLAGS = \
@AM_LDFLAGS@ \
@LIBM@ \
-no-undefined \
-version-info @LIBCURRENT@:@LIBREVISION@:@LIBAGE@
-libexpat_la_SOURCES =
-
-# This layer of indirection allows
-# the test suite to access internal symbols
-# despite compiling with -fvisibility=hidden
-libexpatinternal_la_SOURCES = \
+libexpat_la_SOURCES = \
xmlparse.c \
xmltok.c \
xmlrole.c
-libexpat_la_LIBADD = libexpatinternal.la
+@WITH_TESTS_TRUE@libtestpat_la_CPPFLAGS = -DXML_TESTING
+@WITH_TESTS_TRUE@libtestpat_la_SOURCES = $(libexpat_la_SOURCES)
doc_DATA = \
../AUTHORS \
../Changes
@@ -534,8 +537,8 @@
libexpat.la: $(libexpat_la_OBJECTS) $(libexpat_la_DEPENDENCIES) $(EXTRA_libexpat_la_DEPENDENCIES)
$(AM_V_CCLD)$(libexpat_la_LINK) -rpath $(libdir) $(libexpat_la_OBJECTS) $(libexpat_la_LIBADD) $(LIBS)
-libexpatinternal.la: $(libexpatinternal_la_OBJECTS) $(libexpatinternal_la_DEPENDENCIES) $(EXTRA_libexpatinternal_la_DEPENDENCIES)
- $(AM_V_CCLD)$(LINK) $(libexpatinternal_la_OBJECTS) $(libexpatinternal_la_LIBADD) $(LIBS)
+libtestpat.la: $(libtestpat_la_OBJECTS) $(libtestpat_la_DEPENDENCIES) $(EXTRA_libtestpat_la_DEPENDENCIES)
+ $(AM_V_CCLD)$(LINK) $(am_libtestpat_la_rpath) $(libtestpat_la_OBJECTS) $(libtestpat_la_LIBADD) $(LIBS)
mostlyclean-compile:
-rm -f *.$(OBJEXT)
@@ -543,6 +546,9 @@
distclean-compile:
-rm -f *.tab.c
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libtestpat_la-xmlparse.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libtestpat_la-xmlrole.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libtestpat_la-xmltok.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xmlparse.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xmlrole.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xmltok.Plo@am__quote@ # am--include-marker
@@ -574,6 +580,27 @@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
+libtestpat_la-xmlparse.lo: xmlparse.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libtestpat_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libtestpat_la-xmlparse.lo -MD -MP -MF $(DEPDIR)/libtestpat_la-xmlparse.Tpo -c -o libtestpat_la-xmlparse.lo `test -f 'xmlparse.c' || echo '$(srcdir)/'`xmlparse.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libtestpat_la-xmlparse.Tpo $(DEPDIR)/libtestpat_la-xmlparse.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='xmlparse.c' object='libtestpat_la-xmlparse.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libtestpat_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libtestpat_la-xmlparse.lo `test -f 'xmlparse.c' || echo '$(srcdir)/'`xmlparse.c
+
+libtestpat_la-xmltok.lo: xmltok.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libtestpat_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libtestpat_la-xmltok.lo -MD -MP -MF $(DEPDIR)/libtestpat_la-xmltok.Tpo -c -o libtestpat_la-xmltok.lo `test -f 'xmltok.c' || echo '$(srcdir)/'`xmltok.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libtestpat_la-xmltok.Tpo $(DEPDIR)/libtestpat_la-xmltok.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='xmltok.c' object='libtestpat_la-xmltok.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libtestpat_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libtestpat_la-xmltok.lo `test -f 'xmltok.c' || echo '$(srcdir)/'`xmltok.c
+
+libtestpat_la-xmlrole.lo: xmlrole.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libtestpat_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libtestpat_la-xmlrole.lo -MD -MP -MF $(DEPDIR)/libtestpat_la-xmlrole.Tpo -c -o libtestpat_la-xmlrole.lo `test -f 'xmlrole.c' || echo '$(srcdir)/'`xmlrole.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libtestpat_la-xmlrole.Tpo $(DEPDIR)/libtestpat_la-xmlrole.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='xmlrole.c' object='libtestpat_la-xmlrole.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libtestpat_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libtestpat_la-xmlrole.lo `test -f 'xmlrole.c' || echo '$(srcdir)/'`xmlrole.c
+
mostlyclean-libtool:
-rm -f *.lo
@@ -749,7 +776,10 @@
clean-noinstLTLIBRARIES mostlyclean-am
distclean: distclean-am
- -rm -f ./$(DEPDIR)/xmlparse.Plo
+ -rm -f ./$(DEPDIR)/libtestpat_la-xmlparse.Plo
+ -rm -f ./$(DEPDIR)/libtestpat_la-xmlrole.Plo
+ -rm -f ./$(DEPDIR)/libtestpat_la-xmltok.Plo
+ -rm -f ./$(DEPDIR)/xmlparse.Plo
-rm -f ./$(DEPDIR)/xmlrole.Plo
-rm -f ./$(DEPDIR)/xmltok.Plo
-rm -f Makefile
@@ -798,7 +828,10 @@
installcheck-am:
maintainer-clean: maintainer-clean-am
- -rm -f ./$(DEPDIR)/xmlparse.Plo
+ -rm -f ./$(DEPDIR)/libtestpat_la-xmlparse.Plo
+ -rm -f ./$(DEPDIR)/libtestpat_la-xmlrole.Plo
+ -rm -f ./$(DEPDIR)/libtestpat_la-xmltok.Plo
+ -rm -f ./$(DEPDIR)/xmlparse.Plo
-rm -f ./$(DEPDIR)/xmlrole.Plo
-rm -f ./$(DEPDIR)/xmltok.Plo
-rm -f Makefile
--- contrib/expat/lib/expat.h.orig
+++ contrib/expat/lib/expat.h
@@ -11,13 +11,14 @@
Copyright (c) 2000-2005 Fred L. Drake, Jr.
Copyright (c) 2001-2002 Greg Stein
Copyright (c) 2002-2016 Karl Waclawek
- Copyright (c) 2016-2024 Sebastian Pipping
+ Copyright (c) 2016-2025 Sebastian Pipping
Copyright (c) 2016 Cristian Rodríguez
Copyright (c) 2016 Thomas Beutlich
Copyright (c) 2017 Rhodri James
Copyright (c) 2022 Thijs Schreijer
Copyright (c) 2023 Hanno Böck
Copyright (c) 2023 Sony Corporation / Snild Dolkow
+ Copyright (c) 2024 Taichi Haradaguchi <20001722@ymail.ne.jp>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -129,7 +130,9 @@
/* Added in 2.3.0. */
XML_ERROR_NO_BUFFER,
/* Added in 2.4.0. */
- XML_ERROR_AMPLIFICATION_LIMIT_BREACH
+ XML_ERROR_AMPLIFICATION_LIMIT_BREACH,
+ /* Added in 2.6.4. */
+ XML_ERROR_NOT_STARTED,
};
enum XML_Content_Type {
@@ -1042,7 +1045,7 @@
XMLPARSEAPI(const XML_Feature *)
XML_GetFeatureList(void);
-#if XML_GE == 1
+#if defined(XML_DTD) || (defined(XML_GE) && XML_GE == 1)
/* Added in Expat 2.4.0 for XML_DTD defined and
* added in Expat 2.6.0 for XML_GE == 1. */
XMLPARSEAPI(XML_Bool)
@@ -1064,8 +1067,8 @@
See https://semver.org
*/
#define XML_MAJOR_VERSION 2
-#define XML_MINOR_VERSION 6
-#define XML_MICRO_VERSION 0
+#define XML_MINOR_VERSION 7
+#define XML_MICRO_VERSION 1
#ifdef __cplusplus
}
--- contrib/expat/lib/internal.h.orig
+++ contrib/expat/lib/internal.h
@@ -28,10 +28,11 @@
Copyright (c) 2002-2003 Fred L. Drake, Jr.
Copyright (c) 2002-2006 Karl Waclawek
Copyright (c) 2003 Greg Stein
- Copyright (c) 2016-2023 Sebastian Pipping
+ Copyright (c) 2016-2025 Sebastian Pipping
Copyright (c) 2018 Yury Gribov
Copyright (c) 2019 David Loffredo
- Copyright (c) 2023 Sony Corporation / Snild Dolkow
+ Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow
+ Copyright (c) 2024 Taichi Haradaguchi <20001722@ymail.ne.jp>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -126,6 +127,9 @@
# elif ULONG_MAX == 18446744073709551615u // 2^64-1
# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "ld"
# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "lu"
+# elif defined(EMSCRIPTEN) // 32bit mode Emscripten
+# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "ld"
+# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "zu"
# else
# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "d"
# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "u"
@@ -155,14 +159,20 @@
void _INTERNAL_trim_to_complete_utf8_characters(const char *from,
const char **fromLimRef);
-#if XML_GE == 1
+#if defined(XML_GE) && XML_GE == 1
unsigned long long testingAccountingGetCountBytesDirect(XML_Parser parser);
unsigned long long testingAccountingGetCountBytesIndirect(XML_Parser parser);
const char *unsignedCharToPrintable(unsigned char c);
#endif
-extern XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c
-extern unsigned int g_parseAttempts; // used for testing only
+extern
+#if ! defined(XML_TESTING)
+ const
+#endif
+ XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c
+#if defined(XML_TESTING)
+extern unsigned int g_bytesScanned; // used for testing only
+#endif
#ifdef __cplusplus
}
--- contrib/expat/lib/siphash.h.orig
+++ contrib/expat/lib/siphash.h
@@ -126,8 +126,7 @@
| ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) \
| ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56))
-#define SIPHASH_INITIALIZER \
- { 0, 0, 0, 0, {0}, 0, 0 }
+#define SIPHASH_INITIALIZER {0, 0, 0, 0, {0}, 0, 0}
struct siphash {
uint64_t v0, v1, v2, v3;
--- contrib/expat/lib/xmlparse.c.orig
+++ contrib/expat/lib/xmlparse.c
@@ -1,4 +1,4 @@
-/* 628e24d4966bedbd4800f6ed128d06d29703765b4bce12d3b7f099f90f842fc9 (2.6.0+)
+/* d19ae032c224863c1527ba44d228cc34b99192c3a4c5a27af1f4e054d45ee031 (2.7.1+)
__ __ _
___\ \/ /_ __ __ _| |_
/ _ \\ /| '_ \ / _` | __|
@@ -13,7 +13,7 @@
Copyright (c) 2002-2016 Karl Waclawek
Copyright (c) 2005-2009 Steven Solie
Copyright (c) 2016 Eric Rahm
- Copyright (c) 2016-2024 Sebastian Pipping
+ Copyright (c) 2016-2025 Sebastian Pipping
Copyright (c) 2016 Gaurav
Copyright (c) 2016 Thomas Beutlich
Copyright (c) 2016 Gustavo Grieco
@@ -38,7 +38,9 @@
Copyright (c) 2022 Jann Horn
Copyright (c) 2022 Sean McBride
Copyright (c) 2023 Owain Davies
- Copyright (c) 2023 Sony Corporation / Snild Dolkow
+ Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow
+ Copyright (c) 2024-2025 Berkay Eren Ürün
+ Copyright (c) 2024 Hanno Böck
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -210,7 +212,7 @@
#endif
/* Round up n to be a multiple of sz, where sz is a power of 2. */
-#define ROUND_UP(n, sz) (((n) + ((sz)-1)) & ~((sz)-1))
+#define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))
/* Do safe (NULL-aware) pointer arithmetic */
#define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)
@@ -248,7 +250,7 @@
it odd, since odd numbers are always relative prime to a power of 2.
*/
#define SECOND_HASH(hash, mask, power) \
- ((((hash) & ~(mask)) >> ((power)-1)) & ((mask) >> 2))
+ ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2))
#define PROBE_STEP(hash, mask, power) \
((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
@@ -294,7 +296,7 @@
The name of the element is stored in both the document and API
encodings. The memory buffer 'buf' is a separately-allocated
memory area which stores the name. During the XML_Parse()/
- XMLParseBuffer() when the element is open, the memory for the 'raw'
+ XML_ParseBuffer() when the element is open, the memory for the 'raw'
version of the name (in the document encoding) is shared with the
document buffer. If the element is open across calls to
XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
@@ -323,6 +325,10 @@
const XML_Char *publicId;
const XML_Char *notation;
XML_Bool open;
+ XML_Bool hasMore; /* true if entity has not been completely processed */
+ /* An entity can be open while being already completely processed (hasMore ==
+ XML_FALSE). The reason is the delayed closing of entities until their inner
+ entities are processed and closed */
XML_Bool is_param;
XML_Bool is_internal; /* true if declared in internal subset outside PE */
} ENTITY;
@@ -413,6 +419,12 @@
int *scaffIndex;
} DTD;
+enum EntityType {
+ ENTITY_INTERNAL,
+ ENTITY_ATTRIBUTE,
+ ENTITY_VALUE,
+};
+
typedef struct open_internal_entity {
const char *internalEventPtr;
const char *internalEventEndPtr;
@@ -420,6 +432,7 @@
ENTITY *entity;
int startTagLevel;
XML_Bool betweenDecl; /* WFC: PE Between Declarations */
+ enum EntityType type;
} OPEN_INTERNAL_ENTITY;
enum XML_Account {
@@ -479,8 +492,8 @@
const char *next, const char **nextPtr,
XML_Bool haveMore, XML_Bool allowClosingDoctype,
enum XML_Account account);
-static enum XML_Error processInternalEntity(XML_Parser parser, ENTITY *entity,
- XML_Bool betweenDecl);
+static enum XML_Error processEntity(XML_Parser parser, ENTITY *entity,
+ XML_Bool betweenDecl, enum EntityType type);
static enum XML_Error doContent(XML_Parser parser, int startTagLevel,
const ENCODING *enc, const char *start,
const char *end, const char **endPtr,
@@ -511,18 +524,22 @@
const char *ptr, const char *end,
STRING_POOL *pool,
enum XML_Account account);
-static enum XML_Error appendAttributeValue(XML_Parser parser,
- const ENCODING *enc,
- XML_Bool isCdata, const char *ptr,
- const char *end, STRING_POOL *pool,
- enum XML_Account account);
+static enum XML_Error
+appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
+ const char *ptr, const char *end, STRING_POOL *pool,
+ enum XML_Account account, const char **nextPtr);
static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc,
const char *start, const char *end);
static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType);
#if XML_GE == 1
static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc,
const char *start, const char *end,
- enum XML_Account account);
+ enum XML_Account account,
+ const char **nextPtr);
+static enum XML_Error callStoreEntityValue(XML_Parser parser,
+ const ENCODING *enc,
+ const char *start, const char *end,
+ enum XML_Account account);
#else
static enum XML_Error storeSelfEntityValue(XML_Parser parser, ENTITY *entity);
#endif
@@ -629,8 +646,14 @@
? 0 \
: ((*((pool)->ptr)++ = c), 1))
-XML_Bool g_reparseDeferralEnabledDefault = XML_TRUE; // write ONLY in runtests.c
-unsigned int g_parseAttempts = 0; // used for testing only
+#if ! defined(XML_TESTING)
+const
+#endif
+ XML_Bool g_reparseDeferralEnabledDefault
+ = XML_TRUE; // write ONLY in runtests.c
+#if defined(XML_TESTING)
+unsigned int g_bytesScanned = 0; // used for testing only
+#endif
struct XML_ParserStruct {
/* The first member must be m_userData so that the XML_GetUserData
@@ -701,6 +724,10 @@
const char *m_positionPtr;
OPEN_INTERNAL_ENTITY *m_openInternalEntities;
OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
+ OPEN_INTERNAL_ENTITY *m_openAttributeEntities;
+ OPEN_INTERNAL_ENTITY *m_freeAttributeEntities;
+ OPEN_INTERNAL_ENTITY *m_openValueEntities;
+ OPEN_INTERNAL_ENTITY *m_freeValueEntities;
XML_Bool m_defaultExpandInternalEntities;
int m_tagLevel;
ENTITY *m_declEntity;
@@ -748,6 +775,7 @@
ACCOUNTING m_accounting;
ENTITY_STATS m_entity_stats;
#endif
+ XML_Bool m_reenter;
};
#define MALLOC(parser, s) (parser->m_mem.malloc_fcn((s)))
@@ -1017,8 +1045,32 @@
return XML_ERROR_NONE;
}
}
- g_parseAttempts += 1;
- const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr);
+#if defined(XML_TESTING)
+ g_bytesScanned += (unsigned)have_now;
+#endif
+ // Run in a loop to eliminate dangerous recursion depths
+ enum XML_Error ret;
+ *endPtr = start;
+ while (1) {
+ // Use endPtr as the new start in each iteration, since it will
+ // be set to the next start point by m_processor.
+ ret = parser->m_processor(parser, *endPtr, end, endPtr);
+
+ // Make parsing status (and in particular XML_SUSPENDED) take
+ // precedence over re-enter flag when they disagree
+ if (parser->m_parsingStatus.parsing != XML_PARSING) {
+ parser->m_reenter = XML_FALSE;
+ }
+
+ if (! parser->m_reenter) {
+ break;
+ }
+
+ parser->m_reenter = XML_FALSE;
+ if (ret != XML_ERROR_NONE)
+ return ret;
+ }
+
if (ret == XML_ERROR_NONE) {
// if we consumed nothing, remember what we had on this parse attempt.
if (*endPtr == start) {
@@ -1129,6 +1181,8 @@
parser->m_freeBindingList = NULL;
parser->m_freeTagList = NULL;
parser->m_freeInternalEntities = NULL;
+ parser->m_freeAttributeEntities = NULL;
+ parser->m_freeValueEntities = NULL;
parser->m_groupSize = 0;
parser->m_groupConnector = NULL;
@@ -1231,6 +1285,8 @@
parser->m_eventEndPtr = NULL;
parser->m_positionPtr = NULL;
parser->m_openInternalEntities = NULL;
+ parser->m_openAttributeEntities = NULL;
+ parser->m_openValueEntities = NULL;
parser->m_defaultExpandInternalEntities = XML_TRUE;
parser->m_tagLevel = 0;
parser->m_tagStack = NULL;
@@ -1241,6 +1297,8 @@
parser->m_unknownEncodingData = NULL;
parser->m_parentParser = NULL;
parser->m_parsingStatus.parsing = XML_INITIALIZED;
+ // Reentry can only be triggered inside m_processor calls
+ parser->m_reenter = XML_FALSE;
#ifdef XML_DTD
parser->m_isParamEntity = XML_FALSE;
parser->m_useForeignDTD = XML_FALSE;
@@ -1300,6 +1358,24 @@
openEntity->next = parser->m_freeInternalEntities;
parser->m_freeInternalEntities = openEntity;
}
+ /* move m_openAttributeEntities to m_freeAttributeEntities (i.e. same task but
+ * for attributes) */
+ openEntityList = parser->m_openAttributeEntities;
+ while (openEntityList) {
+ OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
+ openEntityList = openEntity->next;
+ openEntity->next = parser->m_freeAttributeEntities;
+ parser->m_freeAttributeEntities = openEntity;
+ }
+ /* move m_openValueEntities to m_freeValueEntities (i.e. same task but
+ * for value entities) */
+ openEntityList = parser->m_openValueEntities;
+ while (openEntityList) {
+ OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
+ openEntityList = openEntity->next;
+ openEntity->next = parser->m_freeValueEntities;
+ parser->m_freeValueEntities = openEntity;
+ }
moveToFreeBindingList(parser, parser->m_inheritedBindings);
FREE(parser, parser->m_unknownEncodingMem);
if (parser->m_unknownEncodingRelease)
@@ -1313,6 +1389,19 @@
return XML_TRUE;
}
+static XML_Bool
+parserBusy(XML_Parser parser) {
+ switch (parser->m_parsingStatus.parsing) {
+ case XML_PARSING:
+ case XML_SUSPENDED:
+ return XML_TRUE;
+ case XML_INITIALIZED:
+ case XML_FINISHED:
+ default:
+ return XML_FALSE;
+ }
+}
+
enum XML_Status XMLCALL
XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) {
if (parser == NULL)
@@ -1321,8 +1410,7 @@
XXX There's no way for the caller to determine which of the
XXX possible error cases caused the XML_STATUS_ERROR return.
*/
- if (parser->m_parsingStatus.parsing == XML_PARSING
- || parser->m_parsingStatus.parsing == XML_SUSPENDED)
+ if (parserBusy(parser))
return XML_STATUS_ERROR;
/* Get rid of any previous encoding name */
@@ -1559,7 +1647,34 @@
entityList = entityList->next;
FREE(parser, openEntity);
}
-
+ /* free m_openAttributeEntities and m_freeAttributeEntities */
+ entityList = parser->m_openAttributeEntities;
+ for (;;) {
+ OPEN_INTERNAL_ENTITY *openEntity;
+ if (entityList == NULL) {
+ if (parser->m_freeAttributeEntities == NULL)
+ break;
+ entityList = parser->m_freeAttributeEntities;
+ parser->m_freeAttributeEntities = NULL;
+ }
+ openEntity = entityList;
+ entityList = entityList->next;
+ FREE(parser, openEntity);
+ }
+ /* free m_openValueEntities and m_freeValueEntities */
+ entityList = parser->m_openValueEntities;
+ for (;;) {
+ OPEN_INTERNAL_ENTITY *openEntity;
+ if (entityList == NULL) {
+ if (parser->m_freeValueEntities == NULL)
+ break;
+ entityList = parser->m_freeValueEntities;
+ parser->m_freeValueEntities = NULL;
+ }
+ openEntity = entityList;
+ entityList = entityList->next;
+ FREE(parser, openEntity);
+ }
destroyBindings(parser->m_freeBindingList, parser);
destroyBindings(parser->m_inheritedBindings, parser);
poolDestroy(&parser->m_tempPool);
@@ -1601,8 +1716,7 @@
return XML_ERROR_INVALID_ARGUMENT;
#ifdef XML_DTD
/* block after XML_Parse()/XML_ParseBuffer() has been called */
- if (parser->m_parsingStatus.parsing == XML_PARSING
- || parser->m_parsingStatus.parsing == XML_SUSPENDED)
+ if (parserBusy(parser))
return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
parser->m_useForeignDTD = useDTD;
return XML_ERROR_NONE;
@@ -1617,8 +1731,7 @@
if (parser == NULL)
return;
/* block after XML_Parse()/XML_ParseBuffer() has been called */
- if (parser->m_parsingStatus.parsing == XML_PARSING
- || parser->m_parsingStatus.parsing == XML_SUSPENDED)
+ if (parserBusy(parser))
return;
parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
}
@@ -1887,8 +2000,7 @@
if (parser == NULL)
return 0;
/* block after XML_Parse()/XML_ParseBuffer() has been called */
- if (parser->m_parsingStatus.parsing == XML_PARSING
- || parser->m_parsingStatus.parsing == XML_SUSPENDED)
+ if (parserBusy(parser))
return 0;
#ifdef XML_DTD
parser->m_paramEntityParsing = peParsing;
@@ -1905,8 +2017,7 @@
if (parser->m_parentParser)
return XML_SetHashSalt(parser->m_parentParser, hash_salt);
/* block after XML_Parse()/XML_ParseBuffer() has been called */
- if (parser->m_parsingStatus.parsing == XML_PARSING
- || parser->m_parsingStatus.parsing == XML_SUSPENDED)
+ if (parserBusy(parser))
return 0;
parser->m_hash_secret_salt = hash_salt;
return 1;
@@ -2030,6 +2141,12 @@
if (parser == NULL)
return XML_STATUS_ERROR;
+
+ if (len < 0) {
+ parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
+ return XML_STATUS_ERROR;
+ }
+
switch (parser->m_parsingStatus.parsing) {
case XML_SUSPENDED:
parser->m_errorCode = XML_ERROR_SUSPENDED;
@@ -2214,11 +2331,19 @@
return parser->m_bufferEnd;
}
+static void
+triggerReenter(XML_Parser parser) {
+ parser->m_reenter = XML_TRUE;
+}
+
enum XML_Status XMLCALL
XML_StopParser(XML_Parser parser, XML_Bool resumable) {
if (parser == NULL)
return XML_STATUS_ERROR;
switch (parser->m_parsingStatus.parsing) {
+ case XML_INITIALIZED:
+ parser->m_errorCode = XML_ERROR_NOT_STARTED;
+ return XML_STATUS_ERROR;
case XML_SUSPENDED:
if (resumable) {
parser->m_errorCode = XML_ERROR_SUSPENDED;
@@ -2229,7 +2354,7 @@
case XML_FINISHED:
parser->m_errorCode = XML_ERROR_FINISHED;
return XML_STATUS_ERROR;
- default:
+ case XML_PARSING:
if (resumable) {
#ifdef XML_DTD
if (parser->m_isParamEntity) {
@@ -2240,6 +2365,9 @@
parser->m_parsingStatus.parsing = XML_SUSPENDED;
} else
parser->m_parsingStatus.parsing = XML_FINISHED;
+ break;
+ default:
+ assert(0);
}
return XML_STATUS_OK;
}
@@ -2504,6 +2632,9 @@
case XML_ERROR_AMPLIFICATION_LIMIT_BREACH:
return XML_L(
"limit on input amplification factor (from DTD and entities) breached");
+ /* Added in 2.6.4. */
+ case XML_ERROR_NOT_STARTED:
+ return XML_L("parser not started");
}
return NULL;
}
@@ -2679,8 +2810,9 @@
contentProcessor(XML_Parser parser, const char *start, const char *end,
const char **endPtr) {
enum XML_Error result = doContent(
- parser, 0, parser->m_encoding, start, end, endPtr,
- (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
+ parser, parser->m_parentParser ? 1 : 0, parser->m_encoding, start, end,
+ endPtr, (XML_Bool)! parser->m_parsingStatus.finalBuffer,
+ XML_ACCOUNT_DIRECT);
if (result == XML_ERROR_NONE) {
if (! storeRawNames(parser))
return XML_ERROR_NO_MEMORY;
@@ -2768,6 +2900,11 @@
return XML_ERROR_NONE;
case XML_FINISHED:
return XML_ERROR_ABORTED;
+ case XML_PARSING:
+ if (parser->m_reenter) {
+ return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
+ }
+ /* Fall through */
default:
start = next;
}
@@ -2941,7 +3078,7 @@
reportDefault(parser, enc, s, next);
break;
}
- result = processInternalEntity(parser, entity, XML_FALSE);
+ result = processEntity(parser, entity, XML_FALSE, ENTITY_INTERNAL);
if (result != XML_ERROR_NONE)
return result;
} else if (parser->m_externalEntityRefHandler) {
@@ -3067,7 +3204,9 @@
}
if ((parser->m_tagLevel == 0)
&& (parser->m_parsingStatus.parsing != XML_FINISHED)) {
- if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
+ if (parser->m_parsingStatus.parsing == XML_SUSPENDED
+ || (parser->m_parsingStatus.parsing == XML_PARSING
+ && parser->m_reenter))
parser->m_processor = epilogProcessor;
else
return epilogProcessor(parser, next, end, nextPtr);
@@ -3128,7 +3267,9 @@
}
if ((parser->m_tagLevel == 0)
&& (parser->m_parsingStatus.parsing != XML_FINISHED)) {
- if (parser->m_parsingStatus.parsing == XML_SUSPENDED)
+ if (parser->m_parsingStatus.parsing == XML_SUSPENDED
+ || (parser->m_parsingStatus.parsing == XML_PARSING
+ && parser->m_reenter))
parser->m_processor = epilogProcessor;
else
return epilogProcessor(parser, next, end, nextPtr);
@@ -3261,14 +3402,22 @@
break;
/* LCOV_EXCL_STOP */
}
- *eventPP = s = next;
switch (parser->m_parsingStatus.parsing) {
case XML_SUSPENDED:
+ *eventPP = next;
*nextPtr = next;
return XML_ERROR_NONE;
case XML_FINISHED:
+ *eventPP = next;
return XML_ERROR_ABORTED;
+ case XML_PARSING:
+ if (parser->m_reenter) {
+ *nextPtr = next;
+ return XML_ERROR_NONE;
+ }
+ /* Fall through */
default:;
+ *eventPP = s = next;
}
}
/* not reached */
@@ -4185,14 +4334,21 @@
/* LCOV_EXCL_STOP */
}
- *eventPP = s = next;
switch (parser->m_parsingStatus.parsing) {
case XML_SUSPENDED:
+ *eventPP = next;
*nextPtr = next;
return XML_ERROR_NONE;
case XML_FINISHED:
+ *eventPP = next;
return XML_ERROR_ABORTED;
+ case XML_PARSING:
+ if (parser->m_reenter) {
+ return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
+ }
+ /* Fall through */
default:;
+ *eventPP = s = next;
}
}
/* not reached */
@@ -4524,7 +4680,7 @@
}
/* found end of entity value - can store it now */
return storeEntityValue(parser, parser->m_encoding, s, end,
- XML_ACCOUNT_DIRECT);
+ XML_ACCOUNT_DIRECT, NULL);
} else if (tok == XML_TOK_XML_DECL) {
enum XML_Error result;
result = processXmlDecl(parser, 0, start, next);
@@ -4651,7 +4807,7 @@
break;
}
/* found end of entity value - can store it now */
- return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT);
+ return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT, NULL);
}
start = next;
}
@@ -5094,9 +5250,9 @@
#if XML_GE == 1
// This will store the given replacement text in
// parser->m_declEntity->textPtr.
- enum XML_Error result
- = storeEntityValue(parser, enc, s + enc->minBytesPerChar,
- next - enc->minBytesPerChar, XML_ACCOUNT_NONE);
+ enum XML_Error result = callStoreEntityValue(
+ parser, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar,
+ XML_ACCOUNT_NONE);
if (parser->m_declEntity) {
parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
parser->m_declEntity->textLen
@@ -5521,7 +5677,7 @@
enum XML_Error result;
XML_Bool betweenDecl
= (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
- result = processInternalEntity(parser, entity, betweenDecl);
+ result = processEntity(parser, entity, betweenDecl, ENTITY_INTERNAL);
if (result != XML_ERROR_NONE)
return result;
handleDefault = XML_FALSE;
@@ -5726,6 +5882,12 @@
return XML_ERROR_NONE;
case XML_FINISHED:
return XML_ERROR_ABORTED;
+ case XML_PARSING:
+ if (parser->m_reenter) {
+ *nextPtr = next;
+ return XML_ERROR_NONE;
+ }
+ /* Fall through */
default:
s = next;
tok = XmlPrologTok(enc, s, end, &next);
@@ -5793,28 +5955,58 @@
default:
return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
}
- parser->m_eventPtr = s = next;
switch (parser->m_parsingStatus.parsing) {
case XML_SUSPENDED:
+ parser->m_eventPtr = next;
*nextPtr = next;
return XML_ERROR_NONE;
case XML_FINISHED:
+ parser->m_eventPtr = next;
return XML_ERROR_ABORTED;
+ case XML_PARSING:
+ if (parser->m_reenter) {
+ return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
+ }
+ /* Fall through */
default:;
+ parser->m_eventPtr = s = next;
}
}
}
static enum XML_Error
-processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) {
- const char *textStart, *textEnd;
- const char *next;
- enum XML_Error result;
- OPEN_INTERNAL_ENTITY *openEntity;
+processEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl,
+ enum EntityType type) {
+ OPEN_INTERNAL_ENTITY *openEntity, **openEntityList, **freeEntityList;
+ switch (type) {
+ case ENTITY_INTERNAL:
+ parser->m_processor = internalEntityProcessor;
+ openEntityList = &parser->m_openInternalEntities;
+ freeEntityList = &parser->m_freeInternalEntities;
+ break;
+ case ENTITY_ATTRIBUTE:
+ openEntityList = &parser->m_openAttributeEntities;
+ freeEntityList = &parser->m_freeAttributeEntities;
+ break;
+ case ENTITY_VALUE:
+ openEntityList = &parser->m_openValueEntities;
+ freeEntityList = &parser->m_freeValueEntities;
+ break;
+ /* default case serves merely as a safety net in case of a
+ * wrong entityType. Therefore we exclude the following lines
+ * from the test coverage.
+ *
+ * LCOV_EXCL_START
+ */
+ default:
+ // Should not reach here
+ assert(0);
+ /* LCOV_EXCL_STOP */
+ }
- if (parser->m_freeInternalEntities) {
- openEntity = parser->m_freeInternalEntities;
- parser->m_freeInternalEntities = openEntity->next;
+ if (*freeEntityList) {
+ openEntity = *freeEntityList;
+ *freeEntityList = openEntity->next;
} else {
openEntity
= (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
@@ -5822,56 +6014,34 @@
return XML_ERROR_NO_MEMORY;
}
entity->open = XML_TRUE;
+ entity->hasMore = XML_TRUE;
#if XML_GE == 1
entityTrackingOnOpen(parser, entity, __LINE__);
#endif
entity->processed = 0;
- openEntity->next = parser->m_openInternalEntities;
- parser->m_openInternalEntities = openEntity;
+ openEntity->next = *openEntityList;
+ *openEntityList = openEntity;
openEntity->entity = entity;
+ openEntity->type = type;
openEntity->startTagLevel = parser->m_tagLevel;
openEntity->betweenDecl = betweenDecl;
openEntity->internalEventPtr = NULL;
openEntity->internalEventEndPtr = NULL;
- textStart = (const char *)entity->textPtr;
- textEnd = (const char *)(entity->textPtr + entity->textLen);
- /* Set a safe default value in case 'next' does not get set */
- next = textStart;
-#ifdef XML_DTD
- if (entity->is_param) {
- int tok
- = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
- result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
- tok, next, &next, XML_FALSE, XML_FALSE,
- XML_ACCOUNT_ENTITY_EXPANSION);
- } else
-#endif /* XML_DTD */
- result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding,
- textStart, textEnd, &next, XML_FALSE,
- XML_ACCOUNT_ENTITY_EXPANSION);
-
- if (result == XML_ERROR_NONE) {
- if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
- entity->processed = (int)(next - textStart);
- parser->m_processor = internalEntityProcessor;
- } else if (parser->m_openInternalEntities->entity == entity) {
-#if XML_GE == 1
- entityTrackingOnClose(parser, entity, __LINE__);
-#endif /* XML_GE == 1 */
- entity->open = XML_FALSE;
- parser->m_openInternalEntities = openEntity->next;
- /* put openEntity back in list of free instances */
- openEntity->next = parser->m_freeInternalEntities;
- parser->m_freeInternalEntities = openEntity;
- }
+ // Only internal entities make use of the reenter flag
+ // therefore no need to set it for other entity types
+ if (type == ENTITY_INTERNAL) {
+ triggerReenter(parser);
}
- return result;
+ return XML_ERROR_NONE;
}
static enum XML_Error PTRCALL
internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
const char **nextPtr) {
+ UNUSED_P(s);
+ UNUSED_P(end);
+ UNUSED_P(nextPtr);
ENTITY *entity;
const char *textStart, *textEnd;
const char *next;
@@ -5881,72 +6051,67 @@
return XML_ERROR_UNEXPECTED_STATE;
entity = openEntity->entity;
- textStart = ((const char *)entity->textPtr) + entity->processed;
- textEnd = (const char *)(entity->textPtr + entity->textLen);
- /* Set a safe default value in case 'next' does not get set */
- next = textStart;
-#ifdef XML_DTD
- if (entity->is_param) {
- int tok
- = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
- result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
- tok, next, &next, XML_FALSE, XML_TRUE,
- XML_ACCOUNT_ENTITY_EXPANSION);
- } else
-#endif /* XML_DTD */
- result = doContent(parser, openEntity->startTagLevel,
- parser->m_internalEncoding, textStart, textEnd, &next,
- XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
+ // This will return early
+ if (entity->hasMore) {
+ textStart = ((const char *)entity->textPtr) + entity->processed;
+ textEnd = (const char *)(entity->textPtr + entity->textLen);
+ /* Set a safe default value in case 'next' does not get set */
+ next = textStart;
+
+ if (entity->is_param) {
+ int tok
+ = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
+ result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
+ tok, next, &next, XML_FALSE, XML_FALSE,
+ XML_ACCOUNT_ENTITY_EXPANSION);
+ } else {
+ result = doContent(parser, openEntity->startTagLevel,
+ parser->m_internalEncoding, textStart, textEnd, &next,
+ XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
+ }
- if (result != XML_ERROR_NONE)
- return result;
+ if (result != XML_ERROR_NONE)
+ return result;
+ // Check if entity is complete, if not, mark down how much of it is
+ // processed
+ if (textEnd != next
+ && (parser->m_parsingStatus.parsing == XML_SUSPENDED
+ || (parser->m_parsingStatus.parsing == XML_PARSING
+ && parser->m_reenter))) {
+ entity->processed = (int)(next - (const char *)entity->textPtr);
+ return result;
+ }
- if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
- entity->processed = (int)(next - (const char *)entity->textPtr);
+ // Entity is complete. We cannot close it here since we need to first
+ // process its possible inner entities (which are added to the
+ // m_openInternalEntities during doProlog or doContent calls above)
+ entity->hasMore = XML_FALSE;
+ triggerReenter(parser);
return result;
- }
+ } // End of entity processing, "if" block will return here
+ // Remove fully processed openEntity from open entity list.
#if XML_GE == 1
entityTrackingOnClose(parser, entity, __LINE__);
#endif
+ // openEntity is m_openInternalEntities' head, as we set it at the start of
+ // this function and we skipped doProlog and doContent calls with hasMore set
+ // to false. This means we can directly remove the head of
+ // m_openInternalEntities
+ assert(parser->m_openInternalEntities == openEntity);
entity->open = XML_FALSE;
- parser->m_openInternalEntities = openEntity->next;
+ parser->m_openInternalEntities = parser->m_openInternalEntities->next;
+
/* put openEntity back in list of free instances */
openEntity->next = parser->m_freeInternalEntities;
parser->m_freeInternalEntities = openEntity;
- // If there are more open entities we want to stop right here and have the
- // upcoming call to XML_ResumeParser continue with entity content, or it would
- // be ignored altogether.
- if (parser->m_openInternalEntities != NULL
- && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
- return XML_ERROR_NONE;
- }
-
-#ifdef XML_DTD
- if (entity->is_param) {
- int tok;
- parser->m_processor = prologProcessor;
- tok = XmlPrologTok(parser->m_encoding, s, end, &next);
- return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
- (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
- XML_ACCOUNT_DIRECT);
- } else
-#endif /* XML_DTD */
- {
- parser->m_processor = contentProcessor;
- /* see externalEntityContentProcessor vs contentProcessor */
- result = doContent(parser, parser->m_parentParser ? 1 : 0,
- parser->m_encoding, s, end, nextPtr,
- (XML_Bool)! parser->m_parsingStatus.finalBuffer,
- XML_ACCOUNT_DIRECT);
- if (result == XML_ERROR_NONE) {
- if (! storeRawNames(parser))
- return XML_ERROR_NO_MEMORY;
- }
- return result;
+ if (parser->m_openInternalEntities == NULL) {
+ parser->m_processor = entity->is_param ? prologProcessor : contentProcessor;
}
+ triggerReenter(parser);
+ return XML_ERROR_NONE;
}
static enum XML_Error PTRCALL
@@ -5962,8 +6127,70 @@
storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
const char *ptr, const char *end, STRING_POOL *pool,
enum XML_Account account) {
- enum XML_Error result
- = appendAttributeValue(parser, enc, isCdata, ptr, end, pool, account);
+ const char *next = ptr;
+ enum XML_Error result = XML_ERROR_NONE;
+
+ while (1) {
+ if (! parser->m_openAttributeEntities) {
+ result = appendAttributeValue(parser, enc, isCdata, next, end, pool,
+ account, &next);
+ } else {
+ OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openAttributeEntities;
+ if (! openEntity)
+ return XML_ERROR_UNEXPECTED_STATE;
+
+ ENTITY *const entity = openEntity->entity;
+ const char *const textStart
+ = ((const char *)entity->textPtr) + entity->processed;
+ const char *const textEnd
+ = (const char *)(entity->textPtr + entity->textLen);
+ /* Set a safe default value in case 'next' does not get set */
+ const char *nextInEntity = textStart;
+ if (entity->hasMore) {
+ result = appendAttributeValue(
+ parser, parser->m_internalEncoding, isCdata, textStart, textEnd,
+ pool, XML_ACCOUNT_ENTITY_EXPANSION, &nextInEntity);
+ if (result != XML_ERROR_NONE)
+ break;
+ // Check if entity is complete, if not, mark down how much of it is
+ // processed. A XML_SUSPENDED check here is not required as
+ // appendAttributeValue will never suspend the parser.
+ if (textEnd != nextInEntity) {
+ entity->processed
+ = (int)(nextInEntity - (const char *)entity->textPtr);
+ continue;
+ }
+
+ // Entity is complete. We cannot close it here since we need to first
+ // process its possible inner entities (which are added to the
+ // m_openAttributeEntities during appendAttributeValue)
+ entity->hasMore = XML_FALSE;
+ continue;
+ } // End of entity processing, "if" block skips the rest
+
+ // Remove fully processed openEntity from open entity list.
+#if XML_GE == 1
+ entityTrackingOnClose(parser, entity, __LINE__);
+#endif
+ // openEntity is m_openAttributeEntities' head, since we set it at the
+ // start of this function and because we skipped appendAttributeValue call
+ // with hasMore set to false. This means we can directly remove the head
+ // of m_openAttributeEntities
+ assert(parser->m_openAttributeEntities == openEntity);
+ entity->open = XML_FALSE;
+ parser->m_openAttributeEntities = parser->m_openAttributeEntities->next;
+
+ /* put openEntity back in list of free instances */
+ openEntity->next = parser->m_freeAttributeEntities;
+ parser->m_freeAttributeEntities = openEntity;
+ }
+
+ // Break if an error occurred or there is nothing left to process
+ if (result || (parser->m_openAttributeEntities == NULL && end == next)) {
+ break;
+ }
+ }
+
if (result)
return result;
if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
@@ -5976,7 +6203,7 @@
static enum XML_Error
appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
const char *ptr, const char *end, STRING_POOL *pool,
- enum XML_Account account) {
+ enum XML_Account account, const char **nextPtr) {
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
#ifndef XML_DTD
UNUSED_P(account);
@@ -5994,6 +6221,9 @@
#endif
switch (tok) {
case XML_TOK_NONE:
+ if (nextPtr) {
+ *nextPtr = next;
+ }
return XML_ERROR_NONE;
case XML_TOK_INVALID:
if (enc == parser->m_encoding)
@@ -6134,21 +6364,11 @@
return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
} else {
enum XML_Error result;
- const XML_Char *textEnd = entity->textPtr + entity->textLen;
- entity->open = XML_TRUE;
-#if XML_GE == 1
- entityTrackingOnOpen(parser, entity, __LINE__);
-#endif
- result = appendAttributeValue(parser, parser->m_internalEncoding,
- isCdata, (const char *)entity->textPtr,
- (const char *)textEnd, pool,
- XML_ACCOUNT_ENTITY_EXPANSION);
-#if XML_GE == 1
- entityTrackingOnClose(parser, entity, __LINE__);
-#endif
- entity->open = XML_FALSE;
- if (result)
- return result;
+ result = processEntity(parser, entity, XML_FALSE, ENTITY_ATTRIBUTE);
+ if ((result == XML_ERROR_NONE) && (nextPtr != NULL)) {
+ *nextPtr = next;
+ }
+ return result;
}
} break;
default:
@@ -6177,7 +6397,7 @@
static enum XML_Error
storeEntityValue(XML_Parser parser, const ENCODING *enc,
const char *entityTextPtr, const char *entityTextEnd,
- enum XML_Account account) {
+ enum XML_Account account, const char **nextPtr) {
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
STRING_POOL *pool = &(dtd->entityValuePool);
enum XML_Error result = XML_ERROR_NONE;
@@ -6195,8 +6415,9 @@
return XML_ERROR_NO_MEMORY;
}
+ const char *next;
for (;;) {
- const char *next
+ next
= entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */
int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
@@ -6232,7 +6453,7 @@
dtd->keepProcessing = dtd->standalone;
goto endEntityValue;
}
- if (entity->open) {
+ if (entity->open || (entity == parser->m_declEntity)) {
if (enc == parser->m_encoding)
parser->m_eventPtr = entityTextPtr;
result = XML_ERROR_RECURSIVE_ENTITY_REF;
@@ -6258,16 +6479,8 @@
} else
dtd->keepProcessing = dtd->standalone;
} else {
- entity->open = XML_TRUE;
- entityTrackingOnOpen(parser, entity, __LINE__);
- result = storeEntityValue(
- parser, parser->m_internalEncoding, (const char *)entity->textPtr,
- (const char *)(entity->textPtr + entity->textLen),
- XML_ACCOUNT_ENTITY_EXPANSION);
- entityTrackingOnClose(parser, entity, __LINE__);
- entity->open = XML_FALSE;
- if (result)
- goto endEntityValue;
+ result = processEntity(parser, entity, XML_FALSE, ENTITY_VALUE);
+ goto endEntityValue;
}
break;
}
@@ -6355,6 +6568,81 @@
# ifdef XML_DTD
parser->m_prologState.inEntityValue = oldInEntityValue;
# endif /* XML_DTD */
+ // If 'nextPtr' is given, it should be updated during the processing
+ if (nextPtr != NULL) {
+ *nextPtr = next;
+ }
+ return result;
+}
+
+static enum XML_Error
+callStoreEntityValue(XML_Parser parser, const ENCODING *enc,
+ const char *entityTextPtr, const char *entityTextEnd,
+ enum XML_Account account) {
+ const char *next = entityTextPtr;
+ enum XML_Error result = XML_ERROR_NONE;
+ while (1) {
+ if (! parser->m_openValueEntities) {
+ result
+ = storeEntityValue(parser, enc, next, entityTextEnd, account, &next);
+ } else {
+ OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openValueEntities;
+ if (! openEntity)
+ return XML_ERROR_UNEXPECTED_STATE;
+
+ ENTITY *const entity = openEntity->entity;
+ const char *const textStart
+ = ((const char *)entity->textPtr) + entity->processed;
+ const char *const textEnd
+ = (const char *)(entity->textPtr + entity->textLen);
+ /* Set a safe default value in case 'next' does not get set */
+ const char *nextInEntity = textStart;
+ if (entity->hasMore) {
+ result = storeEntityValue(parser, parser->m_internalEncoding, textStart,
+ textEnd, XML_ACCOUNT_ENTITY_EXPANSION,
+ &nextInEntity);
+ if (result != XML_ERROR_NONE)
+ break;
+ // Check if entity is complete, if not, mark down how much of it is
+ // processed. A XML_SUSPENDED check here is not required as
+ // storeEntityValue will never suspend the parser.
+ if (textEnd != nextInEntity) {
+ entity->processed
+ = (int)(nextInEntity - (const char *)entity->textPtr);
+ continue;
+ }
+
+ // Entity is complete. We cannot close it here since we need to first
+ // process its possible inner entities (which are added to the
+ // m_openValueEntities during storeEntityValue)
+ entity->hasMore = XML_FALSE;
+ continue;
+ } // End of entity processing, "if" block skips the rest
+
+ // Remove fully processed openEntity from open entity list.
+# if XML_GE == 1
+ entityTrackingOnClose(parser, entity, __LINE__);
+# endif
+ // openEntity is m_openValueEntities' head, since we set it at the
+ // start of this function and because we skipped storeEntityValue call
+ // with hasMore set to false. This means we can directly remove the head
+ // of m_openValueEntities
+ assert(parser->m_openValueEntities == openEntity);
+ entity->open = XML_FALSE;
+ parser->m_openValueEntities = parser->m_openValueEntities->next;
+
+ /* put openEntity back in list of free instances */
+ openEntity->next = parser->m_freeValueEntities;
+ parser->m_freeValueEntities = openEntity;
+ }
+
+ // Break if an error occurred or there is nothing left to process
+ if (result
+ || (parser->m_openValueEntities == NULL && entityTextEnd == next)) {
+ break;
+ }
+ }
+
return result;
}
@@ -7008,6 +7296,16 @@
if (! newE)
return 0;
if (oldE->nDefaultAtts) {
+ /* Detect and prevent integer overflow.
+ * The preprocessor guard addresses the "always false" warning
+ * from -Wtype-limits on platforms where
+ * sizeof(int) < sizeof(size_t), e.g. on x86_64. */
+#if UINT_MAX >= SIZE_MAX
+ if ((size_t)oldE->nDefaultAtts
+ > ((size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE))) {
+ return 0;
+ }
+#endif
newE->defaultAtts
= ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
if (! newE->defaultAtts) {
@@ -7550,6 +7848,15 @@
int next;
if (! dtd->scaffIndex) {
+ /* Detect and prevent integer overflow.
+ * The preprocessor guard addresses the "always false" warning
+ * from -Wtype-limits on platforms where
+ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
+#if UINT_MAX >= SIZE_MAX
+ if (parser->m_groupSize > ((size_t)(-1) / sizeof(int))) {
+ return -1;
+ }
+#endif
dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int));
if (! dtd->scaffIndex)
return -1;
@@ -7779,6 +8086,8 @@
static float
accountingGetCurrentAmplification(XML_Parser rootParser) {
+ // 1.........1.........12 => 22
+ const size_t lenOfShortestInclude = sizeof("<!ENTITY a SYSTEM 'b'>") - 1;
const XmlBigCount countBytesOutput
= rootParser->m_accounting.countBytesDirect
+ rootParser->m_accounting.countBytesIndirect;
@@ -7786,7 +8095,9 @@
= rootParser->m_accounting.countBytesDirect
? (countBytesOutput
/ (float)(rootParser->m_accounting.countBytesDirect))
- : 1.0f;
+ : ((lenOfShortestInclude
+ + rootParser->m_accounting.countBytesIndirect)
+ / (float)lenOfShortestInclude);
assert(! rootParser->m_parentParser);
return amplificationFactor;
}
@@ -7823,7 +8134,7 @@
assert(! rootParser->m_parentParser);
fprintf(stderr,
- " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%d, xmlparse.c:%d) %*s\"",
+ " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%u, xmlparse.c:%d) %*s\"",
bytesMore, (account == XML_ACCOUNT_DIRECT) ? "DIR" : "EXP",
levelsAwayFromRootParser, source_line, 10, "");
@@ -7936,11 +8247,11 @@
fprintf(
stderr,
- "expat: Entities(%p): Count %9d, depth %2d/%2d %*s%s%s; %s length %d (xmlparse.c:%d)\n",
+ "expat: Entities(%p): Count %9u, depth %2u/%2u %*s%s%s; %s length %d (xmlparse.c:%d)\n",
(void *)rootParser, rootParser->m_entity_stats.countEverOpened,
rootParser->m_entity_stats.currentDepth,
rootParser->m_entity_stats.maximumDepthSeen,
- (rootParser->m_entity_stats.currentDepth - 1) * 2, "",
+ ((int)rootParser->m_entity_stats.currentDepth - 1) * 2, "",
entity->is_param ? "%" : "&", entityName, action, entity->textLen,
sourceLine);
}
@@ -8499,11 +8810,13 @@
return "\\xFE";
case 255:
return "\\xFF";
+ // LCOV_EXCL_START
default:
assert(0); /* never gets here */
return "dead code";
}
assert(0); /* never gets here */
+ // LCOV_EXCL_STOP
}
#endif /* XML_GE == 1 */
--- contrib/expat/tests/Makefile.am.orig
+++ contrib/expat/tests/Makefile.am
@@ -6,9 +6,10 @@
# \___/_/\_\ .__/ \__,_|\__|
# |_| XML parser
#
-# Copyright (c) 2017-2023 Sebastian Pipping
+# Copyright (c) 2017-2024 Sebastian Pipping
# Copyright (c) 2017-2022 Rhodri James
# Copyright (c) 2020 Jeffrey Walton
+# Copyright (c) 2024 Dag-Erling Smørgrav
# Licensed under the MIT license:
#
# Permission is hereby granted, free of charge, to any person obtaining
@@ -32,7 +33,7 @@
SUBDIRS = . benchmark
-AM_CPPFLAGS = @AM_CPPFLAGS@ -I$(srcdir)/../lib
+AM_CPPFLAGS = @AM_CPPFLAGS@ -I$(srcdir)/../lib -DXML_TESTING
check_PROGRAMS = runtests runtests_cxx
TESTS = runtests runtests_cxx
@@ -72,8 +73,8 @@
runtests_cxx.cpp \
structdata_cxx.cpp
-runtests_LDADD = ../lib/libexpatinternal.la
-runtests_cxx_LDADD = ../lib/libexpatinternal.la
+runtests_LDADD = ../lib/libtestpat.la
+runtests_cxx_LDADD = ../lib/libtestpat.la
runtests_LDFLAGS = @AM_LDFLAGS@ @LIBM@
runtests_cxx_LDFLAGS = @AM_LDFLAGS@ @LIBM@
@@ -92,7 +93,7 @@
structdata.h \
minicheck.h \
memcheck.h \
- README.txt \
+ README.md \
udiffer.py \
xmltest.log.expected \
xmltest.sh
--- contrib/expat/tests/Makefile.in.orig
+++ contrib/expat/tests/Makefile.in
@@ -22,9 +22,10 @@
# \___/_/\_\ .__/ \__,_|\__|
# |_| XML parser
#
-# Copyright (c) 2017-2023 Sebastian Pipping
+# Copyright (c) 2017-2024 Sebastian Pipping
# Copyright (c) 2017-2022 Rhodri James
# Copyright (c) 2020 Jeffrey Walton
+# Copyright (c) 2024 Dag-Erling Smørgrav
# Licensed under the MIT license:
#
# Permission is hereby granted, free of charge, to any person obtaining
@@ -151,7 +152,7 @@
nsalloc_tests.$(OBJEXT) runtests.$(OBJEXT) \
structdata.$(OBJEXT)
runtests_OBJECTS = $(am_runtests_OBJECTS)
-runtests_DEPENDENCIES = ../lib/libexpatinternal.la
+runtests_DEPENDENCIES = ../lib/libtestpat.la
AM_V_lt = $(am__v_lt_@AM_V@)
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
am__v_lt_0 = --silent
@@ -168,7 +169,7 @@
ns_tests_cxx.$(OBJEXT) runtests_cxx.$(OBJEXT) \
structdata_cxx.$(OBJEXT)
runtests_cxx_OBJECTS = $(am_runtests_cxx_OBJECTS)
-runtests_cxx_DEPENDENCIES = ../lib/libexpatinternal.la
+runtests_cxx_DEPENDENCIES = ../lib/libtestpat.la
runtests_cxx_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
$(CXXFLAGS) $(runtests_cxx_LDFLAGS) $(LDFLAGS) -o $@
@@ -485,7 +486,7 @@
DIST_SUBDIRS = $(SUBDIRS)
am__DIST_COMMON = $(srcdir)/Makefile.in \
$(top_srcdir)/conftools/depcomp \
- $(top_srcdir)/conftools/test-driver
+ $(top_srcdir)/conftools/test-driver README.md
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
am__relativize = \
dir0=`pwd`; \
@@ -515,7 +516,7 @@
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AM_CFLAGS = @AM_CFLAGS@
-AM_CPPFLAGS = @AM_CPPFLAGS@ -I$(srcdir)/../lib
+AM_CPPFLAGS = @AM_CPPFLAGS@ -I$(srcdir)/../lib -DXML_TESTING
AM_CXXFLAGS = @AM_CXXFLAGS@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AM_LDFLAGS = @AM_LDFLAGS@
@@ -602,6 +603,7 @@
SED = @SED@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
+SIZEOF_VOID_P = @SIZEOF_VOID_P@
SO_MAJOR = @SO_MAJOR@
SO_MINOR = @SO_MINOR@
SO_PATCH = @SO_PATCH@
@@ -615,7 +617,6 @@
ac_ct_CC = @ac_ct_CC@
ac_ct_CXX = @ac_ct_CXX@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-ac_cv_sizeof_void_p = @ac_cv_sizeof_void_p@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
@@ -698,8 +699,8 @@
runtests_cxx.cpp \
structdata_cxx.cpp
-runtests_LDADD = ../lib/libexpatinternal.la
-runtests_cxx_LDADD = ../lib/libexpatinternal.la
+runtests_LDADD = ../lib/libtestpat.la
+runtests_cxx_LDADD = ../lib/libtestpat.la
runtests_LDFLAGS = @AM_LDFLAGS@ @LIBM@
runtests_cxx_LDFLAGS = @AM_LDFLAGS@ @LIBM@
EXTRA_DIST = \
@@ -716,7 +717,7 @@
structdata.h \
minicheck.h \
memcheck.h \
- README.txt \
+ README.md \
udiffer.py \
xmltest.log.expected \
xmltest.sh
--- /dev/null
+++ contrib/expat/tests/README.md
@@ -0,0 +1,11 @@
+This directory contains the test suite for Expat. The tests provide
+general unit testing and regression coverage. The tests are not
+expected to be useful examples of Expat usage; see the
+[examples](../examples) directory for that.
+
+The Expat tests use a partial internal implementation of the
+[Check](https://libcheck.github.io/check/) unit testing framework for
+C.
+
+Expat must be built and, on some platforms, installed, before the
+tests can be run.
--- contrib/expat/tests/README.txt.orig
+++ contrib/expat/tests/README.txt
@@ -1,13 +0,0 @@
-This directory contains the (fledgling) test suite for Expat. The
-tests provide general unit testing and regression coverage. The tests
-are not expected to be useful examples of Expat usage; see the
-examples/ directory for that.
-
-The Expat tests use a partial internal implementation of the "Check"
-unit testing framework for C. More information on Check can be found at:
-
- http://check.sourceforge.net/
-
-Expat must be built and, depending on platform, must be installed, before "make check" can be executed.
-
-This test suite can all change in a later version.
--- contrib/expat/tests/acc_tests.c.orig
+++ contrib/expat/tests/acc_tests.c
@@ -360,13 +360,16 @@
START_TEST(test_helper_unsigned_char_to_printable) {
// Smoke test
unsigned char uc = 0;
- for (; uc < (unsigned char)-1; uc++) {
+ for (;; uc++) {
set_subtest("char %u", (unsigned)uc);
const char *const printable = unsignedCharToPrintable(uc);
if (printable == NULL)
fail("unsignedCharToPrintable returned NULL");
else if (strlen(printable) < (size_t)1)
fail("unsignedCharToPrintable returned empty string");
+ if (uc == (unsigned char)-1) {
+ break;
+ }
}
// Two concrete samples
@@ -378,6 +381,63 @@
fail("unsignedCharToPrintable result mistaken");
}
END_TEST
+
+START_TEST(test_amplification_isolated_external_parser) {
+ // NOTE: Length 44 is precisely twice the length of "<!ENTITY a SYSTEM 'b'>"
+ // (22) that is used in function accountingGetCurrentAmplification in
+ // xmlparse.c.
+ // 1.........1.........1.........1.........1..4 => 44
+ const char doc[] = "<!ENTITY % p1 '123456789_123456789_1234567'>";
+ const int docLen = (int)sizeof(doc) - 1;
+ const float maximumToleratedAmplification = 2.0f;
+
+ struct TestCase {
+ int offsetOfThreshold;
+ enum XML_Status expectedStatus;
+ };
+
+ struct TestCase cases[] = {
+ {-2, XML_STATUS_ERROR}, {-1, XML_STATUS_ERROR}, {0, XML_STATUS_ERROR},
+ {+1, XML_STATUS_OK}, {+2, XML_STATUS_OK},
+ };
+
+ for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
+ const int offsetOfThreshold = cases[i].offsetOfThreshold;
+ const enum XML_Status expectedStatus = cases[i].expectedStatus;
+ const unsigned long long activationThresholdBytes
+ = docLen + offsetOfThreshold;
+
+ set_subtest("offsetOfThreshold=%d, expectedStatus=%d", offsetOfThreshold,
+ expectedStatus);
+
+ XML_Parser parser = XML_ParserCreate(NULL);
+ assert_true(parser != NULL);
+
+ assert_true(XML_SetBillionLaughsAttackProtectionMaximumAmplification(
+ parser, maximumToleratedAmplification)
+ == XML_TRUE);
+ assert_true(XML_SetBillionLaughsAttackProtectionActivationThreshold(
+ parser, activationThresholdBytes)
+ == XML_TRUE);
+
+ XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
+ assert_true(ext_parser != NULL);
+
+ const enum XML_Status actualStatus
+ = _XML_Parse_SINGLE_BYTES(ext_parser, doc, docLen, XML_TRUE);
+
+ assert_true(actualStatus == expectedStatus);
+ if (actualStatus != XML_STATUS_OK) {
+ assert_true(XML_GetErrorCode(ext_parser)
+ == XML_ERROR_AMPLIFICATION_LIMIT_BREACH);
+ }
+
+ XML_ParserFree(ext_parser);
+ XML_ParserFree(parser);
+ }
+}
+END_TEST
+
#endif // XML_GE == 1
void
@@ -390,6 +450,8 @@
tcase_add_test(tc_accounting, test_accounting_precision);
tcase_add_test(tc_accounting, test_billion_laughs_attack_protection_api);
tcase_add_test(tc_accounting, test_helper_unsigned_char_to_printable);
+ tcase_add_test__ifdef_xml_dtd(tc_accounting,
+ test_amplification_isolated_external_parser);
#else
UNUSED_P(s);
#endif /* XML_GE == 1 */
--- contrib/expat/tests/alloc_tests.c.orig
+++ contrib/expat/tests/alloc_tests.c
@@ -19,6 +19,7 @@
Copyright (c) 2020 Tim Gates
Copyright (c) 2021 Donghee Na
Copyright (c) 2023 Sony Corporation / Snild Dolkow
+ Copyright (c) 2025 Berkay Eren Ürün
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -450,6 +451,31 @@
}
END_TEST
+START_TEST(test_alloc_parameter_entity) {
+ const char *text = "\">"
+ "%param1;"
+ "]> &internal;content";
+ int i;
+ const int alloc_test_max_repeats = 30;
+
+ for (i = 0; i < alloc_test_max_repeats; i++) {
+ g_allocation_count = i;
+ XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
+ if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
+ != XML_STATUS_ERROR)
+ break;
+ alloc_teardown();
+ alloc_setup();
+ }
+ g_allocation_count = -1;
+ if (i == 0)
+ fail("Parameter entity processed despite duff allocator");
+ if (i == alloc_test_max_repeats)
+ fail("Parameter entity not processed at max allocation count");
+}
+END_TEST
+
/* Test the robustness against allocation failure of element handling
* Based on test_dtd_default_handling().
*/
@@ -2079,6 +2105,7 @@
tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_external_entity);
tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_ext_entity_set_encoding);
tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_internal_entity);
+ tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_parameter_entity);
tcase_add_test__ifdef_xml_dtd(tc_alloc, test_alloc_dtd_default_handling);
tcase_add_test(tc_alloc, test_alloc_explicit_encoding);
tcase_add_test(tc_alloc, test_alloc_set_base);
--- contrib/expat/tests/basic_tests.c.orig
+++ contrib/expat/tests/basic_tests.c
@@ -10,7 +10,7 @@
Copyright (c) 2003 Greg Stein
Copyright (c) 2005-2007 Steven Solie
Copyright (c) 2005-2012 Karl Waclawek
- Copyright (c) 2016-2024 Sebastian Pipping
+ Copyright (c) 2016-2025 Sebastian Pipping
Copyright (c) 2017-2022 Rhodri James
Copyright (c) 2017 Joe Orton
Copyright (c) 2017 José Gutiérrez de la Concha
@@ -19,6 +19,7 @@
Copyright (c) 2020 Tim Gates
Copyright (c) 2021 Donghee Na
Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow
+ Copyright (c) 2024-2025 Berkay Eren Ürün
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -1191,6 +1192,22 @@
}
END_TEST
+START_TEST(test_entity_start_tag_level_greater_than_one) {
+ const char *const text = "\n"
+ "]>\n"
+ "\n"
+ " &e1;\n"
+ "\n";
+
+ XML_Parser parser = XML_ParserCreate(NULL);
+ assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text),
+ /*isFinal*/ XML_TRUE)
+ == XML_STATUS_OK);
+ XML_ParserFree(parser);
+}
+END_TEST
+
START_TEST(test_wfc_no_recursive_entity_refs) {
const char *text = "\n"
@@ -1202,6 +1219,136 @@
}
END_TEST
+START_TEST(test_no_indirectly_recursive_entity_refs) {
+ struct TestCase {
+ const char *doc;
+ bool usesParameterEntities;
+ };
+
+ const struct TestCase cases[] = {
+ // general entity + character data
+ {"\n"
+ " \n"
+ "]>&e2;\n",
+ false},
+
+ // general entity + attribute value
+ {"\n"
+ " \n"
+ "]>\n",
+ false},
+
+ // parameter entity
+ {"\n"
+ " \n"
+ " \">\n"
+ " %define_g;\n"
+ "]>\n"
+ "\n",
+ true},
+ };
+ const XML_Bool reset_or_not[] = {XML_TRUE, XML_FALSE};
+
+ for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
+ for (size_t j = 0; j < sizeof(reset_or_not) / sizeof(reset_or_not[0]);
+ j++) {
+ const XML_Bool reset_wanted = reset_or_not[j];
+ const char *const doc = cases[i].doc;
+ const bool usesParameterEntities = cases[i].usesParameterEntities;
+
+ set_subtest("[%i,reset=%i] %s", (int)i, (int)j, doc);
+
+#ifdef XML_DTD // both GE and DTD
+ const bool rejection_expected = true;
+#elif XML_GE == 1 // GE but not DTD
+ const bool rejection_expected = ! usesParameterEntities;
+#else // neither DTD nor GE
+ const bool rejection_expected = false;
+#endif
+
+ XML_Parser parser = XML_ParserCreate(NULL);
+
+#ifdef XML_DTD
+ if (usesParameterEntities) {
+ assert_true(
+ XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS)
+ == 1);
+ }
+#else
+ UNUSED_P(usesParameterEntities);
+#endif // XML_DTD
+
+ const enum XML_Status status
+ = _XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc),
+ /*isFinal*/ XML_TRUE);
+
+ if (rejection_expected) {
+ assert_true(status == XML_STATUS_ERROR);
+ assert_true(XML_GetErrorCode(parser) == XML_ERROR_RECURSIVE_ENTITY_REF);
+ } else {
+ assert_true(status == XML_STATUS_OK);
+ }
+
+ if (reset_wanted) {
+ // This covers free'ing of (eventually) all three open entity lists by
+ // XML_ParserReset.
+ XML_ParserReset(parser, NULL);
+ }
+
+ // This covers free'ing of (eventually) all three open entity lists by
+ // XML_ParserFree (unless XML_ParserReset has already done that above).
+ XML_ParserFree(parser);
+ }
+ }
+}
+END_TEST
+
+START_TEST(test_recursive_external_parameter_entity_2) {
+ struct TestCase {
+ const char *doc;
+ enum XML_Status expectedStatus;
+ };
+
+ struct TestCase cases[] = {
+ {"", XML_STATUS_ERROR},
+ {""
+ "",
+ XML_STATUS_ERROR},
+ {""
+ "",
+ XML_STATUS_OK},
+ {"", XML_STATUS_OK},
+ };
+
+ for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
+ const char *const doc = cases[i].doc;
+ const enum XML_Status expectedStatus = cases[i].expectedStatus;
+ set_subtest("%s", doc);
+
+ XML_Parser parser = XML_ParserCreate(NULL);
+ assert_true(parser != NULL);
+
+ XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
+ assert_true(ext_parser != NULL);
+
+ const enum XML_Status actualStatus
+ = _XML_Parse_SINGLE_BYTES(ext_parser, doc, (int)strlen(doc), XML_TRUE);
+
+ assert_true(actualStatus == expectedStatus);
+ if (actualStatus != XML_STATUS_OK) {
+ assert_true(XML_GetErrorCode(ext_parser)
+ == XML_ERROR_RECURSIVE_ENTITY_REF);
+ }
+
+ XML_ParserFree(ext_parser);
+ XML_ParserFree(parser);
+ }
+}
+END_TEST
+
/* Test incomplete external entities are faulted */
START_TEST(test_ext_entity_invalid_parse) {
const char *text = "\n"
"\n"
@@ -2335,7 +2507,9 @@
g_resumable = XML_TRUE;
XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
- if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
+ // can't use SINGLE_BYTES here, because it'll return early on suspension, and
+ // we won't know exactly how much input we actually managed to give Expat.
+ if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
== XML_STATUS_ERROR)
xml_failure(g_parser);
XML_GetParsingStatus(g_parser, &status);
@@ -2761,6 +2935,61 @@
}
END_TEST
+/* Test XML_Parse for len < 0 */
+START_TEST(test_negative_len_parse) {
+ const char *const doc = "";
+ for (int isFinal = 0; isFinal < 2; isFinal++) {
+ set_subtest("isFinal=%d", isFinal);
+
+ XML_Parser parser = XML_ParserCreate(NULL);
+
+ if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
+ fail("There was not supposed to be any initial parse error.");
+
+ const enum XML_Status status = XML_Parse(parser, doc, -1, isFinal);
+
+ if (status != XML_STATUS_ERROR)
+ fail("Negative len was expected to fail the parse but did not.");
+
+ if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
+ fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
+
+ XML_ParserFree(parser);
+ }
+}
+END_TEST
+
+/* Test XML_ParseBuffer for len < 0 */
+START_TEST(test_negative_len_parse_buffer) {
+ const char *const doc = "";
+ for (int isFinal = 0; isFinal < 2; isFinal++) {
+ set_subtest("isFinal=%d", isFinal);
+
+ XML_Parser parser = XML_ParserCreate(NULL);
+
+ if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
+ fail("There was not supposed to be any initial parse error.");
+
+ void *const buffer = XML_GetBuffer(parser, (int)strlen(doc));
+
+ if (buffer == NULL)
+ fail("XML_GetBuffer failed.");
+
+ memcpy(buffer, doc, strlen(doc));
+
+ const enum XML_Status status = XML_ParseBuffer(parser, -1, isFinal);
+
+ if (status != XML_STATUS_ERROR)
+ fail("Negative len was expected to fail the parse but did not.");
+
+ if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_ARGUMENT)
+ fail("Parse error does not match XML_ERROR_INVALID_ARGUMENT.");
+
+ XML_ParserFree(parser);
+ }
+}
+END_TEST
+
/* Test odd corners of the XML_GetBuffer interface */
static enum XML_Status
get_feature(enum XML_FeatureEnum feature_id, long *presult) {
@@ -3527,7 +3756,9 @@
XML_SetXmlDeclHandler(g_parser, entity_suspending_xdecl_handler);
XML_SetUserData(g_parser, g_parser);
g_resumable = XML_TRUE;
- if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
+ // can't use SINGLE_BYTES here, because it'll return early on suspension, and
+ // we won't know exactly how much input we actually managed to give Expat.
+ if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
!= XML_STATUS_SUSPENDED)
xml_failure(g_parser);
if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
@@ -3723,13 +3954,20 @@
/* Test syntax error is caught at parse resumption */
START_TEST(test_resume_entity_with_syntax_error) {
+ if (g_chunkSize != 0) {
+ // this test does not use SINGLE_BYTES, because of suspension
+ return;
+ }
+
const char *text = "Hi'>\n"
"]>\n"
"&foo;\n";
XML_SetStartElementHandler(g_parser, start_element_suspender);
- if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
+ // can't use SINGLE_BYTES here, because it'll return early on suspension, and
+ // we won't know exactly how much input we actually managed to give Expat.
+ if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
!= XML_STATUS_SUSPENDED)
xml_failure(g_parser);
if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR)
@@ -3853,7 +4091,7 @@
= {"\n"
"\n"
"%pe2;\n",
- external_entity_null_loader};
+ external_entity_null_loader, NULL};
XML_SetUserData(g_parser, &test_data);
XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
@@ -3871,7 +4109,7 @@
= {"\n"
"\n"
"%pe2;\n",
- NULL};
+ NULL, NULL};
XML_SetUserData(g_parser, &test_data);
XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
@@ -5171,6 +5409,151 @@
}
END_TEST
+/* Test a possible early return location in internalEntityProcessor */
+START_TEST(test_entity_ref_no_elements) {
+ const char *const text = "\n"
+ "]> &e1;"; // intentionally missing newline
+
+ XML_Parser parser = XML_ParserCreate(NULL);
+ assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
+ == XML_STATUS_ERROR);
+ assert_true(XML_GetErrorCode(parser) == XML_ERROR_NO_ELEMENTS);
+ XML_ParserFree(parser);
+}
+END_TEST
+
+/* Tests if chained entity references lead to unbounded recursion */
+START_TEST(test_deep_nested_entity) {
+ const size_t N_LINES = 60000;
+ const size_t SIZE_PER_LINE = 50;
+
+ char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE);
+ if (text == NULL) {
+ fail("malloc failed");
+ }
+
+ char *textPtr = text;
+
+ // Create the XML
+ textPtr += snprintf(textPtr, SIZE_PER_LINE,
+ "\n");
+
+ for (size_t i = 1; i < N_LINES; ++i) {
+ textPtr += snprintf(textPtr, SIZE_PER_LINE, " \n",
+ (long unsigned)i, (long unsigned)(i - 1));
+ }
+
+ snprintf(textPtr, SIZE_PER_LINE, "]> &s%lu;\n",
+ (long unsigned)(N_LINES - 1));
+
+ const XML_Char *const expected = XCS("deepText");
+
+ CharData storage;
+ CharData_Init(&storage);
+
+ XML_Parser parser = XML_ParserCreate(NULL);
+
+ XML_SetCharacterDataHandler(parser, accumulate_characters);
+ XML_SetUserData(parser, &storage);
+
+ if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
+ == XML_STATUS_ERROR)
+ xml_failure(parser);
+
+ CharData_CheckXMLChars(&storage, expected);
+ XML_ParserFree(parser);
+ free(text);
+}
+END_TEST
+
+/* Tests if chained entity references in attributes
+lead to unbounded recursion */
+START_TEST(test_deep_nested_attribute_entity) {
+ const size_t N_LINES = 60000;
+ const size_t SIZE_PER_LINE = 100;
+
+ char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE);
+ if (text == NULL) {
+ fail("malloc failed");
+ }
+
+ char *textPtr = text;
+
+ // Create the XML
+ textPtr += snprintf(textPtr, SIZE_PER_LINE,
+ "\n");
+
+ for (size_t i = 1; i < N_LINES; ++i) {
+ textPtr += snprintf(textPtr, SIZE_PER_LINE, " \n",
+ (long unsigned)i, (long unsigned)(i - 1));
+ }
+
+ snprintf(textPtr, SIZE_PER_LINE, "]> mainText\n",
+ (long unsigned)(N_LINES - 1));
+
+ AttrInfo doc_info[] = {{XCS("name"), XCS("deepText")}, {NULL, NULL}};
+ ElementInfo info[] = {{XCS("foo"), 1, NULL, NULL}, {NULL, 0, NULL, NULL}};
+ info[0].attributes = doc_info;
+
+ XML_Parser parser = XML_ParserCreate(NULL);
+ ParserAndElementInfo parserPlusElemenInfo = {parser, info};
+
+ XML_SetStartElementHandler(parser, counting_start_element_handler);
+ XML_SetUserData(parser, &parserPlusElemenInfo);
+
+ if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
+ == XML_STATUS_ERROR)
+ xml_failure(parser);
+
+ XML_ParserFree(parser);
+ free(text);
+}
+END_TEST
+
+START_TEST(test_deep_nested_entity_delayed_interpretation) {
+ const size_t N_LINES = 70000;
+ const size_t SIZE_PER_LINE = 100;
+
+ char *const text = (char *)malloc((N_LINES + 4) * SIZE_PER_LINE);
+ if (text == NULL) {
+ fail("malloc failed");
+ }
+
+ char *textPtr = text;
+
+ // Create the XML
+ textPtr += snprintf(textPtr, SIZE_PER_LINE,
+ "\n");
+
+ for (size_t i = 1; i < N_LINES; ++i) {
+ textPtr += snprintf(textPtr, SIZE_PER_LINE,
+ " \n", (long unsigned)i,
+ (long unsigned)(i - 1));
+ }
+
+ snprintf(textPtr, SIZE_PER_LINE,
+ " \">\n"
+ " %%define_g;\n"
+ "]>\n"
+ "\n",
+ (long unsigned)(N_LINES - 1));
+
+ XML_Parser parser = XML_ParserCreate(NULL);
+
+ XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
+ if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
+ == XML_STATUS_ERROR)
+ xml_failure(parser);
+
+ XML_ParserFree(parser);
+ free(text);
+}
+END_TEST
+
START_TEST(test_nested_entity_suspend) {
const char *const text = "'>\n"
@@ -5201,14 +5584,37 @@
}
END_TEST
+START_TEST(test_nested_entity_suspend_2) {
+ const char *const text = "\n"
+ " \n"
+ " \n"
+ "]>\n"
+ "&ge3;";
+ const XML_Char *const expected = XCS("head3") XCS("head2") XCS("head1")
+ XCS("Z") XCS("tail1") XCS("tail2") XCS("tail3");
+ CharData storage;
+ CharData_Init(&storage);
+ XML_Parser parser = XML_ParserCreate(NULL);
+ ParserPlusStorage parserPlusStorage = {parser, &storage};
+
+ XML_SetCharacterDataHandler(parser, accumulate_char_data_and_suspend);
+ XML_SetUserData(parser, &parserPlusStorage);
+
+ enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
+ while (status == XML_STATUS_SUSPENDED) {
+ status = XML_ResumeParser(parser);
+ }
+ if (status != XML_STATUS_OK)
+ xml_failure(parser);
+
+ CharData_CheckXMLChars(&storage, expected);
+ XML_ParserFree(parser);
+}
+END_TEST
+
/* Regression test for quadratic parsing on large tokens */
-START_TEST(test_big_tokens_take_linear_time) {
- const char *const too_slow_failure_message
- = "Compared to the baseline runtime of the first test, this test has a "
- "slowdown of more than . "
- "Please keep increasing the value by 1 until it reliably passes the "
- "test on your hardware and open a bug sharing that number with us. "
- "Thanks in advance!";
+START_TEST(test_big_tokens_scale_linearly) {
const struct {
const char *pre;
const char *post;
@@ -5220,65 +5626,57 @@
{"<", "/>"}, // big elem name, used to be O(N²)
};
const int num_cases = sizeof(text) / sizeof(text[0]);
- // For the test we need a value that is:
- // (1) big enough that the test passes reliably (avoiding flaky tests), and
- // (2) small enough that the test actually catches regressions.
- const int max_slowdown = 15;
char aaaaaa[4096];
const int fillsize = (int)sizeof(aaaaaa);
const int fillcount = 100;
+ const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post.
+ const unsigned max_factor = 4;
+ const unsigned max_scanned = max_factor * approx_bytes;
memset(aaaaaa, 'a', fillsize);
if (! g_reparseDeferralEnabledDefault) {
return; // heuristic is disabled; we would get O(n^2) and fail.
}
-#if ! defined(__linux__)
- if (CLOCKS_PER_SEC < 100000) {
- // Skip this test if clock() doesn't have reasonably good resolution.
- // This workaround is primarily targeting Windows and FreeBSD, since
- // XSI requires the value to be 1.000.000 (10x the condition here), and
- // we want to be very sure that at least one platform in CI can catch
- // regressions (through a failing test).
- return;
- }
-#endif
- clock_t baseline = 0;
for (int i = 0; i < num_cases; ++i) {
XML_Parser parser = XML_ParserCreate(NULL);
assert_true(parser != NULL);
enum XML_Status status;
- set_subtest("max_slowdown=%d text=\"%saaaaaa%s\"", max_slowdown,
- text[i].pre, text[i].post);
- const clock_t start = clock();
+ set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post);
// parse the start text
+ g_bytesScanned = 0;
status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre,
(int)strlen(text[i].pre), XML_FALSE);
if (status != XML_STATUS_OK) {
xml_failure(parser);
}
+
// parse lots of 'a', failing the test early if it takes too long
+ unsigned past_max_count = 0;
for (int f = 0; f < fillcount; ++f) {
status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE);
if (status != XML_STATUS_OK) {
xml_failure(parser);
}
- // i == 0 means we're still calculating the baseline value
- if (i > 0) {
- const clock_t now = clock();
- const clock_t clocks_so_far = now - start;
- const int slowdown = clocks_so_far / baseline;
- if (slowdown >= max_slowdown) {
- fprintf(
- stderr,
- "fill#%d: clocks_so_far=%d baseline=%d slowdown=%d max_slowdown=%d\n",
- f, (int)clocks_so_far, (int)baseline, slowdown, max_slowdown);
- fail(too_slow_failure_message);
- }
+ if (g_bytesScanned > max_scanned) {
+ // We're not done, and have already passed the limit -- the test will
+ // definitely fail. This block allows us to save time by failing early.
+ const unsigned pushed
+ = (unsigned)strlen(text[i].pre) + (f + 1) * fillsize;
+ fprintf(
+ stderr,
+ "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
+ f + 1, fillcount, pushed, g_bytesScanned,
+ g_bytesScanned / (double)pushed, max_scanned, max_factor);
+ past_max_count++;
+ // We are failing, but allow a few log prints first. If we don't reach
+ // a count of five, the test will fail after the loop instead.
+ assert_true(past_max_count < 5);
}
}
+
// parse the end text
status = _XML_Parse_SINGLE_BYTES(parser, text[i].post,
(int)strlen(text[i].post), XML_TRUE);
@@ -5286,18 +5684,14 @@
xml_failure(parser);
}
- // how long did it take in total?
- const clock_t end = clock();
- const clock_t taken = end - start;
- if (i == 0) {
- assert_true(taken > 0); // just to make sure we don't div-by-0 later
- baseline = taken;
- }
- const int slowdown = taken / baseline;
- if (slowdown >= max_slowdown) {
- fprintf(stderr, "taken=%d baseline=%d slowdown=%d max_slowdown=%d\n",
- (int)taken, (int)baseline, slowdown, max_slowdown);
- fail(too_slow_failure_message);
+ assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working
+ if (g_bytesScanned > max_scanned) {
+ fprintf(
+ stderr,
+ "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
+ g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned,
+ max_factor);
+ fail("scanned too many bytes");
}
XML_ParserFree(parser);
@@ -5774,19 +6168,17 @@
fillsize[2], fillsize[3]);
XML_Parser parser = XML_ParserCreate(NULL);
assert_true(parser != NULL);
- g_parseAttempts = 0;
CharData storage;
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
XML_SetStartElementHandler(parser, start_element_event_handler);
+ g_bytesScanned = 0;
int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
- int scanned_bytes = 0; // sum of (buffered bytes at each actual parse)
int offset = 0;
while (*fillsize >= 0) {
assert_true(offset + *fillsize <= document_length); // or test is invalid
- const unsigned attempts_before = g_parseAttempts;
const enum XML_Status status
= XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
if (status != XML_STATUS_OK) {
@@ -5796,28 +6188,20 @@
fillsize++;
assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
worstcase_bytes += offset; // we might've tried to parse all pending bytes
- if (g_parseAttempts != attempts_before) {
- assert_true(g_parseAttempts == attempts_before + 1); // max 1/XML_Parse
- assert_true(offset <= INT_MAX - scanned_bytes); // avoid overflow
- scanned_bytes += offset; // we *did* try to parse all pending bytes
- }
}
assert_true(storage.count == 1); // the big token should've been parsed
- assert_true(scanned_bytes > 0); // test-the-test: does our counter work?
+ assert_true(g_bytesScanned > 0); // test-the-test: does our counter work?
if (g_reparseDeferralEnabledDefault) {
// heuristic is enabled; some XML_Parse calls may have deferred reparsing
- const int max_bytes_scanned = -*fillsize;
- if (scanned_bytes > max_bytes_scanned) {
+ const unsigned max_bytes_scanned = -*fillsize;
+ if (g_bytesScanned > max_bytes_scanned) {
fprintf(stderr,
- "bytes scanned in parse attempts: actual=%d limit=%d \n",
- scanned_bytes, max_bytes_scanned);
+ "bytes scanned in parse attempts: actual=%u limit=%u \n",
+ g_bytesScanned, max_bytes_scanned);
fail("too many bytes scanned in parse attempts");
}
- assert_true(scanned_bytes <= worstcase_bytes);
- } else {
- // heuristic is disabled; every XML_Parse() will have reparsed
- assert_true(scanned_bytes == worstcase_bytes);
}
+ assert_true(g_bytesScanned <= (unsigned)worstcase_bytes);
XML_ParserFree(parser);
}
@@ -5889,7 +6273,9 @@
tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset);
tcase_add_test(tc_basic, test_not_standalone_handler_reject);
tcase_add_test(tc_basic, test_not_standalone_handler_accept);
+ tcase_add_test(tc_basic, test_entity_start_tag_level_greater_than_one);
tcase_add_test__if_xml_ge(tc_basic, test_wfc_no_recursive_entity_refs);
+ tcase_add_test(tc_basic, test_no_indirectly_recursive_entity_refs);
tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_parse);
tcase_add_test__if_xml_ge(tc_basic, test_dtd_default_handling);
tcase_add_test(tc_basic, test_dtd_attr_handling);
@@ -5940,6 +6326,8 @@
tcase_add_test__ifdef_xml_dtd(tc_basic, test_user_parameters);
tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_ref_parameter);
tcase_add_test(tc_basic, test_empty_parse);
+ tcase_add_test(tc_basic, test_negative_len_parse);
+ tcase_add_test(tc_basic, test_negative_len_parse_buffer);
tcase_add_test(tc_basic, test_get_buffer_1);
tcase_add_test(tc_basic, test_get_buffer_2);
#if XML_CONTEXT_BYTES > 0
@@ -5972,6 +6360,8 @@
tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity);
tcase_add_test__ifdef_xml_dtd(tc_basic,
test_recursive_external_parameter_entity);
+ tcase_add_test__ifdef_xml_dtd(tc_basic,
+ test_recursive_external_parameter_entity_2);
tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd);
tcase_add_test(tc_basic, test_suspend_xdecl);
tcase_add_test(tc_basic, test_abort_epilog);
@@ -6064,8 +6454,14 @@
tcase_add_test(tc_basic, test_empty_element_abort);
tcase_add_test__ifdef_xml_dtd(tc_basic,
test_pool_integrity_with_unfinished_attr);
+ tcase_add_test__if_xml_ge(tc_basic, test_entity_ref_no_elements);
+ tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_entity);
+ tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_attribute_entity);
+ tcase_add_test__if_xml_ge(tc_basic,
+ test_deep_nested_entity_delayed_interpretation);
tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend);
- tcase_add_test(tc_basic, test_big_tokens_take_linear_time);
+ tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend_2);
+ tcase_add_test(tc_basic, test_big_tokens_scale_linearly);
tcase_add_test(tc_basic, test_set_reparse_deferral);
tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser);
--- contrib/expat/tests/benchmark/Makefile.in.orig
+++ contrib/expat/tests/benchmark/Makefile.in
@@ -303,6 +303,7 @@
SED = @SED@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
+SIZEOF_VOID_P = @SIZEOF_VOID_P@
SO_MAJOR = @SO_MAJOR@
SO_MINOR = @SO_MINOR@
SO_PATCH = @SO_PATCH@
@@ -316,7 +317,6 @@
ac_ct_CC = @ac_ct_CC@
ac_ct_CXX = @ac_ct_CXX@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-ac_cv_sizeof_void_p = @ac_cv_sizeof_void_p@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
--- contrib/expat/tests/benchmark/benchmark.c.orig
+++ contrib/expat/tests/benchmark/benchmark.c
@@ -8,7 +8,7 @@
Copyright (c) 2003-2006 Karl Waclawek
Copyright (c) 2005-2007 Steven Solie
- Copyright (c) 2017-2023 Sebastian Pipping
+ Copyright (c) 2017-2025 Sebastian Pipping
Copyright (c) 2017 Rhodri James
Licensed under the MIT license:
@@ -32,10 +32,18 @@
USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
+#define _POSIX_C_SOURCE 1 // fdopen
+
+#if defined(_MSC_VER)
+# include <io.h> // _open, _close
+#else
+# include <unistd.h> // close
+#endif
+
+#include <fcntl.h> // open
#include
#include
 #include <stddef.h> // ptrdiff_t
-#include
#include
#include
#include "expat.h"
@@ -52,17 +60,18 @@
# define XML_FMT_STR "s"
#endif
-static void
+static int
usage(const char *prog, int rc) {
fprintf(stderr, "usage: %s [-n] filename bufferSize nr_of_loops\n", prog);
- exit(rc);
+ return rc;
}
int
main(int argc, char *argv[]) {
XML_Parser parser;
char *XMLBuf, *XMLBufEnd, *XMLBufPtr;
- FILE *fd;
+ int fd;
+ FILE *file;
struct stat fileAttr;
int nrOfLoops, bufferSize, i, isFinal;
size_t fileSize;
@@ -76,34 +85,48 @@
ns = 1;
j = 1;
} else
- usage(argv[0], 1);
+ return usage(argv[0], 1);
}
}
if (argc != j + 4)
- usage(argv[0], 1);
+ return usage(argv[0], 1);
- if (stat(argv[j + 1], &fileAttr) != 0) {
- fprintf(stderr, "could not access file '%s'\n", argv[j + 1]);
+ fd = open(argv[j + 1], O_RDONLY);
+ if (fd == -1) {
+ fprintf(stderr, "could not open file '%s'\n", argv[j + 1]);
return 2;
}
- fd = fopen(argv[j + 1], "r");
- if (! fd) {
- fprintf(stderr, "could not open file '%s'\n", argv[j + 1]);
- exit(2);
+ if (fstat(fd, &fileAttr) != 0) {
+ close(fd);
+ fprintf(stderr, "could not fstat file '%s'\n", argv[j + 1]);
+ return 2;
+ }
+
+ file = fdopen(fd, "r");
+ if (! file) {
+ close(fd);
+ fprintf(stderr, "could not fdopen file '%s'\n", argv[j + 1]);
+ return 2;
}
bufferSize = atoi(argv[j + 2]);
nrOfLoops = atoi(argv[j + 3]);
if (bufferSize <= 0 || nrOfLoops <= 0) {
+ fclose(file); // NOTE: this closes fd as well
fprintf(stderr, "buffer size and nr of loops must be greater than zero.\n");
- exit(3);
+ return 3;
}
XMLBuf = malloc(fileAttr.st_size);
- fileSize = fread(XMLBuf, sizeof(char), fileAttr.st_size, fd);
- fclose(fd);
+ if (XMLBuf == NULL) {
+ fclose(file); // NOTE: this closes fd as well
+ fprintf(stderr, "out of memory.\n");
+ return 5;
+ }
+ fileSize = fread(XMLBuf, sizeof(char), fileAttr.st_size, file);
+ fclose(file); // NOTE: this closes fd as well
if (ns)
parser = XML_ParserCreateNS(NULL, '!');
@@ -132,7 +155,7 @@
XML_GetCurrentColumnNumber(parser));
free(XMLBuf);
XML_ParserFree(parser);
- exit(4);
+ return 4;
}
XMLBufPtr += bufferSize;
} while (! isFinal);
--- contrib/expat/tests/common.c.orig
+++ contrib/expat/tests/common.c
@@ -10,7 +10,7 @@
Copyright (c) 2003 Greg Stein
Copyright (c) 2005-2007 Steven Solie
Copyright (c) 2005-2012 Karl Waclawek
- Copyright (c) 2016-2023 Sebastian Pipping
+ Copyright (c) 2016-2025 Sebastian Pipping
Copyright (c) 2017-2022 Rhodri James
Copyright (c) 2017 Joe Orton
Copyright (c) 2017 José Gutiérrez de la Concha
@@ -42,6 +42,8 @@
*/
#include
+#include <errno.h>
+#include <stdint.h> // for SIZE_MAX
#include
#include
@@ -51,6 +53,7 @@
#include "chardata.h"
#include "minicheck.h"
#include "common.h"
+#include "handlers.h"
/* Common test data */
@@ -201,6 +204,12 @@
for (; len > chunksize; len -= chunksize, s += chunksize) {
enum XML_Status res = XML_Parse(parser, s, chunksize, XML_FALSE);
if (res != XML_STATUS_OK) {
+ if ((res == XML_STATUS_SUSPENDED) && (len > chunksize)) {
+ fail("Use of function _XML_Parse_SINGLE_BYTES with a chunk size "
+ "greater than 0 (from g_chunkSize) does not work well with "
+ "suspension. Please consider use of plain XML_Parse at this "
+ "place in your test, instead.");
+ }
return res;
}
}
@@ -221,30 +230,6 @@
_xml_failure(g_parser, file, lineno);
}
-/* Character data support for handlers, built on top of the code in
- * chardata.c
- */
-void XMLCALL
-accumulate_characters(void *userData, const XML_Char *s, int len) {
- CharData_AppendXMLChars((CharData *)userData, s, len);
-}
-
-void XMLCALL
-accumulate_attribute(void *userData, const XML_Char *name,
- const XML_Char **atts) {
- CharData *storage = (CharData *)userData;
- UNUSED_P(name);
- /* Check there are attributes to deal with */
- if (atts == NULL)
- return;
-
- while (storage->count < 0 && atts[0] != NULL) {
- /* "accumulate" the value of the first attribute we see */
- CharData_AppendXMLChars(storage, atts[1], -1);
- atts += 2;
- }
-}
-
void
_run_character_check(const char *text, const XML_Char *expected,
const char *file, int line) {
@@ -273,12 +258,6 @@
CharData_CheckXMLChars(&storage, expected);
}
-void XMLCALL
-ext_accumulate_characters(void *userData, const XML_Char *s, int len) {
- ExtTest *test_data = (ExtTest *)userData;
- accumulate_characters(test_data->storage, s, len);
-}
-
void
_run_ext_character_check(const char *text, ExtTest *test_data,
const XML_Char *expected, const char *file, int line) {
@@ -323,3 +302,26 @@
g_reallocation_count--;
return realloc(ptr, size);
}
+
+// Portable remake of strndup(3) for C99; does not care about space efficiency
+char *
+portable_strndup(const char *s, size_t n) {
+ if ((s == NULL) || (n == SIZE_MAX)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ char *const buffer = (char *)malloc(n + 1);
+ if (buffer == NULL) {
+ errno = ENOMEM;
+ return NULL;
+ }
+
+ errno = 0;
+
+ memcpy(buffer, s, n);
+
+ buffer[n] = '\0';
+
+ return buffer;
+}
--- contrib/expat/tests/common.h.orig
+++ contrib/expat/tests/common.h
@@ -10,7 +10,7 @@
Copyright (c) 2003 Greg Stein
Copyright (c) 2005-2007 Steven Solie
Copyright (c) 2005-2012 Karl Waclawek
- Copyright (c) 2016-2023 Sebastian Pipping
+ Copyright (c) 2016-2025 Sebastian Pipping
Copyright (c) 2017-2022 Rhodri James
Copyright (c) 2017 Joe Orton
Copyright (c) 2017 José Gutiérrez de la Concha
@@ -111,12 +111,6 @@
/* Support functions for handlers to collect up character and attribute data.
*/
-extern void XMLCALL accumulate_characters(void *userData, const XML_Char *s,
- int len);
-
-extern void XMLCALL accumulate_attribute(void *userData, const XML_Char *name,
- const XML_Char **atts);
-
extern void _run_character_check(const char *text, const XML_Char *expected,
const char *file, int line);
@@ -135,9 +129,6 @@
CharData *storage;
} ExtTest;
-extern void XMLCALL ext_accumulate_characters(void *userData, const XML_Char *s,
- int len);
-
extern void _run_ext_character_check(const char *text, ExtTest *test_data,
const XML_Char *expected, const char *file,
int line);
@@ -155,6 +146,8 @@
extern void *duff_reallocator(void *ptr, size_t size);
+extern char *portable_strndup(const char *s, size_t n);
+
#endif /* XML_COMMON_H */
#ifdef __cplusplus
--- contrib/expat/tests/handlers.c.orig
+++ contrib/expat/tests/handlers.c
@@ -103,7 +103,9 @@
void XMLCALL
counting_start_element_handler(void *userData, const XML_Char *name,
const XML_Char **atts) {
- ElementInfo *info = (ElementInfo *)userData;
+ ParserAndElementInfo *const parserAndElementInfos
+ = (ParserAndElementInfo *)userData;
+ ElementInfo *info = parserAndElementInfos->info;
AttrInfo *attr;
int count, id, i;
@@ -120,12 +122,12 @@
* is possibly a little unexpected, but it is what the
* documentation in expat.h tells us to expect.
*/
- count = XML_GetSpecifiedAttributeCount(g_parser);
+ count = XML_GetSpecifiedAttributeCount(parserAndElementInfos->parser);
if (info->attr_count * 2 != count) {
fail("Not got expected attribute count");
return;
}
- id = XML_GetIdAttributeIndex(g_parser);
+ id = XML_GetIdAttributeIndex(parserAndElementInfos->parser);
if (id == -1 && info->id_name != NULL) {
fail("ID not present");
return;
@@ -1840,6 +1842,15 @@
XML_FreeContentModel(g_parser, model);
}
+void XMLCALL
+suspend_after_element_declaration(void *userData, const XML_Char *name,
+ XML_Content *model) {
+ UNUSED_P(name);
+ XML_Parser parser = (XML_Parser)userData;
+ assert_true(XML_StopParser(parser, /*resumable*/ XML_TRUE) == XML_STATUS_OK);
+ XML_FreeContentModel(parser, model);
+}
+
void XMLCALL
accumulate_pi_characters(void *userData, const XML_Char *target,
const XML_Char *data) {
@@ -1881,9 +1892,17 @@
}
void XMLCALL
-accumulate_char_data(void *userData, const XML_Char *s, int len) {
- CharData *const storage = (CharData *)userData;
- CharData_AppendXMLChars(storage, s, len);
+accumulate_char_data_and_suspend(void *userData, const XML_Char *s, int len) {
+ ParserPlusStorage *const parserPlusStorage = (ParserPlusStorage *)userData;
+
+ CharData_AppendXMLChars(parserPlusStorage->storage, s, len);
+
+ for (int i = 0; i < len; i++) {
+ if (s[i] == 'Z') {
+ XML_StopParser(parserPlusStorage->parser, /*resumable=*/XML_TRUE);
+ break;
+ }
+ }
}
void XMLCALL
@@ -1910,6 +1929,34 @@
CharData_AppendXMLChars(storage, XCS(")\n"), 2);
}
+void XMLCALL
+accumulate_characters(void *userData, const XML_Char *s, int len) {
+ CharData *const storage = (CharData *)userData;
+ CharData_AppendXMLChars(storage, s, len);
+}
+
+void XMLCALL
+accumulate_attribute(void *userData, const XML_Char *name,
+ const XML_Char **atts) {
+ CharData *const storage = (CharData *)userData;
+ UNUSED_P(name);
+ /* Check there are attributes to deal with */
+ if (atts == NULL)
+ return;
+
+ while (storage->count < 0 && atts[0] != NULL) {
+ /* "accumulate" the value of the first attribute we see */
+ CharData_AppendXMLChars(storage, atts[1], -1);
+ atts += 2;
+ }
+}
+
+void XMLCALL
+ext_accumulate_characters(void *userData, const XML_Char *s, int len) {
+ ExtTest *const test_data = (ExtTest *)userData;
+ accumulate_characters(test_data->storage, s, len);
+}
+
void XMLCALL
checking_default_handler(void *userData, const XML_Char *s, int len) {
DefaultCheck *data = (DefaultCheck *)userData;
--- contrib/expat/tests/handlers.h.orig
+++ contrib/expat/tests/handlers.h
@@ -92,6 +92,11 @@
AttrInfo *attributes;
} ElementInfo;
+typedef struct StructParserAndElementInfo {
+ XML_Parser parser;
+ ElementInfo *info;
+} ParserAndElementInfo;
+
extern void XMLCALL counting_start_element_handler(void *userData,
const XML_Char *name,
const XML_Char **atts);
@@ -320,6 +325,7 @@
typedef struct ext_hdlr_data {
const char *parse_text;
XML_ExternalEntityRefHandler handler;
+ CharData *storage;
} ExtHdlrData;
extern int XMLCALL external_entity_oneshot_loader(XML_Parser parser,
@@ -552,6 +558,10 @@
extern void XMLCALL element_decl_suspender(void *userData, const XML_Char *name,
XML_Content *model);
+extern void XMLCALL suspend_after_element_declaration(void *userData,
+ const XML_Char *name,
+ XML_Content *model);
+
extern void XMLCALL accumulate_pi_characters(void *userData,
const XML_Char *target,
const XML_Char *data);
@@ -564,13 +574,23 @@
const XML_Char *systemId, const XML_Char *publicId,
const XML_Char *notationName);
-extern void XMLCALL accumulate_char_data(void *userData, const XML_Char *s,
- int len);
+extern void XMLCALL accumulate_char_data_and_suspend(void *userData,
+ const XML_Char *s,
+ int len);
extern void XMLCALL accumulate_start_element(void *userData,
const XML_Char *name,
const XML_Char **atts);
+extern void XMLCALL accumulate_characters(void *userData, const XML_Char *s,
+ int len);
+
+extern void XMLCALL accumulate_attribute(void *userData, const XML_Char *name,
+ const XML_Char **atts);
+
+extern void XMLCALL ext_accumulate_characters(void *userData, const XML_Char *s,
+ int len);
+
typedef struct default_check {
const XML_Char *expected;
const int expectedLen;
--- contrib/expat/tests/minicheck.h.orig
+++ contrib/expat/tests/minicheck.h
@@ -14,7 +14,7 @@
Copyright (c) 2004-2006 Fred L. Drake, Jr.
Copyright (c) 2006-2012 Karl Waclawek
- Copyright (c) 2016-2024 Sebastian Pipping
+ Copyright (c) 2016-2025 Sebastian Pipping
Copyright (c) 2022 Rhodri James
Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow
Licensed under the MIT license:
@@ -129,8 +129,10 @@
* Prototypes for the actual implementation.
*/
-# if defined(__GNUC__)
+# if defined(__has_attribute)
+# if __has_attribute(noreturn)
__attribute__((noreturn))
+# endif
# endif
void
_fail(const char *file, int line, const char *msg);
--- contrib/expat/tests/misc_tests.c.orig
+++ contrib/expat/tests/misc_tests.c
@@ -10,7 +10,7 @@
Copyright (c) 2003 Greg Stein
Copyright (c) 2005-2007 Steven Solie
Copyright (c) 2005-2012 Karl Waclawek
- Copyright (c) 2016-2024 Sebastian Pipping
+ Copyright (c) 2016-2025 Sebastian Pipping
Copyright (c) 2017-2022 Rhodri James
Copyright (c) 2017 Joe Orton
Copyright (c) 2017 José Gutiérrez de la Concha
@@ -59,6 +59,9 @@
#include "handlers.h"
#include "misc_tests.h"
+void XMLCALL accumulate_characters_ext_handler(void *userData,
+ const XML_Char *s, int len);
+
/* Test that a failure to allocate the parser structure fails gracefully */
START_TEST(test_misc_alloc_create_parser) {
XML_Memory_Handling_Suite memsuite = {duff_allocator, realloc, free};
@@ -208,7 +211,7 @@
if (! versions_equal(&read_version, &parsed_version))
fail("Version mismatch");
- if (xcstrcmp(version_text, XCS("expat_2.6.0"))) /* needs bump on releases */
+ if (xcstrcmp(version_text, XCS("expat_2.7.1"))) /* needs bump on releases */
fail("XML_*_VERSION in expat.h out of sync?\n");
}
END_TEST
@@ -294,6 +297,7 @@
parser = XML_ParserCreate(NULL);
XML_SetElementHandler(parser, start_element_issue_240, end_element_issue_240);
mydata = (DataIssue240 *)malloc(sizeof(DataIssue240));
+ assert_true(mydata != NULL);
mydata->parser = parser;
mydata->deep = 0;
XML_SetUserData(parser, mydata);
@@ -315,6 +319,7 @@
parser = XML_ParserCreate(NULL);
XML_SetElementHandler(parser, start_element_issue_240, end_element_issue_240);
mydata = (DataIssue240 *)malloc(sizeof(DataIssue240));
+ assert_true(mydata != NULL);
mydata->parser = parser;
mydata->deep = 0;
XML_SetUserData(parser, mydata);
@@ -328,63 +333,119 @@
END_TEST
START_TEST(test_misc_deny_internal_entity_closing_doctype_issue_317) {
- const char *const inputOne = "'>\n"
- "\n"
- "%e;";
- const char *const inputTwo = "'>\n"
- "\n"
- "%e2;";
- const char *const inputThree = "\n"
- "\n"
- "%e;";
- const char *const inputIssue317 = "\n"
- "Hell'>\n"
- "%foo;\n"
- "]>\n"
- "Hello, world";
+ const char *const inputOne
+ = "'>\n"
+ "%element_d;\n"
+ "'>\n"
+ "\n"
+ "%e;";
+ const char *const inputTwo
+ = "'>\n"
+ "%element_d;\n"
+ "'>\n"
+ "\n"
+ "%e2;";
+ const char *const inputThree
+ = "'>\n"
+ "%element_d;\n"
+ "\n"
+ "\n"
+ "%e;/>";
+ const char *const inputIssue317
+ = "'>\n"
+ "%element_doc;\n"
+ "\n"
+ "Hell'>\n"
+ "%foo;\n"
+ "]>\n"
+ "Hello, world";
const char *const inputs[] = {inputOne, inputTwo, inputThree, inputIssue317};
+ const XML_Bool suspendOrNot[] = {XML_FALSE, XML_TRUE};
size_t inputIndex = 0;
for (; inputIndex < sizeof(inputs) / sizeof(inputs[0]); inputIndex++) {
- set_subtest("%s", inputs[inputIndex]);
- XML_Parser parser;
- enum XML_Status parseResult;
- int setParamEntityResult;
- XML_Size lineNumber;
- XML_Size columnNumber;
- const char *const input = inputs[inputIndex];
-
- parser = XML_ParserCreate(NULL);
- setParamEntityResult
- = XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
- if (setParamEntityResult != 1)
- fail("Failed to set XML_PARAM_ENTITY_PARSING_ALWAYS.");
-
- parseResult = _XML_Parse_SINGLE_BYTES(parser, input, (int)strlen(input), 0);
- if (parseResult != XML_STATUS_ERROR) {
- parseResult = _XML_Parse_SINGLE_BYTES(parser, "", 0, 1);
+ for (size_t suspendOrNotIndex = 0;
+ suspendOrNotIndex < sizeof(suspendOrNot) / sizeof(suspendOrNot[0]);
+ suspendOrNotIndex++) {
+ const char *const input = inputs[inputIndex];
+ const XML_Bool suspend = suspendOrNot[suspendOrNotIndex];
+ if (suspend && (g_chunkSize > 0)) {
+ // We cannot use _XML_Parse_SINGLE_BYTES below due to suspension, and
+ // so chunk sizes >0 would only repeat the very same test
+ // due to use of plain XML_Parse; skip the repetition to save runtime:
+ return;
+ }
+
+ set_subtest("[input=%d suspend=%s] %s", (int)inputIndex,
+ suspend ? "true" : "false", input);
+ XML_Parser parser;
+ enum XML_Status parseResult;
+ int setParamEntityResult;
+ XML_Size lineNumber;
+ XML_Size columnNumber;
+
+ parser = XML_ParserCreate(NULL);
+ setParamEntityResult
+ = XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
+ if (setParamEntityResult != 1)
+ fail("Failed to set XML_PARAM_ENTITY_PARSING_ALWAYS.");
+
+ if (suspend) {
+ XML_SetUserData(parser, parser);
+ XML_SetElementDeclHandler(parser, suspend_after_element_declaration);
+ }
+
+ if (suspend) {
+ // can't use SINGLE_BYTES here, because it'll return early on
+ // suspension, and we won't know exactly how much input we actually
+ // managed to give Expat.
+ parseResult = XML_Parse(parser, input, (int)strlen(input), 0);
+
+ while (parseResult == XML_STATUS_SUSPENDED) {
+ parseResult = XML_ResumeParser(parser);
+ }
+
+ if (parseResult != XML_STATUS_ERROR) {
+ // can't use SINGLE_BYTES here, because it'll return early on
+ // suspension, and we won't know exactly how much input we actually
+ // managed to give Expat.
+ parseResult = XML_Parse(parser, "", 0, 1);
+ }
+
+ while (parseResult == XML_STATUS_SUSPENDED) {
+ parseResult = XML_ResumeParser(parser);
+ }
+ } else {
+ parseResult
+ = _XML_Parse_SINGLE_BYTES(parser, input, (int)strlen(input), 0);
+
+ if (parseResult != XML_STATUS_ERROR) {
+ parseResult = _XML_Parse_SINGLE_BYTES(parser, "", 0, 1);
+ }
+ }
+
if (parseResult != XML_STATUS_ERROR) {
fail("Parsing was expected to fail but succeeded.");
}
- }
- if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)
- fail("Error code does not match XML_ERROR_INVALID_TOKEN");
+ if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)
+ fail("Error code does not match XML_ERROR_INVALID_TOKEN");
- lineNumber = XML_GetCurrentLineNumber(parser);
- if (lineNumber != 4)
- fail("XML_GetCurrentLineNumber does not work as expected.");
+ lineNumber = XML_GetCurrentLineNumber(parser);
+ if (lineNumber != 6)
+ fail("XML_GetCurrentLineNumber does not work as expected.");
- columnNumber = XML_GetCurrentColumnNumber(parser);
- if (columnNumber != 0)
- fail("XML_GetCurrentColumnNumber does not work as expected.");
+ columnNumber = XML_GetCurrentColumnNumber(parser);
+ if (columnNumber != 0)
+ fail("XML_GetCurrentColumnNumber does not work as expected.");
- XML_ParserFree(parser);
+ XML_ParserFree(parser);
+ }
}
}
END_TEST
@@ -447,7 +508,7 @@
XML_SetExternalEntityRefHandler(parser,
external_entity_failer__if_not_xml_ge);
XML_SetEntityDeclHandler(parser, accumulate_entity_decl);
- XML_SetCharacterDataHandler(parser, accumulate_char_data);
+ XML_SetCharacterDataHandler(parser, accumulate_characters);
if (_XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc), XML_TRUE)
!= XML_STATUS_OK) {
@@ -496,6 +557,127 @@
}
END_TEST
+START_TEST(test_misc_resumeparser_not_crashing) {
+ XML_Parser parser = XML_ParserCreate(NULL);
+ XML_GetBuffer(parser, 1);
+ XML_StopParser(parser, /*resumable=*/XML_TRUE);
+ XML_ResumeParser(parser); // could crash here, previously
+ XML_ParserFree(parser);
+}
+END_TEST
+
+START_TEST(test_misc_stopparser_rejects_unstarted_parser) {
+ const XML_Bool cases[] = {XML_TRUE, XML_FALSE};
+ for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
+ const XML_Bool resumable = cases[i];
+ XML_Parser parser = XML_ParserCreate(NULL);
+ assert_true(XML_GetErrorCode(parser) == XML_ERROR_NONE);
+ assert_true(XML_StopParser(parser, resumable) == XML_STATUS_ERROR);
+ assert_true(XML_GetErrorCode(parser) == XML_ERROR_NOT_STARTED);
+ XML_ParserFree(parser);
+ }
+}
+END_TEST
+
+/* Adaptation of accumulate_characters that takes ExtHdlrData input to work with
+ * test_renter_loop_finite_content below */
+void XMLCALL
+accumulate_characters_ext_handler(void *userData, const XML_Char *s, int len) {
+ ExtHdlrData *const test_data = (ExtHdlrData *)userData;
+ CharData_AppendXMLChars(test_data->storage, s, len);
+}
+
+/* Test that internalEntityProcessor does not re-enter forever;
+ * based on files tests/xmlconf/xmltest/valid/ext-sa/012.{xml,ent} */
+START_TEST(test_renter_loop_finite_content) {
+ CharData storage;
+ CharData_Init(&storage);
+ const char *const text = "\n"
+ "\n"
+ "\n"
+ "\n"
+ "\n"
+ "\n"
+ "]>\n"
+ "&e1;\n";
+ ExtHdlrData test_data = {"&e4;\n", external_entity_null_loader, &storage};
+ const XML_Char *const expected = XCS("(e5)\n");
+
+ XML_Parser parser = XML_ParserCreate(NULL);
+ assert_true(parser != NULL);
+ XML_SetUserData(parser, &test_data);
+ XML_SetExternalEntityRefHandler(parser, external_entity_oneshot_loader);
+ XML_SetCharacterDataHandler(parser, accumulate_characters_ext_handler);
+ if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
+ == XML_STATUS_ERROR)
+ xml_failure(parser);
+
+ CharData_CheckXMLChars(&storage, expected);
+ XML_ParserFree(parser);
+}
+END_TEST
+
+// Inspired by function XML_OriginalString of Perl's XML::Parser
+static char *
+dup_original_string(XML_Parser parser) {
+ const int byte_count = XML_GetCurrentByteCount(parser);
+
+ assert_true(byte_count >= 0);
+
+ int offset = -1;
+ int size = -1;
+
+ const char *const context = XML_GetInputContext(parser, &offset, &size);
+
+#if XML_CONTEXT_BYTES > 0
+ assert_true(context != NULL);
+ assert_true(offset >= 0);
+ assert_true(size >= 0);
+ return portable_strndup(context + offset, byte_count);
+#else
+ assert_true(context == NULL);
+ return NULL;
+#endif
+}
+
+static void
+on_characters_issue_980(void *userData, const XML_Char *s, int len) {
+ (void)s;
+ (void)len;
+ XML_Parser parser = (XML_Parser)userData;
+
+ char *const original_string = dup_original_string(parser);
+
+#if XML_CONTEXT_BYTES > 0
+ assert_true(original_string != NULL);
+ assert_true(strcmp(original_string, "&draft.day;") == 0);
+ free(original_string);
+#else
+ assert_true(original_string == NULL);
+#endif
+}
+
+START_TEST(test_misc_expected_event_ptr_issue_980) {
+ // NOTE: This is a tiny subset of sample "REC-xml-19980210.xml"
+ // from Perl's XML::Parser
+ const char *const doc = "\n"
+ "]>\n"
+ "&draft.day;\n";
+
+ XML_Parser parser = XML_ParserCreate(NULL);
+ XML_SetUserData(parser, parser);
+ XML_SetCharacterDataHandler(parser, on_characters_issue_980);
+
+ assert_true(_XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc),
+ /*isFinal=*/XML_TRUE)
+ == XML_STATUS_OK);
+
+ XML_ParserFree(parser);
+}
+END_TEST
+
void
make_miscellaneous_test_case(Suite *s) {
TCase *tc_misc = tcase_create("miscellaneous tests");
@@ -520,4 +702,8 @@
test_misc_create_external_entity_parser_with_null_context);
tcase_add_test(tc_misc, test_misc_general_entities_support);
tcase_add_test(tc_misc, test_misc_char_handler_stop_without_leak);
+ tcase_add_test(tc_misc, test_misc_resumeparser_not_crashing);
+ tcase_add_test(tc_misc, test_misc_stopparser_rejects_unstarted_parser);
+ tcase_add_test__if_xml_ge(tc_misc, test_renter_loop_finite_content);
+ tcase_add_test(tc_misc, test_misc_expected_event_ptr_issue_980);
}
--- contrib/expat/tests/xmltest.sh.orig
+++ contrib/expat/tests/xmltest.sh
@@ -2,8 +2,8 @@
# EXPAT TEST SCRIPT FOR W3C XML TEST SUITE
#
# This script can be used to exercise Expat against the
-# w3c.org xml test suite, available from
-# http://www.w3.org/XML/Test/xmlts20020606.zip.
+# w3c.org xml test suite, available from:
+# https://www.w3.org/XML/Test/xmlts20020606.zip
#
# To run this script, first set XMLWF below so that xmlwf can be
# found, then set the output directory with OUTPUT.
@@ -30,6 +30,7 @@
# Copyright (c) 2002 Karl Waclawek
# Copyright (c) 2008-2019 Sebastian Pipping
# Copyright (c) 2017 Rhodri James
+# Copyright (c) 2025 Hanno Böck
# Licensed under the MIT license:
#
# Permission is hereby granted, free of charge, to any person obtaining
--- contrib/expat/xmlwf/Makefile.in.orig
+++ contrib/expat/xmlwf/Makefile.in
@@ -311,6 +311,7 @@
SED = @SED@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
+SIZEOF_VOID_P = @SIZEOF_VOID_P@
SO_MAJOR = @SO_MAJOR@
SO_MINOR = @SO_MINOR@
SO_PATCH = @SO_PATCH@
@@ -324,7 +325,6 @@
ac_ct_CC = @ac_ct_CC@
ac_ct_CXX = @ac_ct_CXX@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-ac_cv_sizeof_void_p = @ac_cv_sizeof_void_p@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
--- contrib/expat/xmlwf/readfilemap.c.orig
+++ contrib/expat/xmlwf/readfilemap.c
@@ -14,6 +14,7 @@
Copyright (c) 2017 Rhodri James
Copyright (c) 2017 Franek Korta
Copyright (c) 2022 Sean McBride
+ Copyright (c) 2025 Hanno Böck
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -55,7 +56,7 @@
# define EXPAT_read_count_t int
# define EXPAT_read_req_t unsigned int
#else /* POSIX */
-/* http://pubs.opengroup.org/onlinepubs/009695399/functions/read.html */
+/* https://pubs.opengroup.org/onlinepubs/009695399/functions/read.html */
# define EXPAT_read read
# define EXPAT_read_count_t ssize_t
# define EXPAT_read_req_t size_t
--- contrib/expat/xmlwf/xmlfile.c.orig
+++ contrib/expat/xmlwf/xmlfile.c
@@ -15,6 +15,7 @@
Copyright (c) 2017 Rhodri James
Copyright (c) 2019 David Loffredo
Copyright (c) 2021 Donghee Na
+ Copyright (c) 2024 Hanno Böck
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -91,7 +92,8 @@
filename, XML_GetErrorLineNumber(parser),
XML_GetErrorColumnNumber(parser), message);
else
- ftprintf(stderr, T("%s: (unknown message %d)\n"), filename, code);
+ ftprintf(stderr, T("%s: (unknown message %u)\n"), filename,
+ (unsigned int)code);
}
/* This implementation will give problems on files larger than INT_MAX. */
--- lib/libexpat/Makefile.orig
+++ lib/libexpat/Makefile
@@ -1,4 +1,3 @@
-
PACKAGE= runtime
EXPAT= ${SRCTOP}/contrib/expat
--- lib/libexpat/expat_config.h.orig
+++ lib/libexpat/expat_config.h
@@ -89,7 +89,7 @@
#define PACKAGE_NAME "expat"
/* Define to the full name and version of this package. */
-#define PACKAGE_STRING "expat 2.6.0"
+#define PACKAGE_STRING "expat 2.7.1"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "expat"
@@ -98,7 +98,7 @@
#define PACKAGE_URL ""
/* Define to the version of this package. */
-#define PACKAGE_VERSION "2.6.0"
+#define PACKAGE_VERSION "2.7.1"
/* Define to 1 if all of the C90 standard headers exist (not just the ones
required in a freestanding environment). This macro is provided for
@@ -106,7 +106,7 @@
#define STDC_HEADERS 1
/* Version number of package */
-#define VERSION "2.6.0"
+#define VERSION "2.7.1"
/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
significant byte first (like Motorola and SPARC, unlike Intel). */
@@ -146,7 +146,4 @@
/* Define to `long int' if <sys/types.h> does not define. */
/* #undef off_t */
-/* Define to `unsigned int' if <sys/types.h> does not define. */
-/* #undef size_t */
-
#endif // ndef EXPAT_CONFIG_H
--- lib/libexpat/libbsdxml.3.orig
+++ lib/libexpat/libbsdxml.3
@@ -23,7 +23,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"/
-.Dd February 17, 2024
+.Dd April 7, 2025
.Dt LIBBSDXML 3
.Os
.Sh NAME
@@ -34,7 +34,7 @@
.Sh DESCRIPTION
The
.Nm
-library is a verbatim copy of the eXpat XML library version 2.6.0.
+library is a verbatim copy of the eXpat XML library version 2.7.1.
.Pp
The
.Nm