From cfce28e171676fe6f70d17b97ed8a59eaeb83f15 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Mon, 1 Sep 2025 17:34:58 +0200 Subject: [PATCH] lib: Implement tracking of dynamic memory allocations **PLEASE NOTE** that distributors intending to backport (or cherry-pick) this fix need to copy 99% of the related pull request, not just this commit, to not end up with a state that literally does both too much and too little at the same time. Appending ".diff" to the pull request URL could be of help. CVE: CVE-2025-59375 Upstream-Status: Backport [https://github.com/libexpat/libexpat/commit/cfce28e171676fe6f70d17b97ed8a59eaeb83f15] Signed-off-by: Peter Marko --- lib/expat.h | 15 +- lib/internal.h | 5 + lib/libexpat.def.cmake | 3 + lib/xmlparse.c | 337 +++++++++++++++++++++++++++++++++++++++-- tests/basic_tests.c | 4 + tests/nsalloc_tests.c | 5 + xmlwf/xmlwf.c | 2 + xmlwf/xmlwf_helpgen.py | 2 + 8 files changed, 361 insertions(+), 12 deletions(-) diff --git a/lib/expat.h b/lib/expat.h index 610e1ddc..66a253c1 100644 --- a/lib/expat.h +++ b/lib/expat.h @@ -1032,7 +1032,10 @@ enum XML_FeatureEnum { XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT, XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT, /* Added in Expat 2.6.0. */ - XML_FEATURE_GE + XML_FEATURE_GE, + /* Added in Expat 2.7.2. */ + XML_FEATURE_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT, + XML_FEATURE_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT, /* Additional features must be added to the end of this enum. */ }; @@ -1057,6 +1060,16 @@ XML_SetBillionLaughsAttackProtectionMaximumAmplification( XMLPARSEAPI(XML_Bool) XML_SetBillionLaughsAttackProtectionActivationThreshold( XML_Parser parser, unsigned long long activationThresholdBytes); + +/* Added in Expat 2.7.2. */ +XMLPARSEAPI(XML_Bool) +XML_SetAllocTrackerMaximumAmplification(XML_Parser parser, + float maximumAmplificationFactor); + +/* Added in Expat 2.7.2. */ +XMLPARSEAPI(XML_Bool) +XML_SetAllocTrackerActivationThreshold( + XML_Parser parser, unsigned long long activationThresholdBytes); #endif /* Added in Expat 2.6.0. */ diff --git a/lib/internal.h b/lib/internal.h index 6bde6ae6..eb67cf50 100644 --- a/lib/internal.h +++ b/lib/internal.h @@ -145,6 +145,11 @@ 100.0f #define EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT \ 8388608 // 8 MiB, 2^23 + +#define EXPAT_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT 100.0f +#define EXPAT_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT \ + 67108864 // 64 MiB, 2^26 + /* NOTE END */ #include "expat.h" // so we can use type XML_Parser below diff --git a/lib/libexpat.def.cmake b/lib/libexpat.def.cmake index 10ee9cd6..7a3a7ec0 100644 --- a/lib/libexpat.def.cmake +++ b/lib/libexpat.def.cmake @@ -79,3 +79,6 @@ EXPORTS @_EXPAT_COMMENT_DTD_OR_GE@ XML_SetBillionLaughsAttackProtectionMaximumAmplification @70 ; added with version 2.6.0 XML_SetReparseDeferralEnabled @71 +; added with version 2.7.2 +@_EXPAT_COMMENT_DTD_OR_GE@ XML_SetAllocTrackerMaximumAmplification @72 +@_EXPAT_COMMENT_DTD_OR_GE@ XML_SetAllocTrackerActivationThreshold @73 diff --git a/lib/xmlparse.c b/lib/xmlparse.c index 9f0a8b3e..fcf1cfdd 100644 --- a/lib/xmlparse.c +++ b/lib/xmlparse.c @@ -452,6 +452,14 @@ typedef struct accounting { unsigned long long activationThresholdBytes; } ACCOUNTING; +typedef struct MALLOC_TRACKER { + XmlBigCount bytesAllocated; + XmlBigCount peakBytesAllocated; // updated live only for debug level >=2 + unsigned long debugLevel; + float maximumAmplificationFactor; // >=1.0 + XmlBigCount activationThresholdBytes; +} MALLOC_TRACKER; + typedef struct entity_stats { unsigned int countEverOpened; unsigned int currentDepth; @@ -599,7 +607,8 @@ static XML_Bool startParsing(XML_Parser parser); static XML_Parser parserCreate(const XML_Char *encodingName, const XML_Memory_Handling_Suite *memsuite, - const XML_Char *nameSep, DTD *dtd); + const XML_Char *nameSep, DTD *dtd, + XML_Parser parentParser); static void parserInit(XML_Parser parser, const XML_Char *encodingName); @@ -769,14 +778,220 @@ struct XML_ParserStruct { unsigned long m_hash_secret_salt; #if XML_GE == 1 ACCOUNTING m_accounting; + MALLOC_TRACKER m_alloc_tracker; ENTITY_STATS m_entity_stats; #endif XML_Bool m_reenter; }; -#define MALLOC(parser, s) (parser->m_mem.malloc_fcn((s))) -#define REALLOC(parser, p, s) (parser->m_mem.realloc_fcn((p), (s))) -#define FREE(parser, p) (parser->m_mem.free_fcn((p))) +#if XML_GE == 1 +# define MALLOC(parser, s) (expat_malloc((parser), (s), __LINE__)) +# define REALLOC(parser, p, s) (expat_realloc((parser), (p), (s), __LINE__)) +# define FREE(parser, p) (expat_free((parser), (p), __LINE__)) +#else +# define MALLOC(parser, s) (parser->m_mem.malloc_fcn((s))) +# define REALLOC(parser, p, s) (parser->m_mem.realloc_fcn((p), (s))) +# define FREE(parser, p) (parser->m_mem.free_fcn((p))) +#endif + +#if XML_GE == 1 +static void +expat_heap_stat(XML_Parser rootParser, char operator, XmlBigCount absDiff, + XmlBigCount newTotal, XmlBigCount peakTotal, int sourceLine) { + // NOTE: This can be +infinity or -nan + const float amplification + = (float)newTotal / (float)rootParser->m_accounting.countBytesDirect; + fprintf( + stderr, + "expat: Allocations(%p): Direct " EXPAT_FMT_ULL("10") ", allocated %c" EXPAT_FMT_ULL( + "10") " to " EXPAT_FMT_ULL("10") " (" EXPAT_FMT_ULL("10") " peak), amplification %8.2f (xmlparse.c:%d)\n", + (void *)rootParser, rootParser->m_accounting.countBytesDirect, operator, + absDiff, newTotal, peakTotal, (double)amplification, sourceLine); +} + +static bool +expat_heap_increase_tolerable(XML_Parser rootParser, XmlBigCount increase, + int sourceLine) { + assert(rootParser != NULL); + assert(increase > 0); + + XmlBigCount newTotal = 0; + bool tolerable = true; + + // Detect integer overflow + if ((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated < increase) { + tolerable = false; + } else { + newTotal = rootParser->m_alloc_tracker.bytesAllocated + increase; + + if (newTotal >= rootParser->m_alloc_tracker.activationThresholdBytes) { + assert(newTotal > 0); + // NOTE: This can be +infinity when dividing by zero but not -nan + const float amplification + = (float)newTotal / (float)rootParser->m_accounting.countBytesDirect; + if (amplification + > rootParser->m_alloc_tracker.maximumAmplificationFactor) { + tolerable = false; + } + } + } + + if (! tolerable && (rootParser->m_alloc_tracker.debugLevel >= 1)) { + expat_heap_stat(rootParser, '+', increase, newTotal, newTotal, sourceLine); + } + + return tolerable; +} + +static void * +expat_malloc(XML_Parser parser, size_t size, int sourceLine) { + // Detect integer overflow + if (SIZE_MAX - size < sizeof(size_t)) { + return NULL; + } + + const XML_Parser rootParser = getRootParserOf(parser, NULL); + assert(rootParser->m_parentParser == NULL); + + const size_t bytesToAllocate = sizeof(size_t) + size; + + if ((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated + < bytesToAllocate) { + return NULL; // i.e. signal integer overflow as out-of-memory + } + + if (! expat_heap_increase_tolerable(rootParser, bytesToAllocate, + sourceLine)) { + return NULL; // i.e. signal violation as out-of-memory + } + + // Actually allocate + void *const mallocedPtr = parser->m_mem.malloc_fcn(bytesToAllocate); + + if (mallocedPtr == NULL) { + return NULL; + } + + // Update in-block recorded size + *(size_t *)mallocedPtr = size; + + // Update accounting + rootParser->m_alloc_tracker.bytesAllocated += bytesToAllocate; + + // Report as needed + if (rootParser->m_alloc_tracker.debugLevel >= 2) { + if (rootParser->m_alloc_tracker.bytesAllocated + > rootParser->m_alloc_tracker.peakBytesAllocated) { + rootParser->m_alloc_tracker.peakBytesAllocated + = rootParser->m_alloc_tracker.bytesAllocated; + } + expat_heap_stat(rootParser, '+', bytesToAllocate, + rootParser->m_alloc_tracker.bytesAllocated, + rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine); + } + + return (char *)mallocedPtr + sizeof(size_t); +} + +static void +expat_free(XML_Parser parser, void *ptr, int sourceLine) { + assert(parser != NULL); + + if (ptr == NULL) { + return; + } + + const XML_Parser rootParser = getRootParserOf(parser, NULL); + assert(rootParser->m_parentParser == NULL); + + // Extract size (to the eyes of malloc_fcn/realloc_fcn) and + // the original pointer returned by malloc/realloc + void *const mallocedPtr = (char *)ptr - sizeof(size_t); + const size_t bytesAllocated = sizeof(size_t) + *(size_t *)mallocedPtr; + + // Update accounting + assert(rootParser->m_alloc_tracker.bytesAllocated >= bytesAllocated); + rootParser->m_alloc_tracker.bytesAllocated -= bytesAllocated; + + // Report as needed + if (rootParser->m_alloc_tracker.debugLevel >= 2) { + expat_heap_stat(rootParser, '-', bytesAllocated, + rootParser->m_alloc_tracker.bytesAllocated, + rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine); + } + + // NOTE: This may be freeing rootParser, so freeing has to come last + parser->m_mem.free_fcn(mallocedPtr); +} + +static void * +expat_realloc(XML_Parser parser, void *ptr, size_t size, int sourceLine) { + assert(parser != NULL); + + if (ptr == NULL) { + return expat_malloc(parser, size, sourceLine); + } + + if (size == 0) { + expat_free(parser, ptr, sourceLine); + return NULL; + } + + const XML_Parser rootParser = getRootParserOf(parser, NULL); + assert(rootParser->m_parentParser == NULL); + + // Extract original size (to the eyes of the caller) and the original + // pointer returned by malloc/realloc + void *mallocedPtr = (char *)ptr - sizeof(size_t); + const size_t prevSize = *(size_t *)mallocedPtr; + + // Classify upcoming change + const bool isIncrease = (size > prevSize); + const size_t absDiff + = (size > prevSize) ? (size - prevSize) : (prevSize - size); + + // Ask for permission from accounting + if (isIncrease) { + if (! expat_heap_increase_tolerable(rootParser, absDiff, sourceLine)) { + return NULL; // i.e. signal violation as out-of-memory + } + } + + // Actually allocate + mallocedPtr = parser->m_mem.realloc_fcn(mallocedPtr, sizeof(size_t) + size); + + if (mallocedPtr == NULL) { + return NULL; + } + + // Update accounting + if (isIncrease) { + assert((XmlBigCount)-1 - rootParser->m_alloc_tracker.bytesAllocated + >= absDiff); + rootParser->m_alloc_tracker.bytesAllocated += absDiff; + } else { // i.e. decrease + assert(rootParser->m_alloc_tracker.bytesAllocated >= absDiff); + rootParser->m_alloc_tracker.bytesAllocated -= absDiff; + } + + // Report as needed + if (rootParser->m_alloc_tracker.debugLevel >= 2) { + if (rootParser->m_alloc_tracker.bytesAllocated + > rootParser->m_alloc_tracker.peakBytesAllocated) { + rootParser->m_alloc_tracker.peakBytesAllocated + = rootParser->m_alloc_tracker.bytesAllocated; + } + expat_heap_stat(rootParser, isIncrease ? '+' : '-', absDiff, + rootParser->m_alloc_tracker.bytesAllocated, + rootParser->m_alloc_tracker.peakBytesAllocated, sourceLine); + } + + // Update in-block recorded size + *(size_t *)mallocedPtr = size; + + return (char *)mallocedPtr + sizeof(size_t); +} +#endif // XML_GE == 1 XML_Parser XMLCALL XML_ParserCreate(const XML_Char *encodingName) { @@ -1096,19 +1311,40 @@ XML_Parser XMLCALL XML_ParserCreate_MM(const XML_Char *encodingName, const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep) { - return parserCreate(encodingName, memsuite, nameSep, NULL); + return parserCreate(encodingName, memsuite, nameSep, NULL, NULL); } static XML_Parser parserCreate(const XML_Char *encodingName, const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep, - DTD *dtd) { - XML_Parser parser; + DTD *dtd, XML_Parser parentParser) { + XML_Parser parser = NULL; + +#if XML_GE == 1 + const size_t increase = sizeof(size_t) + sizeof(struct XML_ParserStruct); + + if (parentParser != NULL) { + const XML_Parser rootParser = getRootParserOf(parentParser, NULL); + if (! expat_heap_increase_tolerable(rootParser, increase, __LINE__)) { + return NULL; + } + } +#else + UNUSED_P(parentParser); +#endif if (memsuite) { XML_Memory_Handling_Suite *mtemp; +#if XML_GE == 1 + void *const sizeAndParser = memsuite->malloc_fcn( + sizeof(size_t) + sizeof(struct XML_ParserStruct)); + if (sizeAndParser != NULL) { + *(size_t *)sizeAndParser = sizeof(struct XML_ParserStruct); + parser = (XML_Parser)((char *)sizeAndParser + sizeof(size_t)); +#else parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct)); if (parser != NULL) { +#endif mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem); mtemp->malloc_fcn = memsuite->malloc_fcn; mtemp->realloc_fcn = memsuite->realloc_fcn; @@ -1116,18 +1352,67 @@ parserCreate(const XML_Char *encodingName, } } else { XML_Memory_Handling_Suite *mtemp; +#if XML_GE == 1 + void *const sizeAndParser + = (XML_Parser)malloc(sizeof(size_t) + sizeof(struct XML_ParserStruct)); + if (sizeAndParser != NULL) { + *(size_t *)sizeAndParser = sizeof(struct XML_ParserStruct); + parser = (XML_Parser)((char *)sizeAndParser + sizeof(size_t)); +#else parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct)); if (parser != NULL) { +#endif mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem); mtemp->malloc_fcn = malloc; mtemp->realloc_fcn = realloc; mtemp->free_fcn = free; } - } + } // cppcheck-suppress[memleak symbolName=sizeAndParser] // Cppcheck >=2.18.0 if (! parser) return parser; +#if XML_GE == 1 + // Initialize .m_alloc_tracker + memset(&parser->m_alloc_tracker, 0, sizeof(MALLOC_TRACKER)); + if (parentParser == NULL) { + parser->m_alloc_tracker.debugLevel + = getDebugLevel("EXPAT_MALLOC_DEBUG", 0u); + parser->m_alloc_tracker.maximumAmplificationFactor + = EXPAT_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT; + parser->m_alloc_tracker.activationThresholdBytes + = EXPAT_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT; + + // NOTE: This initialization needs to come this early because these fields + // are read by allocation tracking code + parser->m_parentParser = NULL; + parser->m_accounting.countBytesDirect = 0; + } else { + parser->m_parentParser = parentParser; + } + + // Record XML_ParserStruct allocation we did a few lines up before + const XML_Parser rootParser = getRootParserOf(parser, NULL); + assert(rootParser->m_parentParser == NULL); + assert(SIZE_MAX - rootParser->m_alloc_tracker.bytesAllocated >= increase); + rootParser->m_alloc_tracker.bytesAllocated += increase; + + // Report on allocation + if (rootParser->m_alloc_tracker.debugLevel >= 2) { + if (rootParser->m_alloc_tracker.bytesAllocated + > rootParser->m_alloc_tracker.peakBytesAllocated) { + rootParser->m_alloc_tracker.peakBytesAllocated + = rootParser->m_alloc_tracker.bytesAllocated; + } + + expat_heap_stat(rootParser, '+', increase, + rootParser->m_alloc_tracker.bytesAllocated, + rootParser->m_alloc_tracker.peakBytesAllocated, __LINE__); + } +#else + parser->m_parentParser = NULL; +#endif // XML_GE == 1 + parser->m_buffer = NULL; parser->m_bufferLim = NULL; @@ -1291,7 +1576,6 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) { parser->m_unknownEncodingMem = NULL; parser->m_unknownEncodingRelease = NULL; parser->m_unknownEncodingData = NULL; - parser->m_parentParser = NULL; parser->m_parsingStatus.parsing = XML_INITIALIZED; // Reentry can only be triggered inside m_processor calls parser->m_reenter = XML_FALSE; @@ -1526,9 +1810,10 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, */ if (parser->m_ns) { XML_Char tmp[2] = {parser->m_namespaceSeparator, 0}; - parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd); + parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd, oldParser); } else { - parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd); + parser + = parserCreate(encodingName, &parser->m_mem, NULL, newDtd, oldParser); } if (! parser) @@ -2708,6 +2993,13 @@ XML_GetFeatureList(void) { EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT}, /* Added in Expat 2.6.0. */ {XML_FEATURE_GE, XML_L("XML_GE"), 0}, + /* Added in Expat 2.7.2. */ + {XML_FEATURE_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT, + XML_L("XML_AT_MAX_AMP"), + (long int)EXPAT_ALLOC_TRACKER_MAXIMUM_AMPLIFICATION_DEFAULT}, + {XML_FEATURE_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT, + XML_L("XML_AT_ACT_THRES"), + (long int)EXPAT_ALLOC_TRACKER_ACTIVATION_THRESHOLD_DEFAULT}, #endif {XML_FEATURE_END, NULL, 0}}; @@ -2736,6 +3028,29 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold( parser->m_accounting.activationThresholdBytes = activationThresholdBytes; return XML_TRUE; } + +XML_Bool XMLCALL +XML_SetAllocTrackerMaximumAmplification(XML_Parser parser, + float maximumAmplificationFactor) { + if ((parser == NULL) || (parser->m_parentParser != NULL) + || isnan(maximumAmplificationFactor) + || (maximumAmplificationFactor < 1.0f)) { + return XML_FALSE; + } + parser->m_alloc_tracker.maximumAmplificationFactor + = maximumAmplificationFactor; + return XML_TRUE; +} + +XML_Bool XMLCALL +XML_SetAllocTrackerActivationThreshold( + XML_Parser parser, unsigned long long activationThresholdBytes) { + if ((parser == NULL) || (parser->m_parentParser != NULL)) { + return XML_FALSE; + } + parser->m_alloc_tracker.activationThresholdBytes = activationThresholdBytes; + return XML_TRUE; +} #endif /* XML_GE == 1 */ XML_Bool XMLCALL diff --git a/tests/basic_tests.c b/tests/basic_tests.c index 129db1d8..0231e094 100644 --- a/tests/basic_tests.c +++ b/tests/basic_tests.c @@ -3089,6 +3089,10 @@ START_TEST(test_buffer_can_grow_to_max) { for (int i = 0; i < num_prefixes; ++i) { set_subtest("\"%s\"", prefixes[i]); XML_Parser parser = XML_ParserCreate(NULL); +#if XML_GE == 1 + assert_true(XML_SetAllocTrackerActivationThreshold(parser, (size_t)-1) + == XML_TRUE); // i.e. deactivate +#endif const int prefix_len = (int)strlen(prefixes[i]); const enum XML_Status s = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE); diff --git a/tests/nsalloc_tests.c b/tests/nsalloc_tests.c index 48520f42..0a594e14 100644 --- a/tests/nsalloc_tests.c +++ b/tests/nsalloc_tests.c @@ -454,10 +454,15 @@ START_TEST(test_nsalloc_realloc_attributes) { nsalloc_teardown(); nsalloc_setup(); } +#if XML_GE == 1 + assert_true( + i == 0); // because expat_realloc relies on expat_malloc to some extent +#else if (i == 0) fail("Parsing worked despite failing reallocations"); else if (i == max_realloc_count) fail("Parsing failed at max reallocation count"); +#endif } END_TEST diff --git a/xmlwf/xmlwf.c b/xmlwf/xmlwf.c index 8cfc73ca..b9d0a7fc 100644 --- a/xmlwf/xmlwf.c +++ b/xmlwf/xmlwf.c @@ -933,6 +933,8 @@ usage(const XML_Char *prog, int rc) { T(" Control verbosity of entity debugging (default: 0)\n") T(" EXPAT_ENTROPY_DEBUG=(0|1)\n") T(" Control verbosity of entropy debugging (default: 0)\n") + T(" EXPAT_MALLOC_DEBUG=(0|1|2)\n") + T(" Control verbosity of allocation tracker (default: 0)\n") T("\n") T("exit status:\n") T(" 0 the input files are well-formed and the output (if requested) was written successfully\n") diff --git a/xmlwf/xmlwf_helpgen.py b/xmlwf/xmlwf_helpgen.py index 39a3dc13..2360820d 100755 --- a/xmlwf/xmlwf_helpgen.py +++ b/xmlwf/xmlwf_helpgen.py @@ -39,6 +39,8 @@ environment variables: Control verbosity of entity debugging (default: 0) EXPAT_ENTROPY_DEBUG=(0|1) Control verbosity of entropy debugging (default: 0) + EXPAT_MALLOC_DEBUG=(0|1|2) + Control verbosity of allocation tracker (default: 0) exit status: 0 the input files are well-formed and the output (if requested) was written successfully