From 78366891a586f293aeff60a14a55e4afe1169586 Mon Sep 17 00:00:00 2001
From: Sebastian Pipping <sebastian@pipping.org>
Date: Tue, 2 Sep 2025 16:44:00 +0200
Subject: [PATCH] xmlwf: Wire allocation tracker config to existing arguments
 -a and -b

CVE: CVE-2025-59375
Upstream-Status: Backport [https://github.com/libexpat/libexpat/commit/78366891a586f293aeff60a14a55e4afe1169586]
Signed-off-by: Peter Marko <peter.marko@siemens.com>
---
 doc/xmlwf.xml          | 26 ++++++++++++++++++++------
 xmlwf/xmlwf.c          |  7 +++++--
 xmlwf/xmlwf_helpgen.py |  4 ++--
 3 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/doc/xmlwf.xml b/doc/xmlwf.xml
index 17e9cf51..65d8ae9b 100644
--- a/doc/xmlwf.xml
+++ b/doc/xmlwf.xml
@@ -158,19 +158,31 @@ supports both.
         <listitem>
           <para>
             Sets the maximum tolerated amplification factor
-            for protection against billion laughs attacks (default: 100.0).
+            for protection against amplification attacks
+            like the billion laughs attack
+            (default: 100.0
+            for the sum of direct and indirect output and also
+            for allocations of dynamic memory).
             The amplification factor is calculated as ..
           </para>
           <literallayout>
             amplification := (direct + indirect) / direct
           </literallayout>
           <para>
-            .. while parsing, whereas
+            .. with regard to use of entities and ..
+          </para>
+          <literallayout>
+            amplification := allocated / direct
+          </literallayout>
+          <para>
+            .. with regard to dynamic memory while parsing.
             &lt;direct&gt; is the number of bytes read
-              from the primary document in parsing and
+              from the primary document in parsing,
             &lt;indirect&gt; is the number of bytes
               added by expanding entities and reading of external DTD files,
-              combined.
+              combined, and
+            &lt;allocated&gt; is the total number of bytes of dynamic memory
+              allocated (and not freed) per hierarchy of parsers.
           </para>
           <para>
             <emphasis>NOTE</emphasis>:
@@ -185,8 +197,10 @@ supports both.
         <listitem>
           <para>
             Sets the number of output bytes (including amplification)
-            needed to activate protection against billion laughs attacks
-            (default: 8 MiB).
+            needed to activate protection against amplification attacks
+            like billion laughs
+            (default: 8 MiB for the sum of direct and indirect output,
+            and 64 MiB for allocations of dynamic memory).
             This can be thought of as an &quot;activation threshold&quot;.
           </para>
           <para>
diff --git a/xmlwf/xmlwf.c b/xmlwf/xmlwf.c
index b9d0a7fc..14206d9e 100644
--- a/xmlwf/xmlwf.c
+++ b/xmlwf/xmlwf.c
@@ -913,11 +913,11 @@ usage(const XML_Char *prog, int rc) {
       T("  -t             write no XML output for [t]iming of plain parsing\n")
       T("  -N             enable adding doctype and [n]otation declarations\n")
       T("\n")
-      T("billion laughs attack protection:\n")
+      T("amplification attack protection (e.g. billion laughs):\n")
       T("  NOTE: If you ever need to increase these values for non-attack payload, please file a bug report.\n")
       T("\n")
       T("  -a FACTOR      set maximum tolerated [a]mplification factor (default: 100.0)\n")
-      T("  -b BYTES       set number of output [b]ytes needed to activate (default: 8 MiB)\n")
+      T("  -b BYTES       set number of output [b]ytes needed to activate (default: 8 MiB/64 MiB)\n")
       T("\n")
       T("reparse deferral:\n")
       T("  -q             disable reparse deferral, and allow [q]uadratic parse runtime with large tokens\n")
@@ -1181,12 +1181,15 @@ tmain(int argc, XML_Char **argv) {
 #if XML_GE == 1
       XML_SetBillionLaughsAttackProtectionMaximumAmplification(
           parser, attackMaximumAmplification);
+      XML_SetAllocTrackerMaximumAmplification(parser,
+                                              attackMaximumAmplification);
 #endif
     }
     if (attackThresholdGiven) {
 #if XML_GE == 1
       XML_SetBillionLaughsAttackProtectionActivationThreshold(
           parser, attackThresholdBytes);
+      XML_SetAllocTrackerActivationThreshold(parser, attackThresholdBytes);
 #else
       (void)attackThresholdBytes; // silence -Wunused-but-set-variable
 #endif
diff --git a/xmlwf/xmlwf_helpgen.py b/xmlwf/xmlwf_helpgen.py
index 2360820d..e91c285c 100755
--- a/xmlwf/xmlwf_helpgen.py
+++ b/xmlwf/xmlwf_helpgen.py
@@ -84,13 +84,13 @@ output_mode.add_argument('-m', action='store_true', help='write [m]eta XML, not
 output_mode.add_argument('-t', action='store_true', help='write no XML output for [t]iming of plain parsing')
 output_related.add_argument('-N', action='store_true', help='enable adding doctype and [n]otation declarations')
 
-billion_laughs = parser.add_argument_group('billion laughs attack protection',
+billion_laughs = parser.add_argument_group('amplification attack protection (e.g. billion laughs)',
                                            description='NOTE: '
                                                        'If you ever need to increase these values '
                                                        'for non-attack payload, please file a bug report.')
 billion_laughs.add_argument('-a', metavar='FACTOR',
                             help='set maximum tolerated [a]mplification factor (default: 100.0)')
-billion_laughs.add_argument('-b', metavar='BYTES', help='set number of output [b]ytes needed to activate (default: 8 MiB)')
+billion_laughs.add_argument('-b', metavar='BYTES', help='set number of output [b]ytes needed to activate (default: 8 MiB/64 MiB)')
 
 reparse_deferral = parser.add_argument_group('reparse deferral')
 reparse_deferral.add_argument('-q', metavar='FACTOR',