From f5e81fe182d36da18563c429d0764e30b8eb3657 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sun, 29 Sep 2024 12:35:36 +0200
Subject: [PATCH] Optimize in-memory XMLWriter
We're currently using a libxml buffer, which requires copying the buffer
to zend_strings every time we want to output the string. Furthermore,
its use of the system allocator instead of ZendMM makes it not count
towards the memory_limit and hinders performance.
This patch adds a custom writer such that the strings are written to a
smart_str instance, using ZendMM for improved performance, and giving
the ability to not copy the string in the common case where flush has
empty set to true.
Closes GH-16120.
---
NEWS | 3 +
UPGRADING | 3 +
ext/xmlwriter/php_xmlwriter.c | 84 ++++++++++++-------
ext/xmlwriter/php_xmlwriter.h | 2 +-
...xmlwriter_toMemory_flush_combinations.phpt | 23 +++++
5 files changed, 85 insertions(+), 30 deletions(-)
create mode 100644 ext/xmlwriter/tests/xmlwriter_toMemory_flush_combinations.phpt
diff --git a/NEWS b/NEWS
index 5a7c57c3987..76505f350fd 100644
--- a/NEWS
+++ b/NEWS
@@ -19,4 +19,7 @@ PHP NEWS
. Fixed bug #49169 (SoapServer calls wrong function, although "SOAP action"
header is correct). (nielsdos)
+- XMLWriter:
+ . Improved performance and reduce memory consumption. (nielsdos)
+
<<< NOTE: Insert NEWS from last stable release here prior to actual release! >>>
diff --git a/UPGRADING b/UPGRADING
index 463ca218047..649c252492c 100644
--- a/UPGRADING
+++ b/UPGRADING
@@ -95,3 +95,6 @@ PHP 8.5 UPGRADE NOTES
========================================
14. Performance Improvements
========================================
+
+- XMLWriter:
+ . Improved performance and reduce memory consumption.
diff --git a/ext/xmlwriter/php_xmlwriter.c b/ext/xmlwriter/php_xmlwriter.c
index 2029bcbb12e..531e70b7810 100644
--- a/ext/xmlwriter/php_xmlwriter.c
+++ b/ext/xmlwriter/php_xmlwriter.c
@@ -24,6 +24,7 @@
#include "ext/standard/info.h"
#include "php_xmlwriter.h"
#include "php_xmlwriter_arginfo.h"
+#include "zend_smart_str.h"
static zend_class_entry *xmlwriter_class_entry_ce;
@@ -47,11 +48,9 @@ static zend_object_handlers xmlwriter_object_handlers;
static zend_always_inline void xmlwriter_destroy_libxml_objects(ze_xmlwriter_object *intern)
{
if (intern->ptr) {
+ /* Note: this call will also free the output pointer. */
xmlFreeTextWriter(intern->ptr);
intern->ptr = NULL;
- }
- if (intern->output) {
- xmlBufferFree(intern->output);
intern->output = NULL;
}
}
@@ -178,14 +177,14 @@ static char *_xmlwriter_get_valid_file_path(char *source, char *resolved_path, i
}
/* }}} */
-static void xml_writer_create_static(INTERNAL_FUNCTION_PARAMETERS, xmlTextWriterPtr writer, xmlBufferPtr output)
+static void xml_writer_create_static(INTERNAL_FUNCTION_PARAMETERS, xmlTextWriterPtr writer, smart_str *output)
{
if (object_init_with_constructor(return_value, Z_CE_P(ZEND_THIS), 0, NULL, NULL) == SUCCESS) {
ze_xmlwriter_object *intern = Z_XMLWRITER_P(return_value);
intern->ptr = writer;
intern->output = output;
} else {
- xmlBufferFree(output);
+ // output is freed by writer, so we don't need to free it here.
xmlFreeTextWriter(writer);
}
}
@@ -877,11 +876,45 @@ PHP_METHOD(XMLWriter, toUri)
xml_writer_create_static(INTERNAL_FUNCTION_PARAM_PASSTHRU, writer, NULL);
}
+static int xml_writer_stream_write_memory(void *context, const char *buffer, int len)
+{
+ smart_str *output = context;
+ smart_str_appendl(output, buffer, len);
+ return len;
+}
+
+static int xml_writer_stream_close_memory(void *context)
+{
+ smart_str *output = context;
+ smart_str_free_ex(output, false);
+ efree(output);
+ return 0;
+}
+
+static xmlTextWriterPtr xml_writer_create_in_memory(smart_str **output_ptr)
+{
+ smart_str *output = emalloc(sizeof(*output));
+ memset(output, 0, sizeof(*output));
+
+ xmlOutputBufferPtr output_buffer = xmlOutputBufferCreateIO(xml_writer_stream_write_memory, xml_writer_stream_close_memory, output, NULL);
+ if (output_buffer == NULL) {
+ efree(output);
+ return NULL;
+ }
+
+ xmlTextWriterPtr writer = xmlNewTextWriter(output_buffer);
+ if (!writer) {
+ /* This call will free output too. */
+ xmlOutputBufferClose(output_buffer);
+ return NULL;
+ }
+ *output_ptr = output;
+ return writer;
+}
+
/* {{{ Create new xmlwriter using memory for string output */
PHP_FUNCTION(xmlwriter_open_memory)
{
- xmlTextWriterPtr ptr;
- xmlBufferPtr buffer;
zval *self = getThis();
ze_xmlwriter_object *ze_obj = NULL;
@@ -894,28 +927,21 @@ PHP_FUNCTION(xmlwriter_open_memory)
ze_obj = Z_XMLWRITER_P(self);
}
- buffer = xmlBufferCreate();
-
- if (buffer == NULL) {
- php_error_docref(NULL, E_WARNING, "Unable to create output buffer");
- RETURN_FALSE;
- }
-
- ptr = xmlNewTextWriterMemory(buffer, 0);
+ smart_str *output;
+ xmlTextWriterPtr ptr = xml_writer_create_in_memory(&output);
if (! ptr) {
- xmlBufferFree(buffer);
RETURN_FALSE;
}
if (self) {
xmlwriter_destroy_libxml_objects(ze_obj);
ze_obj->ptr = ptr;
- ze_obj->output = buffer;
+ ze_obj->output = output;
RETURN_TRUE;
} else {
ze_obj = php_xmlwriter_fetch_object(xmlwriter_object_new(xmlwriter_class_entry_ce));
ze_obj->ptr = ptr;
- ze_obj->output = buffer;
+ ze_obj->output = output;
RETURN_OBJ(&ze_obj->std);
}
@@ -926,17 +952,16 @@ PHP_METHOD(XMLWriter, toMemory)
{
ZEND_PARSE_PARAMETERS_NONE();
- xmlBufferPtr buffer = xmlBufferCreate();
- xmlTextWriterPtr writer = xmlNewTextWriterMemory(buffer, 0);
+ smart_str *output;
+ xmlTextWriterPtr writer = xml_writer_create_in_memory(&output);
/* No need for an explicit buffer check as this will fail on a NULL buffer. */
if (!writer) {
- xmlBufferFree(buffer);
zend_throw_error(NULL, "Could not construct libxml writer");
RETURN_THROWS();
}
- xml_writer_create_static(INTERNAL_FUNCTION_PARAM_PASSTHRU, writer, buffer);
+ xml_writer_create_static(INTERNAL_FUNCTION_PARAM_PASSTHRU, writer, output);
}
static int xml_writer_stream_write(void *context, const char *buffer, int len)
@@ -992,7 +1017,6 @@ PHP_METHOD(XMLWriter, toStream)
/* {{{ php_xmlwriter_flush */
static void php_xmlwriter_flush(INTERNAL_FUNCTION_PARAMETERS, int force_string) {
xmlTextWriterPtr ptr;
- xmlBufferPtr buffer;
bool empty = 1;
int output_bytes;
zval *self;
@@ -1002,16 +1026,18 @@ static void php_xmlwriter_flush(INTERNAL_FUNCTION_PARAMETERS, int force_string)
}
XMLWRITER_FROM_OBJECT(ptr, self);
- buffer = Z_XMLWRITER_P(self)->output;
- if (force_string == 1 && buffer == NULL) {
+ smart_str *output = Z_XMLWRITER_P(self)->output;
+ if (force_string == 1 && output == NULL) {
RETURN_EMPTY_STRING();
}
output_bytes = xmlTextWriterFlush(ptr);
- if (buffer) {
- const xmlChar *content = xmlBufferContent(buffer);
- RETVAL_STRING((const char *) content);
+ if (output) {
if (empty) {
- xmlBufferEmpty(buffer);
+ RETURN_STR(smart_str_extract(output));
+ } else if (smart_str_get_len(output) > 0) {
+ RETURN_NEW_STR(zend_string_dup(output->s, false));
+ } else {
+ RETURN_EMPTY_STRING();
}
} else {
RETVAL_LONG(output_bytes);
diff --git a/ext/xmlwriter/php_xmlwriter.h b/ext/xmlwriter/php_xmlwriter.h
index 00e21c10974..e9a4dcb2aa4 100644
--- a/ext/xmlwriter/php_xmlwriter.h
+++ b/ext/xmlwriter/php_xmlwriter.h
@@ -35,7 +35,7 @@ extern zend_module_entry xmlwriter_module_entry;
/* Extends zend object */
typedef struct _ze_xmlwriter_object {
xmlTextWriterPtr ptr;
- xmlBufferPtr output;
+ smart_str *output;
zend_object std;
} ze_xmlwriter_object;
diff --git a/ext/xmlwriter/tests/xmlwriter_toMemory_flush_combinations.phpt b/ext/xmlwriter/tests/xmlwriter_toMemory_flush_combinations.phpt
new file mode 100644
index 00000000000..89d2e73090d
--- /dev/null
+++ b/ext/xmlwriter/tests/xmlwriter_toMemory_flush_combinations.phpt
@@ -0,0 +1,23 @@
+--TEST--
+XMLWriter::toMemory() with combinations of empty flush and non-empty flush
+--EXTENSIONS--
+xmlwriter
+--FILE--
+flush(empty: false));
+$writer->startElement('foo');
+var_dump($writer->flush(empty: false));
+$writer->endElement();
+var_dump($writer->flush(empty: false));
+var_dump($writer->flush());
+var_dump($writer->flush());
+
+?>
+--EXPECT--
+string(0) ""
+string(4) ""
+string(6) ""
+string(0) ""