From f5e81fe182d36da18563c429d0764e30b8eb3657 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 29 Sep 2024 12:35:36 +0200 Subject: [PATCH] Optimize in-memory XMLWriter We're currently using a libxml buffer, which requires copying the buffer to zend_strings every time we want to output the string. Furthermore, its use of the system allocator instead of ZendMM makes it not count towards the memory_limit and hinders performance. This patch adds a custom writer such that the strings are written to a smart_str instance, using ZendMM for improved performance, and giving the ability to not copy the string in the common case where flush has empty set to true. Closes GH-16120. --- NEWS | 3 + UPGRADING | 3 + ext/xmlwriter/php_xmlwriter.c | 84 ++++++++++++------- ext/xmlwriter/php_xmlwriter.h | 2 +- ...xmlwriter_toMemory_flush_combinations.phpt | 23 +++++ 5 files changed, 85 insertions(+), 30 deletions(-) create mode 100644 ext/xmlwriter/tests/xmlwriter_toMemory_flush_combinations.phpt diff --git a/NEWS b/NEWS index 5a7c57c3987..76505f350fd 100644 --- a/NEWS +++ b/NEWS @@ -19,4 +19,7 @@ PHP NEWS . Fixed bug #49169 (SoapServer calls wrong function, although "SOAP action" header is correct). (nielsdos) +- XMLWriter: + . Improved performance and reduce memory consumption. (nielsdos) + <<< NOTE: Insert NEWS from last stable release here prior to actual release! >>> diff --git a/UPGRADING b/UPGRADING index 463ca218047..649c252492c 100644 --- a/UPGRADING +++ b/UPGRADING @@ -95,3 +95,6 @@ PHP 8.5 UPGRADE NOTES ======================================== 14. Performance Improvements ======================================== + +- XMLWriter: + . Improved performance and reduce memory consumption. diff --git a/ext/xmlwriter/php_xmlwriter.c b/ext/xmlwriter/php_xmlwriter.c index 2029bcbb12e..531e70b7810 100644 --- a/ext/xmlwriter/php_xmlwriter.c +++ b/ext/xmlwriter/php_xmlwriter.c @@ -24,6 +24,7 @@ #include "ext/standard/info.h" #include "php_xmlwriter.h" #include "php_xmlwriter_arginfo.h" +#include "zend_smart_str.h" static zend_class_entry *xmlwriter_class_entry_ce; @@ -47,11 +48,9 @@ static zend_object_handlers xmlwriter_object_handlers; static zend_always_inline void xmlwriter_destroy_libxml_objects(ze_xmlwriter_object *intern) { if (intern->ptr) { + /* Note: this call will also free the output pointer. */ xmlFreeTextWriter(intern->ptr); intern->ptr = NULL; - } - if (intern->output) { - xmlBufferFree(intern->output); intern->output = NULL; } } @@ -178,14 +177,14 @@ static char *_xmlwriter_get_valid_file_path(char *source, char *resolved_path, i } /* }}} */ -static void xml_writer_create_static(INTERNAL_FUNCTION_PARAMETERS, xmlTextWriterPtr writer, xmlBufferPtr output) +static void xml_writer_create_static(INTERNAL_FUNCTION_PARAMETERS, xmlTextWriterPtr writer, smart_str *output) { if (object_init_with_constructor(return_value, Z_CE_P(ZEND_THIS), 0, NULL, NULL) == SUCCESS) { ze_xmlwriter_object *intern = Z_XMLWRITER_P(return_value); intern->ptr = writer; intern->output = output; } else { - xmlBufferFree(output); + // output is freed by writer, so we don't need to free it here. xmlFreeTextWriter(writer); } } @@ -877,11 +876,45 @@ PHP_METHOD(XMLWriter, toUri) xml_writer_create_static(INTERNAL_FUNCTION_PARAM_PASSTHRU, writer, NULL); } +static int xml_writer_stream_write_memory(void *context, const char *buffer, int len) +{ + smart_str *output = context; + smart_str_appendl(output, buffer, len); + return len; +} + +static int xml_writer_stream_close_memory(void *context) +{ + smart_str *output = context; + smart_str_free_ex(output, false); + efree(output); + return 0; +} + +static xmlTextWriterPtr xml_writer_create_in_memory(smart_str **output_ptr) +{ + smart_str *output = emalloc(sizeof(*output)); + memset(output, 0, sizeof(*output)); + + xmlOutputBufferPtr output_buffer = xmlOutputBufferCreateIO(xml_writer_stream_write_memory, xml_writer_stream_close_memory, output, NULL); + if (output_buffer == NULL) { + efree(output); + return NULL; + } + + xmlTextWriterPtr writer = xmlNewTextWriter(output_buffer); + if (!writer) { + /* This call will free output too. */ + xmlOutputBufferClose(output_buffer); + return NULL; + } + *output_ptr = output; + return writer; +} + /* {{{ Create new xmlwriter using memory for string output */ PHP_FUNCTION(xmlwriter_open_memory) { - xmlTextWriterPtr ptr; - xmlBufferPtr buffer; zval *self = getThis(); ze_xmlwriter_object *ze_obj = NULL; @@ -894,28 +927,21 @@ PHP_FUNCTION(xmlwriter_open_memory) ze_obj = Z_XMLWRITER_P(self); } - buffer = xmlBufferCreate(); - - if (buffer == NULL) { - php_error_docref(NULL, E_WARNING, "Unable to create output buffer"); - RETURN_FALSE; - } - - ptr = xmlNewTextWriterMemory(buffer, 0); + smart_str *output; + xmlTextWriterPtr ptr = xml_writer_create_in_memory(&output); if (! ptr) { - xmlBufferFree(buffer); RETURN_FALSE; } if (self) { xmlwriter_destroy_libxml_objects(ze_obj); ze_obj->ptr = ptr; - ze_obj->output = buffer; + ze_obj->output = output; RETURN_TRUE; } else { ze_obj = php_xmlwriter_fetch_object(xmlwriter_object_new(xmlwriter_class_entry_ce)); ze_obj->ptr = ptr; - ze_obj->output = buffer; + ze_obj->output = output; RETURN_OBJ(&ze_obj->std); } @@ -926,17 +952,16 @@ PHP_METHOD(XMLWriter, toMemory) { ZEND_PARSE_PARAMETERS_NONE(); - xmlBufferPtr buffer = xmlBufferCreate(); - xmlTextWriterPtr writer = xmlNewTextWriterMemory(buffer, 0); + smart_str *output; + xmlTextWriterPtr writer = xml_writer_create_in_memory(&output); /* No need for an explicit buffer check as this will fail on a NULL buffer. */ if (!writer) { - xmlBufferFree(buffer); zend_throw_error(NULL, "Could not construct libxml writer"); RETURN_THROWS(); } - xml_writer_create_static(INTERNAL_FUNCTION_PARAM_PASSTHRU, writer, buffer); + xml_writer_create_static(INTERNAL_FUNCTION_PARAM_PASSTHRU, writer, output); } static int xml_writer_stream_write(void *context, const char *buffer, int len) @@ -992,7 +1017,6 @@ PHP_METHOD(XMLWriter, toStream) /* {{{ php_xmlwriter_flush */ static void php_xmlwriter_flush(INTERNAL_FUNCTION_PARAMETERS, int force_string) { xmlTextWriterPtr ptr; - xmlBufferPtr buffer; bool empty = 1; int output_bytes; zval *self; @@ -1002,16 +1026,18 @@ static void php_xmlwriter_flush(INTERNAL_FUNCTION_PARAMETERS, int force_string) } XMLWRITER_FROM_OBJECT(ptr, self); - buffer = Z_XMLWRITER_P(self)->output; - if (force_string == 1 && buffer == NULL) { + smart_str *output = Z_XMLWRITER_P(self)->output; + if (force_string == 1 && output == NULL) { RETURN_EMPTY_STRING(); } output_bytes = xmlTextWriterFlush(ptr); - if (buffer) { - const xmlChar *content = xmlBufferContent(buffer); - RETVAL_STRING((const char *) content); + if (output) { if (empty) { - xmlBufferEmpty(buffer); + RETURN_STR(smart_str_extract(output)); + } else if (smart_str_get_len(output) > 0) { + RETURN_NEW_STR(zend_string_dup(output->s, false)); + } else { + RETURN_EMPTY_STRING(); } } else { RETVAL_LONG(output_bytes); diff --git a/ext/xmlwriter/php_xmlwriter.h b/ext/xmlwriter/php_xmlwriter.h index 00e21c10974..e9a4dcb2aa4 100644 --- a/ext/xmlwriter/php_xmlwriter.h +++ b/ext/xmlwriter/php_xmlwriter.h @@ -35,7 +35,7 @@ extern zend_module_entry xmlwriter_module_entry; /* Extends zend object */ typedef struct _ze_xmlwriter_object { xmlTextWriterPtr ptr; - xmlBufferPtr output; + smart_str *output; zend_object std; } ze_xmlwriter_object; diff --git a/ext/xmlwriter/tests/xmlwriter_toMemory_flush_combinations.phpt b/ext/xmlwriter/tests/xmlwriter_toMemory_flush_combinations.phpt new file mode 100644 index 00000000000..89d2e73090d --- /dev/null +++ b/ext/xmlwriter/tests/xmlwriter_toMemory_flush_combinations.phpt @@ -0,0 +1,23 @@ +--TEST-- +XMLWriter::toMemory() with combinations of empty flush and non-empty flush +--EXTENSIONS-- +xmlwriter +--FILE-- +flush(empty: false)); +$writer->startElement('foo'); +var_dump($writer->flush(empty: false)); +$writer->endElement(); +var_dump($writer->flush(empty: false)); +var_dump($writer->flush()); +var_dump($writer->flush()); + +?> +--EXPECT-- +string(0) "" +string(4) "" +string(6) "" +string(0) ""