libctf: fix CTF dict compression

Commit 483546ce4f ("libctf: make ctf_serialize() actually serialize")
accidentally broke dict compression.  There were two bugs:

 - arc_write_one_ctf was still making its own decision about whether to
   compress the dict, via a direct ctf_size comparison.  This is unfortunate
   because, now that it no longer calls ctf_serialize itself, ctf_size is
   always zero when it makes that comparison, so the compressed path was
   never chosen.  It should instead let the writing functions decide on the
   threshold: they already contain code to do this, which was simply unused
   for lack of one trivial wrapper that writes to an fd and also takes a
   compression threshold.

 - ctf_write_mem, the function underlying all writing as of the commit
   above, was calling zlib's compressBound and skipping compression whenever
   the returned value was larger than the input.  Unfortunately compressBound
   does not do a trial compression to determine whether the input is
   compressible: it just adds zlib header overhead to the size passed in, so
   its result is always larger than the input and the test *always* concluded
   that the data was incompressible!  Fix this by simply always compressing
   when the raw size is larger than the threshold: zlib is quite clever
   enough to avoid actually compressing if the data turns out to be
   incompressible.  (A standalone sketch of compressBound's behaviour follows
   below.)

Add a testcase for this.
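To make the second bug above concrete, here is a minimal standalone sketch
(editorial illustration, not part of this commit; build with -lz).
compressBound only computes a worst-case output size by adding zlib's header
and bookkeeping overhead, so its result is always at least as large as the
input and can never indicate incompressibility; only an actual compression
run shows how the data really behaves:

#include <stdio.h>
#include <string.h>
#include <zlib.h>

int
main (void)
{
  unsigned char in[4096];
  unsigned char out[8192];
  uLongf outsz = sizeof (out);

  memset (in, 'A', sizeof (in));	/* Highly compressible input.  */

  /* compressBound() is a worst-case bound: the input size plus zlib
     overhead.  It is always >= the input, so the old "is the bound bigger
     than the input?" test was always true, even for this trivially
     compressible buffer.  */
  printf ("compressBound (%zu) = %lu\n", sizeof (in),
	  (unsigned long) compressBound (sizeof (in)));

  /* A real compression shows how small the data actually gets; zlib falls
     back to stored blocks by itself if the input is incompressible.  */
  if (compress2 (out, &outsz, in, sizeof (in), 6) == Z_OK)
    printf ("compress2: %zu -> %lu bytes\n", sizeof (in),
	    (unsigned long) outsz);

  return 0;
}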

libctf/
	* ctf-impl.h (ctf_write_thresholded): New...
	* ctf-serialize.c (ctf_write_thresholded): ... defined here,
	a wrapper around...
	(ctf_write_mem): ... this.  Don't check compressibility.
	(ctf_compress_write): Reimplement as a ctf_write_thresholded
	wrapper.
	(ctf_write): Likewise.
	* ctf-archive.c (arc_write_one_ctf): Just call
	ctf_write_thresholded rather than trying to work out whether
	to compress.
	* testsuite/libctf-writable/ctf-compressed.*: New test.
Nick Alcock 2024-07-15 19:55:40 +01:00
parent 87b2f67310
commit 36c771b179
5 changed files with 179 additions and 42 deletions

libctf/ctf-archive.c

@@ -265,16 +265,10 @@ arc_write_one_ctf (ctf_dict_t *f, int fd, size_t threshold)
   uint64_t ctfsz = 0;
   char *ctfszp;
   size_t ctfsz_len;
-  int (*writefn) (ctf_dict_t * fp, int fd);
 
   if ((off = lseek (fd, 0, SEEK_CUR)) < 0)
     return errno * -1;
 
-  if (f->ctf_size > threshold)
-    writefn = ctf_compress_write;
-  else
-    writefn = ctf_write;
-
   /* This zero-write turns into the size in a moment.  */
   ctfsz_len = sizeof (ctfsz);
   ctfszp = (char *) &ctfsz;
@@ -287,7 +281,7 @@ arc_write_one_ctf (ctf_dict_t *f, int fd, size_t threshold)
       ctfszp += writelen;
     }
 
-  if (writefn (f, fd) != 0)
+  if (ctf_write_thresholded (f, fd, threshold) != 0)
     return f->ctf_errno * -1;
 
   if ((end_off = lseek (fd, 0, SEEK_CUR)) < 0)

libctf/ctf-impl.h

@@ -762,6 +762,8 @@ extern int ctf_flip (ctf_dict_t *, ctf_header_t *, unsigned char *, int);
 extern int ctf_import_unref (ctf_dict_t *fp, ctf_dict_t *pfp);
 
+extern int ctf_write_thresholded (ctf_dict_t *fp, int fd, size_t threshold);
+
 _libctf_malloc_
 extern void *ctf_mmap (size_t length, size_t offset, int fd);
 extern void ctf_munmap (void *, size_t);

libctf/ctf-serialize.c

@@ -1180,10 +1180,10 @@ ctf_write_mem (ctf_dict_t *fp, size_t *size, size_t threshold)
   alloc_len = compressBound (rawbufsiz - sizeof (ctf_header_t))
     + sizeof (ctf_header_t);
 
-  /* Trivial operation if the buffer is incompressible or too small to bother
-     compressing, and we're not doing a forced write-time flip.  */
+  /* Trivial operation if the buffer is too small to bother compressing, and
+     we're not doing a forced write-time flip.  */
 
-  if (rawbufsiz < threshold || rawbufsiz < alloc_len)
+  if (rawbufsiz < threshold)
     {
       alloc_len = rawbufsiz;
       uncompressed = 1;
@@ -1248,10 +1248,10 @@ err:
   return NULL;
 }
 
-/* Compress the specified CTF data stream and write it to the specified file
-   descriptor.  */
+/* Write the compressed CTF data stream to the specified file descriptor,
+   possibly compressed.  Internal only (for now).  */
 int
-ctf_compress_write (ctf_dict_t *fp, int fd)
+ctf_write_thresholded (ctf_dict_t *fp, int fd, size_t threshold)
 {
   unsigned char *buf;
   unsigned char *bp;
@@ -1260,7 +1260,7 @@ ctf_compress_write (ctf_dict_t *fp, int fd)
   ssize_t len;
   int err = 0;
 
-  if ((buf = ctf_write_mem (fp, &tmp, 0)) == NULL)
+  if ((buf = ctf_write_mem (fp, &tmp, threshold)) == NULL)
     return -1;				/* errno is set for us.  */
 
   buf_len = tmp;
@@ -1283,36 +1283,17 @@ ret:
   return err;
 }
 
+/* Compress the specified CTF data stream and write it to the specified file
+   descriptor.  */
+int
+ctf_compress_write (ctf_dict_t *fp, int fd)
+{
+  return ctf_write_thresholded (fp, fd, 0);
+}
+
 /* Write the uncompressed CTF data stream to the specified file descriptor.  */
 int
 ctf_write (ctf_dict_t *fp, int fd)
 {
-  unsigned char *buf;
-  unsigned char *bp;
-  size_t tmp;
-  ssize_t buf_len;
-  ssize_t len;
-  int err = 0;
-
-  if ((buf = ctf_write_mem (fp, &tmp, (size_t) -1)) == NULL)
-    return -1;				/* errno is set for us.  */
-
-  buf_len = tmp;
-  bp = buf;
-
-  while (buf_len > 0)
-    {
-      if ((len = write (fd, bp, buf_len)) < 0)
-	{
-	  err = ctf_set_errno (fp, errno);
-	  ctf_err_warn (fp, 0, 0, _("ctf_compress_write: error writing"));
-	  goto ret;
-	}
-      buf_len -= len;
-      bp += len;
-    }
-
-ret:
-  free (buf);
-  return err;
+  return ctf_write_thresholded (fp, fd, (size_t) -1);
 }
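For reference (an editorial sketch, not part of the diff above): the public
entry points now reduce to wrappers, with ctf_compress_write equivalent to
ctf_write_thresholded with a threshold of 0 (always compress) and ctf_write
equivalent to a threshold of (size_t) -1 (never compress), while
ctf_write_thresholded itself stays internal.  A minimal caller of the public
API might look like this (hypothetical output file name; error handling
abbreviated):

#include <ctf-api.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main (void)
{
  int err, fd;
  ctf_dict_t *fp;
  ctf_encoding_t e = { CTF_INT_SIGNED, 0, 8 * sizeof (int) };

  if ((fp = ctf_create (&err)) == NULL)
    return 1;
  if (ctf_add_integer (fp, CTF_ADD_ROOT, "int", &e) == CTF_ERR)
    return 1;

  if ((fd = open ("int.ctf", O_WRONLY | O_CREAT | O_TRUNC, 0666)) < 0)
    return 1;

  /* Equivalent to ctf_write_thresholded (fp, fd, 0): after this fix the
     compressed path really does compress, relying on zlib itself to cope
     with incompressible data.  Use ctf_write () instead to force an
     uncompressed dict (threshold (size_t) -1).  */
  if (ctf_compress_write (fp, fd) != 0)
    fprintf (stderr, "write failed: %s\n", ctf_errmsg (ctf_errno (fp)));

  close (fd);
  ctf_dict_close (fp);
  return 0;
}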

libctf/testsuite/libctf-writable/ctf-compressed.c (new file)

@@ -0,0 +1,158 @@
/* Make sure linking and writing an archive with a low threshold correctly
   compresses it.  (This tests for two bugs, one where archives weren't
   compressed regardless of their threshold, and another where nothing was.)  */

#include <ctf-api.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main (int argc, char *argv[])
{
  ctf_dict_t *in1;
  ctf_dict_t *in2;
  ctf_dict_t *fp;
  ctf_dict_t *dump_fp;
  ctf_archive_t *arc1, *arc2;
  ctf_archive_t *final_arc;
  ctf_sect_t s1, s2;
  ctf_encoding_t encoding = { CTF_INT_SIGNED, 0, sizeof (char) };
  ctf_encoding_t encoding2 = { CTF_INT_SIGNED, 0, sizeof (long long) };
  unsigned char *buf1, *buf2, *buf3;
  size_t buf1_sz, buf2_sz, buf3_sz;
  ctf_dump_state_t *dump_state = NULL;
  ctf_next_t *i = NULL;
  int err;

  /* Linking does not currently work on mingw because of an unreliable tmpfile
     implementation on that platform (see
     https://github.com/msys2/MINGW-packages/issues/18878).  Simply skip for
     now.  */

#ifdef __MINGW32__
  printf ("UNSUPPORTED: platform bug breaks ctf_link\n");
  return 0;
#else

  if ((fp = ctf_create (&err)) == NULL)
    goto create_err;

  if ((in1 = ctf_create (&err)) == NULL)
    goto create_err;

  if ((in2 = ctf_create (&err)) == NULL)
    goto create_err;

  /* Force a conflict to get an archive created.  */
  if ((ctf_add_integer (in1, CTF_ADD_ROOT, "foo", &encoding)) == CTF_ERR)
    {
      fprintf (stderr, "Cannot add: %s\n", ctf_errmsg (ctf_errno (in1)));
      return 1;
    }

  if ((ctf_add_integer (in2, CTF_ADD_ROOT, "foo", &encoding2)) == CTF_ERR)
    {
      fprintf (stderr, "Cannot add: %s\n", ctf_errmsg (ctf_errno (in2)));
      return 1;
    }

  /* Write them out and read them back in, to turn them into archives.
     This would be unnecessary if ctf_link_add() were public :( */

  if ((buf1 = ctf_write_mem (in1, &buf1_sz, -1)) == NULL)
    {
      fprintf (stderr, "Cannot serialize: %s\n", ctf_errmsg (ctf_errno (in1)));
      return 1;
    }

  if ((buf2 = ctf_write_mem (in2, &buf2_sz, -1)) == NULL)
    {
      fprintf (stderr, "Cannot serialize: %s\n", ctf_errmsg (ctf_errno (in2)));
      return 1;
    }

  s1.cts_name = "foo";
  s2.cts_name = "bar";
  s1.cts_data = (void *) buf1;
  s2.cts_data = (void *) buf2;
  s1.cts_size = buf1_sz;
  s2.cts_size = buf2_sz;
  s1.cts_entsize = 64;	/* Unimportant.  */
  s2.cts_entsize = 64;	/* Unimportant.  */

  if ((arc1 = ctf_arc_bufopen (&s1, NULL, NULL, &err)) == NULL ||
      (arc2 = ctf_arc_bufopen (&s2, NULL, NULL, &err)) == NULL)
    goto open_err;

  ctf_dict_close (in1);
  ctf_dict_close (in2);

  /* Link them together.  */
  if (ctf_link_add_ctf (fp, arc1, "a") < 0 ||
      ctf_link_add_ctf (fp, arc2, "b") < 0)
    goto link_err;

  if (ctf_link (fp, 0) < 0)
    goto link_err;

  /* Write them out.  We need a new buf here, because the archives are still
     using the other two bufs.  */
  if ((buf3 = ctf_link_write (fp, &buf3_sz, 1)) == NULL)
    goto link_err;

  /* Read them back in.  */
  s1.cts_data = (void *) buf3;
  s1.cts_size = buf3_sz;

  if ((final_arc = ctf_arc_bufopen (&s1, NULL, NULL, &err)) == NULL)
    goto open_err;

  if (ctf_archive_count (final_arc) != 2)
    {
      fprintf (stderr, "Archive is the wrong length: %zi.\n", ctf_archive_count (final_arc));
      return -1;
    }

  /* Dump the header of each archive member, and search for CTF_F_COMPRESS in
     the resulting dump.  */
  while ((dump_fp = ctf_archive_next (final_arc, &i, NULL, 0, &err)) != NULL)
    {
      char *dumpstr;

      while ((dumpstr = ctf_dump (dump_fp, &dump_state, CTF_SECT_HEADER,
				  NULL, NULL)) != NULL)
	{
	  if (strstr (dumpstr, "CTF_F_COMPRESS") != NULL)
	    printf ("Output is compressed.\n");
	  free (dumpstr);
	}

      ctf_dict_close (dump_fp);
    }

  if (err != ECTF_NEXT_END)
    {
      fprintf (stderr, "Archive iteration error: %s\n", ctf_errmsg (err));
      return 1;
    }

  ctf_arc_close (final_arc);
  free (buf1);
  free (buf2);
  free (buf3);
  ctf_dict_close (fp);

  return 0;

create_err:
  fprintf (stderr, "Cannot create: %s\n", ctf_errmsg (err));
  return 1;
open_err:
  fprintf (stderr, "Cannot open: %s\n", ctf_errmsg (err));
  return 1;
link_err:
  fprintf (stderr, "Cannot link: %s\n", ctf_errmsg (ctf_errno (fp)));
  return 1;
#endif
}

libctf/testsuite/libctf-writable/ctf-compressed.lk (new file: expected test output)

@@ -0,0 +1,2 @@
Output is compressed.
Output is compressed.