libctf: replace 'pending refs' abstraction

A few years ago we introduced a 'pending refs' abstraction to fix one
problem: serializing a dict, then changing it would tend to corrupt the dict
because the strtab sort we do on strtab writeout (to improve compression
efficiency) would modify the offset of any strings that sorted
lexicographically earlier in the strtab: so we added a new restriction that
all strings are added only at serialization time, and maintained a set of
'pending' refs that were added earlier, whose offsets we could update (like
other refs) at writeout time.

This was in hindsight seriously problematic for maintenance (because
serialization has to traverse all strings in all datatypes in the entire
dict), and has become impossible to sustain now that we can read in existing
dicts, modify them, and reserialize them again.  We really don't want to
have to dig through the entire dict we just read in just in order to dig out
all its strtab offsets, then *change* it, just for the sake of a sort that
adds a frankly trivial amount of compression efficiency.

Sorting *is* still worthwhile -- but it sacrifices very little to only sort
newly-added portions of the strtab, reusing older portions as necessary.
As a first stage in this, discard the whole "pending refs" abstraction and
replace it with "movable" refs, which are exactly like all other refs
(addresses containing the strtab offset of some string, which are updated
with the final strtab offset on serialization) except that we track them in
a reverse dict so that we can move the refs around (which we do whenever we
realloc() a buffer containing a bunch of structure members or something when
we add members to the structure).

libctf/

	* ctf-create.c (ctf_add_enumerator): Call ctf_str_move_refs; add
        a movable ref.
	(ctf_add_member_offset): Likewise.
	* ctf-util.c (ctf_realloc): Delete.
	* ctf-serialize.c (ctf_serialize): No longer use it.  Adjust to
	new fields.
	* ctf-string.c (ctf_str_purge_atom_refs): Purge movable refs.
	(ctf_str_free_atom): Free freeable atoms' strings.
	(ctf_str_create_atoms): Create the movable refs dynhash if needed.
	(ctf_str_free_atoms): Destroy it.
	(CTF_STR_MOVABLE): Switch (back) from ints to flags (see previous
	reversion).  Add new flag.
	(aref_create):  New, populate movable refs if need be.
	(ctf_str_add_ref_internal): Switch back to flags, update refs
	directly for nonprovisional strings (with already-known fixed offsets);
	create refs via aref_create.  Allocate strings only if not within an
	mmapped strtab.
	(ctf_str_add_movable_ref): New.
	(ctf_str_add): Adjust to CTF_STR_* reintroduction.
	(ctf_str_add_external): Likewise.
	(ctf_str_move_refs): New, move refs via ctf_str_movable_refs
	backpointer.
	(ctf_str_purge_refs): Drop ctf_str_num_refs.
	(ctf_str_update_refs): Fix indentation.
	* ctf-impl.h (struct ctf_str_atom_movable): New.
	(struct ctf_dict.ctf_str_num_refs): Drop.
	(struct ctf_dict.ctf_str_movable_refs): New.
	(ctf_str_add_movable_ref): Declare.
	(ctf_str_move_refs): Likewise.
	(ctf_realloc): Drop.
This commit is contained in:
Nick Alcock 2024-03-25 16:39:02 +00:00
parent 3301ddba1b
commit 149ce5c263
5 changed files with 225 additions and 64 deletions

View File

@ -1079,11 +1079,15 @@ ctf_add_enumerator (ctf_dict_t *fp, ctf_id_t enid, const char *name,
return -1; /* errno is set for us. */
en = (ctf_enum_t *) dtd->dtd_vlen;
/* Remove refs in the old vlen region and reapply them. */
ctf_str_move_refs (fp, old_vlen, sizeof (ctf_enum_t) * vlen, dtd->dtd_vlen);
for (i = 0; i < vlen; i++)
if (strcmp (ctf_strptr (fp, en[i].cte_name), name) == 0)
return (ctf_set_errno (ofp, ECTF_DUPLICATE));
en[i].cte_name = ctf_str_add_ref (fp, name, &en[i].cte_name);
en[i].cte_name = ctf_str_add_movable_ref (fp, name, &en[i].cte_name);
en[i].cte_value = value;
if (en[i].cte_name == 0 && name != NULL && name[0] != '\0')
@ -1143,6 +1147,10 @@ ctf_add_member_offset (ctf_dict_t *fp, ctf_id_t souid, const char *name,
return (ctf_set_errno (ofp, ctf_errno (fp)));
memb = (ctf_lmember_t *) dtd->dtd_vlen;
/* Remove refs in the old vlen region and reapply them. */
ctf_str_move_refs (fp, old_vlen, sizeof (ctf_lmember_t) * vlen, dtd->dtd_vlen);
if (name != NULL)
{
for (i = 0; i < vlen; i++)
@ -1172,7 +1180,7 @@ ctf_add_member_offset (ctf_dict_t *fp, ctf_id_t souid, const char *name,
return -1; /* errno is set for us. */
}
memb[vlen].ctlm_name = ctf_str_add_ref (fp, name, &memb[vlen].ctlm_name);
memb[vlen].ctlm_name = ctf_str_add_movable_ref (fp, name, &memb[vlen].ctlm_name);
memb[vlen].ctlm_type = type;
if (memb[vlen].ctlm_name == 0 && name != NULL && name[0] != '\0')
return -1; /* errno is set for us. */

View File

@ -207,13 +207,17 @@ typedef struct ctf_err_warning
removed from this table on ctf_close(), but on every ctf_serialize(), all
the csa_refs in all entries are purged. */
#define CTF_STR_ATOM_FREEABLE 0x1
#define CTF_STR_ATOM_MOVABLE 0x2
typedef struct ctf_str_atom
{
const char *csa_str; /* Backpointer to string (hash key). */
char *csa_str; /* Pointer to string (also used as hash key). */
ctf_list_t csa_refs; /* This string's refs. */
uint32_t csa_offset; /* Strtab offset, if any. */
uint32_t csa_external_offset; /* External strtab offset, if any. */
unsigned long csa_snapshot_id; /* Snapshot ID at time of creation. */
int csa_flags; /* CTF_STR_ATOM_* flags. */
} ctf_str_atom_t;
/* The refs of a single string in the atoms table. */
@ -224,6 +228,15 @@ typedef struct ctf_str_atom_ref
uint32_t *caf_ref; /* A single ref to this string. */
} ctf_str_atom_ref_t;
/* Like a ctf_str_atom_ref_t, but specific to movable refs.

   aref_create allocates one of these when CTF_STR_MOVABLE is requested, but
   fills in caf_ref and links the result into the atom's csa_refs list through
   a ctf_str_atom_ref_t pointer, so the leading members here have to line up
   with that type.
   NOTE(review): only the tail of ctf_str_atom_ref_t is visible in this view;
   presumably it starts with caf_list too -- confirm before reordering.  */
typedef struct ctf_str_atom_ref_movable
{
ctf_list_t caf_list; /* List forward/back pointers. */
uint32_t *caf_ref; /* A single ref to this string. */
/* Lets ref-purging code, which is handed only the atom, drop this ref's
   entry from the hash without needing a dict pointer.  */
ctf_dynhash_t *caf_movable_refs; /* Backpointer to ctf_str_movable_refs for this dict. */
} ctf_str_atom_ref_movable_t;
/* A single linker-provided symbol, during symbol addition, possibly before we
have been given external strtab refs. */
typedef struct ctf_in_flight_dynsym
@ -384,7 +397,7 @@ struct ctf_dict
ctf_lookup_t ctf_lookups[5]; /* Pointers to nametabs for name lookup. */
ctf_strs_t ctf_str[2]; /* Array of string table base and bounds. */
ctf_dynhash_t *ctf_str_atoms; /* Hash table of ctf_str_atoms_t. */
uint64_t ctf_str_num_refs; /* Number of refs to cts_str_atoms. */
ctf_dynhash_t *ctf_str_movable_refs; /* Hash table of void * -> ctf_str_atom_ref_t. */
uint32_t ctf_str_prov_offset; /* Latest provisional offset assigned so far. */
unsigned char *ctf_base; /* CTF file pointer. */
unsigned char *ctf_dynbase; /* Freeable CTF file pointer. */
@ -725,6 +738,9 @@ extern int ctf_str_create_atoms (ctf_dict_t *);
extern void ctf_str_free_atoms (ctf_dict_t *);
extern uint32_t ctf_str_add (ctf_dict_t *, const char *);
extern uint32_t ctf_str_add_ref (ctf_dict_t *, const char *, uint32_t *ref);
extern uint32_t ctf_str_add_movable_ref (ctf_dict_t *, const char *,
uint32_t *ref);
extern int ctf_str_move_refs (ctf_dict_t *fp, void *src, size_t len, void *dest);
extern int ctf_str_add_external (ctf_dict_t *, const char *, uint32_t offset);
extern void ctf_str_remove_ref (ctf_dict_t *, const char *, uint32_t *ref);
extern void ctf_str_rollback (ctf_dict_t *, ctf_snapshot_id_t);
@ -758,7 +774,6 @@ extern void *ctf_mmap (size_t length, size_t offset, int fd);
extern void ctf_munmap (void *, size_t);
extern ssize_t ctf_pread (int fd, void *buf, ssize_t count, off_t offset);
extern void *ctf_realloc (ctf_dict_t *, void *, size_t);
extern char *ctf_str_append (char *, const char *);
extern char *ctf_str_append_noerr (char *, const char *);

View File

@ -1104,11 +1104,9 @@ ctf_serialize (ctf_dict_t *fp)
ctf_qsort_r (dvarents, nvars, sizeof (ctf_varent_t), ctf_sort_var,
&sort_var_arg);
if ((newbuf = ctf_realloc (fp, buf, buf_size + strtab.cts_len)) == NULL)
{
free (strtab.cts_strs);
goto oom;
}
if ((newbuf = realloc (buf, buf_size + strtab.cts_len)) == NULL)
goto oom;
buf = newbuf;
memcpy (buf + buf_size, strtab.cts_strs, strtab.cts_len);
hdrp = (ctf_header_t *) buf;
@ -1191,8 +1189,10 @@ ctf_serialize (ctf_dict_t *fp)
ctf_str_free_atoms (nfp);
nfp->ctf_str_atoms = fp->ctf_str_atoms;
nfp->ctf_prov_strtab = fp->ctf_prov_strtab;
nfp->ctf_str_movable_refs = fp->ctf_str_movable_refs;
fp->ctf_str_atoms = NULL;
fp->ctf_prov_strtab = NULL;
fp->ctf_str_movable_refs = NULL;
memset (&fp->ctf_dtdefs, 0, sizeof (ctf_list_t));
memset (&fp->ctf_errs_warnings, 0, sizeof (ctf_list_t));
fp->ctf_add_processing = NULL;

View File

@ -17,6 +17,7 @@
along with this program; see the file COPYING. If not see
<http://www.gnu.org/licenses/>. */
#include <assert.h>
#include <ctf-impl.h>
#include <string.h>
#include <assert.h>
@ -107,17 +108,28 @@ ctf_str_purge_atom_refs (ctf_str_atom_t *atom)
{
next = ctf_list_next (ref);
ctf_list_delete (&atom->csa_refs, ref);
if (atom->csa_flags & CTF_STR_ATOM_MOVABLE)
{
ctf_str_atom_ref_movable_t *movref;
movref = (ctf_str_atom_ref_movable_t *) ref;
ctf_dynhash_remove (movref->caf_movable_refs, ref);
}
free (ref);
}
}
/* Free an atom (only called on ctf_close().) */
/* Free an atom. */
static void
ctf_str_free_atom (void *a)
{
ctf_str_atom_t *atom = a;
ctf_str_purge_atom_refs (atom);
if (atom->csa_flags & CTF_STR_ATOM_FREEABLE)
free (atom->csa_str);
free (atom);
}
@ -138,6 +150,12 @@ ctf_str_create_atoms (ctf_dict_t *fp)
if (!fp->ctf_prov_strtab)
goto oom_prov_strtab;
fp->ctf_str_movable_refs = ctf_dynhash_create (ctf_hash_integer,
ctf_hash_eq_integer,
NULL, NULL);
if (!fp->ctf_str_movable_refs)
goto oom_movable_refs;
errno = 0;
ctf_str_add (fp, "");
if (errno == ENOMEM)
@ -146,6 +164,9 @@ ctf_str_create_atoms (ctf_dict_t *fp)
return 0;
oom_str_add:
ctf_dynhash_destroy (fp->ctf_str_movable_refs);
fp->ctf_str_movable_refs = NULL;
oom_movable_refs:
ctf_dynhash_destroy (fp->ctf_prov_strtab);
fp->ctf_prov_strtab = NULL;
oom_prov_strtab:
@ -154,62 +175,140 @@ ctf_str_create_atoms (ctf_dict_t *fp)
return -ENOMEM;
}
/* Destroy the atoms table. */
/* Destroy the atoms table and associated refs. */
void
ctf_str_free_atoms (ctf_dict_t *fp)
{
/* Tear down all three string-tracking tables owned by FP.  The pointers in
   FP are not reset here, so a caller that keeps using FP must overwrite
   them itself (ctf_serialize does so for the dict it is about to reuse).  */
ctf_dynhash_destroy (fp->ctf_prov_strtab);
ctf_dynhash_destroy (fp->ctf_str_atoms);
/* Destroyed after the atoms: purging a movable atom's refs removes entries
   from this table.
   NOTE(review): this assumes ctf_str_free_atom is the value destructor of
   ctf_str_atoms -- that table's creation is not visible here; confirm.  */
ctf_dynhash_destroy (fp->ctf_str_movable_refs);
}
/* Add a string to the atoms table, copying the passed-in string. Return the
atom added. Return NULL only when out of memory (and do not touch the
passed-in string in that case). Possibly augment the ref list with the
passed-in ref. Possibly add a provisional entry for this string to the
provisional strtab. */
#define CTF_STR_ADD_REF 0x1
#define CTF_STR_PROVISIONAL 0x2
#define CTF_STR_MOVABLE 0x4
/* Build a new ref that points at REF and hang it off ATOM's ref list.

   FLAGS is a mask of CTF_STR_* values; only CTF_STR_MOVABLE is examined
   here.  A movable ref is allocated as the larger
   ctf_str_atom_ref_movable_t, remembers FP's ctf_str_movable_refs table in
   itself, and is entered into that table keyed by REF, which is how
   ctf_str_move_refs finds it again after the memory holding REF has been
   relocated.

   Returns the new ref, or NULL if the allocation or the hash insertion
   failed.  On failure nothing stays allocated and ATOM's list is unchanged;
   no error is recorded on FP here, that is left to the caller.

   NOTE(review): nothing in this function marks ATOM with
   CTF_STR_ATOM_MOVABLE, which is what the ref-purging code tests before
   removing a ref from the movable-refs table -- confirm where (or whether)
   that flag gets set.  */
static ctf_str_atom_ref_t *
aref_create (ctf_dict_t *fp, ctf_str_atom_t *atom, uint32_t *ref, int flags)
{
  const int movable = (flags & CTF_STR_MOVABLE) != 0;
  ctf_str_atom_ref_t *new_ref;

  new_ref = malloc (movable ? sizeof (struct ctf_str_atom_ref_movable)
                            : sizeof (struct ctf_str_atom_ref));
  if (new_ref == NULL)
    return NULL;

  new_ref->caf_ref = ref;

  if (movable)
    {
      ctf_dynhash_t *backrefs = fp->ctf_str_movable_refs;

      /* The ref carries its own pointer to the table so that it can be
         unhooked later by code that has no dict to hand.  */
      ((ctf_str_atom_ref_movable_t *) new_ref)->caf_movable_refs = backrefs;

      if (ctf_dynhash_insert (backrefs, ref, new_ref) < 0)
        {
          free (new_ref);
          return NULL;
        }
    }

  ctf_list_append (&atom->csa_refs, new_ref);
  return new_ref;
}
/* Add a string to the atoms table, copying the passed-in string if
necessary. Return the atom added. Return NULL only when out of memory
(and do not touch the passed-in string in that case).
Possibly add a provisional entry for this string to the provisional
strtab. If the string is in the provisional strtab, update its ref list
with the passed-in ref, causing the ref to be updated when the strtab is
written out. */
static ctf_str_atom_t *
ctf_str_add_ref_internal (ctf_dict_t *fp, const char *str,
int add_ref, int make_provisional, uint32_t *ref)
int flags, uint32_t *ref)
{
char *newstr = NULL;
ctf_str_atom_t *atom = NULL;
ctf_str_atom_ref_t *aref = NULL;
int added = 0;
atom = ctf_dynhash_lookup (fp->ctf_str_atoms, str);
if (add_ref)
{
if ((aref = malloc (sizeof (struct ctf_str_atom_ref))) == NULL) {
ctf_set_errno (fp, ENOMEM);
return NULL;
}
aref->caf_ref = ref;
}
/* Existing atoms get refs added only if they are provisional:
non-provisional strings already have a fixed strtab offset, and just
get their ref updated immediately, since its value cannot change. */
if (atom)
{
if (add_ref)
if (!ctf_dynhash_lookup (fp->ctf_prov_strtab, (void *) (uintptr_t)
atom->csa_offset))
{
ctf_list_append (&atom->csa_refs, aref);
fp->ctf_str_num_refs++;
if (flags & CTF_STR_ADD_REF)
{
if (atom->csa_external_offset)
*ref = atom->csa_external_offset;
else
*ref = atom->csa_offset;
}
return atom;
}
if (flags & CTF_STR_ADD_REF)
{
if (!aref_create (fp, atom, ref, flags))
{
ctf_set_errno (fp, ENOMEM);
return NULL;
}
}
return atom;
}
/* New atom. */
if ((atom = malloc (sizeof (struct ctf_str_atom))) == NULL)
goto oom;
memset (atom, 0, sizeof (struct ctf_str_atom));
if ((newstr = strdup (str)) == NULL)
goto oom;
/* Don't allocate new strings if this string is within an mmapped
strtab. */
if (ctf_dynhash_insert (fp->ctf_str_atoms, newstr, atom) < 0)
goto oom;
if ((unsigned char *) str < (unsigned char *) fp->ctf_data_mmapped
|| (unsigned char *) str > (unsigned char *) fp->ctf_data_mmapped + fp->ctf_data_mmapped_len)
{
if ((newstr = strdup (str)) == NULL)
goto oom;
atom->csa_flags |= CTF_STR_ATOM_FREEABLE;
atom->csa_str = newstr;
}
else
atom->csa_str = (char *) str;
if (ctf_dynhash_insert (fp->ctf_str_atoms, atom->csa_str, atom) < 0)
goto oom;
added = 1;
atom->csa_str = newstr;
atom->csa_snapshot_id = fp->ctf_snapshots;
if (make_provisional)
/* New atoms marked provisional go into the provisional strtab, and get a
ref added. */
if (flags & CTF_STR_PROVISIONAL)
{
atom->csa_offset = fp->ctf_str_prov_offset;
@ -218,20 +317,20 @@ ctf_str_add_ref_internal (ctf_dict_t *fp, const char *str,
goto oom;
fp->ctf_str_prov_offset += strlen (atom->csa_str) + 1;
if (flags & CTF_STR_ADD_REF)
{
if (!aref_create (fp, atom, ref, flags))
goto oom;
}
}
if (add_ref)
{
ctf_list_append (&atom->csa_refs, aref);
fp->ctf_str_num_refs++;
}
return atom;
oom:
if (newstr)
ctf_dynhash_remove (fp->ctf_str_atoms, newstr);
if (added)
ctf_dynhash_remove (fp->ctf_str_atoms, atom->csa_str);
free (atom);
free (aref);
free (newstr);
ctf_set_errno (fp, ENOMEM);
return NULL;
@ -250,7 +349,7 @@ ctf_str_add (ctf_dict_t *fp, const char *str)
if (!str)
str = "";
atom = ctf_str_add_ref_internal (fp, str, FALSE, TRUE, 0);
atom = ctf_str_add_ref_internal (fp, str, CTF_STR_PROVISIONAL, 0);
if (!atom)
return 0;
@ -268,7 +367,26 @@ ctf_str_add_ref (ctf_dict_t *fp, const char *str, uint32_t *ref)
if (!str)
str = "";
atom = ctf_str_add_ref_internal (fp, str, TRUE, TRUE, ref);
atom = ctf_str_add_ref_internal (fp, str, CTF_STR_ADD_REF
| CTF_STR_PROVISIONAL, ref);
if (!atom)
return 0;
return atom->csa_offset;
}
/* Like ctf_str_add_ref(), but note that the ref may be moved later on. */
uint32_t
ctf_str_add_movable_ref (ctf_dict_t *fp, const char *str, uint32_t *ref)
{
ctf_str_atom_t *atom;
if (!str)
str = "";
atom = ctf_str_add_ref_internal (fp, str, CTF_STR_ADD_REF
| CTF_STR_PROVISIONAL
| CTF_STR_MOVABLE, ref);
if (!atom)
return 0;
@ -285,7 +403,7 @@ ctf_str_add_external (ctf_dict_t *fp, const char *str, uint32_t offset)
if (!str)
str = "";
atom = ctf_str_add_ref_internal (fp, str, FALSE, FALSE, 0);
atom = ctf_str_add_ref_internal (fp, str, 0, 0);
if (!atom)
return 0;
@ -315,6 +433,41 @@ ctf_str_add_external (ctf_dict_t *fp, const char *str, uint32_t offset)
return 1;
}
/* Record that the LEN bytes which used to live at SRC are now at DEST, and
   retarget every movable ref whose target lay inside the old region.

   Refs are found through FP's ctf_str_movable_refs table, which is keyed by
   the address each ref points at, rather than by walking every atom's ref
   list: this runs on structure member and enumerand addition, where a
   linear search would make such addition quadratic.  The cost here is one
   hash lookup per byte of the old region.

   Each ref found has its target shifted by DEST - SRC and is rekeyed in the
   table under its new address.  Nothing is done if SRC and DEST are the
   same.  Always returns 0: failure to re-insert a ref is only checked by an
   assertion.  */
int
ctf_str_move_refs (ctf_dict_t *fp, void *src, size_t len, void *dest)
{
  const uintptr_t old_base = (uintptr_t) src;
  const uintptr_t shift = (uintptr_t) dest - old_base;
  size_t off;

  if (src == dest)
    return 0;

  for (off = 0; off < len; off++)
    {
      void *old_key = (void *) (old_base + off);
      ctf_str_atom_ref_t *moved;
      int out_of_memory;

      moved = ctf_dynhash_lookup (fp->ctf_str_movable_refs, old_key);
      if (moved == NULL)
        continue;

      /* Same offset within the region, new base address.  */
      moved->caf_ref = (uint32_t *) ((uintptr_t) moved->caf_ref + shift);

      /* Rekey: drop the entry under the old address, re-add under the
         new one.  */
      ctf_dynhash_remove (fp->ctf_str_movable_refs, old_key);
      out_of_memory = ctf_dynhash_insert (fp->ctf_str_movable_refs,
                                          moved->caf_ref, moved);
      assert (out_of_memory == 0);
    }

  return 0;
}
/* Remove a single ref. */
void
ctf_str_remove_ref (ctf_dict_t *fp, const char *str, uint32_t *ref)
@ -371,9 +524,7 @@ ctf_str_purge_one_atom_refs (void *key _libctf_unused_, void *value,
void
ctf_str_purge_refs (ctf_dict_t *fp)
{
if (fp->ctf_str_num_refs > 0)
ctf_dynhash_iter (fp->ctf_str_atoms, ctf_str_purge_one_atom_refs, NULL);
fp->ctf_str_num_refs = 0;
ctf_dynhash_iter (fp->ctf_str_atoms, ctf_str_purge_one_atom_refs, NULL);
}
/* Update a list of refs to the specified value. */
@ -384,7 +535,7 @@ ctf_str_update_refs (ctf_str_atom_t *refs, uint32_t value)
for (ref = ctf_list_next (&refs->csa_refs); ref != NULL;
ref = ctf_list_next (ref))
*(ref->caf_ref) = value;
*(ref->caf_ref) = value;
}
/* State shared across the strtab write process. */

View File

@ -231,19 +231,6 @@ ctf_str_append_noerr (char *s, const char *append)
return new_s;
}
/* A realloc() that fails noisily if called with any ctf_str_num_users. */
void *
ctf_realloc (ctf_dict_t *fp, void *ptr, size_t size)
{
if (fp->ctf_str_num_refs > 0)
{
ctf_dprintf ("%p: attempt to realloc() string table with %lu active refs\n",
(void *) fp, (unsigned long) fp->ctf_str_num_refs);
return NULL;
}
return realloc (ptr, size);
}
/* Store the specified error code into errp if it is non-NULL, and then
return NULL for the benefit of the caller. */