mirror of
https://sourceware.org/git/binutils-gdb.git
synced 2025-01-16 21:14:02 +08:00
0f0c11f7fc
This adds the core deduplicator that the ctf_link machinery calls (possibly repeatedly) to link the CTF sections: it takes an array of input ctf_file_t's and another array that indicates which entries in the input array are parents of which other entries, and returns an array of outputs. The first output is always the ctf_file_t on which ctf_link/ctf_dedup/etc was called: the other outputs are child dicts that have the first output as their parent. include/ * ctf-api.h (CTF_LINK_SHARE_DUPLICATED): No longer unimplemented. libctf/ * ctf-impl.h (ctf_type_id_key): New, the key in the cd_id_to_file_t. (ctf_dedup): New, core deduplicator state. (ctf_file_t) <ctf_dedup>: New. <ctf_dedup_atoms>: New. <ctf_dedup_atoms_alloc>: New. (ctf_hash_type_id_key): New prototype. (ctf_hash_eq_type_id_key): Likewise. (ctf_dedup_atoms_init): Likewise. * ctf-hash.c (ctf_hash_eq_type_id_key): New. (ctf_dedup_atoms_init): Likewise. * ctf-create.c (ctf_serialize): Adjusted. (ctf_add_encoded): No longer static. (ctf_add_reftype): Likewise. * ctf-open.c (ctf_file_close): Destroy the ctf_dedup_atoms_alloc. * ctf-dedup.c: New file. * ctf-decls.h [!HAVE_DECL_STPCPY]: Add prototype. * configure.ac: Check for stpcpy. * Makefile.am: Add it. * Makefile.in: Regenerate. * config.h.in: Regenerate. * configure: Regenerate.
3156 lines
100 KiB
C
3156 lines
100 KiB
C
/* CTF type deduplication.
|
|
Copyright (C) 2019 Free Software Foundation, Inc.
|
|
|
|
This file is part of libctf.
|
|
|
|
libctf is free software; you can redistribute it and/or modify it under
|
|
the terms of the GNU General Public License as published by the Free
|
|
Software Foundation; either version 3, or (at your option) any later
|
|
version.
|
|
|
|
This program is distributed in the hope that it will be useful, but
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
See the GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; see the file COPYING. If not see
|
|
<http://www.gnu.org/licenses/>. */
|
|
|
|
#include <ctf-impl.h>
|
|
#include <string.h>
|
|
#include <errno.h>
|
|
#include <assert.h>
|
|
#include "hashtab.h"
|
|
|
|
/* (In the below, relevant functions are named in square brackets.) */
|
|
|
|
/* Type deduplication is a three-phase process:
|
|
|
|
[ctf_dedup, ctf_dedup_hash_type, ctf_dedup_rhash_type]
|
|
1) come up with unambiguous hash values for all types: no two types may have
|
|
the same hash value, and any given type should have only one hash value
|
|
(for optimal deduplication).
|
|
|
|
[ctf_dedup, ctf_dedup_detect_name_ambiguity,
|
|
ctf_dedup_conflictify_unshared, ctf_dedup_mark_conflicting_hash]
|
|
2) mark those distinct types with names that collide (and thus cannot be
|
|
declared simultaneously in the same translation unit) as conflicting, and
|
|
recursively mark all types that cite one of those types as conflicting as
|
|
well. Possibly mark all types cited in only one TU as conflicting, if
|
|
the CTF_LINK_SHARE_DUPLICATED link mode is active.
|
|
|
|
[ctf_dedup_emit, ctf_dedup_emit_struct_members, ctf_dedup_id_to_target]
|
|
3) emit all the types, one hash value at a time. Types not marked
|
|
conflicting are emitted once, into the shared dictionary: types marked
|
|
conflicting are emitted once per TU into a dictionary corresponding to
|
|
each TU in which they appear. Structs marked conflicting get at the very
|
|
least a forward emitted into the shared dict so that other dicts can cite
|
|
it if needed.
|
|
|
|
[id_to_packed_id]
|
|
This all works over an array of inputs (usually in the same order as the
|
|
inputs on the link line). We don't use the ctf_link_inputs hash directly
|
|
because it is convenient to be able to address specific input types as a
|
|
*global type ID* or 'GID', a pair of an array offset and a ctf_id_t. Since
|
|
both are already 32 bits or less or can easily be constrained to that range,
|
|
we can pack them both into a single 64-bit hash word for easy lookups, which
|
|
would be much more annoying to do with a ctf_file_t * and a ctf_id_t. (On
|
|
32-bit platforms, we must do that anyway, since pointers, and thus hash keys
|
|
and values, are only 32 bits wide). We track which inputs are parents of
|
|
which other inputs so that we can correctly recognize that types we have
|
|
traversed in children may cite types in parents, and so that we can process
|
|
the parents first.)
|
|
|
|
Note that thanks to ld -r, the deduplicator can be fed its own output, so the
|
|
inputs may themselves have child dicts. Since we need to support this usage
|
|
anyway, we can use it in one other place. If the caller finds translation
|
|
units to be too small a unit ambiguous types, links can be 'cu-mapped', where
|
|
the caller provides a mapping of input TU names to output child dict names.
|
|
This mapping can fuse many child TUs into one potential child dict, so that
|
|
ambiguous types in any of those input TUs go into the same child dict.
|
|
When a many:1 cu-mapping is detected, the ctf_dedup machinery is called
|
|
repeatedly, once for every output name that has more than one input, to fuse
|
|
all the input TUs associated with a given output dict into one, and once again
|
|
as normal to deduplicate all those intermediate outputs (and any 1:1 inputs)
|
|
together. This has much higher memory usage than otherwise, because in the
|
|
intermediate state, all the output TUs are in memory at once and cannot be
|
|
lazily opened. It also has implications for the emission code: if types
|
|
appear ambiguously in multiple input TUs that are all mapped to the same
|
|
child dict, we cannot put them in children in the cu-mapping link phase
|
|
because this output is meant to *become* a child in the next link stage and
|
|
parent/child relationships are only one level deep: so instead, we just hide
|
|
all but one of the ambiguous types.
|
|
|
|
There are a few other subtleties here that make this more complex than it
|
|
seems. Let's go over the steps above in more detail.
|
|
|
|
1) HASHING.
|
|
|
|
[ctf_dedup_hash_type, ctf_dedup_rhash_type]
|
|
Hashing proceeds recursively, mixing in the properties of each input type
|
|
(including its name, if any), and then adding the hash values of every type
|
|
cited by that type. The result is stashed in the cd_type_hashes so other
|
|
phases can find the hash values of input types given their IDs, and so that
|
|
if we encounter this type again while hashing we can just return its hash
|
|
value: it is also stashed in the *output mapping*, a mapping from hash value
|
|
to the set of GIDs corresponding to that type in all inputs. We also keep
|
|
track of the GID of the first appearance of the type in any input (in
|
|
cd_output_first_gid), and the GID of structs, unions, and forwards that only
|
|
appear in one TU (in cd_struct_origin). See below for where these things are
|
|
used.
|
|
|
|
Everything in this phase is time-critical, because it is operating over
|
|
non-deduplicated types and so may have hundreds or thousands of times the
|
|
data volume to deal with than later phases. Trace output is hidden behind
|
|
ENABLE_LIBCTF_HASH_DEBUGGING to prevent the sheer number of calls to
|
|
ctf_dprintf from slowing things down (tenfold slowdowns are observed purely
|
|
from the calls to ctf_dprintf(), even with debugging switched off), and keep
|
|
down the volume of output (hundreds of gigabytes of debug output are not
|
|
uncommon on larger links).
|
|
|
|
We have to do *something* about potential cycles in the type graph. We'd
|
|
like to avoid emitting forwards in the final output if possible, because
|
|
forwards aren't much use: they have no members. We are mostly saved from
|
|
needing to worry about this at emission time by ctf_add_struct*()
|
|
automatically replacing newly-created forwards when the real struct/union
|
|
comes along. So we only have to avoid getting stuck in cycles during the
|
|
hashing phase, while also not confusing types that cite members that are
|
|
structs with each other. It is easiest to solve this problem by noting two
|
|
things:
|
|
|
|
- all cycles in C depend on the presence of tagged structs/unions
|
|
- all tagged structs/unions have a unique name they can be disambiguated by
|
|
|
|
[ctf_dedup_is_stub]
|
|
This means that we can break all cycles by ceasing to hash in cited types at
|
|
every tagged struct/union and instead hashing in a stub consisting of the
|
|
struct/union's *decorated name*, which is the name preceded by "s " or "u "
|
|
depending on the namespace (cached in cd_decorated_names). Forwards are
|
|
decorated identically (so a forward to "struct foo" would be represented as
|
|
"s foo"): this means that a citation of a forward to a type and a citation of
|
|
a concrete definition of a type with the same name ends up getting the same
|
|
hash value.
|
|
|
|
Of course, it is quite possible to have two TUs with structs with the same
|
|
name and different definitions, but that's OK because when we scan for types
|
|
with ambiguous names we will identify these and mark them conflicting.
|
|
|
|
We populate one thing to help conflictedness marking. No unconflicted type
|
|
may cite a conflicted one, but this means that conflictedness marking must
|
|
walk from types to the types that cite them, which is the opposite of the
|
|
usual order. We can make this easier to do by constructing a *citers* graph
|
|
in cd_citers, which points from types to the types that cite them: because we
|
|
emit forwards corresponding to every conflicted struct/union, we don't need
|
|
to do this for citations of structs/unions by other types. This is very
|
|
convenient for us, because that's the only type we don't traverse
|
|
recursively: so we can construct the citers graph at the same time as we
|
|
hash, rather than needing to add an extra pass. (This graph is a dynhash of
|
|
*type hash values*, so it's small: in effect it is automatically
|
|
deduplicated.)
|
|
|
|
2) COLLISIONAL MARKING.
|
|
|
|
[ctf_dedup_detect_name_ambiguity, ctf_dedup_mark_conflicting_hash]
|
|
We identify types whose names collide during the hashing process, and count
|
|
the rough number of uses of each name (caching may throw it off a bit: this
|
|
doesn't need to be accurate). We then mark the less-frequently-cited types
|
|
with each names conflicting: the most-frequently-cited one goes into the
|
|
shared type dictionary, while all others are duplicated into per-TU
|
|
dictionaries, named after the input TU, that have the shared dictionary as a
|
|
parent. For structures and unions this is not quite good enough: we'd like
|
|
to have citations of forwards to ambiguously named structures and unions
|
|
*stay* as citations of forwards, so that the user can tell that the caller
|
|
didn't actually know which structure definition was meant: but if we put one
|
|
of those structures into the shared dictionary, it would supplant and replace
|
|
the forward, leaving no sign. So structures and unions do not take part in
|
|
this popularity contest: if their names are ambiguous, they are just
|
|
duplicated, and only a forward appears in the shared dict.
|
|
|
|
[ctf_dedup_propagate_conflictedness]
|
|
The process of marking types conflicted is itself recursive: we recursively
|
|
traverse the cd_citers graph populated in the hashing pass above and mark
|
|
everything that we encounter conflicted (without wasting time re-marking
|
|
anything that is already marked). This naturally terminates just where we
|
|
want it to (at types that are cited by no other types, and at structures and
|
|
unions) and suffices to ensure that types that cite conflicted types are
|
|
always marked conflicted.
|
|
|
|
[ctf_dedup_conflictify_unshared, ctf_dedup_multiple_input_dicts]
|
|
When linking in CTF_LINK_SHARE_DUPLICATED mode, we would like all types that
|
|
are used in only one TU to end up in a per-CU dict. The easiest way to do
|
|
that is to mark them conflicted. ctf_dedup_conflictify_unshared does this,
|
|
traversing the output mapping and using ctf_dedup_multiple_input_dicts to
|
|
check the number of input dicts each distinct type hash value came from:
|
|
types that only came from one get marked conflicted. One caveat here is that
|
|
we need to consider both structs and forwards to them: a struct that appears
|
|
in one TU and has a dozen citations to an opaque forward in other TUs should
|
|
*not* be considered to be used in only one TU, because users would find it
|
|
useful to be able to traverse into opaque structures of that sort: so we use
|
|
cd_struct_origin to check both structs/unions and the forwards corresponding
|
|
to them.
|
|
|
|
3) EMISSION.
|
|
|
|
[ctf_dedup_walk_output_mapping, ctf_dedup_rwalk_output_mapping,
|
|
ctf_dedup_rwalk_one_output_mapping]
|
|
Emission involves another walk of the entire output mapping, this time
|
|
traversing everything other than struct members, recursively. Types are
|
|
emitted from leaves to trunk, emitting all types a type cites before emitting
|
|
the type itself. We sort the output mapping before traversing it, for
|
|
reproducibility and also correctness: the input dicts may have parent/child
|
|
relationships, so we simply sort all types that first appear in parents
|
|
before all children, then sort types that first appear in dicts appearing
|
|
earlier on the linker command line before those that appear later, then sort
|
|
by input ctf_id_t. (This is where we use cd_output_first_gid, collected
|
|
above.)
|
|
|
|
The walking is done using a recursive traverser which arranges to not revisit
|
|
any type already visited and to call its callback once per input GID for
|
|
input GIDs corresponding to conflicted output types. The traverser only
|
|
finds input types and calls a callback for them as many times as the output
|
|
needs to appear: it doesn't try to figure out anything about where the output
|
|
might go. That's done by the callback based on whether the type is
|
|
marked conflicted or not.
|
|
|
|
[ctf_dedup_emit_type, ctf_dedup_id_to_target, ctf_dedup_synthesize_forward]
|
|
ctf_dedup_emit_type is the (sole) callback for ctf_dedup_walk_output_mapping.
|
|
Conflicted types have all necessary dictionaries created, and then we emit
|
|
the type into each dictionary in turn, working over each input CTF type
|
|
corresponding to each hash value and using ctf_dedup_id_to_target to map each
|
|
input ctf_id_t into the corresponding type in the output (dealing with input
|
|
ctf_id_t's with parents in the process by simply chasing to the parent dict
|
|
if the type we're looking up is in there). Emitting structures involves
|
|
simply noting that the members of this structure need emission later on:
|
|
because you cannot cite a single structure member from another type, we avoid
|
|
emitting the members at this stage to keep recursion depths down a bit.
|
|
|
|
At this point, if we have by some mischance decided that two different types
|
|
with child types that hash to different values have in fact got the same hash
|
|
value themselves and *not* marked it conflicting, the type walk will walk
|
|
only *one* of them and in all likelihood we'll find that we are trying to
|
|
emit a type into some child dictionary that references a type that was never
|
|
emitted into that dictionary and assertion-fail. This always indicates a bug
|
|
in the conflictedness marking machinery or the hashing code, or both.
|
|
|
|
ctf_dedup_id_to_target calls ctf_dedup_synthesize_forward to do one extra
|
|
thing, alluded to above: if this is a conflicted tagged structure or union,
|
|
and the target is the shared dict (i.e., the type we're being asked to emit
|
|
is not itself conflicted so can't just point straight at the conflicted
|
|
type), we instead synthesise a forward with the same name, emit it into the
|
|
shared dict, record it in cd_output_emission_conflicted_forwards so that we
|
|
don't re-emit it, and return it. This means that cycles that contain
|
|
conflicts do not cause the entire cycle to be replicated in every child: only
|
|
that piece of the cycle which takes you back as far as the closest tagged
|
|
struct/union needs to be replicated. This trick means that no part of the
|
|
deduplicator needs a cycle detector: every recursive walk can stop at tagged
|
|
structures.
|
|
|
|
[ctf_dedup_emit_struct_members]
|
|
The final stage of emission is to walk over all structures with members
|
|
that need emission and emit all of them. Every type has been emitted at
|
|
this stage, so emission cannot fail.
|
|
|
|
[ctf_dedup_populate_type_mappings, ctf_dedup_populate_type_mapping]
|
|
Finally, we update the input -> output type ID mappings used by the ctf-link
|
|
machinery to update all the other sections. This is surprisingly expensive
|
|
and may be replaced with a scheme which lets the ctf-link machinery extract
|
|
the needed info directly from the deduplicator. */
|
|
|
|
/* Possible future optimizations are flagged with 'optimization opportunity'
|
|
below. */
|
|
|
|
/* Global optimization opportunity: a GC pass, eliminating types with no direct
|
|
or indirect citations from the other sections in the dictionary. */
|
|
|
|
/* Internal flag values for ctf_dedup_hash_type. */
|
|
|
|
/* Child call: consider forwardable types equivalent to forwards or stubs below
|
|
this point. */
|
|
#define CTF_DEDUP_HASH_INTERNAL_CHILD 0x01
|
|
|
|
/* Transform references to single ctf_id_ts in passed-in inputs into a number
|
|
that will fit in a uint64_t. Needs rethinking if CTF_MAX_TYPE is boosted.
|
|
|
|
On 32-bit platforms, we pack things together differently: see the note
|
|
above. */
|
|
|
|
#if UINTPTR_MAX < UINT64_MAX
|
|
# define IDS_NEED_ALLOCATION 1
|
|
# define CTF_DEDUP_GID(fp, input, type) id_to_packed_id (fp, input, type)
|
|
# define CTF_DEDUP_GID_TO_INPUT(id) packed_id_to_input (id)
|
|
# define CTF_DEDUP_GID_TO_TYPE(id) packed_id_to_type (id)
|
|
#else
|
|
# define CTF_DEDUP_GID(fp, input, type) \
|
|
(void *) (((uint64_t) input) << 32 | (type))
|
|
# define CTF_DEDUP_GID_TO_INPUT(id) ((int) (((uint64_t) id) >> 32))
|
|
# define CTF_DEDUP_GID_TO_TYPE(id) (ctf_id_t) (((uint64_t) id) & ~(0xffffffff00000000ULL))
|
|
#endif
|
|
|
|
#ifdef IDS_NEED_ALLOCATION
|
|
|
|
/* This is the 32-bit path, which stores GIDs in a pool and returns a pointer
|
|
into the pool. It is notably less efficient than the 64-bit direct storage
|
|
approach, but with a smaller key, this is all we can do. */
|
|
|
|
static void *
|
|
id_to_packed_id (ctf_file_t *fp, int input_num, ctf_id_t type)
|
|
{
|
|
const void *lookup;
|
|
ctf_type_id_key_t *dynkey = NULL;
|
|
ctf_type_id_key_t key = { input_num, type };
|
|
|
|
if (!ctf_dynhash_lookup_kv (fp->ctf_dedup.cd_id_to_file_t,
|
|
&key, &lookup, NULL))
|
|
{
|
|
if ((dynkey = malloc (sizeof (ctf_type_id_key_t))) == NULL)
|
|
goto oom;
|
|
memcpy (dynkey, &key, sizeof (ctf_type_id_key_t));
|
|
|
|
if (ctf_dynhash_insert (fp->ctf_dedup.cd_id_to_file_t, dynkey, NULL) < 0)
|
|
goto oom;
|
|
|
|
ctf_dynhash_lookup_kv (fp->ctf_dedup.cd_id_to_file_t,
|
|
dynkey, &lookup, NULL);
|
|
}
|
|
/* We use a raw assert() here because there isn't really a way to get any sort
|
|
of error back from this routine without vastly complicating things for the
|
|
much more common case of !IDS_NEED_ALLOCATION. */
|
|
assert (lookup);
|
|
return (void *) lookup;
|
|
|
|
oom:
|
|
free (dynkey);
|
|
ctf_set_errno (fp, ENOMEM);
|
|
return NULL;
|
|
}
|
|
|
|
static int
|
|
packed_id_to_input (const void *id)
|
|
{
|
|
const ctf_type_id_key_t *key = (ctf_type_id_key_t *) id;
|
|
|
|
return key->ctii_input_num;
|
|
}
|
|
|
|
static ctf_id_t
|
|
packed_id_to_type (const void *id)
|
|
{
|
|
const ctf_type_id_key_t *key = (ctf_type_id_key_t *) id;
|
|
|
|
return key->ctii_type;
|
|
}
|
|
#endif
|
|
|
|
/* Make an element in a dynhash-of-dynsets, or return it if already present. */
|
|
|
|
static ctf_dynset_t *
|
|
make_set_element (ctf_dynhash_t *set, const void *key)
|
|
{
|
|
ctf_dynset_t *element;
|
|
|
|
if ((element = ctf_dynhash_lookup (set, key)) == NULL)
|
|
{
|
|
if ((element = ctf_dynset_create (htab_hash_string,
|
|
ctf_dynset_eq_string,
|
|
NULL)) == NULL)
|
|
return NULL;
|
|
|
|
if (ctf_dynhash_insert (set, (void *) key, element) < 0)
|
|
{
|
|
ctf_dynset_destroy (element);
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
return element;
|
|
}
|
|
|
|
/* Initialize the dedup atoms table. */
|
|
int
|
|
ctf_dedup_atoms_init (ctf_file_t *fp)
|
|
{
|
|
if (fp->ctf_dedup_atoms)
|
|
return 0;
|
|
|
|
if (!fp->ctf_dedup_atoms_alloc)
|
|
{
|
|
if ((fp->ctf_dedup_atoms_alloc
|
|
= ctf_dynset_create (htab_hash_string, ctf_dynset_eq_string,
|
|
free)) == NULL)
|
|
return ctf_set_errno (fp, ENOMEM);
|
|
}
|
|
fp->ctf_dedup_atoms = fp->ctf_dedup_atoms_alloc;
|
|
return 0;
|
|
}
|
|
|
|
/* Intern things in the dedup atoms table. */
|
|
|
|
static const char *
|
|
intern (ctf_file_t *fp, char *atom)
|
|
{
|
|
const void *foo;
|
|
|
|
if (atom == NULL)
|
|
return NULL;
|
|
|
|
if (!ctf_dynset_exists (fp->ctf_dedup_atoms, atom, &foo))
|
|
{
|
|
if (ctf_dynset_insert (fp->ctf_dedup_atoms, atom) < 0)
|
|
{
|
|
ctf_set_errno (fp, ENOMEM);
|
|
return NULL;
|
|
}
|
|
foo = atom;
|
|
}
|
|
else
|
|
free (atom);
|
|
|
|
return (const char *) foo;
|
|
}
|
|
|
|
/* Add an indication of the namespace to a type name in a way that is not valid
|
|
for C identifiers. Used to maintain hashes of type names to other things
|
|
while allowing for the four C namespaces (normal, struct, union, enum).
|
|
Return a new dynamically-allocated string. */
|
|
static const char *
|
|
ctf_decorate_type_name (ctf_file_t *fp, const char *name, int kind)
|
|
{
|
|
ctf_dedup_t *d = &fp->ctf_dedup;
|
|
const char *ret;
|
|
const char *k;
|
|
char *p;
|
|
size_t i;
|
|
|
|
switch (kind)
|
|
{
|
|
case CTF_K_STRUCT:
|
|
k = "s ";
|
|
i = 0;
|
|
break;
|
|
case CTF_K_UNION:
|
|
k = "u ";
|
|
i = 1;
|
|
break;
|
|
case CTF_K_ENUM:
|
|
k = "e ";
|
|
i = 2;
|
|
break;
|
|
default:
|
|
k = "";
|
|
i = 3;
|
|
}
|
|
|
|
if ((ret = ctf_dynhash_lookup (d->cd_decorated_names[i], name)) == NULL)
|
|
{
|
|
char *str;
|
|
|
|
if ((str = malloc (strlen (name) + strlen (k) + 1)) == NULL)
|
|
goto oom;
|
|
|
|
p = stpcpy (str, k);
|
|
strcpy (p, name);
|
|
ret = intern (fp, str);
|
|
if (!ret)
|
|
goto oom;
|
|
|
|
if (ctf_dynhash_cinsert (d->cd_decorated_names[i], name, ret) < 0)
|
|
goto oom;
|
|
}
|
|
|
|
return ret;
|
|
|
|
oom:
|
|
ctf_set_errno (fp, ENOMEM);
|
|
return NULL;
|
|
}
|
|
|
|
/* Hash a type, possibly debugging-dumping something about it as well. */
|
|
static inline void
|
|
ctf_dedup_sha1_add (ctf_sha1_t *sha1, const void *buf, size_t len,
|
|
const char *description _libctf_unused_,
|
|
unsigned long depth _libctf_unused_)
|
|
{
|
|
ctf_sha1_add (sha1, buf, len);
|
|
|
|
#ifdef ENABLE_LIBCTF_HASH_DEBUGGING
|
|
ctf_sha1_t tmp;
|
|
char tmp_hval[CTF_SHA1_SIZE];
|
|
tmp = *sha1;
|
|
ctf_sha1_fini (&tmp, tmp_hval);
|
|
ctf_dprintf ("%lu: after hash addition of %s: %s\n", depth, description,
|
|
tmp_hval);
|
|
#endif
|
|
}
|
|
|
|
static const char *
|
|
ctf_dedup_hash_type (ctf_file_t *fp, ctf_file_t *input,
|
|
ctf_file_t **inputs, uint32_t *parents,
|
|
int input_num, ctf_id_t type, int flags,
|
|
unsigned long depth,
|
|
int (*populate_fun) (ctf_file_t *fp,
|
|
ctf_file_t *input,
|
|
ctf_file_t **inputs,
|
|
int input_num,
|
|
ctf_id_t type,
|
|
void *id,
|
|
const char *decorated_name,
|
|
const char *hash));
|
|
|
|
/* Determine whether this type is being hashed as a stub (in which case it is
|
|
unsafe to cache it). */
|
|
static int
|
|
ctf_dedup_is_stub (const char *name, int kind, int fwdkind, int flags)
|
|
{
|
|
/* We can cache all types unless we are recursing to children and are hashing
|
|
in a tagged struct, union or forward, all of which are replaced with their
|
|
decorated name as a stub and will have different hash values when hashed at
|
|
the top level. */
|
|
|
|
return ((flags & CTF_DEDUP_HASH_INTERNAL_CHILD) && name
|
|
&& (kind == CTF_K_STRUCT || kind == CTF_K_UNION
|
|
|| (kind == CTF_K_FORWARD && (fwdkind == CTF_K_STRUCT
|
|
|| fwdkind == CTF_K_UNION))));
|
|
}
|
|
|
|
/* Populate struct_origin if need be (not already populated, or populated with
|
|
a different origin), in which case it must go to -1, "shared".)
|
|
|
|
Only called for forwards or forwardable types with names, when the link mode
|
|
is CTF_LINK_SHARE_DUPLICATED. */
|
|
static int
|
|
ctf_dedup_record_origin (ctf_file_t *fp, int input_num, const char *decorated,
|
|
void *id)
|
|
{
|
|
ctf_dedup_t *d = &fp->ctf_dedup;
|
|
void *origin;
|
|
int populate_origin = 0;
|
|
|
|
if (ctf_dynhash_lookup_kv (d->cd_struct_origin, decorated, NULL, &origin))
|
|
{
|
|
if (CTF_DEDUP_GID_TO_INPUT (origin) != input_num
|
|
&& CTF_DEDUP_GID_TO_INPUT (origin) != -1)
|
|
{
|
|
populate_origin = 1;
|
|
origin = CTF_DEDUP_GID (fp, -1, -1);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
populate_origin = 1;
|
|
origin = id;
|
|
}
|
|
|
|
if (populate_origin)
|
|
if (ctf_dynhash_cinsert (d->cd_struct_origin, decorated, origin) < 0)
|
|
return ctf_set_errno (fp, errno);
|
|
return 0;
|
|
}
|
|
|
|
/* Do the underlying hashing and recursion for ctf_dedup_hash_type (which it
|
|
calls, recursively). */
|
|
|
|
static const char *
|
|
ctf_dedup_rhash_type (ctf_file_t *fp, ctf_file_t *input, ctf_file_t **inputs,
|
|
uint32_t *parents, int input_num, ctf_id_t type,
|
|
void *type_id, const ctf_type_t *tp, const char *name,
|
|
const char *decorated, int kind, int flags,
|
|
unsigned long depth,
|
|
int (*populate_fun) (ctf_file_t *fp,
|
|
ctf_file_t *input,
|
|
ctf_file_t **inputs,
|
|
int input_num,
|
|
ctf_id_t type,
|
|
void *id,
|
|
const char *decorated_name,
|
|
const char *hash))
|
|
{
|
|
ctf_dedup_t *d = &fp->ctf_dedup;
|
|
ctf_next_t *i = NULL;
|
|
ctf_sha1_t hash;
|
|
ctf_id_t child_type;
|
|
char hashbuf[CTF_SHA1_SIZE];
|
|
const char *hval = NULL;
|
|
const char *whaterr;
|
|
int err;
|
|
|
|
const char *citer = NULL;
|
|
ctf_dynset_t *citers = NULL;
|
|
|
|
/* Add a citer to the citers set. */
|
|
#define ADD_CITER(citers, hval) \
|
|
do \
|
|
{ \
|
|
whaterr = "updating citers"; \
|
|
if (!citers) \
|
|
if ((citers = ctf_dynset_create (htab_hash_string, \
|
|
ctf_dynset_eq_string, \
|
|
NULL)) == NULL) \
|
|
goto oom; \
|
|
if (ctf_dynset_cinsert (citers, hval) < 0) \
|
|
goto oom; \
|
|
} while (0)
|
|
|
|
/* If this is a named struct or union or a forward to one, and this is a child
|
|
traversal, treat this type as if it were a forward -- do not recurse to
|
|
children, ignore all content not already hashed in, and hash in the
|
|
decorated name of the type instead. */
|
|
|
|
if (ctf_dedup_is_stub (name, kind, tp->ctt_type, flags))
|
|
{
|
|
#ifdef ENABLE_LIBCTF_HASH_DEBUGGING
|
|
ctf_dprintf ("Struct/union/forward citation: substituting forwarding "
|
|
"stub with decorated name %s\n", decorated);
|
|
|
|
#endif
|
|
ctf_sha1_init (&hash);
|
|
ctf_dedup_sha1_add (&hash, decorated, strlen (decorated) + 1,
|
|
"decorated struct/union/forward name", depth);
|
|
ctf_sha1_fini (&hash, hashbuf);
|
|
|
|
if ((hval = intern (fp, strdup (hashbuf))) == NULL)
|
|
{
|
|
ctf_err_warn (fp, 0, "%s (%i): out of memory during forwarding-stub "
|
|
"hashing for type with GID %p; errno: %s",
|
|
ctf_link_input_name (input), input_num, type_id,
|
|
strerror (errno));
|
|
return NULL; /* errno is set for us. */
|
|
}
|
|
|
|
/* In share--duplicated link mode, make sure the origin of this type is
|
|
recorded, even if this is a type in a parent dict which will not be
|
|
directly traversed. */
|
|
if (d->cd_link_flags & CTF_LINK_SHARE_DUPLICATED
|
|
&& ctf_dedup_record_origin (fp, input_num, decorated, type_id) < 0)
|
|
return NULL; /* errno is set for us. */
|
|
|
|
return hval;
|
|
}
|
|
|
|
/* Now ensure that subsequent recursive calls (but *not* the top-level call)
|
|
get this treatment. */
|
|
flags |= CTF_DEDUP_HASH_INTERNAL_CHILD;
|
|
|
|
/* If this is a struct, union, or forward with a name, record the unique
|
|
originating input TU, if there is one. */
|
|
|
|
if (decorated && (ctf_forwardable_kind (kind) || kind != CTF_K_FORWARD))
|
|
if (d->cd_link_flags & CTF_LINK_SHARE_DUPLICATED
|
|
&& ctf_dedup_record_origin (fp, input_num, decorated, type_id) < 0)
|
|
return NULL; /* errno is set for us. */
|
|
|
|
/* Mix in invariant stuff, transforming the type kind if needed. Note that
|
|
the vlen is *not* hashed in: the actual variable-length info is hashed in
|
|
instead, piecewise. The vlen is not part of the type, only the
|
|
variable-length data is: identical types with distinct vlens are quite
|
|
possible. Equally, we do not want to hash in the isroot flag: both the
|
|
compiler and the deduplicator set the nonroot flag to indicate clashes with
|
|
*other types in the same TU* with the same name: so two types can easily
|
|
have distinct nonroot flags, yet be exactly the same type.*/
|
|
|
|
#ifdef ENABLE_LIBCTF_HASH_DEBUGGING
|
|
ctf_dprintf ("%lu: hashing thing with ID %i/%lx (kind %i): %s.\n",
|
|
depth, input_num, type, kind, name ? name : "");
|
|
#endif
|
|
|
|
ctf_sha1_init (&hash);
|
|
if (name)
|
|
ctf_dedup_sha1_add (&hash, name, strlen (name) + 1, "name", depth);
|
|
ctf_dedup_sha1_add (&hash, &kind, sizeof (uint32_t), "kind", depth);
|
|
|
|
/* Hash content of this type. */
|
|
switch (kind)
|
|
{
|
|
case CTF_K_UNKNOWN:
|
|
/* No extra state. */
|
|
break;
|
|
case CTF_K_FORWARD:
|
|
|
|
/* Add the forwarded kind, stored in the ctt_type. */
|
|
ctf_dedup_sha1_add (&hash, &tp->ctt_type, sizeof (tp->ctt_type),
|
|
"forwarded kind", depth);
|
|
break;
|
|
case CTF_K_INTEGER:
|
|
case CTF_K_FLOAT:
|
|
{
|
|
ctf_encoding_t ep;
|
|
memset (&ep, 0, sizeof (ctf_encoding_t));
|
|
|
|
ctf_dedup_sha1_add (&hash, &tp->ctt_size, sizeof (uint32_t), "size",
|
|
depth);
|
|
if (ctf_type_encoding (input, type, &ep) < 0)
|
|
{
|
|
whaterr = "encoding";
|
|
goto err;
|
|
}
|
|
ctf_dedup_sha1_add (&hash, &ep, sizeof (ctf_encoding_t), "encoding",
|
|
depth);
|
|
break;
|
|
}
|
|
/* Types that reference other types. */
|
|
case CTF_K_TYPEDEF:
|
|
case CTF_K_VOLATILE:
|
|
case CTF_K_CONST:
|
|
case CTF_K_RESTRICT:
|
|
case CTF_K_POINTER:
|
|
/* Hash the referenced type, if not already hashed, and mix it in. */
|
|
child_type = ctf_type_reference (input, type);
|
|
if ((hval = ctf_dedup_hash_type (fp, input, inputs, parents, input_num,
|
|
child_type, flags, depth,
|
|
populate_fun)) == NULL)
|
|
{
|
|
whaterr = "referenced type hashing";
|
|
goto err;
|
|
}
|
|
ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "referenced type",
|
|
depth);
|
|
citer = hval;
|
|
|
|
break;
|
|
|
|
/* The slices of two types hash identically only if the type they overlay
|
|
also has the same encoding. This is not ideal, but in practice will work
|
|
well enough. We work directly rather than using the CTF API because
|
|
we do not want the slice's normal automatically-shine-through
|
|
semantics to kick in here. */
|
|
case CTF_K_SLICE:
|
|
{
|
|
const ctf_slice_t *slice;
|
|
const ctf_dtdef_t *dtd;
|
|
ssize_t size;
|
|
ssize_t increment;
|
|
|
|
child_type = ctf_type_reference (input, type);
|
|
ctf_get_ctt_size (input, tp, &size, &increment);
|
|
ctf_dedup_sha1_add (&hash, &size, sizeof (ssize_t), "size", depth);
|
|
|
|
if ((hval = ctf_dedup_hash_type (fp, input, inputs, parents, input_num,
|
|
child_type, flags, depth,
|
|
populate_fun)) == NULL)
|
|
{
|
|
whaterr = "slice-referenced type hashing";
|
|
goto err;
|
|
}
|
|
ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "sliced type",
|
|
depth);
|
|
citer = hval;
|
|
|
|
if ((dtd = ctf_dynamic_type (input, type)) != NULL)
|
|
slice = &dtd->dtd_u.dtu_slice;
|
|
else
|
|
slice = (ctf_slice_t *) ((uintptr_t) tp + increment);
|
|
|
|
ctf_dedup_sha1_add (&hash, &slice->cts_offset,
|
|
sizeof (slice->cts_offset), "slice offset", depth);
|
|
ctf_dedup_sha1_add (&hash, &slice->cts_bits,
|
|
sizeof (slice->cts_bits), "slice bits", depth);
|
|
break;
|
|
}
|
|
|
|
case CTF_K_ARRAY:
|
|
{
|
|
ctf_arinfo_t ar;
|
|
|
|
if (ctf_array_info (input, type, &ar) < 0)
|
|
{
|
|
whaterr = "array info";
|
|
goto err;
|
|
}
|
|
|
|
if ((hval = ctf_dedup_hash_type (fp, input, inputs, parents, input_num,
|
|
ar.ctr_contents, flags, depth,
|
|
populate_fun)) == NULL)
|
|
{
|
|
whaterr = "array contents type hashing";
|
|
goto err;
|
|
}
|
|
ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "array contents",
|
|
depth);
|
|
ADD_CITER (citers, hval);
|
|
|
|
if ((hval = ctf_dedup_hash_type (fp, input, inputs, parents, input_num,
|
|
ar.ctr_index, flags, depth,
|
|
populate_fun)) == NULL)
|
|
{
|
|
whaterr = "array index type hashing";
|
|
goto err;
|
|
}
|
|
ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "array index",
|
|
depth);
|
|
ctf_dedup_sha1_add (&hash, &ar.ctr_nelems, sizeof (ar.ctr_nelems),
|
|
"element count", depth);
|
|
ADD_CITER (citers, hval);
|
|
|
|
break;
|
|
}
|
|
case CTF_K_FUNCTION:
|
|
{
|
|
ctf_funcinfo_t fi;
|
|
ctf_id_t *args;
|
|
uint32_t j;
|
|
|
|
if (ctf_func_type_info (input, type, &fi) < 0)
|
|
{
|
|
whaterr = "func type info";
|
|
goto err;
|
|
}
|
|
|
|
if ((hval = ctf_dedup_hash_type (fp, input, inputs, parents, input_num,
|
|
fi.ctc_return, flags, depth,
|
|
populate_fun)) == NULL)
|
|
{
|
|
whaterr = "func return type";
|
|
goto err;
|
|
}
|
|
ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "func return",
|
|
depth);
|
|
ctf_dedup_sha1_add (&hash, &fi.ctc_argc, sizeof (fi.ctc_argc),
|
|
"func argc", depth);
|
|
ctf_dedup_sha1_add (&hash, &fi.ctc_flags, sizeof (fi.ctc_flags),
|
|
"func flags", depth);
|
|
ADD_CITER (citers, hval);
|
|
|
|
if ((args = calloc (fi.ctc_argc, sizeof (ctf_id_t))) == NULL)
|
|
{
|
|
whaterr = "memory allocation";
|
|
goto err;
|
|
}
|
|
|
|
if (ctf_func_type_args (input, type, fi.ctc_argc, args) < 0)
|
|
{
|
|
free (args);
|
|
whaterr = "func arg type";
|
|
goto err;
|
|
}
|
|
for (j = 0; j < fi.ctc_argc; j++)
|
|
{
|
|
if ((hval = ctf_dedup_hash_type (fp, input, inputs, parents,
|
|
input_num, args[j], flags, depth,
|
|
populate_fun)) == NULL)
|
|
{
|
|
free (args);
|
|
whaterr = "func arg type hashing";
|
|
goto err;
|
|
}
|
|
ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "func arg type",
|
|
depth);
|
|
ADD_CITER (citers, hval);
|
|
}
|
|
free (args);
|
|
break;
|
|
}
|
|
case CTF_K_ENUM:
|
|
{
|
|
int val;
|
|
const char *ename;
|
|
|
|
ctf_dedup_sha1_add (&hash, &tp->ctt_size, sizeof (uint32_t),
|
|
"enum size", depth);
|
|
while ((ename = ctf_enum_next (input, type, &i, &val)) != NULL)
|
|
{
|
|
ctf_dedup_sha1_add (&hash, ename, strlen (ename) + 1, "enumerator",
|
|
depth);
|
|
ctf_dedup_sha1_add (&hash, &val, sizeof (val), "enumerand", depth);
|
|
}
|
|
if (ctf_errno (input) != ECTF_NEXT_END)
|
|
{
|
|
whaterr = "enum member iteration";
|
|
goto err;
|
|
}
|
|
break;
|
|
}
|
|
/* Top-level only. */
|
|
case CTF_K_STRUCT:
|
|
case CTF_K_UNION:
|
|
{
|
|
ssize_t offset;
|
|
const char *mname;
|
|
ctf_id_t membtype;
|
|
ssize_t size;
|
|
|
|
ctf_get_ctt_size (input, tp, &size, NULL);
|
|
ctf_dedup_sha1_add (&hash, &size, sizeof (ssize_t), "struct size",
|
|
depth);
|
|
|
|
while ((offset = ctf_member_next (input, type, &i, &mname,
|
|
&membtype)) >= 0)
|
|
{
|
|
if (mname == NULL)
|
|
mname = "";
|
|
ctf_dedup_sha1_add (&hash, mname, strlen (mname) + 1,
|
|
"member name", depth);
|
|
|
|
#ifdef ENABLE_LIBCTF_HASH_DEBUGGING
|
|
ctf_dprintf ("%lu: Traversing to member %s\n", depth, mname);
|
|
#endif
|
|
if ((hval = ctf_dedup_hash_type (fp, input, inputs, parents,
|
|
input_num, membtype, flags, depth,
|
|
populate_fun)) == NULL)
|
|
{
|
|
whaterr = "struct/union member type hashing";
|
|
goto iterr;
|
|
}
|
|
|
|
ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "member hash",
|
|
depth);
|
|
ctf_dedup_sha1_add (&hash, &offset, sizeof (offset), "member offset",
|
|
depth);
|
|
ADD_CITER (citers, hval);
|
|
}
|
|
if (ctf_errno (input) != ECTF_NEXT_END)
|
|
{
|
|
whaterr = "struct/union member iteration";
|
|
goto err;
|
|
}
|
|
break;
|
|
}
|
|
default:
|
|
whaterr = "unknown type kind";
|
|
goto err;
|
|
}
|
|
ctf_sha1_fini (&hash, hashbuf);
|
|
|
|
if ((hval = intern (fp, strdup (hashbuf))) == NULL)
|
|
{
|
|
whaterr = "hash internment";
|
|
goto oom;
|
|
}
|
|
|
|
/* Populate the citers for this type's subtypes, now the hash for the type
|
|
itself is known. */
|
|
whaterr = "citer tracking";
|
|
|
|
if (citer)
|
|
{
|
|
ctf_dynset_t *citer_hashes;
|
|
|
|
if ((citer_hashes = make_set_element (d->cd_citers, citer)) == NULL)
|
|
goto oom;
|
|
if (ctf_dynset_cinsert (citer_hashes, hval) < 0)
|
|
goto oom;
|
|
}
|
|
else if (citers)
|
|
{
|
|
const void *k;
|
|
|
|
while ((err = ctf_dynset_cnext (citers, &i, &k)) == 0)
|
|
{
|
|
ctf_dynset_t *citer_hashes;
|
|
citer = (const char *) k;
|
|
|
|
if ((citer_hashes = make_set_element (d->cd_citers, citer)) == NULL)
|
|
goto oom;
|
|
|
|
if (ctf_dynset_exists (citer_hashes, hval, NULL))
|
|
continue;
|
|
if (ctf_dynset_cinsert (citer_hashes, hval) < 0)
|
|
goto oom;
|
|
}
|
|
if (err != ECTF_NEXT_END)
|
|
goto err;
|
|
ctf_dynset_destroy (citers);
|
|
}
|
|
|
|
return hval;
|
|
|
|
iterr:
|
|
ctf_next_destroy (i);
|
|
err:
|
|
ctf_sha1_fini (&hash, NULL);
|
|
ctf_err_warn (fp, 0, "%s (%i): %s error during type hashing for "
|
|
"type %lx, kind %i: CTF error: %s; errno: %s",
|
|
ctf_link_input_name (input), input_num, whaterr, type,
|
|
kind, ctf_errmsg (ctf_errno (fp)), strerror (errno));
|
|
return NULL;
|
|
oom:
|
|
ctf_set_errno (fp, errno);
|
|
ctf_err_warn (fp, 0, "%s (%i): %s error during type hashing for "
|
|
"type %lx, kind %i: CTF error: %s; errno: %s",
|
|
ctf_link_input_name (input), input_num, whaterr, type,
|
|
kind, ctf_errmsg (ctf_errno (fp)), strerror (errno));
|
|
return NULL;
|
|
}
|
|
|
|
/* Hash a TYPE in the INPUT: FP is the eventual output, where the ctf_dedup
|
|
state is stored. INPUT_NUM is the number of this input in the set of inputs.
|
|
Record its hash in FP's cd_type_hashes once it is known. PARENTS is
|
|
described in the comment above ctf_dedup.
|
|
|
|
(The flags argument currently accepts only the flag
|
|
CTF_DEDUP_HASH_INTERNAL_CHILD, an implementation detail used to prevent
|
|
struct/union hashing in recursive traversals below the TYPE.)
|
|
|
|
We use the CTF API rather than direct access wherever possible, because types
|
|
that appear identical through the API should be considered identical, with
|
|
one exception: slices should only be considered identical to other slices,
|
|
not to the corresponding unsliced type.
|
|
|
|
The POPULATE_FUN is a mandatory hook that populates other mappings with each
|
|
type we see (excepting types that are recursively hashed as stubs). The
|
|
caller should not rely on the order of calls to this hook, though it will be
|
|
called at least once for every non-stub reference to every type.
|
|
|
|
Returns a hash value (an atom), or NULL on error. */
|
|
|
|
static const char *
|
|
ctf_dedup_hash_type (ctf_file_t *fp, ctf_file_t *input,
|
|
ctf_file_t **inputs, uint32_t *parents,
|
|
int input_num, ctf_id_t type, int flags,
|
|
unsigned long depth,
|
|
int (*populate_fun) (ctf_file_t *fp,
|
|
ctf_file_t *input,
|
|
ctf_file_t **inputs,
|
|
int input_num,
|
|
ctf_id_t type,
|
|
void *id,
|
|
const char *decorated_name,
|
|
const char *hash))
|
|
{
|
|
ctf_dedup_t *d = &fp->ctf_dedup;
|
|
const ctf_type_t *tp;
|
|
void *type_id;
|
|
const char *hval = NULL;
|
|
const char *name;
|
|
const char *whaterr;
|
|
const char *decorated = NULL;
|
|
uint32_t kind, fwdkind;
|
|
|
|
depth++;
|
|
|
|
#ifdef ENABLE_LIBCTF_HASH_DEBUGGING
|
|
ctf_dprintf ("%lu: ctf_dedup_hash_type (%i, %lx, flags %x)\n", depth, input_num, type, flags);
|
|
#endif
|
|
|
|
/* The unimplemented type doesn't really exist, but must be noted in parent
|
|
hashes: so it gets a fixed, arbitrary hash. */
|
|
if (type == 0)
|
|
return "00000000000000000000";
|
|
|
|
/* Possible optimization: if the input type is in the parent type space, just
|
|
copy recursively-cited hashes from the parent's types into the output
|
|
mapping rather than rehashing them. */
|
|
|
|
type_id = CTF_DEDUP_GID (fp, input_num, type);
|
|
|
|
if ((tp = ctf_lookup_by_id (&input, type)) == NULL)
|
|
{
|
|
ctf_err_warn (fp, 0, "%s (%i): lookup failure for type %lx: flags %x",
|
|
ctf_link_input_name (input), input_num, type, flags);
|
|
return NULL; /* errno is set for us. */
|
|
}
|
|
|
|
kind = LCTF_INFO_KIND (input, tp->ctt_info);
|
|
name = ctf_strraw (input, tp->ctt_name);
|
|
|
|
if (tp->ctt_name == 0 || !name || name[0] == '\0')
|
|
name = NULL;
|
|
|
|
/* Treat the unknown kind just like the unimplemented type. */
|
|
if (kind == CTF_K_UNKNOWN)
|
|
return "00000000000000000000";
|
|
|
|
/* Decorate the name appropriately for the namespace it appears in: forwards
|
|
appear in the namespace of their referent. */
|
|
|
|
fwdkind = kind;
|
|
if (name)
|
|
{
|
|
if (kind == CTF_K_FORWARD)
|
|
fwdkind = tp->ctt_type;
|
|
|
|
if ((decorated = ctf_decorate_type_name (fp, name, fwdkind)) == NULL)
|
|
return NULL; /* errno is set for us. */
|
|
}
|
|
|
|
/* If not hashing a stub, we can rely on various sorts of caches.
|
|
|
|
Optimization opportunity: we may be able to avoid calling the populate_fun
|
|
sometimes here. */
|
|
|
|
if (!ctf_dedup_is_stub (name, kind, fwdkind, flags))
|
|
{
|
|
if ((hval = ctf_dynhash_lookup (d->cd_type_hashes, type_id)) != NULL)
|
|
{
|
|
#ifdef ENABLE_LIBCTF_HASH_DEBUGGING
|
|
ctf_dprintf ("%lu: Known hash for ID %i/%lx: %s\n", depth, input_num,
|
|
type, hval);
|
|
#endif
|
|
populate_fun (fp, input, inputs, input_num, type, type_id,
|
|
decorated, hval);
|
|
|
|
return hval;
|
|
}
|
|
}
|
|
|
|
/* We have never seen this type before, and must figure out its hash and the
|
|
hashes of the types it cites.
|
|
|
|
Hash this type, and call ourselves recursively. (The hashing part is
|
|
optional, and is disabled if overidden_hval is set.) */
|
|
|
|
if ((hval = ctf_dedup_rhash_type (fp, input, inputs, parents, input_num,
|
|
type, type_id, tp, name, decorated,
|
|
kind, flags, depth, populate_fun)) == NULL)
|
|
return NULL; /* errno is set for us. */
|
|
|
|
/* The hash of this type is now known: record it unless caching is unsafe
|
|
because the hash value will change later. This will be the final storage
|
|
of this type's hash, so we call the population function on it. */
|
|
|
|
if (!ctf_dedup_is_stub (name, kind, fwdkind, flags))
|
|
{
|
|
#ifdef ENABLE_LIBCTF_HASH_DEBUGGING
|
|
ctf_dprintf ("Caching %lx, ID %p (%s), %s in final location\n", type,
|
|
type_id, name ? name : "", hval);
|
|
#endif
|
|
|
|
if (ctf_dynhash_cinsert (d->cd_type_hashes, type_id, hval) < 0)
|
|
{
|
|
whaterr = "hash caching";
|
|
goto oom;
|
|
}
|
|
|
|
if (populate_fun (fp, input, inputs, input_num, type, type_id,
|
|
decorated, hval) < 0)
|
|
{
|
|
whaterr = "population function";
|
|
goto err; /* errno is set for us. */
|
|
}
|
|
}
|
|
|
|
#ifdef ENABLE_LIBCTF_HASH_DEBUGGING
|
|
ctf_dprintf ("%lu: Returning final hash for ID %i/%lx: %s\n", depth,
|
|
input_num, type, hval);
|
|
#endif
|
|
return hval;
|
|
|
|
oom:
|
|
ctf_set_errno (fp, errno);
|
|
err:
|
|
ctf_err_warn (fp, 0, "%s (%i): %s error during type hashing, type %lx, "
|
|
"kind %i: CTF errno: %s; errno: %s",
|
|
ctf_link_input_name (input), input_num, whaterr, type,
|
|
kind, ctf_errmsg (ctf_errno (fp)),
|
|
strerror (errno));
|
|
return NULL;
|
|
}
|
|
|
|
/* Populate a number of useful mappings not directly used by the hashing
|
|
machinery: the output mapping, the cd_name_counts mapping from name -> hash
|
|
-> count of hashval deduplication state for a given hashed type, and the
|
|
cd_output_first_tu mapping. */
|
|
|
|
static int
|
|
ctf_dedup_populate_mappings (ctf_file_t *fp, ctf_file_t *input _libctf_unused_,
|
|
ctf_file_t **inputs _libctf_unused_,
|
|
int input_num _libctf_unused_,
|
|
ctf_id_t type _libctf_unused_, void *id,
|
|
const char *decorated_name,
|
|
const char *hval)
|
|
{
|
|
ctf_dedup_t *d = &fp->ctf_dedup;
|
|
ctf_dynset_t *type_ids;
|
|
ctf_dynhash_t *name_counts;
|
|
long int count;
|
|
|
|
#ifdef ENABLE_LIBCTF_HASH_DEBUGGING
|
|
ctf_dprintf ("Hash %s, %s, into output mapping for %i/%lx @ %s\n",
|
|
hval, decorated_name ? decorated_name : "(unnamed)",
|
|
input_num, type, ctf_link_input_name (input));
|
|
|
|
const char *orig_hval;
|
|
|
|
/* Make sure we never map a single GID to multiple hash values. */
|
|
|
|
if ((orig_hval = ctf_dynhash_lookup (d->cd_output_mapping_guard, id)) != NULL)
|
|
{
|
|
/* We can rely on pointer identity here, since all hashes are
|
|
interned. */
|
|
if (!ctf_assert (fp, orig_hval == hval))
|
|
return -1;
|
|
}
|
|
else
|
|
if (ctf_dynhash_cinsert (d->cd_output_mapping_guard, id, hval) < 0)
|
|
return ctf_set_errno (fp, errno);
|
|
#endif
|
|
|
|
/* Record the type in the output mapping: if this is the first time this type
|
|
has been seen, also record it in the cd_output_first_gid. Because we
|
|
traverse types in TU order and we do not merge types after the hashing
|
|
phase, this will be the lowest TU this type ever appears in. */
|
|
|
|
if ((type_ids = ctf_dynhash_lookup (d->cd_output_mapping,
|
|
hval)) == NULL)
|
|
{
|
|
if (ctf_dynhash_cinsert (d->cd_output_first_gid, hval, id) < 0)
|
|
return ctf_set_errno (fp, errno);
|
|
|
|
if ((type_ids = ctf_dynset_create (htab_hash_pointer,
|
|
htab_eq_pointer,
|
|
NULL)) == NULL)
|
|
return ctf_set_errno (fp, errno);
|
|
if (ctf_dynhash_insert (d->cd_output_mapping, (void *) hval,
|
|
type_ids) < 0)
|
|
{
|
|
ctf_dynset_destroy (type_ids);
|
|
return ctf_set_errno (fp, errno);
|
|
}
|
|
}
|
|
#ifdef ENABLE_LIBCTF_HASH_DEBUGGING
|
|
{
|
|
/* Verify that all types with this hash are of the same kind, and that the
|
|
first TU a type was seen in never falls. */
|
|
|
|
int err;
|
|
const void *one_id;
|
|
ctf_next_t *i = NULL;
|
|
int orig_kind = ctf_type_kind_unsliced (input, type);
|
|
int orig_first_tu;
|
|
|
|
orig_first_tu = CTF_DEDUP_GID_TO_INPUT
|
|
(ctf_dynhash_lookup (d->cd_output_first_gid, hval));
|
|
if (!ctf_assert (fp, orig_first_tu <= CTF_DEDUP_GID_TO_INPUT (id)))
|
|
return -1;
|
|
|
|
while ((err = ctf_dynset_cnext (type_ids, &i, &one_id)) == 0)
|
|
{
|
|
ctf_file_t *foo = inputs[CTF_DEDUP_GID_TO_INPUT (one_id)];
|
|
ctf_id_t bar = CTF_DEDUP_GID_TO_TYPE (one_id);
|
|
if (ctf_type_kind_unsliced (foo, bar) != orig_kind)
|
|
{
|
|
ctf_err_warn (fp, 1, "added wrong kind to output mapping "
|
|
"for hash %s named %s: %p/%lx from %s is "
|
|
"kind %i, but newly-added %p/%lx from %s is "
|
|
"kind %i", hval,
|
|
decorated_name ? decorated_name : "(unnamed)",
|
|
(void *) foo, bar,
|
|
ctf_link_input_name (foo),
|
|
ctf_type_kind_unsliced (foo, bar),
|
|
(void *) input, type,
|
|
ctf_link_input_name (input), orig_kind);
|
|
if (!ctf_assert (fp, ctf_type_kind_unsliced (foo, bar)
|
|
== orig_kind))
|
|
return -1;
|
|
}
|
|
}
|
|
if (err != ECTF_NEXT_END)
|
|
return ctf_set_errno (fp, err);
|
|
}
|
|
#endif
|
|
|
|
/* This function will be repeatedly called for the same types many times:
|
|
don't waste time reinserting the same keys in that case. */
|
|
if (!ctf_dynset_exists (type_ids, id, NULL)
|
|
&& ctf_dynset_insert (type_ids, id) < 0)
|
|
return ctf_set_errno (fp, errno);
|
|
|
|
/* The rest only needs to happen for types with names. */
|
|
if (!decorated_name)
|
|
return 0;
|
|
|
|
/* Count the number of occurrences of the hash value for this GID. */
|
|
|
|
hval = ctf_dynhash_lookup (d->cd_type_hashes, id);
|
|
|
|
/* Mapping from name -> hash(hashval, count) not already present? */
|
|
if ((name_counts = ctf_dynhash_lookup (d->cd_name_counts,
|
|
decorated_name)) == NULL)
|
|
{
|
|
if ((name_counts = ctf_dynhash_create (ctf_hash_string,
|
|
ctf_hash_eq_string,
|
|
NULL, NULL)) == NULL)
|
|
return ctf_set_errno (fp, errno);
|
|
if (ctf_dynhash_cinsert (d->cd_name_counts, decorated_name,
|
|
name_counts) < 0)
|
|
{
|
|
ctf_dynhash_destroy (name_counts);
|
|
return ctf_set_errno (fp, errno);
|
|
}
|
|
}
|
|
|
|
/* This will, conveniently, return NULL (i.e. 0) for a new entry. */
|
|
count = (long int) (uintptr_t) ctf_dynhash_lookup (name_counts, hval);
|
|
|
|
if (ctf_dynhash_cinsert (name_counts, hval,
|
|
(const void *) (uintptr_t) (count + 1)) < 0)
|
|
return ctf_set_errno (fp, errno);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Mark a single hash as corresponding to a conflicting type. Mark all types
|
|
that cite it as conflicting as well, terminating the recursive walk only when
|
|
types that are already conflicted or types do not cite other types are seen.
|
|
(Tagged structures and unions do not appear in the cd_citers graph, so the
|
|
walk also terminates there, since any reference to a conflicting structure is
|
|
just going to reference an unconflicting forward instead: see
|
|
ctf_dedup_maybe_synthesize_forward.) */
|
|
|
|
static int
|
|
ctf_dedup_mark_conflicting_hash (ctf_file_t *fp, const char *hval)
|
|
{
|
|
ctf_dedup_t *d = &fp->ctf_dedup;
|
|
ctf_next_t *i = NULL;
|
|
int err;
|
|
const void *k;
|
|
ctf_dynset_t *citers;
|
|
|
|
/* Mark conflicted if not already so marked. */
|
|
if (ctf_dynset_exists (d->cd_conflicting_types, hval, NULL))
|
|
return 0;
|
|
|
|
ctf_dprintf ("Marking %s as conflicted\n", hval);
|
|
|
|
if (ctf_dynset_cinsert (d->cd_conflicting_types, hval) < 0)
|
|
{
|
|
ctf_dprintf ("Out of memory marking %s as conflicted\n", hval);
|
|
ctf_set_errno (fp, errno);
|
|
return -1;
|
|
}
|
|
|
|
/* If any types cite this type, mark them conflicted too. */
|
|
if ((citers = ctf_dynhash_lookup (d->cd_citers, hval)) == NULL)
|
|
return 0;
|
|
|
|
while ((err = ctf_dynset_cnext (citers, &i, &k)) == 0)
|
|
{
|
|
const char *hv = (const char *) k;
|
|
|
|
if (ctf_dynset_exists (d->cd_conflicting_types, hv, NULL))
|
|
continue;
|
|
|
|
if (ctf_dedup_mark_conflicting_hash (fp, hv) < 0)
|
|
{
|
|
ctf_next_destroy (i);
|
|
return -1; /* errno is set for us. */
|
|
}
|
|
}
|
|
if (err != ECTF_NEXT_END)
|
|
return ctf_set_errno (fp, err);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Look up a type kind from the output mapping, given a type hash value. */
|
|
static int
|
|
ctf_dedup_hash_kind (ctf_file_t *fp, ctf_file_t **inputs, const char *hash)
|
|
{
|
|
ctf_dedup_t *d = &fp->ctf_dedup;
|
|
void *id;
|
|
ctf_dynset_t *type_ids;
|
|
|
|
/* Precondition: the output mapping is populated. */
|
|
if (!ctf_assert (fp, ctf_dynhash_elements (d->cd_output_mapping) > 0))
|
|
return -1;
|
|
|
|
/* Look up some GID from the output hash for this type. (They are all
|
|
identical, so we can pick any). Don't assert if someone calls this
|
|
function wrongly, but do assert if the output mapping knows about the hash,
|
|
but has nothing associated with it. */
|
|
|
|
type_ids = ctf_dynhash_lookup (d->cd_output_mapping, hash);
|
|
if (!type_ids)
|
|
{
|
|
ctf_dprintf ("Looked up type kind by nonexistent hash %s.\n", hash);
|
|
return ctf_set_errno (fp, ECTF_INTERNAL);
|
|
}
|
|
id = ctf_dynset_lookup_any (type_ids);
|
|
if (!ctf_assert (fp, id))
|
|
return -1;
|
|
|
|
return ctf_type_kind_unsliced (inputs[CTF_DEDUP_GID_TO_INPUT (id)],
|
|
CTF_DEDUP_GID_TO_TYPE (id));
|
|
}
|
|
|
|
/* Used to keep a count of types: i.e. distinct type hash values. */
|
|
typedef struct ctf_dedup_type_counter
|
|
{
|
|
ctf_file_t *fp;
|
|
ctf_file_t **inputs;
|
|
int num_non_forwards;
|
|
} ctf_dedup_type_counter_t;
|
|
|
|
/* Add to the type counter for one name entry from the cd_name_counts. */
|
|
static int
|
|
ctf_dedup_count_types (void *key_, void *value _libctf_unused_, void *arg_)
|
|
{
|
|
const char *hval = (const char *) key_;
|
|
int kind;
|
|
ctf_dedup_type_counter_t *arg = (ctf_dedup_type_counter_t *) arg_;
|
|
|
|
kind = ctf_dedup_hash_kind (arg->fp, arg->inputs, hval);
|
|
|
|
/* We rely on ctf_dedup_hash_kind setting the fp to -ECTF_INTERNAL on error to
|
|
smuggle errors out of here. */
|
|
|
|
if (kind != CTF_K_FORWARD)
|
|
{
|
|
arg->num_non_forwards++;
|
|
ctf_dprintf ("Counting hash %s: kind %i: num_non_forwards is %i\n",
|
|
hval, kind, arg->num_non_forwards);
|
|
}
|
|
|
|
/* We only need to know if there is more than one non-forward (an ambiguous
|
|
type): don't waste time iterating any more than needed to figure that
|
|
out. */
|
|
|
|
if (arg->num_non_forwards > 1)
|
|
return 1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Detect name ambiguity and mark ambiguous names as conflicting, other than the
|
|
most common. */
|
|
static int
|
|
ctf_dedup_detect_name_ambiguity (ctf_file_t *fp, ctf_file_t **inputs)
|
|
{
|
|
ctf_dedup_t *d = &fp->ctf_dedup;
|
|
ctf_next_t *i = NULL;
|
|
void *k;
|
|
void *v;
|
|
int err;
|
|
const char *erm;
|
|
|
|
/* Go through cd_name_counts for all CTF namespaces in turn. */
|
|
|
|
while ((err = ctf_dynhash_next (d->cd_name_counts, &i, &k, &v)) == 0)
|
|
{
|
|
const char *decorated = (const char *) k;
|
|
ctf_dynhash_t *name_counts = (ctf_dynhash_t *) v;
|
|
ctf_next_t *j = NULL;
|
|
|
|
/* If this is a forwardable kind or a forward (which we can tell without
|
|
consulting the type because its decorated name has a space as its
|
|
second character: see ctf_decorate_type_name), we are only interested
|
|
in whether this name has many hashes associated with it: any such name
|
|
is necessarily ambiguous, and types with that name are conflicting.
|
|
Once we know whether this is true, we can skip to the next name: so use
|
|
ctf_dynhash_iter_find for efficiency. */
|
|
|
|
if (decorated[0] != '\0' && decorated[1] == ' ')
|
|
{
|
|
ctf_dedup_type_counter_t counters = { fp, inputs, 0 };
|
|
ctf_dynhash_t *counts = (ctf_dynhash_t *) v;
|
|
|
|
ctf_dynhash_iter_find (counts, ctf_dedup_count_types, &counters);
|
|
|
|
/* Check for assertion failure and pass it up. */
|
|
if (ctf_errno (fp) == ECTF_INTERNAL)
|
|
goto assert_err;
|
|
|
|
if (counters.num_non_forwards > 1)
|
|
{
|
|
const void *hval_;
|
|
|
|
while ((err = ctf_dynhash_cnext (counts, &j, &hval_, NULL)) == 0)
|
|
{
|
|
const char *hval = (const char *) hval_;
|
|
ctf_dynset_t *type_ids;
|
|
void *id;
|
|
int kind;
|
|
|
|
/* Dig through the types in this hash to find the non-forwards
|
|
and mark them ambiguous. */
|
|
|
|
type_ids = ctf_dynhash_lookup (d->cd_output_mapping, hval);
|
|
|
|
/* Nonexistent? Must be a forward with no referent. */
|
|
if (!type_ids)
|
|
continue;
|
|
|
|
id = ctf_dynset_lookup_any (type_ids);
|
|
|
|
kind = ctf_type_kind (inputs[CTF_DEDUP_GID_TO_INPUT (id)],
|
|
CTF_DEDUP_GID_TO_TYPE (id));
|
|
|
|
if (kind != CTF_K_FORWARD)
|
|
{
|
|
ctf_dprintf ("Marking %p, with hash %s, conflicting: one "
|
|
"of many non-forward GIDs for %s\n", id,
|
|
hval, (char *) k);
|
|
ctf_dedup_mark_conflicting_hash (fp, hval);
|
|
}
|
|
}
|
|
if (err != ECTF_NEXT_END)
|
|
{
|
|
erm = "marking conflicting structs/unions";
|
|
goto iterr;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* This is an ordinary type. Find the most common type with this
|
|
name, and mark it unconflicting: all others are conflicting. (We
|
|
cannot do this sort of popularity contest with forwardable types
|
|
because any forwards to that type would be immediately unified with
|
|
the most-popular type on insertion, and we want conflicting structs
|
|
et al to have all forwards left intact, so the user is notified
|
|
that this type is conflicting. TODO: improve this in future by
|
|
setting such forwards non-root-visible.) */
|
|
|
|
const void *key;
|
|
const void *count;
|
|
const char *hval;
|
|
long max_hcount = -1;
|
|
const char *max_hval = NULL;
|
|
|
|
if (ctf_dynhash_elements (name_counts) <= 1)
|
|
continue;
|
|
|
|
/* First find the most common. */
|
|
while ((err = ctf_dynhash_cnext (name_counts, &j, &key, &count)) == 0)
|
|
{
|
|
hval = (const char *) key;
|
|
if ((long int) (uintptr_t) count > max_hcount)
|
|
{
|
|
max_hcount = (long int) (uintptr_t) count;
|
|
max_hval = hval;
|
|
}
|
|
}
|
|
if (err != ECTF_NEXT_END)
|
|
{
|
|
erm = "finding commonest conflicting type";
|
|
goto iterr;
|
|
}
|
|
|
|
/* Mark all the others as conflicting. */
|
|
while ((err = ctf_dynhash_cnext (name_counts, &j, &key, NULL)) == 0)
|
|
{
|
|
hval = (const char *) key;
|
|
if (strcmp (max_hval, hval) == 0)
|
|
continue;
|
|
|
|
ctf_dprintf ("Marking %s, an uncommon hash for %s, conflicting\n",
|
|
hval, (const char *) k);
|
|
if (ctf_dedup_mark_conflicting_hash (fp, hval) < 0)
|
|
{
|
|
erm = "marking hashes as conflicting";
|
|
goto err;
|
|
}
|
|
}
|
|
if (err != ECTF_NEXT_END)
|
|
{
|
|
erm = "marking uncommon conflicting types";
|
|
goto iterr;
|
|
}
|
|
}
|
|
}
|
|
if (err != ECTF_NEXT_END)
|
|
{
|
|
erm = "scanning for ambiguous names";
|
|
goto iterr;
|
|
}
|
|
|
|
return 0;
|
|
|
|
err:
|
|
ctf_next_destroy (i);
|
|
ctf_err_warn (fp, 0, "%s: %s", erm, ctf_errmsg (ctf_errno (fp)));
|
|
return -1;
|
|
|
|
iterr:
|
|
ctf_err_warn (fp, 0, "iteration failed %s: %s", erm, ctf_errmsg (err));
|
|
return ctf_set_errno (fp, err);
|
|
|
|
assert_err:
|
|
ctf_next_destroy (i);
|
|
return -1; /* errno is set for us. */
|
|
}
|
|
|
|
/* Initialize the deduplication machinery. */
|
|
|
|
static int
|
|
ctf_dedup_init (ctf_file_t *fp)
|
|
{
|
|
ctf_dedup_t *d = &fp->ctf_dedup;
|
|
size_t i;
|
|
|
|
if (ctf_dedup_atoms_init (fp) < 0)
|
|
goto oom;
|
|
|
|
#if IDS_NEED_ALLOCATION
|
|
if ((d->cd_id_to_file_t = ctf_dynhash_create (ctf_hash_type_id_key,
|
|
ctf_hash_eq_type_id_key,
|
|
free, NULL)) == NULL)
|
|
goto oom;
|
|
#endif
|
|
|
|
for (i = 0; i < 4; i++)
|
|
{
|
|
if ((d->cd_decorated_names[i] = ctf_dynhash_create (ctf_hash_string,
|
|
ctf_hash_eq_string,
|
|
NULL, NULL)) == NULL)
|
|
goto oom;
|
|
}
|
|
|
|
if ((d->cd_name_counts
|
|
= ctf_dynhash_create (ctf_hash_string,
|
|
ctf_hash_eq_string, NULL,
|
|
(ctf_hash_free_fun) ctf_dynhash_destroy)) == NULL)
|
|
goto oom;
|
|
|
|
if ((d->cd_type_hashes
|
|
= ctf_dynhash_create (ctf_hash_integer,
|
|
ctf_hash_eq_integer,
|
|
NULL, NULL)) == NULL)
|
|
goto oom;
|
|
|
|
if ((d->cd_struct_origin
|
|
= ctf_dynhash_create (ctf_hash_string,
|
|
ctf_hash_eq_string,
|
|
NULL, NULL)) == NULL)
|
|
goto oom;
|
|
|
|
if ((d->cd_citers
|
|
= ctf_dynhash_create (ctf_hash_string,
|
|
ctf_hash_eq_string, NULL,
|
|
(ctf_hash_free_fun) ctf_dynset_destroy)) == NULL)
|
|
goto oom;
|
|
|
|
if ((d->cd_output_mapping
|
|
= ctf_dynhash_create (ctf_hash_string,
|
|
ctf_hash_eq_string, NULL,
|
|
(ctf_hash_free_fun) ctf_dynset_destroy)) == NULL)
|
|
goto oom;
|
|
|
|
if ((d->cd_output_first_gid
|
|
= ctf_dynhash_create (ctf_hash_string,
|
|
ctf_hash_eq_string,
|
|
NULL, NULL)) == NULL)
|
|
goto oom;
|
|
|
|
#ifdef ENABLE_LIBCTF_HASH_DEBUGGING
|
|
if ((d->cd_output_mapping_guard
|
|
= ctf_dynhash_create (ctf_hash_integer,
|
|
ctf_hash_eq_integer, NULL, NULL)) == NULL)
|
|
goto oom;
|
|
#endif
|
|
|
|
if ((d->cd_emission_struct_members
|
|
= ctf_dynhash_create (ctf_hash_integer,
|
|
ctf_hash_eq_integer,
|
|
NULL, NULL)) == NULL)
|
|
goto oom;
|
|
|
|
if ((d->cd_conflicting_types
|
|
= ctf_dynset_create (htab_hash_string,
|
|
ctf_dynset_eq_string, NULL)) == NULL)
|
|
goto oom;
|
|
|
|
return 0;
|
|
|
|
oom:
|
|
ctf_err_warn (fp, 0, "ctf_dedup_init: cannot initialize: "
|
|
"out of memory.");
|
|
return ctf_set_errno (fp, ENOMEM);
|
|
}
|
|
|
|
void
|
|
ctf_dedup_fini (ctf_file_t *fp, ctf_file_t **outputs, uint32_t noutputs)
|
|
{
|
|
ctf_dedup_t *d = &fp->ctf_dedup;
|
|
size_t i;
|
|
|
|
/* ctf_dedup_atoms is kept across links. */
|
|
#if IDS_NEED_ALLOCATION
|
|
ctf_dynhash_destroy (d->cd_id_to_file_t);
|
|
#endif
|
|
for (i = 0; i < 4; i++)
|
|
ctf_dynhash_destroy (d->cd_decorated_names[i]);
|
|
ctf_dynhash_destroy (d->cd_name_counts);
|
|
ctf_dynhash_destroy (d->cd_type_hashes);
|
|
ctf_dynhash_destroy (d->cd_struct_origin);
|
|
ctf_dynhash_destroy (d->cd_citers);
|
|
ctf_dynhash_destroy (d->cd_output_mapping);
|
|
ctf_dynhash_destroy (d->cd_output_first_gid);
|
|
#ifdef ENABLE_LIBCTF_HASH_DEBUGGING
|
|
ctf_dynhash_destroy (d->cd_output_mapping_guard);
|
|
#endif
|
|
ctf_dynhash_destroy (d->cd_emission_struct_members);
|
|
ctf_dynset_destroy (d->cd_conflicting_types);
|
|
|
|
/* Free the per-output state. */
|
|
if (outputs)
|
|
{
|
|
for (i = 0; i < noutputs; i++)
|
|
{
|
|
ctf_dedup_t *od = &outputs[i]->ctf_dedup;
|
|
ctf_dynhash_destroy (od->cd_output_emission_hashes);
|
|
ctf_dynhash_destroy (od->cd_output_emission_conflicted_forwards);
|
|
ctf_file_close (od->cd_output);
|
|
}
|
|
}
|
|
memset (d, 0, sizeof (ctf_dedup_t));
|
|
}
|
|
|
|
/* Return 1 if this type is cited by multiple input dictionaries. */
|
|
|
|
static int
|
|
ctf_dedup_multiple_input_dicts (ctf_file_t *output, ctf_file_t **inputs,
|
|
const char *hval)
|
|
{
|
|
ctf_dedup_t *d = &output->ctf_dedup;
|
|
ctf_dynset_t *type_ids;
|
|
ctf_next_t *i = NULL;
|
|
void *id;
|
|
ctf_file_t *found = NULL, *relative_found = NULL;
|
|
const char *type_id;
|
|
ctf_file_t *input_fp;
|
|
ctf_id_t input_id;
|
|
const char *name;
|
|
const char *decorated;
|
|
int fwdkind;
|
|
int multiple = 0;
|
|
int err;
|
|
|
|
type_ids = ctf_dynhash_lookup (d->cd_output_mapping, hval);
|
|
if (!ctf_assert (output, type_ids))
|
|
return -1;
|
|
|
|
/* Scan across the IDs until we find proof that two disjoint dictionaries
|
|
are referenced. Exit as soon as possible. Optimization opportunity, but
|
|
possibly not worth it, given that this is only executed in
|
|
CTF_LINK_SHARE_DUPLICATED mode. */
|
|
|
|
while ((err = ctf_dynset_next (type_ids, &i, &id)) == 0)
|
|
{
|
|
ctf_file_t *fp = inputs[CTF_DEDUP_GID_TO_INPUT (id)];
|
|
|
|
if (fp == found || fp == relative_found)
|
|
continue;
|
|
|
|
if (!found)
|
|
{
|
|
found = fp;
|
|
continue;
|
|
}
|
|
|
|
if (!relative_found
|
|
&& (fp->ctf_parent == found || found->ctf_parent == fp))
|
|
{
|
|
relative_found = fp;
|
|
continue;
|
|
}
|
|
|
|
multiple = 1;
|
|
ctf_next_destroy (i);
|
|
break;
|
|
}
|
|
if ((err != ECTF_NEXT_END) && (err != 0))
|
|
{
|
|
ctf_err_warn (output, 0, "propagating conflictedness: %s",
|
|
ctf_errmsg (err));
|
|
ctf_set_errno (output, err);
|
|
return -1;
|
|
}
|
|
|
|
if (multiple)
|
|
return multiple;
|
|
|
|
/* This type itself does not appear in multiple input dicts: how about another
|
|
related type with the same name (e.g. a forward if this is a struct,
|
|
etc). */
|
|
|
|
type_id = ctf_dynset_lookup_any (type_ids);
|
|
if (!ctf_assert (output, type_id))
|
|
return -1;
|
|
|
|
input_fp = inputs[CTF_DEDUP_GID_TO_INPUT (type_id)];
|
|
input_id = CTF_DEDUP_GID_TO_TYPE (type_id);
|
|
fwdkind = ctf_type_kind_forwarded (input_fp, input_id);
|
|
name = ctf_type_name_raw (input_fp, input_id);
|
|
|
|
if ((fwdkind == CTF_K_STRUCT || fwdkind == CTF_K_UNION)
|
|
&& name && name[0] != '\0')
|
|
{
|
|
const void *origin;
|
|
|
|
if ((decorated = ctf_decorate_type_name (output, name,
|
|
fwdkind)) == NULL)
|
|
return -1; /* errno is set for us. */
|
|
|
|
origin = ctf_dynhash_lookup (d->cd_struct_origin, decorated);
|
|
if ((origin != NULL) && (CTF_DEDUP_GID_TO_INPUT (origin) < 0))
|
|
multiple = 1;
|
|
}
|
|
|
|
return multiple;
|
|
}
|
|
|
|
/* Demote unconflicting types which reference only one input, or which reference
|
|
two inputs where one input is the parent of the other, into conflicting
|
|
types. Only used if the link mode is CTF_LINK_SHARE_DUPLICATED. */
|
|
|
|
static int
|
|
ctf_dedup_conflictify_unshared (ctf_file_t *output, ctf_file_t **inputs)
|
|
{
|
|
ctf_dedup_t *d = &output->ctf_dedup;
|
|
ctf_next_t *i = NULL;
|
|
int err;
|
|
const void *k;
|
|
ctf_dynset_t *to_mark = NULL;
|
|
|
|
if ((to_mark = ctf_dynset_create (htab_hash_string, ctf_dynset_eq_string,
|
|
NULL)) == NULL)
|
|
goto err_no;
|
|
|
|
while ((err = ctf_dynhash_cnext (d->cd_output_mapping, &i, &k, NULL)) == 0)
|
|
{
|
|
const char *hval = (const char *) k;
|
|
int conflicting;
|
|
|
|
/* Types referenced by only one dict, with no type appearing under that
|
|
name elsewhere, are marked conflicting. */
|
|
|
|
conflicting = !ctf_dedup_multiple_input_dicts (output, inputs, hval);
|
|
|
|
if (conflicting < 0)
|
|
goto err; /* errno is set for us. */
|
|
|
|
if (conflicting)
|
|
if (ctf_dynset_cinsert (to_mark, hval) < 0)
|
|
goto err;
|
|
}
|
|
if (err != ECTF_NEXT_END)
|
|
goto iterr;
|
|
|
|
while ((err = ctf_dynset_cnext (to_mark, &i, &k)) == 0)
|
|
{
|
|
const char *hval = (const char *) k;
|
|
|
|
if (ctf_dedup_mark_conflicting_hash (output, hval) < 0)
|
|
goto err;
|
|
}
|
|
if (err != ECTF_NEXT_END)
|
|
goto iterr;
|
|
|
|
ctf_dynset_destroy (to_mark);
|
|
|
|
return 0;
|
|
|
|
err_no:
|
|
ctf_set_errno (output, errno);
|
|
err:
|
|
err = ctf_errno (output);
|
|
ctf_next_destroy (i);
|
|
iterr:
|
|
ctf_set_errno (output, err);
|
|
ctf_dynset_destroy (to_mark);
|
|
ctf_err_warn (output, 0, "conflictifying unshared types: %s",
|
|
ctf_errmsg (ctf_errno (output)));
|
|
return -1;
|
|
}
|
|
|
|
/* The core deduplicator. Populate cd_output_mapping in the output ctf_dedup
|
|
with a mapping of all types that belong in this dictionary and where they
|
|
come from, and cd_conflicting_types with an indication of whether each type
|
|
is conflicted or not. OUTPUT is the top-level output: INPUTS is the array of
|
|
input dicts; NINPUTS is the size of that array; PARENTS is an NINPUTS-element
|
|
array with each element corresponding to a input which is a child dict set to
|
|
the number in the INPUTS array of that input's parent.
|
|
|
|
If CU_MAPPED is set, this is a first pass for a link with a non-empty CU
|
|
mapping: only one output will result.
|
|
|
|
Only deduplicates: does not emit the types into the output. Call
|
|
ctf_dedup_emit afterwards to do that. */
|
|
|
|
int
|
|
ctf_dedup (ctf_file_t *output, ctf_file_t **inputs, uint32_t ninputs,
|
|
uint32_t *parents, int cu_mapped)
|
|
{
|
|
ctf_dedup_t *d = &output->ctf_dedup;
|
|
size_t i;
|
|
ctf_next_t *it = NULL;
|
|
|
|
for (i = 0; i < ninputs; i++)
|
|
ctf_dprintf ("Input %i: %s\n", (int) i, ctf_link_input_name (inputs[i]));
|
|
|
|
if (ctf_dedup_init (output) < 0)
|
|
return -1; /* errno is set for us. */
|
|
|
|
/* Some flags do not apply when CU-mapping: this is not a duplicated link,
|
|
because there is only one output and we really don't want to end up marking
|
|
all nonconflicting but appears-only-once types as conflicting (which in the
|
|
CU-mapped link means we'd mark them all as non-root-visible!). */
|
|
d->cd_link_flags = output->ctf_link_flags;
|
|
if (cu_mapped)
|
|
d->cd_link_flags &= ~(CTF_LINK_SHARE_DUPLICATED);
|
|
|
|
/* Compute hash values for all types, recursively, treating child structures
|
|
and unions equivalent to forwards, and hashing in the name of the referent
|
|
of each such type into structures, unions, and non-opaque forwards.
|
|
Populate a mapping from decorated name (including an indication of
|
|
struct/union/enum namespace) to count of type hash values in
|
|
cd_name_counts, a mapping from and a mapping from hash values to input type
|
|
IDs in cd_output_mapping. */
|
|
|
|
ctf_dprintf ("Computing type hashes\n");
|
|
for (i = 0; i < ninputs; i++)
|
|
{
|
|
ctf_id_t id;
|
|
|
|
while ((id = ctf_type_next (inputs[i], &it, NULL, 1)) != CTF_ERR)
|
|
{
|
|
ctf_dedup_hash_type (output, inputs[i], inputs, parents,
|
|
i, id, 0, 0, ctf_dedup_populate_mappings);
|
|
}
|
|
if (ctf_errno (inputs[i]) != ECTF_NEXT_END)
|
|
{
|
|
ctf_err_warn (output, 0, "iteration failure computing type "
|
|
"hashes: %s", ctf_errmsg (ctf_errno (inputs[i])));
|
|
return ctf_set_errno (output, ctf_errno (inputs[i]));
|
|
}
|
|
}
|
|
|
|
/* Go through the cd_name_counts name->hash->count mapping for all CTF
|
|
namespaces: any name with many hashes associated with it at this stage is
|
|
necessarily ambiguous. Mark all the hashes except the most common as
|
|
conflicting in the output. */
|
|
|
|
ctf_dprintf ("Detecting type name ambiguity\n");
|
|
if (ctf_dedup_detect_name_ambiguity (output, inputs) < 0)
|
|
return -1; /* errno is set for us. */
|
|
|
|
/* If the link mode is CTF_LINK_SHARE_DUPLICATED, we change any unconflicting
|
|
types whose output mapping references only one input dict into a
|
|
conflicting type, so that they end up in the per-CU dictionaries. */
|
|
|
|
if (d->cd_link_flags & CTF_LINK_SHARE_DUPLICATED)
|
|
{
|
|
ctf_dprintf ("Conflictifying unshared types\n");
|
|
if (ctf_dedup_conflictify_unshared (output, inputs) < 0)
|
|
return -1; /* errno is set for us. */
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
ctf_dedup_rwalk_output_mapping (ctf_file_t *output, ctf_file_t **inputs,
|
|
uint32_t ninputs, uint32_t *parents,
|
|
ctf_dynset_t *already_visited,
|
|
const char *hval,
|
|
int (*visit_fun) (const char *hval,
|
|
ctf_file_t *output,
|
|
ctf_file_t **inputs,
|
|
uint32_t ninputs,
|
|
uint32_t *parents,
|
|
int already_visited,
|
|
ctf_file_t *input,
|
|
ctf_id_t type,
|
|
void *id,
|
|
int depth,
|
|
void *arg),
|
|
void *arg, unsigned long depth);
|
|
|
|
/* Like ctf_dedup_rwalk_output_mapping (which see), only takes a single target
|
|
type and visits it. */
|
|
static int
|
|
ctf_dedup_rwalk_one_output_mapping (ctf_file_t *output,
|
|
ctf_file_t **inputs, uint32_t ninputs,
|
|
uint32_t *parents,
|
|
ctf_dynset_t *already_visited,
|
|
int visited, void *type_id,
|
|
const char *hval,
|
|
int (*visit_fun) (const char *hval,
|
|
ctf_file_t *output,
|
|
ctf_file_t **inputs,
|
|
uint32_t ninputs,
|
|
uint32_t *parents,
|
|
int already_visited,
|
|
ctf_file_t *input,
|
|
ctf_id_t type,
|
|
void *id,
|
|
int depth,
|
|
void *arg),
|
|
void *arg, unsigned long depth)
|
|
{
|
|
ctf_dedup_t *d = &output->ctf_dedup;
|
|
ctf_file_t *fp;
|
|
int input_num;
|
|
ctf_id_t type;
|
|
int ret;
|
|
const char *whaterr;
|
|
|
|
input_num = CTF_DEDUP_GID_TO_INPUT (type_id);
|
|
fp = inputs[input_num];
|
|
type = CTF_DEDUP_GID_TO_TYPE (type_id);
|
|
|
|
ctf_dprintf ("%lu: Starting walk over type %s, %i/%lx (%p), from %s, "
|
|
"kind %i\n", depth, hval, input_num, type, (void *) fp,
|
|
ctf_link_input_name (fp), ctf_type_kind_unsliced (fp, type));
|
|
|
|
/* Get the single call we do if this type has already been visited out of the
|
|
way. */
|
|
if (visited)
|
|
return visit_fun (hval, output, inputs, ninputs, parents, visited, fp,
|
|
type, type_id, depth, arg);
|
|
|
|
/* This macro is really ugly, but the alternative is repeating this code many
|
|
times, which is worse. */
|
|
|
|
#define CTF_TYPE_WALK(type, errlabel, errmsg) \
|
|
do { \
|
|
void *type_id; \
|
|
const char *hashval; \
|
|
int cited_type_input_num = input_num; \
|
|
\
|
|
if ((fp->ctf_flags & LCTF_CHILD) && (LCTF_TYPE_ISPARENT (fp, type))) \
|
|
cited_type_input_num = parents[input_num]; \
|
|
\
|
|
type_id = CTF_DEDUP_GID (output, cited_type_input_num, type); \
|
|
\
|
|
if (type == 0) \
|
|
{ \
|
|
ctf_dprintf ("Walking: unimplemented type\n"); \
|
|
break; \
|
|
} \
|
|
\
|
|
ctf_dprintf ("Looking up ID %i/%lx in type hashes\n", \
|
|
cited_type_input_num, type); \
|
|
hashval = ctf_dynhash_lookup (d->cd_type_hashes, type_id); \
|
|
if (!ctf_assert (output, hashval)) \
|
|
{ \
|
|
whaterr = "looking up ID in type hashes"; \
|
|
goto errlabel; \
|
|
} \
|
|
ctf_dprintf ("ID %i/%lx has hash %s\n", cited_type_input_num, type, \
|
|
hashval); \
|
|
\
|
|
ret = ctf_dedup_rwalk_output_mapping (output, inputs, ninputs, parents, \
|
|
already_visited, hashval, \
|
|
visit_fun, arg, depth); \
|
|
if (ret < 0) \
|
|
{ \
|
|
whaterr = errmsg; \
|
|
goto errlabel; \
|
|
} \
|
|
} while (0)
|
|
|
|
switch (ctf_type_kind_unsliced (fp, type))
|
|
{
|
|
case CTF_K_UNKNOWN:
|
|
/* Just skip things of unknown kind. */
|
|
return 0;
|
|
case CTF_K_FORWARD:
|
|
case CTF_K_INTEGER:
|
|
case CTF_K_FLOAT:
|
|
case CTF_K_ENUM:
|
|
/* No types referenced. */
|
|
break;
|
|
|
|
case CTF_K_TYPEDEF:
|
|
case CTF_K_VOLATILE:
|
|
case CTF_K_CONST:
|
|
case CTF_K_RESTRICT:
|
|
case CTF_K_POINTER:
|
|
case CTF_K_SLICE:
|
|
CTF_TYPE_WALK (ctf_type_reference (fp, type), err,
|
|
"Referenced type walk");
|
|
break;
|
|
|
|
case CTF_K_ARRAY:
|
|
{
|
|
ctf_arinfo_t ar;
|
|
|
|
if (ctf_array_info (fp, type, &ar) < 0)
|
|
{
|
|
whaterr = "array info lookup";
|
|
goto err_msg;
|
|
}
|
|
|
|
CTF_TYPE_WALK (ar.ctr_contents, err, "Array contents type walk");
|
|
CTF_TYPE_WALK (ar.ctr_index, err, "Array index type walk");
|
|
break;
|
|
}
|
|
|
|
case CTF_K_FUNCTION:
|
|
{
|
|
ctf_funcinfo_t fi;
|
|
ctf_id_t *args;
|
|
uint32_t j;
|
|
|
|
if (ctf_func_type_info (fp, type, &fi) < 0)
|
|
{
|
|
whaterr = "func type info lookup";
|
|
goto err_msg;
|
|
}
|
|
|
|
CTF_TYPE_WALK (fi.ctc_return, err, "Func return type walk");
|
|
|
|
if ((args = calloc (fi.ctc_argc, sizeof (ctf_id_t))) == NULL)
|
|
{
|
|
whaterr = "memory allocation";
|
|
goto err_msg;
|
|
}
|
|
|
|
if (ctf_func_type_args (fp, type, fi.ctc_argc, args) < 0)
|
|
{
|
|
whaterr = "func arg type lookup";
|
|
free (args);
|
|
goto err_msg;
|
|
}
|
|
|
|
for (j = 0; j < fi.ctc_argc; j++)
|
|
CTF_TYPE_WALK (args[j], err_free_args, "Func arg type walk");
|
|
free (args);
|
|
break;
|
|
|
|
err_free_args:
|
|
free (args);
|
|
goto err;
|
|
}
|
|
case CTF_K_STRUCT:
|
|
case CTF_K_UNION:
|
|
/* We do not recursively traverse the members of structures: they are
|
|
emitted later, in a separate pass. */
|
|
break;
|
|
default:
|
|
whaterr = "unknown type kind";
|
|
goto err;
|
|
}
|
|
|
|
return visit_fun (hval, output, inputs, ninputs, parents, visited, fp, type,
|
|
type_id, depth, arg);
|
|
|
|
err_msg:
|
|
ctf_set_errno (output, ctf_errno (fp));
|
|
ctf_err_warn (fp, 0, "%s during type walking in %s at ID %lx: %s", whaterr,
|
|
ctf_link_input_name (fp), type, ctf_errmsg (ctf_errno (fp)));
|
|
err:
|
|
return -1;
|
|
}
|
|
/* Recursively traverse the output mapping, and do something with each type
|
|
visited, from leaves to root. VISIT_FUN, called as recursion unwinds,
|
|
returns a negative error code or zero. Type hashes may be visited more than
|
|
once, but are not recursed through repeatedly: ALREADY_VISITED tracks whether
|
|
types have already been visited. */
|
|
static int
|
|
ctf_dedup_rwalk_output_mapping (ctf_file_t *output, ctf_file_t **inputs,
|
|
uint32_t ninputs, uint32_t *parents,
|
|
ctf_dynset_t *already_visited,
|
|
const char *hval,
|
|
int (*visit_fun) (const char *hval,
|
|
ctf_file_t *output,
|
|
ctf_file_t **inputs,
|
|
uint32_t ninputs,
|
|
uint32_t *parents,
|
|
int already_visited,
|
|
ctf_file_t *input,
|
|
ctf_id_t type,
|
|
void *id,
|
|
int depth,
|
|
void *arg),
|
|
void *arg, unsigned long depth)
|
|
{
|
|
ctf_dedup_t *d = &output->ctf_dedup;
|
|
ctf_next_t *i = NULL;
|
|
int err;
|
|
int visited = 1;
|
|
ctf_dynset_t *type_ids;
|
|
void *id;
|
|
|
|
depth++;
|
|
|
|
type_ids = ctf_dynhash_lookup (d->cd_output_mapping, hval);
|
|
if (!type_ids)
|
|
{
|
|
ctf_err_warn (output, 0, "looked up type kind by nonexistent "
|
|
"hash %s.", hval);
|
|
return ctf_set_errno (output, ECTF_INTERNAL);
|
|
}
|
|
|
|
/* Have we seen this type before? */
|
|
|
|
if (!ctf_dynset_exists (already_visited, hval, NULL))
|
|
{
|
|
/* Mark as already-visited immediately, to eliminate the possibility of
|
|
cycles: but remember we have not actually visited it yet for the
|
|
upcoming call to the visit_fun. (All our callers handle cycles
|
|
properly themselves, so we can just abort them aggressively as soon as
|
|
we find ourselves in one.) */
|
|
|
|
visited = 0;
|
|
if (ctf_dynset_cinsert (already_visited, hval) < 0)
|
|
{
|
|
ctf_err_warn (output, 0, "out of memory tracking already-visited "
|
|
"types.");
|
|
return ctf_set_errno (output, ENOMEM);
|
|
}
|
|
}
|
|
|
|
/* If this type is marked conflicted, traverse members and call
|
|
ctf_dedup_rwalk_output_mapping_once on all the unique ones: otherwise, just
|
|
pick a random one and use it. */
|
|
|
|
if (!ctf_dynset_exists (d->cd_conflicting_types, hval, NULL))
|
|
{
|
|
id = ctf_dynset_lookup_any (type_ids);
|
|
if (!ctf_assert (output, id))
|
|
return -1;
|
|
|
|
return ctf_dedup_rwalk_one_output_mapping (output, inputs, ninputs,
|
|
parents, already_visited,
|
|
visited, id, hval, visit_fun,
|
|
arg, depth);
|
|
}
|
|
|
|
while ((err = ctf_dynset_next (type_ids, &i, &id)) == 0)
|
|
{
|
|
int ret;
|
|
|
|
ret = ctf_dedup_rwalk_one_output_mapping (output, inputs, ninputs,
|
|
parents, already_visited,
|
|
visited, id, hval,
|
|
visit_fun, arg, depth);
|
|
if (ret < 0)
|
|
{
|
|
ctf_next_destroy (i);
|
|
return ret; /* errno is set for us. */
|
|
}
|
|
}
|
|
if (err != ECTF_NEXT_END)
|
|
{
|
|
ctf_err_warn (output, 0, "walking many types with one hash: %s",
|
|
ctf_errmsg (err));
|
|
return ctf_set_errno (output, err);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
typedef struct ctf_sort_om_cb_arg
|
|
{
|
|
ctf_file_t **inputs;
|
|
uint32_t ninputs;
|
|
ctf_dedup_t *d;
|
|
} ctf_sort_om_cb_arg_t;
|
|
|
|
/* Sort the output mapping into order: types first appearing in earlier inputs
|
|
first, parents preceding children: if types first appear in the same input,
|
|
sort those with earlier ctf_id_t's first. */
|
|
static int
|
|
sort_output_mapping (const ctf_next_hkv_t *one, const ctf_next_hkv_t *two,
|
|
void *arg_)
|
|
{
|
|
ctf_sort_om_cb_arg_t *arg = (ctf_sort_om_cb_arg_t *) arg_;
|
|
ctf_dedup_t *d = arg->d;
|
|
const char *one_hval = (const char *) one->hkv_key;
|
|
const char *two_hval = (const char *) two->hkv_key;
|
|
void *one_gid, *two_gid;
|
|
uint32_t one_ninput;
|
|
uint32_t two_ninput;
|
|
ctf_file_t *one_fp;
|
|
ctf_file_t *two_fp;
|
|
ctf_id_t one_type;
|
|
ctf_id_t two_type;
|
|
|
|
one_gid = ctf_dynhash_lookup (d->cd_output_first_gid, one_hval);
|
|
two_gid = ctf_dynhash_lookup (d->cd_output_first_gid, two_hval);
|
|
|
|
one_ninput = CTF_DEDUP_GID_TO_INPUT (one_gid);
|
|
two_ninput = CTF_DEDUP_GID_TO_INPUT (two_gid);
|
|
|
|
one_type = CTF_DEDUP_GID_TO_TYPE (one_gid);
|
|
two_type = CTF_DEDUP_GID_TO_TYPE (two_gid);
|
|
|
|
/* It's kind of hard to smuggle an assertion failure out of here. */
|
|
assert (one_ninput < arg->ninputs && two_ninput < arg->ninputs);
|
|
|
|
one_fp = arg->inputs[one_ninput];
|
|
two_fp = arg->inputs[two_ninput];
|
|
|
|
/* Parents before children. */
|
|
|
|
if (!(one_fp->ctf_flags & LCTF_CHILD)
|
|
&& (two_fp->ctf_flags & LCTF_CHILD))
|
|
return -1;
|
|
else if ((one_fp->ctf_flags & LCTF_CHILD)
|
|
&& !(two_fp->ctf_flags & LCTF_CHILD))
|
|
return 1;
|
|
|
|
/* ninput order, types appearing in earlier TUs first. */
|
|
|
|
if (one_ninput < two_ninput)
|
|
return -1;
|
|
else if (two_ninput < one_ninput)
|
|
return 1;
|
|
|
|
/* Same TU. Earliest ctf_id_t first. They cannot be the same. */
|
|
|
|
assert (one_type != two_type);
|
|
if (one_type < two_type)
|
|
return -1;
|
|
else
|
|
return 1;
|
|
}
|
|
|
|
/* The public entry point to ctf_dedup_rwalk_output_mapping, above. */
|
|
static int
|
|
ctf_dedup_walk_output_mapping (ctf_file_t *output, ctf_file_t **inputs,
|
|
uint32_t ninputs, uint32_t *parents,
|
|
int (*visit_fun) (const char *hval,
|
|
ctf_file_t *output,
|
|
ctf_file_t **inputs,
|
|
uint32_t ninputs,
|
|
uint32_t *parents,
|
|
int already_visited,
|
|
ctf_file_t *input,
|
|
ctf_id_t type,
|
|
void *id,
|
|
int depth,
|
|
void *arg),
|
|
void *arg)
|
|
{
|
|
ctf_dynset_t *already_visited;
|
|
ctf_next_t *i = NULL;
|
|
ctf_sort_om_cb_arg_t sort_arg;
|
|
int err;
|
|
void *k;
|
|
|
|
if ((already_visited = ctf_dynset_create (htab_hash_string,
|
|
ctf_dynset_eq_string,
|
|
NULL)) == NULL)
|
|
return ctf_set_errno (output, ENOMEM);
|
|
|
|
sort_arg.inputs = inputs;
|
|
sort_arg.ninputs = ninputs;
|
|
sort_arg.d = &output->ctf_dedup;
|
|
|
|
while ((err = ctf_dynhash_next_sorted (output->ctf_dedup.cd_output_mapping,
|
|
&i, &k, NULL, sort_output_mapping,
|
|
&sort_arg)) == 0)
|
|
{
|
|
const char *hval = (const char *) k;
|
|
|
|
err = ctf_dedup_rwalk_output_mapping (output, inputs, ninputs, parents,
|
|
already_visited, hval, visit_fun,
|
|
arg, 0);
|
|
if (err < 0)
|
|
{
|
|
ctf_next_destroy (i);
|
|
goto err; /* errno is set for us. */
|
|
}
|
|
}
|
|
if (err != ECTF_NEXT_END)
|
|
{
|
|
ctf_err_warn (output, 0, "recursing over output mapping: %s",
|
|
ctf_errmsg (err));
|
|
ctf_set_errno (output, err);
|
|
goto err;
|
|
}
|
|
ctf_dynset_destroy (already_visited);
|
|
|
|
return 0;
|
|
err:
|
|
ctf_dynset_destroy (already_visited);
|
|
return -1;
|
|
}
|
|
|
|
/* Possibly synthesise a synthetic forward in TARGET to subsitute for a
|
|
conflicted per-TU type ID in INPUT with hash HVAL. Return its CTF ID, or 0
|
|
if none was needed. */
|
|
static ctf_id_t
|
|
ctf_dedup_maybe_synthesize_forward (ctf_file_t *output, ctf_file_t *target,
|
|
ctf_file_t *input, ctf_id_t id,
|
|
const char *hval)
|
|
{
|
|
ctf_dedup_t *od = &output->ctf_dedup;
|
|
ctf_dedup_t *td = &target->ctf_dedup;
|
|
int kind;
|
|
int fwdkind;
|
|
const char *name;
|
|
const char *decorated;
|
|
void *v;
|
|
ctf_id_t emitted_forward;
|
|
|
|
if (!ctf_dynset_exists (od->cd_conflicting_types, hval, NULL)
|
|
|| target->ctf_flags & LCTF_CHILD
|
|
|| !ctf_type_name_raw (input, id)
|
|
|| (((kind = ctf_type_kind_unsliced (input, id)) != CTF_K_STRUCT
|
|
&& kind != CTF_K_UNION && kind != CTF_K_FORWARD)))
|
|
return 0;
|
|
|
|
fwdkind = ctf_type_kind_forwarded (input, id);
|
|
name = ctf_type_name_raw (input, id);
|
|
|
|
ctf_dprintf ("Using synthetic forward for conflicted struct/union with "
|
|
"hval %s\n", hval);
|
|
|
|
if (!ctf_assert (output, name))
|
|
return CTF_ERR;
|
|
|
|
if ((decorated = ctf_decorate_type_name (output, name, fwdkind)) == NULL)
|
|
return CTF_ERR;
|
|
|
|
if (!ctf_dynhash_lookup_kv (td->cd_output_emission_conflicted_forwards,
|
|
decorated, NULL, &v))
|
|
{
|
|
if ((emitted_forward = ctf_add_forward (target, CTF_ADD_ROOT, name,
|
|
fwdkind)) == CTF_ERR)
|
|
{
|
|
ctf_set_errno (output, ctf_errno (target));
|
|
return CTF_ERR;
|
|
}
|
|
|
|
if (ctf_dynhash_cinsert (td->cd_output_emission_conflicted_forwards,
|
|
decorated, (void *) (uintptr_t)
|
|
emitted_forward) < 0)
|
|
{
|
|
ctf_set_errno (output, ENOMEM);
|
|
return CTF_ERR;
|
|
}
|
|
}
|
|
else
|
|
emitted_forward = (ctf_id_t) (uintptr_t) v;
|
|
|
|
ctf_dprintf ("Cross-TU conflicted struct: passing back forward, %lx\n",
|
|
emitted_forward);
|
|
|
|
return emitted_forward;
|
|
}
|
|
|
|
/* Map a GID in some INPUT dict, in the form of an input number and a ctf_id_t,
|
|
into a GID in a target output dict. If it returns 0, this is the
|
|
unimplemented type, and the input type must have been 0. The OUTPUT dict is
|
|
assumed to be the parent of the TARGET, if it is not the TARGET itself.
|
|
|
|
Returns CTF_ERR on failure. Responds to an incoming CTF_ERR as an 'id' by
|
|
returning CTF_ERR, to simplify callers. Errors are always propagated to the
|
|
input, even if they relate to the target, for the same reason. (Target
|
|
errors are expected to be very rare.)
|
|
|
|
If the type in question is a citation of a conflicted type in a different TU,
|
|
emit a forward of the right type in its place (if not already emitted), and
|
|
record that forward in cd_output_emission_conflicted_forwards. This avoids
|
|
the need to replicate the entire type graph below this point in the current
|
|
TU (an appalling waste of space).
|
|
|
|
TODO: maybe replace forwards in the same TU with their referents? Might
|
|
make usability a bit better. */
|
|
|
|
static ctf_id_t
|
|
ctf_dedup_id_to_target (ctf_file_t *output, ctf_file_t *target,
|
|
ctf_file_t **inputs, uint32_t ninputs,
|
|
uint32_t *parents, ctf_file_t *input, int input_num,
|
|
ctf_id_t id)
|
|
{
|
|
ctf_dedup_t *od = &output->ctf_dedup;
|
|
ctf_dedup_t *td = &target->ctf_dedup;
|
|
ctf_file_t *err_fp = input;
|
|
const char *hval;
|
|
void *target_id;
|
|
ctf_id_t emitted_forward;
|
|
|
|
/* The target type of an error is an error. */
|
|
if (id == CTF_ERR)
|
|
return CTF_ERR;
|
|
|
|
/* The unimplemented type's ID never changes. */
|
|
if (!id)
|
|
{
|
|
ctf_dprintf ("%i/%lx: unimplemented type\n", input_num, id);
|
|
return 0;
|
|
}
|
|
|
|
ctf_dprintf ("Mapping %i/%lx to target %p (%s)\n", input_num,
|
|
id, (void *) target, ctf_link_input_name (target));
|
|
|
|
/* If the input type is in the parent type space, and this is a child, reset
|
|
the input to the parent (which must already have been emitted, since
|
|
emission of parent dicts happens before children). */
|
|
if ((input->ctf_flags & LCTF_CHILD) && (LCTF_TYPE_ISPARENT (input, id)))
|
|
{
|
|
if (!ctf_assert (output, parents[input_num] <= ninputs))
|
|
return -1;
|
|
input = inputs[parents[input_num]];
|
|
input_num = parents[input_num];
|
|
}
|
|
|
|
hval = ctf_dynhash_lookup (od->cd_type_hashes,
|
|
CTF_DEDUP_GID (output, input_num, id));
|
|
|
|
if (!ctf_assert (output, hval && td->cd_output_emission_hashes))
|
|
return -1;
|
|
|
|
/* If this type is a conflicted tagged structure, union, or forward,
|
|
substitute a synthetic forward instead, emitting it if need be. Only do
|
|
this if the target is in the parent dict: if it's in the child dict, we can
|
|
just point straight at the thing itself. Of course, we might be looking in
|
|
the child dict right now and not find it and have to look in the parent, so
|
|
we have to do this check twice. */
|
|
|
|
emitted_forward = ctf_dedup_maybe_synthesize_forward (output, target,
|
|
input, id, hval);
|
|
switch (emitted_forward)
|
|
{
|
|
case 0: /* No forward needed. */
|
|
break;
|
|
case -1:
|
|
ctf_set_errno (err_fp, ctf_errno (output));
|
|
ctf_err_warn (err_fp, 0, "adding synthetic forward for type %i/%lx: "
|
|
"%s", input_num, id, ctf_errmsg (ctf_errno (err_fp)));
|
|
return -1;
|
|
default:
|
|
return emitted_forward;
|
|
}
|
|
|
|
ctf_dprintf ("Looking up %i/%lx, hash %s, in target\n", input_num, id, hval);
|
|
|
|
target_id = ctf_dynhash_lookup (td->cd_output_emission_hashes, hval);
|
|
if (!target_id)
|
|
{
|
|
/* Must be in the parent, so this must be a child, and they must not be
|
|
the same dict. */
|
|
ctf_dprintf ("Checking shared parent for target\n");
|
|
if (!ctf_assert (output, (target != output)
|
|
&& (target->ctf_flags & LCTF_CHILD)))
|
|
return -1;
|
|
|
|
target_id = ctf_dynhash_lookup (od->cd_output_emission_hashes, hval);
|
|
|
|
emitted_forward = ctf_dedup_maybe_synthesize_forward (output, output,
|
|
input, id, hval);
|
|
switch (emitted_forward)
|
|
{
|
|
case 0: /* No forward needed. */
|
|
break;
|
|
case -1:
|
|
ctf_set_errno (err_fp, ctf_errno (output));
|
|
ctf_err_warn (err_fp, 0, "adding synthetic forward for type %i/%lx: "
|
|
"%s", input_num, id, ctf_errmsg (ctf_errno (err_fp)));
|
|
return -1;
|
|
default:
|
|
return emitted_forward;
|
|
}
|
|
}
|
|
if (!ctf_assert (output, target_id))
|
|
return -1;
|
|
return (ctf_id_t) (uintptr_t) target_id;
|
|
}
|
|
|
|
/* Emit a single deduplicated TYPE with the given HVAL, located in a given
|
|
INPUT, with the given (G)ID, into the shared OUTPUT or a
|
|
possibly-newly-created per-CU dict. All the types this type depends upon
|
|
have already been emitted. (This type itself may also have been emitted.)
|
|
|
|
If the ARG is 1, this is a CU-mapped deduplication round mapping many
|
|
ctf_file_t's into precisely one: conflicting types should be marked
|
|
non-root-visible. If the ARG is 0, conflicting types go into per-CU
|
|
dictionaries stored in the input's ctf_dedup.cd_output: otherwise, everything
|
|
is emitted directly into the output. No struct/union members are emitted.
|
|
|
|
Optimization opportunity: trace the ancestry of non-root-visible types and
|
|
elide all that neither have a root-visible type somewhere towards their root,
|
|
nor have the type visible via any other route (the function info section,
|
|
data object section, backtrace section etc). */
|
|
|
|
static int
|
|
ctf_dedup_emit_type (const char *hval, ctf_file_t *output, ctf_file_t **inputs,
|
|
uint32_t ninputs, uint32_t *parents, int already_visited,
|
|
ctf_file_t *input, ctf_id_t type, void *id, int depth,
|
|
void *arg)
|
|
{
|
|
ctf_dedup_t *d = &output->ctf_dedup;
|
|
int kind = ctf_type_kind_unsliced (input, type);
|
|
const char *name;
|
|
ctf_file_t *target = output;
|
|
ctf_file_t *real_input;
|
|
const ctf_type_t *tp;
|
|
int input_num = CTF_DEDUP_GID_TO_INPUT (id);
|
|
int output_num = (uint32_t) -1; /* 'shared' */
|
|
int cu_mapped = *(int *)arg;
|
|
int isroot = 1;
|
|
int is_conflicting;
|
|
|
|
ctf_next_t *i = NULL;
|
|
ctf_id_t new_type;
|
|
ctf_id_t ref;
|
|
ctf_id_t maybe_dup = 0;
|
|
ctf_encoding_t ep;
|
|
const char *erm;
|
|
int emission_hashed = 0;
|
|
|
|
/* We don't want to re-emit something we've already emitted. */
|
|
|
|
if (already_visited)
|
|
return 0;
|
|
|
|
ctf_dprintf ("%i: Emitting type with hash %s from %s: determining target\n",
|
|
depth, hval, ctf_link_input_name (input));
|
|
|
|
/* Conflicting types go into a per-CU output dictionary, unless this is a
|
|
CU-mapped run. The import is not refcounted, since it goes into the
|
|
ctf_link_outputs dict of the output that is its parent. */
|
|
is_conflicting = ctf_dynset_exists (d->cd_conflicting_types, hval, NULL);
|
|
|
|
if (is_conflicting && !cu_mapped)
|
|
{
|
|
ctf_dprintf ("%i: Type %s in %i/%lx is conflicted: "
|
|
"inserting into per-CU target.\n",
|
|
depth, hval, input_num, type);
|
|
|
|
if (input->ctf_dedup.cd_output)
|
|
target = input->ctf_dedup.cd_output;
|
|
else
|
|
{
|
|
int err;
|
|
|
|
if ((target = ctf_create (&err)) == NULL)
|
|
{
|
|
ctf_err_warn (output, 0, "cannot create per-CU CTF archive "
|
|
"for CU %s: %s", ctf_link_input_name (input),
|
|
ctf_errmsg (err)); ctf_set_errno (output, err);
|
|
return -1;
|
|
}
|
|
|
|
ctf_import_unref (target, output);
|
|
if (ctf_cuname (input) != NULL)
|
|
ctf_cuname_set (target, ctf_cuname (input));
|
|
else
|
|
ctf_cuname_set (target, "unnamed-CU");
|
|
ctf_parent_name_set (target, _CTF_SECTION);
|
|
|
|
input->ctf_dedup.cd_output = target;
|
|
}
|
|
output_num = input_num;
|
|
}
|
|
|
|
real_input = input;
|
|
if ((tp = ctf_lookup_by_id (&real_input, type)) == NULL)
|
|
{
|
|
ctf_err_warn (output, 0, "%s: lookup failure for type %lx: %s",
|
|
ctf_link_input_name (real_input), type,
|
|
ctf_errmsg (ctf_errno (input)));
|
|
ctf_set_errno (output, ctf_errno (input));
|
|
return -1; /* errno is set for us. */
|
|
}
|
|
|
|
name = ctf_strraw (real_input, tp->ctt_name);
|
|
|
|
/* Hide conflicting types, if we were asked to: also hide if a type with this
|
|
name already exists and is not a forward. */
|
|
if (cu_mapped && is_conflicting)
|
|
isroot = 0;
|
|
else if (name
|
|
&& (maybe_dup = ctf_lookup_by_rawname (target, kind, name)) != 0)
|
|
{
|
|
if (ctf_type_kind (target, maybe_dup) != CTF_K_FORWARD)
|
|
isroot = 0;
|
|
}
|
|
|
|
ctf_dprintf ("%i: Emitting type with hash %s (%s), into target %i/%p\n",
|
|
depth, hval, name ? name : "", input_num, (void *) target);
|
|
|
|
if (!target->ctf_dedup.cd_output_emission_hashes)
|
|
if ((target->ctf_dedup.cd_output_emission_hashes
|
|
= ctf_dynhash_create (ctf_hash_string, ctf_hash_eq_string,
|
|
NULL, NULL)) == NULL)
|
|
goto oom_hash;
|
|
|
|
if (!target->ctf_dedup.cd_output_emission_conflicted_forwards)
|
|
if ((target->ctf_dedup.cd_output_emission_conflicted_forwards
|
|
= ctf_dynhash_create (ctf_hash_string, ctf_hash_eq_string,
|
|
NULL, NULL)) == NULL)
|
|
goto oom_hash;
|
|
|
|
switch (kind)
|
|
{
|
|
case CTF_K_UNKNOWN:
|
|
/* These are types that CTF cannot encode, marked as such by the compile.
|
|
We intentionally do not re-emit these. */
|
|
new_type = 0;
|
|
break;
|
|
case CTF_K_FORWARD:
|
|
/* This will do nothing if the type to which this forwards already exists,
|
|
and will be replaced with such a type if it appears later. */
|
|
|
|
erm = "forward";
|
|
if ((new_type = ctf_add_forward (target, isroot, name,
|
|
ctf_type_kind_forwarded (input, type)))
|
|
== CTF_ERR)
|
|
goto err_target;
|
|
break;
|
|
|
|
case CTF_K_FLOAT:
|
|
case CTF_K_INTEGER:
|
|
erm = "float/int";
|
|
if (ctf_type_encoding (input, type, &ep) < 0)
|
|
goto err_input; /* errno is set for us. */
|
|
if ((new_type = ctf_add_encoded (target, isroot, name, &ep, kind))
|
|
== CTF_ERR)
|
|
goto err_target;
|
|
break;
|
|
|
|
case CTF_K_ENUM:
|
|
{
|
|
int val;
|
|
erm = "enum";
|
|
if ((new_type = ctf_add_enum (target, isroot, name)) == CTF_ERR)
|
|
goto err_input; /* errno is set for us. */
|
|
|
|
while ((name = ctf_enum_next (input, type, &i, &val)) != NULL)
|
|
{
|
|
if (ctf_add_enumerator (target, new_type, name, val) < 0)
|
|
{
|
|
ctf_err_warn (target, 0, "%s (%i): cannot add enumeration "
|
|
"value %s from input type %lx: %s",
|
|
ctf_link_input_name (input), input_num, name,
|
|
type, ctf_errmsg (ctf_errno (target)));
|
|
ctf_next_destroy (i);
|
|
return ctf_set_errno (output, ctf_errno (target));
|
|
}
|
|
}
|
|
if (ctf_errno (input) != ECTF_NEXT_END)
|
|
goto err_input;
|
|
break;
|
|
}
|
|
|
|
case CTF_K_TYPEDEF:
|
|
erm = "typedef";
|
|
|
|
ref = ctf_type_reference (input, type);
|
|
if ((ref = ctf_dedup_id_to_target (output, target, inputs, ninputs,
|
|
parents, input, input_num,
|
|
ref)) == CTF_ERR)
|
|
goto err_input; /* errno is set for us. */
|
|
|
|
if ((new_type = ctf_add_typedef (target, isroot, name, ref)) == CTF_ERR)
|
|
goto err_target; /* errno is set for us. */
|
|
break;
|
|
|
|
case CTF_K_VOLATILE:
|
|
case CTF_K_CONST:
|
|
case CTF_K_RESTRICT:
|
|
case CTF_K_POINTER:
|
|
erm = "pointer or cvr-qual";
|
|
|
|
ref = ctf_type_reference (input, type);
|
|
if ((ref = ctf_dedup_id_to_target (output, target, inputs, ninputs,
|
|
parents, input, input_num,
|
|
ref)) == CTF_ERR)
|
|
goto err_input; /* errno is set for us. */
|
|
|
|
if ((new_type = ctf_add_reftype (target, isroot, ref, kind)) == CTF_ERR)
|
|
goto err_target; /* errno is set for us. */
|
|
break;
|
|
|
|
case CTF_K_SLICE:
|
|
erm = "slice";
|
|
|
|
if (ctf_type_encoding (input, type, &ep) < 0)
|
|
goto err_input; /* errno is set for us. */
|
|
|
|
ref = ctf_type_reference (input, type);
|
|
if ((ref = ctf_dedup_id_to_target (output, target, inputs, ninputs,
|
|
parents, input, input_num,
|
|
ref)) == CTF_ERR)
|
|
goto err_input;
|
|
|
|
if ((new_type = ctf_add_slice (target, isroot, ref, &ep)) == CTF_ERR)
|
|
goto err_target;
|
|
break;
|
|
|
|
case CTF_K_ARRAY:
|
|
{
|
|
ctf_arinfo_t ar;
|
|
|
|
erm = "array info";
|
|
if (ctf_array_info (input, type, &ar) < 0)
|
|
goto err_input;
|
|
|
|
ar.ctr_contents = ctf_dedup_id_to_target (output, target, inputs,
|
|
ninputs, parents, input,
|
|
input_num, ar.ctr_contents);
|
|
ar.ctr_index = ctf_dedup_id_to_target (output, target, inputs, ninputs,
|
|
parents, input, input_num,
|
|
ar.ctr_index);
|
|
|
|
if (ar.ctr_contents == CTF_ERR || ar.ctr_index == CTF_ERR)
|
|
goto err_input;
|
|
|
|
if ((new_type = ctf_add_array (target, isroot, &ar)) == CTF_ERR)
|
|
goto err_target;
|
|
|
|
break;
|
|
}
|
|
|
|
case CTF_K_FUNCTION:
|
|
{
|
|
ctf_funcinfo_t fi;
|
|
ctf_id_t *args;
|
|
uint32_t j;
|
|
|
|
erm = "function";
|
|
if (ctf_func_type_info (input, type, &fi) < 0)
|
|
goto err_input;
|
|
|
|
fi.ctc_return = ctf_dedup_id_to_target (output, target, inputs, ninputs,
|
|
parents, input, input_num,
|
|
fi.ctc_return);
|
|
if (fi.ctc_return == CTF_ERR)
|
|
goto err_input;
|
|
|
|
if ((args = calloc (fi.ctc_argc, sizeof (ctf_id_t))) == NULL)
|
|
{
|
|
ctf_set_errno (input, ENOMEM);
|
|
goto err_input;
|
|
}
|
|
|
|
erm = "function args";
|
|
if (ctf_func_type_args (input, type, fi.ctc_argc, args) < 0)
|
|
{
|
|
free (args);
|
|
goto err_input;
|
|
}
|
|
|
|
for (j = 0; j < fi.ctc_argc; j++)
|
|
{
|
|
args[j] = ctf_dedup_id_to_target (output, target, inputs, ninputs,
|
|
parents, input, input_num,
|
|
args[j]);
|
|
if (args[j] == CTF_ERR)
|
|
goto err_input;
|
|
}
|
|
|
|
if ((new_type = ctf_add_function (target, isroot,
|
|
&fi, args)) == CTF_ERR)
|
|
{
|
|
free (args);
|
|
goto err_target;
|
|
}
|
|
free (args);
|
|
break;
|
|
}
|
|
|
|
case CTF_K_STRUCT:
|
|
case CTF_K_UNION:
|
|
{
|
|
size_t size = ctf_type_size (input, type);
|
|
void *out_id;
|
|
/* Insert the structure itself, so other types can refer to it. */
|
|
|
|
erm = "structure/union";
|
|
if (kind == CTF_K_STRUCT)
|
|
new_type = ctf_add_struct_sized (target, isroot, name, size);
|
|
else
|
|
new_type = ctf_add_union_sized (target, isroot, name, size);
|
|
|
|
if (new_type == CTF_ERR)
|
|
goto err_target;
|
|
|
|
out_id = CTF_DEDUP_GID (output, output_num, new_type);
|
|
ctf_dprintf ("%i: Noting need to emit members of %p -> %p\n", depth,
|
|
id, out_id);
|
|
/* Record the need to emit the members of this structure later. */
|
|
if (ctf_dynhash_insert (d->cd_emission_struct_members, id, out_id) < 0)
|
|
goto err_target;
|
|
break;
|
|
}
|
|
default:
|
|
ctf_err_warn (output, 0, "%s: unknown type kind for input type %lx",
|
|
ctf_link_input_name (input), type);
|
|
return -1;
|
|
}
|
|
|
|
if (!emission_hashed
|
|
&& new_type != 0
|
|
&& ctf_dynhash_cinsert (target->ctf_dedup.cd_output_emission_hashes,
|
|
hval, (void *) (uintptr_t) new_type) < 0)
|
|
{
|
|
ctf_err_warn (output, 0, "out of memory tracking deduplicated "
|
|
"global type IDs");
|
|
return ctf_set_errno (output, ENOMEM);
|
|
}
|
|
|
|
if (!emission_hashed && new_type != 0)
|
|
ctf_dprintf ("%i: Inserted %s, %i/%lx -> %lx into emission hash for "
|
|
"target %p (%s)\n", depth, hval, input_num, type, new_type,
|
|
(void *) target, ctf_link_input_name (target));
|
|
|
|
return 0;
|
|
|
|
oom_hash:
|
|
ctf_err_warn (output, 0, "out of memory creating emission-tracking hashes");
|
|
return ctf_set_errno (output, ENOMEM);
|
|
|
|
err_input:
|
|
ctf_err_warn (output, 0, "%s (%i): while emitting deduplicated %s, error "
|
|
"getting input type %lx: %s", ctf_link_input_name (input),
|
|
input_num,erm, type, ctf_errmsg (ctf_errno (input)));
|
|
ctf_set_errno (output, ctf_errno (input));
|
|
return -1;
|
|
err_target:
|
|
ctf_err_warn (output, 0, "%s (%i): while emitting deduplicated %s, error "
|
|
"emitting target type from input type %lx: %s",
|
|
ctf_link_input_name (input), input_num, erm, type,
|
|
ctf_errmsg (ctf_errno (target)));
|
|
ctf_set_errno (output, ctf_errno (target));
|
|
return -1;
|
|
}
|
|
|
|
/* Traverse the cd_emission_struct_members and emit the members of all
|
|
structures and unions. All other types are emitted and complete by this
|
|
point. */
|
|
|
|
static int
|
|
ctf_dedup_emit_struct_members (ctf_file_t *output, ctf_file_t **inputs,
|
|
uint32_t ninputs, uint32_t *parents)
|
|
{
|
|
ctf_dedup_t *d = &output->ctf_dedup;
|
|
ctf_next_t *i = NULL;
|
|
void *input_id, *target_id;
|
|
int err;
|
|
ctf_file_t *err_fp, *input_fp;
|
|
int input_num;
|
|
ctf_id_t err_type;
|
|
|
|
while ((err = ctf_dynhash_next (d->cd_emission_struct_members, &i,
|
|
&input_id, &target_id)) == 0)
|
|
{
|
|
ctf_next_t *j = NULL;
|
|
ctf_file_t *target;
|
|
uint32_t target_num;
|
|
ctf_id_t input_type, target_type;
|
|
ssize_t offset;
|
|
ctf_id_t membtype;
|
|
const char *name;
|
|
|
|
input_num = CTF_DEDUP_GID_TO_INPUT (input_id);
|
|
input_fp = inputs[input_num];
|
|
input_type = CTF_DEDUP_GID_TO_TYPE (input_id);
|
|
|
|
/* The output is either -1 (for the shared, parent output dict) or the
|
|
number of the corresponding input. */
|
|
target_num = CTF_DEDUP_GID_TO_INPUT (target_id);
|
|
if (target_num == (uint32_t) -1)
|
|
target = output;
|
|
else
|
|
{
|
|
target = inputs[target_num]->ctf_dedup.cd_output;
|
|
if (!ctf_assert (output, target))
|
|
{
|
|
err_fp = output;
|
|
err_type = input_type;
|
|
goto err_target;
|
|
}
|
|
}
|
|
target_type = CTF_DEDUP_GID_TO_TYPE (target_id);
|
|
|
|
while ((offset = ctf_member_next (input_fp, input_type, &j, &name,
|
|
&membtype)) >= 0)
|
|
{
|
|
err_fp = target;
|
|
err_type = target_type;
|
|
if ((membtype = ctf_dedup_id_to_target (output, target, inputs,
|
|
ninputs, parents, input_fp,
|
|
input_num,
|
|
membtype)) == CTF_ERR)
|
|
{
|
|
ctf_next_destroy (j);
|
|
goto err_target;
|
|
}
|
|
|
|
if (name == NULL)
|
|
name = "";
|
|
#ifdef ENABLE_LIBCTF_HASH_DEBUGGING
|
|
ctf_dprintf ("Emitting %s, offset %zi\n", name, offset);
|
|
#endif
|
|
if (ctf_add_member_offset (target, target_type, name,
|
|
membtype, offset) < 0)
|
|
{
|
|
ctf_next_destroy (j);
|
|
goto err_target;
|
|
}
|
|
}
|
|
if (ctf_errno (input_fp) != ECTF_NEXT_END)
|
|
{
|
|
err = ctf_errno (input_fp);
|
|
ctf_next_destroy (i);
|
|
goto iterr;
|
|
}
|
|
}
|
|
if (err != ECTF_NEXT_END)
|
|
goto iterr;
|
|
|
|
return 0;
|
|
err_target:
|
|
ctf_next_destroy (i);
|
|
ctf_err_warn (output, 0, "%s (%i): error emitting members for structure "
|
|
"type %lx: %s", ctf_link_input_name (input_fp), input_num,
|
|
err_type, ctf_errmsg (ctf_errno (err_fp)));
|
|
ctf_set_errno (output, ctf_errno (err_fp));
|
|
return -1;
|
|
iterr:
|
|
ctf_err_warn (output, 0, "iteration failure emitting structure members: %s",
|
|
ctf_errmsg (err));
|
|
ctf_set_errno (output, err);
|
|
return -1;
|
|
}
|
|
|
|
/* Populate the type mapping used by the types in one FP (which must be an input
|
|
dict containing a non-null cd_output resulting from a ctf_dedup_emit_type
|
|
walk). */
|
|
static int
|
|
ctf_dedup_populate_type_mapping (ctf_file_t *shared, ctf_file_t *fp,
|
|
ctf_file_t **inputs)
|
|
{
|
|
ctf_dedup_t *d = &shared->ctf_dedup;
|
|
ctf_file_t *output = fp->ctf_dedup.cd_output;
|
|
const void *k, *v;
|
|
ctf_next_t *i = NULL;
|
|
int err;
|
|
|
|
/* The shared dict (the output) stores its types in the fp itself, not in a
|
|
separate cd_output dict. */
|
|
if (shared == fp)
|
|
output = fp;
|
|
|
|
/* There may be no types to emit at all, or all the types in this TU may be
|
|
shared. */
|
|
if (!output || !output->ctf_dedup.cd_output_emission_hashes)
|
|
return 0;
|
|
|
|
while ((err = ctf_dynhash_cnext (output->ctf_dedup.cd_output_emission_hashes,
|
|
&i, &k, &v)) == 0)
|
|
{
|
|
const char *hval = (const char *) k;
|
|
ctf_id_t id_out = (ctf_id_t) (uintptr_t) v;
|
|
ctf_next_t *j = NULL;
|
|
ctf_dynset_t *type_ids;
|
|
const void *id;
|
|
|
|
type_ids = ctf_dynhash_lookup (d->cd_output_mapping, hval);
|
|
if (!ctf_assert (shared, type_ids))
|
|
return -1;
|
|
#ifdef ENABLE_LIBCTF_HASH_DEBUGGING
|
|
ctf_dprintf ("Traversing emission hash: hval %s\n", hval);
|
|
#endif
|
|
|
|
while ((err = ctf_dynset_cnext (type_ids, &j, &id)) == 0)
|
|
{
|
|
ctf_file_t *input = inputs[CTF_DEDUP_GID_TO_INPUT (id)];
|
|
ctf_id_t id_in = CTF_DEDUP_GID_TO_TYPE (id);
|
|
|
|
#ifdef ENABLE_LIBCTF_HASH_DEBUGGING
|
|
ctf_dprintf ("Adding mapping from %i/%lx to %lx\n",
|
|
CTF_DEDUP_GID_TO_INPUT (id), id_in, id_out);
|
|
#endif
|
|
ctf_add_type_mapping (input, id_in, output, id_out);
|
|
}
|
|
if (err != ECTF_NEXT_END)
|
|
{
|
|
ctf_next_destroy (i);
|
|
goto err;
|
|
}
|
|
}
|
|
if (err != ECTF_NEXT_END)
|
|
goto err;
|
|
|
|
return 0;
|
|
|
|
err:
|
|
ctf_err_warn (shared, 0, "iteration error populating the type mapping: %s",
|
|
ctf_errmsg (err));
|
|
return ctf_set_errno (shared, err);
|
|
}
|
|
|
|
/* Populate the type mapping machinery used by the rest of the linker,
|
|
by ctf_add_type, etc. */
|
|
static int
|
|
ctf_dedup_populate_type_mappings (ctf_file_t *output, ctf_file_t **inputs,
|
|
uint32_t ninputs)
|
|
{
|
|
size_t i;
|
|
|
|
if (ctf_dedup_populate_type_mapping (output, output, inputs) < 0)
|
|
{
|
|
ctf_err_warn (output, 0, "cannot populate type mappings for shared "
|
|
"CTF dict: %s", ctf_errmsg (ctf_errno (output)));
|
|
return -1; /* errno is set for us. */
|
|
}
|
|
|
|
for (i = 0; i < ninputs; i++)
|
|
{
|
|
if (ctf_dedup_populate_type_mapping (output, inputs[i], inputs) < 0)
|
|
{
|
|
ctf_err_warn (output, 0, "cannot populate type mappings for per-CU "
|
|
"CTF dict: %s", ctf_errmsg (ctf_errno (inputs[i])));
|
|
return ctf_set_errno (output, ctf_errno (inputs[i]));
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Emit deduplicated types into the outputs. The shared type repository is
|
|
OUTPUT, on which the ctf_dedup function must have already been called. The
|
|
PARENTS array contains the INPUTS index of the parent dict for every child
|
|
dict at the corresponding index in the INPUTS (for non-child dicts, the value
|
|
is undefined).
|
|
|
|
Return an array of fps with content emitted into them (starting with OUTPUT,
|
|
which is the parent of all others, then all the newly-generated outputs).
|
|
|
|
If CU_MAPPED is set, this is a first pass for a link with a non-empty CU
|
|
mapping: only one output will result. */
|
|
|
|
ctf_file_t **
|
|
ctf_dedup_emit (ctf_file_t *output, ctf_file_t **inputs, uint32_t ninputs,
|
|
uint32_t *parents, uint32_t *noutputs, int cu_mapped)
|
|
{
|
|
size_t num_outputs = 1; /* Always at least one output: us. */
|
|
ctf_file_t **outputs;
|
|
ctf_file_t **walk;
|
|
size_t i;
|
|
|
|
ctf_dprintf ("Triggering emission.\n");
|
|
if (ctf_dedup_walk_output_mapping (output, inputs, ninputs, parents,
|
|
ctf_dedup_emit_type, &cu_mapped) < 0)
|
|
return NULL; /* errno is set for us. */
|
|
|
|
ctf_dprintf ("Populating struct members.\n");
|
|
if (ctf_dedup_emit_struct_members (output, inputs, ninputs, parents) < 0)
|
|
return NULL; /* errno is set for us. */
|
|
|
|
if (ctf_dedup_populate_type_mappings (output, inputs, ninputs) < 0)
|
|
return NULL; /* errno is set for us. */
|
|
|
|
for (i = 0; i < ninputs; i++)
|
|
{
|
|
if (inputs[i]->ctf_dedup.cd_output)
|
|
num_outputs++;
|
|
}
|
|
|
|
if (!ctf_assert (output, !cu_mapped || (cu_mapped && num_outputs == 1)))
|
|
return NULL;
|
|
|
|
if ((outputs = calloc (num_outputs, sizeof (ctf_file_t *))) == NULL)
|
|
{
|
|
ctf_err_warn (output, 0, "out of memory allocating link outputs array");
|
|
ctf_set_errno (output, ENOMEM);
|
|
return NULL;
|
|
}
|
|
*noutputs = num_outputs;
|
|
|
|
walk = outputs;
|
|
*walk = output;
|
|
output->ctf_refcnt++;
|
|
walk++;
|
|
|
|
for (i = 0; i < ninputs; i++)
|
|
{
|
|
if (inputs[i]->ctf_dedup.cd_output)
|
|
{
|
|
*walk = inputs[i]->ctf_dedup.cd_output;
|
|
inputs[i]->ctf_dedup.cd_output = NULL;
|
|
walk++;
|
|
}
|
|
}
|
|
|
|
ctf_dedup_fini (output, outputs, num_outputs);
|
|
return outputs;
|
|
}
|