binutils-gdb/bfd/elf-strtab.c
Nick Alcock 211bcd0133 bfd, ld, libctf: skip zero-refcount strings in CTF string reporting
This is a tricky one.  BFD, on the linker's behalf, reports symbols to
libctf via the ctf_new_symbol and ctf_new_dynsym callbacks, which
ultimately call ctf_link_add_linker_symbol.  But while this happens
after strtab offsets are finalized, it happens before the .dynstr is
actually laid out, so we can't iterate over it at this stage and
it is not clear what the reported symbols are actually called.  So
a second callback, examine_strtab, is called after the .dynstr is
finalized, which calls ctf_link_add_strtab and ultimately leads
to ldelf_ctf_strtab_iter_cb being called back repeatedly until the
offsets of every string in the .dynstr is passed to libctf.

libctf can then use this to get symbol names out of the input (which
usually stores symbol types in the form of a name -> type mapping at
this stage) and extract the types of those symbols, feeding them back
into their final form as a 1:1 association with the real symtab's
STT_OBJ and STT_FUNC symbols (with a few skipped, see
ctf_symtab_skippable).

This representation is compact, but has one problem: if libctf somehow
gets confused about the st_type of a symbol, it'll stick an entry into
the function symtypetab when it should put it into the object
symtypetab, or vice versa, and *every symbol from that one on* will have
the wrong CTF type because it's actually looking up the type for a
different symbol.

And we have just such a bug.  ctf_link_add_strtab was not taking the
refcounts of strings into consideration, so even strings that had been
eliminated from the strtab by virtue of being in objects eliminated via
--as-needed etc were being reported.  This is harmful because it can
lead to multiple strings with the same apparent offset, and if the last
duplicate to be reported relates to an eliminated symbol, we look up the
wrong symbol from the input and gets its type wrong: if it's unlucky and
the eliminated symbol is also of the wrong st_type, we will end up with
a corrupted symtypetab.

Thankfully the wrong-st_type case is already diagnosed by a
this-can-never-happen paranoid warning:

  CTF warning: Symbol 61a added to CTF as a function but is of type 1

or the converse

 * CTF warning: Symbol a3 added to CTF as a data object but is of type 2

so at least we can tell when the corruption has spread to more than one
symbol's type.

Skipping zero-refcounted strings is easy: teach _bfd_elf_strtab_str to
skip them, and ldelf_ctf_strtab_iter_cb to loop over skipped strings
until it falls off the end or finds one that isn't skipped.

bfd/ChangeLog
2021-03-02  Nick Alcock  <nick.alcock@oracle.com>

	* elf-strtab.c (_bfd_elf_strtab_str): Skip strings with zero refcount.

ld/ChangeLog
2021-03-02  Nick Alcock  <nick.alcock@oracle.com>

	* ldelfgen.c (ldelf_ctf_strtab_iter_cb): Skip zero-refcount strings.

libctf/ChangeLog
2021-03-02  Nick Alcock  <nick.alcock@oracle.com>

	* ctf-create.c (symtypetab_density): Report the symbol name as
	well as index in the name != object error; note the likely
	consequences.
	* ctf-link.c (ctf_link_shuffle_syms): Report the symbol index
	as well as name.
2021-03-02 15:10:10 +00:00

472 lines
11 KiB
C

/* ELF strtab with GC and suffix merging support.
Copyright (C) 2001-2021 Free Software Foundation, Inc.
Written by Jakub Jelinek <jakub@redhat.com>.
This file is part of BFD, the Binary File Descriptor library.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
MA 02110-1301, USA. */
#include "sysdep.h"
#include "bfd.h"
#include "libbfd.h"
#include "elf-bfd.h"
#include "hashtab.h"
#include "libiberty.h"
/* An entry in the strtab hash table. */
struct elf_strtab_hash_entry
{
struct bfd_hash_entry root;
/* Length of this entry. This includes the zero terminator. */
int len;
unsigned int refcount;
union {
/* Index within the merged section. */
bfd_size_type index;
/* Entry this is a suffix of (if len < 0). */
struct elf_strtab_hash_entry *suffix;
} u;
};
/* The strtab hash table. */
struct elf_strtab_hash
{
struct bfd_hash_table table;
/* Next available index. */
size_t size;
/* Number of array entries alloced. */
size_t alloced;
/* Final strtab size. */
bfd_size_type sec_size;
/* Array of pointers to strtab entries. */
struct elf_strtab_hash_entry **array;
};
/* Routine to create an entry in a section merge hashtab. */
static struct bfd_hash_entry *
elf_strtab_hash_newfunc (struct bfd_hash_entry *entry,
struct bfd_hash_table *table,
const char *string)
{
/* Allocate the structure if it has not already been allocated by a
subclass. */
if (entry == NULL)
entry = (struct bfd_hash_entry *)
bfd_hash_allocate (table, sizeof (struct elf_strtab_hash_entry));
if (entry == NULL)
return NULL;
/* Call the allocation method of the superclass. */
entry = bfd_hash_newfunc (entry, table, string);
if (entry)
{
/* Initialize the local fields. */
struct elf_strtab_hash_entry *ret;
ret = (struct elf_strtab_hash_entry *) entry;
ret->u.index = -1;
ret->refcount = 0;
ret->len = 0;
}
return entry;
}
/* Create a new hash table. */
struct elf_strtab_hash *
_bfd_elf_strtab_init (void)
{
struct elf_strtab_hash *table;
size_t amt = sizeof (struct elf_strtab_hash);
table = (struct elf_strtab_hash *) bfd_malloc (amt);
if (table == NULL)
return NULL;
if (!bfd_hash_table_init (&table->table, elf_strtab_hash_newfunc,
sizeof (struct elf_strtab_hash_entry)))
{
free (table);
return NULL;
}
table->sec_size = 0;
table->size = 1;
table->alloced = 64;
amt = sizeof (struct elf_strtab_hasn_entry *);
table->array = ((struct elf_strtab_hash_entry **)
bfd_malloc (table->alloced * amt));
if (table->array == NULL)
{
free (table);
return NULL;
}
table->array[0] = NULL;
return table;
}
/* Free a strtab. */
void
_bfd_elf_strtab_free (struct elf_strtab_hash *tab)
{
bfd_hash_table_free (&tab->table);
free (tab->array);
free (tab);
}
/* Get the index of an entity in a hash table, adding it if it is not
already present. */
size_t
_bfd_elf_strtab_add (struct elf_strtab_hash *tab,
const char *str,
bfd_boolean copy)
{
register struct elf_strtab_hash_entry *entry;
/* We handle this specially, since we don't want to do refcounting
on it. */
if (*str == '\0')
return 0;
BFD_ASSERT (tab->sec_size == 0);
entry = (struct elf_strtab_hash_entry *)
bfd_hash_lookup (&tab->table, str, TRUE, copy);
if (entry == NULL)
return (size_t) -1;
entry->refcount++;
if (entry->len == 0)
{
entry->len = strlen (str) + 1;
/* 2G strings lose. */
BFD_ASSERT (entry->len > 0);
if (tab->size == tab->alloced)
{
bfd_size_type amt = sizeof (struct elf_strtab_hash_entry *);
tab->alloced *= 2;
tab->array = (struct elf_strtab_hash_entry **)
bfd_realloc_or_free (tab->array, tab->alloced * amt);
if (tab->array == NULL)
return (size_t) -1;
}
entry->u.index = tab->size++;
tab->array[entry->u.index] = entry;
}
return entry->u.index;
}
void
_bfd_elf_strtab_addref (struct elf_strtab_hash *tab, size_t idx)
{
if (idx == 0 || idx == (size_t) -1)
return;
BFD_ASSERT (tab->sec_size == 0);
BFD_ASSERT (idx < tab->size);
++tab->array[idx]->refcount;
}
void
_bfd_elf_strtab_delref (struct elf_strtab_hash *tab, size_t idx)
{
if (idx == 0 || idx == (size_t) -1)
return;
BFD_ASSERT (tab->sec_size == 0);
BFD_ASSERT (idx < tab->size);
BFD_ASSERT (tab->array[idx]->refcount > 0);
--tab->array[idx]->refcount;
}
unsigned int
_bfd_elf_strtab_refcount (struct elf_strtab_hash *tab, size_t idx)
{
return tab->array[idx]->refcount;
}
void
_bfd_elf_strtab_clear_all_refs (struct elf_strtab_hash *tab)
{
size_t idx;
for (idx = 1; idx < tab->size; idx++)
tab->array[idx]->refcount = 0;
}
/* Save strtab refcounts prior to adding --as-needed library. */
struct strtab_save
{
size_t size;
unsigned int refcount[1];
};
void *
_bfd_elf_strtab_save (struct elf_strtab_hash *tab)
{
struct strtab_save *save;
size_t idx, size;
size = sizeof (*save) + (tab->size - 1) * sizeof (save->refcount[0]);
save = bfd_malloc (size);
if (save == NULL)
return save;
save->size = tab->size;
for (idx = 1; idx < tab->size; idx++)
save->refcount[idx] = tab->array[idx]->refcount;
return save;
}
/* Restore strtab refcounts on finding --as-needed library not needed. */
void
_bfd_elf_strtab_restore (struct elf_strtab_hash *tab, void *buf)
{
size_t idx, curr_size = tab->size, save_size;
struct strtab_save *save = (struct strtab_save *) buf;
BFD_ASSERT (tab->sec_size == 0);
save_size = 1;
if (save != NULL)
save_size = save->size;
BFD_ASSERT (save_size <= curr_size);
tab->size = save_size;
for (idx = 1; idx < save_size; ++idx)
tab->array[idx]->refcount = save->refcount[idx];
for (; idx < curr_size; ++idx)
{
/* We don't remove entries from the hash table, just set their
REFCOUNT to zero. Setting LEN zero will result in the size
growing if the entry is added again. See _bfd_elf_strtab_add. */
tab->array[idx]->refcount = 0;
tab->array[idx]->len = 0;
}
}
bfd_size_type
_bfd_elf_strtab_size (struct elf_strtab_hash *tab)
{
return tab->sec_size ? tab->sec_size : tab->size;
}
bfd_size_type
_bfd_elf_strtab_len (struct elf_strtab_hash *tab)
{
return tab->size;
}
bfd_size_type
_bfd_elf_strtab_offset (struct elf_strtab_hash *tab, size_t idx)
{
struct elf_strtab_hash_entry *entry;
if (idx == 0)
return 0;
BFD_ASSERT (idx < tab->size);
BFD_ASSERT (tab->sec_size);
entry = tab->array[idx];
BFD_ASSERT (entry->refcount > 0);
entry->refcount--;
return tab->array[idx]->u.index;
}
const char *
_bfd_elf_strtab_str (struct elf_strtab_hash *tab, size_t idx,
bfd_size_type *offset)
{
if (idx == 0)
return NULL;
BFD_ASSERT (idx < tab->size);
BFD_ASSERT (tab->sec_size);
if (tab->array[idx]->refcount == 0)
return NULL;
if (offset)
*offset = tab->array[idx]->u.index;
return tab->array[idx]->root.string;
}
bfd_boolean
_bfd_elf_strtab_emit (register bfd *abfd, struct elf_strtab_hash *tab)
{
bfd_size_type off = 1;
size_t i;
if (bfd_bwrite ("", 1, abfd) != 1)
return FALSE;
for (i = 1; i < tab->size; ++i)
{
register const char *str;
register unsigned int len;
BFD_ASSERT (tab->array[i]->refcount == 0);
len = tab->array[i]->len;
if ((int) len < 0)
continue;
str = tab->array[i]->root.string;
if (bfd_bwrite (str, len, abfd) != len)
return FALSE;
off += len;
}
BFD_ASSERT (off == tab->sec_size);
return TRUE;
}
/* Compare two elf_strtab_hash_entry structures. Called via qsort.
Won't ever return zero as all entries differ, so there is no issue
with qsort stability here. */
static int
strrevcmp (const void *a, const void *b)
{
struct elf_strtab_hash_entry *A = *(struct elf_strtab_hash_entry **) a;
struct elf_strtab_hash_entry *B = *(struct elf_strtab_hash_entry **) b;
unsigned int lenA = A->len;
unsigned int lenB = B->len;
const unsigned char *s = (const unsigned char *) A->root.string + lenA - 1;
const unsigned char *t = (const unsigned char *) B->root.string + lenB - 1;
int l = lenA < lenB ? lenA : lenB;
while (l)
{
if (*s != *t)
return (int) *s - (int) *t;
s--;
t--;
l--;
}
return lenA - lenB;
}
static inline int
is_suffix (const struct elf_strtab_hash_entry *A,
const struct elf_strtab_hash_entry *B)
{
if (A->len <= B->len)
/* B cannot be a suffix of A unless A is equal to B, which is guaranteed
not to be equal by the hash table. */
return 0;
return memcmp (A->root.string + (A->len - B->len),
B->root.string, B->len - 1) == 0;
}
/* This function assigns final string table offsets for used strings,
merging strings matching suffixes of longer strings if possible. */
void
_bfd_elf_strtab_finalize (struct elf_strtab_hash *tab)
{
struct elf_strtab_hash_entry **array, **a, *e;
bfd_size_type amt, sec_size;
size_t size, i;
/* Sort the strings by suffix and length. */
amt = tab->size;
amt *= sizeof (struct elf_strtab_hash_entry *);
array = (struct elf_strtab_hash_entry **) bfd_malloc (amt);
if (array == NULL)
goto alloc_failure;
for (i = 1, a = array; i < tab->size; ++i)
{
e = tab->array[i];
if (e->refcount)
{
*a++ = e;
/* Adjust the length to not include the zero terminator. */
e->len -= 1;
}
else
e->len = 0;
}
size = a - array;
if (size != 0)
{
qsort (array, size, sizeof (struct elf_strtab_hash_entry *), strrevcmp);
/* Loop over the sorted array and merge suffixes. Start from the
end because we want eg.
s1 -> "d"
s2 -> "bcd"
s3 -> "abcd"
to end up as
s3 -> "abcd"
s2 _____^
s1 _______^
ie. we don't want s1 pointing into the old s2. */
e = *--a;
e->len += 1;
while (--a >= array)
{
struct elf_strtab_hash_entry *cmp = *a;
cmp->len += 1;
if (is_suffix (e, cmp))
{
cmp->u.suffix = e;
cmp->len = -cmp->len;
}
else
e = cmp;
}
}
alloc_failure:
free (array);
/* Assign positions to the strings we want to keep. */
sec_size = 1;
for (i = 1; i < tab->size; ++i)
{
e = tab->array[i];
if (e->refcount && e->len > 0)
{
e->u.index = sec_size;
sec_size += e->len;
}
}
tab->sec_size = sec_size;
/* Adjust the rest. */
for (i = 1; i < tab->size; ++i)
{
e = tab->array[i];
if (e->refcount && e->len < 0)
e->u.index = e->u.suffix->u.index + (e->u.suffix->len + e->len);
}
}