ld: support --build-id=xx mode

This patch adds a new ld build-id computation mode, "xx", using
xxhash in its 128-bit mode.  The patch requires the xxhash-devel
headers to be installed, and uses the "all-inlined" model, so no
run-time or link-time library dependency exists.

The xxhash mode performs well, saving roughly 20% of total userspace
run time from an ld job over an 800MB shared library relative to sha1.
A good 128-bit hash should remain collision-resistant across a
population of distinct binaries numbering in the 2**32 - 2**64 range,
even though it is not a "crypto"-level hash.  Confirmations of this are
in progress.

         ld/configury: add --with-xxhash mode, different from gdb case
                       because only using it in inline mode

         ld/ldbuildid.c: add "xx" mode, #if WITH_XXHASH

         ld/NEWS, ld.texi: mention new option

         ld/lexsup.c: add enumeration of --build-id STYLES to --help

         ld/testsuite/ld-elf/build-id.exp: add test case for 0xHEX case
                                           and conditional for xx case;
                                           also, simplify tcl list syntax

https://inbox.sourceware.org/binutils/20240917201509.GB26396@redhat.com/

Signed-off-by: Frank Ch. Eigler <fche@redhat.com>
This commit is contained in:
Frank Ch. Eigler 2024-09-19 17:06:48 -04:00
parent 07d74e51ba
commit 2299dfd4ba
9 changed files with 228 additions and 54 deletions

View File

@ -1,5 +1,10 @@
-*- text -*-
Changes in 2.44:
* Add a "--build-id=xx" option, if built with the xxhash library. This
produces a 128-bit hash, 2-4x faster than md5 or sha1.
Changes in 2.43:
* Add support for LoongArch DT_RELR (compressed R_LARCH_RELATIVE).

View File

@ -269,6 +269,9 @@
/* Version number of package */
#undef VERSION
/* whether to use inline xxhash */
#undef WITH_XXHASH
/* Define to 1 if `lex' declares `yytext' as a `char *' by default, not a
`char[]'. */
#undef YYTEXT_POINTER

60
ld/configure vendored
View File

@ -806,6 +806,7 @@ infodir
docdir
oldincludedir
includedir
runstatedir
localstatedir
sharedstatedir
sysconfdir
@ -872,6 +873,7 @@ with_libiconv_prefix
with_libiconv_type
with_libintl_prefix
with_libintl_type
with_xxhash
with_system_zlib
with_zstd
'
@ -935,6 +937,7 @@ datadir='${datarootdir}'
sysconfdir='${prefix}/etc'
sharedstatedir='${prefix}/com'
localstatedir='${prefix}/var'
runstatedir='${localstatedir}/run'
includedir='${prefix}/include'
oldincludedir='/usr/include'
docdir='${datarootdir}/doc/${PACKAGE_TARNAME}'
@ -1187,6 +1190,15 @@ do
| -silent | --silent | --silen | --sile | --sil)
silent=yes ;;
-runstatedir | --runstatedir | --runstatedi | --runstated \
| --runstate | --runstat | --runsta | --runst | --runs \
| --run | --ru | --r)
ac_prev=runstatedir ;;
-runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \
| --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \
| --run=* | --ru=* | --r=*)
runstatedir=$ac_optarg ;;
-sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
ac_prev=sbindir ;;
-sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
@ -1324,7 +1336,7 @@ fi
for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \
datadir sysconfdir sharedstatedir localstatedir includedir \
oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
libdir localedir mandir
libdir localedir mandir runstatedir
do
eval ac_val=\$$ac_var
# Remove trailing slashes.
@ -1477,6 +1489,7 @@ Fine tuning of the installation directories:
--sysconfdir=DIR read-only single-machine data [PREFIX/etc]
--sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com]
--localstatedir=DIR modifiable single-machine data [PREFIX/var]
--runstatedir=DIR modifiable per-process data [LOCALSTATEDIR/run]
--libdir=DIR object code libraries [EPREFIX/lib]
--includedir=DIR C header files [PREFIX/include]
--oldincludedir=DIR C header files for non-gcc [/usr/include]
@ -1589,6 +1602,8 @@ Optional Packages:
--with-libintl-prefix[=DIR] search for libintl in DIR/include and DIR/lib
--without-libintl-prefix don't search for libintl in includedir and libdir
--with-libintl-type=TYPE type of library to search for (auto/static/shared)
--with-xxhash use inlined libxxhash for hashing (faster)
(auto/yes/no)
--with-system-zlib use installed libz
--with-zstd support zstd compressed debug sections
(default=auto)
@ -11683,7 +11698,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
#line 11686 "configure"
#line 11701 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@ -11789,7 +11804,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
#line 11792 "configure"
#line 11807 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@ -19086,6 +19101,45 @@ $as_echo "#define HAVE_DECL_GETOPT 1" >>confdefs.h
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to use xxhash" >&5
$as_echo_n "checking whether to use xxhash... " >&6; }
# Check whether --with-xxhash was given.
if test "${with_xxhash+set}" = set; then :
withval=$with_xxhash;
else
with_xxhash=auto
fi
if test "x$with_xxhash" != "xno"; then
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#define XXH_INLINE_ALL
#include <xxhash.h>
XXH128_hash_t r;
void foo (void) { r = XXH128("foo", 3, 0); }
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
with_xxhash=yes
$as_echo "#define WITH_XXHASH 1" >>confdefs.h
else
if test "$with_xxhash" = yes; then
as_fn_error $? "xxhash is missing or unusable" "$LINENO" 5
fi
with_xxhash=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $with_xxhash" >&5
$as_echo "$with_xxhash" >&6; }
# Link in zlib/zstd if we can. This allows us to read and write
# compressed debug sections.

View File

@ -424,6 +424,28 @@ if test $ld_cv_decl_getopt_unistd_h = yes; then
[Is the prototype for getopt in <unistd.h> in the expected format?])
fi
dnl xxhash support from gdbsupport/common.m4
dnl Only a compile test is performed: XXH_INLINE_ALL pulls the whole
dnl implementation in through the header, so no library is linked.
dnl --with-xxhash defaults to auto when not given on the command line.
AC_MSG_CHECKING([whether to use xxhash])
AC_ARG_WITH(xxhash,
AS_HELP_STRING([--with-xxhash], [use inlined libxxhash for hashing (faster) (auto/yes/no)]),
[], [with_xxhash=auto])
dnl Probe unless explicitly disabled.  On success define WITH_XXHASH; on
dnl failure error out only for an explicit yes, otherwise fall back to no.
if test "x$with_xxhash" != "xno"; then
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
#define XXH_INLINE_ALL
#include <xxhash.h>
XXH128_hash_t r;
void foo (void) { r = XXH128("foo", 3, 0); }
])],[
with_xxhash=yes
AC_DEFINE([WITH_XXHASH], 1, [whether to use inline xxhash])
],[
if test "$with_xxhash" = yes; then
AC_MSG_ERROR([xxhash is missing or unusable])
fi
with_xxhash=no])
fi
dnl Report the final decision, which is yes or no by this point.
AC_MSG_RESULT([$with_xxhash])
# Link in zlib/zstd if we can. This allows us to read and write
# compressed debug sections.
AM_ZLIB

View File

@ -3216,20 +3216,20 @@ maximum cache size to @var{size}.
Request the creation of a @code{.note.gnu.build-id} ELF note section
or a @code{.buildid} COFF section. The contents of the note are
unique bits identifying this linked file. @var{style} can be
@code{uuid} to use 128 random bits, @code{sha1} to use a 160-bit
@sc{SHA1} hash on the normative parts of the output contents,
@code{md5} to use a 128-bit @sc{MD5} hash on the normative parts of
the output contents, or @code{0x@var{hexstring}} to use a chosen bit
string specified as an even number of hexadecimal digits (@code{-} and
@code{uuid} to use 128 random bits; @code{sha1} to use a 160-bit
@sc{SHA1} hash, @code{md5} to use a 128-bit @sc{MD5} hash, or @code{xx}
to use a 128-bit @sc{XXHASH} on the normative parts of the output
contents; or @code{0x@var{hexstring}} to use a chosen bit string
specified as an even number of hexadecimal digits (@code{-} and
@code{:} characters between digit pairs are ignored). If @var{style}
is omitted, @code{sha1} is used.
The @code{md5} and @code{sha1} styles produces an identifier
that is always the same in an identical output file, but will be
unique among all nonidentical output files. It is not intended
to be compared as a checksum for the file's contents. A linked
file may be changed later by other tools, but the build ID bit
string identifying the original linked file does not change.
The @code{md5}, @code{sha1}, and @code{xx} styles produce an
identifier that is always the same in an identical output file, but
is almost certainly unique among all nonidentical output files. It
is not intended to be compared as a checksum for the file's contents.
A linked file may be changed later by other tools, but the build ID
bit string identifying the original linked file does not change.
Passing @code{none} for @var{style} disables the setting from any
@code{--build-id} options earlier on the command line.

View File

@ -23,6 +23,10 @@
#include "safe-ctype.h"
#include "md5.h"
#include "sha1.h"
#ifdef WITH_XXHASH
#define XXH_INLINE_ALL
#include <xxhash.h>
#endif
#include "ldbuildid.h"
#ifdef __MINGW32__
#include <windows.h>
@ -35,6 +39,9 @@ bool
validate_build_id_style (const char *style)
{
if ((streq (style, "md5")) || (streq (style, "sha1"))
#ifdef WITH_XXHASH
|| (streq (style, "xx"))
#endif
|| (streq (style, "uuid")) || (startswith (style, "0x")))
return true;
@ -47,6 +54,11 @@ compute_build_id_size (const char *style)
if (streq (style, "md5") || streq (style, "uuid"))
return 128 / 8;
#ifdef WITH_XXHASH
if (streq (style, "xx"))
return 128 / 8;
#endif
if (streq (style, "sha1"))
return 160 / 8;
@ -93,6 +105,16 @@ read_hex (const char xdigit)
return 0;
}
#ifdef WITH_XXHASH
static void
xx_process_bytes(const void* buffer, size_t size, void* state)
{
XXH3_128bits_update ((XXH3_state_t*) state, buffer, size);
}
#endif
bool
generate_build_id (bfd *abfd,
const char *style,
@ -100,7 +122,31 @@ generate_build_id (bfd *abfd,
unsigned char *id_bits,
int size ATTRIBUTE_UNUSED)
{
if (streq (style, "md5"))
#ifdef WITH_XXHASH
if (streq (style, "xx"))
{
XXH3_state_t* state = XXH3_createState ();
if (!state)
{
return false;
}
XXH3_128bits_reset (state);
if (!(*checksum_contents) (abfd, &xx_process_bytes, state))
{
XXH3_freeState (state);
return false;
}
XXH128_hash_t result = XXH3_128bits_digest (state);
XXH3_freeState (state);
/* Use canonical-endianness output. */
XXH128_canonical_t result_canon;
XXH128_canonicalFromHash (&result_canon, result);
memcpy (id_bits, &result_canon,
(size_t) size < sizeof (result) ? (size_t) size : sizeof (result));
}
else
#endif
if (streq (style, "md5"))
{
struct md5_ctx ctx;

View File

@ -2278,6 +2278,15 @@ elf_static_list_options (FILE *file)
{
fprintf (file, _("\
--build-id[=STYLE] Generate build ID note\n"));
/* DEFAULT_BUILD_ID_STYLE n/a here */
#ifdef WITH_XXHASH
fprintf (file, _("\
Styles: none,md5,sha1,xx,uuid,0xHEX\n"));
/* NB: testsuite/ld-elf/build-id.exp depends on this syntax */
#else
fprintf (file, _("\
Styles: none,md5,sha1,uuid,0xHEX\n"));
#endif
fprintf (file, _("\
--package-metadata[=JSON] Generate package metadata note\n"));
fprintf (file, _("\

View File

@ -36,42 +36,71 @@ if { !([istarget *-*-linux*]
return
}
run_ld_link_tests [list \
[list \
"pr28639a.o" \
"-r --build-id=md5" \
"" \
"" \
{start.s} \
{{readelf {--notes} pr28639a.rd}} \
"pr28639a.o" \
] \
[list \
"pr28639a.o" \
"-r --build-id" \
"" \
"" \
{dummy.s} \
{{readelf {--notes} pr28639b.rd}} \
"pr28639b.o" \
] \
[list \
"pr28639a" \
"--build-id tmpdir/pr28639a.o tmpdir/pr28639b.o" \
"" \
"" \
{dummy.s} \
{{readelf {--notes} pr28639b.rd} \
{readelf {--notes} pr28639c.rd}} \
"pr28639a" \
] \
[list \
"pr28639b" \
"--build-id=none tmpdir/pr28639a.o tmpdir/pr28639b.o" \
"" \
"" \
{dummy.s} \
{{readelf {--notes} pr28639d.rd}} \
"pr28639b" \
] \
]
# Candidate --build-id command-line forms.  The random 128-bit style is
# spelled "uuid"; "guid" is not a style that ld accepts.
set stylelist {"" "--build-id" "--build-id=none" "--build-id=md5"
"--build-id=sha1" "--build-id=uuid" "--build-id=0xdeadbeef"}
# Link tests for --build-id: the md5 style, the default style, a literal
# 0xdeadbeef ID, the default style over the two relocatable outputs, and
# --build-id=none.  Each entry pairs readelf --notes with a pr28639*.rd
# file -- presumably the expected-output pattern; the field layout is
# defined by run_ld_link_tests, which is not shown here (confirm there).
run_ld_link_tests {
{
"pr28639a.o"
"-r --build-id=md5"
""
""
{start.s}
{{readelf {--notes} pr28639a.rd}}
"pr28639a.o"
}
{
"pr28639b.o"
"-r --build-id"
""
""
{dummy.s}
{{readelf {--notes} pr28639b.rd}}
"pr28639b.o"
}
{
"pr28639a.o deadbeef"
"-r --build-id=0xdeadbeef"
""
""
{start.s}
{{readelf {--notes} pr28639e.rd}}
"pr28639a.o"
}
{
"pr28639a"
"--build-id tmpdir/pr28639a.o tmpdir/pr28639b.o"
""
""
{dummy.s}
{{readelf {--notes} pr28639b.rd}
{readelf {--notes} pr28639c.rd}}
"pr28639a"
}
{
"pr28639b"
"--build-id=none tmpdir/pr28639a.o tmpdir/pr28639b.o"
""
""
{dummy.s}
{{readelf {--notes} pr28639d.rd}}
"pr28639b"
}
}
# See if the linker supports the xx style also: ld --help lists ",xx,"
# among the --build-id styles only when it was built with xxhash.
# The xx ID is 16 bytes, the same size as md5, so the md5 readelf
# pattern pr28639a.rd is reused.  This note has to live out here: inside
# the braced test list a "#" does not start a comment, and its words
# would become extra list elements, shifting every later field that
# run_ld_link_tests reads from the entry.
catch "exec $ld --help | grep -A2 -- --build-id | grep Styles" tmp
if {[string first ",xx," $tmp] >= 0} then {
    run_ld_link_tests {
        {
            "pr28639a.o xx"
            "-r --build-id=xx"
            ""
            ""
            {start.s}
            {{readelf {--notes} pr28639a.rd}}
            "pr28639a.o"
        }
    }
}

View File

@ -0,0 +1,6 @@
#...
Displaying notes found in: \.note\.gnu\.build-id
Owner Data size Description
GNU 0x00000004 NT_GNU_BUILD_ID \(unique build ID bitstring\)
Build ID: deadbeef
#pass