PR 31283 windmc: Parse input correctly on big endian hosts

On big endian hosts (eg. s390x) the windmc tool fails to parse even
trivial files:

  $ cat test.mc
  ;
  $ ./binutils/windmc ./test.mc
  In test.mc at line 1: parser: syntax error.
  In test.mc at line 1: fatal: syntax error.

The tool starts by reading the input as Windows CP1252 and then
converting it internally into an array of UTF-16LE, which it then
processes as an array of unsigned short (typedef unichar).

There are lots of ways this is wrong, but in the specific case of big
endian machines the little endian pairs of bytes are byte-swapped.

For example, the ';' character in the input above is first converted
to UTF16-LE byte sequence { 0x3b, 0x00 }, which is then cast to
unsigned short.  On a big endian machine the first unichar appears to
be 0x3b00.  The lexer is unable to recognize this as the comment
character ((unichar)';') and so parsing fails.

The simple fix is to convert the input to UTF-16BE on big endian
machines (and do the reverse conversion when writing the output).

Fixes: https://sourceware.org/bugzilla/show_bug.cgi?id=31283
Signed-off-by: Richard W.M. Jones <rjones@redhat.com>
This commit is contained in:
Richard W.M. Jones 2024-01-24 12:25:23 +00:00 committed by Alan Modra
parent 16688387ed
commit 3f8f9745c7
4 changed files with 259 additions and 4 deletions

View File

@ -7,6 +7,9 @@
#endif
#define __CONFIG_H__ 1
/* Define if building universal (internal helper macro) */
#undef AC_APPLE_UNIVERSAL_BUILD
/* Should ar and ranlib use -D behavior by default? */
#undef DEFAULT_AR_DETERMINISTIC
@ -256,6 +259,18 @@
/* Version number of package */
#undef VERSION
/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
significant byte first (like Motorola and SPARC, unlike Intel). */
#if defined AC_APPLE_UNIVERSAL_BUILD
# if defined __BIG_ENDIAN__
# define WORDS_BIGENDIAN 1
# endif
#else
# ifndef WORDS_BIGENDIAN
# undef WORDS_BIGENDIAN
# endif
#endif
/* Define to 1 if `lex' declares `yytext' as a `char *' by default, not a
`char[]'. */
#undef YYTEXT_POINTER

230
binutils/configure vendored
View File

@ -4725,6 +4725,231 @@ $as_echo "$ac_cv_safe_to_define___extensions__" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether byte ordering is bigendian" >&5
$as_echo_n "checking whether byte ordering is bigendian... " >&6; }
if ${ac_cv_c_bigendian+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_cv_c_bigendian=unknown
# See if we're dealing with a universal compiler.
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#ifndef __APPLE_CC__
not a universal capable compiler
#endif
typedef int dummy;
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
# Check for potential -arch flags. It is not universal unless
# there are at least two -arch flags with different values.
ac_arch=
ac_prev=
for ac_word in $CC $CFLAGS $CPPFLAGS $LDFLAGS; do
if test -n "$ac_prev"; then
case $ac_word in
i?86 | x86_64 | ppc | ppc64)
if test -z "$ac_arch" || test "$ac_arch" = "$ac_word"; then
ac_arch=$ac_word
else
ac_cv_c_bigendian=universal
break
fi
;;
esac
ac_prev=
elif test "x$ac_word" = "x-arch"; then
ac_prev=arch
fi
done
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
if test $ac_cv_c_bigendian = unknown; then
# See if sys/param.h defines the BYTE_ORDER macro.
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <sys/types.h>
#include <sys/param.h>
int
main ()
{
#if ! (defined BYTE_ORDER && defined BIG_ENDIAN \
&& defined LITTLE_ENDIAN && BYTE_ORDER && BIG_ENDIAN \
&& LITTLE_ENDIAN)
bogus endian macros
#endif
;
return 0;
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
# It does; now see whether it defined to BIG_ENDIAN or not.
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <sys/types.h>
#include <sys/param.h>
int
main ()
{
#if BYTE_ORDER != BIG_ENDIAN
not big endian
#endif
;
return 0;
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
ac_cv_c_bigendian=yes
else
ac_cv_c_bigendian=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
if test $ac_cv_c_bigendian = unknown; then
# See if <limits.h> defines _LITTLE_ENDIAN or _BIG_ENDIAN (e.g., Solaris).
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <limits.h>
int
main ()
{
#if ! (defined _LITTLE_ENDIAN || defined _BIG_ENDIAN)
bogus endian macros
#endif
;
return 0;
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
# It does; now see whether it defined to _BIG_ENDIAN or not.
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <limits.h>
int
main ()
{
#ifndef _BIG_ENDIAN
not big endian
#endif
;
return 0;
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
ac_cv_c_bigendian=yes
else
ac_cv_c_bigendian=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
if test $ac_cv_c_bigendian = unknown; then
# Compile a test program.
if test "$cross_compiling" = yes; then :
# Try to guess by grepping values from an object file.
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
short int ascii_mm[] =
{ 0x4249, 0x4765, 0x6E44, 0x6961, 0x6E53, 0x7953, 0 };
short int ascii_ii[] =
{ 0x694C, 0x5454, 0x656C, 0x6E45, 0x6944, 0x6E61, 0 };
int use_ascii (int i) {
return ascii_mm[i] + ascii_ii[i];
}
short int ebcdic_ii[] =
{ 0x89D3, 0xE3E3, 0x8593, 0x95C5, 0x89C4, 0x9581, 0 };
short int ebcdic_mm[] =
{ 0xC2C9, 0xC785, 0x95C4, 0x8981, 0x95E2, 0xA8E2, 0 };
int use_ebcdic (int i) {
return ebcdic_mm[i] + ebcdic_ii[i];
}
extern int foo;
int
main ()
{
return use_ascii (foo) == use_ebcdic (foo);
;
return 0;
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
if grep BIGenDianSyS conftest.$ac_objext >/dev/null; then
ac_cv_c_bigendian=yes
fi
if grep LiTTleEnDian conftest.$ac_objext >/dev/null ; then
if test "$ac_cv_c_bigendian" = unknown; then
ac_cv_c_bigendian=no
else
# finding both strings is unlikely to happen, but who knows?
ac_cv_c_bigendian=unknown
fi
fi
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
$ac_includes_default
int
main ()
{
/* Are we little or big endian? From Harbison&Steele. */
union
{
long int l;
char c[sizeof (long int)];
} u;
u.l = 1;
return u.c[sizeof (long int) - 1] == 1;
;
return 0;
}
_ACEOF
if ac_fn_c_try_run "$LINENO"; then :
ac_cv_c_bigendian=no
else
ac_cv_c_bigendian=yes
fi
rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
conftest.$ac_objext conftest.beam conftest.$ac_ext
fi
fi
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_bigendian" >&5
$as_echo "$ac_cv_c_bigendian" >&6; }
case $ac_cv_c_bigendian in #(
yes)
$as_echo "#define WORDS_BIGENDIAN 1" >>confdefs.h
;; #(
no)
;; #(
universal)
$as_echo "#define AC_APPLE_UNIVERSAL_BUILD 1" >>confdefs.h
;; #(
*)
as_fn_error $? "unknown endianness
presetting ac_cv_c_bigendian=no (or yes) will help" "$LINENO" 5 ;;
esac
case `pwd` in
*\ * | *\ *)
{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Libtool does not cope well with whitespace in \`pwd\`" >&5
@ -10858,7 +11083,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
#line 10861 "configure"
#line 11086 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@ -10964,7 +11189,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
#line 10967 "configure"
#line 11192 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@ -16532,6 +16757,7 @@ if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then
as_fn_error $? "conditional \"am__fastdepCC\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
if test -z "${ENABLE_LIBCTF_TRUE}" && test -z "${ENABLE_LIBCTF_FALSE}"; then
as_fn_error $? "conditional \"ENABLE_LIBCTF\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5

View File

@ -31,6 +31,8 @@ AC_PROG_CC
AC_GNU_SOURCE
AC_USE_SYSTEM_EXTENSIONS
AC_C_BIGENDIAN
LT_INIT
ACX_LARGEFILE

View File

@ -771,7 +771,13 @@ wind_MultiByteToWideChar (rc_uint_type cp, const char *mb,
if (!mb || !iconv_name)
return 0;
iconv_t cd = iconv_open ("UTF-16LE", iconv_name);
iconv_t cd = iconv_open (
#if WORDS_BIGENDIAN
"UTF-16BE",
#else
"UTF-16LE",
#endif
iconv_name);
while (1)
{
@ -844,7 +850,13 @@ wind_WideCharToMultiByte (rc_uint_type cp, const unichar *u, char *mb, rc_uint_t
if (!u || !iconv_name)
return 0;
iconv_t cd = iconv_open (iconv_name, "UTF-16LE");
iconv_t cd = iconv_open (iconv_name,
#if WORDS_BIGENDIAN
"UTF-16BE"
#else
"UTF-16LE"
#endif
);
while (1)
{