mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-27 03:33:33 +08:00
9bcd12d223
The GNU implementation of wcrtomb assumes that there are at least MB_CUR_MAX bytes available in the destination buffer passed to wcrtomb as the first argument. This is not compatible with the POSIX definition, which only requires enough space for the input wide character. This does not break much in practice because when users supply buffers smaller than MB_CUR_MAX (e.g. in ncurses), they compute and dynamically allocate the buffer, which results in enough spare space (thanks to usable_size in malloc and padding in alloca) that no actual buffer overflow occurs. However when the code is built with _FORTIFY_SOURCE, it runs into the hard check against MB_CUR_MAX in __wcrtomb_chk and hence fails. It wasn't evident until now since dynamic allocations would result in wcrtomb not being fortified but since _FORTIFY_SOURCE=3, that limitation is gone, resulting in such code failing. To fix this problem, introduce an internal buffer that is MB_LEN_MAX long and use that to perform the conversion and then copy the resultant bytes into the destination buffer. Also move the fortification check into the main implementation, which checks the result after conversion and aborts if the resultant byte count is greater than the destination buffer size. One complication is that applications that assume the MB_CUR_MAX limitation to be gone may not be able to run safely on older glibcs if they use static destination buffers smaller than MB_CUR_MAX; dynamic allocations will always have enough spare space that no actual overruns will occur. One alternative to fixing this is to bump symbol version to prevent them from running on older glibcs but that seems too strict a constraint. Instead, since these users will only have made this decision on reading the manual, I have put a note in the manual warning them about the pitfalls of having static buffers smaller than MB_CUR_MAX and running them on older glibc. 
Benchmarking: The wcrtomb microbenchmark shows significant increases in maximum execution time for all locales, ranging from 10x for ar_SA.UTF-8 to 1.5x-2x for nearly everything else. The mean execution time however saw practically no impact, with some results even being quicker, indicating that cache locality has a much bigger role in the overhead. Given that the additional copy uses a temporary buffer inside wcrtomb, it's likely that a hot path will end up putting that buffer (which is responsible for the additional overhead) in a similar place on stack, giving the necessary cache locality to negate the overhead. However in situations where wcrtomb ends up getting called at wildly different spots on the call stack (or is on different call stacks, e.g. with threads or different execution contexts) and is still a hotspot, the performance lag will be visible. Signed-off-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
130 lines
3.5 KiB
C
130 lines
3.5 KiB
C
/* Copyright (C) 1996-2022 Free Software Foundation, Inc.
|
|
Copyright The GNU Toolchain Authors.
|
|
This file is part of the GNU C Library.
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
The GNU C Library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with the GNU C Library; if not, see
|
|
<https://www.gnu.org/licenses/>. */
|
|
|
|
#include <assert.h>
|
|
#include <dlfcn.h>
|
|
#include <errno.h>
|
|
#include <gconv.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <wchar.h>
|
|
#include <wcsmbsload.h>
|
|
|
|
#include <sysdep.h>
|
|
|
|
#ifndef EILSEQ
|
|
# define EILSEQ EINVAL
|
|
#endif
|
|
|
|
|
|
/* Fallback conversion state.  __wcrtomb_internal points the gconv step
   at this object whenever its PS argument is NULL.  As a file-scope
   static it starts out zero-initialized, and it is shared by every call
   made with a null PS.  */
|
|
static mbstate_t state;
|
|
|
|
size_t
|
|
__wcrtomb_internal (char *s, wchar_t wc, mbstate_t *ps, size_t s_size)
|
|
{
|
|
char buf[MB_LEN_MAX];
|
|
struct __gconv_step_data data;
|
|
int status;
|
|
size_t result;
|
|
size_t dummy;
|
|
const struct gconv_fcts *fcts;
|
|
|
|
/* Set information for this step. */
|
|
data.__invocation_counter = 0;
|
|
data.__internal_use = 1;
|
|
data.__flags = __GCONV_IS_LAST;
|
|
data.__statep = ps ?: &state;
|
|
|
|
/* A first special case is if S is NULL. This means put PS in the
|
|
initial state. */
|
|
if (s == NULL)
|
|
wc = L'\0';
|
|
|
|
/* Tell where we want to have the result. */
|
|
data.__outbuf = (unsigned char *) buf;
|
|
data.__outbufend = (unsigned char *) buf + sizeof buf;
|
|
|
|
/* Get the conversion functions. */
|
|
fcts = get_gconv_fcts (_NL_CURRENT_DATA (LC_CTYPE));
|
|
__gconv_fct fct = fcts->tomb->__fct;
|
|
#ifdef PTR_DEMANGLE
|
|
if (fcts->tomb->__shlib_handle != NULL)
|
|
PTR_DEMANGLE (fct);
|
|
#endif
|
|
|
|
/* If WC is the NUL character we write into the output buffer the byte
|
|
sequence necessary for PS to get into the initial state, followed
|
|
by a NUL byte. */
|
|
if (wc == L'\0')
|
|
{
|
|
status = DL_CALL_FCT (fct, (fcts->tomb, &data, NULL, NULL,
|
|
NULL, &dummy, 1, 1));
|
|
|
|
if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT)
|
|
*data.__outbuf++ = '\0';
|
|
}
|
|
else
|
|
{
|
|
/* Do a normal conversion. */
|
|
const unsigned char *inbuf = (const unsigned char *) &wc;
|
|
|
|
status = DL_CALL_FCT (fct,
|
|
(fcts->tomb, &data, &inbuf,
|
|
inbuf + sizeof (wchar_t), NULL, &dummy, 0, 1));
|
|
}
|
|
|
|
/* There must not be any problems with the conversion but illegal input
|
|
characters. The output buffer must be large enough, otherwise the
|
|
definition of MB_CUR_MAX is not correct. All the other possible
|
|
errors also must not happen. */
|
|
assert (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT
|
|
|| status == __GCONV_ILLEGAL_INPUT
|
|
|| status == __GCONV_INCOMPLETE_INPUT
|
|
|| status == __GCONV_FULL_OUTPUT);
|
|
|
|
if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT
|
|
|| status == __GCONV_FULL_OUTPUT)
|
|
{
|
|
result = data.__outbuf - (unsigned char *) buf;
|
|
|
|
if (s != NULL)
|
|
{
|
|
if (result > s_size)
|
|
__chk_fail ();
|
|
|
|
memcpy (s, buf, result);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
result = (size_t) -1;
|
|
__set_errno (EILSEQ);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
size_t
|
|
__wcrtomb (char *s, wchar_t wc, mbstate_t *ps)
|
|
{
|
|
return __wcrtomb_internal (s, wc, ps, (size_t) -1);
|
|
}
|
|
weak_alias (__wcrtomb, wcrtomb)
|
|
libc_hidden_weak (wcrtomb)
|