git/gettext.c
Johannes Schindelin c4137be0f5 gettext: avoid using gettext if the locale dir is not present
In cc5e1bf992 (gettext: avoid initialization if the locale dir is not
present, 2018-04-21) Git was taught to avoid a costly gettext start-up
when there are not even any localized messages to work with.

But we still called `gettext()` and `ngettext()` functions.

Which caused a problem in Git for Windows when the libgettext that is
consumed from the MSYS2 project stopped using a runtime prefix in
https://github.com/msys2/MINGW-packages/pull/10461

Due to that change, we now use an uninitialized gettext machinery that
might get auto-initialized _using an unintended locale directory_:
`C:\mingw64\share\locale`.

Let's record the fact when the gettext initialization was skipped, and
skip calling the gettext functions accordingly.

This addresses CVE-2023-25815.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
2023-04-17 21:15:39 +02:00

169 lines
3.4 KiB
C

/*
* Copyright (c) 2010 Ævar Arnfjörð Bjarmason
*/
#include "cache.h"
#include "exec-cmd.h"
#include "gettext.h"
#include "strbuf.h"
#include "utf8.h"
#include "config.h"
#ifndef NO_GETTEXT
# include <locale.h>
# include <libintl.h>
# ifdef GIT_WINDOWS_NATIVE
/*
 * Fallback locale_charset() for native Windows builds.
 *
 * Looks at LC_ALL, LC_CTYPE and LANG, in that order, and settles on the
 * first one that is set to a non-empty value (if none is, whatever
 * getenv("LANG") returned is used). Returns "UTF-8" when that value is
 * NULL; otherwise the text following the first '.', or the whole value
 * when it contains no '.'.
 *
 * The returned pointer is either a string literal or points into the
 * environment; the caller must not free it.
 */
static const char *locale_charset(void)
{
	static const char *const variables[] = { "LC_ALL", "LC_CTYPE", "LANG" };
	const char *value = NULL;
	const char *separator;
	size_t i;

	for (i = 0; i < sizeof(variables) / sizeof(variables[0]); i++) {
		value = getenv(variables[i]);
		if (value && *value)
			break;
	}

	if (!value)
		return "UTF-8";

	separator = strchr(value, '.');
	if (separator)
		return separator + 1;
	return value;
}
# elif defined HAVE_LIBCHARSET_H
# include <libcharset.h>
# else
# include <langinfo.h>
# define locale_charset() nl_langinfo(CODESET)
# endif
#endif
/*
 * Character set name consulted by is_utf8_locale(). It is assigned by
 * init_gettext_charset() when gettext support is compiled in, or lazily
 * by is_utf8_locale() itself when NO_GETTEXT is defined; it is NULL
 * until one of those has run.
 */
static const char *charset;
/*
 * Work out which languages the user would like to see.
 *
 * A non-empty LANGUAGE environment variable wins; its value can be a
 * colon-separated list like "ko:ja:en". Otherwise, and only if
 * NO_GETTEXT is not defined, the current LC_MESSAGES locale is returned
 * unless it is empty, "C" or "POSIX".
 *
 * Returns NULL when no preference could be determined.
 */
const char *get_preferred_languages(void)
{
	const char *preferred = getenv("LANGUAGE");

	if (preferred && *preferred)
		return preferred;

#ifndef NO_GETTEXT
	/* Passing NULL only queries the current locale; nothing is changed. */
	preferred = setlocale(LC_MESSAGES, NULL);
	if (!preferred || !*preferred)
		return NULL;
	if (!strcmp(preferred, "C") || !strcmp(preferred, "POSIX"))
		return NULL;
	return preferred;
#else
	return NULL;
#endif
}
/*
 * Return the value of git_env_bool("GIT_TEST_GETTEXT_POISON", 0).
 *
 * The answer is kept in a function-local static, where -1 means "not
 * looked up yet", so later calls reuse the stored value instead of
 * calling git_env_bool() again.
 */
int use_gettext_poison(void)
{
	static int cached_answer = -1;

	if (cached_answer != -1)
		return cached_answer;

	cached_answer = git_env_bool("GIT_TEST_GETTEXT_POISON", 0);
	return cached_answer;
}
#ifndef NO_GETTEXT
/*
 * Run vsnprintf() on `fmt` and its variadic arguments into a small
 * throwaway buffer and hand back vsnprintf()'s return value. The
 * formatted text itself is discarded; callers only look at the result
 * code (negative means vsnprintf() reported an error).
 */
static int test_vsnprintf(const char *fmt, ...)
{
	char scratch[26];
	va_list args;
	int result;

	va_start(args, fmt);
	result = vsnprintf(scratch, sizeof(scratch), fmt, args);
	va_end(args);

	return result;
}
/*
 * Pick the output character set for the message catalog `domain`.
 *
 * LC_CTYPE is first set from the environment, then the name reported by
 * locale_charset() is stored in the file-level `charset` and bound to
 * `domain` via bind_textdomain_codeset(). Finally LC_CTYPE may be reset
 * to "C"; see the comment in the body.
 */
static void init_gettext_charset(const char *domain)
{
	int vsnprintf_failed;

	setlocale(LC_CTYPE, "");
	charset = locale_charset();
	bind_textdomain_codeset(domain, charset);

	/*
	 * Workaround for an old bug that was fixed in glibc 2.17
	 * (released on 2012-12-24). The price is that translated
	 * messages coming from external functions such as perror()
	 * may be emitted in the wrong encoding.
	 *
	 * The bug bit, for example, git.git's own 7eb93c89651
	 * ([PATCH] Simplify git script, 2005-09-07), which is where
	 * the "David_K\345gedal" test string originates.
	 *
	 * A much longer comment was added to this file in 5e9637c6297
	 * (i18n: add infrastructure for translating Git with gettext,
	 * 2011-11-18); see it for more details.
	 */
	vsnprintf_failed = test_vsnprintf("%.*s", 13, "David_K\345gedal") < 0;
	if (vsnprintf_failed)
		setlocale(LC_CTYPE, "C");
}
/*
 * Set to 1 by git_setup_gettext() once gettext has been initialized;
 * stays 0 when that initialization is skipped because the locale
 * directory is not present.
 */
int git_gettext_enabled = 0;
/*
 * Initialize gettext for the "git" text domain, provided a locale
 * directory exists.
 *
 * The directory comes from the environment variable named by
 * GIT_TEXT_DOMAIN_DIR_ENVIRONMENT or, when that is unset, from
 * system_path(GIT_LOCALE_PATH). If it is not a directory, nothing is
 * initialized and git_gettext_enabled is left untouched; otherwise the
 * text domain is bound, LC_MESSAGES and LC_TIME are taken from the
 * environment, the charset is set up and git_gettext_enabled becomes 1.
 */
void git_setup_gettext(void)
{
	char *allocated_dir = NULL;
	const char *locale_dir = getenv(GIT_TEXT_DOMAIN_DIR_ENVIRONMENT);

	if (!locale_dir) {
		allocated_dir = system_path(GIT_LOCALE_PATH);
		locale_dir = allocated_dir;
	}

	use_gettext_poison(); /* getenv() reentrancy paranoia */

	if (is_directory(locale_dir)) {
		bindtextdomain("git", locale_dir);
		setlocale(LC_MESSAGES, "");
		setlocale(LC_TIME, "");
		init_gettext_charset("git");
		textdomain("git");
		git_gettext_enabled = 1;
	}

	/* Only the system_path() result is owned here; free(NULL) is a no-op. */
	free(allocated_dir);
}
/*
 * Return the number of columns string 's' occupies in the current
 * locale: utf8_strwidth(s) when is_utf8_locale() says the locale is
 * UTF-8, strlen(s) otherwise. The locale check happens on the first
 * call and its result is remembered for subsequent calls.
 */
int gettext_width(const char *s)
{
	static int utf8_locale = -1;

	if (utf8_locale == -1)
		utf8_locale = is_utf8_locale();

	if (utf8_locale)
		return utf8_strwidth(s);
	return strlen(s);
}
#endif
/*
 * Return is_encoding_utf8() applied to the file-level `charset`.
 *
 * When NO_GETTEXT is defined and `charset` has not been set yet, it is
 * derived here first: the first of LC_ALL, LC_CTYPE and LANG that has a
 * non-empty value is chosen (falling back to whatever LANG holds, or ""
 * if LANG is unset), and a copy of the part after its first '.' -- or
 * of the whole value when there is no '.' -- is stored in `charset`.
 */
int is_utf8_locale(void)
{
#ifdef NO_GETTEXT
	if (!charset) {
		const char *value = getenv("LC_ALL");
		const char *dot;

		if (!value || !*value)
			value = getenv("LC_CTYPE");
		if (!value || !*value)
			value = getenv("LANG");
		if (!value)
			value = "";

		dot = strchr(value, '.');
		charset = xstrdup(dot ? dot + 1 : value);
	}
#endif
	return is_encoding_utf8(charset);
}