crt: Avoid best-fit mapping when constructing argv for main()

__getmainargs() parses the command line from the return value of
GetCommandLineA() which uses best-fit mapping when converting the
native wide-char command line to the process code page. This can
create security issues. For example, fullwidth quotation mark (U+FF02)
may get converted to ASCII quotation mark (U+0022), which will break
argument quoting and can result in argument injection, for example,
if malicious filenames are passed as an argument to a program. There
are other security issues with best-fit mapping too.

Call __wgetmainargs() to get wide-argv and convert it to narrow-argv
without best-fit mapping. If conversion isn't lossless, print an
error message and _exit(255) without calling main() at all. While
this might not be ideal with every application, with most applications
a lossy conversion would be a "garbage in, garbage out" situation.
For example, lossy conversion of filenames doesn't make any sense.

Note that if _dowildcard is set, then filenames from wildcard expansion
can prevent the application from running if those filenames contain
characters that cannot be converted losslessly.

Setting the process code page to UTF-8 using an application manifest
would also fix the issue (apart from unpaired surrogates which are
invalid UTF-16 but legal on Windows command line and in filenames).
Setting UTF-8 in a manifest is only supported on Windows 10 version 1903
and later, and switching to UTF-8 could create new issues in some apps.
The method in this commit works on old Windows versions too. Even with
UTF-8, this commit matters because it blocks unpaired surrogates on the
command line.

The best-fit conversion issue affects a large number of applications
that use main() instead of wmain(). It's better to fix the issue at
toolchain level instead of trying to fix every application separately.
Examples of applications where this has already been reported:

  - The report about the issue in curl has more technical details:
    https://hackerone.com/reports/2550951

  - In XZ Utils the issue was already solved by setting UTF-8 code page:
    https://tukaani.org/xz/#_argument_injection_on_windows
    (CVE-2024-47611)

Thanks to Orange Tsai and splitline from DEVCORE Research Team
for discovering this issue.

Signed-off-by: Lasse Collin <lasse.collin@tukaani.org>
Signed-off-by: LIU Hao <lh_mouse@126.com>
This commit is contained in:
Lasse Collin 2024-10-17 21:09:15 +03:00 committed by LIU Hao
parent 04ec40f5bc
commit 0d42217123

View File

@ -17,6 +17,7 @@
#include <signal.h>
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include <tchar.h>
#include <sect_attribs.h>
#include <locale.h>
@ -67,7 +68,10 @@ extern LPTOP_LEVEL_EXCEPTION_FILTER __mingw_oldexcpt_handler;
extern void _pei386_runtime_relocator (void);
long CALLBACK _gnu_exception_handler (EXCEPTION_POINTERS * exception_data);
#ifdef _UNICODE
static void duplicate_ppstrings (int ac, _TCHAR ***av);
#endif
static int __cdecl pre_c_init (void);
static void __cdecl pre_cpp_init (void);
@ -130,7 +134,58 @@ pre_cpp_init (void)
#ifdef _UNICODE
argret = __wgetmainargs(&argc,&argv,&envp,_dowildcard,&startinfo);
#else
argret = __getmainargs(&argc,&argv,&envp,_dowildcard,&startinfo);
// Get argv as wide chars and convert it to active code page
// without best-fit mapping to avoid security issues.
// Environment will be taken directly as narrow chars later.
wchar_t **wargv;
wchar_t **wenvp_dummy;
argret = __wgetmainargs(&argc,&wargv,&wenvp_dummy,_dowildcard,&startinfo);
size_t buf_size = 0;
for (int i = 0; i < argc; ++i) {
BOOL conv_was_lossy = TRUE;
int size = WideCharToMultiByte(CP_ACP, WC_NO_BEST_FIT_CHARS,
wargv[i], -1,
NULL, 0,
NULL, &conv_was_lossy);
if (size <= 0 || conv_was_lossy) {
fprintf(stderr,
"Error: Command line contains characters that are not supported\n"
"in the active code page (%u).\n", GetACP());
_exit(255);
}
buf_size += size;
}
argv = malloc((argc + 1) * sizeof(char *));
char *buf = malloc(buf_size);
if (argv == NULL || buf == NULL)
_amsg_exit(8); // R6008 "not enough space for arguments" with MSVCRT
for (int i = 0; i < argc; ++i) {
int size = WideCharToMultiByte(CP_ACP, WC_NO_BEST_FIT_CHARS,
wargv[i], -1,
buf, buf_size,
NULL, NULL);
argv[i] = buf;
buf += size;
buf_size -= size;
}
argv[argc] = NULL;
// Initialize narrow environment.
//
// IMPORANT: Don't expand wildcards as that alone can be a security issue
// with __getmainargs! Best-fit mapping can result in arguments that point
// to network shares and wildcard expansion of such arguments could then
// result in unexpected network access.
int argc_dummy;
char **argv_dummy;
(void)__getmainargs(&argc_dummy,&argv_dummy,&envp,0,&startinfo);
#endif
}
@ -249,7 +304,10 @@ __tmainCRTStartup (void)
_fpreset ();
#ifdef _UNICODE
// Narrow argv is already a local copy so only duplicate the wide version.
duplicate_ppstrings (argc, &argv);
#endif
__main (); /* C++ initialization. */
#ifdef _UNICODE
__winitenv = envp;
@ -307,6 +365,7 @@ check_managed_app (void)
return 0;
}
#ifdef _UNICODE
static void duplicate_ppstrings (int ac, _TCHAR ***av)
{
_TCHAR **avl;
@ -323,6 +382,7 @@ static void duplicate_ppstrings (int ac, _TCHAR ***av)
n[i] = NULL;
*av = n;
}
#endif
int __cdecl atexit (_PVFV func)
{