mirror of
https://github.com/videolan/vlc.git
synced 2025-01-10 17:58:22 +08:00
- FromWide(): converts a wchar_t * string to a UTF-8 char * string
  (if the local charset is UTF-8, this is similar to wcstombs())
- FromUTF16(): converts a host-byte-order UTF-16 sequence to UTF-8
This commit is contained in:
parent
e65878a6ef
commit
647cc79956
@ -47,6 +47,16 @@ int utf8_fprintf( FILE *, const char *, ... );
|
||||
|
||||
VLC_EXPORT( char *, EnsureUTF8, ( char * ) );
|
||||
VLC_EXPORT( char *, FromUTF32, ( const uint32_t * ) );
|
||||
VLC_EXPORT( char *, FromUTF16, ( const uint16_t * ) );
|
||||
|
||||
/**
 * FromWide(): converts a wide character string to UTF-8.
 *
 * The conversion routine is chosen from the size of wchar_t: when it is
 * 2 bytes wide the input is handed to FromUTF16(), otherwise it is handed
 * to FromUTF32().
 *
 * @param in wide character string to convert
 *
 * @return whatever the selected conversion function returns
 */
static inline char *FromWide( const wchar_t *in )
{
    /* sizeof is a compile-time constant, so only one branch survives */
    if( sizeof( wchar_t ) == 2 )
        return FromUTF16( (const uint16_t *)in );

    return FromUTF32( (const uint32_t *)in );
}
|
||||
|
||||
|
||||
VLC_EXPORT( char *, __vlc_fix_readdir_charset, ( vlc_object_t *, const char * ) );
|
||||
#define vlc_fix_readdir_charset(a,b) __vlc_fix_readdir_charset(VLC_OBJECT(a),b)
|
||||
|
||||
|
@ -484,6 +484,7 @@ struct module_symbols_t
|
||||
char * (*convert_xml_special_chars_inner) (const char *psz_content);
|
||||
char * (*decode_encoded_URI_duplicate_inner) (const char *psz);
|
||||
void (*resolve_xml_special_chars_inner) (char *psz_value);
|
||||
char * (*FromUTF16_inner) (const uint16_t *);
|
||||
};
|
||||
# if defined (__PLUGIN__)
|
||||
# define aout_FiltersCreatePipeline (p_symbols)->aout_FiltersCreatePipeline_inner
|
||||
@ -948,6 +949,7 @@ struct module_symbols_t
|
||||
# define convert_xml_special_chars (p_symbols)->convert_xml_special_chars_inner
|
||||
# define decode_encoded_URI_duplicate (p_symbols)->decode_encoded_URI_duplicate_inner
|
||||
# define resolve_xml_special_chars (p_symbols)->resolve_xml_special_chars_inner
|
||||
# define FromUTF16 (p_symbols)->FromUTF16_inner
|
||||
# elif defined (HAVE_DYNAMIC_PLUGINS) && !defined (__BUILTIN__)
|
||||
/******************************************************************
|
||||
* STORE_SYMBOLS: store VLC APIs into p_symbols for plugin access.
|
||||
@ -1415,6 +1417,7 @@ struct module_symbols_t
|
||||
((p_symbols)->convert_xml_special_chars_inner) = convert_xml_special_chars; \
|
||||
((p_symbols)->decode_encoded_URI_duplicate_inner) = decode_encoded_URI_duplicate; \
|
||||
((p_symbols)->resolve_xml_special_chars_inner) = resolve_xml_special_chars; \
|
||||
((p_symbols)->FromUTF16_inner) = FromUTF16; \
|
||||
(p_symbols)->net_ConvertIPv4_deprecated = NULL; \
|
||||
(p_symbols)->__stats_CounterGet_deprecated = NULL; \
|
||||
(p_symbols)->__stats_TimerDumpAll_deprecated = NULL; \
|
||||
|
@ -1,11 +1,14 @@
|
||||
/*****************************************************************************
|
||||
* unicode.c: Unicode <-> locale functions
|
||||
*****************************************************************************
|
||||
* Copyright (C) 2005-2006 the VideoLAN team
|
||||
* $Id$
|
||||
*
|
||||
* Authors: Rémi Denis-Courmont <rem # videolan.org>
|
||||
*
|
||||
* UTF16toUTF8() adapted from Perl 5 (also GPL'd)
|
||||
* Copyright (C) 1998-2002, Larry Wall
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
@ -655,9 +658,10 @@ error:
|
||||
}
|
||||
|
||||
/**
|
||||
* UTF32toUTF8(): converts an array from UTF-32 (host byte order)
* to UTF-8.
*
* @param src the UTF-32 table to be converted
|
||||
* @param len the number of code points to be converted from src
|
||||
* (ie. the number of uint32_t in the table pointed to by src)
|
||||
* @param newlen an optional pointer. If not NULL, *newlen will
|
||||
@ -666,7 +670,8 @@ error:
|
||||
* @return the result of the conversion (must be free'd())
|
||||
* or NULL on error (in that case, *newlen is undefined).
|
||||
*/
|
||||
char *UTF32toUTF8( const uint32_t *src, size_t len, size_t *newlen )
|
||||
static char *
|
||||
UTF32toUTF8( const uint32_t *src, size_t len, size_t *newlen )
|
||||
{
|
||||
char *res, *out;
|
||||
|
||||
@ -725,17 +730,111 @@ char *UTF32toUTF8( const uint32_t *src, size_t len, size_t *newlen )
|
||||
/**
|
||||
* FromUTF32(): converts an UTF-32 string to UTF-8.
|
||||
*
|
||||
* @param src UTF-32 bytes sequence, aligned on a 32-bits boundary.
|
||||
*
|
||||
* @return the result of the conversion (must be free()'d),
|
||||
* or NULL in case of error.
|
||||
*/
|
||||
char *FromUTF32( const uint32_t *src )
{
    const uint32_t *in;
    size_t len;

    /* Determine the size of the string in 32-bit units. len starts at 1
     * so that the terminating zero is counted and converted as well.
     * The units are read directly (host byte order), matching what
     * UTF32toUTF8() is documented to expect. */
    for( len = 1, in = src; *in; len++ )
        in++;

    return UTF32toUTF8( src, len, NULL );
}
|
||||
|
||||
/**
 * UTF16toUTF8: converts UTF-16 (host byte order) to UTF-8
 *
 * Unpaired surrogates (a high surrogate that is not immediately followed
 * by a low surrogate, or a low surrogate on its own) are replaced by '?'.
 *
 * @param in UTF-16 code units, aligned on a 16-bits boundary
 * @param len number of uint16_t to convert
 * @param newlen an optional pointer. If not NULL, *newlen will
 *               receive the number of bytes stored in the result
 *
 * @return the result of the conversion (must be free()'d),
 *         or NULL on error (in that case, *newlen is left untouched).
 *         The result is nul-terminated only if the converted units
 *         include a terminating zero.
 */
static char *
UTF16toUTF8( const uint16_t *in, size_t len, size_t *newlen )
{
    char *res, *out;
    size_t outlen;

    /* A code unit expands to at most 3 bytes (a surrogate pair is two
     * units for 4 bytes). Refuse lengths whose product would overflow. */
    if( len > ((size_t)-1) / 3 )
        return NULL;

    /* never request 0 bytes: malloc(0) is allowed to return NULL */
    out = res = malloc( (len > 0) ? (3 * len) : 1 );
    if( res == NULL )
        return NULL;

    while( len > 0 )
    {
        uint32_t uv = *in;

        in++;
        len--;

        if( uv < 0x80 )
        {
            *out++ = (char)uv;
            continue;
        }
        if( uv < 0x800 )
        {
            *out++ = (char)(( uv >>  6)         | 0xc0);
            *out++ = (char)(( uv        & 0x3f) | 0x80);
            continue;
        }
        if( (uv >= 0xd800) && (uv <= 0xdbff) )
        {   /* high surrogate: a low surrogate must come right after it */
            uint32_t low;

            if( len == 0 )
            {
                *out++ = '?'; /* pair truncated by the end of the input */
                break;
            }

            low = *in; /* host byte order, like the rest of the input */
            if( (low < 0xdc00) || (low > 0xdfff) )
            {
                /* Malformed surrogate. The following unit is not consumed
                 * so that it (possibly the terminating zero) still gets
                 * converted on the next iteration. */
                *out++ = '?';
                continue;
            }

            in++;
            len--;
            uv = ((uv - 0xd800) << 10) + (low - 0xdc00) + 0x10000;
        }
        else if( (uv >= 0xdc00) && (uv <= 0xdfff) )
        {
            /* low surrogate without a preceding high surrogate */
            *out++ = '?';
            continue;
        }

        if( uv < 0x10000 )
        {
            *out++ = (char)(( uv >> 12)         | 0xe0);
            *out++ = (char)(((uv >>  6) & 0x3f) | 0x80);
            *out++ = (char)(( uv        & 0x3f) | 0x80);
        }
        else
        {
            *out++ = (char)(( uv >> 18)         | 0xf0);
            *out++ = (char)(((uv >> 12) & 0x3f) | 0x80);
            *out++ = (char)(((uv >>  6) & 0x3f) | 0x80);
            *out++ = (char)(( uv        & 0x3f) | 0x80);
        }
    }

    outlen = (size_t)(out - res);
    if( outlen > 0 )
    {
        /* Give the unused tail back. If shrinking fails, the original
         * buffer is still valid: keep it rather than leaking it. */
        char *shrunk = realloc( res, outlen );
        if( shrunk != NULL )
            res = shrunk;
    }

    if( newlen != NULL )
        *newlen = outlen;
    return res;
}
|
||||
|
||||
|
||||
/**
|
||||
* FromUTF16(): converts an UTF-16 string to UTF-8.
|
||||
*
|
||||
* @param src UTF-16 bytes sequence, aligned on a 16-bits boundary.
|
||||
*
|
||||
* @return the result of the conversion (must be free()'d),
|
||||
* or NULL in case of error.
|
||||
*/
|
||||
char *FromUTF16( const uint16_t *src )
{
    const uint16_t *in;
    size_t len;

    /* Determine the size of the string in 16-bit units. len starts at 1
     * so that the terminating zero is counted and converted as well.
     * "in" is a uint16_t pointer, so it must advance one element at a
     * time: stepping by two would skip every other unit, yield a wrong
     * length and possibly run past the terminator. */
    for( len = 1, in = src; *in; len++ )
        in++;

    return UTF16toUTF8( src, len, NULL );
}
|
||||
|
Loading…
Reference in New Issue
Block a user