- FromWide(): converts a wchar_t * to UTF-8 char *

(if local charset is UTF-8, this is similar to wcstombs())
- FromUTF16(): converts a host-order UTF-16 byte sequence to UTF-8
This commit is contained in:
Rémi Denis-Courmont 2006-03-21 16:42:34 +00:00
parent e65878a6ef
commit 647cc79956
3 changed files with 118 additions and 6 deletions

View File

@ -47,6 +47,16 @@ int utf8_fprintf( FILE *, const char *, ... );
VLC_EXPORT( char *, EnsureUTF8, ( char * ) );
VLC_EXPORT( char *, FromUTF32, ( const uint32_t * ) );
VLC_EXPORT( char *, FromUTF16, ( const uint16_t * ) );
/**
 * FromWide(): converts a wide character string to UTF-8.
 *
 * The conversion routine is selected from the size of wchar_t: when
 * wchar_t is two bytes wide the input is handed to FromUTF16(), otherwise
 * it is handed to FromUTF32().
 *
 * @param in wide character string to convert.
 *
 * @return whatever FromUTF16() or FromUTF32() returns for that input.
 */
static inline char *FromWide( const wchar_t *in )
{
    /* sizeof() is a compile-time constant: only one branch is ever taken */
    if( sizeof( wchar_t ) == 2 )
        return FromUTF16( (const uint16_t *)in );

    return FromUTF32( (const uint32_t *)in );
}
VLC_EXPORT( char *, __vlc_fix_readdir_charset, ( vlc_object_t *, const char * ) );
#define vlc_fix_readdir_charset(a,b) __vlc_fix_readdir_charset(VLC_OBJECT(a),b)

View File

@ -484,6 +484,7 @@ struct module_symbols_t
char * (*convert_xml_special_chars_inner) (const char *psz_content);
char * (*decode_encoded_URI_duplicate_inner) (const char *psz);
void (*resolve_xml_special_chars_inner) (char *psz_value);
char * (*FromUTF16_inner) (const uint16_t *);
};
# if defined (__PLUGIN__)
# define aout_FiltersCreatePipeline (p_symbols)->aout_FiltersCreatePipeline_inner
@ -948,6 +949,7 @@ struct module_symbols_t
# define convert_xml_special_chars (p_symbols)->convert_xml_special_chars_inner
# define decode_encoded_URI_duplicate (p_symbols)->decode_encoded_URI_duplicate_inner
# define resolve_xml_special_chars (p_symbols)->resolve_xml_special_chars_inner
# define FromUTF16 (p_symbols)->FromUTF16_inner
# elif defined (HAVE_DYNAMIC_PLUGINS) && !defined (__BUILTIN__)
/******************************************************************
* STORE_SYMBOLS: store VLC APIs into p_symbols for plugin access.
@ -1415,6 +1417,7 @@ struct module_symbols_t
((p_symbols)->convert_xml_special_chars_inner) = convert_xml_special_chars; \
((p_symbols)->decode_encoded_URI_duplicate_inner) = decode_encoded_URI_duplicate; \
((p_symbols)->resolve_xml_special_chars_inner) = resolve_xml_special_chars; \
((p_symbols)->FromUTF16_inner) = FromUTF16; \
(p_symbols)->net_ConvertIPv4_deprecated = NULL; \
(p_symbols)->__stats_CounterGet_deprecated = NULL; \
(p_symbols)->__stats_TimerDumpAll_deprecated = NULL; \

View File

@ -1,11 +1,14 @@
/*****************************************************************************
* unicode.c: UTF8 <-> locale functions
* unicode.c: Unicode <-> locale functions
*****************************************************************************
* Copyright (C) 2005-2006 the VideoLAN team
* $Id$
*
* Authors: Rémi Denis-Courmont <rem # videolan.org>
*
* UTF16toUTF8() adapted from Perl 5 (also GPL'd)
* Copyright (C) 1998-2002, Larry Wall
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
@ -655,9 +658,10 @@ error:
}
/**
* UTF32toUTF8(): converts an array from UTF-32 to UTF-8.
* UTF32toUTF8(): converts an array from UTF-32 (host byte order)
* to UTF-8.
*
* @param src the UTF32 table to be converted
* @param src the UTF-32 table to be converted
* @param len the number of code points to be converted from src
* (ie. the number of uint32_t in the table pointed to by src)
* @param newlen an optional pointer. If not NULL, *newlen will
@ -666,7 +670,8 @@ error:
* @return the result of the conversion (must be free'd())
* or NULL on error (in that case, *newlen is undefined).
*/
char *UTF32toUTF8( const uint32_t *src, size_t len, size_t *newlen )
static char *
UTF32toUTF8( const uint32_t *src, size_t len, size_t *newlen )
{
char *res, *out;
@ -725,17 +730,111 @@ char *UTF32toUTF8( const uint32_t *src, size_t len, size_t *newlen )
/**
 * FromUTF32(): converts an UTF-32 string to UTF-8.
 *
 * @param src zero-terminated UTF-32 string in host byte order,
 *            aligned on a 32-bit boundary.
 *
 * @return the result of the conversion (must be free()'d),
 *         or NULL in case of error.
 */
char *FromUTF32( const uint32_t *src )
{
    const uint32_t *in;
    size_t len;

    /* Determine the size of the string. The count starts at 1 so that the
     * terminating zero is converted too and ends the UTF-8 result. */
    for( len = 1, in = src; *in; len++ )
        in++;

    return UTF32toUTF8( src, len, NULL );
}
/**
 * UTF16toUTF8(): converts an array from UTF-16 (host byte order) to UTF-8.
 *
 * A high surrogate (0xD800-0xDBFF) immediately followed by a low surrogate
 * (0xDC00-0xDFFF) is combined into a single code point and written as a
 * 4-byte sequence. Any unpaired surrogate (a high surrogate that is not
 * followed by a low surrogate, including at the end of the input, or a low
 * surrogate on its own) is replaced by '?'. The code unit that follows a
 * malformed high surrogate is NOT skipped: it is converted normally.
 *
 * @param in the UTF-16 table to be converted, aligned on a 16-bit boundary
 * @param len the number of code units to be converted from in
 *            (ie. the number of uint16_t in the table pointed to by in)
 * @param newlen an optional pointer. If not NULL, *newlen will
 *               receive the size in bytes of the result.
 *
 * @return the result of the conversion (must be free()'d),
 *         or NULL on error (in that case, *newlen is undefined).
 */
static char *
UTF16toUTF8( const uint16_t *in, size_t len, size_t *newlen )
{
    char *res, *out;
    size_t outlen;

    /* The worst case is 3 bytes per code unit (a surrogate pair only needs
     * 4 bytes for 2 units). Refuse lengths whose tripled size would wrap. */
    if( len > ((size_t)-1) / 3 )
        return NULL;

    /* Always ask for at least one byte: malloc(0) may return NULL */
    out = res = (char *)malloc( len ? 3 * len : 1 );
    if( res == NULL )
        return NULL;

    while( len > 0 )
    {
        uint32_t uv = *in;
        in++;
        len--;

        if( uv < 0x80 )
        {
            *out++ = (char)uv;
            continue;
        }
        if( uv < 0x800 )
        {
            *out++ = (char)(( uv >>  6)         | 0xc0);
            *out++ = (char)(( uv        & 0x3f) | 0x80);
            continue;
        }
        if( (uv >= 0xd800) && (uv <= 0xdbff) )
        {   /* high surrogate: peek at the next unit, if there is one */
            uint32_t low = (len > 0) ? *in : 0;

            if( (low < 0xdc00) || (low > 0xdfff) )
            {
                /* Malformed surrogate. Leave the next unit in place so a
                 * valid character (or the terminator) is not swallowed. */
                *out++ = '?';
                continue;
            }

            in++;
            len--;
            uv = ((uv - 0xd800) << 10) + (low - 0xdc00) + 0x10000;
        }
        else
        if( (uv >= 0xdc00) && (uv <= 0xdfff) )
        {   /* low surrogate without a preceding high surrogate */
            *out++ = '?';
            continue;
        }

        if( uv < 0x10000 )
        {
            *out++ = (char)(( uv >> 12)         | 0xe0);
            *out++ = (char)(((uv >>  6) & 0x3f) | 0x80);
            *out++ = (char)(( uv        & 0x3f) | 0x80);
        }
        else
        {
            *out++ = (char)(( uv >> 18)         | 0xf0);
            *out++ = (char)(((uv >> 12) & 0x3f) | 0x80);
            *out++ = (char)(((uv >>  6) & 0x3f) | 0x80);
            *out++ = (char)(( uv        & 0x3f) | 0x80);
        }
    }

    outlen = (size_t)(out - res);

    /* Give back the unused tail. A failed shrink is harmless (the original
     * buffer remains valid), and realloc() is never called with size 0. */
    if( outlen > 0 )
    {
        char *shrunk = (char *)realloc( res, outlen );
        if( shrunk != NULL )
            res = shrunk;
    }

    if( newlen != NULL )
        *newlen = outlen;
    return res;
}
/**
 * FromUTF16(): converts an UTF-16 string to UTF-8.
 *
 * @param src zero-terminated UTF-16 string in host byte order,
 *            aligned on a 16-bit boundary.
 *
 * @return the result of the conversion (must be free()'d),
 *         or NULL in case of error.
 */
char *FromUTF16( const uint16_t *src )
{
    const uint16_t *in;
    size_t len;

    /* Determine the size of the string, one code unit at a time. The count
     * starts at 1 so that the terminating zero is converted too and ends
     * the UTF-8 result. */
    for( len = 1, in = src; *in; len++ )
        in++;

    return UTF16toUTF8( src, len, NULL );
}