mirror of
https://github.com/videolan/vlc.git
synced 2025-01-27 10:06:08 +08:00
Support URL-escape non-ASCII characters properly as far as possible
(ie. all characters from the Basic Multilingual Plane from Unicode)
This commit is contained in:
parent
798b0d97c4
commit
168549fa94
@ -1,7 +1,7 @@
|
||||
/*****************************************************************************
|
||||
* vlc_url.h: URL related macros
|
||||
*****************************************************************************
|
||||
* Copyright (C) 2002-2005 the VideoLAN team
|
||||
* Copyright (C) 2002-2006 the VideoLAN team
|
||||
* $Id$
|
||||
*
|
||||
* Authors: Christophe Massiot <massiot@via.ecp.fr>
|
||||
@ -174,6 +174,7 @@ static inline int isurlsafe( int c )
|
||||
return ( (unsigned char)( c - 'a' ) < 26 )
|
||||
|| ( (unsigned char)( c - 'A' ) < 26 )
|
||||
|| ( (unsigned char)( c - '0' ) < 10 )
|
||||
|| ( (unsigned char)( c ) > 127 )
|
||||
/* Hmm, we should not encode character that are allowed in URLs
|
||||
* (even if they are not URL-safe), nor URL-safe characters.
|
||||
* We still encode some of them because of Microsoft's crap browser.
|
||||
@ -181,6 +182,11 @@ static inline int isurlsafe( int c )
|
||||
|| ( strchr( "-_.", c ) != NULL );
|
||||
}
|
||||
|
||||
static inline char url_hexchar( int c )
|
||||
{
|
||||
return ( c < 10 ) ? c + '0' : c + 'A' - 10;
|
||||
}
|
||||
|
||||
/*****************************************************************************
|
||||
* vlc_UrlEncode:
|
||||
*****************************************************************************
|
||||
@ -189,14 +195,9 @@ static inline int isurlsafe( int c )
|
||||
*****************************************************************************/
|
||||
static inline char *vlc_UrlEncode( const char *psz_url )
|
||||
{
|
||||
char *psz_enc, *out;
|
||||
char psz_enc[3 * strlen( psz_url ) + 1], *out = psz_enc;
|
||||
const unsigned char *in;
|
||||
|
||||
psz_enc = (char *)malloc( 3 * strlen( psz_url ) + 1 );
|
||||
if( psz_enc == NULL )
|
||||
return NULL;
|
||||
|
||||
out = psz_enc;
|
||||
for( in = (const unsigned char *)psz_url; *in; in++ )
|
||||
{
|
||||
unsigned char c = *in;
|
||||
@ -205,16 +206,48 @@ static inline char *vlc_UrlEncode( const char *psz_url )
|
||||
*out++ = (char)c;
|
||||
else
|
||||
{
|
||||
uint16_t cp;
|
||||
|
||||
*out++ = '%';
|
||||
*out++ = ( ( c >> 4 ) >= 0xA ) ? 'A' + ( c >> 4 ) - 0xA
|
||||
: '0' + ( c >> 4 );
|
||||
*out++ = ( ( c & 0xf ) >= 0xA ) ? 'A' + ( c & 0xf ) - 0xA
|
||||
: '0' + ( c & 0xf );
|
||||
/* UTF-8 to UCS-2 conversion */
|
||||
if( ( c & 0x7f ) == 0 )
|
||||
cp = c;
|
||||
else
|
||||
if( ( c & 0xe0 ) == 0xc0 )
|
||||
{
|
||||
cp = ((c & 0x1f) << 6) | (in[1] & 0x3f);
|
||||
in++;
|
||||
}
|
||||
else
|
||||
if( ( c & 0xf0 ) == 0xe0 )
|
||||
{
|
||||
cp = ((c & 0xf) << 12) | ((in[1] & 0x3f) << 6) | (in[2] & 0x3f);
|
||||
in += 2;
|
||||
}
|
||||
else
|
||||
/* cannot URL-encode code points outside the BMP */
|
||||
return NULL;
|
||||
|
||||
if( cp < 0xff )
|
||||
{
|
||||
/* Encode ISO-8859-1 characters */
|
||||
*out++ = url_hexchar( cp >> 4 );
|
||||
*out++ = url_hexchar( cp & 0xf );
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Encode non-Latin-1 characters */
|
||||
*out++ = 'u';
|
||||
*out++ = url_hexchar( cp >> 12 );
|
||||
*out++ = url_hexchar((cp >> 8) & 0xf );
|
||||
*out++ = url_hexchar((cp >> 4) & 0xf );
|
||||
*out++ = url_hexchar( cp & 0xf );
|
||||
}
|
||||
}
|
||||
}
|
||||
*out++ = '\0';
|
||||
|
||||
return (char *)realloc( psz_enc, out - psz_enc );
|
||||
return strdup( psz_enc );
|
||||
}
|
||||
|
||||
/*****************************************************************************
|
||||
|
Loading…
Reference in New Issue
Block a user