Added SDL_StepBackUTF8()

This commit is contained in:
Sam Lantinga 2024-10-09 09:43:23 -07:00
parent 1f08a03794
commit f8eac30276
5 changed files with 54 additions and 1 deletions

View File

@ -2472,13 +2472,14 @@ extern SDL_DECLSPEC char * SDLCALL SDL_strpbrk(const char *str, const char *brea
/**
* The Unicode REPLACEMENT CHARACTER codepoint.
*
* SDL_StepUTF8() and SDL_StepBackUTF8() report this codepoint when they
* encounter a UTF-8 string with encoding errors.
*
* This tends to render as something like a question mark in most places.
*
* \since This macro is available since SDL 3.0.0.
*
* \sa SDL_StepBackUTF8
* \sa SDL_StepUTF8
*/
#define SDL_INVALID_UNICODE_CODEPOINT 0xFFFD
@ -2528,6 +2529,35 @@ extern SDL_DECLSPEC char * SDLCALL SDL_strpbrk(const char *str, const char *brea
*/
extern SDL_DECLSPEC Uint32 SDLCALL SDL_StepUTF8(const char **pstr, size_t *pslen);
/**
* Decode a UTF-8 string in reverse, one Unicode codepoint at a time.
*
* This will go to the start of the previous Unicode codepoint in the string,
* move `*pstr` to that location and return that codepoint.
*
* If `*pstr` is already at (or before) `start`, or `pstr` is NULL, this
* function returns zero and does not move `*pstr` at all.
*
* Generally this function is called in a loop until it returns zero,
* adjusting its parameter each iteration.
*
* If an invalid UTF-8 sequence is encountered, this function returns
* SDL_INVALID_UNICODE_CODEPOINT. `*pstr` is still moved back in that case,
* so a loop over the string keeps making progress.
*
* Several things can generate invalid UTF-8 sequences, including overlong
* encodings, the use of UTF-16 surrogate values, and truncated data. Please
* refer to
* [RFC3629](https://www.ietf.org/rfc/rfc3629.txt)
* for details.
*
* \param start a pointer to the beginning of the UTF-8 string.
* \param pstr a pointer to a UTF-8 string pointer to be read and adjusted.
* \returns the previous Unicode codepoint in the string, or zero if `*pstr`
*          is already at the start of the string.
*
* \threadsafety It is safe to call this function from any thread.
*
* \since This function is available since SDL 3.0.0.
*
* \sa SDL_StepUTF8
* \sa SDL_INVALID_UNICODE_CODEPOINT
*/
extern SDL_DECLSPEC Uint32 SDLCALL SDL_StepBackUTF8(const char *start, const char **pstr);
/**
* Convert a single Unicode codepoint to UTF-8.
*

View File

@ -1176,6 +1176,7 @@ SDL3_0.0.0 {
SDL_wcsnstr;
SDL_wcsstr;
SDL_wcstol;
SDL_StepBackUTF8;
# extra symbols go here (don't modify this line)
local: *;
};

View File

@ -1201,3 +1201,4 @@
#define SDL_wcsnstr SDL_wcsnstr_REAL
#define SDL_wcsstr SDL_wcsstr_REAL
#define SDL_wcstol SDL_wcstol_REAL
#define SDL_StepBackUTF8 SDL_StepBackUTF8_REAL

View File

@ -1207,3 +1207,4 @@ SDL_DYNAPI_PROC(size_t,SDL_wcsnlen,(const wchar_t *a, size_t b),(a,b),return)
SDL_DYNAPI_PROC(wchar_t*,SDL_wcsnstr,(const wchar_t *a, const wchar_t *b, size_t c),(a,b,c),return)
SDL_DYNAPI_PROC(wchar_t*,SDL_wcsstr,(const wchar_t *a, const wchar_t *b),(a,b),return)
SDL_DYNAPI_PROC(long,SDL_wcstol,(const wchar_t *a, wchar_t **b, int c),(a,b,c),return)
SDL_DYNAPI_PROC(Uint32,SDL_StepBackUTF8,(const char *a, const char **b),(a,b),return)

View File

@ -265,6 +265,26 @@ Uint32 SDL_StepUTF8(const char **pstr, size_t *pslen)
return result;
}
// Decode the codepoint that ends just before `*pstr`, moving `*pstr` back to
// the first byte of that codepoint. Returns 0 without touching `*pstr` when
// `pstr` is NULL or `*pstr` is already at (or before) `start`.
Uint32 SDL_StepBackUTF8(const char *start, const char **pstr)
{
    if (!pstr) {
        return 0;
    }

    const char *end = *pstr;
    if (end <= start) {
        return 0;  // nothing precedes the current position
    }

    // Walk backwards past UTF-8 continuation bytes (bit pattern 10xxxxxx)
    // until we land on a byte that is not one, or run into `start`.
    const char *cursor = end;
    while (cursor != start) {
        --cursor;
        if ((*cursor & 0xC0) != 0x80) {
            break;
        }
    }

    // Publish the new position before decoding; the decoder only gets a
    // local copy of the pointer, so the caller's `*pstr` stays at the
    // beginning of the sequence we stepped over.
    const size_t span = (size_t)(end - cursor);
    *pstr = cursor;
    return StepUTF8(&cursor, span);
}
#if (SDL_SIZEOF_WCHAR_T == 2)
static Uint32 StepUTF16(const Uint16 **_str, const size_t slen)
{