2007-10-16 14:31:30 +08:00
|
|
|
#include "Python.h"
|
2020-04-08 08:01:56 +08:00
|
|
|
#include "pycore_abstract.h" // _PyIndex_Check()
|
2020-02-13 05:32:34 +08:00
|
|
|
#include "pycore_bytes_methods.h"
|
2007-10-16 14:31:30 +08:00
|
|
|
|
|
|
|
PyDoc_STRVAR_shared(_Py_isspace__doc__,
|
|
|
|
"B.isspace() -> bool\n\
|
|
|
|
\n\
|
|
|
|
Return True if all characters in B are whitespace\n\
|
|
|
|
and there is at least one character in B, False otherwise.");
|
|
|
|
|
|
|
|
PyObject*
|
|
|
|
_Py_bytes_isspace(const char *cptr, Py_ssize_t len)
|
|
|
|
{
|
2013-08-14 02:18:52 +08:00
|
|
|
const unsigned char *p
|
closes bpo-39605: Fix some casts to not cast away const. (GH-18453)
gcc -Wcast-qual turns up a number of instances of casting away constness of pointers. Some of these can be safely modified, by either:
Adding the const to the type cast, as in:
- return _PyUnicode_FromUCS1((unsigned char*)s, size);
+ return _PyUnicode_FromUCS1((const unsigned char*)s, size);
or, Removing the cast entirely, because it's not necessary (but probably was at one time), as in:
- PyDTrace_FUNCTION_ENTRY((char *)filename, (char *)funcname, lineno);
+ PyDTrace_FUNCTION_ENTRY(filename, funcname, lineno);
These changes will not change code, but they will make it much easier to check for errors in consts
2020-02-12 10:28:35 +08:00
|
|
|
= (const unsigned char *) cptr;
|
2013-08-14 02:18:52 +08:00
|
|
|
const unsigned char *e;
|
2007-10-16 14:31:30 +08:00
|
|
|
|
|
|
|
/* Shortcut for single character strings */
|
2009-04-28 04:39:49 +08:00
|
|
|
if (len == 1 && Py_ISSPACE(*p))
|
2007-10-16 14:31:30 +08:00
|
|
|
Py_RETURN_TRUE;
|
|
|
|
|
|
|
|
/* Special case for empty strings */
|
|
|
|
if (len == 0)
|
|
|
|
Py_RETURN_FALSE;
|
|
|
|
|
|
|
|
e = p + len;
|
|
|
|
for (; p < e; p++) {
|
2010-05-09 23:52:27 +08:00
|
|
|
if (!Py_ISSPACE(*p))
|
2007-10-16 14:31:30 +08:00
|
|
|
Py_RETURN_FALSE;
|
|
|
|
}
|
|
|
|
Py_RETURN_TRUE;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
PyDoc_STRVAR_shared(_Py_isalpha__doc__,
|
|
|
|
"B.isalpha() -> bool\n\
|
|
|
|
\n\
|
|
|
|
Return True if all characters in B are alphabetic\n\
|
|
|
|
and there is at least one character in B, False otherwise.");
|
|
|
|
|
|
|
|
PyObject*
|
|
|
|
_Py_bytes_isalpha(const char *cptr, Py_ssize_t len)
|
|
|
|
{
|
2013-08-14 02:18:52 +08:00
|
|
|
const unsigned char *p
|
closes bpo-39605: Fix some casts to not cast away const. (GH-18453)
gcc -Wcast-qual turns up a number of instances of casting away constness of pointers. Some of these can be safely modified, by either:
Adding the const to the type cast, as in:
- return _PyUnicode_FromUCS1((unsigned char*)s, size);
+ return _PyUnicode_FromUCS1((const unsigned char*)s, size);
or, Removing the cast entirely, because it's not necessary (but probably was at one time), as in:
- PyDTrace_FUNCTION_ENTRY((char *)filename, (char *)funcname, lineno);
+ PyDTrace_FUNCTION_ENTRY(filename, funcname, lineno);
These changes will not change code, but they will make it much easier to check for errors in consts
2020-02-12 10:28:35 +08:00
|
|
|
= (const unsigned char *) cptr;
|
2013-08-14 02:18:52 +08:00
|
|
|
const unsigned char *e;
|
2007-10-16 14:31:30 +08:00
|
|
|
|
|
|
|
/* Shortcut for single character strings */
|
2009-04-28 04:39:49 +08:00
|
|
|
if (len == 1 && Py_ISALPHA(*p))
|
2010-05-09 23:52:27 +08:00
|
|
|
Py_RETURN_TRUE;
|
2007-10-16 14:31:30 +08:00
|
|
|
|
|
|
|
/* Special case for empty strings */
|
|
|
|
if (len == 0)
|
2010-05-09 23:52:27 +08:00
|
|
|
Py_RETURN_FALSE;
|
2007-10-16 14:31:30 +08:00
|
|
|
|
|
|
|
e = p + len;
|
|
|
|
for (; p < e; p++) {
|
2010-05-09 23:52:27 +08:00
|
|
|
if (!Py_ISALPHA(*p))
|
|
|
|
Py_RETURN_FALSE;
|
2007-10-16 14:31:30 +08:00
|
|
|
}
|
|
|
|
Py_RETURN_TRUE;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
PyDoc_STRVAR_shared(_Py_isalnum__doc__,
|
|
|
|
"B.isalnum() -> bool\n\
|
|
|
|
\n\
|
|
|
|
Return True if all characters in B are alphanumeric\n\
|
|
|
|
and there is at least one character in B, False otherwise.");
|
|
|
|
|
|
|
|
PyObject*
|
|
|
|
_Py_bytes_isalnum(const char *cptr, Py_ssize_t len)
|
|
|
|
{
|
2013-08-14 02:18:52 +08:00
|
|
|
const unsigned char *p
|
closes bpo-39605: Fix some casts to not cast away const. (GH-18453)
gcc -Wcast-qual turns up a number of instances of casting away constness of pointers. Some of these can be safely modified, by either:
Adding the const to the type cast, as in:
- return _PyUnicode_FromUCS1((unsigned char*)s, size);
+ return _PyUnicode_FromUCS1((const unsigned char*)s, size);
or, Removing the cast entirely, because it's not necessary (but probably was at one time), as in:
- PyDTrace_FUNCTION_ENTRY((char *)filename, (char *)funcname, lineno);
+ PyDTrace_FUNCTION_ENTRY(filename, funcname, lineno);
These changes will not change code, but they will make it much easier to check for errors in consts
2020-02-12 10:28:35 +08:00
|
|
|
= (const unsigned char *) cptr;
|
2013-08-14 02:18:52 +08:00
|
|
|
const unsigned char *e;
|
2007-10-16 14:31:30 +08:00
|
|
|
|
|
|
|
/* Shortcut for single character strings */
|
2009-04-28 04:39:49 +08:00
|
|
|
if (len == 1 && Py_ISALNUM(*p))
|
2010-05-09 23:52:27 +08:00
|
|
|
Py_RETURN_TRUE;
|
2007-10-16 14:31:30 +08:00
|
|
|
|
|
|
|
/* Special case for empty strings */
|
|
|
|
if (len == 0)
|
2010-05-09 23:52:27 +08:00
|
|
|
Py_RETURN_FALSE;
|
2007-10-16 14:31:30 +08:00
|
|
|
|
|
|
|
e = p + len;
|
|
|
|
for (; p < e; p++) {
|
2010-05-09 23:52:27 +08:00
|
|
|
if (!Py_ISALNUM(*p))
|
|
|
|
Py_RETURN_FALSE;
|
2007-10-16 14:31:30 +08:00
|
|
|
}
|
|
|
|
Py_RETURN_TRUE;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2018-01-27 13:06:21 +08:00
|
|
|
PyDoc_STRVAR_shared(_Py_isascii__doc__,
|
|
|
|
"B.isascii() -> bool\n\
|
|
|
|
\n\
|
|
|
|
Return True if B is empty or all characters in B are ASCII,\n\
|
|
|
|
False otherwise.");
|
|
|
|
|
2018-01-28 08:59:12 +08:00
|
|
|
// Optimization is copied from ascii_decode in unicodeobject.c
|
2020-10-18 22:48:38 +08:00
|
|
|
/* Mask to quickly check whether a C 'size_t' contains a
|
2018-01-28 08:59:12 +08:00
|
|
|
non-ASCII, UTF8-encoded char. */
|
2020-10-18 22:48:38 +08:00
|
|
|
#if (SIZEOF_SIZE_T == 8)
|
|
|
|
# define ASCII_CHAR_MASK 0x8080808080808080ULL
|
|
|
|
#elif (SIZEOF_SIZE_T == 4)
|
|
|
|
# define ASCII_CHAR_MASK 0x80808080U
|
2018-01-28 08:59:12 +08:00
|
|
|
#else
|
2020-10-18 22:48:38 +08:00
|
|
|
# error C 'size_t' size should be either 4 or 8!
|
2018-01-28 08:59:12 +08:00
|
|
|
#endif
|
|
|
|
|
2018-01-27 13:06:21 +08:00
|
|
|
PyObject*
|
|
|
|
_Py_bytes_isascii(const char *cptr, Py_ssize_t len)
|
|
|
|
{
|
2018-01-28 08:59:12 +08:00
|
|
|
const char *p = cptr;
|
|
|
|
const char *end = p + len;
|
|
|
|
|
|
|
|
while (p < end) {
|
|
|
|
/* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
|
|
|
|
for an explanation. */
|
2021-03-31 18:12:39 +08:00
|
|
|
if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {
|
2018-01-28 08:59:12 +08:00
|
|
|
/* Help allocation */
|
|
|
|
const char *_p = p;
|
2021-03-31 18:12:39 +08:00
|
|
|
while (_p + SIZEOF_SIZE_T <= end) {
|
2020-10-18 22:48:38 +08:00
|
|
|
size_t value = *(const size_t *) _p;
|
2018-01-28 08:59:12 +08:00
|
|
|
if (value & ASCII_CHAR_MASK) {
|
|
|
|
Py_RETURN_FALSE;
|
|
|
|
}
|
2020-10-18 22:48:38 +08:00
|
|
|
_p += SIZEOF_SIZE_T;
|
2018-01-28 08:59:12 +08:00
|
|
|
}
|
|
|
|
p = _p;
|
|
|
|
if (_p == end)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if ((unsigned char)*p & 0x80) {
|
2018-01-27 13:06:21 +08:00
|
|
|
Py_RETURN_FALSE;
|
|
|
|
}
|
2018-01-28 08:59:12 +08:00
|
|
|
p++;
|
2018-01-27 13:06:21 +08:00
|
|
|
}
|
|
|
|
Py_RETURN_TRUE;
|
|
|
|
}
|
|
|
|
|
2018-01-28 08:59:12 +08:00
|
|
|
#undef ASCII_CHAR_MASK
|
|
|
|
|
2018-01-27 13:06:21 +08:00
|
|
|
|
2007-10-16 14:31:30 +08:00
|
|
|
PyDoc_STRVAR_shared(_Py_isdigit__doc__,
|
|
|
|
"B.isdigit() -> bool\n\
|
|
|
|
\n\
|
|
|
|
Return True if all characters in B are digits\n\
|
|
|
|
and there is at least one character in B, False otherwise.");
|
|
|
|
|
|
|
|
PyObject*
|
|
|
|
_Py_bytes_isdigit(const char *cptr, Py_ssize_t len)
|
|
|
|
{
|
2013-08-14 02:18:52 +08:00
|
|
|
const unsigned char *p
|
closes bpo-39605: Fix some casts to not cast away const. (GH-18453)
gcc -Wcast-qual turns up a number of instances of casting away constness of pointers. Some of these can be safely modified, by either:
Adding the const to the type cast, as in:
- return _PyUnicode_FromUCS1((unsigned char*)s, size);
+ return _PyUnicode_FromUCS1((const unsigned char*)s, size);
or, Removing the cast entirely, because it's not necessary (but probably was at one time), as in:
- PyDTrace_FUNCTION_ENTRY((char *)filename, (char *)funcname, lineno);
+ PyDTrace_FUNCTION_ENTRY(filename, funcname, lineno);
These changes will not change code, but they will make it much easier to check for errors in consts
2020-02-12 10:28:35 +08:00
|
|
|
= (const unsigned char *) cptr;
|
2013-08-14 02:18:52 +08:00
|
|
|
const unsigned char *e;
|
2007-10-16 14:31:30 +08:00
|
|
|
|
|
|
|
/* Shortcut for single character strings */
|
2009-04-28 04:39:49 +08:00
|
|
|
if (len == 1 && Py_ISDIGIT(*p))
|
2010-05-09 23:52:27 +08:00
|
|
|
Py_RETURN_TRUE;
|
2007-10-16 14:31:30 +08:00
|
|
|
|
|
|
|
/* Special case for empty strings */
|
|
|
|
if (len == 0)
|
2010-05-09 23:52:27 +08:00
|
|
|
Py_RETURN_FALSE;
|
2007-10-16 14:31:30 +08:00
|
|
|
|
|
|
|
e = p + len;
|
|
|
|
for (; p < e; p++) {
|
2010-05-09 23:52:27 +08:00
|
|
|
if (!Py_ISDIGIT(*p))
|
|
|
|
Py_RETURN_FALSE;
|
2007-10-16 14:31:30 +08:00
|
|
|
}
|
|
|
|
Py_RETURN_TRUE;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
PyDoc_STRVAR_shared(_Py_islower__doc__,
|
|
|
|
"B.islower() -> bool\n\
|
|
|
|
\n\
|
|
|
|
Return True if all cased characters in B are lowercase and there is\n\
|
|
|
|
at least one cased character in B, False otherwise.");
|
|
|
|
|
|
|
|
PyObject*
|
|
|
|
_Py_bytes_islower(const char *cptr, Py_ssize_t len)
|
|
|
|
{
|
2013-08-14 02:18:52 +08:00
|
|
|
const unsigned char *p
|
closes bpo-39605: Fix some casts to not cast away const. (GH-18453)
gcc -Wcast-qual turns up a number of instances of casting away constness of pointers. Some of these can be safely modified, by either:
Adding the const to the type cast, as in:
- return _PyUnicode_FromUCS1((unsigned char*)s, size);
+ return _PyUnicode_FromUCS1((const unsigned char*)s, size);
or, Removing the cast entirely, because it's not necessary (but probably was at one time), as in:
- PyDTrace_FUNCTION_ENTRY((char *)filename, (char *)funcname, lineno);
+ PyDTrace_FUNCTION_ENTRY(filename, funcname, lineno);
These changes will not change code, but they will make it much easier to check for errors in consts
2020-02-12 10:28:35 +08:00
|
|
|
= (const unsigned char *) cptr;
|
2013-08-14 02:18:52 +08:00
|
|
|
const unsigned char *e;
|
2007-10-16 14:31:30 +08:00
|
|
|
int cased;
|
|
|
|
|
|
|
|
/* Shortcut for single character strings */
|
|
|
|
if (len == 1)
|
2010-05-09 23:52:27 +08:00
|
|
|
return PyBool_FromLong(Py_ISLOWER(*p));
|
2007-10-16 14:31:30 +08:00
|
|
|
|
|
|
|
/* Special case for empty strings */
|
|
|
|
if (len == 0)
|
2010-05-09 23:52:27 +08:00
|
|
|
Py_RETURN_FALSE;
|
2007-10-16 14:31:30 +08:00
|
|
|
|
|
|
|
e = p + len;
|
|
|
|
cased = 0;
|
|
|
|
for (; p < e; p++) {
|
2010-05-09 23:52:27 +08:00
|
|
|
if (Py_ISUPPER(*p))
|
|
|
|
Py_RETURN_FALSE;
|
|
|
|
else if (!cased && Py_ISLOWER(*p))
|
|
|
|
cased = 1;
|
2007-10-16 14:31:30 +08:00
|
|
|
}
|
|
|
|
return PyBool_FromLong(cased);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
PyDoc_STRVAR_shared(_Py_isupper__doc__,
|
|
|
|
"B.isupper() -> bool\n\
|
|
|
|
\n\
|
|
|
|
Return True if all cased characters in B are uppercase and there is\n\
|
|
|
|
at least one cased character in B, False otherwise.");
|
|
|
|
|
|
|
|
PyObject*
|
|
|
|
_Py_bytes_isupper(const char *cptr, Py_ssize_t len)
|
|
|
|
{
|
2013-08-14 02:18:52 +08:00
|
|
|
const unsigned char *p
|
closes bpo-39605: Fix some casts to not cast away const. (GH-18453)
gcc -Wcast-qual turns up a number of instances of casting away constness of pointers. Some of these can be safely modified, by either:
Adding the const to the type cast, as in:
- return _PyUnicode_FromUCS1((unsigned char*)s, size);
+ return _PyUnicode_FromUCS1((const unsigned char*)s, size);
or, Removing the cast entirely, because it's not necessary (but probably was at one time), as in:
- PyDTrace_FUNCTION_ENTRY((char *)filename, (char *)funcname, lineno);
+ PyDTrace_FUNCTION_ENTRY(filename, funcname, lineno);
These changes will not change code, but they will make it much easier to check for errors in consts
2020-02-12 10:28:35 +08:00
|
|
|
= (const unsigned char *) cptr;
|
2013-08-14 02:18:52 +08:00
|
|
|
const unsigned char *e;
|
2007-10-16 14:31:30 +08:00
|
|
|
int cased;
|
|
|
|
|
|
|
|
/* Shortcut for single character strings */
|
|
|
|
if (len == 1)
|
2010-05-09 23:52:27 +08:00
|
|
|
return PyBool_FromLong(Py_ISUPPER(*p));
|
2007-10-16 14:31:30 +08:00
|
|
|
|
|
|
|
/* Special case for empty strings */
|
|
|
|
if (len == 0)
|
2010-05-09 23:52:27 +08:00
|
|
|
Py_RETURN_FALSE;
|
2007-10-16 14:31:30 +08:00
|
|
|
|
|
|
|
e = p + len;
|
|
|
|
cased = 0;
|
|
|
|
for (; p < e; p++) {
|
2010-05-09 23:52:27 +08:00
|
|
|
if (Py_ISLOWER(*p))
|
|
|
|
Py_RETURN_FALSE;
|
|
|
|
else if (!cased && Py_ISUPPER(*p))
|
|
|
|
cased = 1;
|
2007-10-16 14:31:30 +08:00
|
|
|
}
|
|
|
|
return PyBool_FromLong(cased);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
PyDoc_STRVAR_shared(_Py_istitle__doc__,
|
|
|
|
"B.istitle() -> bool\n\
|
|
|
|
\n\
|
|
|
|
Return True if B is a titlecased string and there is at least one\n\
|
|
|
|
character in B, i.e. uppercase characters may only follow uncased\n\
|
|
|
|
characters and lowercase characters only cased ones. Return False\n\
|
|
|
|
otherwise.");
|
|
|
|
|
|
|
|
PyObject*
|
|
|
|
_Py_bytes_istitle(const char *cptr, Py_ssize_t len)
|
|
|
|
{
|
2013-08-14 02:18:52 +08:00
|
|
|
const unsigned char *p
|
closes bpo-39605: Fix some casts to not cast away const. (GH-18453)
gcc -Wcast-qual turns up a number of instances of casting away constness of pointers. Some of these can be safely modified, by either:
Adding the const to the type cast, as in:
- return _PyUnicode_FromUCS1((unsigned char*)s, size);
+ return _PyUnicode_FromUCS1((const unsigned char*)s, size);
or, Removing the cast entirely, because it's not necessary (but probably was at one time), as in:
- PyDTrace_FUNCTION_ENTRY((char *)filename, (char *)funcname, lineno);
+ PyDTrace_FUNCTION_ENTRY(filename, funcname, lineno);
These changes will not change code, but they will make it much easier to check for errors in consts
2020-02-12 10:28:35 +08:00
|
|
|
= (const unsigned char *) cptr;
|
2013-08-14 02:18:52 +08:00
|
|
|
const unsigned char *e;
|
2007-10-16 14:31:30 +08:00
|
|
|
int cased, previous_is_cased;
|
|
|
|
|
2023-04-23 03:39:37 +08:00
|
|
|
if (len == 1) {
|
|
|
|
if (Py_ISUPPER(*p)) {
|
|
|
|
Py_RETURN_TRUE;
|
|
|
|
}
|
|
|
|
Py_RETURN_FALSE;
|
|
|
|
}
|
2007-10-16 14:31:30 +08:00
|
|
|
|
|
|
|
/* Special case for empty strings */
|
|
|
|
if (len == 0)
|
2010-05-09 23:52:27 +08:00
|
|
|
Py_RETURN_FALSE;
|
2007-10-16 14:31:30 +08:00
|
|
|
|
|
|
|
e = p + len;
|
|
|
|
cased = 0;
|
|
|
|
previous_is_cased = 0;
|
|
|
|
for (; p < e; p++) {
|
2013-08-14 02:18:52 +08:00
|
|
|
const unsigned char ch = *p;
|
2010-05-09 23:52:27 +08:00
|
|
|
|
|
|
|
if (Py_ISUPPER(ch)) {
|
|
|
|
if (previous_is_cased)
|
|
|
|
Py_RETURN_FALSE;
|
|
|
|
previous_is_cased = 1;
|
|
|
|
cased = 1;
|
|
|
|
}
|
|
|
|
else if (Py_ISLOWER(ch)) {
|
|
|
|
if (!previous_is_cased)
|
|
|
|
Py_RETURN_FALSE;
|
|
|
|
previous_is_cased = 1;
|
|
|
|
cased = 1;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
previous_is_cased = 0;
|
2007-10-16 14:31:30 +08:00
|
|
|
}
|
|
|
|
return PyBool_FromLong(cased);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
PyDoc_STRVAR_shared(_Py_lower__doc__,
|
|
|
|
"B.lower() -> copy of B\n\
|
|
|
|
\n\
|
|
|
|
Return a copy of B with all ASCII characters converted to lowercase.");
|
|
|
|
|
|
|
|
void
|
|
|
|
_Py_bytes_lower(char *result, const char *cptr, Py_ssize_t len)
|
|
|
|
{
|
2010-08-16 01:38:46 +08:00
|
|
|
Py_ssize_t i;
|
2007-10-16 14:31:30 +08:00
|
|
|
|
2010-08-16 01:38:46 +08:00
|
|
|
for (i = 0; i < len; i++) {
|
2012-01-08 23:22:46 +08:00
|
|
|
result[i] = Py_TOLOWER((unsigned char) cptr[i]);
|
2010-08-16 01:38:46 +08:00
|
|
|
}
|
2007-10-16 14:31:30 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
PyDoc_STRVAR_shared(_Py_upper__doc__,
|
|
|
|
"B.upper() -> copy of B\n\
|
|
|
|
\n\
|
|
|
|
Return a copy of B with all ASCII characters converted to uppercase.");
|
|
|
|
|
|
|
|
void
|
|
|
|
_Py_bytes_upper(char *result, const char *cptr, Py_ssize_t len)
|
|
|
|
{
|
2010-08-16 01:38:46 +08:00
|
|
|
Py_ssize_t i;
|
2007-10-16 14:31:30 +08:00
|
|
|
|
2010-08-16 01:38:46 +08:00
|
|
|
for (i = 0; i < len; i++) {
|
2012-01-08 23:22:46 +08:00
|
|
|
result[i] = Py_TOUPPER((unsigned char) cptr[i]);
|
2010-08-16 01:38:46 +08:00
|
|
|
}
|
2007-10-16 14:31:30 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
PyDoc_STRVAR_shared(_Py_title__doc__,
|
|
|
|
"B.title() -> copy of B\n\
|
|
|
|
\n\
|
|
|
|
Return a titlecased version of B, i.e. ASCII words start with uppercase\n\
|
|
|
|
characters, all remaining cased characters have lowercase.");
|
|
|
|
|
|
|
|
void
|
2015-12-26 02:01:53 +08:00
|
|
|
_Py_bytes_title(char *result, const char *s, Py_ssize_t len)
|
2007-10-16 14:31:30 +08:00
|
|
|
{
|
2010-08-16 01:38:46 +08:00
|
|
|
Py_ssize_t i;
|
|
|
|
int previous_is_cased = 0;
|
|
|
|
|
|
|
|
for (i = 0; i < len; i++) {
|
|
|
|
int c = Py_CHARMASK(*s++);
|
|
|
|
if (Py_ISLOWER(c)) {
|
|
|
|
if (!previous_is_cased)
|
|
|
|
c = Py_TOUPPER(c);
|
|
|
|
previous_is_cased = 1;
|
|
|
|
} else if (Py_ISUPPER(c)) {
|
|
|
|
if (previous_is_cased)
|
|
|
|
c = Py_TOLOWER(c);
|
|
|
|
previous_is_cased = 1;
|
|
|
|
} else
|
|
|
|
previous_is_cased = 0;
|
|
|
|
*result++ = c;
|
|
|
|
}
|
2007-10-16 14:31:30 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
PyDoc_STRVAR_shared(_Py_capitalize__doc__,
|
|
|
|
"B.capitalize() -> copy of B\n\
|
|
|
|
\n\
|
2010-07-05 20:00:56 +08:00
|
|
|
Return a copy of B with only its first character capitalized (ASCII)\n\
|
|
|
|
and the rest lower-cased.");
|
2007-10-16 14:31:30 +08:00
|
|
|
|
|
|
|
void
|
2015-12-26 02:01:53 +08:00
|
|
|
_Py_bytes_capitalize(char *result, const char *s, Py_ssize_t len)
|
2007-10-16 14:31:30 +08:00
|
|
|
{
|
2018-09-07 12:54:49 +08:00
|
|
|
if (len > 0) {
|
|
|
|
*result = Py_TOUPPER(*s);
|
|
|
|
_Py_bytes_lower(result + 1, s + 1, len - 1);
|
2010-08-16 01:38:46 +08:00
|
|
|
}
|
2007-10-16 14:31:30 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
PyDoc_STRVAR_shared(_Py_swapcase__doc__,
|
|
|
|
"B.swapcase() -> copy of B\n\
|
|
|
|
\n\
|
|
|
|
Return a copy of B with uppercase ASCII characters converted\n\
|
|
|
|
to lowercase ASCII and vice versa.");
|
|
|
|
|
|
|
|
void
|
2015-12-26 02:01:53 +08:00
|
|
|
_Py_bytes_swapcase(char *result, const char *s, Py_ssize_t len)
|
2007-10-16 14:31:30 +08:00
|
|
|
{
|
2010-08-16 01:38:46 +08:00
|
|
|
Py_ssize_t i;
|
|
|
|
|
|
|
|
for (i = 0; i < len; i++) {
|
|
|
|
int c = Py_CHARMASK(*s++);
|
|
|
|
if (Py_ISLOWER(c)) {
|
|
|
|
*result = Py_TOUPPER(c);
|
2010-05-09 23:52:27 +08:00
|
|
|
}
|
2010-08-16 01:38:46 +08:00
|
|
|
else if (Py_ISUPPER(c)) {
|
|
|
|
*result = Py_TOLOWER(c);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
*result = c;
|
|
|
|
result++;
|
|
|
|
}
|
2007-10-16 14:31:30 +08:00
|
|
|
}
|
|
|
|
|
2009-04-12 23:51:51 +08:00
|
|
|
|
|
|
|
PyDoc_STRVAR_shared(_Py_maketrans__doc__,
|
|
|
|
"B.maketrans(frm, to) -> translation table\n\
|
|
|
|
\n\
|
2011-06-28 00:06:45 +08:00
|
|
|
Return a translation table (a bytes object of length 256) suitable\n\
|
|
|
|
for use in the bytes or bytearray translate method where each byte\n\
|
|
|
|
in frm is mapped to the byte at the same position in to.\n\
|
|
|
|
The bytes objects frm and to must be of the same length.");
|
2009-04-12 23:51:51 +08:00
|
|
|
|
|
|
|
PyObject *
|
2015-02-03 07:25:42 +08:00
|
|
|
_Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to)
|
2009-04-12 23:51:51 +08:00
|
|
|
{
|
2014-07-27 22:25:09 +08:00
|
|
|
PyObject *res = NULL;
|
2010-08-16 01:38:46 +08:00
|
|
|
Py_ssize_t i;
|
|
|
|
char *p;
|
|
|
|
|
2015-02-03 07:25:42 +08:00
|
|
|
if (frm->len != to->len) {
|
2010-08-16 01:38:46 +08:00
|
|
|
PyErr_Format(PyExc_ValueError,
|
|
|
|
"maketrans arguments must have same length");
|
2015-02-03 07:25:42 +08:00
|
|
|
return NULL;
|
2010-08-16 01:38:46 +08:00
|
|
|
}
|
|
|
|
res = PyBytes_FromStringAndSize(NULL, 256);
|
2015-02-03 07:25:42 +08:00
|
|
|
if (!res)
|
|
|
|
return NULL;
|
2010-08-16 01:38:46 +08:00
|
|
|
p = PyBytes_AS_STRING(res);
|
|
|
|
for (i = 0; i < 256; i++)
|
2010-08-16 01:41:31 +08:00
|
|
|
p[i] = (char) i;
|
2015-02-03 07:25:42 +08:00
|
|
|
for (i = 0; i < frm->len; i++) {
|
|
|
|
p[((unsigned char *)frm->buf)[i]] = ((char *)to->buf)[i];
|
2010-08-16 01:38:46 +08:00
|
|
|
}
|
2009-04-12 23:51:51 +08:00
|
|
|
|
2010-08-16 01:38:46 +08:00
|
|
|
return res;
|
2009-04-12 23:51:51 +08:00
|
|
|
}
|
2016-05-05 03:23:26 +08:00
|
|
|
|
|
|
|
#define FASTSEARCH fastsearch
|
|
|
|
#define STRINGLIB(F) stringlib_##F
|
|
|
|
#define STRINGLIB_CHAR char
|
|
|
|
#define STRINGLIB_SIZEOF_CHAR 1
|
2022-05-24 09:45:31 +08:00
|
|
|
#define STRINGLIB_FAST_MEMCHR memchr
|
2016-05-05 03:23:26 +08:00
|
|
|
|
|
|
|
#include "stringlib/fastsearch.h"
|
|
|
|
#include "stringlib/count.h"
|
|
|
|
#include "stringlib/find.h"
|
|
|
|
|
|
|
|
/*
|
2017-03-13 06:37:05 +08:00
|
|
|
Wraps stringlib_parse_args_finds() and additionally checks the first
|
|
|
|
argument type.
|
2016-05-05 03:23:26 +08:00
|
|
|
|
2017-03-13 06:37:05 +08:00
|
|
|
In case the first argument is a bytes-like object, sets it to subobj,
|
|
|
|
and doesn't touch the byte parameter.
|
|
|
|
In case it is an integer in range(0, 256), writes the integer value
|
|
|
|
to byte, and sets subobj to NULL.
|
|
|
|
|
|
|
|
The other parameters are similar to those of
|
2016-05-05 03:23:26 +08:00
|
|
|
stringlib_parse_args_finds().
|
|
|
|
*/
|
|
|
|
|
|
|
|
Py_LOCAL_INLINE(int)
|
2024-04-12 15:40:55 +08:00
|
|
|
parse_args_finds_byte(const char *function_name, PyObject **subobj, char *byte)
|
2016-05-05 03:23:26 +08:00
|
|
|
{
|
2024-04-12 15:40:55 +08:00
|
|
|
if (PyObject_CheckBuffer(*subobj)) {
|
2016-05-05 03:23:26 +08:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2024-04-12 15:40:55 +08:00
|
|
|
if (!_PyIndex_Check(*subobj)) {
|
2017-03-13 06:37:05 +08:00
|
|
|
PyErr_Format(PyExc_TypeError,
|
|
|
|
"argument should be integer or bytes-like object, "
|
|
|
|
"not '%.200s'",
|
2024-04-12 15:40:55 +08:00
|
|
|
Py_TYPE(*subobj)->tp_name);
|
2017-03-13 06:37:05 +08:00
|
|
|
return 0;
|
2016-05-05 03:23:26 +08:00
|
|
|
}
|
|
|
|
|
2024-04-12 15:40:55 +08:00
|
|
|
Py_ssize_t ival = PyNumber_AsSsize_t(*subobj, NULL);
|
2017-03-13 06:37:05 +08:00
|
|
|
if (ival == -1 && PyErr_Occurred()) {
|
|
|
|
return 0;
|
|
|
|
}
|
2016-05-05 03:23:26 +08:00
|
|
|
if (ival < 0 || ival > 255) {
|
|
|
|
PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
*subobj = NULL;
|
|
|
|
*byte = (char)ival;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Helper macro to fix up start/end slice values against a length.
 *
 * `end` is clamped to `len` when larger; a negative `end` or `start` has
 * `len` added to it and is then clamped to 0 if still negative.  `start`
 * is not clamped against `len`.
 *
 * `start` and `end` must be modifiable lvalues; they are updated in place.
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement (e.g. inside an unbraced if/else), and every argument
 * is parenthesized.  Use it as `ADJUST_INDICES(start, end, len);`.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len)) {                    \
            (end) = (len);                      \
        }                                       \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0) {                    \
                (end) = 0;                      \
            }                                   \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0) {                  \
                (start) = 0;                    \
            }                                   \
        }                                       \
    } while (0)
|
|
|
|
|
|
|
|
Py_LOCAL_INLINE(Py_ssize_t)
|
|
|
|
find_internal(const char *str, Py_ssize_t len,
|
2024-04-12 15:40:55 +08:00
|
|
|
const char *function_name, PyObject *subobj,
|
|
|
|
Py_ssize_t start, Py_ssize_t end,
|
|
|
|
int dir)
|
2016-05-05 03:23:26 +08:00
|
|
|
{
|
|
|
|
char byte;
|
|
|
|
Py_buffer subbuf;
|
|
|
|
const char *sub;
|
|
|
|
Py_ssize_t sub_len;
|
|
|
|
Py_ssize_t res;
|
|
|
|
|
2024-04-12 15:40:55 +08:00
|
|
|
if (!parse_args_finds_byte(function_name, &subobj, &byte)) {
|
2016-05-05 03:23:26 +08:00
|
|
|
return -2;
|
2024-04-12 15:40:55 +08:00
|
|
|
}
|
2016-05-05 03:23:26 +08:00
|
|
|
|
|
|
|
if (subobj) {
|
|
|
|
if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
|
|
|
|
return -2;
|
|
|
|
|
|
|
|
sub = subbuf.buf;
|
|
|
|
sub_len = subbuf.len;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
sub = &byte;
|
|
|
|
sub_len = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
ADJUST_INDICES(start, end, len);
|
|
|
|
if (end - start < sub_len)
|
|
|
|
res = -1;
|
|
|
|
else if (sub_len == 1) {
|
|
|
|
if (dir > 0)
|
|
|
|
res = stringlib_find_char(
|
|
|
|
str + start, end - start,
|
|
|
|
*sub);
|
|
|
|
else
|
|
|
|
res = stringlib_rfind_char(
|
|
|
|
str + start, end - start,
|
|
|
|
*sub);
|
|
|
|
if (res >= 0)
|
|
|
|
res += start;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
if (dir > 0)
|
|
|
|
res = stringlib_find_slice(
|
|
|
|
str, len,
|
|
|
|
sub, sub_len, start, end);
|
|
|
|
else
|
|
|
|
res = stringlib_rfind_slice(
|
|
|
|
str, len,
|
|
|
|
sub, sub_len, start, end);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (subobj)
|
|
|
|
PyBuffer_Release(&subbuf);
|
|
|
|
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
PyObject *
|
2024-04-12 15:40:55 +08:00
|
|
|
_Py_bytes_find(const char *str, Py_ssize_t len, PyObject *sub,
|
|
|
|
Py_ssize_t start, Py_ssize_t end)
|
2016-05-05 03:23:26 +08:00
|
|
|
{
|
2024-04-12 15:40:55 +08:00
|
|
|
Py_ssize_t result = find_internal(str, len, "find", sub, start, end, +1);
|
2016-05-05 03:23:26 +08:00
|
|
|
if (result == -2)
|
|
|
|
return NULL;
|
|
|
|
return PyLong_FromSsize_t(result);
|
|
|
|
}
|
|
|
|
|
|
|
|
PyObject *
|
2024-04-12 15:40:55 +08:00
|
|
|
_Py_bytes_index(const char *str, Py_ssize_t len, PyObject *sub,
|
|
|
|
Py_ssize_t start, Py_ssize_t end)
|
2016-05-05 03:23:26 +08:00
|
|
|
{
|
2024-04-12 15:40:55 +08:00
|
|
|
Py_ssize_t result = find_internal(str, len, "index", sub, start, end, +1);
|
2016-05-05 03:23:26 +08:00
|
|
|
if (result == -2)
|
|
|
|
return NULL;
|
|
|
|
if (result == -1) {
|
|
|
|
PyErr_SetString(PyExc_ValueError,
|
|
|
|
"subsection not found");
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
return PyLong_FromSsize_t(result);
|
|
|
|
}
|
|
|
|
|
|
|
|
PyObject *
|
2024-04-12 15:40:55 +08:00
|
|
|
_Py_bytes_rfind(const char *str, Py_ssize_t len, PyObject *sub,
|
|
|
|
Py_ssize_t start, Py_ssize_t end)
|
2016-05-05 03:23:26 +08:00
|
|
|
{
|
2024-04-12 15:40:55 +08:00
|
|
|
Py_ssize_t result = find_internal(str, len, "rfind", sub, start, end, -1);
|
2016-05-05 03:23:26 +08:00
|
|
|
if (result == -2)
|
|
|
|
return NULL;
|
|
|
|
return PyLong_FromSsize_t(result);
|
|
|
|
}
|
|
|
|
|
|
|
|
PyObject *
|
2024-04-12 15:40:55 +08:00
|
|
|
_Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *sub,
|
|
|
|
Py_ssize_t start, Py_ssize_t end)
|
2016-05-05 03:23:26 +08:00
|
|
|
{
|
2024-04-12 15:40:55 +08:00
|
|
|
Py_ssize_t result = find_internal(str, len, "rindex", sub, start, end, -1);
|
2016-05-05 03:23:26 +08:00
|
|
|
if (result == -2)
|
|
|
|
return NULL;
|
|
|
|
if (result == -1) {
|
|
|
|
PyErr_SetString(PyExc_ValueError,
|
|
|
|
"subsection not found");
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
return PyLong_FromSsize_t(result);
|
|
|
|
}
|
|
|
|
|
|
|
|
PyObject *
|
2024-04-12 15:40:55 +08:00
|
|
|
_Py_bytes_count(const char *str, Py_ssize_t len, PyObject *sub_obj,
|
|
|
|
Py_ssize_t start, Py_ssize_t end)
|
2016-05-05 03:23:26 +08:00
|
|
|
{
|
|
|
|
const char *sub;
|
|
|
|
Py_ssize_t sub_len;
|
|
|
|
char byte;
|
|
|
|
|
|
|
|
Py_buffer vsub;
|
|
|
|
PyObject *count_obj;
|
|
|
|
|
2024-04-12 15:40:55 +08:00
|
|
|
if (!parse_args_finds_byte("count", &sub_obj, &byte)) {
|
2016-05-05 03:23:26 +08:00
|
|
|
return NULL;
|
2024-04-12 15:40:55 +08:00
|
|
|
}
|
2016-05-05 03:23:26 +08:00
|
|
|
|
|
|
|
if (sub_obj) {
|
|
|
|
if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
sub = vsub.buf;
|
|
|
|
sub_len = vsub.len;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
sub = &byte;
|
|
|
|
sub_len = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
ADJUST_INDICES(start, end, len);
|
|
|
|
|
|
|
|
count_obj = PyLong_FromSsize_t(
|
|
|
|
stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
|
|
|
|
);
|
|
|
|
|
|
|
|
if (sub_obj)
|
|
|
|
PyBuffer_Release(&vsub);
|
|
|
|
|
|
|
|
return count_obj;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
_Py_bytes_contains(const char *str, Py_ssize_t len, PyObject *arg)
|
|
|
|
{
|
2016-07-10 17:37:30 +08:00
|
|
|
Py_ssize_t ival = PyNumber_AsSsize_t(arg, NULL);
|
2016-05-05 03:23:26 +08:00
|
|
|
if (ival == -1 && PyErr_Occurred()) {
|
|
|
|
Py_buffer varg;
|
|
|
|
Py_ssize_t pos;
|
|
|
|
PyErr_Clear();
|
|
|
|
if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
|
|
|
|
return -1;
|
|
|
|
pos = stringlib_find(str, len,
|
|
|
|
varg.buf, varg.len, 0);
|
|
|
|
PyBuffer_Release(&varg);
|
|
|
|
return pos >= 0;
|
|
|
|
}
|
|
|
|
if (ival < 0 || ival >= 256) {
|
|
|
|
PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return memchr(str, (int) ival, len) != NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Matches the end (direction >= 0) or start (direction < 0) of the buffer
|
|
|
|
* against substr, using the start and end arguments. Returns
|
|
|
|
* -1 on error, 0 if not found and 1 if found.
|
|
|
|
*/
|
2016-09-10 04:54:34 +08:00
|
|
|
static int
|
2016-05-05 03:23:26 +08:00
|
|
|
tailmatch(const char *str, Py_ssize_t len, PyObject *substr,
|
|
|
|
Py_ssize_t start, Py_ssize_t end, int direction)
|
|
|
|
{
|
|
|
|
Py_buffer sub_view = {NULL, NULL};
|
|
|
|
const char *sub;
|
|
|
|
Py_ssize_t slen;
|
|
|
|
|
|
|
|
if (PyBytes_Check(substr)) {
|
|
|
|
sub = PyBytes_AS_STRING(substr);
|
|
|
|
slen = PyBytes_GET_SIZE(substr);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
|
|
|
|
return -1;
|
|
|
|
sub = sub_view.buf;
|
|
|
|
slen = sub_view.len;
|
|
|
|
}
|
|
|
|
|
|
|
|
ADJUST_INDICES(start, end, len);
|
|
|
|
|
|
|
|
if (direction < 0) {
|
|
|
|
/* startswith */
|
2019-10-06 20:17:18 +08:00
|
|
|
if (start > len - slen)
|
2016-05-05 03:23:26 +08:00
|
|
|
goto notfound;
|
|
|
|
} else {
|
|
|
|
/* endswith */
|
|
|
|
if (end - start < slen || start > len)
|
|
|
|
goto notfound;
|
|
|
|
|
|
|
|
if (end - slen > start)
|
|
|
|
start = end - slen;
|
|
|
|
}
|
|
|
|
if (end - start < slen)
|
|
|
|
goto notfound;
|
|
|
|
if (memcmp(str + start, sub, slen) != 0)
|
|
|
|
goto notfound;
|
|
|
|
|
|
|
|
PyBuffer_Release(&sub_view);
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
notfound:
|
|
|
|
PyBuffer_Release(&sub_view);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-09-10 04:54:34 +08:00
|
|
|
static PyObject *
|
2016-05-05 03:23:26 +08:00
|
|
|
_Py_bytes_tailmatch(const char *str, Py_ssize_t len,
|
2024-04-03 19:11:14 +08:00
|
|
|
const char *function_name, PyObject *subobj,
|
|
|
|
Py_ssize_t start, Py_ssize_t end,
|
2016-05-05 03:23:26 +08:00
|
|
|
int direction)
|
|
|
|
{
|
|
|
|
if (PyTuple_Check(subobj)) {
|
|
|
|
Py_ssize_t i;
|
|
|
|
for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
|
2024-04-03 19:11:14 +08:00
|
|
|
PyObject *item = PyTuple_GET_ITEM(subobj, i);
|
|
|
|
int result = tailmatch(str, len, item, start, end, direction);
|
|
|
|
if (result < 0) {
|
2016-05-05 03:23:26 +08:00
|
|
|
return NULL;
|
2024-04-03 19:11:14 +08:00
|
|
|
}
|
2016-05-05 03:23:26 +08:00
|
|
|
else if (result) {
|
|
|
|
Py_RETURN_TRUE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
Py_RETURN_FALSE;
|
|
|
|
}
|
2024-04-03 19:11:14 +08:00
|
|
|
int result = tailmatch(str, len, subobj, start, end, direction);
|
2016-05-05 03:23:26 +08:00
|
|
|
if (result == -1) {
|
2024-04-03 19:11:14 +08:00
|
|
|
if (PyErr_ExceptionMatches(PyExc_TypeError)) {
|
2016-05-05 03:23:26 +08:00
|
|
|
PyErr_Format(PyExc_TypeError,
|
|
|
|
"%s first arg must be bytes or a tuple of bytes, "
|
|
|
|
"not %s",
|
|
|
|
function_name, Py_TYPE(subobj)->tp_name);
|
2024-04-03 19:11:14 +08:00
|
|
|
}
|
2016-05-05 03:23:26 +08:00
|
|
|
return NULL;
|
|
|
|
}
|
2024-04-03 19:11:14 +08:00
|
|
|
return PyBool_FromLong(result);
|
2016-05-05 03:23:26 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
PyObject *
|
2024-04-03 19:11:14 +08:00
|
|
|
_Py_bytes_startswith(const char *str, Py_ssize_t len, PyObject *subobj,
|
|
|
|
Py_ssize_t start, Py_ssize_t end)
|
2016-05-05 03:23:26 +08:00
|
|
|
{
|
2024-04-03 19:11:14 +08:00
|
|
|
return _Py_bytes_tailmatch(str, len, "startswith", subobj, start, end, -1);
|
2016-05-05 03:23:26 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
PyObject *
|
2024-04-03 19:11:14 +08:00
|
|
|
_Py_bytes_endswith(const char *str, Py_ssize_t len, PyObject *subobj,
|
|
|
|
Py_ssize_t start, Py_ssize_t end)
|
2016-05-05 03:23:26 +08:00
|
|
|
{
|
2024-04-03 19:11:14 +08:00
|
|
|
return _Py_bytes_tailmatch(str, len, "endswith", subobj, start, end, +1);
|
2016-05-05 03:23:26 +08:00
|
|
|
}
|