Added "getcode" and "getname" methods to the ucnhash module (they're
probably more useful for the test code than for any applications, but
one never knows...)
This commit is contained in:
Fredrik Lundh 2001-01-19 11:00:42 +00:00
parent d5d2cd149f
commit ee865c64da
2 changed files with 75 additions and 13 deletions

View File

@ -37,6 +37,23 @@ s = u"\N{LATIN CAPITAL LETTER T}" \
u"\N{LATIN SMALL LETTER P}" \
u"\N{FULL STOP}"
verify(s == u"The rEd fOx ate the sheep.", s)
import ucnhash
# minimal sanity check
for char in "SPAM":
name = "LATIN SMALL LETTER %s" % char
code = ucnhash.getcode(name)
verify(ucnhash.getname(code) == name)
# loop over all characters in the database
for code in range(65536):
try:
name = ucnhash.getname(code)
verify(ucnhash.getcode(name) == code)
except ValueError:
pass
print "done."
# misc. symbol testing

View File

@ -22,7 +22,6 @@ typedef struct {
* Generated on: Fri Jul 14 08:00:58 2000
*/
/* Same numeric value as k_cKeys below. */
#define cKeys 10538
/* Modulus applied to the sum of the two G[] lookups in hash(). */
#define k_cHashElements 18836
/* NOTE(review): presumably the maximum length of a character name;
   not referenced in the visible code -- confirm. */
#define k_cchMaxKey 83
/* Number of aucn[] entries scanned by the brute-force code-to-name
   search. */
#define k_cKeys 10538
@ -111,12 +110,6 @@ hash(const char *key, unsigned int cch)
return ((unsigned long)(G[ f1(key, cch) ]) + (unsigned long)(G[ f2(key, cch) ]) ) % k_cHashElements;
}
const _Py_UnicodeCharacterName *
getValue(unsigned long iKey)
{
return (_Py_UnicodeCharacterName *) &aucn[iKey];
}
static int
mystrnicmp(const char *s1, const char *s2, size_t count)
{
@ -136,22 +129,34 @@ mystrnicmp(const char *s1, const char *s2, size_t count)
/* bindings for the new API */
/*
 * Find the name of a character code.
 *
 *   code    character code to look up
 *   buffer  receives the NUL-terminated name on success
 *   buflen  size of buffer in bytes, terminator included
 *
 * Returns 1 and fills buffer when the code is present in aucn[] and its
 * name (plus terminator) fits in buflen bytes; returns 0 otherwise and
 * leaves buffer untouched.
 */
static int
getname(Py_UCS4 code, char* buffer, int buflen)
{
    int i;

    /* brute force search */
    for (i = 0; i < k_cKeys; i++) {
        if (aucn[i].value == code) {
            size_t len = strlen(aucn[i].pszUCN);

            /* need room for len characters plus the terminator */
            if (buflen <= 0 || (size_t) buflen <= len)
                return 0;
            memcpy(buffer, aucn[i].pszUCN, len + 1);
            return 1;
        }
    }
    return 0;
}
/*
 * Find the character code for a name.
 *
 *   name     character name (compared with mystrnicmp)
 *   namelen  number of bytes in name
 *   code     receives the character code on success
 *
 * Returns 1 and stores the code when the name matches the aucn[] entry
 * selected by hash(); returns 0 otherwise and leaves *code untouched.
 */
static int
getcode(const char* name, int namelen, Py_UCS4* code)
{
    unsigned long j;

    /* hash() takes an unsigned length; refuse negative input up front */
    if (namelen < 0)
        return 0;

    j = hash(name, namelen);

    /* valid aucn[] indices are 0 .. k_cKeys-1 */
    if (j >= k_cKeys)
        return 0;

    /* Require equal lengths before comparing: this rejects a proper
       prefix of the stored name and keeps the comparison from running
       past the end of a shorter table entry. */
    if (strlen(aucn[j].pszUCN) != (size_t) namelen)
        return 0;

    if (mystrnicmp(name, aucn[j].pszUCN, namelen) != 0)
        return 0;

    *code = aucn[j].value;
    return 1;
}
@ -159,13 +164,53 @@ ucnhash_getcode(const char* name, int namelen, Py_UCS4* code)
static const _PyUnicode_Name_CAPI hashAPI =
{
sizeof(_PyUnicode_Name_CAPI),
ucnhash_getname,
ucnhash_getcode
getname,
getcode
};
/* -------------------------------------------------------------------- */
/* Python bindings */
/* Python binding: takes one integer argument and returns the matching
   character name as a string.  Sets ValueError and returns NULL when
   getname() reports failure. */
static PyObject *
ucnhash_getname(PyObject* self, PyObject* args)
{
    int ordinal;
    char buf[256];

    if (!PyArg_ParseTuple(args, "i", &ordinal))
        return NULL;

    if (getname((Py_UCS4) ordinal, buf, sizeof(buf)))
        return Py_BuildValue("s", buf);

    PyErr_SetString(PyExc_ValueError, "undefined character code");
    return NULL;
}
/* Python binding: takes one string argument (a character name) and
   returns the matching character code as an integer.  Sets ValueError
   and returns NULL when getcode() reports failure. */
static PyObject *
ucnhash_getcode(PyObject* self, PyObject* args)
{
    Py_UCS4 code;

    char* name;
    int namelen;
    if (!PyArg_ParseTuple(args, "s#", &name, &namelen))
        return NULL;

    if (!getcode(name, namelen, &code)) {
        PyErr_SetString(PyExc_ValueError, "undefined character name");
        return NULL;
    }

    /* "i" consumes an int from the argument list; convert explicitly so
       the argument type matches whatever integer type Py_UCS4 is */
    return Py_BuildValue("i", (int) code);
}
static
PyMethodDef ucnhash_methods[] =
{
{"getname", ucnhash_getname, 1},
{"getcode", ucnhash_getcode, 1},
{NULL, NULL},
};