From 6f600ff1734ca2fdcdd37a809adf8130f0d8cc4e Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 26 Feb 2018 16:02:22 +0200 Subject: [PATCH] bpo-32922: dbm.open() now encodes filename with the filesystem encoding. (GH-5832) --- Lib/test/test_dbm_dumb.py | 15 ++++++++ Lib/test/test_dbm_gnu.py | 36 +++++++++++++++++- Lib/test/test_dbm_ndbm.py | 37 +++++++++++++++++++ .../2018-02-23-19-12-04.bpo-32922.u-xe0B.rst | 2 + Modules/_dbmmodule.c | 21 +++++++++-- Modules/_gdbmmodule.c | 21 +++++++++-- Modules/clinic/_dbmmodule.c.h | 8 ++-- Modules/clinic/_gdbmmodule.c.h | 13 ++++--- 8 files changed, 134 insertions(+), 19 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2018-02-23-19-12-04.bpo-32922.u-xe0B.rst diff --git a/Lib/test/test_dbm_dumb.py b/Lib/test/test_dbm_dumb.py index 21f29af05d2..652a355d990 100644 --- a/Lib/test/test_dbm_dumb.py +++ b/Lib/test/test_dbm_dumb.py @@ -281,6 +281,21 @@ class DumbDBMTestCase(unittest.TestCase): self.assertEqual(sorted(f.keys()), sorted(self._dict)) f.close() # don't write + @unittest.skipUnless(support.TESTFN_NONASCII, + 'requires OS support of non-ASCII encodings') + def test_nonascii_filename(self): + filename = support.TESTFN_NONASCII + for suffix in ['.dir', '.dat', '.bak']: + self.addCleanup(support.unlink, filename + suffix) + with dumbdbm.open(filename, 'c') as db: + db[b'key'] = b'value' + self.assertTrue(os.path.exists(filename + '.dat')) + self.assertTrue(os.path.exists(filename + '.dir')) + with dumbdbm.open(filename, 'r') as db: + self.assertEqual(list(db.keys()), [b'key']) + self.assertTrue(b'key' in db) + self.assertEqual(db[b'key'], b'value') + def tearDown(self): _delete_files() diff --git a/Lib/test/test_dbm_gnu.py b/Lib/test/test_dbm_gnu.py index 304b3328697..d96df928480 100644 --- a/Lib/test/test_dbm_gnu.py +++ b/Lib/test/test_dbm_gnu.py @@ -2,7 +2,7 @@ from test import support gdbm = support.import_module("dbm.gnu") #skip if not supported import unittest import os -from test.support import TESTFN, unlink +from test.support import TESTFN, TESTFN_NONASCII, unlink filename = TESTFN @@ -93,5 +93,39 @@ class TestGdbm(unittest.TestCase): self.assertEqual(str(cm.exception), "GDBM object has already been closed") + def test_bytes(self): + with gdbm.open(filename, 'c') as db: + db[b'bytes key \xbd'] = b'bytes value \xbd' + with gdbm.open(filename, 'r') as db: + self.assertEqual(list(db.keys()), [b'bytes key \xbd']) + self.assertTrue(b'bytes key \xbd' in db) + self.assertEqual(db[b'bytes key \xbd'], b'bytes value \xbd') + + def test_unicode(self): + with gdbm.open(filename, 'c') as db: + db['Unicode key \U0001f40d'] = 'Unicode value \U0001f40d' + with gdbm.open(filename, 'r') as db: + self.assertEqual(list(db.keys()), ['Unicode key \U0001f40d'.encode()]) + self.assertTrue('Unicode key \U0001f40d'.encode() in db) + self.assertTrue('Unicode key \U0001f40d' in db) + self.assertEqual(db['Unicode key \U0001f40d'.encode()], + 'Unicode value \U0001f40d'.encode()) + self.assertEqual(db['Unicode key \U0001f40d'], + 'Unicode value \U0001f40d'.encode()) + + @unittest.skipUnless(TESTFN_NONASCII, + 'requires OS support of non-ASCII encodings') + def test_nonascii_filename(self): + filename = TESTFN_NONASCII + self.addCleanup(unlink, filename) + with gdbm.open(filename, 'c') as db: + db[b'key'] = b'value' + self.assertTrue(os.path.exists(filename)) + with gdbm.open(filename, 'r') as db: + self.assertEqual(list(db.keys()), [b'key']) + self.assertTrue(b'key' in db) + self.assertEqual(db[b'key'], b'value') + + if __name__ == '__main__': unittest.main() diff --git a/Lib/test/test_dbm_ndbm.py b/Lib/test/test_dbm_ndbm.py index 49f4426e4cb..fb7d0e8281e 100644 --- a/Lib/test/test_dbm_ndbm.py +++ b/Lib/test/test_dbm_ndbm.py @@ -1,5 +1,6 @@ from test import support support.import_module("dbm.ndbm") #skip if not supported +import os import unittest import dbm.ndbm from dbm.ndbm import error @@ -47,6 +48,42 @@ class DbmTestCase(unittest.TestCase): self.assertEqual(str(cm.exception), "DBM object has already been closed") + def test_bytes(self): + with dbm.ndbm.open(self.filename, 'c') as db: + db[b'bytes key \xbd'] = b'bytes value \xbd' + with dbm.ndbm.open(self.filename, 'r') as db: + self.assertEqual(list(db.keys()), [b'bytes key \xbd']) + self.assertTrue(b'bytes key \xbd' in db) + self.assertEqual(db[b'bytes key \xbd'], b'bytes value \xbd') + + def test_unicode(self): + with dbm.ndbm.open(self.filename, 'c') as db: + db['Unicode key \U0001f40d'] = 'Unicode value \U0001f40d' + with dbm.ndbm.open(self.filename, 'r') as db: + self.assertEqual(list(db.keys()), ['Unicode key \U0001f40d'.encode()]) + self.assertTrue('Unicode key \U0001f40d'.encode() in db) + self.assertTrue('Unicode key \U0001f40d' in db) + self.assertEqual(db['Unicode key \U0001f40d'.encode()], + 'Unicode value \U0001f40d'.encode()) + self.assertEqual(db['Unicode key \U0001f40d'], + 'Unicode value \U0001f40d'.encode()) + + @unittest.skipUnless(support.TESTFN_NONASCII, + 'requires OS support of non-ASCII encodings') + def test_nonascii_filename(self): + filename = support.TESTFN_NONASCII + for suffix in ['', '.pag', '.dir', '.db']: + self.addCleanup(support.unlink, filename + suffix) + with dbm.ndbm.open(filename, 'c') as db: + db[b'key'] = b'value' + self.assertTrue(any(os.path.exists(filename + suffix) + for suffix in ['', '.pag', '.dir', '.db'])) + with dbm.ndbm.open(filename, 'r') as db: + self.assertEqual(list(db.keys()), [b'key']) + self.assertTrue(b'key' in db) + self.assertEqual(db[b'key'], b'value') + + if __name__ == '__main__': unittest.main() diff --git a/Misc/NEWS.d/next/Library/2018-02-23-19-12-04.bpo-32922.u-xe0B.rst b/Misc/NEWS.d/next/Library/2018-02-23-19-12-04.bpo-32922.u-xe0B.rst new file mode 100644 index 00000000000..412e588586c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2018-02-23-19-12-04.bpo-32922.u-xe0B.rst @@ -0,0 +1,2 @@ +dbm.open() now encodes filename with the filesystem encoding rather than +default encoding. diff --git a/Modules/_dbmmodule.c b/Modules/_dbmmodule.c index 7e1344177b5..8afd92cf3ca 100644 --- a/Modules/_dbmmodule.c +++ b/Modules/_dbmmodule.c @@ -412,7 +412,7 @@ static PyTypeObject Dbmtype = { _dbm.open as dbmopen - filename: str + filename: unicode The filename to open. flags: str="r" @@ -429,9 +429,9 @@ Return a database object. [clinic start generated code]*/ static PyObject * -dbmopen_impl(PyObject *module, const char *filename, const char *flags, +dbmopen_impl(PyObject *module, PyObject *filename, const char *flags, int mode) -/*[clinic end generated code: output=5fade8cf16e0755f input=226334bade5764e6]*/ +/*[clinic end generated code: output=9527750f5df90764 input=376a9d903a50df59]*/ { int iflags; @@ -450,7 +450,20 @@ dbmopen_impl(PyObject *module, const char *filename, const char *flags, "arg 2 to open should be 'r', 'w', 'c', or 'n'"); return NULL; } - return newdbmobject(filename, iflags, mode); + + PyObject *filenamebytes = PyUnicode_EncodeFSDefault(filename); + if (filenamebytes == NULL) { + return NULL; + } + const char *name = PyBytes_AS_STRING(filenamebytes); + if (strlen(name) != (size_t)PyBytes_GET_SIZE(filenamebytes)) { + Py_DECREF(filenamebytes); + PyErr_SetString(PyExc_ValueError, "embedded null character"); + return NULL; + } + PyObject *self = newdbmobject(name, iflags, mode); + Py_DECREF(filenamebytes); + return self; } static PyMethodDef dbmmodule_methods[] = { diff --git a/Modules/_gdbmmodule.c b/Modules/_gdbmmodule.c index 12d973b5cee..9996d8c26fb 100644 --- a/Modules/_gdbmmodule.c +++ b/Modules/_gdbmmodule.c @@ -527,7 +527,7 @@ static PyTypeObject Dbmtype = { /*[clinic input] _gdbm.open as dbmopen - filename as name: str + filename: unicode flags: str="r" mode: int(py_default="0o666") = 0o666 / @@ -557,8 +557,9 @@ when the database has to be created. It defaults to octal 0o666. [clinic start generated code]*/ static PyObject * -dbmopen_impl(PyObject *module, const char *name, const char *flags, int mode) -/*[clinic end generated code: output=31aa1bafdf5da688 input=55563cd60e51984a]*/ +dbmopen_impl(PyObject *module, PyObject *filename, const char *flags, + int mode) +/*[clinic end generated code: output=9527750f5df90764 input=3be0b0875974b928]*/ { int iflags; @@ -606,7 +607,19 @@ dbmopen_impl(PyObject *module, const char *name, const char *flags, int mode) } } - return newdbmobject(name, iflags, mode); + PyObject *filenamebytes = PyUnicode_EncodeFSDefault(filename); + if (filenamebytes == NULL) { + return NULL; + } + const char *name = PyBytes_AS_STRING(filenamebytes); + if (strlen(name) != (size_t)PyBytes_GET_SIZE(filenamebytes)) { + Py_DECREF(filenamebytes); + PyErr_SetString(PyExc_ValueError, "embedded null character"); + return NULL; + } + PyObject *self = newdbmobject(name, iflags, mode); + Py_DECREF(filenamebytes); + return self; } static const char dbmmodule_open_flags[] = "rwcn" diff --git a/Modules/clinic/_dbmmodule.c.h b/Modules/clinic/_dbmmodule.c.h index 63d5b1a41fb..0f831c9eec7 100644 --- a/Modules/clinic/_dbmmodule.c.h +++ b/Modules/clinic/_dbmmodule.c.h @@ -121,18 +121,18 @@ PyDoc_STRVAR(dbmopen__doc__, {"open", (PyCFunction)dbmopen, METH_FASTCALL, dbmopen__doc__}, static PyObject * -dbmopen_impl(PyObject *module, const char *filename, const char *flags, +dbmopen_impl(PyObject *module, PyObject *filename, const char *flags, int mode); static PyObject * dbmopen(PyObject *module, PyObject *const *args, Py_ssize_t nargs) { PyObject *return_value = NULL; - const char *filename; + PyObject *filename; const char *flags = "r"; int mode = 438; - if (!_PyArg_ParseStack(args, nargs, "s|si:open", + if (!_PyArg_ParseStack(args, nargs, "U|si:open", &filename, &flags, &mode)) { goto exit; } @@ -141,4 +141,4 @@ dbmopen(PyObject *module, PyObject *const *args, Py_ssize_t nargs) exit: return return_value; } -/*[clinic end generated code: output=8ce71abac849155f input=a9049054013a1b77]*/ +/*[clinic end generated code: output=5c858b4080a011a4 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/_gdbmmodule.c.h b/Modules/clinic/_gdbmmodule.c.h index 2222967aaa5..7bdc4321df2 100644 --- a/Modules/clinic/_gdbmmodule.c.h +++ b/Modules/clinic/_gdbmmodule.c.h @@ -234,23 +234,24 @@ PyDoc_STRVAR(dbmopen__doc__, {"open", (PyCFunction)dbmopen, METH_FASTCALL, dbmopen__doc__}, static PyObject * -dbmopen_impl(PyObject *module, const char *name, const char *flags, int mode); +dbmopen_impl(PyObject *module, PyObject *filename, const char *flags, + int mode); static PyObject * dbmopen(PyObject *module, PyObject *const *args, Py_ssize_t nargs) { PyObject *return_value = NULL; - const char *name; + PyObject *filename; const char *flags = "r"; int mode = 438; - if (!_PyArg_ParseStack(args, nargs, "s|si:open", - &name, &flags, &mode)) { + if (!_PyArg_ParseStack(args, nargs, "U|si:open", + &filename, &flags, &mode)) { goto exit; } - return_value = dbmopen_impl(module, name, flags, mode); + return_value = dbmopen_impl(module, filename, flags, mode); exit: return return_value; } -/*[clinic end generated code: output=dc0aca8c00055d02 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=dec05ff9c5aeaeae input=a9049054013a1b77]*/