mirror of
https://github.com/python/cpython.git
synced 2025-01-21 07:55:16 +08:00
#5502: accelerate binary buffered IO (especially small operations).
On a suggestion by Victor Stinner.
This commit is contained in:
parent
aa4398b642
commit
711af3ae1e
@ -174,7 +174,7 @@ PyTypeObject PyBufferedIOBase_Type = {
|
||||
0, /* tp_alloc */
|
||||
0, /* tp_new */
|
||||
};
|
||||
|
||||
|
||||
|
||||
typedef struct {
|
||||
PyObject_HEAD
|
||||
@ -183,6 +183,10 @@ typedef struct {
|
||||
int ok; /* Initialized? */
|
||||
int readable;
|
||||
int writable;
|
||||
|
||||
/* True if this is a vanilla Buffered object (rather than a user-derived
|
||||
class) *and* the raw stream is a vanilla FileIO object. */
|
||||
int fast_closed_checks;
|
||||
|
||||
/* Absolute position inside the raw stream (-1 if unknown). */
|
||||
Py_off_t abs_pos;
|
||||
@ -268,6 +272,18 @@ typedef struct {
|
||||
return -1; \
|
||||
}
|
||||
|
||||
#define IS_CLOSED(self) \
|
||||
(self->fast_closed_checks \
|
||||
? _PyFileIO_closed(self->raw) \
|
||||
: BufferedIOMixin_closed(self))
|
||||
|
||||
#define CHECK_CLOSED(self, error_msg) \
|
||||
if (IS_CLOSED(self)) { \
|
||||
PyErr_SetString(PyExc_ValueError, error_msg); \
|
||||
return NULL; \
|
||||
}
|
||||
|
||||
|
||||
#define VALID_READ_BUFFER(self) \
|
||||
(self->readable && self->read_end != -1)
|
||||
|
||||
@ -466,8 +482,8 @@ BufferedIOMixin_isatty(BufferedObject *self, PyObject *args)
|
||||
CHECK_INITIALIZED(self)
|
||||
return PyObject_CallMethodObjArgs(self->raw, _PyIO_str_isatty, NULL);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/* Forward decls */
|
||||
static PyObject *
|
||||
_BufferedWriter_flush_unlocked(BufferedObject *, int);
|
||||
@ -480,7 +496,11 @@ _BufferedWriter_reset_buf(BufferedObject *self);
|
||||
static PyObject *
|
||||
_BufferedReader_peek_unlocked(BufferedObject *self, Py_ssize_t);
|
||||
static PyObject *
|
||||
_BufferedReader_read_unlocked(BufferedObject *self, Py_ssize_t);
|
||||
_BufferedReader_read_all(BufferedObject *self);
|
||||
static PyObject *
|
||||
_BufferedReader_read_fast(BufferedObject *self, Py_ssize_t);
|
||||
static PyObject *
|
||||
_BufferedReader_read_generic(BufferedObject *self, Py_ssize_t);
|
||||
|
||||
|
||||
/*
|
||||
@ -509,8 +529,8 @@ _Buffered_check_blocking_error(void)
|
||||
static Py_off_t
|
||||
_Buffered_raw_tell(BufferedObject *self)
|
||||
{
|
||||
PyObject *res;
|
||||
Py_off_t n;
|
||||
PyObject *res;
|
||||
res = PyObject_CallMethodObjArgs(self->raw, _PyIO_str_tell, NULL);
|
||||
if (res == NULL)
|
||||
return -1;
|
||||
@ -604,10 +624,7 @@ Buffered_flush(BufferedObject *self, PyObject *args)
|
||||
PyObject *res;
|
||||
|
||||
CHECK_INITIALIZED(self)
|
||||
if (BufferedIOMixin_closed(self)) {
|
||||
PyErr_SetString(PyExc_ValueError, "flush of closed file");
|
||||
return NULL;
|
||||
}
|
||||
CHECK_CLOSED(self, "flush of closed file")
|
||||
|
||||
ENTER_BUFFERED(self)
|
||||
res = _BufferedWriter_flush_unlocked(self, 0);
|
||||
@ -667,14 +684,23 @@ Buffered_read(BufferedObject *self, PyObject *args)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (BufferedIOMixin_closed(self)) {
|
||||
PyErr_SetString(PyExc_ValueError, "read of closed file");
|
||||
return NULL;
|
||||
}
|
||||
CHECK_CLOSED(self, "read of closed file")
|
||||
|
||||
ENTER_BUFFERED(self)
|
||||
res = _BufferedReader_read_unlocked(self, n);
|
||||
LEAVE_BUFFERED(self)
|
||||
if (n == -1) {
|
||||
/* The number of bytes is unspecified, read until the end of stream */
|
||||
ENTER_BUFFERED(self)
|
||||
res = _BufferedReader_read_all(self);
|
||||
LEAVE_BUFFERED(self)
|
||||
}
|
||||
else {
|
||||
res = _BufferedReader_read_fast(self, n);
|
||||
if (res == Py_None) {
|
||||
Py_DECREF(res);
|
||||
ENTER_BUFFERED(self)
|
||||
res = _BufferedReader_read_generic(self, n);
|
||||
LEAVE_BUFFERED(self)
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
@ -775,35 +801,31 @@ _Buffered_readline(BufferedObject *self, Py_ssize_t limit)
|
||||
Py_ssize_t n, written = 0;
|
||||
const char *start, *s, *end;
|
||||
|
||||
if (BufferedIOMixin_closed(self)) {
|
||||
PyErr_SetString(PyExc_ValueError, "readline of closed file");
|
||||
return NULL;
|
||||
}
|
||||
CHECK_CLOSED(self, "readline of closed file")
|
||||
|
||||
ENTER_BUFFERED(self)
|
||||
|
||||
/* First, try to find a line in the buffer */
|
||||
/* First, try to find a line in the buffer. This can run unlocked because
|
||||
the calls to the C API are simple enough that they can't trigger
|
||||
any thread switch. */
|
||||
n = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t);
|
||||
if (limit >= 0 && n > limit)
|
||||
n = limit;
|
||||
start = self->buffer + self->pos;
|
||||
end = start + n;
|
||||
s = start;
|
||||
while (s < end) {
|
||||
if (*s++ == '\n') {
|
||||
res = PyBytes_FromStringAndSize(start, s - start);
|
||||
if (res != NULL)
|
||||
self->pos += s - start;
|
||||
goto end;
|
||||
}
|
||||
s = memchr(start, '\n', n);
|
||||
if (s != NULL) {
|
||||
res = PyBytes_FromStringAndSize(start, s - start + 1);
|
||||
if (res != NULL)
|
||||
self->pos += s - start + 1;
|
||||
goto end_unlocked;
|
||||
}
|
||||
if (n == limit) {
|
||||
res = PyBytes_FromStringAndSize(start, n);
|
||||
if (res != NULL)
|
||||
self->pos += n;
|
||||
goto end;
|
||||
goto end_unlocked;
|
||||
}
|
||||
|
||||
ENTER_BUFFERED(self)
|
||||
|
||||
/* Now we try to get some more from the raw stream */
|
||||
if (self->writable) {
|
||||
res = _BufferedWriter_flush_unlocked(self, 1);
|
||||
@ -875,6 +897,7 @@ found:
|
||||
|
||||
end:
|
||||
LEAVE_BUFFERED(self)
|
||||
end_unlocked:
|
||||
Py_XDECREF(chunks);
|
||||
return res;
|
||||
}
|
||||
@ -918,23 +941,26 @@ Buffered_seek(BufferedObject *self, PyObject *args)
|
||||
if (!PyArg_ParseTuple(args, "O|i:seek", &targetobj, &whence)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (whence < 0 || whence > 2) {
|
||||
PyErr_Format(PyExc_ValueError,
|
||||
"whence must be between 0 and 2, not %d", whence);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
CHECK_CLOSED(self, "seek of closed file")
|
||||
|
||||
target = PyNumber_AsOff_t(targetobj, PyExc_ValueError);
|
||||
if (target == -1 && PyErr_Occurred())
|
||||
return NULL;
|
||||
|
||||
ENTER_BUFFERED(self)
|
||||
|
||||
if (whence != 2 && self->readable) {
|
||||
Py_off_t current, avail;
|
||||
/* Check if seeking leaves us inside the current buffer,
|
||||
so as to return quickly if possible.
|
||||
so as to return quickly if possible. Also, we needn't take the
|
||||
lock in this fast path.
|
||||
Don't know how to do that when whence == 2, though. */
|
||||
/* NOTE: RAW_TELL() can release the GIL but the object is in a stable
|
||||
state at this point. */
|
||||
current = RAW_TELL(self);
|
||||
avail = READAHEAD(self);
|
||||
if (avail > 0) {
|
||||
@ -945,12 +971,13 @@ Buffered_seek(BufferedObject *self, PyObject *args)
|
||||
offset = target;
|
||||
if (offset >= -self->pos && offset <= avail) {
|
||||
self->pos += offset;
|
||||
res = PyLong_FromOff_t(current - avail + offset);
|
||||
goto end;
|
||||
return PyLong_FromOff_t(current - avail + offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ENTER_BUFFERED(self)
|
||||
|
||||
/* Fallback: invoke raw seek() method and clear buffer */
|
||||
if (self->writable) {
|
||||
res = _BufferedWriter_flush_unlocked(self, 0);
|
||||
@ -1094,6 +1121,9 @@ BufferedReader_init(BufferedObject *self, PyObject *args, PyObject *kwds)
|
||||
return -1;
|
||||
_BufferedReader_reset_buf(self);
|
||||
|
||||
self->fast_closed_checks = (Py_TYPE(self) == &PyBufferedReader_Type &&
|
||||
Py_TYPE(raw) == &PyFileIO_Type);
|
||||
|
||||
self->ok = 1;
|
||||
return 0;
|
||||
}
|
||||
@ -1150,93 +1180,107 @@ _BufferedReader_fill_buffer(BufferedObject *self)
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
_BufferedReader_read_unlocked(BufferedObject *self, Py_ssize_t n)
|
||||
_BufferedReader_read_all(BufferedObject *self)
|
||||
{
|
||||
PyObject *data, *res = NULL;
|
||||
Py_ssize_t current_size, remaining, written;
|
||||
char *out;
|
||||
Py_ssize_t current_size;
|
||||
PyObject *res, *data = NULL;
|
||||
PyObject *chunks = PyList_New(0);
|
||||
|
||||
/* Special case for when the number of bytes to read is unspecified. */
|
||||
if (n == -1) {
|
||||
PyObject *chunks = PyList_New(0);
|
||||
if (chunks == NULL)
|
||||
if (chunks == NULL)
|
||||
return NULL;
|
||||
|
||||
/* First copy what we have in the current buffer. */
|
||||
current_size = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t);
|
||||
if (current_size) {
|
||||
data = PyBytes_FromStringAndSize(
|
||||
self->buffer + self->pos, current_size);
|
||||
if (data == NULL) {
|
||||
Py_DECREF(chunks);
|
||||
return NULL;
|
||||
|
||||
/* First copy what we have in the current buffer. */
|
||||
current_size = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t);
|
||||
data = NULL;
|
||||
if (current_size) {
|
||||
data = PyBytes_FromStringAndSize(
|
||||
self->buffer + self->pos, current_size);
|
||||
if (data == NULL) {
|
||||
Py_DECREF(chunks);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
_BufferedReader_reset_buf(self);
|
||||
/* We're going past the buffer's bounds, flush it */
|
||||
if (self->writable) {
|
||||
res = _BufferedWriter_flush_unlocked(self, 1);
|
||||
if (res == NULL) {
|
||||
Py_DECREF(chunks);
|
||||
return NULL;
|
||||
}
|
||||
Py_CLEAR(res);
|
||||
}
|
||||
while (1) {
|
||||
if (data) {
|
||||
if (PyList_Append(chunks, data) < 0) {
|
||||
Py_DECREF(data);
|
||||
Py_DECREF(chunks);
|
||||
return NULL;
|
||||
}
|
||||
Py_DECREF(data);
|
||||
}
|
||||
|
||||
/* Read until EOF or until read() would block. */
|
||||
data = PyObject_CallMethodObjArgs(self->raw, _PyIO_str_read, NULL);
|
||||
if (data == NULL) {
|
||||
Py_DECREF(chunks);
|
||||
return NULL;
|
||||
}
|
||||
if (data != Py_None && !PyBytes_Check(data)) {
|
||||
Py_DECREF(data);
|
||||
Py_DECREF(chunks);
|
||||
PyErr_SetString(PyExc_TypeError, "read() should return bytes");
|
||||
return NULL;
|
||||
}
|
||||
if (data == Py_None || PyBytes_GET_SIZE(data) == 0) {
|
||||
if (current_size == 0) {
|
||||
Py_DECREF(chunks);
|
||||
return data;
|
||||
}
|
||||
else {
|
||||
res = _PyBytes_Join(_PyIO_empty_bytes, chunks);
|
||||
Py_DECREF(data);
|
||||
Py_DECREF(chunks);
|
||||
return res;
|
||||
}
|
||||
}
|
||||
current_size += PyBytes_GET_SIZE(data);
|
||||
if (self->abs_pos != -1)
|
||||
self->abs_pos += PyBytes_GET_SIZE(data);
|
||||
}
|
||||
}
|
||||
_BufferedReader_reset_buf(self);
|
||||
/* We're going past the buffer's bounds, flush it */
|
||||
if (self->writable) {
|
||||
res = _BufferedWriter_flush_unlocked(self, 1);
|
||||
if (res == NULL) {
|
||||
Py_DECREF(chunks);
|
||||
return NULL;
|
||||
}
|
||||
Py_CLEAR(res);
|
||||
}
|
||||
while (1) {
|
||||
if (data) {
|
||||
if (PyList_Append(chunks, data) < 0) {
|
||||
Py_DECREF(data);
|
||||
Py_DECREF(chunks);
|
||||
return NULL;
|
||||
}
|
||||
Py_DECREF(data);
|
||||
}
|
||||
|
||||
/* Read until EOF or until read() would block. */
|
||||
data = PyObject_CallMethodObjArgs(self->raw, _PyIO_str_read, NULL);
|
||||
if (data == NULL) {
|
||||
Py_DECREF(chunks);
|
||||
return NULL;
|
||||
}
|
||||
if (data != Py_None && !PyBytes_Check(data)) {
|
||||
Py_DECREF(data);
|
||||
Py_DECREF(chunks);
|
||||
PyErr_SetString(PyExc_TypeError, "read() should return bytes");
|
||||
return NULL;
|
||||
}
|
||||
if (data == Py_None || PyBytes_GET_SIZE(data) == 0) {
|
||||
if (current_size == 0) {
|
||||
Py_DECREF(chunks);
|
||||
return data;
|
||||
}
|
||||
else {
|
||||
res = _PyBytes_Join(_PyIO_empty_bytes, chunks);
|
||||
Py_DECREF(data);
|
||||
Py_DECREF(chunks);
|
||||
return res;
|
||||
}
|
||||
}
|
||||
current_size += PyBytes_GET_SIZE(data);
|
||||
if (self->abs_pos != -1)
|
||||
self->abs_pos += PyBytes_GET_SIZE(data);
|
||||
}
|
||||
}
|
||||
|
||||
/* Read n bytes from the buffer if it already holds that many; otherwise return None.
|
||||
This function is simple enough that it can run unlocked. */
|
||||
static PyObject *
|
||||
_BufferedReader_read_fast(BufferedObject *self, Py_ssize_t n)
|
||||
{
|
||||
Py_ssize_t current_size;
|
||||
|
||||
/* The number of bytes to read is specified, return at most n bytes. */
|
||||
current_size = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t);
|
||||
if (n <= current_size) {
|
||||
/* Fast path: the data to read is fully buffered. */
|
||||
res = PyBytes_FromStringAndSize(self->buffer + self->pos, n);
|
||||
if (res == NULL)
|
||||
goto error;
|
||||
self->pos += n;
|
||||
PyObject *res = PyBytes_FromStringAndSize(self->buffer + self->pos, n);
|
||||
if (res != NULL)
|
||||
self->pos += n;
|
||||
return res;
|
||||
}
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
/* Generic read function: read from the stream until enough bytes are read,
|
||||
* or until an EOF occurs or until read() would block.
|
||||
*/
|
||||
static PyObject *
|
||||
_BufferedReader_read_generic(BufferedObject *self, Py_ssize_t n)
|
||||
{
|
||||
PyObject *res = NULL;
|
||||
Py_ssize_t current_size, remaining, written;
|
||||
char *out;
|
||||
|
||||
current_size = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t);
|
||||
if (n <= current_size)
|
||||
return _BufferedReader_read_fast(self, n);
|
||||
|
||||
/* Slow path: read from the stream until enough bytes are read,
|
||||
* or until an EOF occurs or until read() would block.
|
||||
*/
|
||||
res = PyBytes_FromStringAndSize(NULL, n);
|
||||
if (res == NULL)
|
||||
goto error;
|
||||
@ -1479,6 +1523,9 @@ BufferedWriter_init(BufferedObject *self, PyObject *args, PyObject *kwds)
|
||||
_BufferedWriter_reset_buf(self);
|
||||
self->pos = 0;
|
||||
|
||||
self->fast_closed_checks = (Py_TYPE(self) == &PyBufferedWriter_Type &&
|
||||
Py_TYPE(raw) == &PyFileIO_Type);
|
||||
|
||||
self->ok = 1;
|
||||
return 0;
|
||||
}
|
||||
@ -1583,7 +1630,7 @@ BufferedWriter_write(BufferedObject *self, PyObject *args)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (BufferedIOMixin_closed(self)) {
|
||||
if (IS_CLOSED(self)) {
|
||||
PyErr_SetString(PyExc_ValueError, "write to closed file");
|
||||
PyBuffer_Release(&buf);
|
||||
return NULL;
|
||||
@ -2066,6 +2113,9 @@ BufferedRandom_init(BufferedObject *self, PyObject *args, PyObject *kwds)
|
||||
_BufferedWriter_reset_buf(self);
|
||||
self->pos = 0;
|
||||
|
||||
self->fast_closed_checks = (Py_TYPE(self) == &PyBufferedRandom_Type &&
|
||||
Py_TYPE(raw) == &PyFileIO_Type);
|
||||
|
||||
self->ok = 1;
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user