mirror of
https://github.com/python/cpython.git
synced 2024-11-28 12:31:14 +08:00
Merged revisions 68462 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk ........ r68462 | antoine.pitrou | 2009-01-09 22:40:55 +0100 (ven., 09 janv. 2009) | 6 lines Issue #4074: Change the criteria for doing a full garbage collection (i.e. collecting the oldest generation) so that allocating lots of objects without destroying them does not show quadratic performance. Based on a proposal by Martin von Löwis at http://mail.python.org/pipermail/python-dev/2008-June/080579.html. ........
This commit is contained in:
parent
9169641b8b
commit
14b78f5fc5
@ -12,6 +12,12 @@ What's New in Python 3.1 alpha 0
|
|||||||
Core and Builtins
|
Core and Builtins
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- Issue #4074: Change the criteria for doing a full garbage collection (i.e.
|
||||||
|
collecting the oldest generation) so that allocating lots of objects without
|
||||||
|
destroying them does not show quadratic performance. Based on a proposal by
|
||||||
|
Martin von Löwis at
|
||||||
|
http://mail.python.org/pipermail/python-dev/2008-June/080579.html.
|
||||||
|
|
||||||
- Issue #4604: Some objects of the I/O library could still be used after
|
- Issue #4604: Some objects of the I/O library could still be used after
|
||||||
having been closed (for instance, a read() call could return some
|
having been closed (for instance, a read() call could return some
|
||||||
previously buffered data). Patch by Dmitry Vasiliev.
|
previously buffered data). Patch by Dmitry Vasiliev.
|
||||||
|
@ -68,6 +68,55 @@ static PyObject *gc_str = NULL;
|
|||||||
/* Python string used to look for __del__ attribute. */
|
/* Python string used to look for __del__ attribute. */
|
||||||
static PyObject *delstr = NULL;
|
static PyObject *delstr = NULL;
|
||||||
|
|
||||||
|
/* This is the number of objects who survived the last full collection. It
|
||||||
|
approximates the number of long lived objects tracked by the GC.
|
||||||
|
|
||||||
|
(by "full collection", we mean a collection of the oldest generation).
|
||||||
|
*/
|
||||||
|
static Py_ssize_t long_lived_total = 0;
|
||||||
|
|
||||||
|
/* This is the number of objects who survived all "non-full" collections,
|
||||||
|
and are awaiting to undergo a full collection for the first time.
|
||||||
|
|
||||||
|
*/
|
||||||
|
static Py_ssize_t long_lived_pending = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
NOTE: about the counting of long-lived objects.
|
||||||
|
|
||||||
|
To limit the cost of garbage collection, there are two strategies;
|
||||||
|
- make each collection faster, e.g. by scanning fewer objects
|
||||||
|
- do less collections
|
||||||
|
This heuristic is about the latter strategy.
|
||||||
|
|
||||||
|
In addition to the various configurable thresholds, we only trigger a
|
||||||
|
full collection if the ratio
|
||||||
|
long_lived_pending / long_lived_total
|
||||||
|
is above a given value (hardwired to 25%).
|
||||||
|
|
||||||
|
The reason is that, while "non-full" collections (i.e., collections of
|
||||||
|
the young and middle generations) will always examine roughly the same
|
||||||
|
number of objects -- determined by the aforementioned thresholds --,
|
||||||
|
the cost of a full collection is proportional to the total number of
|
||||||
|
long-lived objects, which is virtually unbounded.
|
||||||
|
|
||||||
|
Indeed, it has been remarked that doing a full collection every
|
||||||
|
<constant number> of object creations entails a dramatic performance
|
||||||
|
degradation in workloads which consist in creating and storing lots of
|
||||||
|
long-lived objects (e.g. building a large list of GC-tracked objects would
|
||||||
|
show quadratic performance, instead of linear as expected: see issue #4074).
|
||||||
|
|
||||||
|
Using the above ratio, instead, yields amortized linear performance in
|
||||||
|
the total number of objects (the effect of which can be summarized
|
||||||
|
thusly: "each full garbage collection is more and more costly as the
|
||||||
|
number of objects grows, but we do fewer and fewer of them").
|
||||||
|
|
||||||
|
This heuristic was suggested by Martin von Löwis on python-dev in
|
||||||
|
June 2008. His original analysis and proposal can be found at:
|
||||||
|
http://mail.python.org/pipermail/python-dev/2008-June/080579.html
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
/* set for debugging information */
|
/* set for debugging information */
|
||||||
#define DEBUG_STATS (1<<0) /* print collection statistics */
|
#define DEBUG_STATS (1<<0) /* print collection statistics */
|
||||||
#define DEBUG_COLLECTABLE (1<<1) /* print collectable objects */
|
#define DEBUG_COLLECTABLE (1<<1) /* print collectable objects */
|
||||||
@ -795,8 +844,16 @@ collect(int generation)
|
|||||||
move_unreachable(young, &unreachable);
|
move_unreachable(young, &unreachable);
|
||||||
|
|
||||||
/* Move reachable objects to next generation. */
|
/* Move reachable objects to next generation. */
|
||||||
if (young != old)
|
if (young != old) {
|
||||||
|
if (generation == NUM_GENERATIONS - 2) {
|
||||||
|
long_lived_pending += gc_list_size(young);
|
||||||
|
}
|
||||||
gc_list_merge(young, old);
|
gc_list_merge(young, old);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
long_lived_pending = 0;
|
||||||
|
long_lived_total = gc_list_size(young);
|
||||||
|
}
|
||||||
|
|
||||||
/* All objects in unreachable are trash, but objects reachable from
|
/* All objects in unreachable are trash, but objects reachable from
|
||||||
* finalizers can't safely be deleted. Python programmers should take
|
* finalizers can't safely be deleted. Python programmers should take
|
||||||
@ -890,6 +947,13 @@ collect_generations(void)
|
|||||||
* generations younger than it will be collected. */
|
* generations younger than it will be collected. */
|
||||||
for (i = NUM_GENERATIONS-1; i >= 0; i--) {
|
for (i = NUM_GENERATIONS-1; i >= 0; i--) {
|
||||||
if (generations[i].count > generations[i].threshold) {
|
if (generations[i].count > generations[i].threshold) {
|
||||||
|
/* Avoid quadratic performance degradation in number
|
||||||
|
of tracked objects. See comments at the beginning
|
||||||
|
of this file, and issue #4074.
|
||||||
|
*/
|
||||||
|
if (i == NUM_GENERATIONS - 1
|
||||||
|
&& long_lived_pending < long_lived_total / 4)
|
||||||
|
continue;
|
||||||
n = collect(i);
|
n = collect(i);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user