From 9de7ec7868554e92700e4bda6c5f9d8fcad634dc Mon Sep 17 00:00:00 2001 From: Jeffrey Yasskin Date: Wed, 25 Feb 2009 02:25:04 +0000 Subject: [PATCH] http://bugs.python.org/issue4715 This patch by Antoine Pitrou optimizes the bytecode for conditional branches by merging the following "POP_TOP" instruction into the conditional jump. For example, the list comprehension "[x for x in l if not x]" produced the following bytecode: 1 0 BUILD_LIST 0 3 LOAD_FAST 0 (.0) >> 6 FOR_ITER 23 (to 32) 9 STORE_FAST 1 (x) 12 LOAD_FAST 1 (x) 15 JUMP_IF_TRUE 10 (to 28) 18 POP_TOP 19 LOAD_FAST 1 (x) 22 LIST_APPEND 2 25 JUMP_ABSOLUTE 6 >> 28 POP_TOP 29 JUMP_ABSOLUTE 6 >> 32 RETURN_VALUE but after the patch it produces the following bytecode: 1 0 BUILD_LIST 0 3 LOAD_FAST 0 (.0) >> 6 FOR_ITER 18 (to 27) 9 STORE_FAST 1 (x) 12 LOAD_FAST 1 (x) 15 POP_JUMP_IF_TRUE 6 18 LOAD_FAST 1 (x) 21 LIST_APPEND 2 24 JUMP_ABSOLUTE 6 >> 27 RETURN_VALUE Notice that not only the code is shorter, but the conditional jump (POP_JUMP_IF_TRUE) jumps right to the start of the loop instead of going through the JUMP_ABSOLUTE at the end. "continue" statements are helped similarly. Furthermore, the old jump opcodes (JUMP_IF_FALSE, JUMP_IF_TRUE) have been replaced by two new opcodes: - JUMP_IF_TRUE_OR_POP, which jumps if true and pops otherwise - JUMP_IF_FALSE_OR_POP, which jumps if false and pops otherwise --- Doc/library/dis.rst | 22 +++++--- Include/opcode.h | 8 +-- Lib/opcode.py | 8 +-- Lib/test/test_peepholer.py | 10 ++-- Python/ceval.c | 84 +++++++++++++++++++++++------- Python/compile.c | 78 ++++++++++++---------------- Python/import.c | 4 +- Python/opcode_targets.h | 8 +-- Python/peephole.c | 102 +++++++++++++++++++++---------------- 9 files changed, 195 insertions(+), 129 deletions(-) diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst index 7dc8a3cb3d3..856a9e4f397 100644 --- a/Doc/library/dis.rst +++ b/Doc/library/dis.rst @@ -595,16 +595,26 @@ the more significant byte last. Increments bytecode counter by *delta*. -.. opcode:: JUMP_IF_TRUE (delta) +.. opcode:: POP_JUMP_IF_TRUE (target) - If TOS is true, increment the bytecode counter by *delta*. TOS is left on the - stack. + If TOS is true, sets the bytecode counter to *target*. TOS is popped. -.. opcode:: JUMP_IF_FALSE (delta) +.. opcode:: POP_JUMP_IF_FALSE (target) - If TOS is false, increment the bytecode counter by *delta*. TOS is not - changed. + If TOS is false, sets the bytecode counter to *target*. TOS is popped. + + +.. opcode:: JUMP_IF_TRUE_OR_POP (target) + + If TOS is true, sets the bytecode counter to *target* and leaves TOS + on the stack. Otherwise (TOS is false), TOS is popped. + + +.. opcode:: JUMP_IF_FALSE_OR_POP (target) + + If TOS is false, sets the bytecode counter to *target* and leaves + TOS on the stack. Otherwise (TOS is true), TOS is popped. .. opcode:: JUMP_ABSOLUTE (target) diff --git a/Include/opcode.h b/Include/opcode.h index 063ab6c515d..2b1c59da9a8 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -96,9 +96,11 @@ extern "C" { #define IMPORT_FROM 109 /* Index in name list */ #define JUMP_FORWARD 110 /* Number of bytes to skip */ -#define JUMP_IF_FALSE 111 /* "" */ -#define JUMP_IF_TRUE 112 /* "" */ -#define JUMP_ABSOLUTE 113 /* Target byte offset from beginning of code */ +#define JUMP_IF_FALSE_OR_POP 111 /* Target byte offset from beginning of code */ +#define JUMP_IF_TRUE_OR_POP 112 /* "" */ +#define JUMP_ABSOLUTE 113 /* "" */ +#define POP_JUMP_IF_FALSE 114 /* "" */ +#define POP_JUMP_IF_TRUE 115 /* "" */ #define LOAD_GLOBAL 116 /* Index in name list */ diff --git a/Lib/opcode.py b/Lib/opcode.py index 9484d1ef859..86ebc5c564d 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -131,9 +131,11 @@ name_op('IMPORT_NAME', 108) # Index in name list name_op('IMPORT_FROM', 109) # Index in name list jrel_op('JUMP_FORWARD', 110) # Number of bytes to skip -jrel_op('JUMP_IF_FALSE', 111) # "" -jrel_op('JUMP_IF_TRUE', 112) # "" -jabs_op('JUMP_ABSOLUTE', 113) # Target byte offset from beginning of code +jabs_op('JUMP_IF_FALSE_OR_POP', 111) # Target byte offset from beginning of code +jabs_op('JUMP_IF_TRUE_OR_POP', 112) # "" +jabs_op('JUMP_ABSOLUTE', 113) # "" +jabs_op('POP_JUMP_IF_FALSE', 114) # "" +jabs_op('POP_JUMP_IF_TRUE', 115) # "" name_op('LOAD_GLOBAL', 116) # Index in name list diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index f3e240e8f8b..1543f3b9c14 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -19,14 +19,14 @@ def dis_single(line): class TestTranforms(unittest.TestCase): def test_unot(self): - # UNARY_NOT JUMP_IF_FALSE POP_TOP --> JUMP_IF_TRUE POP_TOP' + # UNARY_NOT POP_JUMP_IF_FALSE --> POP_JUMP_IF_TRUE' def unot(x): if not x == 2: del x asm = disassemble(unot) - for elem in ('UNARY_NOT', 'JUMP_IF_FALSE'): + for elem in ('UNARY_NOT', 'POP_JUMP_IF_FALSE'): self.assert_(elem not in asm) - for elem in ('JUMP_IF_TRUE', 'POP_TOP'): + for elem in ('POP_JUMP_IF_TRUE',): self.assert_(elem in asm) def test_elim_inversion_of_is_or_in(self): @@ -64,13 +64,13 @@ class TestTranforms(unittest.TestCase): self.assert_('LOAD_GLOBAL' not in disassemble(f)) def test_while_one(self): - # Skip over: LOAD_CONST trueconst JUMP_IF_FALSE xx POP_TOP + # Skip over: LOAD_CONST trueconst POP_JUMP_IF_FALSE xx def f(): while 1: pass return list asm = disassemble(f) - for elem in ('LOAD_CONST', 'JUMP_IF_FALSE'): + for elem in ('LOAD_CONST', 'POP_JUMP_IF_FALSE'): self.assert_(elem not in asm) for elem in ('JUMP_ABSOLUTE',): self.assert_(elem in asm) diff --git a/Python/ceval.c b/Python/ceval.c index e0ce92e3f5c..d1d6d1d849d 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1295,7 +1295,6 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag) SETLOCAL(oparg, v); FAST_DISPATCH(); - PREDICTED(POP_TOP); TARGET(POP_TOP) v = POP(); Py_DECREF(v); @@ -2204,8 +2203,8 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag) Py_DECREF(w); SET_TOP(x); if (x == NULL) break; - PREDICT(JUMP_IF_FALSE); - PREDICT(JUMP_IF_TRUE); + PREDICT(POP_JUMP_IF_FALSE); + PREDICT(POP_JUMP_IF_TRUE); DISPATCH(); TARGET(IMPORT_NAME) @@ -2282,44 +2281,95 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag) JUMPBY(oparg); FAST_DISPATCH(); - PREDICTED_WITH_ARG(JUMP_IF_FALSE); - TARGET(JUMP_IF_FALSE) - w = TOP(); + PREDICTED_WITH_ARG(POP_JUMP_IF_FALSE); + TARGET(POP_JUMP_IF_FALSE) + w = POP(); if (w == Py_True) { - PREDICT(POP_TOP); + Py_DECREF(w); FAST_DISPATCH(); } if (w == Py_False) { - JUMPBY(oparg); + Py_DECREF(w); + JUMPTO(oparg); FAST_DISPATCH(); } err = PyObject_IsTrue(w); + Py_DECREF(w); if (err > 0) err = 0; else if (err == 0) - JUMPBY(oparg); + JUMPTO(oparg); else break; DISPATCH(); - PREDICTED_WITH_ARG(JUMP_IF_TRUE); - TARGET(JUMP_IF_TRUE) - w = TOP(); + PREDICTED_WITH_ARG(POP_JUMP_IF_TRUE); + TARGET(POP_JUMP_IF_TRUE) + w = POP(); if (w == Py_False) { - PREDICT(POP_TOP); + Py_DECREF(w); FAST_DISPATCH(); } if (w == Py_True) { - JUMPBY(oparg); + Py_DECREF(w); + JUMPTO(oparg); + FAST_DISPATCH(); + } + err = PyObject_IsTrue(w); + Py_DECREF(w); + if (err > 0) { + err = 0; + JUMPTO(oparg); + } + else if (err == 0) + ; + else + break; + DISPATCH(); + + TARGET(JUMP_IF_FALSE_OR_POP) + w = TOP(); + if (w == Py_True) { + STACKADJ(-1); + Py_DECREF(w); + FAST_DISPATCH(); + } + if (w == Py_False) { + JUMPTO(oparg); + FAST_DISPATCH(); + } + err = PyObject_IsTrue(w); + if (err > 0) { + STACKADJ(-1); + Py_DECREF(w); + err = 0; + } + else if (err == 0) + JUMPTO(oparg); + else + break; + DISPATCH(); + + TARGET(JUMP_IF_TRUE_OR_POP) + w = TOP(); + if (w == Py_False) { + STACKADJ(-1); + Py_DECREF(w); + FAST_DISPATCH(); + } + if (w == Py_True) { + JUMPTO(oparg); FAST_DISPATCH(); } err = PyObject_IsTrue(w); if (err > 0) { err = 0; - JUMPBY(oparg); + JUMPTO(oparg); + } + else if (err == 0) { + STACKADJ(-1); + Py_DECREF(w); } - else if (err == 0) - ; else break; DISPATCH(); diff --git a/Python/compile.c b/Python/compile.c index 38b4aa4ecaf..8fae9d7720f 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -818,11 +818,15 @@ opcode_stack_effect(int opcode, int oparg) return 1; case JUMP_FORWARD: - case JUMP_IF_FALSE: - case JUMP_IF_TRUE: + case JUMP_IF_TRUE_OR_POP: /* -1 if jump not taken */ + case JUMP_IF_FALSE_OR_POP: /* "" */ case JUMP_ABSOLUTE: return 0; + case POP_JUMP_IF_FALSE: + case POP_JUMP_IF_TRUE: + return -1; + case LOAD_GLOBAL: return 1; @@ -1664,12 +1668,10 @@ compiler_ifexp(struct compiler *c, expr_ty e) if (next == NULL) return 0; VISIT(c, expr, e->v.IfExp.test); - ADDOP_JREL(c, JUMP_IF_FALSE, next); - ADDOP(c, POP_TOP); + ADDOP_JABS(c, POP_JUMP_IF_FALSE, next); VISIT(c, expr, e->v.IfExp.body); ADDOP_JREL(c, JUMP_FORWARD, end); compiler_use_next_block(c, next); - ADDOP(c, POP_TOP); VISIT(c, expr, e->v.IfExp.orelse); compiler_use_next_block(c, end); return 1; @@ -1732,9 +1734,6 @@ compiler_if(struct compiler *c, stmt_ty s) end = compiler_new_block(c); if (end == NULL) return 0; - next = compiler_new_block(c); - if (next == NULL) - return 0; constant = expr_constant(s->v.If.test); /* constant = 0: "if 0" @@ -1746,15 +1745,21 @@ compiler_if(struct compiler *c, stmt_ty s) } else if (constant == 1) { VISIT_SEQ(c, stmt, s->v.If.body); } else { + if (s->v.If.orelse) { + next = compiler_new_block(c); + if (next == NULL) + return 0; + } + else + next = end; VISIT(c, expr, s->v.If.test); - ADDOP_JREL(c, JUMP_IF_FALSE, next); - ADDOP(c, POP_TOP); + ADDOP_JABS(c, POP_JUMP_IF_FALSE, next); VISIT_SEQ(c, stmt, s->v.If.body); ADDOP_JREL(c, JUMP_FORWARD, end); - compiler_use_next_block(c, next); - ADDOP(c, POP_TOP); - if (s->v.If.orelse) + if (s->v.If.orelse) { + compiler_use_next_block(c, next); VISIT_SEQ(c, stmt, s->v.If.orelse); + } } compiler_use_next_block(c, end); return 1; @@ -1828,8 +1833,7 @@ compiler_while(struct compiler *c, stmt_ty s) so we need to set an extra line number. */ c->u->u_lineno_set = 0; VISIT(c, expr, s->v.While.test); - ADDOP_JREL(c, JUMP_IF_FALSE, anchor); - ADDOP(c, POP_TOP); + ADDOP_JABS(c, POP_JUMP_IF_FALSE, anchor); } VISIT_SEQ(c, stmt, s->v.While.body); ADDOP_JABS(c, JUMP_ABSOLUTE, loop); @@ -1840,7 +1844,6 @@ compiler_while(struct compiler *c, stmt_ty s) if (constant == -1) { compiler_use_next_block(c, anchor); - ADDOP(c, POP_TOP); ADDOP(c, POP_BLOCK); } compiler_pop_fblock(c, LOOP, loop); @@ -1947,7 +1950,7 @@ compiler_try_finally(struct compiler *c, stmt_ty s) } /* - Code generated for "try: S except E1, V1: S1 except E2, V2: S2 ...": + Code generated for "try: S except E1 as V1: S1 except E2 as V2: S2 ...": (The contents of the value stack is shown in [], with the top at the right; 'tb' is trace-back info, 'val' the exception's associated value, and 'exc' the exception.) @@ -1961,20 +1964,17 @@ compiler_try_finally(struct compiler *c, stmt_ty s) [tb, val, exc] L1: DUP ) [tb, val, exc, exc] ) [tb, val, exc, exc, E1] COMPARE_OP EXC_MATCH ) only if E1 - [tb, val, exc, 1-or-0] JUMP_IF_FALSE L2 ) - [tb, val, exc, 1] POP ) + [tb, val, exc, 1-or-0] POP_JUMP_IF_FALSE L2 ) [tb, val, exc] POP [tb, val] (or POP if no V1) [tb] POP [] JUMP_FORWARD L0 - [tb, val, exc, 0] L2: POP - [tb, val, exc] DUP + [tb, val, exc] L2: DUP .............................etc....................... - [tb, val, exc, 0] Ln+1: POP - [tb, val, exc] END_FINALLY # re-raise exception + [tb, val, exc] Ln+1: END_FINALLY # re-raise exception [] L0: @@ -2016,8 +2016,7 @@ compiler_try_except(struct compiler *c, stmt_ty s) ADDOP(c, DUP_TOP); VISIT(c, expr, handler->v.ExceptHandler.type); ADDOP_I(c, COMPARE_OP, PyCmp_EXC_MATCH); - ADDOP_JREL(c, JUMP_IF_FALSE, except); - ADDOP(c, POP_TOP); + ADDOP_JABS(c, POP_JUMP_IF_FALSE, except); } ADDOP(c, POP_TOP); if (handler->v.ExceptHandler.name) { @@ -2088,8 +2087,6 @@ compiler_try_except(struct compiler *c, stmt_ty s) } ADDOP_JREL(c, JUMP_FORWARD, end); compiler_use_next_block(c, except); - if (handler->v.ExceptHandler.type) - ADDOP(c, POP_TOP); } ADDOP(c, END_FINALLY); compiler_use_next_block(c, orelse); @@ -2268,8 +2265,7 @@ compiler_assert(struct compiler *c, stmt_ty s) end = compiler_new_block(c); if (end == NULL) return 0; - ADDOP_JREL(c, JUMP_IF_TRUE, end); - ADDOP(c, POP_TOP); + ADDOP_JABS(c, POP_JUMP_IF_TRUE, end); ADDOP_O(c, LOAD_GLOBAL, assertion_error, names); if (s->v.Assert.msg) { VISIT(c, expr, s->v.Assert.msg); @@ -2277,7 +2273,6 @@ compiler_assert(struct compiler *c, stmt_ty s) } ADDOP_I(c, RAISE_VARARGS, 1); compiler_use_next_block(c, end); - ADDOP(c, POP_TOP); return 1; } @@ -2629,9 +2624,9 @@ compiler_boolop(struct compiler *c, expr_ty e) assert(e->kind == BoolOp_kind); if (e->v.BoolOp.op == And) - jumpi = JUMP_IF_FALSE; + jumpi = JUMP_IF_FALSE_OR_POP; else - jumpi = JUMP_IF_TRUE; + jumpi = JUMP_IF_TRUE_OR_POP; end = compiler_new_block(c); if (end == NULL) return 0; @@ -2640,8 +2635,7 @@ compiler_boolop(struct compiler *c, expr_ty e) assert(n >= 0); for (i = 0; i < n; ++i) { VISIT(c, expr, (expr_ty)asdl_seq_GET(s, i)); - ADDOP_JREL(c, jumpi, end); - ADDOP(c, POP_TOP) + ADDOP_JABS(c, jumpi, end); } VISIT(c, expr, (expr_ty)asdl_seq_GET(s, n)); compiler_use_next_block(c, end); @@ -2737,9 +2731,8 @@ compiler_compare(struct compiler *c, expr_ty e) ADDOP_I(c, COMPARE_OP, cmpop((cmpop_ty)(asdl_seq_GET( e->v.Compare.ops, i - 1)))); - ADDOP_JREL(c, JUMP_IF_FALSE, cleanup); + ADDOP_JABS(c, JUMP_IF_FALSE_OR_POP, cleanup); NEXT_BLOCK(c); - ADDOP(c, POP_TOP); if (i < (n - 1)) VISIT(c, expr, (expr_ty)asdl_seq_GET(e->v.Compare.comparators, i)); @@ -2872,10 +2865,9 @@ compiler_comprehension_generator(struct compiler *c, for (i = 0; i < n; i++) { expr_ty e = (expr_ty)asdl_seq_GET(gen->ifs, i); VISIT(c, expr, e); - ADDOP_JREL(c, JUMP_IF_FALSE, if_cleanup); + ADDOP_JABS(c, POP_JUMP_IF_FALSE, if_cleanup); NEXT_BLOCK(c); - ADDOP(c, POP_TOP); - } + } if (++gen_index < asdl_seq_LEN(generators)) if (!compiler_comprehension_generator(c, @@ -2913,13 +2905,7 @@ compiler_comprehension_generator(struct compiler *c, compiler_use_next_block(c, skip); } - for (i = 0; i < n; i++) { - ADDOP_I(c, JUMP_FORWARD, 1); - if (i == 0) - compiler_use_next_block(c, if_cleanup); - - ADDOP(c, POP_TOP); - } + compiler_use_next_block(c, if_cleanup); ADDOP_JABS(c, JUMP_ABSOLUTE, start); compiler_use_next_block(c, anchor); diff --git a/Python/import.c b/Python/import.c index 5a2c3cb0386..11cbc58189c 100644 --- a/Python/import.c +++ b/Python/import.c @@ -87,8 +87,10 @@ typedef unsigned short mode_t; Python 3.0a5: 3130 (lexical exception stacking, including POP_EXCEPT) Python 3.1a0: 3140 (optimize list, set and dict comprehensions: change LIST_APPEND and SET_ADD, add MAP_ADD) + Python 3.1a0: 3150 (optimize conditional branches: + introduce POP_JUMP_IF_FALSE and POP_JUMP_IF_TRUE) */ -#define MAGIC (3140 | ((long)'\r'<<16) | ((long)'\n'<<24)) +#define MAGIC (3150 | ((long)'\r'<<16) | ((long)'\n'<<24)) /* Magic word as global; note that _PyImport_Init() can change the value of this global to accommodate for alterations of how the diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index c66271756e6..043e42a30a5 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -110,11 +110,11 @@ static void *opcode_targets[256] = { &&TARGET_IMPORT_NAME, &&TARGET_IMPORT_FROM, &&TARGET_JUMP_FORWARD, - &&TARGET_JUMP_IF_FALSE, - &&TARGET_JUMP_IF_TRUE, + &&TARGET_JUMP_IF_FALSE_OR_POP, + &&TARGET_JUMP_IF_TRUE_OR_POP, &&TARGET_JUMP_ABSOLUTE, - &&_unknown_opcode, - &&_unknown_opcode, + &&TARGET_POP_JUMP_IF_FALSE, + &&TARGET_POP_JUMP_IF_TRUE, &&TARGET_LOAD_GLOBAL, &&_unknown_opcode, &&_unknown_opcode, diff --git a/Python/peephole.c b/Python/peephole.c index d31c89b86e9..c413fc8e873 100644 --- a/Python/peephole.c +++ b/Python/peephole.c @@ -13,7 +13,12 @@ #define GETARG(arr, i) ((int)((arr[i+2]<<8) + arr[i+1])) #define UNCONDITIONAL_JUMP(op) (op==JUMP_ABSOLUTE || op==JUMP_FORWARD) -#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP) +#define CONDITIONAL_JUMP(op) (op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \ + || op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP) +#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP \ + || op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \ + || op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP) +#define JUMPS_ON_TRUE(op) (op==POP_JUMP_IF_TRUE || op==JUMP_IF_TRUE_OR_POP) #define GETJUMPTGT(arr, i) (GETARG(arr,i) + (ABSOLUTE_JUMP(arr[i]) ? 0 : i+3)) #define SETARG(arr, i, val) arr[i+2] = val>>8; arr[i+1] = val & 255 #define CODESIZE(op) (HAS_ARG(op) ? 3 : 1) @@ -237,8 +242,10 @@ markblocks(unsigned char *code, Py_ssize_t len) switch (opcode) { case FOR_ITER: case JUMP_FORWARD: - case JUMP_IF_FALSE: - case JUMP_IF_TRUE: + case JUMP_IF_FALSE_OR_POP: + case JUMP_IF_TRUE_OR_POP: + case POP_JUMP_IF_FALSE: + case POP_JUMP_IF_TRUE: case JUMP_ABSOLUTE: case CONTINUE_LOOP: case SETUP_LOOP: @@ -363,29 +370,24 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, assert(PyList_Check(consts)); for (i=0 ; i a is not b not a in b --> a not in b @@ -417,29 +419,19 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, break; /* Skip over LOAD_CONST trueconst - JUMP_IF_FALSE xx POP_TOP */ + POP_JUMP_IF_FALSE xx. This improves + "while 1" performance. */ case LOAD_CONST: cumlc = lastlc + 1; j = GETARG(codestr, i); - if (codestr[i+3] != JUMP_IF_FALSE || - codestr[i+6] != POP_TOP || + if (codestr[i+3] != POP_JUMP_IF_FALSE || !ISBASICBLOCK(blocks,i,7) || !PyObject_IsTrue(PyList_GET_ITEM(consts, j))) continue; - memset(codestr+i, NOP, 7); + memset(codestr+i, NOP, 6); cumlc = 0; break; - /* Replace POP_TOP JUMP_FORWARD 1 POP_TOP - with NOP NOP NOP NOP POP_TOP. */ - case POP_TOP: - if (UNCONDITIONAL_JUMP(codestr[i+1]) && - GETJUMPTGT(codestr, i+1) == i+5 && - codestr[i+4] == POP_TOP && - ISBASICBLOCK(blocks,i,4)) - memset(codestr+i, NOP, 4); - break; - /* Try to fold tuples of constants (includes a case for lists which are only used for "in" and "not in" tests). Skip over BUILD_SEQN 1 UNPACK_SEQN 1. @@ -524,27 +516,47 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, "if a or b:" "a and b or c" "(a and b) and c" - x:JUMP_IF_FALSE y y:JUMP_IF_FALSE z --> x:JUMP_IF_FALSE z - x:JUMP_IF_FALSE y y:JUMP_IF_TRUE z --> x:JUMP_IF_FALSE y+3 + x:POP_OR_JUMP y y:POP_OR_JUMP z --> x:POP_OR_JUMP z + x:POP_OR_JUMP y y:JUMP_OR_POP z --> x:POP_JUMP_IF_FALSE y+3 where y+3 is the instruction following the second test. */ - case JUMP_IF_FALSE: - case JUMP_IF_TRUE: + case JUMP_IF_FALSE_OR_POP: + case JUMP_IF_TRUE_OR_POP: tgt = GETJUMPTGT(codestr, i); j = codestr[tgt]; - if (j == JUMP_IF_FALSE || j == JUMP_IF_TRUE) { - if (j == opcode) { - tgttgt = GETJUMPTGT(codestr, tgt) - i - 3; + if (CONDITIONAL_JUMP(j)) { + /* NOTE: all possible jumps here are + absolute! */ + if (JUMPS_ON_TRUE(j) == JUMPS_ON_TRUE(opcode)) { + /* The second jump will be + taken iff the first is. */ + tgttgt = GETJUMPTGT(codestr, tgt); + /* The current opcode inherits + its target's stack behaviour */ + codestr[i] = j; SETARG(codestr, i, tgttgt); + goto reoptimize_current; } else { - tgt -= i; - SETARG(codestr, i, tgt); + /* The second jump is not taken + if the first is (so jump past + it), and all conditional + jumps pop their argument when + they're not taken (so change + the first jump to pop its + argument when it's taken). */ + if (JUMPS_ON_TRUE(opcode)) + codestr[i] = POP_JUMP_IF_TRUE; + else + codestr[i] = POP_JUMP_IF_FALSE; + SETARG(codestr, i, (tgt + 3)); + goto reoptimize_current; } - break; } - /* Intentional fallthrough */ + /* Intentional fallthrough */ /* Replace jumps to unconditional jumps */ + case POP_JUMP_IF_FALSE: + case POP_JUMP_IF_TRUE: case FOR_ITER: case JUMP_FORWARD: case JUMP_ABSOLUTE: @@ -621,14 +633,16 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, case JUMP_ABSOLUTE: case CONTINUE_LOOP: + case POP_JUMP_IF_FALSE: + case POP_JUMP_IF_TRUE: + case JUMP_IF_FALSE_OR_POP: + case JUMP_IF_TRUE_OR_POP: j = addrmap[GETARG(codestr, i)]; SETARG(codestr, i, j); break; case FOR_ITER: case JUMP_FORWARD: - case JUMP_IF_FALSE: - case JUMP_IF_TRUE: case SETUP_LOOP: case SETUP_EXCEPT: case SETUP_FINALLY: