diff --git a/CMakeLists.txt b/CMakeLists.txt index bb2cf8be..f61d773d 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -364,6 +364,7 @@ if(ARM_DYNAREC) "${BOX86_ROOT}/src/dynarec/arm_epilog.S" "${BOX86_ROOT}/src/dynarec/arm_linker.S" "${BOX86_ROOT}/src/dynarec/arm_table.S" + "${BOX86_ROOT}/src/dynarec/arm_lock_helper.S" ) set(DYNAREC_PASS diff --git a/src/box86context.c b/src/box86context.c index 392812f4..7513e7aa 100755 --- a/src/box86context.c +++ b/src/box86context.c @@ -298,7 +298,9 @@ box86context_t *NewBox86Context(int argc) pthread_mutex_init(&context->mutex_once, NULL); pthread_mutex_init(&context->mutex_once2, NULL); pthread_mutex_init(&context->mutex_trace, NULL); +#ifndef DYNAREC pthread_mutex_init(&context->mutex_lock, NULL); +#endif pthread_mutex_init(&context->mutex_tls, NULL); pthread_mutex_init(&context->mutex_thread, NULL); #ifdef DYNAREC @@ -424,7 +426,9 @@ void FreeBox86Context(box86context_t** context) pthread_mutex_destroy(&ctx->mutex_once); pthread_mutex_destroy(&ctx->mutex_once2); pthread_mutex_destroy(&ctx->mutex_trace); +#ifndef DYNAREC pthread_mutex_destroy(&ctx->mutex_lock); +#endif pthread_mutex_destroy(&ctx->mutex_tls); pthread_mutex_destroy(&ctx->mutex_thread); #ifdef DYNAREC diff --git a/src/dynarec/arm_emitter.h b/src/dynarec/arm_emitter.h index 4d20ad8d..ffb99033 100755 --- a/src/dynarec/arm_emitter.h +++ b/src/dynarec/arm_emitter.h @@ -122,7 +122,7 @@ Op is 20-27 #define SUB_IMM8(dst, src, imm8) \ EMIT(0xe2400000 | ((dst) << 12) | ((src) << 16) | brIMM(imm8) ) // sub cond dst, src, #(imm8) -#define SUB_COND_IMM8(dst, src, imm8) \ +#define SUB_COND_IMM8(cond, dst, src, imm8) \ EMIT((cond) | 0x02400000 | ((dst) << 12) | ((src) << 16) | brIMM(imm8) ) // sub.s dst, src, #(imm8) #define SUBS_IMM8(dst, src, imm8) \ @@ -442,11 +442,27 @@ Op is 20-27 #define LDREXD_gen(cond, Rn, Rt) (cond | 0b000<<25 | 0b11011<<20 | (Rn)<<16 | (Rt)<<12 | 0b1111<<8 | 0b1001<<4 | 0b1111) // Load Exclusive Rt/Rt+1 from Rn (tagging the memory) -#define 
LDREXD(Rn, Rt) EMIT(LDREXD_gen(c__, Rn, Rt)) +#define LDREXD(Rt, Rn) EMIT(LDREXD_gen(c__, Rn, Rt)) #define STREXD_gen(cond, Rd, Rn, Rt) (cond | 0b000<<25 | 0b11010<<20 | (Rn)<<16 | (Rd)<<12 | 0b1111<<8 | 0b1001<<4 | (Rt)) // Store Exclusive Rt/Rt+1 to Rn, with result in Rd if tag is ok (Rd!=Rn && Rd!=Rt && Rd!=Rt+1), Rd==1 if store failed -#define STREXD(Rd, Rn, Rt) EMIT(STREXD_gen(c__, Rd, Rn, Rt)) +#define STREXD(Rd, Rt, Rn) EMIT(STREXD_gen(c__, Rd, Rn, Rt)) + +#define LDREX_gen(cond, Rn, Rt) (cond | 0b0001100<<21 | 1<<20 | (Rn)<<16 | (Rt)<<12 | 0b1111<<8 | 0b1001<<4 | 0b1111) +// Load Exclusive Rt from Rn (tagging the memory) +#define LDREX(Rt, Rn) EMIT(LDREX_gen(c__, Rn, Rt)) + +#define STREX_gen(cond, Rd, Rn, Rt) (cond | 0b0001100<<21 | 0<<20 | (Rn)<<16 | (Rd)<<12 | 0b1111<<8 | 0b1001<<4 | (Rt)) +// Store Exclusive Rt to Rn, with result in Rd=0 if tag is ok, Rd==1 if store failed (Rd!=Rn && Rd!=Rt) +#define STREX(Rd, Rt, Rn) EMIT(STREX_gen(c__, Rd, Rn, Rt)) + +#define LDREXB_gen(cond, Rn, Rt) (cond | 0b0001110<<21 | 1<<20 | (Rn)<<16 | (Rt)<<12 | 0b1111<<8 | 0b1001<<4 | 0b1111) +// Load Exclusive Byte Rt from Rn (tagging the memory) +#define LDREXB(Rt, Rn) EMIT(LDREXB_gen(c__, Rn, Rt)) + +#define STREXB_gen(cond, Rd, Rn, Rt) (cond | 0b0001110<<21 | 0<<20 | (Rn)<<16 | (Rd)<<12 | 0b1111<<8 | 0b1001<<4 | (Rt)) +// Store Exclusive byte Rt to Rn, with result in Rd=0 if tag is ok, Rd==1 if store failed (Rd!=Rn && Rd!=Rt) +#define STREXB(Rd, Rt, Rn) EMIT(STREXB_gen(c__, Rd, Rn, Rt)) // Count leading 0 bit of Rm, store result in Rd #define CLZ(Rd, Rm) EMIT(c__ | 0b00010110<<20 | 0b1111<<16 | (Rd)<<12 | 0b1111<<8 | 0b0001<<4 | (Rm)) @@ -473,6 +489,9 @@ Op is 20-27 // Unsigned Div Rd <- Rn/Rm #define UDIV(Rd, Rm, Rn) EMIT(UDIV_gen(c__, Rd, Rm, Rn)) +// Yield +#define YIELD(cond) EMIT(cond | 0b00110010<<20 | 0b1111<<12 | 1) + // VFPU #define TRANSFERT64(C, op) ((0b1100<<24) | (0b010<<21) | (0b101<<9) | ((C)<<8) | ((op)<<4)) diff --git a/src/dynarec/arm_lock_helper.S 
b/src/dynarec/arm_lock_helper.S new file mode 100755 index 00000000..f51247c5 --- /dev/null +++ b/src/dynarec/arm_lock_helper.S @@ -0,0 +1,51 @@ +//arm lock helper: thin wrappers around the ARM exclusive load/store instructions +//there are 2 parts: read (ldrex*) and write (strex*) +// write returns 0 on success, 1 on fail (value has been changed) + +.text +.align 4 + +.global arm_lock_read_b +.global arm_lock_write_b +.global arm_lock_read_d +.global arm_lock_write_d +.global arm_lock_read_dd +.global arm_lock_write_dd + +arm_lock_read_b: + // address is r0, return is r0 + ldrexb r0, [r0] + bx lr + +arm_lock_write_b: + // address is r0, value is r1, return is r0 + mov r2, r0 + strexb r0, r1, [r2] + bx lr + +arm_lock_read_d: + // address is r0, return is r0 + // r0 needs to be aligned + ldrex r0, [r0] + bx lr + +arm_lock_write_d: + // address is r0, value is r1, return is r0 + // r0 needs to be aligned + mov r2, r0 + strex r0, r1, [r2] + bx lr + +arm_lock_read_dd: + // address is r2, the two words read are stored to [r0] and [r1] + ldrexd r2, r3, [r2] + str r2, [r0] + str r3, [r1] + bx lr + +arm_lock_write_dd: + // address is r2, value is r0, r1, return is r0 + // r2 needs to be aligned + strexd r3, r0, r1, [r2] + mov r0, r3 + bx lr \ No newline at end of file diff --git a/src/dynarec/arm_lock_helper.h b/src/dynarec/arm_lock_helper.h new file mode 100755 index 00000000..4a6667ad --- /dev/null +++ b/src/dynarec/arm_lock_helper.h @@ -0,0 +1,20 @@ +#ifndef __ARM_LOCK_HELPER__H__ +#define __ARM_LOCK_HELPER__H__ +#include <stdint.h> + +// LDREXB of ADDR +extern uint8_t arm_lock_read_b(void* addr); +// STREXB of ADDR, return 0 if ok, 1 if not +extern int arm_lock_write_b(void* addr, uint8_t val); + +// LDREX of ADDR +extern uint32_t arm_lock_read_d(void* addr); +// STREX of ADDR, return 0 if ok, 1 if not +extern int arm_lock_write_d(void* addr, uint32_t val); + +// LDREXD of ADDR, the two words read are written to *a and *b +extern void arm_lock_read_dd(uint32_t* a, uint32_t* b, void* addr); +// STREXD of ADDR, return 0 if ok, 1 if not +extern int arm_lock_write_dd(uint32_t a, uint32_t b, void* addr); + +#endif //__ARM_LOCK_HELPER__H__ \ 
No newline at end of file diff --git a/src/dynarec/dynarec_arm_00.c b/src/dynarec/dynarec_arm_00.c index 7d2c3cf1..3ca1749f 100755 --- a/src/dynarec/dynarec_arm_00.c +++ b/src/dynarec/dynarec_arm_00.c @@ -999,25 +999,16 @@ uintptr_t dynarec00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, BFI(gb1, x1, gb2*8, 8); BFI(eb1, x12, eb2*8, 8); } else { - if(0/*arm_swap*/) { - // use atomic swap... - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0); - GETGB(x12); - SWPB(x12, x12, ed); - BFI(gb1, x12, gb2*8, 8); - } else { - // Lock - LOCK; - // do the swap - GETGB(x12); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 4095, 0); - LDRB_IMM9(x1, ed, fixedaddress); // 1 gets eb - // do the swap 12 -> strb(ed), 1 -> gd - BFI(gb1, x1, gb2*8, 8); - STRB_IMM9(x12, ed, fixedaddress); - // Unlock - UNLOCK; - } + GETGB(x12); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0); + MARKLOCK; + // do the swap with exclusive locking + LDREXB(x1, ed); + // do the swap 12 -> strb(ed), 1 -> gd + STREXB(x3, x12, ed); + CMPS_IMM8(x3, 0); + B_MARKLOCK(cNE); + BFI(gb1, x1, gb2*8, 8); } break; case 0x87: @@ -1032,24 +1023,21 @@ uintptr_t dynarec00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, XOR_REG_LSL_IMM5(gd, gd, ed, 0); } } else { - if(0/*arm_swap*/) { // swap doesn't seem to really work like that, plus there seems to be alignement need on arm7 - GETGD; - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0); - // use atomic swap - SWP(gd, gd, ed); - } else { - LOCK; - GETGD; - GETED; - // xor swap to avoid one more tmp reg - if(gd!=ed) { - XOR_REG_LSL_IMM5(gd, gd, ed, 0); - XOR_REG_LSL_IMM5(ed, gd, ed, 0); - XOR_REG_LSL_IMM5(gd, gd, ed, 0); - } - WBACK; - UNLOCK; - } + GETGD; + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0); + TSTS_IMM8(ed, 3); + B_MARK(cNE); + MARKLOCK; + LDREX(x1, ed); + STREX(x3, gd, ed); + CMPS_IMM8(x3, 0); + B_MARKLOCK(cNE); + B_MARK2(c__); + MARK; 
+ LDR_IMM9(x1, ed, 0); + STR_IMM9(gd, ed, 0); + MARK2; + MOV_REG(gd, x1); } break; case 0x88: diff --git a/src/dynarec/dynarec_arm_0f.c b/src/dynarec/dynarec_arm_0f.c index 4a32a996..2bc2e816 100755 --- a/src/dynarec/dynarec_arm_0f.c +++ b/src/dynarec/dynarec_arm_0f.c @@ -1542,26 +1542,14 @@ uintptr_t dynarec0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("CMPXCHG Eb, Gb"); SETFLAGS(X_ALL, SF_SET); nextop = F8; - MOVW(x1, 0); - STR_IMM9(x1, xEmu, offsetof(x86emu_t, df)); // d_none == 0 - STR_IMM9(x1, xEmu, offsetof(x86emu_t, flags[F_AF])); - STR_IMM9(x1, xEmu, offsetof(x86emu_t, flags[F_PF])); - STR_IMM9(x1, xEmu, offsetof(x86emu_t, flags[F_OF])); - STR_IMM9(x1, xEmu, offsetof(x86emu_t, flags[F_SF])); GETEB(x2) UXTB(x1, xEAX, 0); - // Use a quick CMP, without setting A or P... CMPS_REG_LSL_IMM5(x1, ed, 0); - MOVW_COND(cEQ, x1, 1); - STR_IMM9(x1, xEmu, offsetof(x86emu_t, flags[F_CF])); B_MARK(cNE); // AL == Eb GETGB(x1); MOV_REG(ed, x1); EBBACK; - MOVW(x1, 1); - STR_IMM9(x1, xEmu, offsetof(x86emu_t, flags[F_ZF])); - // done B_MARK3(c__); MARK; // AL != Eb @@ -1569,36 +1557,27 @@ uintptr_t dynarec0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, MOVW(x1, 0); STR_IMM9(x1, xEmu, offsetof(x86emu_t, flags[F_ZF])); MARK3; + // done, do the cmp now + emit_cmp8(dyn, ninst, x1, x2, x3, x12); break; case 0xB1: INST_NAME("CMPXCHG Ed, Gd"); SETFLAGS(X_ALL, SF_SET); nextop = F8; - MOVW(x1, 0); - STR_IMM9(x1, xEmu, offsetof(x86emu_t, df)); // d_none == 0 - STR_IMM9(x1, xEmu, offsetof(x86emu_t, flags[F_AF])); - STR_IMM9(x1, xEmu, offsetof(x86emu_t, flags[F_PF])); - STR_IMM9(x1, xEmu, offsetof(x86emu_t, flags[F_OF])); - STR_IMM9(x1, xEmu, offsetof(x86emu_t, flags[F_SF])); GETED; GETGD; - // Use a quick CMP, without setting A or P... 
CMPS_REG_LSL_IMM5(xEAX, ed, 0); - MOVW_COND(cEQ, x1, 1); - STR_IMM9(x1, xEmu, offsetof(x86emu_t, flags[F_CF])); B_MARK(cNE); // EAX == Ed + MOV_REG(x3, ed); MOV_REG(ed, gd); WBACK; - MOVW(x1, 1); - STR_IMM9(x1, xEmu, offsetof(x86emu_t, flags[F_ZF])); - // done + emit_cmp32(dyn, ninst, xEAX, x3, x1, x12); B_MARK3(c__); // not next, in case its called with a LOCK prefix MARK; // EAX != Ed + emit_cmp32(dyn, ninst, xEAX, ed, x3, x12); MOV_REG(xEAX, ed); - MOVW(x1, 0); - STR_IMM9(x1, xEmu, offsetof(x86emu_t, flags[F_ZF])); MARK3 break; case 0xB3: diff --git a/src/dynarec/dynarec_arm_f0.c b/src/dynarec/dynarec_arm_f0.c index 4d1b586c..8eb8c199 100755 --- a/src/dynarec/dynarec_arm_f0.c +++ b/src/dynarec/dynarec_arm_f0.c @@ -24,13 +24,452 @@ uintptr_t dynarecF0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, int* ok, int* need_epilog) { - uint8_t nextop = PK(0); // don't increment addr - int locked = 0; - switch(nextop) { - // generic case + uint8_t nextop, opcode = F8; + uint8_t wback, wb1, wb2, gb1, gb2; + uint8_t ed, gd, u8; + int fixedaddress; + int32_t i32; + MAYUSE(i32); + MAYUSE(gb1); + MAYUSE(gb2); + MAYUSE(wb1); + MAYUSE(wb2); + switch(opcode) { + case 0x00: + INST_NAME("LOCK ADD Eb, Gb"); + SETFLAGS(X_ALL, SF_SET); + nextop = F8; + GETGB(x2); + if((nextop&0xC0)==0xC0) { + wback = (nextop&7); + wb2 = (wback>>2); + wback = xEAX+(wback&3); + UXTB(x1, wback, wb2); + emit_add8(dyn, ninst, x1, x2, x12, x3, 0); + BFI(wback, x1, wb2*8, 8); // result is in x1 (ed is not assigned on this path) + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0); + MARKLOCK; + LDREXB(x1, wback); + emit_add8(dyn, ninst, x1, x2, x12, x3, (wback==x3)?1:0); + STREXB(x12, x1, wback); + CMPS_IMM8(x12, 0); + B_MARKLOCK(cNE); // write failed, try again + } + break; + case 0x01: + INST_NAME("LOCK ADD Ed, Gd"); + SETFLAGS(X_ALL, SF_SET); + nextop = F8; + GETGD; + if((nextop&0xC0)==0xC0) { + ed = xEAX+(nextop&7); + emit_add32(dyn, ninst, ed, gd, x3, x12); + } else { + addr = geted(dyn, addr, ninst, nextop, 
&wback, x2, &fixedaddress, 0, 0); + MARKLOCK; + LDREX(x1, wback); + emit_add32(dyn, ninst, x1, gd, x3, x12); + STREX(x3, x1, wback); + CMPS_IMM8(x3, 0); + B_MARKLOCK(cNE); + } + break; + case 0x08: + INST_NAME("LOCK OR Eb, Gb"); + SETFLAGS(X_ALL, SF_SET); + nextop = F8; + GETGB(x2); + if((nextop&0xC0)==0xC0) { + wback = (nextop&7); + wb2 = (wback>>2); + wback = xEAX+(wback&3); + UXTB(x1, wback, wb2); + emit_or8(dyn, ninst, x1, x2, x12, x3, 0); + BFI(wback, x1, wb2*8, 8); // result is in x1 (ed is not assigned on this path) + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0); + MARKLOCK; + LDREXB(x1, wback); + emit_or8(dyn, ninst, x1, x2, x12, x3, (wback==x3)?1:0); + STREXB(x12, x1, wback); + CMPS_IMM8(x12, 0); + B_MARKLOCK(cNE); // write failed, try again + } + break; + case 0x09: + INST_NAME("LOCK OR Ed, Gd"); + SETFLAGS(X_ALL, SF_SET); + nextop = F8; + GETGD; + if((nextop&0xC0)==0xC0) { + ed = xEAX+(nextop&7); + emit_or32(dyn, ninst, ed, gd, x3, x12); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0); + MARKLOCK; + LDREX(x1, wback); + emit_or32(dyn, ninst, x1, gd, x3, x12); + STREX(x3, x1, wback); + CMPS_IMM8(x3, 0); + B_MARKLOCK(cNE); + } + break; + case 0x10: + INST_NAME("LOCK ADC Eb, Gb"); + SETFLAGS(X_ALL, SF_SET); + nextop = F8; + GETGB(x2); + if((nextop&0xC0)==0xC0) { + wback = (nextop&7); + wb2 = (wback>>2); + wback = xEAX+(wback&3); + UXTB(x1, wback, wb2); + emit_adc8(dyn, ninst, x1, x2, x12, x3, 0); + BFI(wback, x1, wb2*8, 8); // result is in x1 (ed is not assigned on this path) + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0); + MARKLOCK; + LDREXB(x1, wback); + emit_adc8(dyn, ninst, x1, x2, x12, x3, (wback==x3)?1:0); + STREXB(x12, x1, wback); + CMPS_IMM8(x12, 0); + B_MARKLOCK(cNE); // write failed, try again + } + break; + case 0x11: + INST_NAME("LOCK ADC Ed, Gd"); + SETFLAGS(X_ALL, SF_SET); + nextop = F8; + GETGD; + if((nextop&0xC0)==0xC0) { + ed = xEAX+(nextop&7); + emit_adc32(dyn, ninst, ed, gd, x3, x12); + } else { + addr = geted(dyn, addr, ninst, 
nextop, &wback, x2, &fixedaddress, 0, 0); + MARKLOCK; + LDREX(x1, wback); + emit_adc32(dyn, ninst, x1, gd, x3, x12); + STREX(x3, x1, wback); + CMPS_IMM8(x3, 0); + B_MARKLOCK(cNE); + } + break; + case 0x18: + INST_NAME("LOCK SBB Eb, Gb"); + SETFLAGS(X_ALL, SF_SET); + nextop = F8; + GETGB(x2); + if((nextop&0xC0)==0xC0) { + wback = (nextop&7); + wb2 = (wback>>2); + wback = xEAX+(wback&3); + UXTB(x1, wback, wb2); + emit_sbb8(dyn, ninst, x1, x2, x12, x3, 0); + BFI(wback, x1, wb2*8, 8); // result is in x1 (ed is not assigned on this path) + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0); + MARKLOCK; + LDREXB(x1, wback); + emit_sbb8(dyn, ninst, x1, x2, x12, x3, (wback==x3)?1:0); + STREXB(x12, x1, wback); + CMPS_IMM8(x12, 0); + B_MARKLOCK(cNE); // write failed, try again + } + break; + case 0x19: + INST_NAME("LOCK SBB Ed, Gd"); + SETFLAGS(X_ALL, SF_SET); + nextop = F8; + GETGD; + if((nextop&0xC0)==0xC0) { + ed = xEAX+(nextop&7); + emit_sbb32(dyn, ninst, ed, gd, x3, x12); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0); + MARKLOCK; + LDREX(x1, wback); + emit_sbb32(dyn, ninst, x1, gd, x3, x12); + STREX(x3, x1, wback); + CMPS_IMM8(x3, 0); + B_MARKLOCK(cNE); + } + break; + case 0x20: + INST_NAME("LOCK AND Eb, Gb"); + SETFLAGS(X_ALL, SF_SET); + nextop = F8; + GETGB(x2); + if((nextop&0xC0)==0xC0) { + wback = (nextop&7); + wb2 = (wback>>2); + wback = xEAX+(wback&3); + UXTB(x1, wback, wb2); + emit_and8(dyn, ninst, x1, x2, x12, x3, 0); + BFI(wback, x1, wb2*8, 8); // result is in x1 (ed is not assigned on this path) + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0); + MARKLOCK; + LDREXB(x1, wback); + emit_and8(dyn, ninst, x1, x2, x12, x3, (wback==x3)?1:0); + STREXB(x12, x1, wback); + CMPS_IMM8(x12, 0); + B_MARKLOCK(cNE); // write failed, try again + } + break; + case 0x21: + INST_NAME("LOCK AND Ed, Gd"); + SETFLAGS(X_ALL, SF_SET); + nextop = F8; + GETGD; + if((nextop&0xC0)==0xC0) { + ed = xEAX+(nextop&7); + emit_and32(dyn, ninst, ed, gd, x3, x12); + } else { + addr = geted(dyn, 
addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0); + MARKLOCK; + LDREX(x1, wback); + emit_and32(dyn, ninst, x1, gd, x3, x12); + STREX(x3, x1, wback); + CMPS_IMM8(x3, 0); + B_MARKLOCK(cNE); + } + break; + case 0x28: + INST_NAME("LOCK SUB Eb, Gb"); + SETFLAGS(X_ALL, SF_SET); + nextop = F8; + GETGB(x2); + if((nextop&0xC0)==0xC0) { + wback = (nextop&7); + wb2 = (wback>>2); + wback = xEAX+(wback&3); + UXTB(x1, wback, wb2); + emit_sub8(dyn, ninst, x1, x2, x12, x3, 0); + BFI(wback, x1, wb2*8, 8); // result is in x1 (ed is not assigned on this path) + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0); + MARKLOCK; + LDREXB(x1, wback); + emit_sub8(dyn, ninst, x1, x2, x12, x3, (wback==x3)?1:0); + STREXB(x12, x1, wback); + CMPS_IMM8(x12, 0); + B_MARKLOCK(cNE); // write failed, try again + } + break; + case 0x29: + INST_NAME("LOCK SUB Ed, Gd"); + SETFLAGS(X_ALL, SF_SET); + nextop = F8; + GETGD; + if((nextop&0xC0)==0xC0) { + ed = xEAX+(nextop&7); + emit_sub32(dyn, ninst, ed, gd, x3, x12); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0); + MARKLOCK; + LDREX(x1, wback); + emit_sub32(dyn, ninst, x1, gd, x3, x12); + STREX(x3, x1, wback); + CMPS_IMM8(x3, 0); + B_MARKLOCK(cNE); + } + break; + case 0x38: + INST_NAME("LOCK XOR Eb, Gb"); + SETFLAGS(X_ALL, SF_SET); + nextop = F8; + GETGB(x2); + if((nextop&0xC0)==0xC0) { + wback = (nextop&7); + wb2 = (wback>>2); + wback = xEAX+(wback&3); + UXTB(x1, wback, wb2); + emit_xor8(dyn, ninst, x1, x2, x12, x3, 0); + BFI(wback, x1, wb2*8, 8); // result is in x1 (ed is not assigned on this path) + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0); + MARKLOCK; + LDREXB(x1, wback); + emit_xor8(dyn, ninst, x1, x2, x12, x3, (wback==x3)?1:0); + STREXB(x12, x1, wback); + CMPS_IMM8(x12, 0); + B_MARKLOCK(cNE); // write failed, try again + } + break; + case 0x39: + INST_NAME("LOCK XOR Ed, Gd"); + SETFLAGS(X_ALL, SF_SET); + nextop = F8; + GETGD; + if((nextop&0xC0)==0xC0) { + ed = xEAX+(nextop&7); + emit_xor32(dyn, ninst, ed, gd, x3, x12); + } else { + addr = 
geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0); + MARKLOCK; + LDREX(x1, wback); + emit_xor32(dyn, ninst, x1, gd, x3, x12); + STREX(x3, x1, wback); + CMPS_IMM8(x3, 0); + B_MARKLOCK(cNE); + } + break; + + case 0x81: + case 0x83: + nextop = F8; + switch((nextop>>3)&7) { + case 0: //ADD + if(opcode==0x81) { + INST_NAME("LOCK ADD Ed, Id"); + } else { + INST_NAME("LOCK ADD Ed, Ib"); + } + SETFLAGS(X_ALL, SF_SET); + if((nextop&0xC0)==0xC0) { + if(opcode==0x81) i32 = F32S; else i32 = F8S; + ed = xEAX+(nextop&7); + emit_add32c(dyn, ninst, ed, i32, x3, x12); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0); + if(opcode==0x81) i32 = F32S; else i32 = F8S; + MARKLOCK; + LDREX(x1, wback); + emit_add32c(dyn, ninst, x1, i32, x3, x12); + STREX(x3, x1, wback); + CMPS_IMM8(x3, 0); + B_MARKLOCK(cNE); + } + break; + case 1: //OR + if(opcode==0x81) {INST_NAME("LOCK OR Ed, Id");} else {INST_NAME("LOCK OR Ed, Ib");} + SETFLAGS(X_ALL, SF_SET); + if((nextop&0xC0)==0xC0) { + if(opcode==0x81) i32 = F32S; else i32 = F8S; + ed = xEAX+(nextop&7); + emit_or32c(dyn, ninst, ed, i32, x3, x12); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0); + if(opcode==0x81) i32 = F32S; else i32 = F8S; + MARKLOCK; + LDREX(x1, wback); + emit_or32c(dyn, ninst, x1, i32, x3, x12); + STREX(x3, x1, wback); + CMPS_IMM8(x3, 0); + B_MARKLOCK(cNE); + } + break; + case 2: //ADC + if(opcode==0x81) {INST_NAME("LOCK ADC Ed, Id");} else {INST_NAME("LOCK ADC Ed, Ib");} + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET); + if((nextop&0xC0)==0xC0) { + if(opcode==0x81) i32 = F32S; else i32 = F8S; + ed = xEAX+(nextop&7); + emit_adc32c(dyn, ninst, ed, i32, x3, x12); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0); + if(opcode==0x81) i32 = F32S; else i32 = F8S; + MARKLOCK; + LDREX(x1, wback); + emit_adc32c(dyn, ninst, x1, i32, x3, x12); + STREX(x3, x1, wback); + CMPS_IMM8(x3, 0); + B_MARKLOCK(cNE); + } + break; + 
case 3: //SBB + if(opcode==0x81) {INST_NAME("LOCK SBB Ed, Id");} else {INST_NAME("LOCK SBB Ed, Ib");} + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET); + if((nextop&0xC0)==0xC0) { + if(opcode==0x81) i32 = F32S; else i32 = F8S; + ed = xEAX+(nextop&7); + emit_sbb32c(dyn, ninst, ed, i32, x3, x12); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0); + if(opcode==0x81) i32 = F32S; else i32 = F8S; + MARKLOCK; + LDREX(x1, wback); + emit_sbb32c(dyn, ninst, x1, i32, x3, x12); + STREX(x3, x1, wback); + CMPS_IMM8(x3, 0); + B_MARKLOCK(cNE); + } + break; + case 4: //AND + if(opcode==0x81) {INST_NAME("LOCK AND Ed, Id");} else {INST_NAME("LOCK AND Ed, Ib");} + SETFLAGS(X_ALL, SF_SET); + if((nextop&0xC0)==0xC0) { + if(opcode==0x81) i32 = F32S; else i32 = F8S; + ed = xEAX+(nextop&7); + emit_and32c(dyn, ninst, ed, i32, x3, x12); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0); + if(opcode==0x81) i32 = F32S; else i32 = F8S; + MARKLOCK; + LDREX(x1, wback); + emit_and32c(dyn, ninst, x1, i32, x3, x12); + STREX(x3, x1, wback); + CMPS_IMM8(x3, 0); + B_MARKLOCK(cNE); + } + break; + case 5: //SUB + if(opcode==0x81) {INST_NAME("LOCK SUB Ed, Id");} else {INST_NAME("LOCK SUB Ed, Ib");} + SETFLAGS(X_ALL, SF_SET); + if((nextop&0xC0)==0xC0) { + if(opcode==0x81) i32 = F32S; else i32 = F8S; + ed = xEAX+(nextop&7); + emit_sub32c(dyn, ninst, ed, i32, x3, x12); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0); + if(opcode==0x81) i32 = F32S; else i32 = F8S; + MARKLOCK; + LDREX(x1, wback); + emit_sub32c(dyn, ninst, x1, i32, x3, x12); + STREX(x3, x1, wback); + CMPS_IMM8(x3, 0); + B_MARKLOCK(cNE); + } + break; + case 6: //XOR + if(opcode==0x81) {INST_NAME("LOCK XOR Ed, Id");} else {INST_NAME("LOCK XOR Ed, Ib");} + SETFLAGS(X_ALL, SF_SET); + if((nextop&0xC0)==0xC0) { + if(opcode==0x81) i32 = F32S; else i32 = F8S; + ed = xEAX+(nextop&7); + emit_xor32c(dyn, ninst, ed, i32, x3, x12); + } else { + addr = 
geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0); + if(opcode==0x81) i32 = F32S; else i32 = F8S; + MARKLOCK; + LDREX(x1, wback); + emit_xor32c(dyn, ninst, x1, i32, x3, x12); + STREX(x3, x1, wback); + CMPS_IMM8(x3, 0); + B_MARKLOCK(cNE); + } + break; + case 7: //CMP + if(opcode==0x81) {INST_NAME("(LOCK) CMP Ed, Id");} else {INST_NAME("(LOCK) CMP Ed, Ib");} + SETFLAGS(X_ALL, SF_SET); + GETEDH(x1); + // No need to LOCK, this is readonly + if(opcode==0x81) i32 = F32S; else i32 = F8S; + if(i32) { + MOV32(x2, i32); + emit_cmp32(dyn, ninst, ed, x2, x3, x12); + } else { + emit_cmp32_0(dyn, ninst, ed, x3, x12); + } + break; + } + break; + + // generic case, no lock needed, the value is only read (note that on x86 locked read is always followed by a locked write) #define GO(A) \ - case A+0x00: \ - case A+0x01: \ case A+0x02: \ case A+0x03: \ case A+0x04: \ @@ -43,62 +482,346 @@ uintptr_t dynarecF0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, GO(0x28); GO(0x30); #undef GO - - case 0x81: - case 0x83: - MESSAGE(LOG_DUMP, "LOCK\n"); - locked = 1; - LOCK; - addr = dynarec00(dyn, addr, ip, ninst, ok, need_epilog); - break; - case 0x86: // for this two, the lock is already done by the opcode, so just ignoring it case 0x87: - addr = dynarec00(dyn, addr, ip, ninst, ok, need_epilog); + --addr; break; case 0x0F: - nextop = PK(1); + nextop = F8; switch(nextop) { case 0xB0: + INST_NAME("LOCK CMPXCHG Eb, Gb"); + SETFLAGS(X_ALL, SF_SET); + nextop = F8; + if((nextop&0xC0)==0xC0) { + wback = (nextop&7); + wb2 = (wback>>2); + wback = xEAX+(wback&3); + UXTB(x2, wback, wb2); + ed = x2; + wb1 = 0; + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0); + MARKLOCK; + LDREXB(x2, wback); + ed = x2; + wb1 = 1; + } + UXTB(x1, xEAX, 0); + CMPS_REG_LSL_IMM5(x1, ed, 0); + B_MARK(cNE); + // AL == Eb + GETGB(x1); + if(wb1) { + STREXB(x12, x1, wback); + CMPS_IMM8(x12, 0); + B_MARKLOCK(cNE); // write failed, try again + } else { + BFI(wback, x1, 
wb2*8, 8); + } + // done + B_MARK3(c__); + MARK; + // AL != Eb + BFI(xEAX, ed, 0, 8); + MARK3; + // done, do the cmp now + emit_cmp8(dyn, ninst, x1, x2, x3, x12); + break; case 0xB1: + INST_NAME("LOCK CMPXCHG Ed, Gd"); + SETFLAGS(X_ALL, SF_SET); + nextop = F8; + GETGD; + if((nextop&0xC0)==0xC0) { + ed = xEAX+(nextop&7); + wback = 0; + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0); + MARKLOCK; + LDREX(x1, wback); + ed = x1; + } + CMPS_REG_LSL_IMM5(xEAX, ed, 0); + B_MARK(cNE); + // EAX == Ed + MOV_REG(x3, ed); + MOV_REG(ed, gd); + if(wback) { + STREX(x12, ed, wback); + CMPS_IMM8(x12, 0); + B_MARKLOCK(cNE); + } + emit_cmp32(dyn, ninst, xEAX, x3, x1, x12); + // done + B_MARK3(c__); // not next, in case its called with a LOCK prefix + MARK; + // EAX != Ed + emit_cmp32(dyn, ninst, xEAX, ed, x3, x12); + MOV_REG(xEAX, ed); + MARK3 + break; case 0xB3: + INST_NAME("LOCK BTR Ed, Gd"); + SETFLAGS(X_CF, SF_SET); + nextop = F8; + GETGD; + if((nextop&0xC0)==0xC0) { + ed = xEAX+(nextop&7); + wback = 0; + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0); + UBFX(x1, gd, 5, 3); // r1 = (gd>>5); + ADD_REG_LSL_IMM5(x3, wback, x1, 2); //(&ed)+=r1*4; + wback = x3; + MARKLOCK; + LDREX(x1, wback); + ed = x1; + } + AND_IMM8(x2, gd, 0x1f); + MOV_REG_LSR_REG(x12, ed, x2); + ANDS_IMM8(x12, x12, 1); + STR_IMM9(x12, xEmu, offsetof(x86emu_t, flags[F_CF])); + B_MARK3(cEQ); // bit already clear, jump to end of instruction + MOVW(x12, 1); + XOR_REG_LSL_REG(ed, ed, x12, x2); + if(wback) { + STREX(x12, ed, wback); + CMPS_IMM8(x12, 0); + B_MARKLOCK(cNE); + } + MARK3; + break; + case 0xBA: + nextop = F8; + switch((nextop>>3)&7) { + case 4: + INST_NAME("(LOCK) BT Ed, Ib"); + SETFLAGS(X_CF, SF_SUBSET); + gd = x2; + if((nextop&0xC0)==0xC0) { + ed = xEAX+(nextop&7); + u8 = F8; + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, 4095-32, 0); + u8 = F8; + fixedaddress+=(u8>>5)*4; + LDR_IMM9(x1, ed, fixedaddress); + 
ed = x1; + } + u8&=0x1f; + if(u8) { + MOV_REG_LSR_IMM5(x1, ed, u8); + ed = x1; + } + AND_IMM8(x1, ed, 1); + STR_IMM9(x1, xEmu, offsetof(x86emu_t, flags[F_CF])); + break; + case 6: + INST_NAME("(LOCK) BTR Ed, Ib"); + SETFLAGS(X_CF, SF_SUBSET); + gd = x2; + if((nextop&0xC0)==0xC0) { + ed = xEAX+(nextop&7); + u8 = F8; + MOVW(gd, u8); + wback = 0; + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, 0, 0); + u8 = F8; + MOVW(gd, u8); + UBFX(x1, gd, 5, 3); // r1 = (gd>>5); + ADD_REG_LSL_IMM5(x3, ed, x1, 2); //(&ed)+=r1*4; + MARKLOCK; + LDREX(x1, x3); + ed = x1; + wback = x3; + } + AND_IMM8(x2, gd, 0x1f); + MOV_REG_LSR_REG(x12, ed, x2); + ANDS_IMM8(x12, x12, 1); + STR_IMM9(x12, xEmu, offsetof(x86emu_t, flags[F_CF])); + B_MARK3(cEQ); // bit already clear, jump to next instruction + //MOVW(x12, 1); // already 0x01 + XOR_REG_LSL_REG(ed, ed, x12, x2); + if(wback) { + STREX(x12, ed, wback); + CMPS_IMM8(x12, 0); + B_MARKLOCK(cNE); + } + MARK3; + break; + default: + DEFAULT; + } + break; case 0xBB: + INST_NAME("LOCK BTC Ed, Gd"); + SETFLAGS(X_CF, SF_SET); + nextop = F8; + GETGD; + if((nextop&0xC0)==0xC0) { + ed = xEAX+(nextop&7); + wback = 0; + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0); + UBFX(x1, gd, 5, 3); // r1 = (gd>>5); + ADD_REG_LSL_IMM5(x3, wback, x1, 2); //(&ed)+=r1*4; + MARKLOCK; + LDREX(x1, x3); + ed = x1; + wback = x3; + } + AND_IMM8(x2, gd, 0x1f); + MOV_REG_LSR_REG(x12, ed, x2); + AND_IMM8(x12, x12, 1); + STR_IMM9(x12, xEmu, offsetof(x86emu_t, flags[F_CF])); + MOVW(x12, 1); + XOR_REG_LSL_REG(ed, ed, x12, x2); + if(wback) { + STREX(x12, ed, wback); + CMPS_IMM8(x12, 0); + B_MARKLOCK(cNE); + } + break; + case 0xC0: + INST_NAME("LOCK XADD Gb, Eb"); + SETFLAGS(X_ALL, SF_SET); + nextop = F8; + GETGB(x1); + if((nextop&0xC0)==0xC0) { + wback = (nextop&7); + wb2 = (wback>>2); + wback = xEAX+(wback&3); + UXTB(x2, wback, wb2); + wb1 = 0; + ed = x2; + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x3, 
&fixedaddress, 0, 0); + MARKLOCK; + LDREXB(x2, wback); + wb1 = 1; + ed = x2; + } + BFI(gb1, ed, gb2*8, 8); // gb <- eb + emit_add8(dyn, ninst, ed, gd, x12, x3, 1); + ADD_REG_LSL_IMM5(x12, ed, gd, 0); + if(wb1) { + STREXB(x1, ed, wback); + CMPS_IMM8(x1, 0); + B_MARKLOCK(cNE); + } else { + BFI(wback, ed, wb2*8, 8); + } + break; case 0xC1: + INST_NAME("LOCK XADD Gd, Ed"); + SETFLAGS(X_ALL, SF_SET); + nextop = F8; + GETGD; + if((nextop&0xC0)==0xC0) { + ed = xEAX+(nextop&7); + if(gd!=ed) { + XOR_REG_LSL_IMM5(gd, gd, ed, 0); // swap gd, ed + XOR_REG_LSL_IMM5(ed, gd, ed, 0); + XOR_REG_LSL_IMM5(gd, gd, ed, 0); + } + emit_add32(dyn, ninst, ed, gd, x3, x12); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0); + MARKLOCK; + LDREX(x1, wback); + PUSH(xSP, 1<>3)&7) { - case 0: - case 1: - MESSAGE(LOG_DUMP, "LOCK\n"); - locked = 1; - LOCK; - addr = dynarec00(dyn, addr, ip, ninst, ok, need_epilog); + case 0: // INC Ed + INST_NAME("INC Ed"); + SETFLAGS(X_ALL&~X_CF, SF_SUBSET); + if((nextop&0xC0)==0xC0) { + ed = xEAX+(nextop&7); + emit_inc32(dyn, ninst, ed, x3, x12); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0); + MARKLOCK; + LDREX(x1, wback); + emit_inc32(dyn, ninst, x1, x3, x12); + STREX(x3, x1, wback); + CMPS_IMM8(x3, 0); + B_MARKLOCK(cNE); + } + break; + case 1: //DEC Ed + INST_NAME("DEC Ed"); + SETFLAGS(X_ALL&~X_CF, SF_SUBSET); + if((nextop&0xC0)==0xC0) { + ed = xEAX+(nextop&7); + emit_dec32(dyn, ninst, ed, x3, x12); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0); + MARKLOCK; + LDREX(x1, wback); + emit_dec32(dyn, ninst, x1, x3, x12); + STREX(x3, x1, wback); + CMPS_IMM8(x3, 0); + B_MARKLOCK(cNE); + } break; default: - addr = dynarec00(dyn, addr, ip, ninst, ok, need_epilog); // no lock, regular instruction... + // dafault to NO LOCK + addr-=2; } break; default: - addr = dynarec00(dyn, addr, ip, ninst, ok, need_epilog); // no lock, regular instruction... 
+ // default to NO LOCK + --addr; } - if(locked) {UNLOCK;} return addr; } diff --git a/src/dynarec/dynarec_arm_helper.c b/src/dynarec/dynarec_arm_helper.c index f293e404..6ea4cbf7 100755 --- a/src/dynarec/dynarec_arm_helper.c +++ b/src/dynarec/dynarec_arm_helper.c @@ -293,12 +293,12 @@ void jump_to_linker(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst) // TODO: This is not thread safe. if(!ip) { // no IP, jump address in a reg, so need smart linker MARK; - LDREXD(x1, x2); // load dest address in x2 and planned ip in x3 + LDREXD(x2, x1); // load dest address in x2 and planned ip in x3 CMPS_REG_LSL_IMM5(xEIP, x3, 0); BXcond(cEQ, x2); MOV32_(x2, (uintptr_t)arm_linker); MOV_REG(x3, x12); - STREXD(x12, x1, x2); // nope, putting back linker & IP in place + STREXD(x12, x2, x1); // nope, putting back linker & IP in place // x12 now contain success / falure for write CMPS_IMM8(x12, 1); MOV_REG(x12, x3); // put back IP in place... @@ -338,12 +338,12 @@ void ret_to_epilog(dynarec_arm_t* dyn, int ninst) dyn->tablei+=4; // smart linker MOV32_(x1, (uintptr_t)table); MARK; - LDREXD(x1, x2); // load dest address in x2 and planned ip in x3 + LDREXD(x2, x1); // load dest address in x2 and planned ip in x3 CMPS_REG_LSL_IMM5(xEIP, x3, 0); BXcond(cEQ, x2); MOV32_(x2, (uintptr_t)arm_linker); MOV_REG(x3, x12); - STREXD(x12, x1, x2); // nope, putting back linker & IP in place + STREXD(x12, x2, x1); // nope, putting back linker & IP in place // x12 now contain success / falure for write CMPS_IMM8(x12, 1); MOV_REG(x12, x3); // put back IP in place... 
@@ -386,12 +386,12 @@ void retn_to_epilog(dynarec_arm_t* dyn, int ninst, int n) dyn->tablei+=4; // smart linker MOV32_(x1, (uintptr_t)table); MARK; - LDREXD(x1, x2); // load dest address in x2 and planned ip in x3 + LDREXD(x2, x1); // load dest address in x2 and planned ip in x3 CMPS_REG_LSL_IMM5(xEIP, x3, 0); BXcond(cEQ, x2); MOV32_(x2, (uintptr_t)arm_linker); MOV_REG(x3, x12); - STREXD(x12, x1, x2); // nope, putting back linker & IP in place + STREXD(x12, x2, x1); // nope, putting back linker & IP in place // x12 now contain success / falure for write CMPS_IMM8(x12, 1); MOV_REG(x12, x3); // put back IP in place... @@ -491,28 +491,6 @@ int isNativeCall(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t* calladdress, int return 0; } -// emit "lock", x1, x2 and x3 are lost -void emit_lock(dynarec_arm_t* dyn, uintptr_t addr, int ninst) -{ - PUSH(xSP, (1<insts)?dyn->insts[ninst].markf:(dyn->arm_size+4)) #define MARKSEG if(dyn->insts) {dyn->insts[ninst].markseg = (uintptr_t)dyn->arm_size;} #define GETMARKSEG ((dyn->insts)?dyn->insts[ninst].markseg:(dyn->arm_size+4)) +#define MARKLOCK if(dyn->insts) {dyn->insts[ninst].marklock = (uintptr_t)dyn->arm_size;} +#define GETMARKLOCK ((dyn->insts)?dyn->insts[ninst].marklock:(dyn->arm_size+4)) // Branch to MARK if cond (use i32) #define B_MARK(cond) \ @@ -207,6 +209,10 @@ #define B_MARKSEG(cond) \ i32 = GETMARKSEG-(dyn->arm_size+8); \ Bcond(cond, i32) +// Branch to MARKLOCK if cond (use i32) +#define B_MARKLOCK(cond) \ + i32 = GETMARKLOCK-(dyn->arm_size+8); \ + Bcond(cond, i32) #define IFX(A) if(dyn->insts && (dyn->insts[ninst].x86.need_flags&(A))) #define IFXX(A) if(dyn->insts && (dyn->insts[ninst].x86.need_flags==(A))) @@ -301,12 +307,6 @@ #define NEW_BARRIER_INST #endif -// Emit the LOCK mutex (x1, x2 and x3 are lost) -#define LOCK emit_lock(dyn, addr, ninst) -// Emit the UNLOCK mutex (x1, x2 and x3 are lost) -#define UNLOCK emit_unlock(dyn, addr, ninst) - - void arm_epilog(); void* arm_linker(x86emu_t* emu, void** table, 
uintptr_t addr); @@ -349,8 +349,6 @@ void* arm_linker(x86emu_t* emu, void** table, uintptr_t addr); #define grab_fsdata STEPNAME(grab_fsdata_) #define grab_tlsdata STEPNAME(grab_tlsdata_) #define isNativeCall STEPNAME(isNativeCall_) -#define emit_lock STEPNAME(emit_lock) -#define emit_unlock STEPNAME(emit_unlock) #define emit_cmp8 STEPNAME(emit_cmp8) #define emit_cmp16 STEPNAME(emit_cmp16) #define emit_cmp32 STEPNAME(emit_cmp32) diff --git a/src/dynarec/dynarec_arm_private.h b/src/dynarec/dynarec_arm_private.h index 78ffaac2..0ef942ae 100755 --- a/src/dynarec/dynarec_arm_private.h +++ b/src/dynarec/dynarec_arm_private.h @@ -13,6 +13,7 @@ typedef struct instruction_arm_s { uintptr_t mark, mark2, mark3; uintptr_t markf; uintptr_t markseg; + uintptr_t marklock; } instruction_arm_t; typedef struct dynarec_arm_s { diff --git a/src/emu/x86run.c b/src/emu/x86run.c index 89283da8..206f878a 100755 --- a/src/emu/x86run.c +++ b/src/emu/x86run.c @@ -19,6 +19,9 @@ #include "x87emu_private.h" #include "box86context.h" #include "my_cpuid.h" +#ifdef DYNAREC +#include "../dynarec/arm_lock_helper.h" +#endif int my_setcontext(x86emu_t* emu, void* ucp); @@ -629,6 +632,20 @@ _trace: NEXT; _0x86: /* XCHG Eb,Gb */ nextop = F8; +#ifdef DYNAREC + GET_EB; + if((nextop&0xC0)==0xC0) { // reg / reg: no lock + tmp8u = GB; + GB = EB->byte[0]; + EB->byte[0] = tmp8u; + } else { + do { + tmp8u = arm_lock_read_b(EB); + } while(arm_lock_write_b(EB, GB)); + GB = tmp8u; + } + // dynarec use need it's own mecanism +#else GET_EB; if((nextop&0xC0)!=0xC0) pthread_mutex_lock(&emu->context->mutex_lock); // XCHG always LOCK (but when accessing memory only) @@ -637,9 +654,31 @@ _trace: EB->byte[0] = tmp8u; if((nextop&0xC0)!=0xC0) pthread_mutex_unlock(&emu->context->mutex_lock); +#endif NEXT; _0x87: /* XCHG Ed,Gd */ nextop = F8; +#ifdef DYNAREC + GET_ED; + if((nextop&0xC0)==0xC0) { + tmp32u = GD.dword[0]; + GD.dword[0] = ED->dword[0]; + ED->dword[0] = tmp32u; + } else { + if(((uintptr_t)ED)&3) + { + // not 
aligned, dont't try to "LOCK" + tmp32u = ED->dword[0]; + ED->dword[0] = GD.dword[0]; + } else { + // XCHG is supposed to automaticaly LOCK memory bus + do { + tmp32u = arm_lock_read_d(ED); + } while(arm_lock_write_d(ED, GD.dword[0])); + } + GD.dword[0] = tmp32u; + } +#else GET_ED; if((nextop&0xC0)!=0xC0) pthread_mutex_lock(&emu->context->mutex_lock); // XCHG always LOCK (but when accessing memory only) @@ -648,6 +687,7 @@ _trace: ED->dword[0] = tmp32u; if((nextop&0xC0)!=0xC0) pthread_mutex_unlock(&emu->context->mutex_lock); +#endif NEXT; _0x88: /* MOV Eb,Gb */ nextop = F8; diff --git a/src/emu/x86run66.c b/src/emu/x86run66.c index 09d884ce..ed337188 100755 --- a/src/emu/x86run66.c +++ b/src/emu/x86run66.c @@ -11,6 +11,9 @@ #include "x86primop.h" #include "x86trace.h" #include "box86context.h" +#ifdef DYNAREC +#include "../dynarec/arm_lock_helper.h" +#endif #define F8 *(uint8_t*)(ip++) @@ -162,9 +165,47 @@ void RunLock(x86emu_t *emu) uint8_t nextop; reg32_t *oped; uint8_t tmp8u; +#ifdef DYNAREC + uint8_t tmp8u2; +#endif uint32_t tmp32u, tmp32u2; int32_t tmp32s; switch(opcode) { +#ifdef DYNAREC + #define GO(B, OP) \ + case B+0: \ + nextop = F8; \ + GET_EB; \ + do { \ + tmp8u = arm_lock_read_b(EB); \ + tmp8u = OP##8(emu, tmp8u, GB); \ + } while (arm_lock_write_b(EB, tmp8u)); \ + break; \ + case B+1: \ + nextop = F8; \ + GET_ED; \ + do { \ + tmp32u = arm_lock_read_d(ED); \ + tmp32u = OP##32(emu, tmp32u, GD.dword[0]); \ + } while (arm_lock_write_d(ED, tmp32u)); \ + break; \ + case B+2: \ + nextop = F8; \ + GET_EB; \ + GB = OP##8(emu, GB, EB->byte[0]); \ + break; \ + case B+3: \ + nextop = F8; \ + GET_ED; \ + GD.dword[0] = OP##32(emu, GD.dword[0], ED->dword[0]); \ + break; \ + case B+4: \ + R_AL = OP##8(emu, R_AL, F8); \ + break; \ + case B+5: \ + R_EAX = OP##32(emu, R_EAX, F32); \ + break; +#else #define GO(B, OP) \ case B+0: \ nextop = F8; \ @@ -204,7 +245,7 @@ void RunLock(x86emu_t *emu) R_EAX = OP##32(emu, R_EAX, F32); \ 
pthread_mutex_unlock(&emu->context->mutex_lock);\ break; - +#endif GO(0x00, add) /* ADD 0x00 -> 0x05 */ GO(0x08, or) /* OR 0x08 -> 0x0D */ GO(0x10, adc) /* ADC 0x10 -> 0x15 */ @@ -221,6 +262,18 @@ void RunLock(x86emu_t *emu) CHECK_FLAGS(emu); nextop = F8; GET_EB; +#ifdef DYNAREC + do { + tmp8u = arm_lock_read_b(EB); + cmp8(emu, R_AL, tmp8u); + if(ACCESS_FLAG(F_ZF)) { + tmp32s = arm_lock_write_b(EB, GB); + } else { + R_AL = tmp8u; + tmp32s = 0; + } + } while(tmp32s); +#else pthread_mutex_lock(&emu->context->mutex_lock); cmp8(emu, R_AL, EB->byte[0]); if(ACCESS_FLAG(F_ZF)) { @@ -229,10 +282,23 @@ void RunLock(x86emu_t *emu) R_AL = EB->byte[0]; } pthread_mutex_unlock(&emu->context->mutex_lock); +#endif break; case 0xB1: /* CMPXCHG Ed,Gd */ nextop = F8; GET_ED; +#ifdef DYNAREC + do { + tmp32u = arm_lock_read_d(ED); + cmp32(emu, R_EAX, tmp32u); + if(ACCESS_FLAG(F_ZF)) { + tmp32s = arm_lock_write_d(ED, GD.dword[0]); + } else { + R_EAX = tmp32u; + tmp32s = 0; + } + } while(tmp32s); +#else pthread_mutex_lock(&emu->context->mutex_lock); cmp32(emu, R_EAX, ED->dword[0]); if(ACCESS_FLAG(F_ZF)) { @@ -241,6 +307,7 @@ void RunLock(x86emu_t *emu) R_EAX = ED->dword[0]; } pthread_mutex_unlock(&emu->context->mutex_lock); +#endif break; case 0xB3: /* BTR Ed,Gd */ CHECK_FLAGS(emu); @@ -252,6 +319,19 @@ void RunLock(x86emu_t *emu) ED=(reg32_t*)(((uint32_t*)(ED))+(tmp8u>>5)); } tmp8u&=31; +#ifdef DYNAREC + do { + tmp32u = arm_lock_read_d(ED); + if(tmp32u & (1<context->mutex_lock); if(ED->dword[0] & (1<context->mutex_lock); +#endif break; case 0xBA: nextop = F8; @@ -272,12 +353,19 @@ void RunLock(x86emu_t *emu) ED=(reg32_t*)(((uint32_t*)(ED))+(tmp8u>>5)); } tmp8u&=31; +#ifdef DYNAREC + if(arm_lock_read_d(ED) & (1<context->mutex_lock); if(ED->dword[0] & (1<context->mutex_lock); +#endif break; case 6: /* BTR Ed, Ib */ CHECK_FLAGS(emu); @@ -288,6 +376,19 @@ void RunLock(x86emu_t *emu) ED=(reg32_t*)(((uint32_t*)(ED))+(tmp8u>>5)); } tmp8u&=31; +#ifdef DYNAREC + do { + tmp32u = 
arm_lock_read_d(ED); + if(tmp32u & (1<context->mutex_lock); if(ED->dword[0] & (1<context->mutex_lock); +#endif break; default: @@ -312,6 +414,17 @@ void RunLock(x86emu_t *emu) ED=(reg32_t*)(((uint32_t*)(ED))+(tmp8u>>5)); } tmp8u&=31; +#ifdef DYNAREC + do { + tmp32u = arm_lock_read_d(ED); + if(tmp32u & (1<context->mutex_lock); if(ED->dword[0] & (1<dword[0] ^= (1<context->mutex_lock); +#endif break; case 0xC0: /* XADD Gb,Eb */ nextop = F8; GET_EB; +#ifdef DYNAREC + do { + tmp8u = arm_lock_read_b(EB); + tmp8u2 = add8(emu, tmp8u, GB); + } while (arm_lock_write_b(EB, tmp8u2)); + GB = tmp8u; +#else pthread_mutex_lock(&emu->context->mutex_lock); tmp8u = add8(emu, EB->byte[0], GB); GB = EB->byte[0]; EB->byte[0] = tmp8u; pthread_mutex_unlock(&emu->context->mutex_lock); +#endif break; case 0xC1: /* XADD Gd,Ed */ nextop = F8; GET_ED; +#ifdef DYNAREC + do { + tmp32u = arm_lock_read_d(ED); + tmp32u2 = add32(emu, tmp32u, GD.dword[0]); + } while(arm_lock_write_d(ED, tmp32u2)); + GD.dword[0] = tmp32u; +#else pthread_mutex_lock(&emu->context->mutex_lock); tmp32u = add32(emu, ED->dword[0], GD.dword[0]); GD.dword[0] = ED->dword[0]; ED->dword[0] = tmp32u; pthread_mutex_unlock(&emu->context->mutex_lock); +#endif break; case 0xC7: /* CMPXCHG8B Gq */ CHECK_FLAGS(emu); nextop = F8; GET_ED; +#ifdef DYNAREC + do { + arm_lock_read_dd(&tmp32u, &tmp32u2, ED); + if(R_EAX == tmp32u && R_EDX == tmp32u2) { + SET_FLAG(F_ZF); + tmp32s = arm_lock_write_dd(R_EBX, R_ECX, ED); + } else { + CLEAR_FLAG(F_ZF); + R_EAX = tmp32u; + R_EDX = tmp32u2; + tmp32s = 0; + } + } while(tmp32s); +#else pthread_mutex_lock(&emu->context->mutex_lock); tmp32u = ED->dword[0]; tmp32u2= ED->dword[1]; @@ -355,6 +499,7 @@ void RunLock(x86emu_t *emu) R_EDX = tmp32u2; } pthread_mutex_unlock(&emu->context->mutex_lock); +#endif break; default: // trigger invalid lock? 
@@ -367,12 +512,24 @@ void RunLock(x86emu_t *emu) case 0x83: /* GRP Ed,Ib */ nextop = F8; GET_ED; - pthread_mutex_lock(&emu->context->mutex_lock); if(opcode==0x83) { tmp32s = F8S; tmp32u = (uint32_t)tmp32s; } else tmp32u = F32; +#ifdef DYNAREC + switch((nextop>>3)&7) { + case 0: do { tmp32u2 = arm_lock_read_d(ED);} while(arm_lock_write_d(ED, add32(emu, tmp32u2, tmp32u))); break; + case 1: do { tmp32u2 = arm_lock_read_d(ED);} while(arm_lock_write_d(ED, or32(emu, tmp32u2, tmp32u))); break; + case 2: do { tmp32u2 = arm_lock_read_d(ED);} while(arm_lock_write_d(ED, adc32(emu, tmp32u2, tmp32u))); break; + case 3: do { tmp32u2 = arm_lock_read_d(ED);} while(arm_lock_write_d(ED, sbb32(emu, tmp32u2, tmp32u))); break; + case 4: do { tmp32u2 = arm_lock_read_d(ED);} while(arm_lock_write_d(ED, and32(emu, tmp32u2, tmp32u))); break; + case 5: do { tmp32u2 = arm_lock_read_d(ED);} while(arm_lock_write_d(ED, sub32(emu, tmp32u2, tmp32u))); break; + case 6: do { tmp32u2 = arm_lock_read_d(ED);} while(arm_lock_write_d(ED, xor32(emu, tmp32u2, tmp32u))); break; + case 7: cmp32(emu, ED->dword[0], tmp32u); break; + } +#else + pthread_mutex_lock(&emu->context->mutex_lock); switch((nextop>>3)&7) { case 0: ED->dword[0] = add32(emu, ED->dword[0], tmp32u); break; case 1: ED->dword[0] = or32(emu, ED->dword[0], tmp32u); break; @@ -384,38 +541,37 @@ void RunLock(x86emu_t *emu) case 7: cmp32(emu, ED->dword[0], tmp32u); break; } pthread_mutex_unlock(&emu->context->mutex_lock); +#endif break; case 0x86: /* XCHG Eb,Gb */ - nextop = F8; - GET_EB; - tmp8u = GB; - pthread_mutex_lock(&emu->context->mutex_lock); - GB = EB->byte[0]; - EB->byte[0] = tmp8u; - pthread_mutex_unlock(&emu->context->mutex_lock); - break; case 0x87: /* XCHG Ed,Gd */ - nextop = F8; - GET_ED; - pthread_mutex_lock(&emu->context->mutex_lock); - tmp32u = GD.dword[0]; - GD.dword[0] = ED->dword[0]; - ED->dword[0] = tmp32u; - pthread_mutex_unlock(&emu->context->mutex_lock); + ip--; // let the normal XCHG execute, it have integrated LOCK 
break; case 0xFF: /* GRP 5 Ed */ nextop = F8; GET_ED; switch((nextop>>3)&7) { case 0: /* INC Ed */ +#ifdef DYNAREC + do { + tmp32u = arm_lock_read_d(ED); + } while(arm_lock_write_d(ED, inc32(emu, tmp32u))); +#else pthread_mutex_lock(&emu->context->mutex_lock); ED->dword[0] = inc32(emu, ED->dword[0]); pthread_mutex_unlock(&emu->context->mutex_lock); +#endif break; case 1: /* DEC Ed */ +#ifdef DYNAREC + do { + tmp32u = arm_lock_read_d(ED); + } while(arm_lock_write_d(ED, dec32(emu, tmp32u))); +#else pthread_mutex_lock(&emu->context->mutex_lock); ED->dword[0] = dec32(emu, ED->dword[0]); pthread_mutex_unlock(&emu->context->mutex_lock); +#endif break; default: printf_log(LOG_NONE, "Illegal Opcode 0xF0 0xFF 0x%02X 0x%02X\n", nextop, PK(0)); diff --git a/src/include/box86context.h b/src/include/box86context.h index 721745be..0989d198 100755 --- a/src/include/box86context.h +++ b/src/include/box86context.h @@ -117,7 +117,9 @@ typedef struct box86context_s { pthread_mutex_t mutex_once; pthread_mutex_t mutex_once2; pthread_mutex_t mutex_trace; - pthread_mutex_t mutex_lock; + #ifndef DYNAREC + pthread_mutex_t mutex_lock; // dynarec build will use their own mecanism + #endif pthread_mutex_t mutex_tls; pthread_mutex_t mutex_thread;