[DYNAREC] Changed the method used on ARM to simulate LOCK prefix

This commit is contained in:
ptitSeb 2020-09-23 20:22:44 +02:00
parent 14a904cfa9
commit 9219384650
14 changed files with 1112 additions and 152 deletions

View File

@ -364,6 +364,7 @@ if(ARM_DYNAREC)
"${BOX86_ROOT}/src/dynarec/arm_epilog.S"
"${BOX86_ROOT}/src/dynarec/arm_linker.S"
"${BOX86_ROOT}/src/dynarec/arm_table.S"
"${BOX86_ROOT}/src/dynarec/arm_lock_helper.S"
)
set(DYNAREC_PASS

View File

@ -298,7 +298,9 @@ box86context_t *NewBox86Context(int argc)
pthread_mutex_init(&context->mutex_once, NULL);
pthread_mutex_init(&context->mutex_once2, NULL);
pthread_mutex_init(&context->mutex_trace, NULL);
#ifndef DYNAREC
pthread_mutex_init(&context->mutex_lock, NULL);
#endif
pthread_mutex_init(&context->mutex_tls, NULL);
pthread_mutex_init(&context->mutex_thread, NULL);
#ifdef DYNAREC
@ -424,7 +426,9 @@ void FreeBox86Context(box86context_t** context)
pthread_mutex_destroy(&ctx->mutex_once);
pthread_mutex_destroy(&ctx->mutex_once2);
pthread_mutex_destroy(&ctx->mutex_trace);
#ifndef DYNAREC
pthread_mutex_destroy(&ctx->mutex_lock);
#endif
pthread_mutex_destroy(&ctx->mutex_tls);
pthread_mutex_destroy(&ctx->mutex_thread);
#ifdef DYNAREC

View File

@ -122,7 +122,7 @@ Op is 20-27
#define SUB_IMM8(dst, src, imm8) \
EMIT(0xe2400000 | ((dst) << 12) | ((src) << 16) | brIMM(imm8) )
// sub cond dst, src, #(imm8)
#define SUB_COND_IMM8(dst, src, imm8) \
#define SUB_COND_IMM8(cond, dst, src, imm8) \
EMIT((cond) | 0x02400000 | ((dst) << 12) | ((src) << 16) | brIMM(imm8) )
// sub.s dst, src, #(imm8)
#define SUBS_IMM8(dst, src, imm8) \
@ -442,11 +442,27 @@ Op is 20-27
#define LDREXD_gen(cond, Rn, Rt) (cond | 0b000<<25 | 0b11011<<20 | (Rn)<<16 | (Rt)<<12 | 0b1111<<8 | 0b1001<<4 | 0b1111)
// Load Exclusive Rt/Rt+1 from Rn (tagging the memory)
#define LDREXD(Rn, Rt) EMIT(LDREXD_gen(c__, Rn, Rt))
#define LDREXD(Rt, Rn) EMIT(LDREXD_gen(c__, Rn, Rt))
#define STREXD_gen(cond, Rd, Rn, Rt) (cond | 0b000<<25 | 0b11010<<20 | (Rn)<<16 | (Rd)<<12 | 0b1111<<8 | 0b1001<<4 | (Rt))
// Store Exclusive Rt/Rt+1 to Rn, with result in Rd if tag is ok (Rd!=Rn && Rd!=Rt && Rd!=Rt+1), Rd==1 if store failed
#define STREXD(Rd, Rn, Rt) EMIT(STREXD_gen(c__, Rd, Rn, Rt))
#define STREXD(Rd, Rt, Rn) EMIT(STREXD_gen(c__, Rd, Rn, Rt))
#define LDREX_gen(cond, Rn, Rt) (cond | 0b0001100<<21 | 1<<20 | (Rn)<<16 | (Rt)<<12 | 0b1111<<8 | 0b1001<<4 | 0b1111)
// Load Exclusive Rt from Rn (tagging the memory)
#define LDREX(Rt, Rn) EMIT(LDREX_gen(c__, Rn, Rt))
#define STREX_gen(cond, Rd, Rn, Rt) (cond | 0b0001100<<21 | 0<<20 | (Rn)<<16 | (Rd)<<12 | 0b1111<<8 | 0b1001<<4 | (Rt))
// Store Exclusive Rt to Rn, with result in Rd=0 if tag is ok, Rd==1 if store failed (Rd!=Rn && Rd!=Rt)
#define STREX(Rd, Rt, Rn) EMIT(STREX_gen(c__, Rd, Rn, Rt))
#define LDREXB_gen(cond, Rn, Rt) (cond | 0b0001110<<21 | 1<<20 | (Rn)<<16 | (Rt)<<12 | 0b1111<<8 | 0b1001<<4 | 0b1111)
// Load Exclusive Byte Rt from Rn (tagging the memory)
#define LDREXB(Rt, Rn) EMIT(LDREXB_gen(c__, Rn, Rt))
#define STREXB_gen(cond, Rd, Rn, Rt) (cond | 0b0001110<<21 | 0<<20 | (Rn)<<16 | (Rd)<<12 | 0b1111<<8 | 0b1001<<4 | (Rt))
// Store Exclusive byte Rt to Rn, with result in Rd=0 if tag is ok, Rd==1 if store failed (Rd!=Rn && Rd!=Rt)
#define STREXB(Rd, Rt, Rn) EMIT(STREXB_gen(c__, Rd, Rn, Rt))
// Count leading 0 bit of Rm, store result in Rd
#define CLZ(Rd, Rm) EMIT(c__ | 0b00010110<<20 | 0b1111<<16 | (Rd)<<12 | 0b1111<<8 | 0b0001<<4 | (Rm))
@ -473,6 +489,9 @@ Op is 20-27
// Unsigned Div Rd <- Rn/Rm
#define UDIV(Rd, Rm, Rn) EMIT(UDIV_gen(c__, Rd, Rm, Rn))
// Yield
#define YIELD(cond) EMIT(cond | 0b00110010<<20 | 0b1111<<12 | 1)
// VFPU
#define TRANSFERT64(C, op) ((0b1100<<24) | (0b010<<21) | (0b101<<9) | ((C)<<8) | ((op)<<4))

51
src/dynarec/arm_lock_helper.S Executable file
View File

@ -0,0 +1,51 @@
// ARM lock helper
// Thin wrappers around the ARM exclusive load/store instructions, callable from C.
// Each access size comes as a pair of routines:
//   - read:  exclusive load from the address (LDREXB / LDREX / LDREXD)
//   - write: exclusive store to the address (STREXB / STREX / STREXD),
//            returning 0 on success, 1 on failure (value has been changed)
// NOTE(review): no memory barrier instruction is issued by any routine here;
// confirm whether callers rely on x86 LOCK-style ordering.

.text
.align 4

.global arm_lock_read_b
.global arm_lock_write_b
.global arm_lock_read_d
.global arm_lock_write_d
.global arm_lock_read_dd
.global arm_lock_write_dd

arm_lock_read_b:
    // Exclusive byte load.
    // in:  r0 = address
    // out: r0 = byte read
    ldrexb  r0, [r0]
    bx      lr

arm_lock_write_b:
    // Exclusive byte store.
    // in:  r0 = address, r1 = value
    // out: r0 = store status
    // The address is moved to r2 first so that r0 can receive the status.
    mov     r2, r0
    strexb  r0, r1, [r2]
    bx      lr

arm_lock_read_d:
    // Exclusive 32-bit load.
    // in:  r0 = address (needs to be aligned)
    // out: r0 = word read
    ldrex   r0, [r0]
    bx      lr

arm_lock_write_d:
    // Exclusive 32-bit store.
    // in:  r0 = address (needs to be aligned), r1 = value
    // out: r0 = store status
    // The address is moved to r2 first so that r0 can receive the status.
    mov     r2, r0
    strex   r0, r1, [r2]
    bx      lr

arm_lock_read_dd:
    // Exclusive 64-bit load.
    // in:  r0 = pointer receiving the first word,
    //      r1 = pointer receiving the second word,
    //      r2 = address to read
    // out: nothing in registers; the two words are stored through r0 and r1
    ldrexd  r2, r3, [r2]
    str     r2, [r0]
    str     r3, [r1]
    bx      lr

arm_lock_write_dd:
    // Exclusive 64-bit store.
    // in:  r0 = first word, r1 = second word, r2 = address
    // out: r0 = store status
    // r2 (the address) needs to be aligned
    // The status is produced in r3, then copied to r0 for the caller.
    strexd  r3, r0, r1, [r2]
    mov     r0, r3
    bx      lr

20
src/dynarec/arm_lock_helper.h Executable file
View File

@ -0,0 +1,20 @@
#ifndef __ARM_LOCK_HELPER__H__
#define __ARM_LOCK_HELPER__H__
#include <stdint.h>

// C prototypes for the exclusive load/store helpers implemented in arm_lock_helper.S.
// The intended pattern is a retry loop: read, compute the new value, write,
// and start again from the read while the write reports a failure (non-zero).

// LDREXB of ADDR: exclusive load of one byte, returns the byte read
extern uint8_t arm_lock_read_b(void* addr);

// STREXB of ADDR: exclusive store of one byte, return 0 if ok, 1 if not
extern int arm_lock_write_b(void* addr, uint8_t val);

// LDREX of ADDR: exclusive load of one 32-bit word (addr needs to be aligned)
extern uint32_t arm_lock_read_d(void* addr);

// STREX of ADDR: exclusive store of one 32-bit word (addr needs to be aligned), return 0 if ok, 1 if not
extern int arm_lock_write_d(void* addr, uint32_t val);

// LDREXD of ADDR: exclusive load of two 32-bit words; the first is stored in *a, the second in *b
extern void arm_lock_read_dd(uint32_t* a, uint32_t* b, void* addr);

// STREXD of ADDR: exclusive store of the two 32-bit words a and b (addr needs to be aligned), return 0 if ok, 1 if not
extern int arm_lock_write_dd(uint32_t a, uint32_t b, void* addr);

#endif //__ARM_LOCK_HELPER__H__

View File

@ -999,25 +999,16 @@ uintptr_t dynarec00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
BFI(gb1, x1, gb2*8, 8);
BFI(eb1, x12, eb2*8, 8);
} else {
if(0/*arm_swap*/) {
// use atomic swap...
addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0);
GETGB(x12);
SWPB(x12, x12, ed);
BFI(gb1, x12, gb2*8, 8);
} else {
// Lock
LOCK;
// do the swap
GETGB(x12);
addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 4095, 0);
LDRB_IMM9(x1, ed, fixedaddress); // 1 gets eb
// do the swap 12 -> strb(ed), 1 -> gd
BFI(gb1, x1, gb2*8, 8);
STRB_IMM9(x12, ed, fixedaddress);
// Unlock
UNLOCK;
}
GETGB(x12);
addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0);
MARKLOCK;
// do the swap with exclusive locking
LDREXB(x1, ed);
// do the swap 12 -> strb(ed), 1 -> gd
STREXB(x3, x12, ed);
CMPS_IMM8(x3, 0);
B_MARKLOCK(cNE);
BFI(gb1, x1, gb2*8, 8);
}
break;
case 0x87:
@ -1032,24 +1023,21 @@ uintptr_t dynarec00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
XOR_REG_LSL_IMM5(gd, gd, ed, 0);
}
} else {
if(0/*arm_swap*/) { // swap doesn't seem to really work like that, plus there seems to be alignement need on arm7
GETGD;
addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0);
// use atomic swap
SWP(gd, gd, ed);
} else {
LOCK;
GETGD;
GETED;
// xor swap to avoid one more tmp reg
if(gd!=ed) {
XOR_REG_LSL_IMM5(gd, gd, ed, 0);
XOR_REG_LSL_IMM5(ed, gd, ed, 0);
XOR_REG_LSL_IMM5(gd, gd, ed, 0);
}
WBACK;
UNLOCK;
}
GETGD;
addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0);
TSTS_IMM8(ed, 3);
B_MARK(cNE);
MARKLOCK;
LDREX(x1, ed);
STREX(x3, gd, ed);
CMPS_IMM8(x3, 0);
B_MARKLOCK(cNE);
B_MARK2(c__);
MARK;
LDR_IMM9(x1, ed, 0);
STR_IMM9(gd, ed, 0);
MARK2;
MOV_REG(gd, x1);
}
break;
case 0x88:

View File

@ -1542,26 +1542,14 @@ uintptr_t dynarec0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
INST_NAME("CMPXCHG Eb, Gb");
SETFLAGS(X_ALL, SF_SET);
nextop = F8;
MOVW(x1, 0);
STR_IMM9(x1, xEmu, offsetof(x86emu_t, df)); // d_none == 0
STR_IMM9(x1, xEmu, offsetof(x86emu_t, flags[F_AF]));
STR_IMM9(x1, xEmu, offsetof(x86emu_t, flags[F_PF]));
STR_IMM9(x1, xEmu, offsetof(x86emu_t, flags[F_OF]));
STR_IMM9(x1, xEmu, offsetof(x86emu_t, flags[F_SF]));
GETEB(x2)
UXTB(x1, xEAX, 0);
// Use a quick CMP, without setting A or P...
CMPS_REG_LSL_IMM5(x1, ed, 0);
MOVW_COND(cEQ, x1, 1);
STR_IMM9(x1, xEmu, offsetof(x86emu_t, flags[F_CF]));
B_MARK(cNE);
// AL == Eb
GETGB(x1);
MOV_REG(ed, x1);
EBBACK;
MOVW(x1, 1);
STR_IMM9(x1, xEmu, offsetof(x86emu_t, flags[F_ZF]));
// done
B_MARK3(c__);
MARK;
// AL != Eb
@ -1569,36 +1557,27 @@ uintptr_t dynarec0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
MOVW(x1, 0);
STR_IMM9(x1, xEmu, offsetof(x86emu_t, flags[F_ZF]));
MARK3;
// done, do the cmp now
emit_cmp8(dyn, ninst, x1, x2, x3, x12);
break;
case 0xB1:
INST_NAME("CMPXCHG Ed, Gd");
SETFLAGS(X_ALL, SF_SET);
nextop = F8;
MOVW(x1, 0);
STR_IMM9(x1, xEmu, offsetof(x86emu_t, df)); // d_none == 0
STR_IMM9(x1, xEmu, offsetof(x86emu_t, flags[F_AF]));
STR_IMM9(x1, xEmu, offsetof(x86emu_t, flags[F_PF]));
STR_IMM9(x1, xEmu, offsetof(x86emu_t, flags[F_OF]));
STR_IMM9(x1, xEmu, offsetof(x86emu_t, flags[F_SF]));
GETED;
GETGD;
// Use a quick CMP, without setting A or P...
CMPS_REG_LSL_IMM5(xEAX, ed, 0);
MOVW_COND(cEQ, x1, 1);
STR_IMM9(x1, xEmu, offsetof(x86emu_t, flags[F_CF]));
B_MARK(cNE);
// EAX == Ed
MOV_REG(x3, ed);
MOV_REG(ed, gd);
WBACK;
MOVW(x1, 1);
STR_IMM9(x1, xEmu, offsetof(x86emu_t, flags[F_ZF]));
// done
emit_cmp32(dyn, ninst, xEAX, x3, x1, x12);
B_MARK3(c__); // not next, in case its called with a LOCK prefix
MARK;
// EAX != Ed
emit_cmp32(dyn, ninst, xEAX, ed, x3, x12);
MOV_REG(xEAX, ed);
MOVW(x1, 0);
STR_IMM9(x1, xEmu, offsetof(x86emu_t, flags[F_ZF]));
MARK3
break;
case 0xB3:

View File

@ -24,13 +24,452 @@
uintptr_t dynarecF0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, int* ok, int* need_epilog)
{
uint8_t nextop = PK(0); // don't increment addr
int locked = 0;
switch(nextop) {
// generic case
uint8_t nextop, opcode = F8;
uint8_t wback, wb1, wb2, gb1, gb2;
uint8_t ed, gd, u8;
int fixedaddress;
int32_t i32;
MAYUSE(i32);
MAYUSE(gb1);
MAYUSE(gb2);
MAYUSE(wb1);
MAYUSE(wb2);
switch(opcode) {
case 0x00:
    // LOCK ADD Eb, Gb: 8-bit add of Gb (loaded in x2) into Eb.
    INST_NAME("LOCK ADD Eb, Gb");
    SETFLAGS(X_ALL, SF_SET);
    nextop = F8;
    GETGB(x2);
    if((nextop&0xC0)==0xC0) {
        // Register destination: no memory access, so no exclusive loop is emitted.
        wback = (nextop&7);
        wb2 = (wback>>2);
        wback = xEAX+(wback&3);
        UXTB(x1, wback, wb2);
        emit_add8(dyn, ninst, x1, x2, x12, x3, 0);
        // The result is in x1; 'ed' is never assigned on this path, so it must not be used here.
        BFI(wback, x1, wb2*8, 8);
    } else {
        // Memory destination: exclusive load / compute / exclusive store, retried until the store succeeds.
        addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0);
        MARKLOCK;
        LDREXB(x1, wback);
        // x3 is also a scratch register of emit_add8; the last argument is set when x3 holds the address
        // (presumably so emit_add8 preserves it -- confirm against emit_add8).
        emit_add8(dyn, ninst, x1, x2, x12, x3, (wback==x3)?1:0);
        STREXB(x12, x1, wback);
        CMPS_IMM8(x12, 0);
        B_MARKLOCK(cNE);    // write failed, try again
    }
    break;
case 0x01:
INST_NAME("LOCK ADD Ed, Gd");
SETFLAGS(X_ALL, SF_SET);
nextop = F8;
GETGD;
if((nextop&0xC0)==0xC0) {
ed = xEAX+(nextop&7);
emit_add32(dyn, ninst, ed, gd, x3, x12);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0);
MARKLOCK;
LDREX(x1, wback);
emit_add32(dyn, ninst, x1, gd, x3, x12);
STREX(x3, x1, wback);
CMPS_IMM8(x3, 0);
B_MARKLOCK(cNE);
}
break;
case 0x08:
    // LOCK OR Eb, Gb: 8-bit or of Gb (loaded in x2) into Eb.
    INST_NAME("LOCK OR Eb, Gb");
    SETFLAGS(X_ALL, SF_SET);
    nextop = F8;
    GETGB(x2);
    if((nextop&0xC0)==0xC0) {
        // Register destination: no memory access, so no exclusive loop is emitted.
        wback = (nextop&7);
        wb2 = (wback>>2);
        wback = xEAX+(wback&3);
        UXTB(x1, wback, wb2);
        emit_or8(dyn, ninst, x1, x2, x12, x3, 0);
        // The result is in x1; 'ed' is never assigned on this path, so it must not be used here.
        BFI(wback, x1, wb2*8, 8);
    } else {
        // Memory destination: exclusive load / compute / exclusive store, retried until the store succeeds.
        addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0);
        MARKLOCK;
        LDREXB(x1, wback);
        // x3 is also a scratch register of emit_or8; the last argument is set when x3 holds the address
        // (presumably so emit_or8 preserves it -- confirm against emit_or8).
        emit_or8(dyn, ninst, x1, x2, x12, x3, (wback==x3)?1:0);
        STREXB(x12, x1, wback);
        CMPS_IMM8(x12, 0);
        B_MARKLOCK(cNE);    // write failed, try again
    }
    break;
case 0x09:
INST_NAME("LOCK OR Ed, Gd");
SETFLAGS(X_ALL, SF_SET);
nextop = F8;
GETGD;
if((nextop&0xC0)==0xC0) {
ed = xEAX+(nextop&7);
emit_or32(dyn, ninst, ed, gd, x3, x12);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0);
MARKLOCK;
LDREX(x1, wback);
emit_or32(dyn, ninst, x1, gd, x3, x12);
STREX(x3, x1, wback);
CMPS_IMM8(x3, 0);
B_MARKLOCK(cNE);
}
break;
case 0x10:
    // LOCK ADC Eb, Gb: 8-bit add-with-carry of Gb (loaded in x2) into Eb.
    INST_NAME("LOCK ADC Eb, Gb");
    // ADC consumes the incoming carry: declare the CF read before the flags are redefined,
    // as the ADC Ed, Id/Ib handling of this same function does.
    READFLAGS(X_CF);
    SETFLAGS(X_ALL, SF_SET);
    nextop = F8;
    GETGB(x2);
    if((nextop&0xC0)==0xC0) {
        // Register destination: no memory access, so no exclusive loop is emitted.
        wback = (nextop&7);
        wb2 = (wback>>2);
        wback = xEAX+(wback&3);
        UXTB(x1, wback, wb2);
        emit_adc8(dyn, ninst, x1, x2, x12, x3, 0);
        // The result is in x1; 'ed' is never assigned on this path, so it must not be used here.
        BFI(wback, x1, wb2*8, 8);
    } else {
        // Memory destination: exclusive load / compute / exclusive store, retried until the store succeeds.
        addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0);
        MARKLOCK;
        LDREXB(x1, wback);
        // x3 is also a scratch register of emit_adc8; the last argument is set when x3 holds the address
        // (presumably so emit_adc8 preserves it -- confirm against emit_adc8).
        emit_adc8(dyn, ninst, x1, x2, x12, x3, (wback==x3)?1:0);
        STREXB(x12, x1, wback);
        CMPS_IMM8(x12, 0);
        B_MARKLOCK(cNE);    // write failed, try again
    }
    break;
case 0x11:
    // LOCK ADC Ed, Gd: 32-bit add-with-carry of Gd into Ed.
    INST_NAME("LOCK ADC Ed, Gd");
    // ADC consumes the incoming carry: declare the CF read before the flags are redefined,
    // as the ADC Ed, Id/Ib handling of this same function does.
    READFLAGS(X_CF);
    SETFLAGS(X_ALL, SF_SET);
    nextop = F8;
    GETGD;
    if((nextop&0xC0)==0xC0) {
        // Register destination: operate in place, no exclusive loop is emitted.
        ed = xEAX+(nextop&7);
        emit_adc32(dyn, ninst, ed, gd, x3, x12);
    } else {
        // Memory destination: exclusive load / compute / exclusive store, retried until the store succeeds.
        addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0);
        MARKLOCK;
        LDREX(x1, wback);
        emit_adc32(dyn, ninst, x1, gd, x3, x12);
        STREX(x3, x1, wback);   // x3 receives the store status
        CMPS_IMM8(x3, 0);
        B_MARKLOCK(cNE);        // write failed, try again
    }
    break;
case 0x18:
    // LOCK SBB Eb, Gb: 8-bit subtract-with-borrow of Gb (loaded in x2) from Eb.
    INST_NAME("LOCK SBB Eb, Gb");
    // SBB consumes the incoming carry: declare the CF read before the flags are redefined,
    // as the SBB Ed, Id/Ib handling of this same function does.
    READFLAGS(X_CF);
    SETFLAGS(X_ALL, SF_SET);
    nextop = F8;
    GETGB(x2);
    if((nextop&0xC0)==0xC0) {
        // Register destination: no memory access, so no exclusive loop is emitted.
        wback = (nextop&7);
        wb2 = (wback>>2);
        wback = xEAX+(wback&3);
        UXTB(x1, wback, wb2);
        emit_sbb8(dyn, ninst, x1, x2, x12, x3, 0);
        // The result is in x1; 'ed' is never assigned on this path, so it must not be used here.
        BFI(wback, x1, wb2*8, 8);
    } else {
        // Memory destination: exclusive load / compute / exclusive store, retried until the store succeeds.
        addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0);
        MARKLOCK;
        LDREXB(x1, wback);
        // x3 is also a scratch register of emit_sbb8; the last argument is set when x3 holds the address
        // (presumably so emit_sbb8 preserves it -- confirm against emit_sbb8).
        emit_sbb8(dyn, ninst, x1, x2, x12, x3, (wback==x3)?1:0);
        STREXB(x12, x1, wback);
        CMPS_IMM8(x12, 0);
        B_MARKLOCK(cNE);    // write failed, try again
    }
    break;
case 0x19:
    // LOCK SBB Ed, Gd: 32-bit subtract-with-borrow of Gd from Ed.
    INST_NAME("LOCK SBB Ed, Gd");
    // SBB consumes the incoming carry: declare the CF read before the flags are redefined,
    // as the SBB Ed, Id/Ib handling of this same function does.
    READFLAGS(X_CF);
    SETFLAGS(X_ALL, SF_SET);
    nextop = F8;
    GETGD;
    if((nextop&0xC0)==0xC0) {
        // Register destination: operate in place, no exclusive loop is emitted.
        ed = xEAX+(nextop&7);
        emit_sbb32(dyn, ninst, ed, gd, x3, x12);
    } else {
        // Memory destination: exclusive load / compute / exclusive store, retried until the store succeeds.
        addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0);
        MARKLOCK;
        LDREX(x1, wback);
        emit_sbb32(dyn, ninst, x1, gd, x3, x12);
        STREX(x3, x1, wback);   // x3 receives the store status
        CMPS_IMM8(x3, 0);
        B_MARKLOCK(cNE);        // write failed, try again
    }
    break;
case 0x20:
    // LOCK AND Eb, Gb: 8-bit and of Gb (loaded in x2) into Eb.
    INST_NAME("LOCK AND Eb, Gb");
    SETFLAGS(X_ALL, SF_SET);
    nextop = F8;
    GETGB(x2);
    if((nextop&0xC0)==0xC0) {
        // Register destination: no memory access, so no exclusive loop is emitted.
        wback = (nextop&7);
        wb2 = (wback>>2);
        wback = xEAX+(wback&3);
        UXTB(x1, wback, wb2);
        emit_and8(dyn, ninst, x1, x2, x12, x3, 0);
        // The result is in x1; 'ed' is never assigned on this path, so it must not be used here.
        BFI(wback, x1, wb2*8, 8);
    } else {
        // Memory destination: exclusive load / compute / exclusive store, retried until the store succeeds.
        addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0);
        MARKLOCK;
        LDREXB(x1, wback);
        // x3 is also a scratch register of emit_and8; the last argument is set when x3 holds the address
        // (presumably so emit_and8 preserves it -- confirm against emit_and8).
        emit_and8(dyn, ninst, x1, x2, x12, x3, (wback==x3)?1:0);
        STREXB(x12, x1, wback);
        CMPS_IMM8(x12, 0);
        B_MARKLOCK(cNE);    // write failed, try again
    }
    break;
case 0x21:
INST_NAME("LOCK AND Ed, Gd");
SETFLAGS(X_ALL, SF_SET);
nextop = F8;
GETGD;
if((nextop&0xC0)==0xC0) {
ed = xEAX+(nextop&7);
emit_and32(dyn, ninst, ed, gd, x3, x12);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0);
MARKLOCK;
LDREX(x1, wback);
emit_and32(dyn, ninst, x1, gd, x3, x12);
STREX(x3, x1, wback);
CMPS_IMM8(x3, 0);
B_MARKLOCK(cNE);
}
break;
case 0x28:
    // LOCK SUB Eb, Gb: 8-bit subtract of Gb (loaded in x2) from Eb.
    INST_NAME("LOCK SUB Eb, Gb");
    SETFLAGS(X_ALL, SF_SET);
    nextop = F8;
    GETGB(x2);
    if((nextop&0xC0)==0xC0) {
        // Register destination: no memory access, so no exclusive loop is emitted.
        wback = (nextop&7);
        wb2 = (wback>>2);
        wback = xEAX+(wback&3);
        UXTB(x1, wback, wb2);
        emit_sub8(dyn, ninst, x1, x2, x12, x3, 0);
        // The result is in x1; 'ed' is never assigned on this path, so it must not be used here.
        BFI(wback, x1, wb2*8, 8);
    } else {
        // Memory destination: exclusive load / compute / exclusive store, retried until the store succeeds.
        addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0);
        MARKLOCK;
        LDREXB(x1, wback);
        // x3 is also a scratch register of emit_sub8; the last argument is set when x3 holds the address
        // (presumably so emit_sub8 preserves it -- confirm against emit_sub8).
        emit_sub8(dyn, ninst, x1, x2, x12, x3, (wback==x3)?1:0);
        STREXB(x12, x1, wback);
        CMPS_IMM8(x12, 0);
        B_MARKLOCK(cNE);    // write failed, try again
    }
    break;
case 0x29:
INST_NAME("LOCK SUB Ed, Gd");
SETFLAGS(X_ALL, SF_SET);
nextop = F8;
GETGD;
if((nextop&0xC0)==0xC0) {
ed = xEAX+(nextop&7);
emit_sub32(dyn, ninst, ed, gd, x3, x12);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0);
MARKLOCK;
LDREX(x1, wback);
emit_sub32(dyn, ninst, x1, gd, x3, x12);
STREX(x3, x1, wback);
CMPS_IMM8(x3, 0);
B_MARKLOCK(cNE);
}
break;
case 0x38:
    // LOCK XOR Eb, Gb: 8-bit exclusive-or of Gb (loaded in x2) into Eb.
    INST_NAME("LOCK XOR Eb, Gb");
    SETFLAGS(X_ALL, SF_SET);
    nextop = F8;
    GETGB(x2);
    if((nextop&0xC0)==0xC0) {
        // Register destination: no memory access, so no exclusive loop is emitted.
        wback = (nextop&7);
        wb2 = (wback>>2);
        wback = xEAX+(wback&3);
        UXTB(x1, wback, wb2);
        emit_xor8(dyn, ninst, x1, x2, x12, x3, 0);
        // The result is in x1; 'ed' is never assigned on this path, so it must not be used here.
        BFI(wback, x1, wb2*8, 8);
    } else {
        // Memory destination: exclusive load / compute / exclusive store, retried until the store succeeds.
        addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0);
        MARKLOCK;
        LDREXB(x1, wback);
        // x3 is also a scratch register of emit_xor8; the last argument is set when x3 holds the address
        // (presumably so emit_xor8 preserves it -- confirm against emit_xor8).
        emit_xor8(dyn, ninst, x1, x2, x12, x3, (wback==x3)?1:0);
        STREXB(x12, x1, wback);
        CMPS_IMM8(x12, 0);
        B_MARKLOCK(cNE);    // write failed, try again
    }
    break;
case 0x39:
INST_NAME("LOCK XOR Ed, Gd");
SETFLAGS(X_ALL, SF_SET);
nextop = F8;
GETGD;
if((nextop&0xC0)==0xC0) {
ed = xEAX+(nextop&7);
emit_xor32(dyn, ninst, ed, gd, x3, x12);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0);
MARKLOCK;
LDREX(x1, wback);
emit_xor32(dyn, ninst, x1, gd, x3, x12);
STREX(x3, x1, wback);
CMPS_IMM8(x3, 0);
B_MARKLOCK(cNE);
}
break;
case 0x81:
case 0x83:
nextop = F8;
switch((nextop>>3)&7) {
case 0: //ADD
if(opcode==0x81) {
INST_NAME("LOCK ADD Ed, Id");
} else {
INST_NAME("LOCK ADD Ed, Ib");
}
SETFLAGS(X_ALL, SF_SET);
if((nextop&0xC0)==0xC0) {
if(opcode==0x81) i32 = F32S; else i32 = F8S;
ed = xEAX+(nextop&7);
emit_add32c(dyn, ninst, ed, i32, x3, x12);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0);
if(opcode==0x81) i32 = F32S; else i32 = F8S;
MARKLOCK;
LDREX(x1, wback);
emit_add32c(dyn, ninst, x1, i32, x3, x12);
STREX(x3, x1, wback);
CMPS_IMM8(x3, 0);
B_MARKLOCK(cNE);
}
break;
case 1: //OR
if(opcode==0x81) {INST_NAME("LOCK OR Ed, Id");} else {INST_NAME("LOCK OR Ed, Ib");}
SETFLAGS(X_ALL, SF_SET);
if((nextop&0xC0)==0xC0) {
if(opcode==0x81) i32 = F32S; else i32 = F8S;
ed = xEAX+(nextop&7);
emit_or32c(dyn, ninst, ed, i32, x3, x12);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0);
if(opcode==0x81) i32 = F32S; else i32 = F8S;
MARKLOCK;
LDREX(x1, wback);
emit_or32c(dyn, ninst, x1, i32, x3, x12);
STREX(x3, x1, wback);
CMPS_IMM8(x3, 0);
B_MARKLOCK(cNE);
}
break;
case 2: //ADC
if(opcode==0x81) {INST_NAME("LOCK ADC Ed, Id");} else {INST_NAME("LOCK ADC Ed, Ib");}
READFLAGS(X_CF);
SETFLAGS(X_ALL, SF_SET);
if((nextop&0xC0)==0xC0) {
if(opcode==0x81) i32 = F32S; else i32 = F8S;
ed = xEAX+(nextop&7);
emit_adc32c(dyn, ninst, ed, i32, x3, x12);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0);
if(opcode==0x81) i32 = F32S; else i32 = F8S;
MARKLOCK;
LDREX(x1, wback);
emit_adc32c(dyn, ninst, x1, i32, x3, x12);
STREX(x3, x1, wback);
CMPS_IMM8(x3, 0);
B_MARKLOCK(cNE);
}
break;
case 3: //SBB
if(opcode==0x81) {INST_NAME("LOCK SBB Ed, Id");} else {INST_NAME("LOCK SBB Ed, Ib");}
READFLAGS(X_CF);
SETFLAGS(X_ALL, SF_SET);
if((nextop&0xC0)==0xC0) {
if(opcode==0x81) i32 = F32S; else i32 = F8S;
ed = xEAX+(nextop&7);
emit_sbb32c(dyn, ninst, ed, i32, x3, x12);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0);
if(opcode==0x81) i32 = F32S; else i32 = F8S;
MARKLOCK;
LDREX(x1, wback);
emit_sbb32c(dyn, ninst, x1, i32, x3, x12);
STREX(x3, x1, wback);
CMPS_IMM8(x3, 0);
B_MARKLOCK(cNE);
}
break;
case 4: //AND
if(opcode==0x81) {INST_NAME("LOCK AND Ed, Id");} else {INST_NAME("LOCK AND Ed, Ib");}
SETFLAGS(X_ALL, SF_SET);
if((nextop&0xC0)==0xC0) {
if(opcode==0x81) i32 = F32S; else i32 = F8S;
ed = xEAX+(nextop&7);
emit_and32c(dyn, ninst, ed, i32, x3, x12);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0);
if(opcode==0x81) i32 = F32S; else i32 = F8S;
MARKLOCK;
LDREX(x1, wback);
emit_and32c(dyn, ninst, x1, i32, x3, x12);
STREX(x3, x1, wback);
CMPS_IMM8(x3, 0);
B_MARKLOCK(cNE);
}
break;
case 5: //SUB
if(opcode==0x81) {INST_NAME("LOCK SUB Ed, Id");} else {INST_NAME("LOCK SUB Ed, Ib");}
SETFLAGS(X_ALL, SF_SET);
if((nextop&0xC0)==0xC0) {
if(opcode==0x81) i32 = F32S; else i32 = F8S;
ed = xEAX+(nextop&7);
emit_sub32c(dyn, ninst, ed, i32, x3, x12);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0);
if(opcode==0x81) i32 = F32S; else i32 = F8S;
MARKLOCK;
LDREX(x1, wback);
emit_sub32c(dyn, ninst, x1, i32, x3, x12);
STREX(x3, x1, wback);
CMPS_IMM8(x3, 0);
B_MARKLOCK(cNE);
}
break;
case 6: //XOR
if(opcode==0x81) {INST_NAME("LOCK XOR Ed, Id");} else {INST_NAME("LOCK XOR Ed, Ib");}
SETFLAGS(X_ALL, SF_SET);
if((nextop&0xC0)==0xC0) {
if(opcode==0x81) i32 = F32S; else i32 = F8S;
ed = xEAX+(nextop&7);
emit_xor32c(dyn, ninst, ed, i32, x3, x12);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0);
if(opcode==0x81) i32 = F32S; else i32 = F8S;
MARKLOCK;
LDREX(x1, wback);
emit_xor32c(dyn, ninst, x1, i32, x3, x12);
STREX(x3, x1, wback);
CMPS_IMM8(x3, 0);
B_MARKLOCK(cNE);
}
break;
case 7: //CMP
if(opcode==0x81) {INST_NAME("(LOCK) CMP Ed, Id");} else {INST_NAME("(LOCK) CMP Ed, Ib");}
SETFLAGS(X_ALL, SF_SET);
GETEDH(x1);
// No need to LOCK, this is readonly
if(opcode==0x81) i32 = F32S; else i32 = F8S;
if(i32) {
MOV32(x2, i32);
emit_cmp32(dyn, ninst, ed, x2, x3, x12);
} else {
emit_cmp32_0(dyn, ninst, ed, x3, x12);
}
break;
}
break;
// generic case, no lock needed, the value is only read (note that on x86 a locked read is always followed by a locked write)
#define GO(A) \
case A+0x00: \
case A+0x01: \
case A+0x02: \
case A+0x03: \
case A+0x04: \
@ -43,62 +482,346 @@ uintptr_t dynarecF0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
GO(0x28);
GO(0x30);
#undef GO
case 0x81:
case 0x83:
MESSAGE(LOG_DUMP, "LOCK\n");
locked = 1;
LOCK;
addr = dynarec00(dyn, addr, ip, ninst, ok, need_epilog);
break;
case 0x86: // for this two, the lock is already done by the opcode, so just ignoring it
case 0x87:
addr = dynarec00(dyn, addr, ip, ninst, ok, need_epilog);
--addr;
break;
case 0x0F:
nextop = PK(1);
nextop = F8;
switch(nextop) {
case 0xB0:
INST_NAME("LOCK CMPXCHG Eb, Gb");
SETFLAGS(X_ALL, SF_SET);
nextop = F8;
if((nextop&0xC0)==0xC0) {
wback = (nextop&7);
wb2 = (wback>>2);
wback = xEAX+(wback&3);
UXTB(x2, wback, wb2);
ed = x2;
wb1 = 0;
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0);
MARKLOCK;
LDREXB(x2, wback);
ed = x2;
wb1 = 1;
}
UXTB(x1, xEAX, 0);
CMPS_REG_LSL_IMM5(x1, ed, 0);
B_MARK(cNE);
// AL == Eb
GETGB(x1);
if(wb1) {
STREXB(x12, x1, wback);
CMPS_IMM8(x12, 0);
B_MARKLOCK(cNE); // write failed, try again
} else {
BFI(wback, x1, wb2*8, 8);
}
// done
B_MARK3(c__);
MARK;
// AL != Eb
BFI(xEAX, ed, 0, 8);
MARK3;
// done, do the cmp now
emit_cmp8(dyn, ninst, x1, x2, x3, x12);
break;
case 0xB1:
INST_NAME("LOCK CMPXCHG Ed, Gd");
SETFLAGS(X_ALL, SF_SET);
nextop = F8;
GETGD;
if((nextop&0xC0)==0xC0) {
ed = xEAX+(nextop&7);
wback = 0;
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0);
MARKLOCK;
LDREX(x1, wback);
ed = x1;
}
CMPS_REG_LSL_IMM5(xEAX, ed, 0);
B_MARK(cNE);
// EAX == Ed
MOV_REG(x3, ed);
MOV_REG(ed, gd);
if(wback) {
STREX(x12, ed, wback);
CMPS_IMM8(x12, 0);
B_MARKLOCK(cNE);
}
emit_cmp32(dyn, ninst, xEAX, x3, x1, x12);
// done
B_MARK3(c__); // not next, in case its called with a LOCK prefix
MARK;
// EAX != Ed
emit_cmp32(dyn, ninst, xEAX, ed, x3, x12);
MOV_REG(xEAX, ed);
MARK3
break;
case 0xB3:
INST_NAME("LOCK BTR Ed, Gd");
SETFLAGS(X_CF, SF_SET);
nextop = F8;
GETGD;
if((nextop&0xC0)==0xC0) {
ed = xEAX+(nextop&7);
wback = 0;
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0);
UBFX(x1, gd, 5, 3); // r1 = (gd>>5);
ADD_REG_LSL_IMM5(x3, wback, x1, 2); //(&ed)+=r1*4;
wback = x3;
MARKLOCK;
LDREX(x1, wback);
ed = x1;
}
AND_IMM8(x2, gd, 0x1f);
MOV_REG_LSR_REG(x12, ed, x2);
ANDS_IMM8(x12, x12, 1);
STR_IMM9(x12, xEmu, offsetof(x86emu_t, flags[F_CF]));
B_MARK3(cEQ); // bit already clear, jump to end of instruction
MOVW(x12, 1);
XOR_REG_LSL_REG(ed, ed, x12, x2);
if(wback) {
STREX(x12, ed, wback);
CMPS_IMM8(x12, 0);
B_MARKLOCK(cNE);
}
MARK3;
break;
case 0xBA:
nextop = F8;
switch((nextop>>3)&7) {
case 4:
INST_NAME("(LOCK) BT Ed, Ib");
SETFLAGS(X_CF, SF_SUBSET);
gd = x2;
if((nextop&0xC0)==0xC0) {
ed = xEAX+(nextop&7);
u8 = F8;
} else {
addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, 4095-32, 0);
u8 = F8;
fixedaddress+=(u8>>5)*4;
LDR_IMM9(x1, ed, fixedaddress);
ed = x1;
}
u8&=0x1f;
if(u8) {
MOV_REG_LSR_IMM5(x1, ed, u8);
ed = x1;
}
AND_IMM8(x1, ed, 1);
STR_IMM9(x1, xEmu, offsetof(x86emu_t, flags[F_CF]));
break;
case 6:
INST_NAME("(LOCK) BTR Ed, Ib");
SETFLAGS(X_CF, SF_SUBSET);
gd = x2;
if((nextop&0xC0)==0xC0) {
ed = xEAX+(nextop&7);
u8 = F8;
MOVW(gd, u8);
wback = 0;
} else {
addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, 0, 0);
u8 = F8;
MOVW(gd, u8);
UBFX(x1, gd, 5, 3); // r1 = (gd>>5);
ADD_REG_LSL_IMM5(x3, ed, x1, 2); //(&ed)+=r1*4;
MARKLOCK;
LDREX(x1, x3);
ed = x1;
wback = x3;
}
AND_IMM8(x2, gd, 0x1f);
MOV_REG_LSR_REG(x12, ed, x2);
ANDS_IMM8(x12, x12, 1);
STR_IMM9(x12, xEmu, offsetof(x86emu_t, flags[F_CF]));
B_MARK3(cEQ); // bit already clear, jump to next instruction
//MOVW(x12, 1); // already 0x01
XOR_REG_LSL_REG(ed, ed, x12, x2);
if(wback) {
STREX(x12, ed, wback);
CMPS_IMM8(x12, 0);
B_MARKLOCK(cNE);
}
MARK3;
break;
default:
DEFAULT;
}
break;
case 0xBB:
INST_NAME("LOCK BTC Ed, Gd");
SETFLAGS(X_CF, SF_SET);
nextop = F8;
GETGD;
if((nextop&0xC0)==0xC0) {
ed = xEAX+(nextop&7);
wback = 0;
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0);
UBFX(x1, gd, 5, 3); // r1 = (gd>>5);
ADD_REG_LSL_IMM5(x3, wback, x1, 2); //(&ed)+=r1*4;
MARKLOCK;
LDREX(x1, x3);
ed = x1;
wback = x3;
}
AND_IMM8(x2, gd, 0x1f);
MOV_REG_LSR_REG(x12, ed, x2);
AND_IMM8(x12, x12, 1);
STR_IMM9(x12, xEmu, offsetof(x86emu_t, flags[F_CF]));
MOVW(x12, 1);
XOR_REG_LSL_REG(ed, ed, x12, x2);
if(wback) {
STREX(x12, ed, wback);
CMPS_IMM8(x12, 0);
B_MARKLOCK(cNE);
}
break;
case 0xC0:
    // LOCK XADD Gb, Eb: Gb receives the old Eb, Eb receives Eb+Gb.
    INST_NAME("LOCK XADD Gb, Eb");
    SETFLAGS(X_ALL, SF_SET);
    nextop = F8;
    GETGB(x1);      // x1 holds the original Gb and must survive a retry of the exclusive loop
    if((nextop&0xC0)==0xC0) {
        // Register operand: no memory access, so no exclusive loop is emitted.
        wback = (nextop&7);
        wb2 = (wback>>2);
        wback = xEAX+(wback&3);
        UXTB(x2, wback, wb2);
        wb1 = 0;
        ed = x2;
    } else {
        // Memory operand: exclusive load here, exclusive store below, retried from MARKLOCK on failure.
        addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0);
        MARKLOCK;
        LDREXB(x2, wback);
        wb1 = 1;
        ed = x2;
    }
    BFI(gb1, ed, gb2*8, 8); // gb <- eb
    emit_add8(dyn, ninst, ed, gd, x12, x3, 1);
    if(wb1) {
        // The store status goes to x12: using x1 would overwrite the saved Gb,
        // and a retry would then add the status value instead of Gb.
        STREXB(x12, ed, wback);
        CMPS_IMM8(x12, 0);
        B_MARKLOCK(cNE);    // write failed, try again
    } else {
        BFI(wback, ed, wb2*8, 8);
    }
    break;
case 0xC1:
INST_NAME("LOCK XADD Gd, Ed");
SETFLAGS(X_ALL, SF_SET);
nextop = F8;
GETGD;
if((nextop&0xC0)==0xC0) {
ed = xEAX+(nextop&7);
if(gd!=ed) {
XOR_REG_LSL_IMM5(gd, gd, ed, 0); // swap gd, ed
XOR_REG_LSL_IMM5(ed, gd, ed, 0);
XOR_REG_LSL_IMM5(gd, gd, ed, 0);
}
emit_add32(dyn, ninst, ed, gd, x3, x12);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0);
MARKLOCK;
LDREX(x1, wback);
PUSH(xSP, 1<<x1);
emit_add32(dyn, ninst, x1, gd, x3, x12);
STREX(x3, x1, wback);
CMPS_IMM8(x3, 0);
POP(xSP, 1<<x1);
B_MARKLOCK(cNE);
MOV_REG(gd, x1);
}
break;
case 0xC7:
MESSAGE(LOG_DUMP, "LOCK\n");
locked = 1;
LOCK;
addr = dynarec0F(dyn, addr+1, ip, ninst, ok, need_epilog);
INST_NAME("LOCK CMPXCHG8B Gq, Eq");
SETFLAGS(X_ZF, SF_SET);
nextop = F8;
addr = geted(dyn, addr, ninst, nextop, &wback, x1, &fixedaddress, 0, 0);
MARKLOCK;
LDREXD(x2, wback);
CMPS_REG_LSL_IMM5(xEAX, x2, 0);
B_MARK(cNE); // EAX != Ed[0]
CMPS_REG_LSL_IMM5(xEDX, x3, 0);
B_MARK(cNE); // EDX != Ed[1]
MOV_REG(x2, xEBX);
MOV_REG(x3, xECX);
STREXD(x12, x2, wback);
CMPS_IMM8(x12, 0);
B_MARKLOCK(cNE);
MOVW(x1, 1);
B_MARK3(c__);
MARK;
MOV_REG(xEAX, x2);
MOV_REG(xEDX, x3);
MOVW(x1, 0);
MARK3;
STR_IMM9(x1, xEmu, offsetof(x86emu_t, flags[F_ZF]));
break;
default:
addr = dynarec0F(dyn, addr+1, ip, ninst, ok, need_epilog); // no lock, regular instruction...
// default to NO LOCK
addr-=2;
}
break;
case 0xFF:
nextop = PK(1);
nextop = F8;
switch((nextop>>3)&7)
{
case 0:
case 1:
MESSAGE(LOG_DUMP, "LOCK\n");
locked = 1;
LOCK;
addr = dynarec00(dyn, addr, ip, ninst, ok, need_epilog);
case 0: // INC Ed
INST_NAME("INC Ed");
SETFLAGS(X_ALL&~X_CF, SF_SUBSET);
if((nextop&0xC0)==0xC0) {
ed = xEAX+(nextop&7);
emit_inc32(dyn, ninst, ed, x3, x12);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0);
MARKLOCK;
LDREX(x1, wback);
emit_inc32(dyn, ninst, x1, x3, x12);
STREX(x3, x1, wback);
CMPS_IMM8(x3, 0);
B_MARKLOCK(cNE);
}
break;
case 1: //DEC Ed
INST_NAME("DEC Ed");
SETFLAGS(X_ALL&~X_CF, SF_SUBSET);
if((nextop&0xC0)==0xC0) {
ed = xEAX+(nextop&7);
emit_dec32(dyn, ninst, ed, x3, x12);
} else {
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0);
MARKLOCK;
LDREX(x1, wback);
emit_dec32(dyn, ninst, x1, x3, x12);
STREX(x3, x1, wback);
CMPS_IMM8(x3, 0);
B_MARKLOCK(cNE);
}
break;
default:
addr = dynarec00(dyn, addr, ip, ninst, ok, need_epilog); // no lock, regular instruction...
// default to NO LOCK
addr-=2;
}
break;
default:
addr = dynarec00(dyn, addr, ip, ninst, ok, need_epilog); // no lock, regular instruction...
// default to NO LOCK
--addr;
}
if(locked) {UNLOCK;}
return addr;
}

View File

@ -293,12 +293,12 @@ void jump_to_linker(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst)
// TODO: This is not thread safe.
if(!ip) { // no IP, jump address in a reg, so need smart linker
MARK;
LDREXD(x1, x2); // load dest address in x2 and planned ip in x3
LDREXD(x2, x1); // load dest address in x2 and planned ip in x3
CMPS_REG_LSL_IMM5(xEIP, x3, 0);
BXcond(cEQ, x2);
MOV32_(x2, (uintptr_t)arm_linker);
MOV_REG(x3, x12);
STREXD(x12, x1, x2); // nope, putting back linker & IP in place
STREXD(x12, x2, x1); // nope, putting back linker & IP in place
// x12 now contain success / falure for write
CMPS_IMM8(x12, 1);
MOV_REG(x12, x3); // put back IP in place...
@ -338,12 +338,12 @@ void ret_to_epilog(dynarec_arm_t* dyn, int ninst)
dyn->tablei+=4; // smart linker
MOV32_(x1, (uintptr_t)table);
MARK;
LDREXD(x1, x2); // load dest address in x2 and planned ip in x3
LDREXD(x2, x1); // load dest address in x2 and planned ip in x3
CMPS_REG_LSL_IMM5(xEIP, x3, 0);
BXcond(cEQ, x2);
MOV32_(x2, (uintptr_t)arm_linker);
MOV_REG(x3, x12);
STREXD(x12, x1, x2); // nope, putting back linker & IP in place
STREXD(x12, x2, x1); // nope, putting back linker & IP in place
// x12 now contain success / falure for write
CMPS_IMM8(x12, 1);
MOV_REG(x12, x3); // put back IP in place...
@ -386,12 +386,12 @@ void retn_to_epilog(dynarec_arm_t* dyn, int ninst, int n)
dyn->tablei+=4; // smart linker
MOV32_(x1, (uintptr_t)table);
MARK;
LDREXD(x1, x2); // load dest address in x2 and planned ip in x3
LDREXD(x2, x1); // load dest address in x2 and planned ip in x3
CMPS_REG_LSL_IMM5(xEIP, x3, 0);
BXcond(cEQ, x2);
MOV32_(x2, (uintptr_t)arm_linker);
MOV_REG(x3, x12);
STREXD(x12, x1, x2); // nope, putting back linker & IP in place
STREXD(x12, x2, x1); // nope, putting back linker & IP in place
// x12 now contain success / falure for write
CMPS_IMM8(x12, 1);
MOV_REG(x12, x3); // put back IP in place...
@ -491,28 +491,6 @@ int isNativeCall(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t* calladdress, int
return 0;
}
// emit "lock", x1, x2 and x3 are lost
void emit_lock(dynarec_arm_t* dyn, uintptr_t addr, int ninst)
{
PUSH(xSP, (1<<xEmu)); // save Emu
LDR_IMM9(xEmu, xEmu, offsetof(x86emu_t, context));
MOV32(x1, offsetof(box86context_t, mutex_lock)); // offset is way to big for imm8
ADD_REG_LSL_IMM5(xEmu, xEmu, x1, 0);
CALL(pthread_mutex_lock, -2, 0);
POP(xSP, (1<<xEmu));
}
// emit "unlock", x1, x2 and x3 are lost
void emit_unlock(dynarec_arm_t* dyn, uintptr_t addr, int ninst)
{
PUSH(xSP, (1<<xEmu)); // save Emu
LDR_IMM9(xEmu, xEmu, offsetof(x86emu_t, context));
MOV32(x1, offsetof(box86context_t, mutex_lock)); // offset is way to big for imm8
ADD_REG_LSL_IMM5(xEmu, xEmu, x1, 0);
CALL(pthread_mutex_unlock, -2, 0);
POP(xSP, (1<<xEmu));
}
// x87 stuffs
static void x87_reset(dynarec_arm_t* dyn, int ninst)
{

View File

@ -186,6 +186,8 @@
#define GETMARKF ((dyn->insts)?dyn->insts[ninst].markf:(dyn->arm_size+4))
#define MARKSEG if(dyn->insts) {dyn->insts[ninst].markseg = (uintptr_t)dyn->arm_size;}
#define GETMARKSEG ((dyn->insts)?dyn->insts[ninst].markseg:(dyn->arm_size+4))
#define MARKLOCK if(dyn->insts) {dyn->insts[ninst].marklock = (uintptr_t)dyn->arm_size;}
#define GETMARKLOCK ((dyn->insts)?dyn->insts[ninst].marklock:(dyn->arm_size+4))
// Branch to MARK if cond (use i32)
#define B_MARK(cond) \
@ -207,6 +209,10 @@
#define B_MARKSEG(cond) \
i32 = GETMARKSEG-(dyn->arm_size+8); \
Bcond(cond, i32)
// Branch to MARKLOCK if cond (use i32)
#define B_MARKLOCK(cond) \
i32 = GETMARKLOCK-(dyn->arm_size+8); \
Bcond(cond, i32)
#define IFX(A) if(dyn->insts && (dyn->insts[ninst].x86.need_flags&(A)))
#define IFXX(A) if(dyn->insts && (dyn->insts[ninst].x86.need_flags==(A)))
@ -301,12 +307,6 @@
#define NEW_BARRIER_INST
#endif
// Emit the LOCK mutex (x1, x2 and x3 are lost)
#define LOCK emit_lock(dyn, addr, ninst)
// Emit the UNLOCK mutex (x1, x2 and x3 are lost)
#define UNLOCK emit_unlock(dyn, addr, ninst)
void arm_epilog();
void* arm_linker(x86emu_t* emu, void** table, uintptr_t addr);
@ -349,8 +349,6 @@ void* arm_linker(x86emu_t* emu, void** table, uintptr_t addr);
#define grab_fsdata STEPNAME(grab_fsdata_)
#define grab_tlsdata STEPNAME(grab_tlsdata_)
#define isNativeCall STEPNAME(isNativeCall_)
#define emit_lock STEPNAME(emit_lock)
#define emit_unlock STEPNAME(emit_unlock)
#define emit_cmp8 STEPNAME(emit_cmp8)
#define emit_cmp16 STEPNAME(emit_cmp16)
#define emit_cmp32 STEPNAME(emit_cmp32)

View File

@ -13,6 +13,7 @@ typedef struct instruction_arm_s {
uintptr_t mark, mark2, mark3;
uintptr_t markf;
uintptr_t markseg;
uintptr_t marklock;
} instruction_arm_t;
typedef struct dynarec_arm_s {

View File

@ -19,6 +19,9 @@
#include "x87emu_private.h"
#include "box86context.h"
#include "my_cpuid.h"
#ifdef DYNAREC
#include "../dynarec/arm_lock_helper.h"
#endif
int my_setcontext(x86emu_t* emu, void* ucp);
@ -629,6 +632,20 @@ _trace:
NEXT;
_0x86: /* XCHG Eb,Gb */
nextop = F8;
#ifdef DYNAREC
GET_EB;
if((nextop&0xC0)==0xC0) { // reg / reg: no lock
tmp8u = GB;
GB = EB->byte[0];
EB->byte[0] = tmp8u;
} else {
do {
tmp8u = arm_lock_read_b(EB);
} while(arm_lock_write_b(EB, GB));
GB = tmp8u;
}
// dynarec builds need their own mechanism
#else
GET_EB;
if((nextop&0xC0)!=0xC0)
pthread_mutex_lock(&emu->context->mutex_lock); // XCHG always LOCK (but when accessing memory only)
@ -637,9 +654,31 @@ _trace:
EB->byte[0] = tmp8u;
if((nextop&0xC0)!=0xC0)
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
NEXT;
_0x87: /* XCHG Ed,Gd */
nextop = F8;
#ifdef DYNAREC
GET_ED;
if((nextop&0xC0)==0xC0) {
tmp32u = GD.dword[0];
GD.dword[0] = ED->dword[0];
ED->dword[0] = tmp32u;
} else {
if(((uintptr_t)ED)&3)
{
// not aligned, don't try to "LOCK"
tmp32u = ED->dword[0];
ED->dword[0] = GD.dword[0];
} else {
// XCHG is supposed to automatically LOCK memory bus
do {
tmp32u = arm_lock_read_d(ED);
} while(arm_lock_write_d(ED, GD.dword[0]));
}
GD.dword[0] = tmp32u;
}
#else
GET_ED;
if((nextop&0xC0)!=0xC0)
pthread_mutex_lock(&emu->context->mutex_lock); // XCHG always LOCK (but when accessing memory only)
@ -648,6 +687,7 @@ _trace:
ED->dword[0] = tmp32u;
if((nextop&0xC0)!=0xC0)
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
NEXT;
_0x88: /* MOV Eb,Gb */
nextop = F8;

View File

@ -11,6 +11,9 @@
#include "x86primop.h"
#include "x86trace.h"
#include "box86context.h"
#ifdef DYNAREC
#include "../dynarec/arm_lock_helper.h"
#endif
#define F8 *(uint8_t*)(ip++)
@ -162,9 +165,47 @@ void RunLock(x86emu_t *emu)
uint8_t nextop;
reg32_t *oped;
uint8_t tmp8u;
#ifdef DYNAREC
uint8_t tmp8u2;
#endif
uint32_t tmp32u, tmp32u2;
int32_t tmp32s;
switch(opcode) {
#ifdef DYNAREC
// GO(B, OP): generate the six "LOCK <OP>" switch cases for base opcode B (DYNAREC build).
//  B+0 (Eb,Gb) / B+1 (Ed,Gd): read-modify-write of the E operand, done as a loop of
//      arm_lock_read_b/d -> OP##8/32 -> arm_lock_write_b/d that repeats while the write
//      returns non-zero; OP is re-evaluated on every retry.
//      NOTE(review): presumably the read/write helpers are an exclusive load/store pair and
//      non-zero means the store did not go through -- confirm in arm_lock_helper.
//  B+2 (Gb,Eb) / B+3 (Gd,Ed): the result goes to the G register, the E operand is read directly.
//  B+4 / B+5: AL / EAX combined with an immediate fetched through F8 / F32.
#define GO(B, OP) \
case B+0: \
nextop = F8; \
GET_EB; \
do { \
tmp8u = arm_lock_read_b(EB); \
tmp8u = OP##8(emu, tmp8u, GB); \
} while (arm_lock_write_b(EB, tmp8u)); \
break; \
case B+1: \
nextop = F8; \
GET_ED; \
do { \
tmp32u = arm_lock_read_d(ED); \
tmp32u = OP##32(emu, tmp32u, GD.dword[0]); \
} while (arm_lock_write_d(ED, tmp32u)); \
break; \
case B+2: \
nextop = F8; \
GET_EB; \
GB = OP##8(emu, GB, EB->byte[0]); \
break; \
case B+3: \
nextop = F8; \
GET_ED; \
GD.dword[0] = OP##32(emu, GD.dword[0], ED->dword[0]); \
break; \
case B+4: \
R_AL = OP##8(emu, R_AL, F8); \
break; \
case B+5: \
R_EAX = OP##32(emu, R_EAX, F32); \
break;
#else
#define GO(B, OP) \
case B+0: \
nextop = F8; \
@ -204,7 +245,7 @@ void RunLock(x86emu_t *emu)
R_EAX = OP##32(emu, R_EAX, F32); \
pthread_mutex_unlock(&emu->context->mutex_lock);\
break;
#endif
GO(0x00, add) /* ADD 0x00 -> 0x05 */
GO(0x08, or) /* OR 0x08 -> 0x0D */
GO(0x10, adc) /* ADC 0x10 -> 0x15 */
@ -221,6 +262,18 @@ void RunLock(x86emu_t *emu)
CHECK_FLAGS(emu);
nextop = F8;
GET_EB;
#ifdef DYNAREC
do {
tmp8u = arm_lock_read_b(EB);
cmp8(emu, R_AL, tmp8u);
if(ACCESS_FLAG(F_ZF)) {
tmp32s = arm_lock_write_b(EB, GB);
} else {
R_AL = tmp8u;
tmp32s = 0;
}
} while(tmp32s);
#else
pthread_mutex_lock(&emu->context->mutex_lock);
cmp8(emu, R_AL, EB->byte[0]);
if(ACCESS_FLAG(F_ZF)) {
@ -229,10 +282,23 @@ void RunLock(x86emu_t *emu)
R_AL = EB->byte[0];
}
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
case 0xB1: /* CMPXCHG Ed,Gd */
nextop = F8;
GET_ED;
#ifdef DYNAREC
do {
tmp32u = arm_lock_read_d(ED);
cmp32(emu, R_EAX, tmp32u);
if(ACCESS_FLAG(F_ZF)) {
tmp32s = arm_lock_write_d(ED, GD.dword[0]);
} else {
R_EAX = tmp32u;
tmp32s = 0;
}
} while(tmp32s);
#else
pthread_mutex_lock(&emu->context->mutex_lock);
cmp32(emu, R_EAX, ED->dword[0]);
if(ACCESS_FLAG(F_ZF)) {
@ -241,6 +307,7 @@ void RunLock(x86emu_t *emu)
R_EAX = ED->dword[0];
}
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
case 0xB3: /* BTR Ed,Gd */
CHECK_FLAGS(emu);
@ -252,6 +319,19 @@ void RunLock(x86emu_t *emu)
ED=(reg32_t*)(((uint32_t*)(ED))+(tmp8u>>5));
}
tmp8u&=31;
#ifdef DYNAREC
do {
tmp32u = arm_lock_read_d(ED);
if(tmp32u & (1<<tmp8u)) {
SET_FLAG(F_CF);
tmp32u ^= (1<<tmp8u);
tmp32s = arm_lock_write_d(ED, tmp32u);
} else {
CLEAR_FLAG(F_CF);
tmp32s = 0;
}
} while(tmp32s);
#else
pthread_mutex_lock(&emu->context->mutex_lock);
if(ED->dword[0] & (1<<tmp8u)) {
SET_FLAG(F_CF);
@ -259,6 +339,7 @@ void RunLock(x86emu_t *emu)
} else
CLEAR_FLAG(F_CF);
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
case 0xBA:
nextop = F8;
@ -272,12 +353,19 @@ void RunLock(x86emu_t *emu)
ED=(reg32_t*)(((uint32_t*)(ED))+(tmp8u>>5));
}
tmp8u&=31;
#ifdef DYNAREC
if(arm_lock_read_d(ED) & (1<<tmp8u))
SET_FLAG(F_CF);
else
CLEAR_FLAG(F_CF);
#else
pthread_mutex_lock(&emu->context->mutex_lock);
if(ED->dword[0] & (1<<tmp8u))
SET_FLAG(F_CF);
else
CLEAR_FLAG(F_CF);
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
case 6: /* BTR Ed, Ib */
CHECK_FLAGS(emu);
@ -288,6 +376,19 @@ void RunLock(x86emu_t *emu)
ED=(reg32_t*)(((uint32_t*)(ED))+(tmp8u>>5));
}
tmp8u&=31;
#ifdef DYNAREC
do {
tmp32u = arm_lock_read_d(ED);
if(tmp32u & (1<<tmp8u)) {
SET_FLAG(F_CF);
tmp32u ^= (1<<tmp8u);
tmp32s = arm_lock_write_d(ED, tmp32u);
} else {
CLEAR_FLAG(F_CF);
tmp32s = 0;
}
} while(tmp32s);
#else
pthread_mutex_lock(&emu->context->mutex_lock);
if(ED->dword[0] & (1<<tmp8u)) {
SET_FLAG(F_CF);
@ -295,6 +396,7 @@ void RunLock(x86emu_t *emu)
} else
CLEAR_FLAG(F_CF);
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
default:
@ -312,6 +414,17 @@ void RunLock(x86emu_t *emu)
ED=(reg32_t*)(((uint32_t*)(ED))+(tmp8u>>5));
}
tmp8u&=31;
#ifdef DYNAREC
do {
tmp32u = arm_lock_read_d(ED);
if(tmp32u & (1<<tmp8u)) {
SET_FLAG(F_CF);
} else {
CLEAR_FLAG(F_CF);
}
tmp32u ^= (1<<tmp8u);
} while(arm_lock_write_d(ED, tmp32u));
#else
pthread_mutex_lock(&emu->context->mutex_lock);
if(ED->dword[0] & (1<<tmp8u))
SET_FLAG(F_CF);
@ -319,29 +432,60 @@ void RunLock(x86emu_t *emu)
CLEAR_FLAG(F_CF);
ED->dword[0] ^= (1<<tmp8u);
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
case 0xC0: /* XADD Gb,Eb */
nextop = F8;
GET_EB;
#ifdef DYNAREC
do {
tmp8u = arm_lock_read_b(EB);
tmp8u2 = add8(emu, tmp8u, GB);
} while (arm_lock_write_b(EB, tmp8u2));
GB = tmp8u;
#else
pthread_mutex_lock(&emu->context->mutex_lock);
tmp8u = add8(emu, EB->byte[0], GB);
GB = EB->byte[0];
EB->byte[0] = tmp8u;
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
case 0xC1: /* XADD Gd,Ed */
nextop = F8;
GET_ED;
#ifdef DYNAREC
do {
tmp32u = arm_lock_read_d(ED);
tmp32u2 = add32(emu, tmp32u, GD.dword[0]);
} while(arm_lock_write_d(ED, tmp32u2));
GD.dword[0] = tmp32u;
#else
pthread_mutex_lock(&emu->context->mutex_lock);
tmp32u = add32(emu, ED->dword[0], GD.dword[0]);
GD.dword[0] = ED->dword[0];
ED->dword[0] = tmp32u;
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
case 0xC7: /* CMPXCHG8B Gq */
CHECK_FLAGS(emu);
nextop = F8;
GET_ED;
#ifdef DYNAREC
do {
arm_lock_read_dd(&tmp32u, &tmp32u2, ED);
if(R_EAX == tmp32u && R_EDX == tmp32u2) {
SET_FLAG(F_ZF);
tmp32s = arm_lock_write_dd(R_EBX, R_ECX, ED);
} else {
CLEAR_FLAG(F_ZF);
R_EAX = tmp32u;
R_EDX = tmp32u2;
tmp32s = 0;
}
} while(tmp32s);
#else
pthread_mutex_lock(&emu->context->mutex_lock);
tmp32u = ED->dword[0];
tmp32u2= ED->dword[1];
@ -355,6 +499,7 @@ void RunLock(x86emu_t *emu)
R_EDX = tmp32u2;
}
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
default:
// trigger invalid lock?
@ -367,12 +512,24 @@ void RunLock(x86emu_t *emu)
case 0x83: /* GRP Ed,Ib */
nextop = F8;
GET_ED;
pthread_mutex_lock(&emu->context->mutex_lock);
if(opcode==0x83) {
tmp32s = F8S;
tmp32u = (uint32_t)tmp32s;
} else
tmp32u = F32;
#ifdef DYNAREC
switch((nextop>>3)&7) {
case 0: do { tmp32u2 = arm_lock_read_d(ED);} while(arm_lock_write_d(ED, add32(emu, tmp32u2, tmp32u))); break;
case 1: do { tmp32u2 = arm_lock_read_d(ED);} while(arm_lock_write_d(ED, or32(emu, tmp32u2, tmp32u))); break;
case 2: do { tmp32u2 = arm_lock_read_d(ED);} while(arm_lock_write_d(ED, adc32(emu, tmp32u2, tmp32u))); break;
case 3: do { tmp32u2 = arm_lock_read_d(ED);} while(arm_lock_write_d(ED, sbb32(emu, tmp32u2, tmp32u))); break;
case 4: do { tmp32u2 = arm_lock_read_d(ED);} while(arm_lock_write_d(ED, and32(emu, tmp32u2, tmp32u))); break;
case 5: do { tmp32u2 = arm_lock_read_d(ED);} while(arm_lock_write_d(ED, sub32(emu, tmp32u2, tmp32u))); break;
case 6: do { tmp32u2 = arm_lock_read_d(ED);} while(arm_lock_write_d(ED, xor32(emu, tmp32u2, tmp32u))); break;
case 7: cmp32(emu, ED->dword[0], tmp32u); break;
}
#else
pthread_mutex_lock(&emu->context->mutex_lock);
switch((nextop>>3)&7) {
case 0: ED->dword[0] = add32(emu, ED->dword[0], tmp32u); break;
case 1: ED->dword[0] = or32(emu, ED->dword[0], tmp32u); break;
@ -384,38 +541,37 @@ void RunLock(x86emu_t *emu)
case 7: cmp32(emu, ED->dword[0], tmp32u); break;
}
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
case 0x86: /* XCHG Eb,Gb */
nextop = F8;
GET_EB;
tmp8u = GB;
pthread_mutex_lock(&emu->context->mutex_lock);
GB = EB->byte[0];
EB->byte[0] = tmp8u;
pthread_mutex_unlock(&emu->context->mutex_lock);
break;
case 0x87: /* XCHG Ed,Gd */
nextop = F8;
GET_ED;
pthread_mutex_lock(&emu->context->mutex_lock);
tmp32u = GD.dword[0];
GD.dword[0] = ED->dword[0];
ED->dword[0] = tmp32u;
pthread_mutex_unlock(&emu->context->mutex_lock);
ip--; // let the normal XCHG execute, it have integrated LOCK
break;
case 0xFF: /* GRP 5 Ed */
nextop = F8;
GET_ED;
switch((nextop>>3)&7) {
case 0: /* INC Ed */
#ifdef DYNAREC
do {
tmp32u = arm_lock_read_d(ED);
} while(arm_lock_write_d(ED, inc32(emu, tmp32u)));
#else
pthread_mutex_lock(&emu->context->mutex_lock);
ED->dword[0] = inc32(emu, ED->dword[0]);
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
case 1: /* DEC Ed */
#ifdef DYNAREC
do {
tmp32u = arm_lock_read_d(ED);
} while(arm_lock_write_d(ED, dec32(emu, tmp32u)));
#else
pthread_mutex_lock(&emu->context->mutex_lock);
ED->dword[0] = dec32(emu, ED->dword[0]);
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
default:
printf_log(LOG_NONE, "Illegal Opcode 0xF0 0xFF 0x%02X 0x%02X\n", nextop, PK(0));

View File

@ -117,7 +117,9 @@ typedef struct box86context_s {
pthread_mutex_t mutex_once;
pthread_mutex_t mutex_once2;
pthread_mutex_t mutex_trace;
pthread_mutex_t mutex_lock;
#ifndef DYNAREC
pthread_mutex_t mutex_lock; // dynarec build will use their own mecanism
#endif
pthread_mutex_t mutex_tls;
pthread_mutex_t mutex_thread;