Update bundled pcre2 to 10.42

Closes GH-12109.
This commit is contained in:
Niels Dossche 2023-09-02 20:04:53 +02:00
parent d68073c23b
commit c4e8f652c5
36 changed files with 9915 additions and 3491 deletions

3
NEWS
View File

@ -29,6 +29,9 @@ PHP NEWS
. Fixed persistent procedural ODBC connections not getting closed.
(NattyNarwhal)
- PCRE:
. Update bundled libpcre2 to 10.42. (nielsdos)
- SPL:
. Fixed bug GH-11972 (RecursiveCallbackFilterIterator regression in 8.1.18).
(nielsdos)

View File

@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
/* The current PCRE version information. */
#define PCRE2_MAJOR 10
#define PCRE2_MINOR 40
#define PCRE2_MINOR 42
#define PCRE2_PRERELEASE
#define PCRE2_DATE 2022-04-14
#define PCRE2_DATE 2022-12-12
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE2, the appropriate

View File

@ -1266,8 +1266,10 @@ PCRE2_SIZE* ref_count;
if (code != NULL)
{
#ifdef SUPPORT_JIT
if (code->executable_jit != NULL)
PRIV(jit_free)(code->executable_jit, &code->memctl);
#endif
if ((code->flags & PCRE2_DEREF_TABLES) != 0)
{
@ -2687,7 +2689,7 @@ if ((options & PCRE2_EXTENDED_MORE) != 0) options |= PCRE2_EXTENDED;
while (ptr < ptrend)
{
int prev_expect_cond_assert;
uint32_t min_repeat, max_repeat;
uint32_t min_repeat = 0, max_repeat = 0;
uint32_t set, unset, *optset;
uint32_t terminator;
uint32_t prev_meta_quantifier;
@ -8552,7 +8554,7 @@ do {
op == OP_SCBRA || op == OP_SCBRAPOS)
{
int n = GET2(scode, 1+LINK_SIZE);
int new_map = bracket_map | ((n < 32)? (1u << n) : 1);
unsigned int new_map = bracket_map | ((n < 32)? (1u << n) : 1);
if (!is_startline(scode, new_map, cb, atomcount, inassert)) return FALSE;
}
@ -10620,4 +10622,10 @@ re = NULL;
goto EXIT;
}
/* These #undefs are here to enable unity builds with CMake. */
#undef NLBLOCK /* Block containing newline information */
#undef PSSTART /* Field containing processed string start */
#undef PSEND /* Field containing processed string end */
/* End of pcre2_compile.c */

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2018 University of Cambridge
New API code Copyright (c) 2016-2022 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -443,8 +443,11 @@ mcontext->offset_limit = limit;
return 0;
}
/* This function became obsolete at release 10.30. It is kept as a synonym for
backwards compatibility. */
/* These functions became obsolete at release 10.30. The first is kept as a
synonym for backwards compatibility. The second now does nothing. Exclude both
from coverage reports. */
/* LCOV_EXCL_START */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_recursion_limit(pcre2_match_context *mcontext, uint32_t limit)
@ -464,6 +467,9 @@ pcre2_set_recursion_memory_management(pcre2_match_context *mcontext,
return 0;
}
/* LCOV_EXCL_STOP */
/* ------------ Convert context ------------ */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2018 University of Cambridge
New API code Copyright (c) 2016-2022 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -65,9 +65,8 @@ POSSIBILITY OF SUCH DAMAGE.
#define STR_QUERY_s STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_s STR_RIGHT_PARENTHESIS
#define STR_STAR_NUL STR_LEFT_PARENTHESIS STR_ASTERISK STR_N STR_U STR_L STR_RIGHT_PARENTHESIS
/* States for range and POSIX processing */
/* States for POSIX processing */
enum { RANGE_NOT_STARTED, RANGE_STARTING, RANGE_STARTED };
enum { POSIX_START_REGEX, POSIX_ANCHORED, POSIX_NOT_BRACKET,
POSIX_CLASS_NOT_STARTED, POSIX_CLASS_STARTING, POSIX_CLASS_STARTED };

View File

@ -350,7 +350,7 @@ Returns: the return from the callout
*/
static int
do_callout(PCRE2_SPTR code, PCRE2_SIZE *offsets, PCRE2_SPTR current_subject,
do_callout_dfa(PCRE2_SPTR code, PCRE2_SIZE *offsets, PCRE2_SPTR current_subject,
PCRE2_SPTR ptr, dfa_match_block *mb, PCRE2_SIZE extracode,
PCRE2_SIZE *lengthptr)
{
@ -2799,7 +2799,7 @@ for (;;)
|| code[LINK_SIZE + 1] == OP_CALLOUT_STR)
{
PCRE2_SIZE callout_length;
rrc = do_callout(code, offsets, current_subject, ptr, mb,
rrc = do_callout_dfa(code, offsets, current_subject, ptr, mb,
1 + LINK_SIZE, &callout_length);
if (rrc < 0) return rrc; /* Abandon */
if (rrc > 0) break; /* Fail this thread */
@ -3196,7 +3196,7 @@ for (;;)
case OP_CALLOUT_STR:
{
PCRE2_SIZE callout_length;
rrc = do_callout(code, offsets, current_subject, ptr, mb, 0,
rrc = do_callout_dfa(code, offsets, current_subject, ptr, mb, 0,
&callout_length);
if (rrc < 0) return rrc; /* Abandon */
if (rrc == 0)
@ -4057,4 +4057,10 @@ while (rws->next != NULL)
return rc;
}
/* These #undefs are here to enable unity builds with CMake. */
#undef NLBLOCK /* Block containing newline information */
#undef PSSTART /* Field containing processed string start */
#undef PSEND /* Field containing processed string end */
/* End of pcre2_dfa_match.c */

View File

@ -220,18 +220,17 @@ not rely on this. */
#define COMPILE_ERROR_BASE 100
/* The initial frames vector for remembering backtracking points in
pcre2_match() is allocated on the system stack, of this size (bytes). The size
must be a multiple of sizeof(PCRE2_SPTR) in all environments, so making it a
multiple of 8 is best. Typical frame sizes are a few hundred bytes (it depends
on the number of capturing parentheses) so 20KiB handles quite a few frames. A
larger vector on the heap is obtained for patterns that need more frames. The
maximum size of this can be limited. */
/* The initial frames vector for remembering pcre2_match() backtracking points
is allocated on the heap, of this size (bytes) or ten times the frame size if
larger, unless the heap limit is smaller. Typical frame sizes are a few hundred
bytes (it depends on the number of capturing parentheses) so 20KiB handles
quite a few frames. A larger vector on the heap is obtained for matches that
need more frames, subject to the heap limit. */
#define START_FRAMES_SIZE 20480
/* Similarly, for DFA matching, an initial internal workspace vector is
allocated on the stack. */
/* For DFA matching, an initial internal workspace vector is allocated on the
stack. The heap is used only if this turns out to be too small. */
#define DFA_START_RWS_SIZE 30720

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2018 University of Cambridge
New API code Copyright (c) 2016-2022 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -649,19 +649,23 @@ the size varies from call to call. As the maximum number of capturing
subpatterns is 65535 we must allow for 65536 strings to include the overall
match. (See also the heapframe structure below.) */
struct heapframe; /* Forward reference */
typedef struct pcre2_real_match_data {
pcre2_memctl memctl;
const pcre2_real_code *code; /* The pattern used for the match */
PCRE2_SPTR subject; /* The subject that was matched */
PCRE2_SPTR mark; /* Pointer to last mark */
PCRE2_SIZE leftchar; /* Offset to leftmost code unit */
PCRE2_SIZE rightchar; /* Offset to rightmost code unit */
PCRE2_SIZE startchar; /* Offset to starting code unit */
uint8_t matchedby; /* Type of match (normal, JIT, DFA) */
uint8_t flags; /* Various flags */
uint16_t oveccount; /* Number of pairs */
int rc; /* The return code from the match */
PCRE2_SIZE ovector[131072]; /* Must be last in the structure */
pcre2_memctl memctl; /* Memory control fields */
const pcre2_real_code *code; /* The pattern used for the match */
PCRE2_SPTR subject; /* The subject that was matched */
PCRE2_SPTR mark; /* Pointer to last mark */
struct heapframe *heapframes; /* Backtracking frames heap memory */
PCRE2_SIZE heapframes_size; /* Malloc-ed size */
PCRE2_SIZE leftchar; /* Offset to leftmost code unit */
PCRE2_SIZE rightchar; /* Offset to rightmost code unit */
PCRE2_SIZE startchar; /* Offset to starting code unit */
uint8_t matchedby; /* Type of match (normal, JIT, DFA) */
uint8_t flags; /* Various flags */
uint16_t oveccount; /* Number of pairs */
int rc; /* The return code from the match */
PCRE2_SIZE ovector[131072]; /* Must be last in the structure */
} pcre2_real_match_data;
@ -854,10 +858,6 @@ doing traditional NFA matching (pcre2_match() and friends). */
typedef struct match_block {
pcre2_memctl memctl; /* For general use */
PCRE2_SIZE frame_vector_size; /* Size of a backtracking frame */
heapframe *match_frames; /* Points to vector of frames */
heapframe *match_frames_top; /* Points after the end of the vector */
heapframe *stack_frames; /* The original vector on the stack */
PCRE2_SIZE heap_limit; /* As it says */
uint32_t match_limit; /* As it says */
uint32_t match_limit_depth; /* As it says */

View File

@ -542,7 +542,7 @@ typedef struct compare_context {
#undef CMP
/* Used for accessing the elements of the stack. */
#define STACK(i) ((i) * (int)sizeof(sljit_sw))
#define STACK(i) ((i) * SSIZE_OF(sw))
#ifdef SLJIT_PREF_SHIFT_REG
#if SLJIT_PREF_SHIFT_REG == SLJIT_R2
@ -590,8 +590,8 @@ to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet reached. */
#define OVECTOR_START (common->ovector_start)
#define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR(i) (OVECTOR_START + (i) * SSIZE_OF(sw))
#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * SSIZE_OF(sw))
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
#if PCRE2_CODE_UNIT_WIDTH == 8
@ -2151,9 +2151,9 @@ while (cc < ccend)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
stackpos -= (int)sizeof(sljit_sw);
stackpos -= SSIZE_OF(sw);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
stackpos -= (int)sizeof(sljit_sw);
stackpos -= SSIZE_OF(sw);
setsom_found = TRUE;
}
cc += 1;
@ -2168,9 +2168,9 @@ while (cc < ccend)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
stackpos -= (int)sizeof(sljit_sw);
stackpos -= SSIZE_OF(sw);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
stackpos -= (int)sizeof(sljit_sw);
stackpos -= SSIZE_OF(sw);
setmark_found = TRUE;
}
cc += 1 + 2 + cc[1];
@ -2181,27 +2181,27 @@ while (cc < ccend)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
stackpos -= (int)sizeof(sljit_sw);
stackpos -= SSIZE_OF(sw);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
stackpos -= (int)sizeof(sljit_sw);
stackpos -= SSIZE_OF(sw);
setsom_found = TRUE;
}
if (common->mark_ptr != 0 && !setmark_found)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
stackpos -= (int)sizeof(sljit_sw);
stackpos -= SSIZE_OF(sw);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
stackpos -= (int)sizeof(sljit_sw);
stackpos -= SSIZE_OF(sw);
setmark_found = TRUE;
}
if (common->capture_last_ptr != 0 && !capture_last_found)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
stackpos -= (int)sizeof(sljit_sw);
stackpos -= SSIZE_OF(sw);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
stackpos -= (int)sizeof(sljit_sw);
stackpos -= SSIZE_OF(sw);
capture_last_found = TRUE;
}
cc += 1 + LINK_SIZE;
@ -2215,20 +2215,20 @@ while (cc < ccend)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
stackpos -= (int)sizeof(sljit_sw);
stackpos -= SSIZE_OF(sw);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
stackpos -= (int)sizeof(sljit_sw);
stackpos -= SSIZE_OF(sw);
capture_last_found = TRUE;
}
offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
stackpos -= (int)sizeof(sljit_sw);
stackpos -= SSIZE_OF(sw);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
stackpos -= (int)sizeof(sljit_sw);
stackpos -= SSIZE_OF(sw);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
stackpos -= (int)sizeof(sljit_sw);
stackpos -= SSIZE_OF(sw);
cc += 1 + LINK_SIZE + IMM2_SIZE;
break;
@ -3144,7 +3144,7 @@ static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
DEFINE_COMPILER;
SLJIT_ASSERT(size > 0);
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw));
#ifdef DESTROY_REGISTERS
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
@ -3160,7 +3160,7 @@ static SLJIT_INLINE void free_stack(compiler_common *common, int size)
DEFINE_COMPILER;
SLJIT_ASSERT(size > 0);
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw));
}
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
@ -3200,12 +3200,12 @@ if (length < 8)
}
else
{
if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
{
GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
loop = LABEL();
sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, loop);
}
@ -3261,8 +3261,8 @@ OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - uncleared_size);
loop = LABEL();
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * (sljit_sw)sizeof(sljit_sw), src, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * (sljit_sw)sizeof(sljit_sw), src, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * SSIZE_OF(sw), src, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * SSIZE_OF(sw), src, 0);
CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop);
if (uncleared_size >= sizeof(sljit_sw))
@ -3289,12 +3289,12 @@ if (length < 8)
}
else
{
if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
{
GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
loop = LABEL();
sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, loop);
}
@ -3386,7 +3386,7 @@ else
OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
}
has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;
has_pre = sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;
GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
@ -3394,7 +3394,7 @@ OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUME
loop = LABEL();
if (has_pre)
sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
else
{
OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
@ -3417,14 +3417,14 @@ JUMPTO(SLJIT_NOT_ZERO, loop);
/* Calculate the return value, which is the maximum ovector value. */
if (topbracket > 1)
{
if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS)
if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw))) == SLJIT_SUCCESS)
{
GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
/* OVECTOR(0) is never equal to SLJIT_S2. */
loop = LABEL();
sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw)));
OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
@ -3437,7 +3437,7 @@ if (topbracket > 1)
/* OVECTOR(0) is never equal to SLJIT_S2. */
loop = LABEL();
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw));
OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
@ -4652,8 +4652,8 @@ if (common->nltype != NLTYPE_ANY)
/* All newlines are ascii, just skip intermediate octets. */
jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
loop = LABEL();
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS)
sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS)
sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
else
{
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
@ -5886,7 +5886,7 @@ static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forw
while (j < i)
{
b_pri = chars[j].last_count;
if (b_pri > 2 && a_pri + b_pri >= max_pri)
if (b_pri > 2 && (sljit_u32)a_pri + (sljit_u32)b_pri >= max_pri)
{
b1 = chars[j].chars[0];
b2 = chars[j].chars[1];
@ -6572,21 +6572,21 @@ GET_LOCAL_BASE(TMP1, 0, 0);
/* Drop frames until we reach STACK_TOP. */
mainloop = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -sizeof(sljit_sw));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -SSIZE_OF(sw));
jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
{
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw)));
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));
}
else
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw)));
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
GET_LOCAL_BASE(TMP1, 0, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);
@ -6603,13 +6603,13 @@ OP2(SLJIT_SUB, TMP2, 0, SLJIT_IMM, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
{
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
}
else
{
OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
}
JUMPTO(SLJIT_JUMP, mainloop);
@ -7159,11 +7159,11 @@ if (char1_reg == STR_END)
OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
}
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
{
label = LABEL();
sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, label);
@ -7171,14 +7171,14 @@ if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_
JUMPHERE(jump);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
}
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
{
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
label = LABEL();
sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, label);
@ -7232,9 +7232,9 @@ else
lcc_table = TMP3;
}
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
opt_type = 1;
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
opt_type = 2;
sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
@ -7253,8 +7253,8 @@ OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);
if (opt_type == 1)
{
label = LABEL();
sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
}
else if (opt_type == 2)
{
@ -7262,8 +7262,8 @@ else if (opt_type == 2)
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
label = LABEL();
sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
}
else
{
@ -9689,7 +9689,7 @@ BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
return cc + 1 + LINK_SIZE;
}
static sljit_s32 SLJIT_FUNC do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
static sljit_s32 SLJIT_FUNC do_callout_jit(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
{
PCRE2_SPTR begin;
PCRE2_SIZE *ovector;
@ -9756,7 +9756,7 @@ unsigned int callout_length = (*cc == OP_CALLOUT)
sljit_sw value1;
sljit_sw value2;
sljit_sw value3;
sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * sizeof(sljit_sw);
sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * SSIZE_OF(sw);
PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
@ -9806,7 +9806,7 @@ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
/* SLJIT_R0 = arguments */
OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(32, W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_callout));
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(32, W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_callout_jit));
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
free_stack(common, callout_arg_size);
@ -11451,7 +11451,7 @@ struct sljit_label *label;
int private_data_ptr = PRIVATE_DATA(cc);
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw);
int tmp_base, tmp_offset;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
BOOL use_tmp;
@ -11517,19 +11517,19 @@ if (exact > 1)
}
}
else if (exact == 1)
{
compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
if (early_fail_type == type_fail_range)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);
add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));
if (early_fail_type == type_fail_range)
{
/* Range end first, followed by range start. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);
add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw), STR_PTR, 0);
}
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw), STR_PTR, 0);
}
switch(opcode)
@ -12428,7 +12428,7 @@ PCRE2_SPTR end;
int private_data_ptr = PRIVATE_DATA(cc);
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw);
cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
@ -14148,7 +14148,7 @@ quit_label = common->quit_label;
if (common->currententry != NULL)
{
/* A free bit for each private data. */
common->recurse_bitset_size = ((private_data_size / (int)sizeof(sljit_sw)) + 7) >> 3;
common->recurse_bitset_size = ((private_data_size / SSIZE_OF(sw)) + 7) >> 3;
SLJIT_ASSERT(common->recurse_bitset_size > 0);
common->recurse_bitset = (sljit_u8*)SLJIT_MALLOC(common->recurse_bitset_size, allocator_data);;
@ -14384,7 +14384,7 @@ pcre2_jit_compile(pcre2_code *code, uint32_t options)
pcre2_real_code *re = (pcre2_real_code *)code;
#ifdef SUPPORT_JIT
executable_functions *functions;
static int executable_allocator_is_working = 0;
static int executable_allocator_is_working = -1;
#endif
if (code == NULL)
@ -14447,23 +14447,21 @@ return PCRE2_ERROR_JIT_BADOPTION;
if ((re->flags & PCRE2_NOJIT) != 0) return 0;
if (executable_allocator_is_working == 0)
if (executable_allocator_is_working == -1)
{
/* Checks whether the executable allocator is working. This check
might run multiple times in multi-threaded environments, but the
result should not be affected by it. */
void *ptr = SLJIT_MALLOC_EXEC(32, NULL);
executable_allocator_is_working = -1;
if (ptr != NULL)
{
SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL);
executable_allocator_is_working = 1;
}
else executable_allocator_is_working = 0;
}
if (executable_allocator_is_working < 0)
if (!executable_allocator_is_working)
return PCRE2_ERROR_NOMEMORY;
if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)

View File

@ -110,8 +110,10 @@ pcre2_jit_free_unused_memory(pcre2_general_context *gcontext)
(void)gcontext; /* Suppress warning */
#else /* SUPPORT_JIT */
SLJIT_UNUSED_ARG(gcontext);
#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR)
sljit_free_unused_memory_exec();
#endif /* SUPPORT_JIT */
#endif /* SLJIT_EXECUTABLE_ALLOCATOR */
#endif /* SUPPORT_JIT */
}

View File

@ -183,6 +183,8 @@ restart:;
#endif
#if defined(FFCPS)
if (str_ptr >= str_end)
return NULL;
sljit_u8 *p1 = str_ptr - diff;
#endif
sljit_s32 align_offset = ((uint64_t)str_ptr & 0xf);
@ -327,7 +329,7 @@ match:;
return NULL;
#if defined(FF_UTF)
if (utf_continue(str_ptr + IN_UCHARS(-offs1)))
if (utf_continue((PCRE2_SPTR)str_ptr - offs1))
{
/* Not a match. */
str_ptr += IN_UCHARS(1);

View File

@ -776,7 +776,7 @@ typedef union {
} int_char;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
static SLJIT_INLINE int utf_continue(sljit_u8 *s)
static SLJIT_INLINE int utf_continue(PCRE2_SPTR s)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
return (*s & 0xc0) == 0x80;

View File

@ -204,6 +204,7 @@ Arguments:
P a previous frame of interest
frame_size the frame size
mb points to the match block
match_data points to the match data block
s identification text
Returns: nothing
@ -211,7 +212,7 @@ Returns: nothing
static void
display_frames(FILE *f, heapframe *F, heapframe *P, PCRE2_SIZE frame_size,
match_block *mb, const char *s, ...)
match_block *mb, pcre2_match_data *match_data, const char *s, ...)
{
uint32_t i;
heapframe *Q;
@ -223,10 +224,10 @@ vfprintf(f, s, ap);
va_end(ap);
if (P != NULL) fprintf(f, " P=%lu",
((char *)P - (char *)(mb->match_frames))/frame_size);
((char *)P - (char *)(match_data->heapframes))/frame_size);
fprintf(f, "\n");
for (i = 0, Q = mb->match_frames;
for (i = 0, Q = match_data->heapframes;
Q <= F;
i++, Q = (heapframe *)((char *)Q + frame_size))
{
@ -490,10 +491,16 @@ A version did exist that used individual frames on the heap instead of calling
match() recursively, but this ran substantially slower. The current version is
a refactoring that uses a vector of frames to remember backtracking points.
This runs no slower, and possibly even a bit faster than the original recursive
implementation. An initial vector of size START_FRAMES_SIZE (enough for maybe
50 frames) is allocated on the system stack. If this is not big enough, the
heap is used for a larger vector.
implementation.
At first, an initial vector of size START_FRAMES_SIZE (enough for maybe 50
frames) was allocated on the system stack. If this was not big enough, the heap
was used for a larger vector. However, it turns out that there are environments
where taking as little as 20KiB from the system stack is an embarrassment.
After another refactoring, the heap is used exclusively, but a pointer the
frames vector and its size are cached in the match_data block, so that there is
no new memory allocation if the same match_data block is used for multiple
matches (unless the frames vector has to be extended).
*******************************************************************************
******************************************************************************/
@ -566,10 +573,9 @@ made performance worse.
Arguments:
start_eptr starting character in subject
start_ecode starting position in compiled code
ovector pointer to the final output vector
oveccount number of pairs in ovector
top_bracket number of capturing parentheses in the pattern
frame_size size of each backtracking frame
match_data pointer to the match_data block
mb pointer to "static" variables block
Returns: MATCH_MATCH if matched ) these values are >= 0
@ -580,17 +586,19 @@ Returns: MATCH_MATCH if matched ) these values are >= 0
*/
static int
match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, PCRE2_SIZE *ovector,
uint16_t oveccount, uint16_t top_bracket, PCRE2_SIZE frame_size,
match_block *mb)
match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket,
PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
{
/* Frame-handling variables */
heapframe *F; /* Current frame pointer */
heapframe *N = NULL; /* Temporary frame pointers */
heapframe *P = NULL;
heapframe *frames_top; /* End of frames vector */
heapframe *assert_accept_frame = NULL; /* For passing back a frame with captures */
PCRE2_SIZE frame_copy_size; /* Amount to copy when creating a new frame */
PCRE2_SIZE heapframes_size; /* Usable size of frames vector */
PCRE2_SIZE frame_copy_size; /* Amount to copy when creating a new frame */
/* Local variables that do not need to be preserved over calls to RRMATCH(). */
@ -627,10 +635,14 @@ copied when a new frame is created. */
frame_copy_size = frame_size - offsetof(heapframe, eptr);
/* Set up the first current frame at the start of the vector, and initialize
fields that are not reset for new frames. */
/* Set up the first frame and the end of the frames vector. We set the local
heapframes_size to the usuable amount of the vector, that is, a whole number of
frames. */
F = match_data->heapframes;
heapframes_size = (match_data->heapframes_size / frame_size) * frame_size;
frames_top = (heapframe *)((char *)F + heapframes_size);
F = mb->match_frames;
Frdepth = 0; /* "Recursion" depth */
Fcapture_last = 0; /* Number of most recent capture */
Fcurrent_recurse = RECURSE_UNSET; /* Not pattern recursing. */
@ -646,34 +658,35 @@ backtracking point. */
MATCH_RECURSE:
/* Set up a new backtracking frame. If the vector is full, get a new one
on the heap, doubling the size, but constrained by the heap limit. */
/* Set up a new backtracking frame. If the vector is full, get a new one,
doubling the size, but constrained by the heap limit (which is in KiB). */
N = (heapframe *)((char *)F + frame_size);
if (N >= mb->match_frames_top)
if (N >= frames_top)
{
PCRE2_SIZE newsize = mb->frame_vector_size * 2;
heapframe *new;
PCRE2_SIZE newsize = match_data->heapframes_size * 2;
if ((newsize / 1024) > mb->heap_limit)
if (newsize > mb->heap_limit)
{
PCRE2_SIZE maxsize = ((mb->heap_limit * 1024)/frame_size) * frame_size;
if (mb->frame_vector_size >= maxsize) return PCRE2_ERROR_HEAPLIMIT;
PCRE2_SIZE maxsize = (mb->heap_limit/frame_size) * frame_size;
if (match_data->heapframes_size >= maxsize) return PCRE2_ERROR_HEAPLIMIT;
newsize = maxsize;
}
new = mb->memctl.malloc(newsize, mb->memctl.memory_data);
new = match_data->memctl.malloc(newsize, match_data->memctl.memory_data);
if (new == NULL) return PCRE2_ERROR_NOMEMORY;
memcpy(new, mb->match_frames, mb->frame_vector_size);
memcpy(new, match_data->heapframes, heapframes_size);
F = (heapframe *)((char *)new + ((char *)F - (char *)mb->match_frames));
F = (heapframe *)((char *)new + ((char *)F - (char *)match_data->heapframes));
N = (heapframe *)((char *)F + frame_size);
if (mb->match_frames != mb->stack_frames)
mb->memctl.free(mb->match_frames, mb->memctl.memory_data);
mb->match_frames = new;
mb->match_frames_top = (heapframe *)((char *)mb->match_frames + newsize);
mb->frame_vector_size = newsize;
match_data->memctl.free(match_data->heapframes, match_data->memctl.memory_data);
match_data->heapframes = new;
match_data->heapframes_size = newsize;
heapframes_size = (newsize / frame_size) * frame_size;
frames_top = (heapframe *)((char *)new + heapframes_size);
}
#ifdef DEBUG_SHOW_RMATCH
@ -731,7 +744,7 @@ recursion value. */
if (group_frame_type != 0)
{
Flast_group_offset = (char *)F - (char *)mb->match_frames;
Flast_group_offset = (char *)F - (char *)match_data->heapframes;
if (GF_IDMASK(group_frame_type) == GF_RECURSE)
Fcurrent_recurse = GF_DATAMASK(group_frame_type);
group_frame_type = 0;
@ -773,7 +786,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
for(;;)
{
if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
N = (heapframe *)((char *)mb->match_frames + offset);
N = (heapframe *)((char *)match_data->heapframes + offset);
P = (heapframe *)((char *)N - frame_size);
if (N->group_frame_type == (GF_CAPTURE | number)) break;
offset = P->last_group_offset;
@ -811,7 +824,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
for(;;)
{
if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
N = (heapframe *)((char *)mb->match_frames + offset);
N = (heapframe *)((char *)match_data->heapframes + offset);
P = (heapframe *)((char *)N - frame_size);
if (GF_IDMASK(N->group_frame_type) == GF_RECURSE) break;
offset = P->last_group_offset;
@ -864,14 +877,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
mb->mark = Fmark; /* and the last success mark */
if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
ovector[0] = Fstart_match - mb->start_subject;
ovector[1] = Feptr - mb->start_subject;
match_data->ovector[0] = Fstart_match - mb->start_subject;
match_data->ovector[1] = Feptr - mb->start_subject;
/* Set i to the smaller of the sizes of the external and frame ovectors. */
i = 2 * ((top_bracket + 1 > oveccount)? oveccount : top_bracket + 1);
memcpy(ovector + 2, Fovector, (i - 2) * sizeof(PCRE2_SIZE));
while (--i >= Foffset_top + 2) ovector[i] = PCRE2_UNSET;
i = 2 * ((top_bracket + 1 > match_data->oveccount)?
match_data->oveccount : top_bracket + 1);
memcpy(match_data->ovector + 2, Fovector, (i - 2) * sizeof(PCRE2_SIZE));
while (--i >= Foffset_top + 2) match_data->ovector[i] = PCRE2_UNSET;
return MATCH_MATCH; /* Note: NOT RRETURN */
@ -5328,7 +5342,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
offset = Flast_group_offset;
while (offset != PCRE2_UNSET)
{
N = (heapframe *)((char *)mb->match_frames + offset);
N = (heapframe *)((char *)match_data->heapframes + offset);
P = (heapframe *)((char *)N - frame_size);
if (N->group_frame_type == (GF_RECURSE | number))
{
@ -5729,7 +5743,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
if (*bracode != OP_BRA && *bracode != OP_COND)
{
N = (heapframe *)((char *)mb->match_frames + Flast_group_offset);
N = (heapframe *)((char *)match_data->heapframes + Flast_group_offset);
P = (heapframe *)((char *)N - frame_size);
Flast_group_offset = P->last_group_offset;
@ -6346,6 +6360,7 @@ BOOL jit_checked_utf = FALSE;
#endif /* SUPPORT_UNICODE */
PCRE2_SIZE frame_size;
PCRE2_SIZE heapframes_size;
/* We need to have mb as a pointer to a match block, because the IS_NEWLINE
macro is used below, and it expects NLBLOCK to be defined as a pointer. */
@ -6354,15 +6369,6 @@ pcre2_callout_block cb;
match_block actual_match_block;
match_block *mb = &actual_match_block;
/* Allocate an initial vector of backtracking frames on the stack. If this
proves to be too small, it is replaced by a larger one on the heap. To get a
vector of the size required that is aligned for pointers, allocate it as a
vector of pointers. */
PCRE2_SPTR stack_frames_vector[START_FRAMES_SIZE/sizeof(PCRE2_SPTR)]
PCRE2_KEEP_UNINITIALIZED;
mb->stack_frames = (heapframe *)stack_frames_vector;
/* Recognize NULL, length 0 as an empty string. */
if (subject == NULL && length == 0) subject = (PCRE2_SPTR)"";
@ -6793,15 +6799,11 @@ switch(re->newline_convention)
vector at the end, whose size depends on the number of capturing parentheses in
the pattern. It is not used at all if there are no capturing parentheses.
frame_size is the total size of each frame
mb->frame_vector_size is the total usable size of the vector (rounded down
to a whole number of frames)
frame_size is the total size of each frame
match_data->heapframes is the pointer to the frames vector
match_data->heapframes_size is the total size of the vector
The last of these is changed within the match() function if the frame vector
has to be expanded. We therefore put it into the match block so that it is
correct when calling match() more than once for non-anchored patterns.
We must also pad frame_size for alignment to ensure subsequent frames are as
We must pad the frame_size for alignment to ensure subsequent frames are as
aligned as heapframe. Whilst ovector is word-aligned due to being a PCRE2_SIZE
array, that does not guarantee it is suitably aligned for pointers, as some
architectures have pointers that are larger than a size_t. */
@ -6813,8 +6815,8 @@ frame_size = (offsetof(heapframe, ovector) +
/* Limits set in the pattern override the match context only if they are
smaller. */
mb->heap_limit = (mcontext->heap_limit < re->limit_heap)?
mcontext->heap_limit : re->limit_heap;
mb->heap_limit = ((mcontext->heap_limit < re->limit_heap)?
mcontext->heap_limit : re->limit_heap) * 1024;
mb->match_limit = (mcontext->match_limit < re->limit_match)?
mcontext->match_limit : re->limit_match;
@ -6823,35 +6825,40 @@ mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
mcontext->depth_limit : re->limit_depth;
/* If a pattern has very many capturing parentheses, the frame size may be very
large. Ensure that there are at least 10 available frames by getting an initial
vector on the heap if necessary, except when the heap limit prevents this. Get
fewer if possible. (The heap limit is in kibibytes.) */
large. Set the initial frame vector size to ensure that there are at least 10
available frames, but enforce a minimum of START_FRAMES_SIZE. If this is
greater than the heap limit, get as large a vector as possible. Always round
the size to a multiple of the frame size. */
if (frame_size <= START_FRAMES_SIZE/10)
heapframes_size = frame_size * 10;
if (heapframes_size < START_FRAMES_SIZE) heapframes_size = START_FRAMES_SIZE;
if (heapframes_size > mb->heap_limit)
{
mb->match_frames = mb->stack_frames; /* Initial frame vector on the stack */
mb->frame_vector_size = ((START_FRAMES_SIZE/frame_size) * frame_size);
if (frame_size > mb->heap_limit ) return PCRE2_ERROR_HEAPLIMIT;
heapframes_size = mb->heap_limit;
}
else
/* If an existing frame vector in the match_data block is large enough, we can
use it.Otherwise, free any pre-existing vector and get a new one. */
if (match_data->heapframes_size < heapframes_size)
{
mb->frame_vector_size = frame_size * 10;
if ((mb->frame_vector_size / 1024) > mb->heap_limit)
match_data->memctl.free(match_data->heapframes,
match_data->memctl.memory_data);
match_data->heapframes = match_data->memctl.malloc(heapframes_size,
match_data->memctl.memory_data);
if (match_data->heapframes == NULL)
{
if (frame_size > mb->heap_limit * 1024) return PCRE2_ERROR_HEAPLIMIT;
mb->frame_vector_size = ((mb->heap_limit * 1024)/frame_size) * frame_size;
match_data->heapframes_size = 0;
return PCRE2_ERROR_NOMEMORY;
}
mb->match_frames = mb->memctl.malloc(mb->frame_vector_size,
mb->memctl.memory_data);
if (mb->match_frames == NULL) return PCRE2_ERROR_NOMEMORY;
match_data->heapframes_size = heapframes_size;
}
mb->match_frames_top =
(heapframe *)((char *)mb->match_frames + mb->frame_vector_size);
/* Write to the ovector within the first frame to mark every capture unset and
to avoid uninitialized memory read errors when it is copied to a new frame. */
memset((char *)(mb->match_frames) + offsetof(heapframe, ovector), 0xff,
memset((char *)(match_data->heapframes) + offsetof(heapframe, ovector), 0xff,
frame_size - offsetof(heapframe, ovector));
/* Pointers to the individual character tables */
@ -7279,8 +7286,8 @@ for(;;)
mb->end_offset_top = 0;
mb->skip_arg_count = 0;
rc = match(start_match, mb->start_code, match_data->ovector,
match_data->oveccount, re->top_bracket, frame_size, mb);
rc = match(start_match, mb->start_code, re->top_bracket, frame_size,
match_data, mb);
if (mb->hitend && start_partial == NULL)
{
@ -7463,11 +7470,6 @@ if (utf && end_subject != true_end_subject &&
}
#endif /* SUPPORT_UNICODE */
/* Release an enlarged frame vector that is on the heap. */
if (mb->match_frames != mb->stack_frames)
mb->memctl.free(mb->match_frames, mb->memctl.memory_data);
/* Fill in fields that are always returned in the match data. */
match_data->code = re;
@ -7533,4 +7535,10 @@ else match_data->rc = PCRE2_ERROR_NOMATCH;
return match_data->rc;
}
/* These #undefs are here to enable unity builds with CMake. */
#undef NLBLOCK /* Block containing newline information */
#undef PSSTART /* Field containing processed string start */
#undef PSEND /* Field containing processed string end */
/* End of pcre2_match.c */

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2019 University of Cambridge
New API code Copyright (c) 2016-2022 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -51,19 +51,23 @@ POSSIBILITY OF SUCH DAMAGE.
* Create a match data block given ovector size *
*************************************************/
/* A minimum of 1 is imposed on the number of ovector pairs. */
/* A minimum of 1 is imposed on the number of ovector pairs. A maximum is also
imposed because the oveccount field in a match data block is uintt6_t. */
PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
pcre2_match_data_create(uint32_t oveccount, pcre2_general_context *gcontext)
{
pcre2_match_data *yield;
if (oveccount < 1) oveccount = 1;
if (oveccount > UINT16_MAX) oveccount = UINT16_MAX;
yield = PRIV(memctl_malloc)(
offsetof(pcre2_match_data, ovector) + 2*oveccount*sizeof(PCRE2_SIZE),
(pcre2_memctl *)gcontext);
if (yield == NULL) return NULL;
yield->oveccount = oveccount;
yield->flags = 0;
yield->heapframes = NULL;
yield->heapframes_size = 0;
return yield;
}
@ -95,6 +99,9 @@ pcre2_match_data_free(pcre2_match_data *match_data)
{
if (match_data != NULL)
{
if (match_data->heapframes != NULL)
match_data->memctl.free(match_data->heapframes,
match_data->memctl.memory_data);
if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
match_data->memctl.free((void *)match_data->subject,
match_data->memctl.memory_data);

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2021 University of Cambridge
New API code Copyright (c) 2016-2022 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -259,16 +259,16 @@ PCRE2_UNSET, so as not to imply an offset in the replacement. */
if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
return PCRE2_ERROR_BADOPTION;
/* Validate length and find the end of the replacement. A NULL replacement of
/* Validate length and find the end of the replacement. A NULL replacement of
zero length is interpreted as an empty string. */
if (replacement == NULL)
if (replacement == NULL)
{
if (rlength != 0) return PCRE2_ERROR_NULL;
replacement = (PCRE2_SPTR)"";
}
replacement = (PCRE2_SPTR)"";
}
if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement);
repend = replacement + rlength;
@ -282,8 +282,9 @@ replacement_only = ((options & PCRE2_SUBSTITUTE_REPLACEMENT_ONLY) != 0);
match data block. We create an internal match_data block in two cases: (a) an
external one is not supplied (and we are not starting from an existing match);
(b) an existing match is to be used for the first substitution. In the latter
case, we copy the existing match into the internal block. This ensures that no
changes are made to the existing match data block. */
case, we copy the existing match into the internal block, except for any cached
heap frame size and pointer. This ensures that no changes are made to the
external match data block. */
if (match_data == NULL)
{
@ -309,6 +310,8 @@ else if (use_existing_match)
if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY;
memcpy(internal_match_data, match_data, offsetof(pcre2_match_data, ovector)
+ 2*pairs*sizeof(PCRE2_SIZE));
internal_match_data->heapframes = NULL;
internal_match_data->heapframes_size = 0;
match_data = internal_match_data;
}
@ -328,9 +331,9 @@ scb.ovector = ovector;
if (subject == NULL)
{
if (length != 0) return PCRE2_ERROR_NULL;
if (length != 0) return PCRE2_ERROR_NULL;
subject = (PCRE2_SPTR)"";
}
}
/* Find length of zero-terminated subject */

View File

@ -53,7 +53,8 @@ extern "C" {
/* #define SLJIT_CONFIG_PPC_64 1 */
/* #define SLJIT_CONFIG_MIPS_32 1 */
/* #define SLJIT_CONFIG_MIPS_64 1 */
/* #define SLJIT_CONFIG_SPARC_32 1 */
/* #define SLJIT_CONFIG_RISCV_32 1 */
/* #define SLJIT_CONFIG_RISCV_64 1 */
/* #define SLJIT_CONFIG_S390X 1 */
/* #define SLJIT_CONFIG_AUTO 1 */
@ -127,17 +128,6 @@ extern "C" {
#endif /* !SLJIT_EXECUTABLE_ALLOCATOR */
/* Force cdecl calling convention even if a better calling
convention (e.g. fastcall) is supported by the C compiler.
If this option is disabled (this is the default), functions
called from JIT should be defined with SLJIT_FUNC attribute.
Standard C functions can still be called by using the
SLJIT_CALL_CDECL jump type. */
#ifndef SLJIT_USE_CDECL_CALLING_CONVENTION
/* Disabled by default */
#define SLJIT_USE_CDECL_CALLING_CONVENTION 0
#endif
/* Return with error when an invalid argument is passed. */
#ifndef SLJIT_ARGUMENT_CHECKS
/* Disabled by default */

View File

@ -59,7 +59,8 @@ extern "C" {
SLJIT_64BIT_ARCHITECTURE : 64 bit architecture
SLJIT_LITTLE_ENDIAN : little endian architecture
SLJIT_BIG_ENDIAN : big endian architecture
SLJIT_UNALIGNED : allows unaligned memory accesses for non-fpu operations (only!)
SLJIT_UNALIGNED : unaligned memory accesses for non-fpu operations are supported
SLJIT_FPU_UNALIGNED : unaligned memory accesses for fpu operations are supported
SLJIT_INDIRECT_CALL : see SLJIT_FUNC_ADDR() for more information
Constants:
@ -98,7 +99,8 @@ extern "C" {
+ (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
+ (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \
+ (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \
+ (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \
+ (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) \
+ (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) \
+ (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \
+ (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) \
+ (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) >= 2
@ -115,7 +117,8 @@ extern "C" {
&& !(defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
&& !(defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \
&& !(defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \
&& !(defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \
&& !(defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) \
&& !(defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) \
&& !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \
&& !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) \
&& !(defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO)
@ -156,8 +159,10 @@ extern "C" {
#define SLJIT_CONFIG_MIPS_32 1
#elif defined(__mips64)
#define SLJIT_CONFIG_MIPS_64 1
#elif (defined(__sparc__) || defined(__sparc)) && !defined(_LP64)
#define SLJIT_CONFIG_SPARC_32 1
#elif defined (__riscv_xlen) && (__riscv_xlen == 32)
#define SLJIT_CONFIG_RISCV_32 1
#elif defined (__riscv_xlen) && (__riscv_xlen == 64)
#define SLJIT_CONFIG_RISCV_64 1
#elif defined(__s390x__)
#define SLJIT_CONFIG_S390X 1
#else
@ -205,8 +210,8 @@ extern "C" {
#define SLJIT_CONFIG_PPC 1
#elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
#define SLJIT_CONFIG_MIPS 1
#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) || (defined SLJIT_CONFIG_SPARC_64 && SLJIT_CONFIG_SPARC_64)
#define SLJIT_CONFIG_SPARC 1
#elif (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) || (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
#define SLJIT_CONFIG_RISCV 1
#endif
/***********************************************************/
@ -330,8 +335,14 @@ extern "C" {
* older versions are known to abort in some targets
* https://github.com/PhilipHazel/pcre2/issues/92
*
* beware APPLE is known to have removed the code in iOS so
* it will need to be excempted or result in broken builds
* beware some vendors (ex: Microsoft, Apple) are known to have
* removed the code to support this builtin even if the call for
* __has_builtin reports it is available.
*
* make sure linking doesn't fail because __clear_cache() is
* missing before changing it or add an exception so that the
* system provided method that should be defined below is used
* instead.
*/
#if (!defined SLJIT_CACHE_FLUSH && defined __has_builtin)
#if __has_builtin(__builtin___clear_cache) && !defined(__clang__)
@ -339,9 +350,9 @@ extern "C" {
/*
* https://gcc.gnu.org/bugzilla//show_bug.cgi?id=91248
* https://gcc.gnu.org/bugzilla//show_bug.cgi?id=93811
* gcc's clear_cache builtin for power and sparc are broken
* gcc's clear_cache builtin for power is broken
*/
#if !defined(SLJIT_CONFIG_PPC) && !defined(SLJIT_CONFIG_SPARC_32)
#if !defined(SLJIT_CONFIG_PPC)
#define SLJIT_CACHE_FLUSH(from, to) \
__builtin___clear_cache((char*)(from), (char*)(to))
#endif
@ -373,12 +384,10 @@ extern "C" {
ppc_cache_flush((from), (to))
#define SLJIT_CACHE_FLUSH_OWN_IMPL 1
#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
#elif defined(_WIN32)
/* The __clear_cache() implementation of GCC is a dummy function on Sparc. */
#define SLJIT_CACHE_FLUSH(from, to) \
sparc_cache_flush((from), (to))
#define SLJIT_CACHE_FLUSH_OWN_IMPL 1
FlushInstructionCache(GetCurrentProcess(), (void*)(from), (char*)(to) - (char*)(from))
#elif (defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || defined(__clang__)
@ -392,11 +401,6 @@ extern "C" {
#define SLJIT_CACHE_FLUSH(from, to) \
cacheflush((long)(from), (long)(to), 0)
#elif defined _WIN32
#define SLJIT_CACHE_FLUSH(from, to) \
FlushInstructionCache(GetCurrentProcess(), (void*)(from), (char*)(to) - (char*)(from))
#else
/* Call __ARM_NR_cacheflush on ARM-Linux or the corresponding MIPS syscall. */
@ -435,6 +439,7 @@ typedef long int sljit_sw;
&& !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \
&& !(defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
&& !(defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \
&& !(defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) \
&& !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
#define SLJIT_32BIT_ARCHITECTURE 1
#define SLJIT_WORD_SHIFT 2
@ -495,8 +500,7 @@ typedef double sljit_f64;
#if !defined(SLJIT_BIG_ENDIAN) && !defined(SLJIT_LITTLE_ENDIAN)
/* These macros are mostly useful for the applications. */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \
|| (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
#ifdef __LITTLE_ENDIAN__
#define SLJIT_LITTLE_ENDIAN 1
@ -504,8 +508,7 @@ typedef double sljit_f64;
#define SLJIT_BIG_ENDIAN 1
#endif
#elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \
|| (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
#ifdef __MIPSEL__
#define SLJIT_LITTLE_ENDIAN 1
@ -532,8 +535,7 @@ typedef double sljit_f64;
#endif /* !SLJIT_MIPS_REV */
#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \
|| (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
#elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
#define SLJIT_BIG_ENDIAN 1
@ -554,19 +556,30 @@ typedef double sljit_f64;
#ifndef SLJIT_UNALIGNED
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \
|| (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
|| (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \
|| (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \
|| (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \
|| (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \
|| (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
|| (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \
|| (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) \
|| (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
#define SLJIT_UNALIGNED 1
#endif
#endif /* !SLJIT_UNALIGNED */
#ifndef SLJIT_FPU_UNALIGNED
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
|| (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \
|| (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \
|| (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) \
|| (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
#define SLJIT_FPU_UNALIGNED 1
#endif
#endif /* !SLJIT_FPU_UNALIGNED */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
/* Auto detect SSE2 support using CPUID.
On 64 bit x86 cpus, sse2 must be present. */
@ -578,38 +591,7 @@ typedef double sljit_f64;
/*****************************************************************************************/
#ifndef SLJIT_FUNC
#if (defined SLJIT_USE_CDECL_CALLING_CONVENTION && SLJIT_USE_CDECL_CALLING_CONVENTION) \
|| !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#define SLJIT_FUNC
#elif defined(__GNUC__) && !defined(__APPLE__)
#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
#define SLJIT_FUNC __attribute__ ((fastcall))
#define SLJIT_X86_32_FASTCALL 1
#else
#define SLJIT_FUNC
#endif /* gcc >= 3.4 */
#elif defined(_MSC_VER)
#define SLJIT_FUNC __fastcall
#define SLJIT_X86_32_FASTCALL 1
#elif defined(__BORLANDC__)
#define SLJIT_FUNC __msfastcall
#define SLJIT_X86_32_FASTCALL 1
#else /* Unknown compiler. */
/* The cdecl calling convention is usually the x86 default. */
#define SLJIT_FUNC
#endif /* SLJIT_USE_CDECL_CALLING_CONVENTION */
#endif /* !SLJIT_FUNC */
#ifndef SLJIT_INDIRECT_CALL
@ -621,14 +603,10 @@ typedef double sljit_f64;
#endif
#endif /* SLJIT_INDIRECT_CALL */
/* The offset which needs to be substracted from the return address to
/* The offset which needs to be subtracted from the return address to
determine the next executed instruction after return. */
#ifndef SLJIT_RETURN_ADDRESS_OFFSET
#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
#define SLJIT_RETURN_ADDRESS_OFFSET 8
#else
#define SLJIT_RETURN_ADDRESS_OFFSET 0
#endif
#endif /* SLJIT_RETURN_ADDRESS_OFFSET */
/***************************************************/
@ -666,10 +644,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#define SLJIT_NUMBER_OF_REGISTERS 12
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 9
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 7
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 7
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0
#define SLJIT_LOCALS_OFFSET_BASE (compiler->locals_offset)
#define SLJIT_LOCALS_OFFSET_BASE (8 * SSIZE_OF(sw))
#define SLJIT_PREF_SHIFT_REG SLJIT_R2
#elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
@ -683,7 +661,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
#else /* _WIN64 */
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 10
#define SLJIT_LOCALS_OFFSET_BASE (4 * (sljit_s32)sizeof(sljit_sw))
#define SLJIT_LOCALS_OFFSET_BASE (4 * SSIZE_OF(sw))
#endif /* !_WIN64 */
#define SLJIT_PREF_SHIFT_REG SLJIT_R3
@ -740,17 +718,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8
#endif
#elif (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC)
#elif (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV)
#define SLJIT_NUMBER_OF_REGISTERS 18
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 14
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 14
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0
#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
/* saved registers (16), return struct pointer (1), space for 6 argument words (1),
4th double arg (2), double alignment (1). */
#define SLJIT_LOCALS_OFFSET_BASE ((16 + 1 + 6 + 2 + 1) * (sljit_s32)sizeof(sljit_sw))
#endif
#define SLJIT_NUMBER_OF_REGISTERS 23
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 12
#define SLJIT_LOCALS_OFFSET_BASE 0
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 12
#elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
@ -806,7 +780,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
#if (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) \
|| (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \
|| (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) \
|| (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC) \
|| (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) \
|| (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
#define SLJIT_HAS_STATUS_FLAGS_STATE 1
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -86,6 +86,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define CSINC 0x9a800400
#define EOR 0xca000000
#define EORI 0xd2000000
#define EXTR 0x93c00000
#define FABS 0x1e60c000
#define FADD 0x1e602800
#define FCMP 0x1e602000
@ -98,6 +99,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define FSUB 0x1e603800
#define LDRI 0xf9400000
#define LDRI_F64 0xfd400000
#define LDRI_POST 0xf8400400
#define LDP 0xa9400000
#define LDP_F64 0x6d400000
#define LDP_POST 0xa8c00000
@ -112,7 +114,9 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define ORN 0xaa200000
#define ORR 0xaa000000
#define ORRI 0xb2000000
#define RBIT 0xdac00000
#define RET 0xd65f0000
#define RORV 0x9ac02c00
#define SBC 0xda000000
#define SBFM 0x93000000
#define SCVTF 0x9e620000
@ -137,8 +141,6 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define UDIV 0x9ac00800
#define UMULH 0x9bc03c00
/* dest_reg is the absolute name of the register
Useful for reordering instructions in the delay slot. */
static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
{
sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
@ -296,8 +298,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
}
next_addr = compute_next_addr(label, jump, const_, put_label);
}
code_ptr ++;
word_count ++;
code_ptr++;
word_count++;
} while (buf_ptr < buf_end);
buf = buf->next;
@ -391,6 +393,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
#endif
case SLJIT_HAS_CLZ:
case SLJIT_HAS_CTZ:
case SLJIT_HAS_ROT:
case SLJIT_HAS_CMOV:
case SLJIT_HAS_PREFETCH:
return 1;
@ -631,6 +635,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
switch (op) {
case SLJIT_MUL:
case SLJIT_CLZ:
case SLJIT_CTZ:
case SLJIT_ADDC:
case SLJIT_SUBC:
/* No form with immediate operand (except imm 0, which
@ -701,36 +706,50 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
FAIL_IF(push_inst(compiler, (inst_bits ^ inv_bits) | RD(dst) | RN(reg)));
goto set_flags;
case SLJIT_SHL:
case SLJIT_MSHL:
if (flags & ARG1_IMM)
break;
if (flags & INT_OP) {
imm &= 0x1f;
FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1)
| (((sljit_ins)-imm & 0x1f) << 16) | ((31 - (sljit_ins)imm) << 10)));
}
else {
inst_bits = (((sljit_ins)-imm & 0x1f) << 16) | ((31 - (sljit_ins)imm) << 10);
} else {
imm &= 0x3f;
FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (1 << 22)
| (((sljit_ins)-imm & 0x3f) << 16) | ((63 - (sljit_ins)imm) << 10)));
inst_bits = ((sljit_ins)1 << 22) | (((sljit_ins)-imm & 0x3f) << 16) | ((63 - (sljit_ins)imm) << 10);
}
FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | inst_bits));
goto set_flags;
case SLJIT_LSHR:
case SLJIT_MLSHR:
case SLJIT_ASHR:
case SLJIT_MASHR:
if (flags & ARG1_IMM)
break;
if (op == SLJIT_ASHR)
if (op >= SLJIT_ASHR)
inv_bits |= 1 << 30;
if (flags & INT_OP) {
imm &= 0x1f;
FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1)
| ((sljit_ins)imm << 16) | (31 << 10)));
}
else {
inst_bits = ((sljit_ins)imm << 16) | (31 << 10);
} else {
imm &= 0x3f;
FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1)
| (1 << 22) | ((sljit_ins)imm << 16) | (63 << 10)));
inst_bits = ((sljit_ins)1 << 22) | ((sljit_ins)imm << 16) | (63 << 10);
}
FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | inst_bits));
goto set_flags;
case SLJIT_ROTL:
case SLJIT_ROTR:
if (flags & ARG1_IMM)
break;
if (op == SLJIT_ROTL)
imm = -imm;
imm &= (flags & INT_OP) ? 0x1f : 0x3f;
return push_inst(compiler, (EXTR ^ (inv_bits | (inv_bits >> 9))) | RD(dst) | RN(arg1) | RM(arg1) | ((sljit_ins)imm << 10));
default:
SLJIT_UNREACHABLE();
break;
@ -796,6 +815,10 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
case SLJIT_CLZ:
SLJIT_ASSERT(arg1 == TMP_REG1);
return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2));
case SLJIT_CTZ:
SLJIT_ASSERT(arg1 == TMP_REG1);
FAIL_IF(push_inst(compiler, (RBIT ^ inv_bits) | RD(dst) | RN(arg2)));
return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(dst));
case SLJIT_ADD:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
CHECK_FLAGS(1 << 29);
@ -834,14 +857,23 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
FAIL_IF(push_inst(compiler, (EOR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
break; /* Set flags. */
case SLJIT_SHL:
case SLJIT_MSHL:
FAIL_IF(push_inst(compiler, (LSLV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
break; /* Set flags. */
case SLJIT_LSHR:
case SLJIT_MLSHR:
FAIL_IF(push_inst(compiler, (LSRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
break; /* Set flags. */
case SLJIT_ASHR:
case SLJIT_MASHR:
FAIL_IF(push_inst(compiler, (ASRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
break; /* Set flags. */
case SLJIT_ROTL:
FAIL_IF(push_inst(compiler, (SUB ^ inv_bits) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(arg2)));
arg2 = TMP_REG2;
/* fallthrough */
case SLJIT_ROTR:
return push_inst(compiler, (RORV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
default:
SLJIT_UNREACHABLE();
return SLJIT_SUCCESS;
@ -895,21 +927,37 @@ static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, s
return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | ((sljit_ins)argw << 10));
}
if (argw >= 0 && (argw & ((1 << shift) - 1)) == 0) {
if ((argw >> shift) <= 0xfff)
return push_inst(compiler, STRBI | type | RT(reg) | RN(arg) | ((sljit_ins)argw << (10 - shift)));
if ((argw & ((1 << shift) - 1)) == 0) {
if (argw >= 0) {
if ((argw >> shift) <= 0xfff)
return push_inst(compiler, STRBI | type | RT(reg) | RN(arg) | ((sljit_ins)argw << (10 - shift)));
if (argw <= 0xffffff) {
FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)argw >> 12) << 10)));
if (argw <= 0xffffff) {
FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)argw >> 12) << 10)));
argw = ((argw & 0xfff) >> shift);
argw = ((argw & 0xfff) >> shift);
return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | ((sljit_ins)argw << 10));
}
} else if (argw < -256 && argw >= -0xfff000) {
FAIL_IF(push_inst(compiler, SUBI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)(-argw + 0xfff) >> 12) << 10)));
argw = ((0x1000 + argw) & 0xfff) >> shift;
return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | ((sljit_ins)argw << 10));
}
}
if (argw <= 255 && argw >= -256)
if (argw <= 0xff && argw >= -0x100)
return push_inst(compiler, STURBI | type | RT(reg) | RN(arg) | (((sljit_ins)argw & 0x1ff) << 12));
if (argw >= 0) {
if (argw <= 0xfff0ff && ((argw + 0x100) & 0xfff) <= 0x1ff) {
FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)argw >> 12) << 10)));
return push_inst(compiler, STURBI | type | RT(reg) | RN(tmp_reg) | (((sljit_ins)argw & 0x1ff) << 12));
}
} else if (argw >= -0xfff100 && ((-argw + 0xff) & 0xfff) <= 0x1ff) {
FAIL_IF(push_inst(compiler, SUBI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)-argw >> 12) << 10)));
return push_inst(compiler, STURBI | type | RT(reg) | RN(tmp_reg) | (((sljit_ins)argw & 0x1ff) << 12));
}
FAIL_IF(load_immediate(compiler, tmp_reg, argw));
return push_inst(compiler, STRB | type | RT(reg) | RN(arg) | RM(tmp_reg));
@ -924,14 +972,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
sljit_s32 prev, fprev, saved_regs_size, i, tmp;
sljit_s32 word_arg_count = 0;
sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
sljit_ins offs;
CHECK_ERROR();
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 2);
saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, SSIZE_OF(f64));
local_size = (local_size + saved_regs_size + 0xf) & ~0xf;
@ -954,7 +1002,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
prev = -1;
tmp = SLJIT_S0 - saveds;
for (i = SLJIT_S0; i > tmp; i--) {
for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) {
if (prev == -1) {
prev = i;
continue;
@ -1003,23 +1051,27 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
if (prev != -1)
FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5) | ((fprev == -1) ? (1 << 10) : 0)));
arg_types >>= SLJIT_ARG_SHIFT;
#ifdef _WIN32
if (local_size > 4096)
FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22)));
#endif /* _WIN32 */
tmp = 0;
while (arg_types > 0) {
if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0 - tmp) | RN(TMP_ZERO) | RM(SLJIT_R0 + word_arg_count)));
if (!(options & SLJIT_ENTER_REG_ARG)) {
arg_types >>= SLJIT_ARG_SHIFT;
saved_arg_count = 0;
tmp = SLJIT_R0;
while (arg_types) {
if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0 - saved_arg_count) | RN(TMP_ZERO) | RM(tmp)));
saved_arg_count++;
}
tmp++;
}
word_arg_count++;
arg_types >>= SLJIT_ARG_SHIFT;
}
arg_types >>= SLJIT_ARG_SHIFT;
}
#ifdef _WIN32
@ -1100,26 +1152,34 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 2);
saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, SSIZE_OF(f64));
compiler->local_size = (local_size + saved_regs_size + 0xf) & ~0xf;
return SLJIT_SUCCESS;
}
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler)
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to)
{
sljit_s32 local_size, prev, fprev, i, tmp;
sljit_ins offs;
local_size = compiler->local_size;
if (local_size > 512 && local_size <= 512 + 496) {
FAIL_IF(push_inst(compiler, LDP_POST | RT(TMP_FP) | RT2(TMP_LR)
| RN(SLJIT_SP) | ((sljit_ins)(local_size - 512) << (15 - 3))));
local_size = 512;
} else
FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
if (!is_return_to) {
if (local_size > 512 && local_size <= 512 + 496) {
FAIL_IF(push_inst(compiler, LDP_POST | RT(TMP_FP) | RT2(TMP_LR)
| RN(SLJIT_SP) | ((sljit_ins)(local_size - 512) << (15 - 3))));
local_size = 512;
} else
FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
} else {
if (local_size > 512 && local_size <= 512 + 248) {
FAIL_IF(push_inst(compiler, LDRI_POST | RT(TMP_FP) | RN(SLJIT_SP) | ((sljit_ins)(local_size - 512) << 12)));
local_size = 512;
} else
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_FP) | RN(SLJIT_SP) | 0));
}
if (local_size > 512) {
local_size -= 512;
@ -1137,7 +1197,7 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler)
prev = -1;
tmp = SLJIT_S0 - compiler->saveds;
for (i = SLJIT_S0; i > tmp; i--) {
for (i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); i > tmp; i--) {
if (prev == -1) {
prev = i;
continue;
@ -1195,11 +1255,34 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler
CHECK_ERROR();
CHECK(check_sljit_emit_return_void(compiler));
FAIL_IF(emit_stack_frame_release(compiler));
FAIL_IF(emit_stack_frame_release(compiler, 0));
return push_inst(compiler, RET | RN(TMP_LR));
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
sljit_s32 src, sljit_sw srcw)
{
CHECK_ERROR();
CHECK(check_sljit_emit_return_to(compiler, src, srcw));
if (src & SLJIT_MEM) {
ADJUST_LOCAL_OFFSET(src, srcw);
FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
src = TMP_REG1;
srcw = 0;
} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(src)));
src = TMP_REG1;
srcw = 0;
}
FAIL_IF(emit_stack_frame_release(compiler, 1));
SLJIT_SKIP_CHECKS(compiler);
return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
}
/* --------------------------------------------------------------------- */
/* Operators */
/* --------------------------------------------------------------------- */
@ -1392,13 +1475,84 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil
CHECK_ERROR();
CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
compiler->skip_checks = 1;
#endif
SLJIT_SKIP_CHECKS(compiler);
return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 src_dst,
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
{
sljit_ins inv_bits, imm;
sljit_s32 is_left;
sljit_sw mask;
CHECK_ERROR();
CHECK(check_sljit_emit_shift_into(compiler, op, src_dst, src1, src1w, src2, src2w));
is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL);
if (src_dst == src1) {
SLJIT_SKIP_CHECKS(compiler);
return sljit_emit_op2(compiler, (is_left ? SLJIT_ROTL : SLJIT_ROTR) | (op & SLJIT_32), src_dst, 0, src_dst, 0, src2, src2w);
}
ADJUST_LOCAL_OFFSET(src1, src1w);
ADJUST_LOCAL_OFFSET(src2, src2w);
inv_bits = (op & SLJIT_32) ? W_OP : 0;
mask = inv_bits ? 0x1f : 0x3f;
if (src2 & SLJIT_IMM) {
src2w &= mask;
if (src2w == 0)
return SLJIT_SUCCESS;
} else if (src2 & SLJIT_MEM) {
FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG2, src2, src2w, TMP_REG2));
src2 = TMP_REG2;
}
if (src1 & SLJIT_MEM) {
FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG1, src1, src1w, TMP_REG1));
src1 = TMP_REG1;
} else if (src1 & SLJIT_IMM) {
FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
src1 = TMP_REG1;
}
if (src2 & SLJIT_IMM) {
if (is_left)
src2w = (src2w ^ mask) + 1;
return push_inst(compiler, (EXTR ^ (inv_bits | (inv_bits >> 9))) | RD(src_dst)
| RN(is_left ? src_dst : src1) | RM(is_left ? src1 : src_dst) | ((sljit_ins)src2w << 10));
}
FAIL_IF(push_inst(compiler, ((is_left ? LSLV : LSRV) ^ inv_bits) | RD(src_dst) | RN(src_dst) | RM(src2)));
if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) {
/* Shift left/right by 1. */
if (is_left)
imm = (sljit_ins)(inv_bits ? ((1 << 16) | (31 << 10)) : ((1 << 16) | (63 << 10) | (1 << 22)));
else
imm = (sljit_ins)(inv_bits ? ((31 << 16) | (30 << 10)) : ((63 << 16) | (62 << 10) | (1 << 22)));
FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(TMP_REG1) | RN(src1) | imm));
/* Set imm to mask. */
imm = (sljit_ins)(inv_bits ? (4 << 10) : ((5 << 10) | (1 << 22)));
FAIL_IF(push_inst(compiler, (EORI ^ inv_bits) | RD(TMP_REG2) | RN(src2) | imm));
src1 = TMP_REG1;
} else
FAIL_IF(push_inst(compiler, (SUB ^ inv_bits) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(src2)));
FAIL_IF(push_inst(compiler, ((is_left ? LSRV : LSLV) ^ inv_bits) | RD(TMP_REG1) | RN(src1) | RM(TMP_REG2)));
return push_inst(compiler, (ORR ^ inv_bits) | RD(src_dst) | RN(src_dst) | RM(TMP_REG1));
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 src, sljit_sw srcw)
{
@ -1550,10 +1704,9 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp
emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) ? INT_SIZE : WORD_SIZE), TMP_REG1, src, srcw, TMP_REG1);
src = TMP_REG1;
} else if (src & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
srcw = (sljit_s32)srcw;
#endif
FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
src = TMP_REG1;
}
@ -1699,11 +1852,15 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
{
switch (type) {
case SLJIT_EQUAL:
case SLJIT_EQUAL_F64:
case SLJIT_F_EQUAL:
case SLJIT_ORDERED_EQUAL:
case SLJIT_UNORDERED_OR_EQUAL: /* Not supported. */
return 0x1;
case SLJIT_NOT_EQUAL:
case SLJIT_NOT_EQUAL_F64:
case SLJIT_F_NOT_EQUAL:
case SLJIT_UNORDERED_OR_NOT_EQUAL:
case SLJIT_ORDERED_NOT_EQUAL: /* Not supported. */
return 0x0;
case SLJIT_CARRY:
@ -1712,7 +1869,6 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
/* fallthrough */
case SLJIT_LESS:
case SLJIT_LESS_F64:
return 0x2;
case SLJIT_NOT_CARRY:
@ -1721,27 +1877,33 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
/* fallthrough */
case SLJIT_GREATER_EQUAL:
case SLJIT_GREATER_EQUAL_F64:
return 0x3;
case SLJIT_GREATER:
case SLJIT_GREATER_F64:
case SLJIT_UNORDERED_OR_GREATER:
return 0x9;
case SLJIT_LESS_EQUAL:
case SLJIT_LESS_EQUAL_F64:
case SLJIT_F_LESS_EQUAL:
case SLJIT_ORDERED_LESS_EQUAL:
return 0x8;
case SLJIT_SIG_LESS:
case SLJIT_UNORDERED_OR_LESS:
return 0xa;
case SLJIT_SIG_GREATER_EQUAL:
case SLJIT_F_GREATER_EQUAL:
case SLJIT_ORDERED_GREATER_EQUAL:
return 0xb;
case SLJIT_SIG_GREATER:
case SLJIT_F_GREATER:
case SLJIT_ORDERED_GREATER:
return 0xd;
case SLJIT_SIG_LESS_EQUAL:
case SLJIT_UNORDERED_OR_LESS_EQUAL:
return 0xc;
case SLJIT_OVERFLOW:
@ -1749,7 +1911,7 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
return 0x0;
/* fallthrough */
case SLJIT_UNORDERED_F64:
case SLJIT_UNORDERED:
return 0x7;
case SLJIT_NOT_OVERFLOW:
@ -1757,9 +1919,16 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
return 0x1;
/* fallthrough */
case SLJIT_ORDERED_F64:
case SLJIT_ORDERED:
return 0x6;
case SLJIT_F_LESS:
case SLJIT_ORDERED_LESS:
return 0x5;
case SLJIT_UNORDERED_OR_GREATER_EQUAL:
return 0x4;
default:
SLJIT_UNREACHABLE();
return 0xe;
@ -1816,15 +1985,11 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile
CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
if (type & SLJIT_CALL_RETURN) {
PTR_FAIL_IF(emit_stack_frame_release(compiler));
PTR_FAIL_IF(emit_stack_frame_release(compiler, 0));
type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
}
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
compiler->skip_checks = 1;
#endif
SLJIT_SKIP_CHECKS(compiler);
return sljit_emit_jump(compiler, type);
}
@ -1869,10 +2034,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
CHECK_ERROR();
CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
ADJUST_LOCAL_OFFSET(src, srcw);
if (!(src & SLJIT_IMM)) {
if (src & SLJIT_MEM) {
ADJUST_LOCAL_OFFSET(src, srcw);
FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
src = TMP_REG1;
}
@ -1897,28 +2062,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
SLJIT_UNUSED_ARG(arg_types);
CHECK_ERROR();
CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
ADJUST_LOCAL_OFFSET(src, srcw);
if (src & SLJIT_MEM) {
ADJUST_LOCAL_OFFSET(src, srcw);
FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
src = TMP_REG1;
}
if (type & SLJIT_CALL_RETURN) {
if (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0) {
if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(src)));
src = TMP_REG1;
}
FAIL_IF(emit_stack_frame_release(compiler));
FAIL_IF(emit_stack_frame_release(compiler, 0));
type = SLJIT_JUMP;
}
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
compiler->skip_checks = 1;
#endif
SLJIT_SKIP_CHECKS(compiler);
return sljit_emit_ijump(compiler, type, src, srcw);
}
@ -1933,7 +2094,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
ADJUST_LOCAL_OFFSET(dst, dstw);
cc = get_cc(compiler, type & 0xff);
cc = get_cc(compiler, type);
dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
if (GET_OPCODE(op) < SLJIT_ADD) {
@ -1974,22 +2135,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
sljit_s32 dst_reg,
sljit_s32 src, sljit_sw srcw)
{
sljit_ins inv_bits = (dst_reg & SLJIT_32) ? W_OP : 0;
sljit_ins inv_bits = (type & SLJIT_32) ? W_OP : 0;
sljit_ins cc;
CHECK_ERROR();
CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
if (dst_reg & SLJIT_32)
if (type & SLJIT_32)
srcw = (sljit_s32)srcw;
FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
src = TMP_REG1;
srcw = 0;
}
cc = get_cc(compiler, type & 0xff);
dst_reg &= ~SLJIT_32;
cc = get_cc(compiler, type & ~SLJIT_32);
return push_inst(compiler, (CSEL ^ inv_bits) | (cc << 12) | RD(dst_reg) | RN(dst_reg) | RM(src));
}
@ -1998,11 +2158,82 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile
sljit_s32 reg,
sljit_s32 mem, sljit_sw memw)
{
sljit_u32 sign = 0, inst;
sljit_u32 inst;
CHECK_ERROR();
CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
if (!(reg & REG_PAIR_MASK))
return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);
ADJUST_LOCAL_OFFSET(mem, memw);
if (!(mem & REG_MASK)) {
FAIL_IF(load_immediate(compiler, TMP_REG1, memw & ~0x1f8));
mem = SLJIT_MEM1(TMP_REG1);
memw &= 0x1f8;
} else if (mem & OFFS_REG_MASK) {
FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(mem & REG_MASK) | RM(OFFS_REG(mem)) | ((sljit_ins)(memw & 0x3) << 10)));
mem = SLJIT_MEM1(TMP_REG1);
memw = 0;
} else if ((memw & 0x7) != 0 || memw > 0x1f8 || memw < -0x200) {
inst = ADDI;
if (memw < 0) {
/* Remains negative for integer min. */
memw = -memw;
inst = SUBI;
} else if ((memw & 0x7) == 0 && memw <= 0x7ff0) {
if (!(type & SLJIT_MEM_STORE) && (mem & REG_MASK) == REG_PAIR_FIRST(reg)) {
FAIL_IF(push_inst(compiler, LDRI | RD(REG_PAIR_SECOND(reg)) | RN(mem & REG_MASK) | ((sljit_ins)memw << 7)));
return push_inst(compiler, LDRI | RD(REG_PAIR_FIRST(reg)) | RN(mem & REG_MASK) | ((sljit_ins)(memw + 0x8) << 7));
}
inst = (type & SLJIT_MEM_STORE) ? STRI : LDRI;
FAIL_IF(push_inst(compiler, inst | RD(REG_PAIR_FIRST(reg)) | RN(mem & REG_MASK) | ((sljit_ins)memw << 7)));
return push_inst(compiler, inst | RD(REG_PAIR_SECOND(reg)) | RN(mem & REG_MASK) | ((sljit_ins)(memw + 0x8) << 7));
}
if ((sljit_uw)memw <= 0xfff) {
FAIL_IF(push_inst(compiler, inst | RD(TMP_REG1) | RN(mem & REG_MASK) | ((sljit_ins)memw << 10)));
memw = 0;
} else if ((sljit_uw)memw <= 0xffffff) {
FAIL_IF(push_inst(compiler, inst | (1 << 22) | RD(TMP_REG1) | RN(mem & REG_MASK) | (((sljit_ins)memw >> 12) << 10)));
if ((memw & 0xe07) != 0) {
FAIL_IF(push_inst(compiler, inst | RD(TMP_REG1) | RN(TMP_REG1) | (((sljit_ins)memw & 0xfff) << 10)));
memw = 0;
} else {
memw &= 0xfff;
}
} else {
FAIL_IF(load_immediate(compiler, TMP_REG1, memw));
FAIL_IF(push_inst(compiler, (inst == ADDI ? ADD : SUB) | RD(TMP_REG1) | RN(mem & REG_MASK) | RM(TMP_REG1)));
memw = 0;
}
mem = SLJIT_MEM1(TMP_REG1);
if (inst == SUBI)
memw = -memw;
}
SLJIT_ASSERT((memw & 0x7) == 0 && memw <= 0x1f8 && memw >= -0x200);
return push_inst(compiler, ((type & SLJIT_MEM_STORE) ? STP : LDP) | RT(REG_PAIR_FIRST(reg)) | RT2(REG_PAIR_SECOND(reg)) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x3f8) << 12));
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 reg,
sljit_s32 mem, sljit_sw memw)
{
sljit_u32 sign = 0, inst;
CHECK_ERROR();
CHECK(check_sljit_emit_mem_update(compiler, type, reg, mem, memw));
if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -256))
return SLJIT_ERR_UNSUPPORTED;
@ -2042,20 +2273,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile
if (!(type & SLJIT_MEM_STORE))
inst |= sign ? 0x00800000 : 0x00400000;
if (type & SLJIT_MEM_PRE)
if (!(type & SLJIT_MEM_POST))
inst |= 0x800;
return push_inst(compiler, inst | RT(reg) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x1ff) << 12));
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type,
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 freg,
sljit_s32 mem, sljit_sw memw)
{
sljit_u32 inst;
CHECK_ERROR();
CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw));
CHECK(check_sljit_emit_fmem_update(compiler, type, freg, mem, memw));
if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -256))
return SLJIT_ERR_UNSUPPORTED;
@ -2071,7 +2302,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil
if (!(type & SLJIT_MEM_STORE))
inst |= 0x00400000;
if (type & SLJIT_MEM_PRE)
if (!(type & SLJIT_MEM_POST))
inst |= 0x800;
return push_inst(compiler, inst | VT(freg) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x1ff) << 12));

File diff suppressed because it is too large Load Diff

View File

@ -38,383 +38,6 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_a
return (imm & 0xffff) ? push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar) : SLJIT_SUCCESS;
}
#define EMIT_LOGICAL(op_imm, op_norm) \
if (flags & SRC2_IMM) { \
if (op & SLJIT_SET_Z) \
FAIL_IF(push_inst(compiler, op_imm | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); \
if (!(flags & UNUSED_DEST)) \
FAIL_IF(push_inst(compiler, op_imm | S(src1) | T(dst) | IMM(src2), DR(dst))); \
} \
else { \
if (op & SLJIT_SET_Z) \
FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
if (!(flags & UNUSED_DEST)) \
FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | D(dst), DR(dst))); \
}
#define EMIT_SHIFT(op_imm, op_v) \
if (flags & SRC2_IMM) { \
if (op & SLJIT_SET_Z) \
FAIL_IF(push_inst(compiler, op_imm | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \
if (!(flags & UNUSED_DEST)) \
FAIL_IF(push_inst(compiler, op_imm | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \
} \
else { \
if (op & SLJIT_SET_Z) \
FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
if (!(flags & UNUSED_DEST)) \
FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | D(dst), DR(dst))); \
}
static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
sljit_s32 dst, sljit_s32 src1, sljit_sw src2)
{
sljit_s32 is_overflow, is_carry, is_handled;
switch (GET_OPCODE(op)) {
case SLJIT_MOV:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
if (dst != src2)
return push_inst(compiler, ADDU | S(src2) | TA(0) | D(dst), DR(dst));
return SLJIT_SUCCESS;
case SLJIT_MOV_U8:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst));
SLJIT_ASSERT(dst == src2);
return SLJIT_SUCCESS;
case SLJIT_MOV_S8:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst));
#else /* SLJIT_MIPS_REV < 1 */
FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(24), DR(dst)));
return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(24), DR(dst));
#endif /* SLJIT_MIPS_REV >= 1 */
}
SLJIT_ASSERT(dst == src2);
return SLJIT_SUCCESS;
case SLJIT_MOV_U16:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst));
SLJIT_ASSERT(dst == src2);
return SLJIT_SUCCESS;
case SLJIT_MOV_S16:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst));
#else /* SLJIT_MIPS_REV < 1 */
FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(16), DR(dst)));
return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(16), DR(dst));
#endif /* SLJIT_MIPS_REV >= 1 */
}
SLJIT_ASSERT(dst == src2);
return SLJIT_SUCCESS;
case SLJIT_NOT:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
if (!(flags & UNUSED_DEST))
FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst)));
return SLJIT_SUCCESS;
case SLJIT_CLZ:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, CLZ | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
if (!(flags & UNUSED_DEST))
FAIL_IF(push_inst(compiler, CLZ | S(src2) | T(dst) | D(dst), DR(dst)));
#else /* SLJIT_MIPS_REV < 1 */
if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) {
FAIL_IF(push_inst(compiler, SRL | T(src2) | DA(EQUAL_FLAG) | SH_IMM(31), EQUAL_FLAG));
return push_inst(compiler, XORI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG);
}
/* Nearly all instructions are unmovable in the following sequence. */
FAIL_IF(push_inst(compiler, ADDU | S(src2) | TA(0) | D(TMP_REG1), DR(TMP_REG1)));
/* Check zero. */
FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG1) | TA(0) | IMM(5), UNMOVABLE_INS));
FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM(32), UNMOVABLE_INS));
FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(dst) | IMM(-1), DR(dst)));
/* Loop for searching the highest bit. */
FAIL_IF(push_inst(compiler, ADDIU | S(dst) | T(dst) | IMM(1), DR(dst)));
FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS));
FAIL_IF(push_inst(compiler, SLL | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS));
#endif /* SLJIT_MIPS_REV >= 1 */
return SLJIT_SUCCESS;
case SLJIT_ADD:
is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
if (flags & SRC2_IMM) {
if (is_overflow) {
if (src2 >= 0)
FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
else
FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
}
else if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
if (is_overflow || is_carry) {
if (src2 >= 0)
FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
else {
FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
}
}
/* dst may be the same as src1 or src2. */
if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst)));
}
else {
if (is_overflow)
FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
else if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
if (is_overflow || is_carry)
FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
/* dst may be the same as src1 or src2. */
if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst)));
}
/* a + b >= a | b (otherwise, the carry should be set to 1). */
if (is_overflow || is_carry)
FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
if (!is_overflow)
return SLJIT_SUCCESS;
FAIL_IF(push_inst(compiler, SLL | TA(OTHER_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG));
return push_inst(compiler, SRL | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG);
case SLJIT_ADDC:
is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
if (flags & SRC2_IMM) {
if (is_carry) {
if (src2 >= 0)
FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
else {
FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
FAIL_IF(push_inst(compiler, OR | S(src1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
}
}
FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst)));
} else {
if (is_carry)
FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
/* dst may be the same as src1 or src2. */
FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst)));
}
if (is_carry)
FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst)));
if (!is_carry)
return SLJIT_SUCCESS;
/* Set ULESS_FLAG (dst == 0) && (OTHER_FLAG == 1). */
FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
/* Set carry flag. */
return push_inst(compiler, OR | SA(OTHER_FLAG) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG);
case SLJIT_SUB:
if ((flags & SRC2_IMM) && src2 == SIMM_MIN) {
FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
src2 = TMP_REG2;
flags &= ~SRC2_IMM;
}
is_handled = 0;
if (flags & SRC2_IMM) {
if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) {
FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
is_handled = 1;
}
else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) {
FAIL_IF(push_inst(compiler, SLTI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
is_handled = 1;
}
}
if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) {
is_handled = 1;
if (flags & SRC2_IMM) {
FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
src2 = TMP_REG2;
flags &= ~SRC2_IMM;
}
if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) {
FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
}
else if (GET_FLAG_TYPE(op) == SLJIT_GREATER || GET_FLAG_TYPE(op) == SLJIT_LESS_EQUAL)
{
FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG));
}
else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) {
FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
}
else if (GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER || GET_FLAG_TYPE(op) == SLJIT_SIG_LESS_EQUAL)
{
FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG));
}
}
if (is_handled) {
if (flags & SRC2_IMM) {
if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG));
if (!(flags & UNUSED_DEST))
return push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst));
}
else {
if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
if (!(flags & UNUSED_DEST))
return push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst));
}
return SLJIT_SUCCESS;
}
is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
if (flags & SRC2_IMM) {
if (is_overflow) {
if (src2 >= 0)
FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
else
FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
}
else if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG));
if (is_overflow || is_carry)
FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
/* dst may be the same as src1 or src2. */
if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst)));
}
else {
if (is_overflow)
FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
else if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
if (is_overflow || is_carry)
FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
/* dst may be the same as src1 or src2. */
if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst)));
}
if (!is_overflow)
return SLJIT_SUCCESS;
FAIL_IF(push_inst(compiler, SLL | TA(OTHER_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG));
return push_inst(compiler, SRL | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG);
case SLJIT_SUBC:
if ((flags & SRC2_IMM) && src2 == SIMM_MIN) {
FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
src2 = TMP_REG2;
flags &= ~SRC2_IMM;
}
is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
if (flags & SRC2_IMM) {
if (is_carry)
FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
/* dst may be the same as src1 or src2. */
FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst)));
}
else {
if (is_carry)
FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
/* dst may be the same as src1 or src2. */
FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst)));
}
if (is_carry)
FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | D(TMP_REG1), DR(TMP_REG1)));
FAIL_IF(push_inst(compiler, SUBU | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst)));
return (is_carry) ? push_inst(compiler, OR | SA(EQUAL_FLAG) | T(TMP_REG1) | DA(OTHER_FLAG), OTHER_FLAG) : SLJIT_SUCCESS;
case SLJIT_MUL:
SLJIT_ASSERT(!(flags & SRC2_IMM));
if (GET_FLAG_TYPE(op) != SLJIT_OVERFLOW) {
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst));
#else /* SLJIT_MIPS_REV < 1 */
FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS));
return push_inst(compiler, MFLO | D(dst), DR(dst));
#endif /* SLJIT_MIPS_REV >= 1 */
}
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)
FAIL_IF(push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst)));
FAIL_IF(push_inst(compiler, MUH | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
#else /* SLJIT_MIPS_REV < 6 */
FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS));
FAIL_IF(push_inst(compiler, MFHI | DA(EQUAL_FLAG), EQUAL_FLAG));
FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst)));
#endif /* SLJIT_MIPS_REV >= 6 */
FAIL_IF(push_inst(compiler, SRA | T(dst) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG));
return push_inst(compiler, SUBU | SA(EQUAL_FLAG) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG);
case SLJIT_AND:
EMIT_LOGICAL(ANDI, AND);
return SLJIT_SUCCESS;
case SLJIT_OR:
EMIT_LOGICAL(ORI, OR);
return SLJIT_SUCCESS;
case SLJIT_XOR:
EMIT_LOGICAL(XORI, XOR);
return SLJIT_SUCCESS;
case SLJIT_SHL:
EMIT_SHIFT(SLL, SLLV);
return SLJIT_SUCCESS;
case SLJIT_LSHR:
EMIT_SHIFT(SRL, SRLV);
return SLJIT_SUCCESS;
case SLJIT_ASHR:
EMIT_SHIFT(SRA, SRAV);
return SLJIT_SUCCESS;
}
SLJIT_UNREACHABLE();
return SLJIT_SUCCESS;
}
static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value)
{
FAIL_IF(push_inst(compiler, LUI | T(dst) | IMM(init_value >> 16), DR(dst)));
@ -573,8 +196,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile
sljit_s32 arg_types)
{
struct sljit_jump *jump;
sljit_u32 extra_space = (sljit_u32)type;
sljit_ins ins;
sljit_u32 extra_space = 0;
sljit_ins ins = NOP;
CHECK_ERROR_PTR();
CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
@ -583,14 +206,23 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile
PTR_FAIL_IF(!jump);
set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins, &extra_space));
if ((type & 0xff) != SLJIT_CALL_REG_ARG) {
extra_space = (sljit_u32)type;
PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins, &extra_space));
} else if (type & SLJIT_CALL_RETURN)
PTR_FAIL_IF(emit_stack_frame_release(compiler, 0, &ins));
SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
PTR_FAIL_IF(emit_const(compiler, PIC_ADDR_REG, 0));
if (ins == NOP && compiler->delay_slot != UNMOVABLE_INS)
jump->flags |= IS_MOVABLE;
if (!(type & SLJIT_CALL_RETURN) || extra_space > 0) {
jump->flags |= IS_JAL | IS_CALL;
jump->flags |= IS_JAL;
if ((type & 0xff) != SLJIT_CALL_REG_ARG)
jump->flags |= IS_CALL;
PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
} else
PTR_FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS));
@ -598,6 +230,9 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile
jump->addr = compiler->size;
PTR_FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS));
/* Maximum number of instructions required for generating a constant. */
compiler->size += 2;
if (extra_space == 0)
return jump;
@ -623,16 +258,37 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
CHECK_ERROR();
CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
if (src & SLJIT_MEM) {
ADJUST_LOCAL_OFFSET(src, srcw);
FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw));
src = PIC_ADDR_REG;
srcw = 0;
}
if ((type & 0xff) == SLJIT_CALL_REG_ARG) {
if (type & SLJIT_CALL_RETURN) {
if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
FAIL_IF(push_inst(compiler, ADDU | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG)));
src = PIC_ADDR_REG;
srcw = 0;
}
FAIL_IF(emit_stack_frame_release(compiler, 0, &ins));
if (ins != NOP)
FAIL_IF(push_inst(compiler, ins, MOVABLE_INS));
}
SLJIT_SKIP_CHECKS(compiler);
return sljit_emit_ijump(compiler, type, src, srcw);
}
SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
if (src & SLJIT_IMM)
FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw));
else if (FAST_IS_REG(src))
else if (src != PIC_ADDR_REG)
FAIL_IF(push_inst(compiler, ADDU | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG)));
else if (src & SLJIT_MEM) {
ADJUST_LOCAL_OFFSET(src, srcw);
FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw));
}
FAIL_IF(call_with_args(compiler, arg_types, &ins, &extra_space));

View File

@ -118,421 +118,6 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_a
return !(imm & 0xffff) ? SLJIT_SUCCESS : push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar);
}
#define SELECT_OP(a, b) \
(!(op & SLJIT_32) ? a : b)
#define EMIT_LOGICAL(op_imm, op_norm) \
if (flags & SRC2_IMM) { \
if (op & SLJIT_SET_Z) \
FAIL_IF(push_inst(compiler, op_imm | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); \
if (!(flags & UNUSED_DEST)) \
FAIL_IF(push_inst(compiler, op_imm | S(src1) | T(dst) | IMM(src2), DR(dst))); \
} \
else { \
if (op & SLJIT_SET_Z) \
FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
if (!(flags & UNUSED_DEST)) \
FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | D(dst), DR(dst))); \
}
#define EMIT_SHIFT(op_dimm, op_dimm32, op_imm, op_dv, op_v) \
if (flags & SRC2_IMM) { \
if (src2 >= 32) { \
SLJIT_ASSERT(!(op & SLJIT_32)); \
ins = op_dimm32; \
src2 -= 32; \
} \
else \
ins = (op & SLJIT_32) ? op_imm : op_dimm; \
if (op & SLJIT_SET_Z) \
FAIL_IF(push_inst(compiler, ins | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \
if (!(flags & UNUSED_DEST)) \
FAIL_IF(push_inst(compiler, ins | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \
} \
else { \
ins = (op & SLJIT_32) ? op_v : op_dv; \
if (op & SLJIT_SET_Z) \
FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
if (!(flags & UNUSED_DEST)) \
FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | D(dst), DR(dst))); \
}
static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
sljit_s32 dst, sljit_s32 src1, sljit_sw src2)
{
sljit_ins ins;
sljit_s32 is_overflow, is_carry, is_handled;
switch (GET_OPCODE(op)) {
case SLJIT_MOV:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
if (dst != src2)
return push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(dst), DR(dst));
return SLJIT_SUCCESS;
case SLJIT_MOV_U8:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst));
SLJIT_ASSERT(dst == src2);
return SLJIT_SUCCESS;
case SLJIT_MOV_S8:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
if (op & SLJIT_32)
return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst));
#endif /* SLJIT_MIPS_REV >= 1 */
FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(24), DR(dst)));
return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(24), DR(dst));
}
SLJIT_ASSERT(dst == src2);
return SLJIT_SUCCESS;
case SLJIT_MOV_U16:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst));
SLJIT_ASSERT(dst == src2);
return SLJIT_SUCCESS;
case SLJIT_MOV_S16:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
if (op & SLJIT_32)
return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst));
#endif /* SLJIT_MIPS_REV >= 1 */
FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(16), DR(dst)));
return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(16), DR(dst));
}
SLJIT_ASSERT(dst == src2);
return SLJIT_SUCCESS;
case SLJIT_MOV_U32:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && !(op & SLJIT_32));
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2)
if (dst == src2)
return push_inst(compiler, DINSU | T(src2) | SA(0) | (31 << 11) | (0 << 11), DR(dst));
#endif /* SLJIT_MIPS_REV >= 2 */
FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(0), DR(dst)));
return push_inst(compiler, DSRL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst));
}
SLJIT_ASSERT(dst == src2);
return SLJIT_SUCCESS;
case SLJIT_MOV_S32:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && !(op & SLJIT_32));
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
return push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(0), DR(dst));
}
SLJIT_ASSERT(dst == src2);
return SLJIT_SUCCESS;
case SLJIT_NOT:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
if (!(flags & UNUSED_DEST))
FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst)));
return SLJIT_SUCCESS;
case SLJIT_CLZ:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
if (!(flags & UNUSED_DEST))
FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | T(dst) | D(dst), DR(dst)));
#else /* SLJIT_MIPS_REV < 1 */
if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) {
FAIL_IF(push_inst(compiler, SELECT_OP(DSRL32, SRL) | T(src2) | DA(EQUAL_FLAG) | SH_IMM(31), EQUAL_FLAG));
return push_inst(compiler, XORI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG);
}
/* Nearly all instructions are unmovable in the following sequence. */
FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(TMP_REG1), DR(TMP_REG1)));
/* Check zero. */
FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG1) | TA(0) | IMM(5), UNMOVABLE_INS));
FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM((op & SLJIT_32) ? 32 : 64), UNMOVABLE_INS));
FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | T(dst) | IMM(-1), DR(dst)));
/* Loop for searching the highest bit. */
FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(dst) | T(dst) | IMM(1), DR(dst)));
FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS));
FAIL_IF(push_inst(compiler, SELECT_OP(DSLL, SLL) | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS));
#endif /* SLJIT_MIPS_REV >= 1 */
return SLJIT_SUCCESS;
case SLJIT_ADD:
is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
if (flags & SRC2_IMM) {
if (is_overflow) {
if (src2 >= 0)
FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
else
FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
}
else if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
if (is_overflow || is_carry) {
if (src2 >= 0)
FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
else {
FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
}
}
/* dst may be the same as src1 or src2. */
if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst)));
}
else {
if (is_overflow)
FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
else if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
if (is_overflow || is_carry)
FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
/* dst may be the same as src1 or src2. */
if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst)));
}
/* a + b >= a | b (otherwise, the carry should be set to 1). */
if (is_overflow || is_carry)
FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
if (!is_overflow)
return SLJIT_SUCCESS;
FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(OTHER_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG));
return push_inst(compiler, SELECT_OP(DSRL32, SRL) | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG);
case SLJIT_ADDC:
is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
if (flags & SRC2_IMM) {
if (is_carry) {
if (src2 >= 0)
FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
else {
FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
FAIL_IF(push_inst(compiler, OR | S(src1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
}
}
FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst)));
} else {
if (is_carry)
FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
/* dst may be the same as src1 or src2. */
FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst)));
}
if (is_carry)
FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst)));
if (!is_carry)
return SLJIT_SUCCESS;
/* Set ULESS_FLAG (dst == 0) && (OTHER_FLAG == 1). */
FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
/* Set carry flag. */
return push_inst(compiler, OR | SA(OTHER_FLAG) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG);
case SLJIT_SUB:
if ((flags & SRC2_IMM) && src2 == SIMM_MIN) {
FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
src2 = TMP_REG2;
flags &= ~SRC2_IMM;
}
is_handled = 0;
if (flags & SRC2_IMM) {
if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) {
FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
is_handled = 1;
}
else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) {
FAIL_IF(push_inst(compiler, SLTI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
is_handled = 1;
}
}
if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) {
is_handled = 1;
if (flags & SRC2_IMM) {
FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
src2 = TMP_REG2;
flags &= ~SRC2_IMM;
}
if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) {
FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
}
else if (GET_FLAG_TYPE(op) == SLJIT_GREATER || GET_FLAG_TYPE(op) == SLJIT_LESS_EQUAL)
{
FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG));
}
else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) {
FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
}
else if (GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER || GET_FLAG_TYPE(op) == SLJIT_SIG_LESS_EQUAL)
{
FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG));
}
}
if (is_handled) {
if (flags & SRC2_IMM) {
if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG));
if (!(flags & UNUSED_DEST))
return push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst));
}
else {
if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
if (!(flags & UNUSED_DEST))
return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst));
}
return SLJIT_SUCCESS;
}
is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
if (flags & SRC2_IMM) {
if (is_overflow) {
if (src2 >= 0)
FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
else
FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
}
else if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG));
if (is_overflow || is_carry)
FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
/* dst may be the same as src1 or src2. */
if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst)));
}
else {
if (is_overflow)
FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
else if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
if (is_overflow || is_carry)
FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
/* dst may be the same as src1 or src2. */
if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst)));
}
if (!is_overflow)
return SLJIT_SUCCESS;
FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(OTHER_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG));
return push_inst(compiler, SELECT_OP(DSRL32, SRL) | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG);
case SLJIT_SUBC:
if ((flags & SRC2_IMM) && src2 == SIMM_MIN) {
FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
src2 = TMP_REG2;
flags &= ~SRC2_IMM;
}
is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
if (flags & SRC2_IMM) {
if (is_carry)
FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
/* dst may be the same as src1 or src2. */
FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst)));
}
else {
if (is_carry)
FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
/* dst may be the same as src1 or src2. */
FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst)));
}
if (is_carry)
FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | D(TMP_REG1), DR(TMP_REG1)));
FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst)));
return (is_carry) ? push_inst(compiler, OR | SA(EQUAL_FLAG) | T(TMP_REG1) | DA(OTHER_FLAG), OTHER_FLAG) : SLJIT_SUCCESS;
case SLJIT_MUL:
SLJIT_ASSERT(!(flags & SRC2_IMM));
if (GET_FLAG_TYPE(op) != SLJIT_OVERFLOW) {
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)
return push_inst(compiler, SELECT_OP(DMUL, MUL) | S(src1) | T(src2) | D(dst), DR(dst));
#elif (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
if (op & SLJIT_32)
return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst));
FAIL_IF(push_inst(compiler, DMULT | S(src1) | T(src2), MOVABLE_INS));
return push_inst(compiler, MFLO | D(dst), DR(dst));
#else /* SLJIT_MIPS_REV < 1 */
FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS));
return push_inst(compiler, MFLO | D(dst), DR(dst));
#endif /* SLJIT_MIPS_REV >= 6 */
}
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)
FAIL_IF(push_inst(compiler, SELECT_OP(DMUL, MUL) | S(src1) | T(src2) | D(dst), DR(dst)));
FAIL_IF(push_inst(compiler, SELECT_OP(DMUH, MUH) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
#else /* SLJIT_MIPS_REV < 6 */
FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS));
FAIL_IF(push_inst(compiler, MFHI | DA(EQUAL_FLAG), EQUAL_FLAG));
FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst)));
#endif /* SLJIT_MIPS_REV >= 6 */
FAIL_IF(push_inst(compiler, SELECT_OP(DSRA32, SRA) | T(dst) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG));
return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(EQUAL_FLAG) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG);
case SLJIT_AND:
EMIT_LOGICAL(ANDI, AND);
return SLJIT_SUCCESS;
case SLJIT_OR:
EMIT_LOGICAL(ORI, OR);
return SLJIT_SUCCESS;
case SLJIT_XOR:
EMIT_LOGICAL(XORI, XOR);
return SLJIT_SUCCESS;
case SLJIT_SHL:
EMIT_SHIFT(DSLL, DSLL32, SLL, DSLLV, SLLV);
return SLJIT_SUCCESS;
case SLJIT_LSHR:
EMIT_SHIFT(DSRL, DSRL32, SRL, DSRLV, SRLV);
return SLJIT_SUCCESS;
case SLJIT_ASHR:
EMIT_SHIFT(DSRA, DSRA32, SRA, DSRAV, SRAV);
return SLJIT_SUCCESS;
}
SLJIT_UNREACHABLE();
return SLJIT_SUCCESS;
}
static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value)
{
FAIL_IF(push_inst(compiler, LUI | T(dst) | IMM(init_value >> 48), DR(dst)));
@ -653,14 +238,20 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile
if (type & SLJIT_CALL_RETURN)
PTR_FAIL_IF(emit_stack_frame_release(compiler, 0, &ins));
PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins));
if ((type & 0xff) != SLJIT_CALL_REG_ARG)
PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins));
SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
PTR_FAIL_IF(emit_const(compiler, PIC_ADDR_REG, 0));
if (ins == NOP && compiler->delay_slot != UNMOVABLE_INS)
jump->flags |= IS_MOVABLE;
if (!(type & SLJIT_CALL_RETURN)) {
jump->flags |= IS_JAL | IS_CALL;
jump->flags |= IS_JAL;
if ((type & 0xff) != SLJIT_CALL_REG_ARG)
jump->flags |= IS_CALL;
PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
} else
PTR_FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS));
@ -668,6 +259,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile
jump->addr = compiler->size;
PTR_FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS));
/* Maximum number of instructions required for generating a constant. */
compiler->size += 6;
return jump;
}
@ -680,16 +273,37 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
CHECK_ERROR();
CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
if (src & SLJIT_MEM) {
ADJUST_LOCAL_OFFSET(src, srcw);
FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw));
src = PIC_ADDR_REG;
srcw = 0;
}
if ((type & 0xff) == SLJIT_CALL_REG_ARG) {
if (type & SLJIT_CALL_RETURN) {
if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
FAIL_IF(push_inst(compiler, DADDU | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG)));
src = PIC_ADDR_REG;
srcw = 0;
}
FAIL_IF(emit_stack_frame_release(compiler, 0, &ins));
if (ins != NOP)
FAIL_IF(push_inst(compiler, ins, MOVABLE_INS));
}
SLJIT_SKIP_CHECKS(compiler);
return sljit_emit_ijump(compiler, type, src, srcw);
}
SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
if (src & SLJIT_IMM)
FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw));
else if (FAST_IS_REG(src))
else if (src != PIC_ADDR_REG)
FAIL_IF(push_inst(compiler, DADDU | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG)));
else if (src & SLJIT_MEM) {
ADJUST_LOCAL_OFFSET(src, srcw);
FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw));
}
if (type & SLJIT_CALL_RETURN)
FAIL_IF(emit_stack_frame_release(compiler, 0, &ins));

File diff suppressed because it is too large Load Diff

View File

@ -38,12 +38,15 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg,
return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS;
}
/* Simplified mnemonics: clrlwi. */
#define INS_CLEAR_LEFT(dst, src, from) \
(RLWINM | S(src) | A(dst) | ((from) << 6) | (31 << 1))
(RLWINM | S(src) | A(dst) | RLWI_MBE(from, 31))
static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
sljit_s32 dst, sljit_s32 src1, sljit_s32 src2)
{
sljit_u32 imm;
switch (op) {
case SLJIT_MOV:
case SLJIT_MOV_U32:
@ -90,6 +93,16 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
SLJIT_ASSERT(src1 == TMP_REG1);
return push_inst(compiler, CNTLZW | S(src2) | A(dst));
case SLJIT_CTZ:
SLJIT_ASSERT(src1 == TMP_REG1);
FAIL_IF(push_inst(compiler, NEG | D(TMP_REG1) | A(src2)));
FAIL_IF(push_inst(compiler, AND | S(src2) | A(dst) | B(TMP_REG1)));
FAIL_IF(push_inst(compiler, CNTLZW | S(dst) | A(dst)));
FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG1) | A(dst) | IMM(-32)));
/* The highest bits are set, if dst < 32, zero otherwise. */
FAIL_IF(push_inst(compiler, SRWI(27) | S(TMP_REG1) | A(TMP_REG1)));
return push_inst(compiler, XOR | S(dst) | A(dst) | B(TMP_REG1));
case SLJIT_ADD:
if (flags & ALT_FORM1) {
/* Setting XER SO is not enough, CR SO is also needed. */
@ -103,12 +116,14 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
if (flags & ALT_FORM3)
return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm);
imm = compiler->imm;
if (flags & ALT_FORM4) {
FAIL_IF(push_inst(compiler, ADDIS | D(dst) | A(src1) | (((compiler->imm >> 16) & 0xffff) + ((compiler->imm >> 15) & 0x1))));
FAIL_IF(push_inst(compiler, ADDIS | D(dst) | A(src1) | (((imm >> 16) & 0xffff) + ((imm >> 15) & 0x1))));
src1 = dst;
}
return push_inst(compiler, ADDI | D(dst) | A(src1) | (compiler->imm & 0xffff));
return push_inst(compiler, ADDI | D(dst) | A(src1) | (imm & 0xffff));
}
if (flags & ALT_FORM3) {
SLJIT_ASSERT(src2 == TMP_REG2);
@ -208,8 +223,10 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
}
if (flags & ALT_FORM3) {
SLJIT_ASSERT(src2 == TMP_REG2);
FAIL_IF(push_inst(compiler, ORI | S(src1) | A(dst) | IMM(compiler->imm)));
return push_inst(compiler, ORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16));
imm = compiler->imm;
FAIL_IF(push_inst(compiler, ORI | S(src1) | A(dst) | IMM(imm)));
return push_inst(compiler, ORIS | S(dst) | A(dst) | IMM(imm >> 16));
}
return push_inst(compiler, OR | RC(flags) | S(src1) | A(dst) | B(src2));
@ -224,34 +241,78 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
}
if (flags & ALT_FORM3) {
SLJIT_ASSERT(src2 == TMP_REG2);
FAIL_IF(push_inst(compiler, XORI | S(src1) | A(dst) | IMM(compiler->imm)));
return push_inst(compiler, XORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16));
imm = compiler->imm;
FAIL_IF(push_inst(compiler, XORI | S(src1) | A(dst) | IMM(imm)));
return push_inst(compiler, XORIS | S(dst) | A(dst) | IMM(imm >> 16));
}
return push_inst(compiler, XOR | RC(flags) | S(src1) | A(dst) | B(src2));
case SLJIT_SHL:
case SLJIT_MSHL:
if (flags & ALT_FORM1) {
SLJIT_ASSERT(src2 == TMP_REG2);
compiler->imm &= 0x1f;
return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11) | ((31 - compiler->imm) << 1));
imm = compiler->imm & 0x1f;
return push_inst(compiler, SLWI(imm) | RC(flags) | S(src1) | A(dst));
}
if (op == SLJIT_MSHL) {
FAIL_IF(push_inst(compiler, ANDI | S(src2) | A(TMP_REG2) | 0x1f));
src2 = TMP_REG2;
}
return push_inst(compiler, SLW | RC(flags) | S(src1) | A(dst) | B(src2));
case SLJIT_LSHR:
case SLJIT_MLSHR:
if (flags & ALT_FORM1) {
SLJIT_ASSERT(src2 == TMP_REG2);
compiler->imm &= 0x1f;
return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (((32 - compiler->imm) & 0x1f) << 11) | (compiler->imm << 6) | (31 << 1));
imm = compiler->imm & 0x1f;
/* Since imm can be 0, SRWI() cannot be used. */
return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | RLWI_SH((32 - imm) & 0x1f) | RLWI_MBE(imm, 31));
}
if (op == SLJIT_MLSHR) {
FAIL_IF(push_inst(compiler, ANDI | S(src2) | A(TMP_REG2) | 0x1f));
src2 = TMP_REG2;
}
return push_inst(compiler, SRW | RC(flags) | S(src1) | A(dst) | B(src2));
case SLJIT_ASHR:
case SLJIT_MASHR:
if (flags & ALT_FORM1) {
SLJIT_ASSERT(src2 == TMP_REG2);
compiler->imm &= 0x1f;
return push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11));
imm = compiler->imm & 0x1f;
return push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (imm << 11));
}
if (op == SLJIT_MASHR) {
FAIL_IF(push_inst(compiler, ANDI | S(src2) | A(TMP_REG2) | 0x1f));
src2 = TMP_REG2;
}
return push_inst(compiler, SRAW | RC(flags) | S(src1) | A(dst) | B(src2));
case SLJIT_ROTL:
case SLJIT_ROTR:
if (flags & ALT_FORM1) {
SLJIT_ASSERT(src2 == TMP_REG2);
imm = compiler->imm;
if (op == SLJIT_ROTR)
imm = (sljit_u32)(-(sljit_s32)imm);
imm &= 0x1f;
return push_inst(compiler, RLWINM | S(src1) | A(dst) | RLWI_SH(imm) | RLWI_MBE(0, 31));
}
if (op == SLJIT_ROTR) {
FAIL_IF(push_inst(compiler, SUBFIC | D(TMP_REG2) | A(src2) | 0));
src2 = TMP_REG2;
}
return push_inst(compiler, RLWNM | S(src1) | A(dst) | B(src2) | RLWI_MBE(0, 31));
}
SLJIT_UNREACHABLE();
@ -277,8 +338,3 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta
inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 2);
}
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
}

View File

@ -35,8 +35,9 @@
#error "Must implement count leading zeroes"
#endif
#define PUSH_RLDICR(reg, shift) \
push_inst(compiler, RLDI(reg, reg, 63 - shift, shift, 1))
/* Computes SLDI(63 - shift). */
#define PUSH_SLDI_NEG(reg, shift) \
push_inst(compiler, RLDICR | S(reg) | A(reg) | RLDI_SH(63 - shift) | RLDI_ME(shift))
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
{
@ -66,14 +67,14 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg,
if ((tmp & ~0xffff000000000000ul) == 0) {
FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | (sljit_ins)(tmp >> 48)));
shift += 15;
return PUSH_RLDICR(reg, shift);
return PUSH_SLDI_NEG(reg, shift);
}
if ((tmp & ~0xffffffff00000000ul) == 0) {
FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | (sljit_ins)(tmp >> 48)));
FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp >> 32)));
shift += 31;
return PUSH_RLDICR(reg, shift);
return PUSH_SLDI_NEG(reg, shift);
}
/* Cut out the 16 bit from immediate. */
@ -82,13 +83,13 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg,
if (tmp2 <= 0xffff) {
FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | (sljit_ins)(tmp >> 48)));
FAIL_IF(PUSH_RLDICR(reg, shift));
FAIL_IF(PUSH_SLDI_NEG(reg, shift));
return push_inst(compiler, ORI | S(reg) | A(reg) | (sljit_ins)tmp2);
}
if (tmp2 <= 0xffffffff) {
FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48)));
FAIL_IF(PUSH_RLDICR(reg, shift));
FAIL_IF(PUSH_SLDI_NEG(reg, shift));
FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | (sljit_ins)(tmp2 >> 16)));
return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp2)) : SLJIT_SUCCESS;
}
@ -100,22 +101,23 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg,
FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | (sljit_ins)(tmp >> 48)));
shift2 += 15;
shift += (63 - shift2);
FAIL_IF(PUSH_RLDICR(reg, shift));
FAIL_IF(PUSH_SLDI_NEG(reg, shift));
FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | (sljit_ins)(tmp2 >> 48)));
return PUSH_RLDICR(reg, shift2);
return PUSH_SLDI_NEG(reg, shift2);
}
/* The general version. */
FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | (sljit_ins)((sljit_uw)imm >> 48)));
FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm >> 32)));
FAIL_IF(PUSH_RLDICR(reg, 31));
FAIL_IF(PUSH_SLDI_NEG(reg, 31));
FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | IMM(imm >> 16)));
return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm));
}
/* Simplified mnemonics: clrldi. */
#define INS_CLEAR_LEFT(dst, src, from) \
(RLDICL | S(src) | A(dst) | ((from) << 6) | (1 << 5))
#undef PUSH_SLDI_NEG
#define CLRLDI(dst, src, n) \
(RLDICL | S(src) | A(dst) | RLDI_SH(0) | RLDI_MB(n))
/* Sign extension for integer operations. */
#define UN_EXTS() \
@ -145,6 +147,8 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg,
static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
sljit_s32 dst, sljit_s32 src1, sljit_s32 src2)
{
sljit_u32 imm;
switch (op) {
case SLJIT_MOV:
case SLJIT_MOV_P:
@ -159,7 +163,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
if (op == SLJIT_MOV_S32)
return push_inst(compiler, EXTSW | S(src2) | A(dst));
return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 0));
return push_inst(compiler, CLRLDI(dst, src2, 32));
}
else {
SLJIT_ASSERT(dst == src2);
@ -172,7 +176,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
if (op == SLJIT_MOV_S8)
return push_inst(compiler, EXTSB | S(src2) | A(dst));
return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 24));
return push_inst(compiler, CLRLDI(dst, src2, 56));
}
else if ((flags & REG_DEST) && op == SLJIT_MOV_S8)
return push_inst(compiler, EXTSB | S(src2) | A(dst));
@ -187,7 +191,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
if (op == SLJIT_MOV_S16)
return push_inst(compiler, EXTSH | S(src2) | A(dst));
return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 16));
return push_inst(compiler, CLRLDI(dst, src2, 48));
}
else {
SLJIT_ASSERT(dst == src2);
@ -201,22 +205,30 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
case SLJIT_CLZ:
SLJIT_ASSERT(src1 == TMP_REG1);
if (flags & ALT_FORM1)
return push_inst(compiler, CNTLZW | S(src2) | A(dst));
return push_inst(compiler, CNTLZD | S(src2) | A(dst));
return push_inst(compiler, ((flags & ALT_FORM1) ? CNTLZW : CNTLZD) | S(src2) | A(dst));
case SLJIT_CTZ:
SLJIT_ASSERT(src1 == TMP_REG1);
FAIL_IF(push_inst(compiler, NEG | D(TMP_REG1) | A(src2)));
FAIL_IF(push_inst(compiler, AND | S(src2) | A(dst) | B(TMP_REG1)));
FAIL_IF(push_inst(compiler, ((flags & ALT_FORM1) ? CNTLZW : CNTLZD) | S(dst) | A(dst)));
FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG1) | A(dst) | IMM((flags & ALT_FORM1) ? -32 : -64)));
/* The highest bits are set, if dst < bit width, zero otherwise. */
FAIL_IF(push_inst(compiler, ((flags & ALT_FORM1) ? SRWI(27) : SRDI(58)) | S(TMP_REG1) | A(TMP_REG1)));
return push_inst(compiler, XOR | S(dst) | A(dst) | B(TMP_REG1));
case SLJIT_ADD:
if (flags & ALT_FORM1) {
if (flags & ALT_SIGN_EXT) {
FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, src1, 32, 31, 1)));
FAIL_IF(push_inst(compiler, SLDI(32) | S(src1) | A(TMP_REG1)));
src1 = TMP_REG1;
FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1)));
FAIL_IF(push_inst(compiler, SLDI(32) | S(src2) | A(TMP_REG2)));
src2 = TMP_REG2;
}
/* Setting XER SO is not enough, CR SO is also needed. */
FAIL_IF(push_inst(compiler, ADD | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2)));
if (flags & ALT_SIGN_EXT)
return push_inst(compiler, RLDI(dst, dst, 32, 32, 0));
return push_inst(compiler, SRDI(32) | S(dst) | A(dst));
return SLJIT_SUCCESS;
}
@ -227,12 +239,14 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
if (flags & ALT_FORM3)
return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm);
imm = compiler->imm;
if (flags & ALT_FORM4) {
FAIL_IF(push_inst(compiler, ADDIS | D(dst) | A(src1) | (((compiler->imm >> 16) & 0xffff) + ((compiler->imm >> 15) & 0x1))));
FAIL_IF(push_inst(compiler, ADDIS | D(dst) | A(src1) | (((imm >> 16) & 0xffff) + ((imm >> 15) & 0x1))));
src1 = dst;
}
return push_inst(compiler, ADDI | D(dst) | A(src1) | (compiler->imm & 0xffff));
return push_inst(compiler, ADDI | D(dst) | A(src1) | (imm & 0xffff));
}
if (flags & ALT_FORM3) {
SLJIT_ASSERT(src2 == TMP_REG2);
@ -287,11 +301,11 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
if (flags & ALT_FORM3) {
if (flags & ALT_SIGN_EXT) {
if (src1 != TMP_ZERO) {
FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, src1, 32, 31, 1)));
FAIL_IF(push_inst(compiler, SLDI(32) | S(src1) | A(TMP_REG1)));
src1 = TMP_REG1;
}
if (src2 != TMP_ZERO) {
FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1)));
FAIL_IF(push_inst(compiler, SLDI(32) | S(src2) | A(TMP_REG2)));
src2 = TMP_REG2;
}
}
@ -303,7 +317,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
FAIL_IF(push_inst(compiler, NEG | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2)));
if (flags & ALT_SIGN_EXT)
return push_inst(compiler, RLDI(dst, dst, 32, 32, 0));
return push_inst(compiler, SRDI(32) | S(dst) | A(dst));
return SLJIT_SUCCESS;
}
@ -362,8 +376,10 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
}
if (flags & ALT_FORM3) {
SLJIT_ASSERT(src2 == TMP_REG2);
FAIL_IF(push_inst(compiler, ORI | S(src1) | A(dst) | IMM(compiler->imm)));
return push_inst(compiler, ORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16));
imm = compiler->imm;
FAIL_IF(push_inst(compiler, ORI | S(src1) | A(dst) | IMM(imm)));
return push_inst(compiler, ORIS | S(dst) | A(dst) | IMM(imm >> 16));
}
return push_inst(compiler, OR | RC(flags) | S(src1) | A(dst) | B(src2));
@ -378,46 +394,105 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
}
if (flags & ALT_FORM3) {
SLJIT_ASSERT(src2 == TMP_REG2);
FAIL_IF(push_inst(compiler, XORI | S(src1) | A(dst) | IMM(compiler->imm)));
return push_inst(compiler, XORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16));
imm = compiler->imm;
FAIL_IF(push_inst(compiler, XORI | S(src1) | A(dst) | IMM(imm)));
return push_inst(compiler, XORIS | S(dst) | A(dst) | IMM(imm >> 16));
}
return push_inst(compiler, XOR | RC(flags) | S(src1) | A(dst) | B(src2));
case SLJIT_SHL:
case SLJIT_MSHL:
if (flags & ALT_FORM1) {
SLJIT_ASSERT(src2 == TMP_REG2);
imm = compiler->imm;
if (flags & ALT_FORM2) {
compiler->imm &= 0x1f;
return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11) | ((31 - compiler->imm) << 1));
imm &= 0x1f;
return push_inst(compiler, SLWI(imm) | RC(flags) | S(src1) | A(dst));
}
compiler->imm &= 0x3f;
return push_inst(compiler, RLDI(dst, src1, compiler->imm, 63 - compiler->imm, 1) | RC(flags));
imm &= 0x3f;
return push_inst(compiler, SLDI(imm) | RC(flags) | S(src1) | A(dst));
}
if (op == SLJIT_MSHL) {
FAIL_IF(push_inst(compiler, ANDI | S(src2) | A(TMP_REG2) | ((flags & ALT_FORM2) ? 0x1f : 0x3f)));
src2 = TMP_REG2;
}
return push_inst(compiler, ((flags & ALT_FORM2) ? SLW : SLD) | RC(flags) | S(src1) | A(dst) | B(src2));
case SLJIT_LSHR:
case SLJIT_MLSHR:
if (flags & ALT_FORM1) {
SLJIT_ASSERT(src2 == TMP_REG2);
imm = compiler->imm;
if (flags & ALT_FORM2) {
compiler->imm &= 0x1f;
return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (((32 - compiler->imm) & 0x1f) << 11) | (compiler->imm << 6) | (31 << 1));
imm &= 0x1f;
/* Since imm can be 0, SRWI() cannot be used. */
return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | RLWI_SH((32 - imm) & 0x1f) | RLWI_MBE(imm, 31));
}
compiler->imm &= 0x3f;
return push_inst(compiler, RLDI(dst, src1, 64 - compiler->imm, compiler->imm, 0) | RC(flags));
imm &= 0x3f;
/* Since imm can be 0, SRDI() cannot be used. */
return push_inst(compiler, RLDICL | RC(flags) | S(src1) | A(dst) | RLDI_SH((64 - imm) & 0x3f) | RLDI_MB(imm));
}
if (op == SLJIT_MLSHR) {
FAIL_IF(push_inst(compiler, ANDI | S(src2) | A(TMP_REG2) | ((flags & ALT_FORM2) ? 0x1f : 0x3f)));
src2 = TMP_REG2;
}
return push_inst(compiler, ((flags & ALT_FORM2) ? SRW : SRD) | RC(flags) | S(src1) | A(dst) | B(src2));
case SLJIT_ASHR:
case SLJIT_MASHR:
if (flags & ALT_FORM1) {
SLJIT_ASSERT(src2 == TMP_REG2);
imm = compiler->imm;
if (flags & ALT_FORM2) {
compiler->imm &= 0x1f;
return push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11));
imm &= 0x1f;
return push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (imm << 11));
}
compiler->imm &= 0x3f;
return push_inst(compiler, SRADI | RC(flags) | S(src1) | A(dst) | ((compiler->imm & 0x1f) << 11) | ((compiler->imm & 0x20) >> 4));
imm &= 0x3f;
return push_inst(compiler, SRADI | RC(flags) | S(src1) | A(dst) | RLDI_SH(imm));
}
if (op == SLJIT_MASHR) {
FAIL_IF(push_inst(compiler, ANDI | S(src2) | A(TMP_REG2) | ((flags & ALT_FORM2) ? 0x1f : 0x3f)));
src2 = TMP_REG2;
}
return push_inst(compiler, ((flags & ALT_FORM2) ? SRAW : SRAD) | RC(flags) | S(src1) | A(dst) | B(src2));
case SLJIT_ROTL:
case SLJIT_ROTR:
if (flags & ALT_FORM1) {
SLJIT_ASSERT(src2 == TMP_REG2);
imm = compiler->imm;
if (op == SLJIT_ROTR)
imm = (sljit_u32)(-(sljit_s32)imm);
if (flags & ALT_FORM2) {
imm &= 0x1f;
return push_inst(compiler, RLWINM | S(src1) | A(dst) | RLWI_SH(imm) | RLWI_MBE(0, 31));
}
imm &= 0x3f;
return push_inst(compiler, RLDICL | S(src1) | A(dst) | RLDI_SH(imm));
}
if (op == SLJIT_ROTR) {
FAIL_IF(push_inst(compiler, SUBFIC | D(TMP_REG2) | A(src2) | 0));
src2 = TMP_REG2;
}
return push_inst(compiler, ((flags & ALT_FORM2) ? (RLWNM | RLWI_MBE(0, 31)) : (RLDCL | RLDI_MB(0))) | S(src1) | A(dst) | B(src2));
}
SLJIT_UNREACHABLE();
@ -483,7 +558,7 @@ static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_
{
FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(init_value >> 48)));
FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value >> 32)));
FAIL_IF(PUSH_RLDICR(reg, 31));
FAIL_IF(push_inst(compiler, SLDI(32) | S(reg) | A(reg)));
FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | IMM(init_value >> 16)));
return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value));
}
@ -502,8 +577,3 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta
inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 5);
}
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
}

View File

@ -203,8 +203,13 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define OR (HI(31) | LO(444))
#define ORI (HI(24))
#define ORIS (HI(25))
#define RLDICL (HI(30))
#define RLDCL (HI(30) | LO(8))
#define RLDICL (HI(30) | LO(0 << 1))
#define RLDICR (HI(30) | LO(1 << 1))
#define RLDIMI (HI(30) | LO(3 << 1))
#define RLWIMI (HI(20))
#define RLWINM (HI(21))
#define RLWNM (HI(23))
#define SLD (HI(31) | LO(27))
#define SLW (HI(31) | LO(24))
#define SRAD (HI(31) | LO(794))
@ -233,9 +238,24 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define SIMM_MIN (-0x8000)
#define UIMM_MAX (0xffff)
#define RLDI(dst, src, sh, mb, type) \
(HI(30) | S(src) | A(dst) | ((sljit_ins)(type) << 2) | (((sljit_ins)(sh) & 0x1f) << 11) \
| (((sljit_ins)(sh) & 0x20) >> 4) | (((sljit_ins)(mb) & 0x1f) << 6) | ((sljit_ins)(mb) & 0x20))
/* Shift helpers. */
#define RLWI_SH(sh) ((sljit_ins)(sh) << 11)
#define RLWI_MBE(mb, me) (((sljit_ins)(mb) << 6) | ((sljit_ins)(me) << 1))
#define RLDI_SH(sh) ((((sljit_ins)(sh) & 0x1f) << 11) | (((sljit_ins)(sh) & 0x20) >> 4))
#define RLDI_MB(mb) ((((sljit_ins)(mb) & 0x1f) << 6) | ((sljit_ins)(mb) & 0x20))
#define RLDI_ME(me) RLDI_MB(me)
#define SLWI(shift) (RLWINM | RLWI_SH(shift) | RLWI_MBE(0, 31 - (shift)))
#define SLDI(shift) (RLDICR | RLDI_SH(shift) | RLDI_ME(63 - (shift)))
/* shift > 0 */
#define SRWI(shift) (RLWINM | RLWI_SH(32 - (shift)) | RLWI_MBE((shift), 31))
#define SRDI(shift) (RLDICL | RLDI_SH(64 - (shift)) | RLDI_MB(shift))
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
#define SLWI_W(shift) SLWI(shift)
#else /* !SLJIT_CONFIG_PPC_32 */
#define SLWI_W(shift) SLDI(shift)
#endif /* SLJIT_CONFIG_PPC_32 */
#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_uw addr, void* func)
@ -368,10 +388,10 @@ static SLJIT_INLINE void put_label_set(struct sljit_put_label *put_label)
else {
inst[0] = ORIS | S(TMP_ZERO) | A(reg) | IMM(addr >> 48);
inst[1] = ORI | S(reg) | A(reg) | IMM((addr >> 32) & 0xffff);
inst ++;
inst++;
}
inst[1] = RLDI(reg, reg, 32, 31, 1);
inst[1] = SLDI(32) | S(reg) | A(reg);
inst[2] = ORIS | S(reg) | A(reg) | IMM((addr >> 16) & 0xffff);
inst += 2;
}
@ -379,7 +399,7 @@ static SLJIT_INLINE void put_label_set(struct sljit_put_label *put_label)
inst[1] = ORI | S(reg) | A(reg) | IMM(addr & 0xffff);
}
#endif
#endif /* SLJIT_CONFIG_PPC_64 */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
@ -497,8 +517,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
}
next_addr = compute_next_addr(label, jump, const_, put_label);
}
code_ptr ++;
word_count ++;
code_ptr++;
word_count++;
} while (buf_ptr < buf_end);
buf = buf->next;
@ -641,14 +661,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
/* A saved register is set to a zero value. */
case SLJIT_HAS_ZERO_REGISTER:
case SLJIT_HAS_CLZ:
case SLJIT_HAS_ROT:
case SLJIT_HAS_PREFETCH:
return 1;
case SLJIT_HAS_CTZ:
return 2;
default:
return 0;
}
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
{
return (type >= SLJIT_UNORDERED && type <= SLJIT_ORDERED_LESS_EQUAL);
}
/* --------------------------------------------------------------------- */
/* Entry, exit */
/* --------------------------------------------------------------------- */
@ -715,13 +744,16 @@ ALT_FORM5 0x010000 */
#define STACK_MAX_DISTANCE (0x8000 - SSIZE_OF(sw) - LR_SAVE_OFFSET)
static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg,
sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg);
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
sljit_s32 i, tmp, base, offset;
sljit_s32 word_arg_count = 0;
sljit_s32 saved_arg_count = 0;
sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
sljit_s32 arg_count = 0;
#endif
@ -730,8 +762,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1)
local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 0)
+ GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64));
if (!(options & SLJIT_ENTER_REG_ARG))
local_size += SSIZE_OF(sw);
local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;
compiler->local_size = local_size;
@ -770,11 +806,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
FAIL_IF(push_inst(compiler, STFD | FS(i) | A(base) | IMM(offset)));
}
offset -= SSIZE_OF(sw);
FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(base) | IMM(offset)));
if (!(options & SLJIT_ENTER_REG_ARG)) {
offset -= SSIZE_OF(sw);
FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(base) | IMM(offset)));
}
tmp = SLJIT_S0 - saveds;
for (i = SLJIT_S0; i > tmp; i--) {
for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) {
offset -= SSIZE_OF(sw);
FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(base) | IMM(offset)));
}
@ -785,9 +823,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
}
FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(base) | IMM(local_size + LR_SAVE_OFFSET)));
if (options & SLJIT_ENTER_REG_ARG)
return SLJIT_SUCCESS;
FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0));
arg_types >>= SLJIT_ARG_SHIFT;
saved_arg_count = 0;
while (arg_types > 0) {
if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
@ -829,14 +872,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1)
local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 0)
+ GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64));
if (!(options & SLJIT_ENTER_REG_ARG))
local_size += SSIZE_OF(sw);
compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;
return SLJIT_SUCCESS;
}
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler)
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to)
{
sljit_s32 i, tmp, base, offset;
sljit_s32 local_size = compiler->local_size;
@ -854,7 +900,8 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler)
}
offset = local_size;
FAIL_IF(push_inst(compiler, STACK_LOAD | S(0) | A(base) | IMM(offset + LR_SAVE_OFFSET)));
if (!is_return_to)
FAIL_IF(push_inst(compiler, STACK_LOAD | S(0) | A(base) | IMM(offset + LR_SAVE_OFFSET)));
tmp = SLJIT_FS0 - compiler->fsaveds;
for (i = SLJIT_FS0; i > tmp; i--) {
@ -867,11 +914,13 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler)
FAIL_IF(push_inst(compiler, LFD | FS(i) | A(base) | IMM(offset)));
}
offset -= SSIZE_OF(sw);
FAIL_IF(push_inst(compiler, STACK_LOAD | S(TMP_ZERO) | A(base) | IMM(offset)));
if (!(compiler->options & SLJIT_ENTER_REG_ARG)) {
offset -= SSIZE_OF(sw);
FAIL_IF(push_inst(compiler, STACK_LOAD | S(TMP_ZERO) | A(base) | IMM(offset)));
}
tmp = SLJIT_S0 - compiler->saveds;
for (i = SLJIT_S0; i > tmp; i--) {
for (i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); i > tmp; i--) {
offset -= SSIZE_OF(sw);
FAIL_IF(push_inst(compiler, STACK_LOAD | S(i) | A(base) | IMM(offset)));
}
@ -881,7 +930,8 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler)
FAIL_IF(push_inst(compiler, STACK_LOAD | S(i) | A(base) | IMM(offset)));
}
push_inst(compiler, MTLR | S(0));
if (!is_return_to)
push_inst(compiler, MTLR | S(0));
if (local_size > 0)
return push_inst(compiler, ADDI | D(SLJIT_SP) | A(base) | IMM(local_size));
@ -890,17 +940,40 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler)
return push_inst(compiler, OR | S(base) | A(SLJIT_SP) | B(base));
}
#undef STACK_STORE
#undef STACK_LOAD
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
CHECK_ERROR();
CHECK(check_sljit_emit_return_void(compiler));
FAIL_IF(emit_stack_frame_release(compiler));
FAIL_IF(emit_stack_frame_release(compiler, 0));
return push_inst(compiler, BLR);
}
#undef STACK_STORE
#undef STACK_LOAD
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
sljit_s32 src, sljit_sw srcw)
{
CHECK_ERROR();
CHECK(check_sljit_emit_return_to(compiler, src, srcw));
if (src & SLJIT_MEM) {
ADJUST_LOCAL_OFFSET(src, srcw);
FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_CALL_REG, src, srcw, TMP_CALL_REG));
src = TMP_CALL_REG;
srcw = 0;
} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src)));
src = TMP_CALL_REG;
srcw = 0;
}
FAIL_IF(emit_stack_frame_release(compiler, 1));
SLJIT_SKIP_CHECKS(compiler);
return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
}
/* --------------------------------------------------------------------- */
/* Operators */
@ -1066,7 +1139,6 @@ static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 inp_flag
{
sljit_ins inst;
sljit_s32 offs_reg;
sljit_sw high_short;
/* Should work when (arg & REG_MASK) == 0. */
SLJIT_ASSERT(A(0) == 0);
@ -1077,11 +1149,7 @@ static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 inp_flag
offs_reg = OFFS_REG(arg);
if (argw != 0) {
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(arg)) | A(tmp_reg) | ((sljit_ins)argw << 11) | ((31 - (sljit_ins)argw) << 1)));
#else
FAIL_IF(push_inst(compiler, RLDI(tmp_reg, OFFS_REG(arg), argw, 63 - argw, 1)));
#endif
FAIL_IF(push_inst(compiler, SLWI_W(argw) | S(OFFS_REG(arg)) | A(tmp_reg)));
offs_reg = tmp_reg;
}
@ -1089,7 +1157,7 @@ static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 inp_flag
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
SLJIT_ASSERT(!(inst & INT_ALIGNED));
#endif
#endif /* SLJIT_CONFIG_PPC_64 */
return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(offs_reg));
}
@ -1104,36 +1172,24 @@ static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 inp_flag
inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg) | B(tmp_reg));
}
#endif
#endif /* SLJIT_CONFIG_PPC_64 */
if (argw <= SIMM_MAX && argw >= SIMM_MIN)
return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg) | IMM(argw));
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
if (argw <= 0x7fff7fffl && argw >= -0x80000000l) {
#endif
high_short = (sljit_s32)(argw + ((argw & 0x8000) << 1)) & ~0xffff;
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
SLJIT_ASSERT(high_short && high_short <= 0x7fffffffl && high_short >= -0x80000000l);
#else
SLJIT_ASSERT(high_short);
#endif
FAIL_IF(push_inst(compiler, ADDIS | D(tmp_reg) | A(arg) | IMM(high_short >> 16)));
#endif /* SLJIT_CONFIG_PPC_64 */
FAIL_IF(push_inst(compiler, ADDIS | D(tmp_reg) | A(arg) | IMM((argw + 0x8000) >> 16)));
return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_reg) | IMM(argw));
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
}
/* The rest is PPC-64 only. */
FAIL_IF(load_immediate(compiler, tmp_reg, argw));
inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg) | B(tmp_reg));
#endif
#endif /* SLJIT_CONFIG_PPC_64 */
}
static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 input_flags,
@ -1273,11 +1329,7 @@ static sljit_s32 emit_prefetch(struct sljit_compiler *compiler,
if (srcw == 0)
return push_inst(compiler, DCBT | A(src & REG_MASK) | B(OFFS_REG(src)));
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(src)) | A(TMP_REG1) | ((sljit_ins)srcw << 11) | ((31 - (sljit_ins)srcw) << 1)));
#else
FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(src), srcw, 63 - srcw, 1)));
#endif
FAIL_IF(push_inst(compiler, SLWI_W(srcw) | S(OFFS_REG(src)) | A(TMP_REG1)));
return push_inst(compiler, DCBT | A(src & REG_MASK) | B(TMP_REG1));
}
@ -1362,10 +1414,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw);
case SLJIT_CLZ:
case SLJIT_CTZ:
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
return emit_op(compiler, SLJIT_CLZ, flags | (!(op_flags & SLJIT_32) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw);
return emit_op(compiler, op, flags | (!(op_flags & SLJIT_32) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw);
#else
return emit_op(compiler, SLJIT_CLZ, flags, dst, dstw, TMP_REG1, 0, src, srcw);
return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw);
#endif
}
@ -1626,7 +1679,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
}
}
if (GET_OPCODE(op) != SLJIT_AND) {
if (!HAS_FLAGS(op) && GET_OPCODE(op) != SLJIT_AND) {
/* Unlike or and xor, the and resets unwanted bits as well. */
if (TEST_UI_IMM(src2, src2w)) {
compiler->imm = (sljit_ins)src2w;
@ -1640,8 +1693,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
case SLJIT_SHL:
case SLJIT_MSHL:
case SLJIT_LSHR:
case SLJIT_MLSHR:
case SLJIT_ASHR:
case SLJIT_MASHR:
case SLJIT_ROTL:
case SLJIT_ROTR:
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
if (op & SLJIT_32)
flags |= ALT_FORM2;
@ -1663,10 +1721,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil
CHECK_ERROR();
CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
compiler->skip_checks = 1;
#endif
SLJIT_SKIP_CHECKS(compiler);
return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w);
}
@ -1674,6 +1729,102 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil
#undef TEST_SUB_FORM2
#undef TEST_SUB_FORM3
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 src_dst,
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
{
sljit_s32 is_right;
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
sljit_s32 inp_flags = ((op & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;
sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64;
#else /* !SLJIT_CONFIG_PPC_64 */
sljit_s32 inp_flags = WORD_DATA | LOAD_DATA;
sljit_sw bit_length = 32;
#endif /* SLJIT_CONFIG_PPC_64 */
CHECK_ERROR();
CHECK(check_sljit_emit_shift_into(compiler, op, src_dst, src1, src1w, src2, src2w));
is_right = (GET_OPCODE(op) == SLJIT_LSHR || GET_OPCODE(op) == SLJIT_MLSHR);
if (src_dst == src1) {
SLJIT_SKIP_CHECKS(compiler);
return sljit_emit_op2(compiler, (is_right ? SLJIT_ROTR : SLJIT_ROTL) | (op & SLJIT_32), src_dst, 0, src_dst, 0, src2, src2w);
}
ADJUST_LOCAL_OFFSET(src1, src1w);
ADJUST_LOCAL_OFFSET(src2, src2w);
if (src2 & SLJIT_IMM) {
src2w &= bit_length - 1;
if (src2w == 0)
return SLJIT_SUCCESS;
} else if (src2 & SLJIT_MEM) {
FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG2, src2, src2w, TMP_REG2));
src2 = TMP_REG2;
}
if (src1 & SLJIT_MEM) {
FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG1, src1, src1w, TMP_REG1));
src1 = TMP_REG1;
} else if (src1 & SLJIT_IMM) {
FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
src1 = TMP_REG1;
}
if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
if (!(op & SLJIT_32)) {
if (is_right) {
FAIL_IF(push_inst(compiler, SRDI(src2w) | S(src_dst) | A(src_dst)));
return push_inst(compiler, RLDIMI | S(src1) | A(src_dst) | RLDI_SH(64 - src2w) | RLDI_MB(0));
}
FAIL_IF(push_inst(compiler, SLDI(src2w) | S(src_dst) | A(src_dst)));
/* Computes SRDI(64 - src2w). */
FAIL_IF(push_inst(compiler, RLDICL | S(src1) | A(TMP_REG1) | RLDI_SH(src2w) | RLDI_MB(64 - src2w)));
return push_inst(compiler, OR | S(src_dst) | A(src_dst) | B(TMP_REG1));
}
#endif /* SLJIT_CONFIG_PPC_64 */
if (is_right) {
FAIL_IF(push_inst(compiler, SRWI(src2w) | S(src_dst) | A(src_dst)));
return push_inst(compiler, RLWIMI | S(src1) | A(src_dst) | RLWI_SH(32 - src2w) | RLWI_MBE(0, src2w - 1));
}
FAIL_IF(push_inst(compiler, SLWI(src2w) | S(src_dst) | A(src_dst)));
return push_inst(compiler, RLWIMI | S(src1) | A(src_dst) | RLWI_SH(src2w) | RLWI_MBE(32 - src2w, 31));
}
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
if (!(op & SLJIT_32)) {
if (GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR) {
FAIL_IF(push_inst(compiler, ANDI | S(src2) | A(TMP_REG2) | 0x3f));
src2 = TMP_REG2;
}
FAIL_IF(push_inst(compiler, (is_right ? SRD : SLD) | S(src_dst) | A(src_dst) | B(src2)));
FAIL_IF(push_inst(compiler, (is_right ? SLDI(1) : SRDI(1)) | S(src1) | A(TMP_REG1)));
FAIL_IF(push_inst(compiler, XORI | S(src2) | A(TMP_REG2) | 0x3f));
FAIL_IF(push_inst(compiler, (is_right ? SLD : SRD) | S(TMP_REG1) | A(TMP_REG1) | B(TMP_REG2)));
return push_inst(compiler, OR | S(src_dst) | A(src_dst) | B(TMP_REG1));
}
#endif /* SLJIT_CONFIG_PPC_64 */
if (GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR) {
FAIL_IF(push_inst(compiler, ANDI | S(src2) | A(TMP_REG2) | 0x1f));
src2 = TMP_REG2;
}
FAIL_IF(push_inst(compiler, (is_right ? SRW : SLW) | S(src_dst) | A(src_dst) | B(src2)));
FAIL_IF(push_inst(compiler, (is_right ? SLWI(1) : SRWI(1)) | S(src1) | A(TMP_REG1)));
FAIL_IF(push_inst(compiler, XORI | S(src2) | A(TMP_REG2) | 0x1f));
FAIL_IF(push_inst(compiler, (is_right ? SLW : SRW) | S(TMP_REG1) | A(TMP_REG1) | B(TMP_REG2)));
return push_inst(compiler, OR | S(src_dst) | A(src_dst) | B(TMP_REG1));
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 src, sljit_sw srcw)
{
@ -1686,7 +1837,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *comp
if (FAST_IS_REG(src))
FAIL_IF(push_inst(compiler, MTLR | S(src)));
else {
FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw, TMP_REG2));
FAIL_IF(push_inst(compiler, MTLR | S(TMP_REG2)));
}
@ -1782,11 +1933,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
if (dst & OFFS_REG_MASK) {
dstw &= 0x3;
if (dstw) {
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(dst)) | A(TMP_REG1) | ((sljit_ins)dstw << 11) | ((31 - (sljit_ins)dstw) << 1)));
#else
FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(dst), dstw, 63 - dstw, 1)));
#endif
FAIL_IF(push_inst(compiler, SLWI_W(dstw) | S(OFFS_REG(dst)) | A(TMP_REG1)));
dstw = TMP_REG1;
}
else
@ -1818,6 +1965,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp
if (src & SLJIT_IMM) {
if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
srcw = (sljit_s32)srcw;
FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
src = TMP_REG1;
}
@ -1863,7 +2011,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp
The double precision format has exactly 53 bit precision, so the lower 32 bit represents
the lower 32 bit of such value. The result of xor 2^31 is the same as adding 0x80000000
to the input, which shifts it into the 0 - 0xffffffff range. To get the converted floating
point value, we need to substract 2^53 + 2^31 from the constructed value. */
point value, we need to subtract 2^53 + 2^31 from the constructed value. */
FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330));
if (invert_sign)
FAIL_IF(push_inst(compiler, XORIS | S(src) | A(TMP_REG1) | 0x8000));
@ -1899,7 +2047,21 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile
src2 = TMP_FREG2;
}
return push_inst(compiler, FCMPU | CRD(4) | FA(src1) | FB(src2));
FAIL_IF(push_inst(compiler, FCMPU | CRD(4) | FA(src1) | FB(src2)));
switch (GET_FLAG_TYPE(op)) {
case SLJIT_UNORDERED_OR_EQUAL:
case SLJIT_ORDERED_NOT_EQUAL:
return push_inst(compiler, CROR | ((4 + 2) << 21) | ((4 + 2) << 16) | ((4 + 3) << 11));
case SLJIT_UNORDERED_OR_LESS:
case SLJIT_ORDERED_GREATER_EQUAL:
return push_inst(compiler, CROR | ((4 + 0) << 21) | ((4 + 0) << 16) | ((4 + 3) << 11));
case SLJIT_UNORDERED_OR_GREATER:
case SLJIT_ORDERED_LESS_EQUAL:
return push_inst(compiler, CROR | ((4 + 1) << 21) | ((4 + 1) << 16) | ((4 + 3) << 11));
}
return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
@ -2076,38 +2238,50 @@ static sljit_ins get_bo_bi_flags(struct sljit_compiler *compiler, sljit_s32 type
case SLJIT_SIG_LESS_EQUAL:
return (4 << 21) | (1 << 16);
case SLJIT_LESS_F64:
return (12 << 21) | ((4 + 0) << 16);
case SLJIT_GREATER_EQUAL_F64:
return (4 << 21) | ((4 + 0) << 16);
case SLJIT_GREATER_F64:
return (12 << 21) | ((4 + 1) << 16);
case SLJIT_LESS_EQUAL_F64:
return (4 << 21) | ((4 + 1) << 16);
case SLJIT_OVERFLOW:
return (12 << 21) | (3 << 16);
case SLJIT_NOT_OVERFLOW:
return (4 << 21) | (3 << 16);
case SLJIT_EQUAL_F64:
case SLJIT_F_LESS:
case SLJIT_ORDERED_LESS:
case SLJIT_UNORDERED_OR_LESS:
return (12 << 21) | ((4 + 0) << 16);
case SLJIT_F_GREATER_EQUAL:
case SLJIT_ORDERED_GREATER_EQUAL:
case SLJIT_UNORDERED_OR_GREATER_EQUAL:
return (4 << 21) | ((4 + 0) << 16);
case SLJIT_F_GREATER:
case SLJIT_ORDERED_GREATER:
case SLJIT_UNORDERED_OR_GREATER:
return (12 << 21) | ((4 + 1) << 16);
case SLJIT_F_LESS_EQUAL:
case SLJIT_ORDERED_LESS_EQUAL:
case SLJIT_UNORDERED_OR_LESS_EQUAL:
return (4 << 21) | ((4 + 1) << 16);
case SLJIT_F_EQUAL:
case SLJIT_ORDERED_EQUAL:
case SLJIT_UNORDERED_OR_EQUAL:
return (12 << 21) | ((4 + 2) << 16);
case SLJIT_NOT_EQUAL_F64:
case SLJIT_F_NOT_EQUAL:
case SLJIT_ORDERED_NOT_EQUAL:
case SLJIT_UNORDERED_OR_NOT_EQUAL:
return (4 << 21) | ((4 + 2) << 16);
case SLJIT_UNORDERED_F64:
case SLJIT_UNORDERED:
return (12 << 21) | ((4 + 3) << 16);
case SLJIT_ORDERED_F64:
case SLJIT_ORDERED:
return (4 << 21) | ((4 + 3) << 16);
default:
SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL_CDECL);
SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL_REG_ARG);
return (20 << 21);
}
}
@ -2154,19 +2328,16 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile
CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL));
if ((type & 0xff) != SLJIT_CALL_REG_ARG)
PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL));
#endif
if (type & SLJIT_CALL_RETURN) {
PTR_FAIL_IF(emit_stack_frame_release(compiler));
PTR_FAIL_IF(emit_stack_frame_release(compiler, 0));
type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
}
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
compiler->skip_checks = 1;
#endif
SLJIT_SKIP_CHECKS(compiler);
return sljit_emit_jump(compiler, type);
}
@ -2177,7 +2348,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
CHECK_ERROR();
CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
ADJUST_LOCAL_OFFSET(src, srcw);
if (FAST_IS_REG(src)) {
#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
@ -2204,9 +2374,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
src_r = TMP_CALL_REG;
}
else {
FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_CALL_REG, 0, TMP_REG1, 0, src, srcw));
} else {
ADJUST_LOCAL_OFFSET(src, srcw);
FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_CALL_REG, src, srcw, TMP_CALL_REG));
src_r = TMP_CALL_REG;
}
@ -2225,29 +2395,26 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
if (src & SLJIT_MEM) {
ADJUST_LOCAL_OFFSET(src, srcw);
FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_CALL_REG, 0, TMP_REG1, 0, src, srcw));
FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_CALL_REG, src, srcw, TMP_CALL_REG));
src = TMP_CALL_REG;
}
if (type & SLJIT_CALL_RETURN) {
if (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0) {
if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src)));
src = TMP_CALL_REG;
}
FAIL_IF(emit_stack_frame_release(compiler));
FAIL_IF(emit_stack_frame_release(compiler, 0));
type = SLJIT_JUMP;
}
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
FAIL_IF(call_with_args(compiler, arg_types, &src));
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
compiler->skip_checks = 1;
if ((type & 0xff) != SLJIT_CALL_REG_ARG)
FAIL_IF(call_with_args(compiler, arg_types, &src));
#endif
SLJIT_SKIP_CHECKS(compiler);
return sljit_emit_ijump(compiler, type, src, srcw);
}
@ -2279,7 +2446,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
bit = 0;
from_xer = 0;
switch (type & 0xff) {
switch (type) {
case SLJIT_LESS:
case SLJIT_SIG_LESS:
break;
@ -2332,38 +2499,50 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
invert = (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) != 0;
break;
case SLJIT_LESS_F64:
case SLJIT_F_LESS:
case SLJIT_ORDERED_LESS:
case SLJIT_UNORDERED_OR_LESS:
bit = 4 + 0;
break;
case SLJIT_GREATER_EQUAL_F64:
case SLJIT_F_GREATER_EQUAL:
case SLJIT_ORDERED_GREATER_EQUAL:
case SLJIT_UNORDERED_OR_GREATER_EQUAL:
bit = 4 + 0;
invert = 1;
break;
case SLJIT_GREATER_F64:
case SLJIT_F_GREATER:
case SLJIT_ORDERED_GREATER:
case SLJIT_UNORDERED_OR_GREATER:
bit = 4 + 1;
break;
case SLJIT_LESS_EQUAL_F64:
case SLJIT_F_LESS_EQUAL:
case SLJIT_ORDERED_LESS_EQUAL:
case SLJIT_UNORDERED_OR_LESS_EQUAL:
bit = 4 + 1;
invert = 1;
break;
case SLJIT_EQUAL_F64:
case SLJIT_F_EQUAL:
case SLJIT_ORDERED_EQUAL:
case SLJIT_UNORDERED_OR_EQUAL:
bit = 4 + 2;
break;
case SLJIT_NOT_EQUAL_F64:
case SLJIT_F_NOT_EQUAL:
case SLJIT_ORDERED_NOT_EQUAL:
case SLJIT_UNORDERED_OR_NOT_EQUAL:
bit = 4 + 2;
invert = 1;
break;
case SLJIT_UNORDERED_F64:
case SLJIT_UNORDERED:
bit = 4 + 3;
break;
case SLJIT_ORDERED_F64:
case SLJIT_ORDERED:
bit = 4 + 3;
invert = 1;
break;
@ -2374,7 +2553,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
}
FAIL_IF(push_inst(compiler, (from_xer ? MFXER : MFCR) | D(reg)));
FAIL_IF(push_inst(compiler, RLWINM | S(reg) | A(reg) | ((1 + bit) << 11) | (31 << 6) | (31 << 1)));
/* Simplified mnemonics: extrwi. */
FAIL_IF(push_inst(compiler, RLWINM | S(reg) | A(reg) | RLWI_SH(1 + bit) | RLWI_MBE(31, 31)));
if (invert)
FAIL_IF(push_inst(compiler, XORI | S(reg) | A(reg) | 0x1));
@ -2385,10 +2565,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
return emit_op_mem(compiler, input_flags, reg, dst, dstw, TMP_REG1);
}
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
compiler->skip_checks = 1;
#endif
SLJIT_SKIP_CHECKS(compiler);
if (dst & SLJIT_MEM)
return sljit_emit_op2(compiler, saved_op, dst, saved_dstw, TMP_REG1, 0, TMP_REG2, 0);
return sljit_emit_op2(compiler, saved_op, dst, 0, dst, 0, TMP_REG2, 0);
@ -2404,15 +2582,94 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);;
}
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
#define EMIT_MEM_LOAD_IMM(inst, mem, memw) \
((sljit_s16)(memw) > SIMM_MAX - SSIZE_OF(sw))
#else /* !SLJIT_CONFIG_PPC_32 */
#define EMIT_MEM_LOAD_IMM(inst, mem, memw) \
((((inst) & INT_ALIGNED) && ((memw) & 0x3) != 0) \
|| ((sljit_s16)(memw) > SIMM_MAX - SSIZE_OF(sw)) \
|| ((memw) > 0x7fff7fffl || (memw) < -0x80000000l)) \
#endif /* SLJIT_CONFIG_PPC_32 */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 reg,
sljit_s32 mem, sljit_sw memw)
{
sljit_ins inst;
CHECK_ERROR();
CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
if (!(reg & REG_PAIR_MASK))
return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);
ADJUST_LOCAL_OFFSET(mem, memw);
inst = data_transfer_insts[WORD_DATA | ((type & SLJIT_MEM_STORE) ? 0 : LOAD_DATA)];
if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
memw &= 0x3;
if (memw != 0) {
FAIL_IF(push_inst(compiler, SLWI_W(memw) | S(OFFS_REG(mem)) | A(TMP_REG1)));
FAIL_IF(push_inst(compiler, ADD | D(TMP_REG1) | A(TMP_REG1) | B(mem & REG_MASK)));
} else
FAIL_IF(push_inst(compiler, ADD | D(TMP_REG1) | A(mem & REG_MASK) | B(OFFS_REG(mem))));
mem = TMP_REG1;
memw = 0;
} else {
if (EMIT_MEM_LOAD_IMM(inst, mem, memw)) {
if ((mem & REG_MASK) != 0) {
SLJIT_SKIP_CHECKS(compiler);
FAIL_IF(sljit_emit_op2(compiler, SLJIT_ADD, TMP_REG1, 0, mem & REG_MASK, 0, SLJIT_IMM, memw));
} else
FAIL_IF(load_immediate(compiler, TMP_REG1, memw));
memw = 0;
mem = TMP_REG1;
} else if (memw > SIMM_MAX || memw < SIMM_MIN) {
FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG1) | A(mem & REG_MASK) | IMM((memw + 0x8000) >> 16)));
memw &= 0xffff;
mem = TMP_REG1;
} else {
memw &= 0xffff;
mem &= REG_MASK;
}
}
SLJIT_ASSERT((memw >= 0 && memw <= SIMM_MAX - SSIZE_OF(sw)) || (memw >= 0x8000 && memw <= 0xffff));
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
inst &= (sljit_ins)~INT_ALIGNED;
#endif /* SLJIT_CONFIG_PPC_64 */
if (!(type & SLJIT_MEM_STORE) && mem == REG_PAIR_FIRST(reg)) {
FAIL_IF(push_inst(compiler, inst | D(REG_PAIR_SECOND(reg)) | A(mem) | IMM(memw + SSIZE_OF(sw))));
return push_inst(compiler, inst | D(REG_PAIR_FIRST(reg)) | A(mem) | IMM(memw));
}
FAIL_IF(push_inst(compiler, inst | D(REG_PAIR_FIRST(reg)) | A(mem) | IMM(memw)));
return push_inst(compiler, inst | D(REG_PAIR_SECOND(reg)) | A(mem) | IMM(memw + SSIZE_OF(sw)));
}
#undef EMIT_MEM_LOAD_IMM
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 reg,
sljit_s32 mem, sljit_sw memw)
{
sljit_s32 mem_flags;
sljit_ins inst;
CHECK_ERROR();
CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
CHECK(check_sljit_emit_mem_update(compiler, type, reg, mem, memw));
if (type & SLJIT_MEM_POST)
return SLJIT_ERR_UNSUPPORTED;
@ -2500,7 +2757,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile
return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type,
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 freg,
sljit_s32 mem, sljit_sw memw)
{
@ -2508,7 +2765,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil
sljit_ins inst;
CHECK_ERROR();
CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw));
CHECK(check_sljit_emit_fmem_update(compiler, type, freg, mem, memw));
if (type & SLJIT_MEM_POST)
return SLJIT_ERR_UNSUPPORTED;
@ -2587,3 +2844,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct slj
return put_label;
}
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
}

View File

@ -0,0 +1,73 @@
/*
* Stack-less Just-In-Time compiler
*
* Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r, sljit_sw imm, sljit_s32 tmp_r)
{
SLJIT_UNUSED_ARG(tmp_r);
SLJIT_ASSERT(dst_r != tmp_r);
if (imm <= SIMM_MAX && imm >= SIMM_MIN)
return push_inst(compiler, ADDI | RD(dst_r) | RS1(TMP_ZERO) | IMM_I(imm));
if (imm & 0x800)
imm += 0x1000;
FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)(imm & ~0xfff)));
if ((imm & 0xfff) == 0)
return SLJIT_SUCCESS;
return push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(imm));
}
static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value, sljit_ins last_ins)
{
if ((init_value & 0x800) != 0)
init_value += 0x1000;
FAIL_IF(push_inst(compiler, LUI | RD(dst) | (sljit_ins)(init_value & ~0xfff)));
return push_inst(compiler, last_ins | RS1(dst) | IMM_I(init_value));
}
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
sljit_ins *inst = (sljit_ins*)addr;
SLJIT_UNUSED_ARG(executable_offset);
if ((new_target & 0x800) != 0)
new_target += 0x1000;
SLJIT_UPDATE_WX_FLAGS(inst, inst + 5, 0);
SLJIT_ASSERT((inst[0] & 0x7f) == LUI);
inst[0] = (inst[0] & 0xfff) | (sljit_ins)((sljit_sw)new_target & ~0xfff);
SLJIT_ASSERT((inst[1] & 0x707f) == ADDI || (inst[1] & 0x707f) == JALR);
inst[1] = (inst[1] & 0xfffff) | IMM_I(new_target);
SLJIT_UPDATE_WX_FLAGS(inst, inst + 5, 1);
inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 5);
}

View File

@ -0,0 +1,183 @@
/*
* Stack-less Just-In-Time compiler
*
* Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r, sljit_sw imm, sljit_s32 tmp_r)
{
sljit_sw high;
SLJIT_ASSERT(dst_r != tmp_r);
if (imm <= SIMM_MAX && imm >= SIMM_MIN)
return push_inst(compiler, ADDI | RD(dst_r) | RS1(TMP_ZERO) | IMM_I(imm));
if (imm <= 0x7fffffffl && imm >= S32_MIN) {
if (imm > S32_MAX) {
SLJIT_ASSERT((imm & 0x800) != 0);
FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)0x80000000u));
return push_inst(compiler, XORI | RD(dst_r) | RS1(dst_r) | IMM_I(imm));
}
if ((imm & 0x800) != 0)
imm += 0x1000;
FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)(imm & ~0xfff)));
if ((imm & 0xfff) == 0)
return SLJIT_SUCCESS;
return push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(imm));
}
/* Trailing zeroes could be used to produce shifted immediates. */
if (imm <= 0x7ffffffffffl && imm >= -0x80000000000l) {
high = imm >> 12;
if (imm & 0x800)
high = ~high;
if (high > S32_MAX) {
SLJIT_ASSERT((high & 0x800) != 0);
FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)0x80000000u));
FAIL_IF(push_inst(compiler, XORI | RD(dst_r) | RS1(dst_r) | IMM_I(high)));
} else {
if ((high & 0x800) != 0)
high += 0x1000;
FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)(high & ~0xfff)));
if ((high & 0xfff) != 0)
FAIL_IF(push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(high)));
}
FAIL_IF(push_inst(compiler, SLLI | RD(dst_r) | RS1(dst_r) | IMM_I(12)));
if ((imm & 0xfff) != 0)
return push_inst(compiler, XORI | RD(dst_r) | RS1(dst_r) | IMM_I(imm));
return SLJIT_SUCCESS;
}
high = imm >> 32;
imm = (sljit_s32)imm;
if ((imm & 0x80000000l) != 0)
high = ~high;
if (high <= 0x7ffff && high >= -0x80000) {
FAIL_IF(push_inst(compiler, LUI | RD(tmp_r) | (sljit_ins)(high << 12)));
high = 0x1000;
} else {
if ((high & 0x800) != 0)
high += 0x1000;
FAIL_IF(push_inst(compiler, LUI | RD(tmp_r) | (sljit_ins)(high & ~0xfff)));
high &= 0xfff;
}
if (imm <= SIMM_MAX && imm >= SIMM_MIN) {
FAIL_IF(push_inst(compiler, ADDI | RD(dst_r) | RS1(TMP_ZERO) | IMM_I(imm)));
imm = 0;
} else if (imm > S32_MAX) {
SLJIT_ASSERT((imm & 0x800) != 0);
FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)0x80000000u));
imm = 0x1000 | (imm & 0xfff);
} else {
if ((imm & 0x800) != 0)
imm += 0x1000;
FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)(imm & ~0xfff)));
imm &= 0xfff;
}
if ((high & 0xfff) != 0)
FAIL_IF(push_inst(compiler, ADDI | RD(tmp_r) | RS1(tmp_r) | IMM_I(high)));
if (imm & 0x1000)
FAIL_IF(push_inst(compiler, XORI | RD(dst_r) | RS1(dst_r) | IMM_I(imm)));
else if (imm != 0)
FAIL_IF(push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(imm)));
FAIL_IF(push_inst(compiler, SLLI | RD(tmp_r) | RS1(tmp_r) | IMM_I((high & 0x1000) ? 20 : 32)));
return push_inst(compiler, XOR | RD(dst_r) | RS1(dst_r) | RS2(tmp_r));
}
static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value, sljit_ins last_ins)
{
sljit_sw high;
if ((init_value & 0x800) != 0)
init_value += 0x1000;
high = init_value >> 32;
if ((init_value & 0x80000000l) != 0)
high = ~high;
if ((high & 0x800) != 0)
high += 0x1000;
FAIL_IF(push_inst(compiler, LUI | RD(TMP_REG3) | (sljit_ins)(high & ~0xfff)));
FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG3) | RS1(TMP_REG3) | IMM_I(high)));
FAIL_IF(push_inst(compiler, LUI | RD(dst) | (sljit_ins)(init_value & ~0xfff)));
FAIL_IF(push_inst(compiler, SLLI | RD(TMP_REG3) | RS1(TMP_REG3) | IMM_I(32)));
FAIL_IF(push_inst(compiler, XOR | RD(dst) | RS1(dst) | RS2(TMP_REG3)));
return push_inst(compiler, last_ins | RS1(dst) | IMM_I(init_value));
}
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
sljit_ins *inst = (sljit_ins*)addr;
sljit_sw high;
SLJIT_UNUSED_ARG(executable_offset);
if ((new_target & 0x800) != 0)
new_target += 0x1000;
high = (sljit_sw)new_target >> 32;
if ((new_target & 0x80000000l) != 0)
high = ~high;
if ((high & 0x800) != 0)
high += 0x1000;
SLJIT_UPDATE_WX_FLAGS(inst, inst + 5, 0);
SLJIT_ASSERT((inst[0] & 0x7f) == LUI);
inst[0] = (inst[0] & 0xfff) | (sljit_ins)(high & ~0xfff);
SLJIT_ASSERT((inst[1] & 0x707f) == ADDI);
inst[1] = (inst[1] & 0xfffff) | IMM_I(high);
SLJIT_ASSERT((inst[2] & 0x7f) == LUI);
inst[2] = (inst[2] & 0xfff) | (sljit_ins)((sljit_sw)new_target & ~0xfff);
SLJIT_ASSERT((inst[5] & 0x707f) == ADDI || (inst[5] & 0x707f) == JALR);
inst[5] = (inst[5] & 0xfffff) | IMM_I(new_target);
SLJIT_UPDATE_WX_FLAGS(inst, inst + 5, 1);
inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 5);
}

File diff suppressed because it is too large Load Diff

View File

@ -103,11 +103,8 @@ static const sljit_gpr r15 = 15; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stac
/* When reg cannot be unused. */
#define IS_GPR_REG(reg) ((reg > 0) && (reg) <= SLJIT_SP)
/* Link registers. The normal link register is r14, but since
we use that for flags we need to use r0 instead to do fast
calls so that flags are preserved. */
/* Link register. */
static const sljit_gpr link_r = 14; /* r14 */
static const sljit_gpr fast_link_r = 0; /* r0 */
#define TMP_FREG1 (0)
@ -220,7 +217,8 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t
}
/* fallthrough */
case SLJIT_EQUAL_F64:
case SLJIT_F_EQUAL:
case SLJIT_ORDERED_EQUAL:
return cc0;
case SLJIT_NOT_EQUAL:
@ -234,13 +232,14 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t
}
/* fallthrough */
case SLJIT_NOT_EQUAL_F64:
case SLJIT_UNORDERED_OR_NOT_EQUAL:
return (cc1 | cc2 | cc3);
case SLJIT_LESS:
return cc1;
case SLJIT_GREATER_EQUAL:
case SLJIT_UNORDERED_OR_GREATER_EQUAL:
return (cc0 | cc2 | cc3);
case SLJIT_GREATER:
@ -254,7 +253,8 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t
return (cc0 | cc1 | cc2);
case SLJIT_SIG_LESS:
case SLJIT_LESS_F64:
case SLJIT_F_LESS:
case SLJIT_ORDERED_LESS:
return cc1;
case SLJIT_NOT_CARRY:
@ -263,7 +263,8 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t
/* fallthrough */
case SLJIT_SIG_LESS_EQUAL:
case SLJIT_LESS_EQUAL_F64:
case SLJIT_F_LESS_EQUAL:
case SLJIT_ORDERED_LESS_EQUAL:
return (cc0 | cc1);
case SLJIT_CARRY:
@ -272,6 +273,7 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t
/* fallthrough */
case SLJIT_SIG_GREATER:
case SLJIT_UNORDERED_OR_GREATER:
/* Overflow is considered greater, see SLJIT_SUB. */
return cc2 | cc3;
@ -283,7 +285,7 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t
return (cc2 | cc3);
/* fallthrough */
case SLJIT_UNORDERED_F64:
case SLJIT_UNORDERED:
return cc3;
case SLJIT_NOT_OVERFLOW:
@ -291,14 +293,29 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t
return (cc0 | cc1);
/* fallthrough */
case SLJIT_ORDERED_F64:
case SLJIT_ORDERED:
return (cc0 | cc1 | cc2);
case SLJIT_GREATER_F64:
case SLJIT_F_NOT_EQUAL:
case SLJIT_ORDERED_NOT_EQUAL:
return (cc1 | cc2);
case SLJIT_F_GREATER:
case SLJIT_ORDERED_GREATER:
return cc2;
case SLJIT_GREATER_EQUAL_F64:
case SLJIT_F_GREATER_EQUAL:
case SLJIT_ORDERED_GREATER_EQUAL:
return (cc0 | cc2);
case SLJIT_UNORDERED_OR_LESS_EQUAL:
return (cc0 | cc1 | cc3);
case SLJIT_UNORDERED_OR_EQUAL:
return (cc0 | cc3);
case SLJIT_UNORDERED_OR_LESS:
return (cc1 | cc3);
}
SLJIT_UNREACHABLE();
@ -978,7 +995,7 @@ static sljit_s32 make_addr_bx(struct sljit_compiler *compiler,
(cond) ? EVAL(i1, r, addr) : EVAL(i2, r, addr)
/* May clobber tmp1. */
static sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst,
static sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst_r,
sljit_s32 src, sljit_sw srcw,
sljit_s32 is_32bit)
{
@ -986,21 +1003,36 @@ static sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst,
sljit_ins ins;
SLJIT_ASSERT(src & SLJIT_MEM);
if (have_ldisp() || !is_32bit)
FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
else
if (is_32bit && ((src & OFFS_REG_MASK) || is_u12(srcw) || !is_s20(srcw))) {
FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
return push_inst(compiler, 0x58000000 /* l */ | R20A(dst_r) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);
}
if (is_32bit)
ins = WHEN(is_u12(addr.offset), dst, l, ly, addr);
else
ins = lg(dst, addr.offset, addr.index, addr.base);
FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
return push_inst(compiler, ins);
ins = is_32bit ? 0xe30000000058 /* ly */ : 0xe30000000004 /* lg */;
return push_inst(compiler, ins | R36A(dst_r) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
}
/* May clobber tmp1. */
static sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src,
static sljit_s32 load_unsigned_word(struct sljit_compiler *compiler, sljit_gpr dst_r,
sljit_s32 src, sljit_sw srcw,
sljit_s32 is_32bit)
{
struct addr addr;
sljit_ins ins;
SLJIT_ASSERT(src & SLJIT_MEM);
FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
ins = is_32bit ? 0xe30000000016 /* llgf */ : 0xe30000000004 /* lg */;
return push_inst(compiler, ins | R36A(dst_r) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
}
/* May clobber tmp1. */
static sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src_r,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 is_32bit)
{
@ -1008,17 +1040,16 @@ static sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src,
sljit_ins ins;
SLJIT_ASSERT(dst & SLJIT_MEM);
if (have_ldisp() || !is_32bit)
FAIL_IF(make_addr_bxy(compiler, &addr, dst, dstw, tmp1));
else
if (is_32bit && ((dst & OFFS_REG_MASK) || is_u12(dstw) || !is_s20(dstw))) {
FAIL_IF(make_addr_bx(compiler, &addr, dst, dstw, tmp1));
return push_inst(compiler, 0x50000000 /* st */ | R20A(src_r) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);
}
if (is_32bit)
ins = WHEN(is_u12(addr.offset), src, st, sty, addr);
else
ins = stg(src, addr.offset, addr.index, addr.base);
FAIL_IF(make_addr_bxy(compiler, &addr, dst, dstw, tmp1));
return push_inst(compiler, ins);
ins = is_32bit ? 0xe30000000050 /* sty */ : 0xe30000000024 /* stg */;
return push_inst(compiler, ins | R36A(src_r) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
}
#undef WHEN
@ -1618,16 +1649,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
{
/* TODO(mundaym): implement all */
switch (feature_type) {
case SLJIT_HAS_FPU:
case SLJIT_HAS_CLZ:
return have_eimm() ? 1 : 0; /* FLOGR instruction */
case SLJIT_HAS_ROT:
case SLJIT_HAS_PREFETCH:
return 1;
case SLJIT_HAS_CTZ:
return 2;
case SLJIT_HAS_CMOV:
return have_lscond1() ? 1 : 0;
case SLJIT_HAS_FPU:
return 1;
}
return 0;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
{
return (type >= SLJIT_UNORDERED && type <= SLJIT_ORDERED_LESS_EQUAL);
}
/* --------------------------------------------------------------------- */
/* Entry, exit */
/* --------------------------------------------------------------------- */
@ -1636,7 +1675,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
sljit_s32 word_arg_count = 0;
sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
sljit_s32 offset, i, tmp;
CHECK_ERROR();
@ -1648,8 +1687,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
offset = 2 * SSIZE_OF(sw);
if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {
FAIL_IF(push_inst(compiler, stmg(r6, r14, offset, r15))); /* save registers TODO(MGM): optimize */
offset += 9 * SSIZE_OF(sw);
if (saved_arg_count == 0) {
FAIL_IF(push_inst(compiler, stmg(r6, r14, offset, r15)));
offset += 9 * SSIZE_OF(sw);
} else {
FAIL_IF(push_inst(compiler, stmg(r6, r13 - (sljit_gpr)saved_arg_count, offset, r15)));
offset += (8 - saved_arg_count) * SSIZE_OF(sw);
}
} else {
if (scratches == SLJIT_FIRST_SAVED_REG) {
FAIL_IF(push_inst(compiler, stg(r6, offset, 0, r15)));
@ -1659,15 +1703,30 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw);
}
if (saveds == 0) {
FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15)));
offset += SSIZE_OF(sw);
} else {
FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r14, offset, r15)));
offset += (saveds + 1) * SSIZE_OF(sw);
if (saved_arg_count == 0) {
if (saveds == 0) {
FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15)));
offset += SSIZE_OF(sw);
} else {
FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r14, offset, r15)));
offset += (saveds + 1) * SSIZE_OF(sw);
}
} else if (saveds > saved_arg_count) {
if (saveds == saved_arg_count + 1) {
FAIL_IF(push_inst(compiler, stg(r14 - (sljit_gpr)saveds, offset, 0, r15)));
offset += SSIZE_OF(sw);
} else {
FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)saved_arg_count, offset, r15)));
offset += (saveds - saved_arg_count) * SSIZE_OF(sw);
}
}
}
if (saved_arg_count > 0) {
FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15)));
offset += SSIZE_OF(sw);
}
tmp = SLJIT_FS0 - fsaveds;
for (i = SLJIT_FS0; i > tmp; i--) {
FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset));
@ -1684,15 +1743,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(-local_size)));
if (options & SLJIT_ENTER_REG_ARG)
return SLJIT_SUCCESS;
arg_types >>= SLJIT_ARG_SHIFT;
saved_arg_count = 0;
tmp = 0;
while (arg_types > 0) {
if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0 - tmp), gpr(SLJIT_R0 + word_arg_count))));
tmp++;
FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0 - saved_arg_count), gpr(SLJIT_R0 + tmp))));
saved_arg_count++;
}
word_arg_count++;
tmp++;
}
arg_types >>= SLJIT_ARG_SHIFT;
@ -1713,12 +1776,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
return SLJIT_SUCCESS;
}
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler)
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_gpr last_reg)
{
sljit_s32 offset, i, tmp;
sljit_s32 local_size = compiler->local_size;
sljit_s32 saveds = compiler->saveds;
sljit_s32 scratches = compiler->scratches;
sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
if (is_u12(local_size))
FAIL_IF(push_inst(compiler, 0x41000000 /* ly */ | R20A(r15) | R12A(r15) | (sljit_ins)local_size));
@ -1727,8 +1791,13 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler)
offset = 2 * SSIZE_OF(sw);
if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {
FAIL_IF(push_inst(compiler, lmg(r6, r14, offset, r15))); /* save registers TODO(MGM): optimize */
offset += 9 * SSIZE_OF(sw);
if (kept_saveds_count == 0) {
FAIL_IF(push_inst(compiler, lmg(r6, last_reg, offset, r15)));
offset += 9 * SSIZE_OF(sw);
} else {
FAIL_IF(push_inst(compiler, lmg(r6, r13 - (sljit_gpr)kept_saveds_count, offset, r15)));
offset += (8 - kept_saveds_count) * SSIZE_OF(sw);
}
} else {
if (scratches == SLJIT_FIRST_SAVED_REG) {
FAIL_IF(push_inst(compiler, lg(r6, offset, 0, r15)));
@ -1738,15 +1807,35 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler)
offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw);
}
if (saveds == 0) {
FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15)));
offset += SSIZE_OF(sw);
} else {
FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, r14, offset, r15)));
offset += (saveds + 1) * SSIZE_OF(sw);
if (kept_saveds_count == 0) {
if (saveds == 0) {
if (last_reg == r14)
FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15)));
offset += SSIZE_OF(sw);
} else if (saveds == 1 && last_reg == r13) {
FAIL_IF(push_inst(compiler, lg(r13, offset, 0, r15)));
offset += 2 * SSIZE_OF(sw);
} else {
FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, last_reg, offset, r15)));
offset += (saveds + 1) * SSIZE_OF(sw);
}
} else if (saveds > kept_saveds_count) {
if (saveds == kept_saveds_count + 1) {
FAIL_IF(push_inst(compiler, lg(r14 - (sljit_gpr)saveds, offset, 0, r15)));
offset += SSIZE_OF(sw);
} else {
FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)kept_saveds_count, offset, r15)));
offset += (saveds - kept_saveds_count) * SSIZE_OF(sw);
}
}
}
if (kept_saveds_count > 0) {
if (last_reg == r14)
FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15)));
offset += SSIZE_OF(sw);
}
tmp = SLJIT_FS0 - compiler->fsaveds;
for (i = SLJIT_FS0; i > tmp; i--) {
FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset));
@ -1766,10 +1855,33 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler
CHECK_ERROR();
CHECK(check_sljit_emit_return_void(compiler));
FAIL_IF(emit_stack_frame_release(compiler));
FAIL_IF(emit_stack_frame_release(compiler, r14));
return push_inst(compiler, br(r14)); /* return */
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
sljit_s32 src, sljit_sw srcw)
{
CHECK_ERROR();
CHECK(check_sljit_emit_return_to(compiler, src, srcw));
if (src & SLJIT_MEM) {
ADJUST_LOCAL_OFFSET(src, srcw);
FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */));
src = TMP_REG2;
srcw = 0;
} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src))));
src = TMP_REG2;
srcw = 0;
}
FAIL_IF(emit_stack_frame_release(compiler, r13));
SLJIT_SKIP_CHECKS(compiler);
return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
}
/* --------------------------------------------------------------------- */
/* Operators */
/* --------------------------------------------------------------------- */
@ -1858,6 +1970,47 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
return push_inst(compiler, lgr(arg1, tmp0));
}
static sljit_s32 sljit_emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r, sljit_gpr src_r)
{
sljit_s32 is_ctz = (GET_OPCODE(op) == SLJIT_CTZ);
if ((op & SLJIT_32) && src_r != tmp0) {
FAIL_IF(push_inst(compiler, 0xb9160000 /* llgfr */ | R4A(tmp0) | R0A(src_r)));
src_r = tmp0;
}
if (is_ctz) {
FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */) | R4A(tmp1) | R0A(src_r)));
if (src_r == tmp0)
FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? 0x1400 /* nr */ : 0xb9800000 /* ngr */) | R4A(tmp0) | R0A(tmp1)));
else
FAIL_IF(push_inst(compiler, 0xb9e40000 /* ngrk */ | R12A(tmp1) | R4A(tmp0) | R0A(src_r)));
src_r = tmp0;
}
FAIL_IF(push_inst(compiler, 0xb9830000 /* flogr */ | R4A(tmp0) | R0A(src_r)));
if (is_ctz)
FAIL_IF(push_inst(compiler, 0xec00000000d9 /* aghik */ | R36A(tmp1) | R32A(tmp0) | ((sljit_ins)(-64 & 0xffff) << 16)));
if (op & SLJIT_32) {
if (!is_ctz && dst_r != tmp0)
return push_inst(compiler, 0xec00000000d9 /* aghik */ | R36A(dst_r) | R32A(tmp0) | ((sljit_ins)(-32 & 0xffff) << 16));
FAIL_IF(push_inst(compiler, 0xc20800000000 /* agfi */ | R36A(tmp0) | (sljit_u32)-32));
}
if (is_ctz)
FAIL_IF(push_inst(compiler, 0xec0000000057 /* rxsbg */ | R36A(tmp0) | R32A(tmp1) | ((sljit_ins)((op & SLJIT_32) ? 59 : 58) << 24) | (63 << 16) | ((sljit_ins)((op & SLJIT_32) ? 5 : 6) << 8)));
if (dst_r == tmp0)
return SLJIT_SUCCESS;
return push_inst(compiler, ((op & SLJIT_32) ? 0x1800 /* lr */ : 0xb9040000 /* lgr */) | R4A(dst_r) | R0A(tmp0));
}
/* LEVAL will be defined later with different parameters as needed */
#define WHEN2(cond, i1, i2) (cond) ? LEVAL(i1) : LEVAL(i2)
@ -2091,23 +2244,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
dst_r = FAST_IS_REG(dst) ? gpr(REG_MASK & dst) : tmp0;
src_r = FAST_IS_REG(src) ? gpr(REG_MASK & src) : tmp0;
if (src & SLJIT_MEM)
FAIL_IF(load_word(compiler, src_r, src, srcw, src & SLJIT_32));
compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);
/* TODO(mundaym): optimize loads and stores */
switch (opcode | (op & SLJIT_32)) {
switch (opcode) {
case SLJIT_NOT:
/* emulate ~x with x^-1 */
FAIL_IF(push_load_imm_inst(compiler, tmp1, -1));
if (src_r != dst_r)
FAIL_IF(push_inst(compiler, lgr(dst_r, src_r)));
if (src & SLJIT_MEM)
FAIL_IF(load_word(compiler, src_r, src, srcw, op & SLJIT_32));
FAIL_IF(push_inst(compiler, xgr(dst_r, tmp1)));
break;
case SLJIT_NOT32:
/* emulate ~x with x^-1 */
if (!(op & SLJIT_32)) {
FAIL_IF(push_load_imm_inst(compiler, tmp1, -1));
if (src_r != dst_r)
FAIL_IF(push_inst(compiler, lgr(dst_r, src_r)));
FAIL_IF(push_inst(compiler, xgr(dst_r, tmp1)));
break;
}
if (have_eimm())
FAIL_IF(push_inst(compiler, xilf(dst_r, 0xffffffff)));
else {
@ -2119,24 +2274,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
}
break;
case SLJIT_CLZ:
if (have_eimm()) {
FAIL_IF(push_inst(compiler, flogr(tmp0, src_r))); /* clobbers tmp1 */
if (dst_r != tmp0)
FAIL_IF(push_inst(compiler, lgr(dst_r, tmp0)));
} else {
abort(); /* TODO(mundaym): no eimm (?) */
}
break;
case SLJIT_CLZ32:
if (have_eimm()) {
FAIL_IF(push_inst(compiler, sllg(tmp1, src_r, 32, 0)));
FAIL_IF(push_inst(compiler, iilf(tmp1, 0xffffffff)));
FAIL_IF(push_inst(compiler, flogr(tmp0, tmp1))); /* clobbers tmp1 */
if (dst_r != tmp0)
FAIL_IF(push_inst(compiler, lr(dst_r, tmp0)));
} else {
abort(); /* TODO(mundaym): no eimm (?) */
}
case SLJIT_CTZ:
if (src & SLJIT_MEM)
FAIL_IF(load_unsigned_word(compiler, src_r, src, srcw, op & SLJIT_32));
FAIL_IF(sljit_emit_clz_ctz(compiler, op, dst_r, src_r));
break;
default:
SLJIT_UNREACHABLE();
@ -2145,9 +2287,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
if ((op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW))
FAIL_IF(update_zero_overflow(compiler, op, dst_r));
/* TODO(carenas): doesn't need FAIL_IF */
if (dst & SLJIT_MEM)
FAIL_IF(store_word(compiler, dst_r, dst, dstw, op & SLJIT_32));
return store_word(compiler, dst_r, dst, dstw, op & SLJIT_32);
return SLJIT_SUCCESS;
}
@ -2166,11 +2307,6 @@ static SLJIT_INLINE int is_commutative(sljit_s32 op)
return 0;
}
static SLJIT_INLINE int is_shift(sljit_s32 op) {
sljit_s32 v = GET_OPCODE(op);
return (v == SLJIT_SHL || v == SLJIT_ASHR || v == SLJIT_LSHR) ? 1 : 0;
}
static const struct ins_forms add_forms = {
0x1a00, /* ar */
0xb9080000, /* agr */
@ -2604,33 +2740,41 @@ static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op,
sljit_ins ins;
if (FAST_IS_REG(src1))
src_r = gpr(src1 & REG_MASK);
src_r = gpr(src1);
else
FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
if (src2 & SLJIT_IMM)
if (!(src2 & SLJIT_IMM)) {
if (FAST_IS_REG(src2))
base_r = gpr(src2);
else {
FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
base_r = tmp1;
}
if ((op & SLJIT_32) && (type == SLJIT_MSHL || type == SLJIT_MLSHR || type == SLJIT_MASHR)) {
if (base_r != tmp1) {
FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(base_r) | (59 << 24) | (1 << 23) | (63 << 16)));
base_r = tmp1;
} else
FAIL_IF(push_inst(compiler, 0xa5070000 /* nill */ | R20A(tmp1) | 0x1f));
}
} else
imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f));
else if (FAST_IS_REG(src2))
base_r = gpr(src2 & REG_MASK);
else {
FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
base_r = tmp1;
}
if ((op & SLJIT_32) && dst_r == src_r) {
if (type == SLJIT_SHL)
if (type == SLJIT_SHL || type == SLJIT_MSHL)
ins = 0x89000000 /* sll */;
else if (type == SLJIT_LSHR)
else if (type == SLJIT_LSHR || type == SLJIT_MLSHR)
ins = 0x88000000 /* srl */;
else
ins = 0x8a000000 /* sra */;
FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | R12A(base_r) | imm));
}
else {
if (type == SLJIT_SHL)
} else {
if (type == SLJIT_SHL || type == SLJIT_MSHL)
ins = (op & SLJIT_32) ? 0xeb00000000df /* sllk */ : 0xeb000000000d /* sllg */;
else if (type == SLJIT_LSHR)
else if (type == SLJIT_LSHR || type == SLJIT_MLSHR)
ins = (op & SLJIT_32) ? 0xeb00000000de /* srlk */ : 0xeb000000000c /* srlg */;
else
ins = (op & SLJIT_32) ? 0xeb00000000dc /* srak */ : 0xeb000000000a /* srag */;
@ -2644,6 +2788,47 @@ static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op,
return SLJIT_SUCCESS;
}
static sljit_s32 sljit_emit_rotate(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst,
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
{
sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
sljit_gpr src_r = tmp0;
sljit_gpr base_r = tmp0;
sljit_ins imm = 0;
sljit_ins ins;
if (FAST_IS_REG(src1))
src_r = gpr(src1);
else
FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
if (!(src2 & SLJIT_IMM)) {
if (FAST_IS_REG(src2))
base_r = gpr(src2);
else {
FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
base_r = tmp1;
}
}
if (GET_OPCODE(op) == SLJIT_ROTR) {
if (!(src2 & SLJIT_IMM)) {
ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */;
FAIL_IF(push_inst(compiler, ins | R4A(tmp1) | R0A(base_r)));
base_r = tmp1;
} else
src2w = -src2w;
}
if (src2 & SLJIT_IMM)
imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f));
ins = (op & SLJIT_32) ? 0xeb000000001d /* rll */ : 0xeb000000001c /* rllg */;
return push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | R28A(base_r) | (imm << 16));
}
static const struct ins_forms addc_forms = {
0xb9980000, /* alcr */
0xb9880000, /* alcgr */
@ -2716,10 +2901,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
FAIL_IF(sljit_emit_bitwise(compiler, op, dst, src1, src1w, src2, src2w));
break;
case SLJIT_SHL:
case SLJIT_MSHL:
case SLJIT_LSHR:
case SLJIT_MLSHR:
case SLJIT_ASHR:
case SLJIT_MASHR:
FAIL_IF(sljit_emit_shift(compiler, op, dst, src1, src1w, src2, src2w));
break;
case SLJIT_ROTL:
case SLJIT_ROTR:
FAIL_IF(sljit_emit_rotate(compiler, op, dst, src1, src1w, src2, src2w));
break;
}
if (dst & SLJIT_MEM)
@ -2734,18 +2926,130 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil
CHECK_ERROR();
CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
compiler->skip_checks = 1;
#endif
SLJIT_SKIP_CHECKS(compiler);
return sljit_emit_op2(compiler, op, (sljit_s32)tmp0, 0, src1, src1w, src2, src2w);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 src_dst,
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
{
sljit_s32 is_right;
sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64;
sljit_gpr src_dst_r = gpr(src_dst);
sljit_gpr src1_r = tmp0;
sljit_gpr src2_r = tmp1;
sljit_ins ins;
CHECK_ERROR();
CHECK(check_sljit_emit_shift_into(compiler, op, src_dst, src1, src1w, src2, src2w));
is_right = (GET_OPCODE(op) == SLJIT_LSHR || GET_OPCODE(op) == SLJIT_MLSHR);
if (src_dst == src1) {
SLJIT_SKIP_CHECKS(compiler);
return sljit_emit_op2(compiler, (is_right ? SLJIT_ROTR : SLJIT_ROTL) | (op & SLJIT_32), src_dst, 0, src_dst, 0, src2, src2w);
}
ADJUST_LOCAL_OFFSET(src1, src1w);
ADJUST_LOCAL_OFFSET(src2, src2w);
if (src1 & SLJIT_MEM)
FAIL_IF(load_word(compiler, tmp0, src1, src1w, op & SLJIT_32));
else if (src1 & SLJIT_IMM)
FAIL_IF(push_load_imm_inst(compiler, tmp0, src1w));
else
src1_r = gpr(src1);
if (src2 & SLJIT_IMM) {
src2w &= bit_length - 1;
if (src2w == 0)
return SLJIT_SUCCESS;
} else if (!(src2 & SLJIT_MEM))
src2_r = gpr(src2);
else
FAIL_IF(load_word(compiler, tmp1, src2, src2w, op & SLJIT_32));
if (src2 & SLJIT_IMM) {
if (op & SLJIT_32) {
ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;
FAIL_IF(push_inst(compiler, ins | R20A(src_dst_r) | (sljit_ins)src2w));
} else {
ins = is_right ? 0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */;
FAIL_IF(push_inst(compiler, ins | R36A(src_dst_r) | R32A(src_dst_r) | ((sljit_ins)src2w << 16)));
}
ins = 0xec0000000055 /* risbg */;
if (is_right) {
src2w = bit_length - src2w;
ins |= ((sljit_ins)(64 - bit_length) << 24) | ((sljit_ins)(63 - src2w) << 16) | ((sljit_ins)src2w << 8);
} else
ins |= ((sljit_ins)(64 - src2w) << 24) | ((sljit_ins)63 << 16) | ((sljit_ins)src2w << 8);
return push_inst(compiler, ins | R36A(src_dst_r) | R32A(src1_r));
}
if (op & SLJIT_32) {
if (GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR) {
if (src2_r != tmp1) {
FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(src2_r) | (59 << 24) | (1 << 23) | (63 << 16)));
src2_r = tmp1;
} else
FAIL_IF(push_inst(compiler, 0xa5070000 /* nill */ | R20A(tmp1) | 0x1f));
}
ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;
FAIL_IF(push_inst(compiler, ins | R20A(src_dst_r) | R12A(src2_r)));
if (src2_r != tmp1) {
FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x1f));
FAIL_IF(push_inst(compiler, 0x1700 /* xr */ | R4A(tmp1) | R0A(src2_r)));
} else
FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x1f));
if (src1_r == tmp0) {
ins = is_right ? 0x89000000 /* sll */ : 0x88000000 /* srl */;
FAIL_IF(push_inst(compiler, ins | R20A(tmp0) | R12A(tmp1) | 0x1));
} else {
ins = is_right ? 0xeb00000000df /* sllk */ : 0xeb00000000de /* srlk */;
FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src1_r) | R28A(tmp1) | (0x1 << 16)));
}
return push_inst(compiler, 0x1600 /* or */ | R4A(src_dst_r) | R0A(tmp0));
}
ins = is_right ? 0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */;
FAIL_IF(push_inst(compiler, ins | R36A(src_dst_r) | R32A(src_dst_r) | R28A(src2_r)));
ins = is_right ? 0xeb000000000d /* sllg */ : 0xeb000000000c /* srlg */;
if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) {
if (src2_r != tmp1)
FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x3f));
FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src1_r) | (0x1 << 16)));
src1_r = tmp0;
if (src2_r != tmp1)
FAIL_IF(push_inst(compiler, 0xb9820000 /* xgr */ | R4A(tmp1) | R0A(src2_r)));
else
FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x3f));
} else
FAIL_IF(push_inst(compiler, 0xb9030000 /* lcgr */ | R4A(tmp1) | R0A(src2_r)));
FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src1_r) | R28A(tmp1)));
return push_inst(compiler, 0xb9810000 /* ogr */ | R4A(src_dst_r) | R0A(tmp0));
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(
struct sljit_compiler *compiler,
sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
sljit_gpr src_r;
struct addr addr;
CHECK_ERROR();
CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
@ -2759,16 +3063,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(
return push_inst(compiler, br(src_r));
case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
/* TODO(carenas): implement? */
return SLJIT_SUCCESS;
case SLJIT_PREFETCH_L1:
case SLJIT_PREFETCH_L2:
case SLJIT_PREFETCH_L3:
case SLJIT_PREFETCH_ONCE:
/* TODO(carenas): implement */
return SLJIT_SUCCESS;
FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
return push_inst(compiler, 0xe31000000036 /* pfd */ | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
default:
/* TODO(carenas): probably should not success by default */
return SLJIT_SUCCESS;
}
@ -3064,10 +3366,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
ADJUST_LOCAL_OFFSET(dst, dstw);
if (FAST_IS_REG(dst))
return push_inst(compiler, lgr(gpr(dst), fast_link_r));
return push_inst(compiler, lgr(gpr(dst), link_r));
/* memory */
return store_word(compiler, fast_link_r, dst, dstw, 0);
return store_word(compiler, link_r, dst, dstw, 0);
}
/* --------------------------------------------------------------------- */
@ -3107,7 +3409,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
/* emit jump instruction */
type &= 0xff;
if (type >= SLJIT_FAST_CALL)
PTR_FAIL_IF(push_inst(compiler, brasl(type == SLJIT_FAST_CALL ? fast_link_r : link_r, 0)));
PTR_FAIL_IF(push_inst(compiler, brasl(link_r, 0)));
else
PTR_FAIL_IF(push_inst(compiler, brcl(mask, 0)));
@ -3117,19 +3419,16 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 arg_types)
{
SLJIT_UNUSED_ARG(arg_types);
CHECK_ERROR_PTR();
CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
if (type & SLJIT_CALL_RETURN) {
PTR_FAIL_IF(emit_stack_frame_release(compiler));
PTR_FAIL_IF(emit_stack_frame_release(compiler, r14));
type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
}
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
compiler->skip_checks = 1;
#endif
SLJIT_SKIP_CHECKS(compiler);
return sljit_emit_jump(compiler, type);
}
@ -3151,7 +3450,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
/* emit jump instruction */
if (type >= SLJIT_FAST_CALL)
return push_inst(compiler, basr(type == SLJIT_FAST_CALL ? fast_link_r : link_r, src_r));
return push_inst(compiler, basr(link_r, src_r));
return push_inst(compiler, br(src_r));
}
@ -3169,23 +3468,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
ADJUST_LOCAL_OFFSET(src, srcw);
FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */));
src = TMP_REG2;
srcw = 0;
}
if (type & SLJIT_CALL_RETURN) {
if (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0) {
if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src))));
src = TMP_REG2;
srcw = 0;
}
FAIL_IF(emit_stack_frame_release(compiler));
FAIL_IF(emit_stack_frame_release(compiler, r14));
type = SLJIT_JUMP;
}
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
compiler->skip_checks = 1;
#endif
SLJIT_SKIP_CHECKS(compiler);
return sljit_emit_ijump(compiler, type, src, srcw);
}
@ -3193,7 +3490,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
sljit_s32 dst, sljit_sw dstw,
sljit_s32 type)
{
sljit_u8 mask = get_cc(compiler, type & 0xff);
sljit_u8 mask = get_cc(compiler, type);
CHECK_ERROR();
CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
@ -3263,27 +3560,92 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
sljit_s32 dst_reg,
sljit_s32 src, sljit_sw srcw)
{
sljit_u8 mask = get_cc(compiler, type & 0xff);
sljit_gpr dst_r = gpr(dst_reg & ~SLJIT_32);
sljit_gpr src_r = FAST_IS_REG(src) ? gpr(src) : tmp0;
sljit_ins mask = get_cc(compiler, type & ~SLJIT_32);
sljit_gpr src_r;
sljit_ins ins;
CHECK_ERROR();
CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
if (src & SLJIT_IMM) {
/* TODO(mundaym): fast path with lscond2 */
FAIL_IF(push_load_imm_inst(compiler, src_r, srcw));
if (type & SLJIT_32)
srcw = (sljit_s32)srcw;
if (have_lscond2() && (src & SLJIT_IMM) && is_s16(srcw)) {
ins = (type & SLJIT_32) ? 0xec0000000042 /* lochi */ : 0xec0000000046 /* locghi */;
return push_inst(compiler, ins | R36A(gpr(dst_reg)) | (mask << 32) | (sljit_ins)(srcw & 0xffff) << 16);
}
#define LEVAL(i) i(dst_r, src_r, mask)
if (have_lscond1())
return push_inst(compiler,
WHEN2(dst_reg & SLJIT_32, locr, locgr));
if (src & SLJIT_IMM) {
FAIL_IF(push_load_imm_inst(compiler, tmp0, srcw));
src_r = tmp0;
} else
src_r = gpr(src);
#undef LEVAL
if (have_lscond1()) {
ins = (type & SLJIT_32) ? 0xb9f20000 /* locr */ : 0xb9e20000 /* locgr */;
return push_inst(compiler, ins | (mask << 12) | R4A(gpr(dst_reg)) | R0A(src_r));
}
/* TODO(mundaym): implement */
return SLJIT_ERR_UNSUPPORTED;
return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 reg,
sljit_s32 mem, sljit_sw memw)
{
sljit_ins ins, reg1, reg2, base, offs = 0;
CHECK_ERROR();
CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
if (!(reg & REG_PAIR_MASK))
return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);
ADJUST_LOCAL_OFFSET(mem, memw);
base = gpr(mem & REG_MASK);
reg1 = gpr(REG_PAIR_FIRST(reg));
reg2 = gpr(REG_PAIR_SECOND(reg));
if (mem & OFFS_REG_MASK) {
memw &= 0x3;
offs = gpr(OFFS_REG(mem));
if (memw != 0) {
FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp1) | R32A(offs) | ((sljit_ins)memw << 16)));
offs = tmp1;
} else if (!(type & SLJIT_MEM_STORE) && (base == reg1 || base == reg2) && (offs == reg1 || offs == reg2)) {
FAIL_IF(push_inst(compiler, 0xb9f80000 | R12A(tmp1) | R4A(base) | R0A(offs)));
base = tmp1;
offs = 0;
}
memw = 0;
} else if (memw < -0x80000 || memw > 0x7ffff - ((reg2 == reg1 + 1) ? 0 : SSIZE_OF(sw))) {
FAIL_IF(push_load_imm_inst(compiler, tmp1, memw));
if (base == 0)
base = tmp1;
else
offs = tmp1;
memw = 0;
}
if (offs == 0 && reg2 == (reg1 + 1)) {
ins = (type & SLJIT_MEM_STORE) ? 0xeb0000000024 /* stmg */ : 0xeb0000000004 /* lmg */;
return push_inst(compiler, ins | R36A(reg1) | R32A(reg2) | R28A(base) | disp_s20((sljit_s32)memw));
}
ins = ((type & SLJIT_MEM_STORE) ? 0xe30000000024 /* stg */ : 0xe30000000004 /* lg */) | R32A(offs) | R28A(base);
if (!(type & SLJIT_MEM_STORE) && base == reg1) {
FAIL_IF(push_inst(compiler, ins | R36A(reg2) | disp_s20((sljit_s32)memw + SSIZE_OF(sw))));
return push_inst(compiler, ins | R36A(reg1) | disp_s20((sljit_s32)memw));
}
FAIL_IF(push_inst(compiler, ins | R36A(reg1) | disp_s20((sljit_s32)memw)));
return push_inst(compiler, ins | R36A(reg2) | disp_s20((sljit_s32)memw + SSIZE_OF(sw)));
}
/* --------------------------------------------------------------------- */

File diff suppressed because it is too large Load Diff

View File

@ -101,34 +101,38 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw
/* Calculate size of b. */
inst_size += 1; /* mod r/m byte. */
if (b & SLJIT_MEM) {
if (!(b & OFFS_REG_MASK)) {
if (NOT_HALFWORD(immb)) {
PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immb));
immb = 0;
if (b & REG_MASK)
b |= TO_OFFS_REG(TMP_REG2);
else
b |= TMP_REG2;
}
else if (reg_lmap[b & REG_MASK] == 4)
b |= TO_OFFS_REG(SLJIT_SP);
if (!(b & OFFS_REG_MASK) && NOT_HALFWORD(immb)) {
PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immb));
immb = 0;
if (b & REG_MASK)
b |= TO_OFFS_REG(TMP_REG2);
else
b |= TMP_REG2;
}
if (!(b & REG_MASK))
inst_size += 1 + sizeof(sljit_s32); /* SIB byte required to avoid RIP based addressing. */
else {
if (reg_map[b & REG_MASK] >= 8)
rex |= REX_B;
if (immb != 0 && (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP))) {
if (immb != 0 && !(b & OFFS_REG_MASK)) {
/* Immediate operand. */
if (immb <= 127 && immb >= -128)
inst_size += sizeof(sljit_s8);
else
inst_size += sizeof(sljit_s32);
}
else if (reg_lmap[b & REG_MASK] == 5)
inst_size += sizeof(sljit_s8);
else if (reg_lmap[b & REG_MASK] == 5) {
/* Swap registers if possible. */
if ((b & OFFS_REG_MASK) && (immb & 0x3) == 0 && reg_lmap[OFFS_REG(b)] != 5)
b = SLJIT_MEM | OFFS_REG(b) | TO_OFFS_REG(b & REG_MASK);
else
inst_size += sizeof(sljit_s8);
}
if (reg_map[b & REG_MASK] >= 8)
rex |= REX_B;
if (reg_lmap[b & REG_MASK] == 4 && !(b & OFFS_REG_MASK))
b |= TO_OFFS_REG(SLJIT_SP);
if (b & OFFS_REG_MASK) {
inst_size += 1; /* SIB byte. */
@ -153,9 +157,9 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw
inst_size += 4;
}
else if (flags & EX86_SHIFT_INS) {
imma &= compiler->mode32 ? 0x1f : 0x3f;
SLJIT_ASSERT(imma <= (compiler->mode32 ? 0x1f : 0x3f));
if (imma != 1) {
inst_size ++;
inst_size++;
flags |= EX86_BYTE_ARG;
}
} else if (flags & EX86_BYTE_ARG)
@ -223,7 +227,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw
} else if (b & REG_MASK) {
reg_lmap_b = reg_lmap[b & REG_MASK];
if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP) || reg_lmap_b == 5) {
if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
if (immb != 0 || reg_lmap_b == 5) {
if (immb <= 127 && immb >= -128)
*buf_ptr |= 0x40;
@ -248,8 +252,14 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw
}
}
else {
if (reg_lmap_b == 5)
*buf_ptr |= 0x40;
*buf_ptr++ |= 0x04;
*buf_ptr++ = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6));
if (reg_lmap_b == 5)
*buf_ptr++ = 0;
}
}
else {
@ -366,7 +376,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
{
sljit_uw size;
sljit_s32 word_arg_count = 0;
sljit_s32 saved_arg_count = 0;
sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
sljit_s32 saved_regs_size, tmp, i;
#ifdef _WIN64
sljit_s32 saved_float_regs_size;
@ -379,16 +389,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
if (options & SLJIT_ENTER_REG_ARG)
arg_types = 0;
/* Emit ENDBR64 at function entry if needed. */
FAIL_IF(emit_endbranch(compiler));
compiler->mode32 = 0;
/* Including the return address saved by the call instruction. */
saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);
tmp = SLJIT_S0 - saveds;
for (i = SLJIT_S0; i > tmp; i--) {
for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) {
size = reg_map[i] >= 8 ? 2 : 1;
inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
FAIL_IF(!inst);
@ -561,15 +574,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
#endif /* _WIN64 */
/* Including the return address saved by the call instruction. */
saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);
compiler->local_size = ((local_size + saved_regs_size + 0xf) & ~0xf) - saved_regs_size;
return SLJIT_SUCCESS;
}
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler)
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to)
{
sljit_uw size;
sljit_s32 i, tmp;
sljit_s32 local_size, i, tmp;
sljit_u8 *inst;
#ifdef _WIN64
sljit_s32 saved_float_regs_offset;
@ -598,30 +611,21 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler)
*inst = MOVAPS_x_xm;
saved_float_regs_offset += 16;
}
compiler->mode32 = 0;
}
#endif /* _WIN64 */
if (compiler->local_size > 0) {
if (compiler->local_size <= 127) {
inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
FAIL_IF(!inst);
INC_SIZE(4);
*inst++ = REX_W;
*inst++ = GROUP_BINARY_83;
*inst++ = MOD_REG | ADD | 4;
*inst = U8(compiler->local_size);
}
else {
inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
FAIL_IF(!inst);
INC_SIZE(7);
*inst++ = REX_W;
*inst++ = GROUP_BINARY_81;
*inst++ = MOD_REG | ADD | 4;
sljit_unaligned_store_s32(inst, compiler->local_size);
}
local_size = compiler->local_size;
if (is_return_to && compiler->scratches < SLJIT_FIRST_SAVED_REG && (compiler->saveds == SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
local_size += SSIZE_OF(sw);
is_return_to = 0;
}
if (local_size > 0)
BINARY_IMM32(ADD, local_size, SLJIT_SP, 0);
tmp = compiler->scratches;
for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
size = reg_map[i] >= 8 ? 2 : 1;
@ -633,8 +637,8 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler)
POP_REG(reg_lmap[i]);
}
tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
for (i = tmp; i <= SLJIT_S0; i++) {
tmp = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
for (i = SLJIT_S0 + 1 - compiler->saveds; i <= tmp; i++) {
size = reg_map[i] >= 8 ? 2 : 1;
inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
FAIL_IF(!inst);
@ -644,6 +648,9 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler)
POP_REG(reg_lmap[i]);
}
if (is_return_to)
BINARY_IMM32(ADD, sizeof(sljit_sw), SLJIT_SP, 0);
return SLJIT_SUCCESS;
}
@ -654,7 +661,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler
CHECK_ERROR();
CHECK(check_sljit_emit_return_void(compiler));
FAIL_IF(emit_stack_frame_release(compiler));
compiler->mode32 = 0;
FAIL_IF(emit_stack_frame_release(compiler, 0));
inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
FAIL_IF(!inst);
@ -663,6 +672,28 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler
return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
sljit_s32 src, sljit_sw srcw)
{
CHECK_ERROR();
CHECK(check_sljit_emit_return_to(compiler, src, srcw));
compiler->mode32 = 0;
if ((src & SLJIT_MEM) || (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) {
ADJUST_LOCAL_OFFSET(src, srcw);
EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
src = TMP_REG2;
srcw = 0;
}
FAIL_IF(emit_stack_frame_release(compiler, 1));
SLJIT_SKIP_CHECKS(compiler);
return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
}
/* --------------------------------------------------------------------- */
/* Call / return instructions */
/* --------------------------------------------------------------------- */
@ -786,17 +817,15 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile
compiler->mode32 = 0;
PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL));
if ((type & 0xff) != SLJIT_CALL_REG_ARG)
PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL));
if (type & SLJIT_CALL_RETURN) {
PTR_FAIL_IF(emit_stack_frame_release(compiler));
PTR_FAIL_IF(emit_stack_frame_release(compiler, 0));
type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
}
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
compiler->skip_checks = 1;
#endif
SLJIT_SKIP_CHECKS(compiler);
return sljit_emit_jump(compiler, type);
}
@ -816,22 +845,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
}
if (type & SLJIT_CALL_RETURN) {
if (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0) {
if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
src = TMP_REG2;
}
FAIL_IF(emit_stack_frame_release(compiler));
type = SLJIT_JUMP;
FAIL_IF(emit_stack_frame_release(compiler, 0));
}
FAIL_IF(call_with_args(compiler, arg_types, &src));
if ((type & 0xff) != SLJIT_CALL_REG_ARG)
FAIL_IF(call_with_args(compiler, arg_types, &src));
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
compiler->skip_checks = 1;
#endif
if (type & SLJIT_CALL_RETURN)
type = SLJIT_JUMP;
SLJIT_SKIP_CHECKS(compiler);
return sljit_emit_ijump(compiler, type, src, srcw);
}
@ -907,9 +935,89 @@ static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src
}
/* --------------------------------------------------------------------- */
/* Extend input */
/* Other operations */
/* --------------------------------------------------------------------- */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 reg,
sljit_s32 mem, sljit_sw memw)
{
sljit_u8* inst;
sljit_s32 i, next, reg_idx;
sljit_u8 regs[2];
CHECK_ERROR();
CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
if (!(reg & REG_PAIR_MASK))
return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);
ADJUST_LOCAL_OFFSET(mem, memw);
compiler->mode32 = 0;
if ((mem & REG_MASK) == 0) {
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, memw);
mem = SLJIT_MEM1(TMP_REG1);
memw = 0;
} else if (!(mem & OFFS_REG_MASK) && ((memw < HALFWORD_MIN) || (memw > HALFWORD_MAX - SSIZE_OF(sw)))) {
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, memw);
mem = SLJIT_MEM2(mem & REG_MASK, TMP_REG1);
memw = 0;
}
regs[0] = U8(REG_PAIR_FIRST(reg));
regs[1] = U8(REG_PAIR_SECOND(reg));
next = SSIZE_OF(sw);
if (!(type & SLJIT_MEM_STORE) && (regs[0] == (mem & REG_MASK) || regs[0] == OFFS_REG(mem))) {
if (regs[1] == (mem & REG_MASK) || regs[1] == OFFS_REG(mem)) {
/* Base and offset cannot be TMP_REG1. */
EMIT_MOV(compiler, TMP_REG1, 0, OFFS_REG(mem), 0);
if (regs[1] == OFFS_REG(mem))
next = -SSIZE_OF(sw);
mem = (mem & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1);
} else {
next = -SSIZE_OF(sw);
if (!(mem & OFFS_REG_MASK))
memw += SSIZE_OF(sw);
}
}
for (i = 0; i < 2; i++) {
reg_idx = next > 0 ? i : (i ^ 0x1);
reg = regs[reg_idx];
if ((mem & OFFS_REG_MASK) && (reg_idx == 1)) {
inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(1 + 5));
FAIL_IF(!inst);
INC_SIZE(5);
inst[0] = U8(REX_W | ((reg_map[reg] >= 8) ? REX_R : 0) | ((reg_map[mem & REG_MASK] >= 8) ? REX_B : 0) | ((reg_map[OFFS_REG(mem)] >= 8) ? REX_X : 0));
inst[1] = (type & SLJIT_MEM_STORE) ? MOV_rm_r : MOV_r_rm;
inst[2] = 0x44 | U8(reg_lmap[reg] << 3);
inst[3] = U8(memw << 6) | U8(reg_lmap[OFFS_REG(mem)] << 3) | reg_lmap[mem & REG_MASK];
inst[4] = sizeof(sljit_sw);
} else if (type & SLJIT_MEM_STORE) {
EMIT_MOV(compiler, mem, memw, reg, 0);
} else {
EMIT_MOV(compiler, reg, 0, mem, memw);
}
if (!(mem & OFFS_REG_MASK))
memw += next;
}
return SLJIT_SUCCESS;
}
static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)

View File

@ -26,11 +26,7 @@
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
return "x86" SLJIT_CPUINFO " ABI:fastcall";
#else
return "x86" SLJIT_CPUINFO;
#endif
}
/*
@ -78,10 +74,7 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
#define CHECK_EXTRA_REGS(p, w, do) \
if (p >= SLJIT_R3 && p <= SLJIT_S3) { \
if (p <= compiler->scratches) \
w = compiler->scratches_offset + ((p) - SLJIT_R3) * SSIZE_OF(sw); \
else \
w = compiler->locals_offset + ((p) - SLJIT_S2) * SSIZE_OF(sw); \
w = (2 * SSIZE_OF(sw)) + ((p) - SLJIT_R3) * SSIZE_OF(sw); \
p = SLJIT_MEM1(SLJIT_SP); \
do; \
}
@ -181,6 +174,7 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
#define AND_rm_r 0x21
#define ANDPD_x_xm 0x54
#define BSR_r_rm (/* GROUP_0F */ 0xbd)
#define BSF_r_rm (/* GROUP_0F */ 0xbc)
#define CALL_i32 0xe8
#define CALL_rm (/* GROUP_FF */ 2 << 3)
#define CDQ 0x99
@ -194,6 +188,8 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
#define CVTTSD2SI_r_xm 0x2c
#define DIV (/* GROUP_F7 */ 6 << 3)
#define DIVSD_x_xm 0x5e
#define FLDS 0xd9
#define FLDL 0xdd
#define FSTPS 0xd9
#define FSTPD 0xdd
#define INT3 0xcc
@ -209,6 +205,7 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
#define JMP_rm (/* GROUP_FF */ 4 << 3)
#define LEA_r_m 0x8d
#define LOOP_i8 0xe2
#define LZCNT_r_rm (/* GROUP_F3 */ /* GROUP_0F */ 0xbd)
#define MOV_r_rm 0x8b
#define MOV_r_i32 0xb8
#define MOV_rm_r 0x89
@ -242,6 +239,8 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
#define PUSH_r 0x50
#define PUSH_rm (/* GROUP_FF */ 6 << 3)
#define PUSHF 0x9c
#define ROL (/* SHIFT */ 0 << 3)
#define ROR (/* SHIFT */ 1 << 3)
#define RET_near 0xc3
#define RET_i16 0xc2
#define SBB (/* BINARY */ 3 << 3)
@ -250,6 +249,8 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
#define SBB_rm_r 0x19
#define SAR (/* SHIFT */ 7 << 3)
#define SHL (/* SHIFT */ 4 << 3)
#define SHLD (/* GROUP_0F */ 0xa5)
#define SHRD (/* GROUP_0F */ 0xad)
#define SHR (/* SHIFT */ 5 << 3)
#define SUB (/* BINARY */ 5 << 3)
#define SUB_EAX_i32 0x2d
@ -258,6 +259,7 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
#define SUBSD_x_xm 0x5c
#define TEST_EAX_i32 0xa9
#define TEST_rm_r 0x85
#define TZCNT_r_rm (/* GROUP_F3 */ /* GROUP_0F */ 0xbc)
#define UCOMISD_x_xm 0x2e
#define UNPCKLPD_x_xm 0x14
#define XCHG_EAX_r 0x90
@ -269,6 +271,7 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
#define XORPD_x_xm 0x57
#define GROUP_0F 0x0f
#define GROUP_F3 0xf3
#define GROUP_F7 0xf7
#define GROUP_FF 0xff
#define GROUP_BINARY_81 0x81
@ -290,10 +293,15 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
/* Multithreading does not affect these static variables, since they store
built-in CPU features. Therefore they can be overwritten by different threads
if they detect the CPU features in the same time. */
#define CPU_FEATURE_DETECTED 0x001
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
static sljit_s32 cpu_has_sse2 = -1;
#define CPU_FEATURE_SSE2 0x002
#endif
static sljit_s32 cpu_has_cmov = -1;
#define CPU_FEATURE_LZCNT 0x004
#define CPU_FEATURE_TZCNT 0x008
#define CPU_FEATURE_CMOV 0x010
static sljit_u32 cpu_feature_list = 0;
#ifdef _WIN32_WCE
#include <cmnintrin.h>
@ -326,17 +334,64 @@ static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
static void get_cpu_features(void)
{
sljit_u32 features;
sljit_u32 feature_list = CPU_FEATURE_DETECTED;
sljit_u32 value;
#if defined(_MSC_VER) && _MSC_VER >= 1400
int CPUInfo[4];
__cpuid(CPUInfo, 0);
if (CPUInfo[0] >= 7) {
__cpuidex(CPUInfo, 7, 0);
if (CPUInfo[1] & 0x8)
feature_list |= CPU_FEATURE_TZCNT;
}
__cpuid(CPUInfo, (int)0x80000001);
if (CPUInfo[2] & 0x20)
feature_list |= CPU_FEATURE_LZCNT;
__cpuid(CPUInfo, 1);
features = (sljit_u32)CPUInfo[3];
value = (sljit_u32)CPUInfo[3];
#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)
/* AT&T syntax. */
__asm__ (
"movl $0x0, %%eax\n"
"lzcnt %%eax, %%eax\n"
"setnz %%al\n"
"movl %%eax, %0\n"
: "=g" (value)
:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
: "eax"
#else
: "rax"
#endif
);
if (value & 0x1)
feature_list |= CPU_FEATURE_LZCNT;
__asm__ (
"movl $0x0, %%eax\n"
"tzcnt %%eax, %%eax\n"
"setnz %%al\n"
"movl %%eax, %0\n"
: "=g" (value)
:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
: "eax"
#else
: "rax"
#endif
);
if (value & 0x1)
feature_list |= CPU_FEATURE_TZCNT;
__asm__ (
"movl $0x1, %%eax\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
@ -349,7 +404,7 @@ static void get_cpu_features(void)
"pop %%ebx\n"
#endif
"movl %%edx, %0\n"
: "=g" (features)
: "=g" (value)
:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
: "%eax", "%ecx", "%edx"
@ -361,47 +416,83 @@ static void get_cpu_features(void)
#else /* _MSC_VER && _MSC_VER >= 1400 */
/* Intel syntax. */
__asm {
mov eax, 0
lzcnt eax, eax
setnz al
mov value, eax
}
if (value & 0x1)
feature_list |= CPU_FEATURE_LZCNT;
__asm {
mov eax, 0
tzcnt eax, eax
setnz al
mov value, eax
}
if (value & 0x1)
feature_list |= CPU_FEATURE_TZCNT;
__asm {
mov eax, 1
cpuid
mov features, edx
mov value, edx
}
#endif /* _MSC_VER && _MSC_VER >= 1400 */
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
cpu_has_sse2 = (features >> 26) & 0x1;
if (value & 0x4000000)
feature_list |= CPU_FEATURE_SSE2;
#endif
cpu_has_cmov = (features >> 15) & 0x1;
if (value & 0x8000)
feature_list |= CPU_FEATURE_CMOV;
cpu_feature_list = feature_list;
}
static sljit_u8 get_jump_code(sljit_uw type)
{
switch (type) {
case SLJIT_EQUAL:
case SLJIT_EQUAL_F64:
case SLJIT_F_EQUAL:
case SLJIT_UNORDERED_OR_EQUAL:
case SLJIT_ORDERED_EQUAL: /* Not supported. */
return 0x84 /* je */;
case SLJIT_NOT_EQUAL:
case SLJIT_NOT_EQUAL_F64:
case SLJIT_F_NOT_EQUAL:
case SLJIT_ORDERED_NOT_EQUAL:
case SLJIT_UNORDERED_OR_NOT_EQUAL: /* Not supported. */
return 0x85 /* jne */;
case SLJIT_LESS:
case SLJIT_CARRY:
case SLJIT_LESS_F64:
case SLJIT_F_LESS:
case SLJIT_UNORDERED_OR_LESS:
case SLJIT_UNORDERED_OR_GREATER:
return 0x82 /* jc */;
case SLJIT_GREATER_EQUAL:
case SLJIT_NOT_CARRY:
case SLJIT_GREATER_EQUAL_F64:
case SLJIT_F_GREATER_EQUAL:
case SLJIT_ORDERED_GREATER_EQUAL:
case SLJIT_ORDERED_LESS_EQUAL:
return 0x83 /* jae */;
case SLJIT_GREATER:
case SLJIT_GREATER_F64:
case SLJIT_F_GREATER:
case SLJIT_ORDERED_LESS:
case SLJIT_ORDERED_GREATER:
return 0x87 /* jnbe */;
case SLJIT_LESS_EQUAL:
case SLJIT_LESS_EQUAL_F64:
case SLJIT_F_LESS_EQUAL:
case SLJIT_UNORDERED_OR_GREATER_EQUAL:
case SLJIT_UNORDERED_OR_LESS_EQUAL:
return 0x86 /* jbe */;
case SLJIT_SIG_LESS:
@ -422,10 +513,10 @@ static sljit_u8 get_jump_code(sljit_uw type)
case SLJIT_NOT_OVERFLOW:
return 0x81 /* jno */;
case SLJIT_UNORDERED_F64:
case SLJIT_UNORDERED:
return 0x8a /* jp */;
case SLJIT_ORDERED_F64:
case SLJIT_ORDERED:
return 0x8b /* jpo */;
}
return 0;
@ -449,13 +540,13 @@ static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code
else
label_addr = jump->u.target - (sljit_uw)executable_offset;
short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
return generate_far_jump_code(jump, code_ptr);
#endif
short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;
if (type == SLJIT_JUMP) {
if (short_jump)
*code_ptr++ = JMP_i8;
@ -581,32 +672,33 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
jump = compiler->jumps;
while (jump) {
jump_addr = jump->addr + (sljit_uw)executable_offset;
if (jump->flags & (PATCH_MB | PATCH_MW)) {
if (jump->flags & JUMP_LABEL)
jump_addr = jump->u.label->addr;
else
jump_addr = jump->u.target;
if (jump->flags & PATCH_MB) {
SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) <= 127);
*(sljit_u8*)jump->addr = U8(jump->u.label->addr - (jump_addr + sizeof(sljit_s8)));
} else if (jump->flags & PATCH_MW) {
if (jump->flags & JUMP_LABEL) {
jump_addr -= jump->addr + (sljit_uw)executable_offset;
if (jump->flags & PATCH_MB) {
jump_addr -= sizeof(sljit_s8);
SLJIT_ASSERT((sljit_sw)jump_addr >= -128 && (sljit_sw)jump_addr <= 127);
*(sljit_u8*)jump->addr = U8(jump_addr);
} else {
jump_addr -= sizeof(sljit_s32);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_sw))));
sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)jump_addr);
#else
SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))));
#endif
}
else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_sw))));
#else
SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump_addr + sizeof(sljit_s32))));
SLJIT_ASSERT((sljit_sw)jump_addr >= HALFWORD_MIN && (sljit_sw)jump_addr <= HALFWORD_MAX);
sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)jump_addr);
#endif
}
}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
else if (jump->flags & PATCH_MD)
sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)jump->u.label->addr);
else if (jump->flags & PATCH_MD) {
SLJIT_ASSERT(jump->flags & JUMP_LABEL);
sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)jump->u.label->addr);
}
#endif
jump = jump->next;
@ -647,9 +739,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
#ifdef SLJIT_IS_FPU_AVAILABLE
return SLJIT_IS_FPU_AVAILABLE;
#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
if (cpu_has_sse2 == -1)
if (cpu_feature_list == 0)
get_cpu_features();
return cpu_has_sse2;
return (cpu_feature_list & CPU_FEATURE_SSE2) != 0;
#else /* SLJIT_DETECT_SSE2 */
return 1;
#endif /* SLJIT_DETECT_SSE2 */
@ -657,31 +749,57 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
case SLJIT_HAS_VIRTUAL_REGISTERS:
return 1;
#endif
#endif /* SLJIT_CONFIG_X86_32 */
case SLJIT_HAS_CLZ:
case SLJIT_HAS_CMOV:
if (cpu_has_cmov == -1)
if (cpu_feature_list == 0)
get_cpu_features();
return cpu_has_cmov;
return (cpu_feature_list & CPU_FEATURE_LZCNT) ? 1 : 2;
case SLJIT_HAS_CTZ:
if (cpu_feature_list == 0)
get_cpu_features();
return (cpu_feature_list & CPU_FEATURE_TZCNT) ? 1 : 2;
case SLJIT_HAS_CMOV:
if (cpu_feature_list == 0)
get_cpu_features();
return (cpu_feature_list & CPU_FEATURE_CMOV) != 0;
case SLJIT_HAS_ROT:
case SLJIT_HAS_PREFETCH:
return 1;
case SLJIT_HAS_SSE2:
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
if (cpu_has_sse2 == -1)
if (cpu_feature_list == 0)
get_cpu_features();
return cpu_has_sse2;
#else
return (cpu_feature_list & CPU_FEATURE_SSE2) != 0;
#else /* !SLJIT_DETECT_SSE2 */
return 1;
#endif
#endif /* SLJIT_DETECT_SSE2 */
default:
return 0;
}
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
{
if (type < SLJIT_UNORDERED || type > SLJIT_ORDERED_LESS_EQUAL)
return 0;
switch (type) {
case SLJIT_ORDERED_EQUAL:
case SLJIT_UNORDERED_OR_NOT_EQUAL:
return 0;
}
return 1;
}
/* --------------------------------------------------------------------- */
/* Operators */
/* --------------------------------------------------------------------- */
@ -1385,47 +1503,75 @@ static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler,
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
static const sljit_sw emit_clz_arg = 32 + 31;
static const sljit_sw emit_ctz_arg = 32;
#endif
static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 is_clz,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)
{
sljit_u8* inst;
sljit_s32 dst_r;
sljit_sw max;
SLJIT_UNUSED_ARG(op_flags);
if (cpu_has_cmov == -1)
if (cpu_feature_list == 0)
get_cpu_features();
dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
if (is_clz ? (cpu_feature_list & CPU_FEATURE_LZCNT) : (cpu_feature_list & CPU_FEATURE_TZCNT)) {
/* Group prefix added separately. */
inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
FAIL_IF(!inst);
INC_SIZE(1);
*inst++ = GROUP_F3;
inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
FAIL_IF(!inst);
*inst++ = GROUP_0F;
*inst = is_clz ? LZCNT_r_rm : TZCNT_r_rm;
if (dst & SLJIT_MEM)
EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
return SLJIT_SUCCESS;
}
inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
FAIL_IF(!inst);
*inst++ = GROUP_0F;
*inst = BSR_r_rm;
*inst = is_clz ? BSR_r_rm : BSF_r_rm;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
if (cpu_has_cmov) {
max = is_clz ? (32 + 31) : 32;
if (cpu_feature_list & CPU_FEATURE_CMOV) {
if (dst_r != TMP_REG1) {
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 32 + 31);
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, max);
inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
}
else
inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), (sljit_sw)&emit_clz_arg);
inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), is_clz ? (sljit_sw)&emit_clz_arg : (sljit_sw)&emit_ctz_arg);
FAIL_IF(!inst);
*inst++ = GROUP_0F;
*inst = CMOVE_r_rm;
}
else
FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, 32 + 31));
FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max));
inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
if (is_clz) {
inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
FAIL_IF(!inst);
*(inst + 1) |= XOR;
}
#else
if (cpu_has_cmov) {
EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, !(op_flags & SLJIT_32) ? (64 + 63) : (32 + 31));
if (is_clz)
max = compiler->mode32 ? (32 + 31) : (64 + 63);
else
max = compiler->mode32 ? 32 : 64;
if (cpu_feature_list & CPU_FEATURE_CMOV) {
EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, max);
inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
FAIL_IF(!inst);
@ -1433,14 +1579,15 @@ static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
*inst = CMOVE_r_rm;
}
else
FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, !(op_flags & SLJIT_32) ? (64 + 63) : (32 + 31)));
FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max));
inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_32) ? 63 : 31, dst_r, 0);
if (is_clz) {
inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, max >> 1, dst_r, 0);
FAIL_IF(!inst);
*(inst + 1) |= XOR;
}
#endif
FAIL_IF(!inst);
*(inst + 1) |= XOR;
if (dst & SLJIT_MEM)
EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
return SLJIT_SUCCESS;
@ -1578,7 +1725,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);
case SLJIT_CLZ:
return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
case SLJIT_CTZ:
return emit_clz_ctz(compiler, (op == SLJIT_CLZ), dst, dstw, src, srcw);
}
return SLJIT_SUCCESS;
@ -2116,6 +2264,9 @@ static sljit_s32 emit_shift(struct sljit_compiler *compiler,
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
{
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
sljit_s32 mode32;
#endif
sljit_u8* inst;
if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
@ -2155,41 +2306,62 @@ static sljit_s32 emit_shift(struct sljit_compiler *compiler,
inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
FAIL_IF(!inst);
*inst |= mode;
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
return emit_mov(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
}
else if (FAST_IS_REG(dst) && dst != src2 && dst != TMP_REG1 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
if (FAST_IS_REG(dst) && dst != src2 && dst != TMP_REG1 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
if (src1 != dst)
EMIT_MOV(compiler, dst, 0, src1, src1w);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
mode32 = compiler->mode32;
compiler->mode32 = 0;
#endif
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = mode32;
#endif
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
FAIL_IF(!inst);
*inst |= mode;
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
}
else {
/* This case is complex since ecx itself may be used for
addressing, and this case must be supported as well. */
EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
FAIL_IF(!inst);
*inst |= mode;
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
#else
EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
FAIL_IF(!inst);
*inst |= mode;
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = 0;
#endif
if (dst != TMP_REG1)
return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = mode32;
#endif
return SLJIT_SUCCESS;
}
/* This case is complex since ecx itself may be used for
addressing, and this case must be supported as well. */
EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
#else /* !SLJIT_CONFIG_X86_32 */
mode32 = compiler->mode32;
compiler->mode32 = 0;
EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
compiler->mode32 = mode32;
#endif /* SLJIT_CONFIG_X86_32 */
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
FAIL_IF(!inst);
*inst |= mode;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
#else
compiler->mode32 = 0;
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
compiler->mode32 = mode32;
#endif /* SLJIT_CONFIG_X86_32 */
if (dst != TMP_REG1)
return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
return SLJIT_SUCCESS;
}
@ -2202,12 +2374,13 @@ static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
/* The CPU does not set flags if the shift count is 0. */
if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
src2w &= compiler->mode32 ? 0x1f : 0x3f;
#else /* !SLJIT_CONFIG_X86_64 */
src2w &= 0x1f;
#endif /* SLJIT_CONFIG_X86_64 */
if (src2w != 0)
return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
#else
if ((src2w & 0x1f) != 0)
return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
#endif
if (!set_flags)
return emit_mov(compiler, dst, dstw, src1, src1w);
/* OR dst, src, 0 */
@ -2289,14 +2462,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
return emit_cum_binary(compiler, BINARY_OPCODE(XOR),
dst, dstw, src1, src1w, src2, src2w);
case SLJIT_SHL:
case SLJIT_MSHL:
return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op),
dst, dstw, src1, src1w, src2, src2w);
case SLJIT_LSHR:
case SLJIT_MLSHR:
return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op),
dst, dstw, src1, src1w, src2, src2w);
case SLJIT_ASHR:
case SLJIT_MASHR:
return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op),
dst, dstw, src1, src1w, src2, src2w);
case SLJIT_ROTL:
return emit_shift_with_flags(compiler, ROL, 0,
dst, dstw, src1, src1w, src2, src2w);
case SLJIT_ROTR:
return emit_shift_with_flags(compiler, ROR, 0,
dst, dstw, src1, src1w, src2, src2w);
}
return SLJIT_SUCCESS;
@ -2312,10 +2494,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil
CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
if (opcode != SLJIT_SUB && opcode != SLJIT_AND) {
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
compiler->skip_checks = 1;
#endif
SLJIT_SKIP_CHECKS(compiler);
return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w);
}
@ -2334,6 +2513,122 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil
return emit_test_binary(compiler, src1, src1w, src2, src2w);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 src_dst,
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
{
sljit_s32 restore_ecx = 0;
sljit_s32 is_rotate, is_left;
sljit_u8* inst;
sljit_sw dstw = 0;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
sljit_s32 tmp2 = SLJIT_MEM1(SLJIT_SP);
#else /* !SLJIT_CONFIG_X86_32 */
sljit_s32 tmp2 = TMP_REG2;
#endif /* SLJIT_CONFIG_X86_32 */
CHECK_ERROR();
CHECK(check_sljit_emit_shift_into(compiler, op, src_dst, src1, src1w, src2, src2w));
ADJUST_LOCAL_OFFSET(src1, src1w);
ADJUST_LOCAL_OFFSET(src2, src2w);
CHECK_EXTRA_REGS(src1, src1w, (void)0);
CHECK_EXTRA_REGS(src2, src2w, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = op & SLJIT_32;
#endif
if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
src2w &= 0x1f;
#else /* !SLJIT_CONFIG_X86_32 */
src2w &= (op & SLJIT_32) ? 0x1f : 0x3f;
#endif /* SLJIT_CONFIG_X86_32 */
if (src2w == 0)
return SLJIT_SUCCESS;
}
is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL);
is_rotate = (src_dst == src1);
CHECK_EXTRA_REGS(src_dst, dstw, (void)0);
if (is_rotate)
return emit_shift(compiler, is_left ? ROL : ROR, src_dst, dstw, src1, src1w, src2, src2w);
if ((src2 & SLJIT_IMM) || src2 == SLJIT_PREF_SHIFT_REG) {
if (!FAST_IS_REG(src1)) {
EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
src1 = TMP_REG1;
}
} else if (FAST_IS_REG(src1)) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = 0;
#endif
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = op & SLJIT_32;
#endif
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
if (src1 == SLJIT_PREF_SHIFT_REG)
src1 = TMP_REG1;
if (src_dst == SLJIT_PREF_SHIFT_REG)
src_dst = TMP_REG1;
restore_ecx = 1;
} else {
EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = 0;
#endif
EMIT_MOV(compiler, tmp2, 0, SLJIT_PREF_SHIFT_REG, 0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = op & SLJIT_32;
#endif
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
src1 = TMP_REG1;
if (src_dst == SLJIT_PREF_SHIFT_REG) {
src_dst = tmp2;
SLJIT_ASSERT(dstw == 0);
}
restore_ecx = 2;
}
inst = emit_x86_instruction(compiler, 2, src1, 0, src_dst, dstw);
FAIL_IF(!inst);
inst[0] = GROUP_0F;
if (src2 & SLJIT_IMM) {
inst[1] = U8((is_left ? SHLD : SHRD) - 1);
/* Immedate argument is added separately. */
inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
FAIL_IF(!inst);
INC_SIZE(1);
*inst = U8(src2w);
} else
inst[1] = U8(is_left ? SHLD : SHRD);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = 0;
#endif
if (restore_ecx == 1)
return emit_mov(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
if (restore_ecx == 2)
return emit_mov(compiler, SLJIT_PREF_SHIFT_REG, 0, tmp2, 0);
return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 src, sljit_sw srcw)
{
@ -2516,6 +2811,19 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
{
switch (GET_FLAG_TYPE(op)) {
case SLJIT_ORDERED_LESS:
case SLJIT_UNORDERED_OR_GREATER_EQUAL:
case SLJIT_UNORDERED_OR_GREATER:
case SLJIT_ORDERED_LESS_EQUAL:
if (!FAST_IS_REG(src2)) {
FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src2, src2w));
src2 = TMP_FREG;
}
return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_32), src2, src1, src1w);
}
if (!FAST_IS_REG(src1)) {
FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
src1 = TMP_FREG;
@ -2769,7 +3077,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
ADJUST_LOCAL_OFFSET(dst, dstw);
CHECK_EXTRA_REGS(dst, dstw, (void)0);
type &= 0xff;
/* setcc = jcc + 0x10. */
cond_set = U8(get_jump_code((sljit_uw)type) + 0x10);
@ -2813,10 +3120,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
}
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
compiler->skip_checks = 1;
#endif
SLJIT_SKIP_CHECKS(compiler);
return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
#else
@ -2839,10 +3143,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
}
/* Low byte is not accessible. */
if (cpu_has_cmov == -1)
if (cpu_feature_list == 0)
get_cpu_features();
if (cpu_has_cmov) {
if (cpu_feature_list & CPU_FEATURE_CMOV) {
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
/* a xor reg, reg operation would overwrite the flags. */
EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);
@ -2927,10 +3231,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
if (GET_OPCODE(op) < SLJIT_ADD)
return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
compiler->skip_checks = 1;
#endif
SLJIT_SKIP_CHECKS(compiler);
return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
#endif /* SLJIT_CONFIG_X86_64 */
}
@ -2945,7 +3246,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
dst_reg &= ~SLJIT_32;
type &= ~SLJIT_32;
if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV) || (dst_reg >= SLJIT_R3 && dst_reg <= SLJIT_S3))
return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
@ -2958,8 +3259,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
CHECK_EXTRA_REGS(src, srcw, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = dst_reg & SLJIT_32;
dst_reg &= ~SLJIT_32;
compiler->mode32 = type & SLJIT_32;
type &= ~SLJIT_32;
#endif
if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
@ -2971,7 +3272,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
FAIL_IF(!inst);
*inst++ = GROUP_0F;
*inst = U8(get_jump_code(type & 0xff) - 0x40);
*inst = U8(get_jump_code((sljit_uw)type) - 0x40);
return SLJIT_SUCCESS;
}

View File

@ -59,38 +59,15 @@
#include <sys/mman.h>
#ifdef __NetBSD__
#if defined(PROT_MPROTECT)
#define check_se_protected(ptr, size) (0)
#define SLJIT_PROT_WX PROT_MPROTECT(PROT_EXEC)
#else /* !PROT_MPROTECT */
#ifdef _NETBSD_SOURCE
#include <sys/param.h>
#else /* !_NETBSD_SOURCE */
typedef unsigned int u_int;
#define devmajor_t sljit_s32
#endif /* _NETBSD_SOURCE */
#include <sys/sysctl.h>
#include <unistd.h>
#define check_se_protected(ptr, size) netbsd_se_protected()
static SLJIT_INLINE int netbsd_se_protected(void)
{
int mib[3];
int paxflags;
size_t len = sizeof(paxflags);
mib[0] = CTL_PROC;
mib[1] = getpid();
mib[2] = PROC_PID_PAXFLAGS;
if (SLJIT_UNLIKELY(sysctl(mib, 3, &paxflags, &len, NULL, 0) < 0))
return -1;
return (paxflags & CTL_PROC_PAXFLAGS_MPROTECT) ? -1 : 0;
}
#endif /* PROT_MPROTECT */
#define check_se_protected(ptr, size) (0)
#else /* POSIX */
#if !(defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED)
#include <pthread.h>
#define SLJIT_SE_LOCK() pthread_mutex_lock(&se_lock)
#define SLJIT_SE_UNLOCK() pthread_mutex_unlock(&se_lock)
#endif /* !SLJIT_SINGLE_THREADED */
#define check_se_protected(ptr, size) generic_se_protected(ptr, size)
static SLJIT_INLINE int generic_se_protected(void *ptr, sljit_uw size)
@ -102,22 +79,20 @@ static SLJIT_INLINE int generic_se_protected(void *ptr, sljit_uw size)
}
#endif /* NetBSD */
#if defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED
#ifndef SLJIT_SE_LOCK
#define SLJIT_SE_LOCK()
#endif
#ifndef SLJIT_SE_UNLOCK
#define SLJIT_SE_UNLOCK()
#else /* !SLJIT_SINGLE_THREADED */
#include <pthread.h>
#define SLJIT_SE_LOCK() pthread_mutex_lock(&se_lock)
#define SLJIT_SE_UNLOCK() pthread_mutex_unlock(&se_lock)
#endif /* SLJIT_SINGLE_THREADED */
#endif
#ifndef SLJIT_PROT_WX
#define SLJIT_PROT_WX 0
#endif /* !SLJIT_PROT_WX */
#endif
SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size)
{
#if !(defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED)
#if !(defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED) \
&& !defined(__NetBSD__)
static pthread_mutex_t se_lock = PTHREAD_MUTEX_INITIALIZER;
#endif
static int se_protected = !SLJIT_PROT_WX;