Diffstat (limited to 'tcg')
-rw-r--r--  tcg/README               |   17
-rw-r--r--  tcg/arm/tcg-target.c     |    6
-rw-r--r--  tcg/arm/tcg-target.h     |    1
-rw-r--r--  tcg/hppa/tcg-target.c    |  214
-rw-r--r--  tcg/hppa/tcg-target.h    |    1
-rw-r--r--  tcg/i386/tcg-target.c    |  191
-rw-r--r--  tcg/i386/tcg-target.h    |   11
-rw-r--r--  tcg/ia64/tcg-target.c    |    6
-rw-r--r--  tcg/ia64/tcg-target.h    |    2
-rw-r--r--  tcg/mips/tcg-target.c    |  346
-rw-r--r--  tcg/mips/tcg-target.h    |   24
-rw-r--r--  tcg/optimize.c           |  413
-rw-r--r--  tcg/ppc/tcg-target.c     |   81
-rw-r--r--  tcg/ppc/tcg-target.h     |    1
-rw-r--r--  tcg/ppc64/tcg-target.c   |    6
-rw-r--r--  tcg/ppc64/tcg-target.h   |    2
-rw-r--r--  tcg/s390/tcg-target.c    |    5
-rw-r--r--  tcg/s390/tcg-target.h    |    2
-rw-r--r--  tcg/sparc/tcg-target.c   |  976
-rw-r--r--  tcg/sparc/tcg-target.h   |   35
-rw-r--r--  tcg/tcg-op.h             |  250
-rw-r--r--  tcg/tcg-opc.h            |    4
-rw-r--r--  tcg/tcg.c                |   37
-rw-r--r--  tcg/tcg.h                |   18
-rw-r--r--  tcg/tci/tcg-target.c     |    6
-rw-r--r--  tcg/tci/tcg-target.h     |    2
26 files changed, 1606 insertions(+), 1051 deletions(-)
diff --git a/tcg/README b/tcg/README
index cfdfd96d0..27846f1ea 100644
--- a/tcg/README
+++ b/tcg/README
@@ -141,7 +141,7 @@ Define label 'label' at the current program point.
Jump to label.
-* brcond_i32/i64 cond, t0, t1, label
+* brcond_i32/i64 t0, t1, cond, label
Conditional jump if t0 cond t1 is true. cond can be:
TCG_COND_EQ
@@ -301,12 +301,18 @@ This operation would be equivalent to
********* Conditional moves
-* setcond_i32/i64 cond, dest, t1, t2
+* setcond_i32/i64 dest, t1, t2, cond
dest = (t1 cond t2)
Set DEST to 1 if (T1 cond T2) is true, otherwise set to 0.
+* movcond_i32/i64 dest, c1, c2, v1, v2, cond
+
+dest = (c1 cond c2 ? v1 : v2)
+
+Set DEST to V1 if (C1 cond C2) is true, otherwise set to V2.
+
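As a plain-C sketch of the value the new opcode computes (illustrative names, not the TCG API), shown here for TCG_COND_LT:

    #include <stdint.h>

    /* dest = (c1 cond c2 ? v1 : v2), with cond = signed less-than. */
    static uint32_t movcond_lt_i32(int32_t c1, int32_t c2,
                                   uint32_t v1, uint32_t v2)
    {
        return c1 < c2 ? v1 : v2;
    }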
********* Type conversions
* ext_i32_i64 t0, t1
@@ -354,7 +360,7 @@ The following opcodes are internal to TCG. Thus they are to be implemented by
32-bit host code generators, but are not to be emitted by guest translators.
They are emitted as needed by inline functions within "tcg-op.h".
-* brcond2_i32 cond, t0_low, t0_high, t1_low, t1_high, label
+* brcond2_i32 t0_low, t0_high, t1_low, t1_high, cond, label
Similar to brcond, except that the 64-bit values T0 and T1
are formed from two 32-bit arguments.
@@ -371,7 +377,7 @@ is returned in two 32-bit outputs.
Similar to mul, except two 32-bit (unsigned) inputs T1 and T2 yielding
the full 64-bit product T0. The latter is returned in two 32-bit outputs.
-* setcond2_i32 cond, dest, t1_low, t1_high, t2_low, t2_high
+* setcond2_i32 dest, t1_low, t1_high, t2_low, t2_high, cond
Similar to setcond, except that the 64-bit values T1 and T2 are
formed from two 32-bit arguments. The result is a 32-bit value.
@@ -386,7 +392,8 @@ Exit the current TB and return the value t0 (word type).
Exit the current TB and jump to the TB index 'index' (constant) if the
current TB was linked to this TB. Otherwise execute the next
-instructions.
+instructions. Only indices 0 and 1 are valid and tcg_gen_goto_tb may be issued
+at most once with each slot index per TB.
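A minimal sketch of the translator idiom this rule constrains; DisasContext, its tb field, and gen_set_pc are illustrative target-side names, not a fixed API:

    static void gen_goto_tb(DisasContext *dc, int n, target_ulong dest)
    {
        /* n must be 0 or 1, and each value issued at most once per TB. */
        tcg_gen_goto_tb(n);
        gen_set_pc(dest);                 /* hypothetical helper */
        tcg_gen_exit_tb((tcg_target_long)dc->tb + n);
    }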
* qemu_ld8u t0, t1, flags
qemu_ld8s t0, t1, flags
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index aed3b5324..2bad0a2b1 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -145,12 +145,6 @@ static void patch_reloc(uint8_t *code_ptr, int type,
}
}
-/* maximum number of register used for input function arguments */
-static inline int tcg_target_get_call_iarg_regs_count(int flags)
-{
- return 4;
-}
-
/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index c0b8f7274..e2299cadd 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -73,6 +73,7 @@ typedef enum {
#define TCG_TARGET_HAS_nand_i32 0
#define TCG_TARGET_HAS_nor_i32 0
#define TCG_TARGET_HAS_deposit_i32 0
+#define TCG_TARGET_HAS_movcond_i32 0
#define TCG_TARGET_HAS_GUEST_BASE
diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c
index 8b81b70f1..2c79c1081 100644
--- a/tcg/hppa/tcg-target.c
+++ b/tcg/hppa/tcg-target.c
@@ -175,12 +175,6 @@ static void patch_reloc(uint8_t *code_ptr, int type,
*insn_ptr = insn;
}
-/* maximum number of register used for input function arguments */
-static inline int tcg_target_get_call_iarg_regs_count(int flags)
-{
- return 4;
-}
-
/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
@@ -820,19 +814,34 @@ static void tcg_out_comclr(TCGContext *s, int cond, TCGArg ret,
tcg_out32(s, op);
}
+static TCGCond const tcg_high_cond[] = {
+ [TCG_COND_EQ] = TCG_COND_EQ,
+ [TCG_COND_NE] = TCG_COND_NE,
+ [TCG_COND_LT] = TCG_COND_LT,
+ [TCG_COND_LE] = TCG_COND_LT,
+ [TCG_COND_GT] = TCG_COND_GT,
+ [TCG_COND_GE] = TCG_COND_GT,
+ [TCG_COND_LTU] = TCG_COND_LTU,
+ [TCG_COND_LEU] = TCG_COND_LTU,
+ [TCG_COND_GTU] = TCG_COND_GTU,
+ [TCG_COND_GEU] = TCG_COND_GTU
+};
+
static void tcg_out_brcond2(TCGContext *s, int cond, TCGArg al, TCGArg ah,
TCGArg bl, int blconst, TCGArg bh, int bhconst,
int label_index)
{
switch (cond) {
case TCG_COND_EQ:
+ tcg_out_comclr(s, TCG_COND_NE, TCG_REG_R0, al, bl, blconst);
+ tcg_out_brcond(s, TCG_COND_EQ, ah, bh, bhconst, label_index);
+ break;
case TCG_COND_NE:
- tcg_out_comclr(s, tcg_invert_cond(cond), TCG_REG_R0, al, bl, blconst);
- tcg_out_brcond(s, cond, ah, bh, bhconst, label_index);
+ tcg_out_brcond(s, TCG_COND_NE, al, bl, blconst, label_index);
+ tcg_out_brcond(s, TCG_COND_NE, ah, bh, bhconst, label_index);
break;
-
default:
- tcg_out_brcond(s, cond, ah, bh, bhconst, label_index);
+ tcg_out_brcond(s, tcg_high_cond[cond], ah, bh, bhconst, label_index);
tcg_out_comclr(s, TCG_COND_NE, TCG_REG_R0, ah, bh, bhconst);
tcg_out_brcond(s, tcg_unsigned_cond(cond),
al, bl, blconst, label_index);
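The tcg_high_cond table above selects the comparison applied to the high halves; a plain-C sketch of the decomposition, for a signed 64-bit "less than" built from 32-bit parts:

    #include <stdint.h>

    /* a < b (signed 64-bit), with a = ah:al and b = bh:bl. */
    static int lt64_parts(int32_t ah, uint32_t al, int32_t bh, uint32_t bl)
    {
        if (ah != bh) {
            return ah < bh;   /* tcg_high_cond[TCG_COND_LT] = TCG_COND_LT */
        }
        return al < bl;       /* equal highs: unsigned compare of lows */
    }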
@@ -853,9 +862,8 @@ static void tcg_out_setcond2(TCGContext *s, int cond, TCGArg ret,
{
int scratch = TCG_REG_R20;
- if (ret != al && ret != ah
- && (blconst || ret != bl)
- && (bhconst || ret != bh)) {
+ /* Note that the low parts are fully consumed before scratch is set. */
+ if (ret != ah && (bhconst || ret != bh)) {
scratch = ret;
}
@@ -867,18 +875,49 @@ static void tcg_out_setcond2(TCGContext *s, int cond, TCGArg ret,
tcg_out_movi(s, TCG_TYPE_I32, scratch, cond == TCG_COND_NE);
break;
- default:
+ case TCG_COND_GE:
+ case TCG_COND_GEU:
+ case TCG_COND_LT:
+ case TCG_COND_LTU:
+ /* Optimize compares with low part zero. */
+ if (bl == 0) {
+ tcg_out_setcond(s, cond, ret, ah, bh, bhconst);
+ return;
+ }
+ /* FALLTHRU */
+
+ case TCG_COND_LE:
+ case TCG_COND_LEU:
+ case TCG_COND_GT:
+ case TCG_COND_GTU:
+ /* <= : ah < bh | (ah == bh && al <= bl) */
tcg_out_setcond(s, tcg_unsigned_cond(cond), scratch, al, bl, blconst);
tcg_out_comclr(s, TCG_COND_EQ, TCG_REG_R0, ah, bh, bhconst);
tcg_out_movi(s, TCG_TYPE_I32, scratch, 0);
- tcg_out_comclr(s, cond, TCG_REG_R0, ah, bh, bhconst);
+ tcg_out_comclr(s, tcg_invert_cond(tcg_high_cond[cond]),
+ TCG_REG_R0, ah, bh, bhconst);
tcg_out_movi(s, TCG_TYPE_I32, scratch, 1);
break;
+
+ default:
+ tcg_abort();
}
tcg_out_mov(s, TCG_TYPE_I32, ret, scratch);
}
+static void tcg_out_movcond(TCGContext *s, int cond, TCGArg ret,
+ TCGArg c1, TCGArg c2, int c2const,
+ TCGArg v1, int v1const)
+{
+ tcg_out_comclr(s, tcg_invert_cond(cond), TCG_REG_R0, c1, c2, c2const);
+ if (v1const) {
+ tcg_out_movi(s, TCG_TYPE_I32, ret, v1);
+ } else {
+ tcg_out_mov(s, TCG_TYPE_I32, ret, v1);
+ }
+}
+
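COMCLR compares and nullifies the following instruction when its condition holds, so inverting the condition skips the move on false. A sketch of what the pair computes (cond_holds stands in for the comparison and is not a real function):

    static unsigned movcond_model(unsigned c1, unsigned c2,
                                  unsigned v1, unsigned v2)
    {
        unsigned ret = v2;          /* preloaded via the "0" constraint */
        if (cond_holds(c1, c2)) {   /* else COMCLR nullifies the move */
            ret = v1;
        }
        return ret;
    }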
#if defined(CONFIG_SOFTMMU)
#include "../../softmmu_defs.h"
@@ -943,10 +982,11 @@ static int tcg_out_tlb_read(TCGContext *s, int r0, int r1, int addrlo,
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, r1, offset);
}
- /* Compute the value that ought to appear in the TLB for a hit, namely, the page
- of the address. We include the low N bits of the address to catch unaligned
- accesses and force them onto the slow path. Do this computation after having
- issued the load from the TLB slot to give the load time to complete. */
+ /* Compute the value that ought to appear in the TLB for a hit, namely,
+ the page of the address. We include the low N bits of the address
+ to catch unaligned accesses and force them onto the slow path. Do
+ this computation after having issued the load from the TLB slot to
+ give the load time to complete. */
tcg_out_andi(s, r0, addrlo, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
/* If not equal, jump to lab_miss. */
@@ -959,6 +999,36 @@ static int tcg_out_tlb_read(TCGContext *s, int r0, int r1, int addrlo,
return ret;
}
+
+static int tcg_out_arg_reg32(TCGContext *s, int argno, TCGArg v, bool vconst)
+{
+ if (argno < 4) {
+ if (vconst) {
+ tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[argno], v);
+ } else {
+ tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[argno], v);
+ }
+ } else {
+ if (vconst && v != 0) {
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R20, v);
+ v = TCG_REG_R20;
+ }
+ tcg_out_st(s, TCG_TYPE_I32, v, TCG_REG_CALL_STACK,
+ TCG_TARGET_CALL_STACK_OFFSET - ((argno - 3) * 4));
+ }
+ return argno + 1;
+}
+
+static int tcg_out_arg_reg64(TCGContext *s, int argno, TCGArg vl, TCGArg vh)
+{
+ /* 64-bit arguments must go in even reg pairs and stack slots. */
+ if (argno & 1) {
+ argno++;
+ }
+ argno = tcg_out_arg_reg32(s, argno, vl, false);
+ argno = tcg_out_arg_reg32(s, argno, vh, false);
+ return argno;
+}
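A pure-C sketch of the slot numbering used here: slots 0-3 map to the four argument registers, higher slots spill to the stack, and a 64-bit value is first aligned to an even slot:

    /* Align a 64-bit argument to an even slot index. */
    static int align_arg64_slot(int argno)
    {
        return argno + (argno & 1);
    }

So a call like helper(env, addr64, mem_index) assigns env to slot 0, bumps the 64-bit address from slot 1 to slots 2/3, and pushes mem_index into stack slot 4.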
#endif
static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo_reg, int datahi_reg,
@@ -1039,39 +1109,36 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
/* Note that addrhi_reg is only used for 64-bit guests. */
int addrhi_reg = (TARGET_LONG_BITS == 64 ? *args++ : TCG_REG_R0);
int mem_index = *args;
- int lab1, lab2, argreg, offset;
+ int lab1, lab2, argno, offset;
lab1 = gen_new_label();
lab2 = gen_new_label();
offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read);
- offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, addrhi_reg,
- opc & 3, lab1, offset);
+ offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg,
+ addrhi_reg, opc & 3, lab1, offset);
/* TLB Hit. */
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, (offset ? TCG_REG_R1 : TCG_REG_R25),
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20,
+ (offset ? TCG_REG_R1 : TCG_REG_R25),
offsetof(CPUArchState, tlb_table[mem_index][0].addend) - offset);
- tcg_out_qemu_ld_direct(s, datalo_reg, datahi_reg, addrlo_reg, TCG_REG_R20, opc);
+ tcg_out_qemu_ld_direct(s, datalo_reg, datahi_reg, addrlo_reg,
+ TCG_REG_R20, opc);
tcg_out_branch(s, lab2, 1);
/* TLB Miss. */
/* label1: */
tcg_out_label(s, lab1, s->code_ptr);
- argreg = TCG_REG_R26;
- tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrlo_reg);
+ argno = 0;
+ argno = tcg_out_arg_reg32(s, argno, TCG_AREG0, false);
if (TARGET_LONG_BITS == 64) {
- tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrhi_reg);
+ argno = tcg_out_arg_reg64(s, argno, addrlo_reg, addrhi_reg);
+ } else {
+ argno = tcg_out_arg_reg32(s, argno, addrlo_reg, false);
}
- tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
-
- /* XXX/FIXME: suboptimal */
- tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
- tcg_target_call_iarg_regs[1]);
- tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
- tcg_target_call_iarg_regs[0]);
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
- TCG_AREG0);
+ argno = tcg_out_arg_reg32(s, argno, mem_index, true);
+
tcg_out_call(s, qemu_ld_helpers[opc & 3]);
switch (opc) {
@@ -1107,8 +1174,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
#endif
}
-static void tcg_out_qemu_st_direct(TCGContext *s, int datalo_reg, int datahi_reg,
- int addr_reg, int opc)
+static void tcg_out_qemu_st_direct(TCGContext *s, int datalo_reg,
+ int datahi_reg, int addr_reg, int opc)
{
#ifdef TARGET_WORDS_BIGENDIAN
const int bswap = 0;
@@ -1161,17 +1228,18 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
/* Note that addrhi_reg is only used for 64-bit guests. */
int addrhi_reg = (TARGET_LONG_BITS == 64 ? *args++ : TCG_REG_R0);
int mem_index = *args;
- int lab1, lab2, argreg, offset;
+ int lab1, lab2, argno, next, offset;
lab1 = gen_new_label();
lab2 = gen_new_label();
offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
- offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, addrhi_reg,
- opc, lab1, offset);
+ offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg,
+ addrhi_reg, opc, lab1, offset);
/* TLB Hit. */
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, (offset ? TCG_REG_R1 : TCG_REG_R25),
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20,
+ (offset ? TCG_REG_R1 : TCG_REG_R25),
offsetof(CPUArchState, tlb_table[mem_index][0].addend) - offset);
/* There are no indexed stores, so we must do this addition explicitly.
@@ -1184,63 +1252,46 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
/* label1: */
tcg_out_label(s, lab1, s->code_ptr);
- argreg = TCG_REG_R26;
- tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrlo_reg);
+ argno = 0;
+ argno = tcg_out_arg_reg32(s, argno, TCG_AREG0, false);
if (TARGET_LONG_BITS == 64) {
- tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrhi_reg);
+ argno = tcg_out_arg_reg64(s, argno, addrlo_reg, addrhi_reg);
+ } else {
+ argno = tcg_out_arg_reg32(s, argno, addrlo_reg, false);
}
+ next = (argno < 4 ? tcg_target_call_iarg_regs[argno] : TCG_REG_R20);
switch(opc) {
case 0:
- tcg_out_andi(s, argreg--, datalo_reg, 0xff);
- tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
+ tcg_out_andi(s, next, datalo_reg, 0xff);
+ argno = tcg_out_arg_reg32(s, argno, next, false);
break;
case 1:
- tcg_out_andi(s, argreg--, datalo_reg, 0xffff);
- tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
+ tcg_out_andi(s, next, datalo_reg, 0xffff);
+ argno = tcg_out_arg_reg32(s, argno, next, false);
break;
case 2:
- tcg_out_mov(s, TCG_TYPE_I32, argreg--, datalo_reg);
- tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
+ argno = tcg_out_arg_reg32(s, argno, datalo_reg, false);
break;
case 3:
- /* Because of the alignment required by the 64-bit data argument,
- we will always use R23/R24. Also, we will always run out of
- argument registers for storing mem_index, so that will have
- to go on the stack. */
- if (mem_index == 0) {
- argreg = TCG_REG_R0;
- } else {
- argreg = TCG_REG_R20;
- tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
- }
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R23, datahi_reg);
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R24, datalo_reg);
- tcg_out_st(s, TCG_TYPE_I32, argreg, TCG_REG_CALL_STACK,
- TCG_TARGET_CALL_STACK_OFFSET - 4);
+ argno = tcg_out_arg_reg64(s, argno, datalo_reg, datahi_reg);
break;
default:
tcg_abort();
}
+ argno = tcg_out_arg_reg32(s, argno, mem_index, true);
- /* XXX/FIXME: suboptimal */
- tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
- tcg_target_call_iarg_regs[2]);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
- tcg_target_call_iarg_regs[1]);
- tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
- tcg_target_call_iarg_regs[0]);
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
- TCG_AREG0);
tcg_out_call(s, qemu_st_helpers[opc]);
/* label2: */
tcg_out_label(s, lab2, s->code_ptr);
#else
- /* There are no indexed stores, so if GUEST_BASE is set we must do the add
- explicitly. Careful to avoid R20, which is used for the bswaps to follow. */
+ /* There are no indexed stores, so if GUEST_BASE is set we must do
+ the add explicitly. Careful to avoid R20, which is used for the
+ bswaps to follow. */
if (GUEST_BASE != 0) {
- tcg_out_arith(s, TCG_REG_R31, addrlo_reg, TCG_GUEST_BASE_REG, INSN_ADDL);
+ tcg_out_arith(s, TCG_REG_R31, addrlo_reg,
+ TCG_GUEST_BASE_REG, INSN_ADDL);
addrlo_reg = TCG_REG_R31;
}
tcg_out_qemu_st_direct(s, datalo_reg, datahi_reg, addrlo_reg, opc);
@@ -1475,6 +1526,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
args[3], const_args[3], args[4], const_args[4]);
break;
+ case INDEX_op_movcond_i32:
+ tcg_out_movcond(s, args[5], args[0], args[1], args[2], const_args[2],
+ args[3], const_args[3]);
+ break;
+
case INDEX_op_add2_i32:
tcg_out_add2(s, args[0], args[1], args[2], args[3],
args[4], args[5], const_args[4]);
@@ -1583,6 +1639,10 @@ static const TCGTargetOpDef hppa_op_defs[] = {
{ INDEX_op_setcond_i32, { "r", "rZ", "rI" } },
{ INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rI", "rI" } },
+ /* ??? We can actually support a signed 14-bit arg3, but we
+ only have existing constraints for a signed 11-bit. */
+ { INDEX_op_movcond_i32, { "r", "rZ", "rI", "rI", "0" } },
+
{ INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rI", "rZ" } },
{ INDEX_op_sub2_i32, { "r", "r", "rI", "rZ", "rK", "rZ" } },
diff --git a/tcg/hppa/tcg-target.h b/tcg/hppa/tcg-target.h
index 01ef9605f..535135371 100644
--- a/tcg/hppa/tcg-target.h
+++ b/tcg/hppa/tcg-target.h
@@ -96,6 +96,7 @@ typedef enum {
#define TCG_TARGET_HAS_nand_i32 0
#define TCG_TARGET_HAS_nor_i32 0
#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_movcond_i32 1
/* optional instructions automatically implemented */
#define TCG_TARGET_HAS_neg_i32 0 /* sub rd, 0, rs */
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 34c2df80a..0e218c861 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -75,9 +75,7 @@ static const int tcg_target_call_iarg_regs[] = {
TCG_REG_R8,
TCG_REG_R9,
#else
- TCG_REG_EAX,
- TCG_REG_EDX,
- TCG_REG_ECX
+ /* 32-bit mode uses a stack-based calling convention (GCC default). */
#endif
};
@@ -88,6 +86,18 @@ static const int tcg_target_call_oarg_regs[] = {
#endif
};
+/* Registers used with L constraint, which are the first argument
+ registers on x86_64, and two random call clobbered registers on
+ i386. */
+#if TCG_TARGET_REG_BITS == 64
+# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
+# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
+# define TCG_REG_L2 tcg_target_call_iarg_regs[2]
+#else
+# define TCG_REG_L0 TCG_REG_EAX
+# define TCG_REG_L1 TCG_REG_EDX
+#endif
+
static uint8_t *tb_ret_addr;
static void patch_reloc(uint8_t *code_ptr, int type,
@@ -114,16 +124,6 @@ static void patch_reloc(uint8_t *code_ptr, int type,
}
}
-/* maximum number of register used for input function arguments */
-static inline int tcg_target_get_call_iarg_regs_count(int flags)
-{
- if (TCG_TARGET_REG_BITS == 64) {
- return 6;
- }
-
- return 0;
-}
-
/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
@@ -179,16 +179,16 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
/* qemu_ld/st address constraint */
case 'L':
ct->ct |= TCG_CT_REG;
- if (TCG_TARGET_REG_BITS == 64) {
+#if TCG_TARGET_REG_BITS == 64
tcg_regset_set32(ct->u.regs, 0, 0xffff);
- tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[0]);
- tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[1]);
- tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[2]);
- } else {
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_L2);
+#else
tcg_regset_set32(ct->u.regs, 0, 0xff);
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX);
- }
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
+#endif
break;
case 'e':
@@ -249,6 +249,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
#define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_BSWAP (0xc8 | P_EXT)
#define OPC_CALL_Jz (0xe8)
+#define OPC_CMOVCC (0x40 | P_EXT) /* ... plus condition code */
#define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_DEC_r32 (0x48)
#define OPC_IMUL_GvEv (0xaf | P_EXT)
@@ -263,6 +264,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
#define OPC_MOVB_EvGv (0x88) /* stores, more or less */
#define OPC_MOVL_EvGv (0x89) /* stores, more or less */
#define OPC_MOVL_GvEv (0x8b) /* loads, more or less */
+#define OPC_MOVB_EvIz (0xc6)
#define OPC_MOVL_EvIz (0xc7)
#define OPC_MOVL_Iv (0xb8)
#define OPC_MOVSBL (0xbe | P_EXT)
@@ -935,6 +937,24 @@ static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
}
#endif
+static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
+ TCGArg c1, TCGArg c2, int const_c2,
+ TCGArg v1)
+{
+ tcg_out_cmp(s, c1, c2, const_c2, 0);
+ tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
+}
+
+#if TCG_TARGET_REG_BITS == 64
+static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
+ TCGArg c1, TCGArg c2, int const_c2,
+ TCGArg v1)
+{
+ tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
+ tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1);
+}
+#endif
+
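A C model of the cmp + cmovcc pair emitted above; the destination must already hold v2 via the "0" matching constraint, and cmov writes only when the flag test passes (shown for TCG_COND_LT, which tcg_cond_to_jcc maps to JCC_JL):

    #include <stdint.h>

    static uint32_t cmov_lt_model(int32_t c1, int32_t c2,
                                  uint32_t v1, uint32_t dest /* == v2 */)
    {
        if (c1 < c2) {    /* condition from the preceding cmp */
            dest = v1;
        }
        return dest;
    }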
static void tcg_out_branch(TCGContext *s, int call, tcg_target_long dest)
{
tcg_target_long disp = dest - (tcg_target_long)s->code_ptr - 5;
@@ -1009,8 +1029,8 @@ static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
uint8_t **label_ptr, int which)
{
const int addrlo = args[addrlo_idx];
- const int r0 = tcg_target_call_iarg_regs[0];
- const int r1 = tcg_target_call_iarg_regs[1];
+ const int r0 = TCG_REG_L0;
+ const int r1 = TCG_REG_L1;
TCGType type = TCG_TYPE_I32;
int rexw = 0;
@@ -1172,8 +1192,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
label_ptr, offsetof(CPUTLBEntry, addr_read));
/* TLB Hit. */
- tcg_out_qemu_ld_direct(s, data_reg, data_reg2,
- tcg_target_call_iarg_regs[0], 0, opc);
+ tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc);
/* jmp label2 */
tcg_out8(s, OPC_JMP_short);
@@ -1206,14 +1225,10 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
mem_index);
/* XXX/FIXME: suboptimal */
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
- tcg_target_call_iarg_regs[2]);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
- tcg_target_call_iarg_regs[1]);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
- tcg_target_call_iarg_regs[0]);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
- TCG_AREG0);
+ tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3], TCG_REG_L2);
+ tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], TCG_REG_L1);
+ tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1], TCG_REG_L0);
+ tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0);
#endif
tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
@@ -1279,11 +1294,9 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
use the ADDR32 prefix. For now, do nothing. */
if (offset != GUEST_BASE) {
- tcg_out_movi(s, TCG_TYPE_I64,
- tcg_target_call_iarg_regs[0], GUEST_BASE);
- tgen_arithr(s, ARITH_ADD + P_REXW,
- tcg_target_call_iarg_regs[0], base);
- base = tcg_target_call_iarg_regs[0];
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
+ tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
+ base = TCG_REG_L0;
offset = 0;
}
}
@@ -1304,8 +1317,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
/* ??? Ideally we wouldn't need a scratch register. For user-only,
we could perform the bswap twice to restore the original value
instead of moving to the scratch. But as it is, the L constraint
- means that the second argument reg is definitely free here. */
- int scratch = tcg_target_call_iarg_regs[1];
+ means that TCG_REG_L1 is definitely free here. */
+ const int scratch = TCG_REG_L1;
switch (sizeop) {
case 0:
@@ -1378,8 +1391,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
label_ptr, offsetof(CPUTLBEntry, addr_write));
/* TLB Hit. */
- tcg_out_qemu_st_direct(s, data_reg, data_reg2,
- tcg_target_call_iarg_regs[0], 0, opc);
+ tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc);
/* jmp label2 */
tcg_out8(s, OPC_JMP_short);
@@ -1414,18 +1426,14 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
stack_adjust += 4;
#else
tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
- tcg_target_call_iarg_regs[1], data_reg);
- tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], mem_index);
+ TCG_REG_L1, data_reg);
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_L2, mem_index);
stack_adjust = 0;
/* XXX/FIXME: suboptimal */
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
- tcg_target_call_iarg_regs[2]);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
- tcg_target_call_iarg_regs[1]);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
- tcg_target_call_iarg_regs[0]);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
- TCG_AREG0);
+ tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3], TCG_REG_L2);
+ tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], TCG_REG_L1);
+ tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1], TCG_REG_L0);
+ tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0);
#endif
tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);
@@ -1452,11 +1460,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
use the ADDR32 prefix. For now, do nothing. */
if (offset != GUEST_BASE) {
- tcg_out_movi(s, TCG_TYPE_I64,
- tcg_target_call_iarg_regs[0], GUEST_BASE);
- tgen_arithr(s, ARITH_ADD + P_REXW,
- tcg_target_call_iarg_regs[0], base);
- base = tcg_target_call_iarg_regs[0];
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
+ tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
+ base = TCG_REG_L0;
offset = 0;
}
}
@@ -1543,18 +1549,35 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
OP_32_64(st8):
- tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
- args[0], args[1], args[2]);
+ if (const_args[0]) {
+ tcg_out_modrm_offset(s, OPC_MOVB_EvIz,
+ 0, args[1], args[2]);
+ tcg_out8(s, args[0]);
+ } else {
+ tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
+ args[0], args[1], args[2]);
+ }
break;
OP_32_64(st16):
- tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
- args[0], args[1], args[2]);
+ if (const_args[0]) {
+ tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16,
+ 0, args[1], args[2]);
+ tcg_out16(s, args[0]);
+ } else {
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
+ args[0], args[1], args[2]);
+ }
break;
#if TCG_TARGET_REG_BITS == 64
case INDEX_op_st32_i64:
#endif
case INDEX_op_st_i32:
- tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
+ if (const_args[0]) {
+ tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]);
+ tcg_out32(s, args[0]);
+ } else {
+ tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
+ }
break;
OP_32_64(add):
@@ -1650,6 +1673,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_setcond32(s, args[3], args[0], args[1],
args[2], const_args[2]);
break;
+ case INDEX_op_movcond_i32:
+ tcg_out_movcond32(s, args[5], args[0], args[1],
+ args[2], const_args[2], args[3]);
+ break;
OP_32_64(bswap16):
tcg_out_rolw_8(s, args[0]);
@@ -1758,7 +1785,13 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
break;
case INDEX_op_st_i64:
- tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
+ if (const_args[0]) {
+ tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW,
+ 0, args[1], args[2]);
+ tcg_out32(s, args[0]);
+ } else {
+ tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
+ }
break;
case INDEX_op_qemu_ld32s:
tcg_out_qemu_ld(s, args, 2 | 4);
@@ -1772,6 +1805,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_setcond64(s, args[3], args[0], args[1],
args[2], const_args[2]);
break;
+ case INDEX_op_movcond_i64:
+ tcg_out_movcond64(s, args[5], args[0], args[1],
+ args[2], const_args[2], args[3]);
+ break;
case INDEX_op_bswap64_i64:
tcg_out_bswap64(s, args[0]);
@@ -1820,9 +1857,9 @@ static const TCGTargetOpDef x86_op_defs[] = {
{ INDEX_op_ld16u_i32, { "r", "r" } },
{ INDEX_op_ld16s_i32, { "r", "r" } },
{ INDEX_op_ld_i32, { "r", "r" } },
- { INDEX_op_st8_i32, { "q", "r" } },
- { INDEX_op_st16_i32, { "r", "r" } },
- { INDEX_op_st_i32, { "r", "r" } },
+ { INDEX_op_st8_i32, { "qi", "r" } },
+ { INDEX_op_st16_i32, { "ri", "r" } },
+ { INDEX_op_st_i32, { "ri", "r" } },
{ INDEX_op_add_i32, { "r", "r", "ri" } },
{ INDEX_op_sub_i32, { "r", "0", "ri" } },
@@ -1856,6 +1893,9 @@ static const TCGTargetOpDef x86_op_defs[] = {
{ INDEX_op_setcond_i32, { "q", "r", "ri" } },
{ INDEX_op_deposit_i32, { "Q", "0", "Q" } },
+#if TCG_TARGET_HAS_movcond_i32
+ { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },
+#endif
#if TCG_TARGET_REG_BITS == 32
{ INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
@@ -1873,10 +1913,10 @@ static const TCGTargetOpDef x86_op_defs[] = {
{ INDEX_op_ld32u_i64, { "r", "r" } },
{ INDEX_op_ld32s_i64, { "r", "r" } },
{ INDEX_op_ld_i64, { "r", "r" } },
- { INDEX_op_st8_i64, { "r", "r" } },
- { INDEX_op_st16_i64, { "r", "r" } },
- { INDEX_op_st32_i64, { "r", "r" } },
- { INDEX_op_st_i64, { "r", "r" } },
+ { INDEX_op_st8_i64, { "ri", "r" } },
+ { INDEX_op_st16_i64, { "ri", "r" } },
+ { INDEX_op_st32_i64, { "ri", "r" } },
+ { INDEX_op_st_i64, { "re", "r" } },
{ INDEX_op_add_i64, { "r", "0", "re" } },
{ INDEX_op_mul_i64, { "r", "0", "re" } },
@@ -1910,6 +1950,7 @@ static const TCGTargetOpDef x86_op_defs[] = {
{ INDEX_op_ext32u_i64, { "r", "r" } },
{ INDEX_op_deposit_i64, { "Q", "0", "Q" } },
+ { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },
#endif
#if TCG_TARGET_REG_BITS == 64
@@ -2008,15 +2049,17 @@ static void tcg_target_qemu_prologue(TCGContext *s)
#if TCG_TARGET_REG_BITS == 32
tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
(ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
- tcg_out_ld(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[1], TCG_REG_ESP,
- (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4);
+ tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
+ /* jmp *tb. */
+ tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
+ (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
+ + stack_addend);
#else
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
-#endif
tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
-
/* jmp *tb. */
tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
+#endif
/* TB epilogue */
tb_ret_addr = s->code_ptr;
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 8be42f3cc..ace63ba37 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -67,7 +67,11 @@ typedef enum {
/* used for function call generation */
#define TCG_REG_CALL_STACK TCG_REG_ESP
#define TCG_TARGET_STACK_ALIGN 16
+#if defined(_WIN64)
+#define TCG_TARGET_CALL_STACK_OFFSET 32
+#else
#define TCG_TARGET_CALL_STACK_OFFSET 0
+#endif
/* optional instructions */
#define TCG_TARGET_HAS_div2_i32 1
@@ -86,6 +90,12 @@ typedef enum {
#define TCG_TARGET_HAS_nand_i32 0
#define TCG_TARGET_HAS_nor_i32 0
#define TCG_TARGET_HAS_deposit_i32 1
+#if defined(__x86_64__) || defined(__i686__)
+/* Use cmov only if the compiler is already doing so. */
+#define TCG_TARGET_HAS_movcond_i32 1
+#else
+#define TCG_TARGET_HAS_movcond_i32 0
+#endif
#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_div2_i64 1
@@ -107,6 +117,7 @@ typedef enum {
#define TCG_TARGET_HAS_nand_i64 0
#define TCG_TARGET_HAS_nor_i64 0
#define TCG_TARGET_HAS_deposit_i64 1
+#define TCG_TARGET_HAS_movcond_i64 1
#endif
#define TCG_TARGET_deposit_i32_valid(ofs, len) \
diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 1745038f1..dc9c12cf1 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -176,12 +176,6 @@ static const int tcg_target_call_oarg_regs[] = {
TCG_REG_R8
};
-/* maximum number of register used for input function arguments */
-static inline int tcg_target_get_call_iarg_regs_count(int flags)
-{
- return 8;
-}
-
/*
* opcode formation
*/
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index c22962ac1..368aee419 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -133,6 +133,8 @@ typedef enum {
#define TCG_TARGET_HAS_rot_i64 1
#define TCG_TARGET_HAS_deposit_i32 0
#define TCG_TARGET_HAS_deposit_i64 0
+#define TCG_TARGET_HAS_movcond_i32 0
+#define TCG_TARGET_HAS_movcond_i64 0
/* optional instructions automatically implemented */
#define TCG_TARGET_HAS_neg_i32 0 /* sub r1, r0, r3 */
diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index 74db83d44..34e3e7f82 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -68,7 +68,7 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#endif
/* check if we really need so many registers :P */
-static const int tcg_target_reg_alloc_order[] = {
+static const TCGReg tcg_target_reg_alloc_order[] = {
TCG_REG_S0,
TCG_REG_S1,
TCG_REG_S2,
@@ -94,14 +94,14 @@ static const int tcg_target_reg_alloc_order[] = {
TCG_REG_V1
};
-static const int tcg_target_call_iarg_regs[4] = {
+static const TCGReg tcg_target_call_iarg_regs[4] = {
TCG_REG_A0,
TCG_REG_A1,
TCG_REG_A2,
TCG_REG_A3
};
-static const int tcg_target_call_oarg_regs[2] = {
+static const TCGReg tcg_target_call_oarg_regs[2] = {
TCG_REG_V0,
TCG_REG_V1
};
@@ -185,12 +185,6 @@ static void patch_reloc(uint8_t *code_ptr, int type,
}
}
-/* maximum number of register used for input function arguments */
-static inline int tcg_target_get_call_iarg_regs_count(int flags)
-{
- return 4;
-}
-
/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
@@ -278,6 +272,8 @@ static inline int tcg_target_const_match(tcg_target_long val,
enum {
OPC_BEQ = 0x04 << 26,
OPC_BNE = 0x05 << 26,
+ OPC_BLEZ = 0x06 << 26,
+ OPC_BGTZ = 0x07 << 26,
OPC_ADDIU = 0x09 << 26,
OPC_SLTI = 0x0A << 26,
OPC_SLTIU = 0x0B << 26,
@@ -298,12 +294,16 @@ enum {
OPC_SPECIAL = 0x00 << 26,
OPC_SLL = OPC_SPECIAL | 0x00,
OPC_SRL = OPC_SPECIAL | 0x02,
+ OPC_ROTR = OPC_SPECIAL | (0x01 << 21) | 0x02,
OPC_SRA = OPC_SPECIAL | 0x03,
OPC_SLLV = OPC_SPECIAL | 0x04,
OPC_SRLV = OPC_SPECIAL | 0x06,
+ OPC_ROTRV = OPC_SPECIAL | (0x01 << 6) | 0x06,
OPC_SRAV = OPC_SPECIAL | 0x07,
OPC_JR = OPC_SPECIAL | 0x08,
OPC_JALR = OPC_SPECIAL | 0x09,
+ OPC_MOVZ = OPC_SPECIAL | 0x0A,
+ OPC_MOVN = OPC_SPECIAL | 0x0B,
OPC_MFHI = OPC_SPECIAL | 0x10,
OPC_MFLO = OPC_SPECIAL | 0x12,
OPC_MULT = OPC_SPECIAL | 0x18,
@@ -319,7 +319,13 @@ enum {
OPC_SLT = OPC_SPECIAL | 0x2A,
OPC_SLTU = OPC_SPECIAL | 0x2B,
+ OPC_REGIMM = 0x01 << 26,
+ OPC_BLTZ = OPC_REGIMM | (0x00 << 16),
+ OPC_BGEZ = OPC_REGIMM | (0x01 << 16),
+
OPC_SPECIAL3 = 0x1f << 26,
+ OPC_INS = OPC_SPECIAL3 | 0x004,
+ OPC_WSBH = OPC_SPECIAL3 | 0x0a0,
OPC_SEB = OPC_SPECIAL3 | 0x420,
OPC_SEH = OPC_SPECIAL3 | 0x620,
};
@@ -327,7 +333,8 @@ enum {
/*
* Type reg
*/
-static inline void tcg_out_opc_reg(TCGContext *s, int opc, int rd, int rs, int rt)
+static inline void tcg_out_opc_reg(TCGContext *s, int opc,
+ TCGReg rd, TCGReg rs, TCGReg rt)
{
int32_t inst;
@@ -341,7 +348,8 @@ static inline void tcg_out_opc_reg(TCGContext *s, int opc, int rd, int rs, int r
/*
* Type immediate
*/
-static inline void tcg_out_opc_imm(TCGContext *s, int opc, int rt, int rs, int imm)
+static inline void tcg_out_opc_imm(TCGContext *s, int opc,
+ TCGReg rt, TCGReg rs, TCGArg imm)
{
int32_t inst;
@@ -355,7 +363,8 @@ static inline void tcg_out_opc_imm(TCGContext *s, int opc, int rt, int rs, int i
/*
* Type branch
*/
-static inline void tcg_out_opc_br(TCGContext *s, int opc, int rt, int rs)
+static inline void tcg_out_opc_br(TCGContext *s, int opc,
+ TCGReg rt, TCGReg rs)
{
/* We pay attention here to not modify the branch target by reading
the existing value and using it again. This ensures that caches and
@@ -368,7 +377,8 @@ static inline void tcg_out_opc_br(TCGContext *s, int opc, int rt, int rs)
/*
* Type sa
*/
-static inline void tcg_out_opc_sa(TCGContext *s, int opc, int rd, int rt, int sa)
+static inline void tcg_out_opc_sa(TCGContext *s, int opc,
+ TCGReg rd, TCGReg rt, TCGArg sa)
{
int32_t inst;
@@ -407,38 +417,47 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type,
}
}
-static inline void tcg_out_bswap16(TCGContext *s, int ret, int arg)
+static inline void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg)
{
+#if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+ tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg);
+#else
/* ret and arg can't be the AT register */
if (ret == TCG_REG_AT || arg == TCG_REG_AT) {
tcg_abort();
}
tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8);
- tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, TCG_REG_AT, 0x00ff);
-
tcg_out_opc_sa(s, OPC_SLL, ret, arg, 8);
tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xff00);
tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT);
+#endif
}
-static inline void tcg_out_bswap16s(TCGContext *s, int ret, int arg)
+static inline void tcg_out_bswap16s(TCGContext *s, TCGReg ret, TCGReg arg)
{
+#if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+ tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg);
+ tcg_out_opc_reg(s, OPC_SEH, ret, 0, ret);
+#else
/* ret and arg can't be the AT register */
if (ret == TCG_REG_AT || arg == TCG_REG_AT) {
tcg_abort();
}
tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8);
- tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, TCG_REG_AT, 0xff);
-
tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24);
tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16);
tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT);
+#endif
}
-static inline void tcg_out_bswap32(TCGContext *s, int ret, int arg)
+static inline void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg)
{
+#if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+ tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg);
+ tcg_out_opc_sa(s, OPC_ROTR, ret, ret, 16);
+#else
/* ret and arg must be different and can't be the AT register */
if (ret == arg || ret == TCG_REG_AT || arg == TCG_REG_AT) {
tcg_abort();
@@ -456,11 +475,12 @@ static inline void tcg_out_bswap32(TCGContext *s, int ret, int arg)
tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8);
tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, TCG_REG_AT, 0xff00);
tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT);
+#endif
}
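A C model of the two-instruction R2 sequence above: WSBH swaps the bytes within each 16-bit halfword, and the 16-bit rotate then completes the full 32-bit byte swap:

    #include <stdint.h>

    static uint32_t bswap32_model(uint32_t x)
    {
        /* WSBH: b3 b2 b1 b0 -> b2 b3 b0 b1 */
        uint32_t h = ((x & 0x00ff00ffu) << 8) | ((x >> 8) & 0x00ff00ffu);
        /* ROTR 16: b2 b3 b0 b1 -> b0 b1 b2 b3 */
        return (h >> 16) | (h << 16);
    }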
-static inline void tcg_out_ext8s(TCGContext *s, int ret, int arg)
+static inline void tcg_out_ext8s(TCGContext *s, TCGReg ret, TCGReg arg)
{
-#ifdef _MIPS_ARCH_MIPS32R2
+#if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
tcg_out_opc_reg(s, OPC_SEB, ret, 0, arg);
#else
tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24);
@@ -468,9 +488,9 @@ static inline void tcg_out_ext8s(TCGContext *s, int ret, int arg)
#endif
}
-static inline void tcg_out_ext16s(TCGContext *s, int ret, int arg)
+static inline void tcg_out_ext16s(TCGContext *s, TCGReg ret, TCGReg arg)
{
-#ifdef _MIPS_ARCH_MIPS32R2
+#if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
tcg_out_opc_reg(s, OPC_SEH, ret, 0, arg);
#else
tcg_out_opc_sa(s, OPC_SLL, ret, arg, 16);
@@ -478,8 +498,8 @@ static inline void tcg_out_ext16s(TCGContext *s, int ret, int arg)
#endif
}
-static inline void tcg_out_ldst(TCGContext *s, int opc, int arg,
- int arg1, tcg_target_long arg2)
+static inline void tcg_out_ldst(TCGContext *s, int opc, TCGArg arg,
+ TCGReg arg1, TCGArg arg2)
{
if (arg2 == (int16_t) arg2) {
tcg_out_opc_imm(s, opc, arg, arg1, arg2);
@@ -502,7 +522,7 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
tcg_out_ldst(s, OPC_SW, arg, arg1, arg2);
}
-static inline void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
+static inline void tcg_out_addi(TCGContext *s, TCGReg reg, TCGArg val)
{
if (val == (int16_t)val) {
tcg_out_opc_imm(s, OPC_ADDIU, reg, reg, val);
@@ -543,7 +563,7 @@ DEFINE_TCG_OUT_CALL_IARG(tcg_out_call_iarg_reg16, TCGReg arg)
#undef DEFINE_TCG_OUT_CALL_IARG_GET_ARG
#define DEFINE_TCG_OUT_CALL_IARG_GET_ARG(A) \
tcg_out_movi(s, TCG_TYPE_I32, A, arg);
-DEFINE_TCG_OUT_CALL_IARG(tcg_out_call_iarg_imm32, uint32_t arg)
+DEFINE_TCG_OUT_CALL_IARG(tcg_out_call_iarg_imm32, TCGArg arg)
#undef DEFINE_TCG_OUT_CALL_IARG_GET_ARG
/* We don't use the macro for this one to avoid an unnecessary reg-reg
@@ -573,8 +593,8 @@ static inline void tcg_out_call_iarg_reg64(TCGContext *s, int *arg_num,
#endif
}
-static void tcg_out_brcond(TCGContext *s, TCGCond cond, int arg1,
- int arg2, int label_index)
+static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGArg arg1,
+ TCGArg arg2, int label_index)
{
TCGLabel *l = &s->labels[label_index];
@@ -586,32 +606,48 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, int arg1,
tcg_out_opc_br(s, OPC_BNE, arg1, arg2);
break;
case TCG_COND_LT:
- tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg1, arg2);
- tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO);
+ if (arg2 == 0) {
+ tcg_out_opc_br(s, OPC_BLTZ, 0, arg1);
+ } else {
+ tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg1, arg2);
+ tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO);
+ }
break;
case TCG_COND_LTU:
tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg1, arg2);
tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO);
break;
case TCG_COND_GE:
- tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg1, arg2);
- tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO);
+ if (arg2 == 0) {
+ tcg_out_opc_br(s, OPC_BGEZ, 0, arg1);
+ } else {
+ tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg1, arg2);
+ tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO);
+ }
break;
case TCG_COND_GEU:
tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg1, arg2);
tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO);
break;
case TCG_COND_LE:
- tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg2, arg1);
- tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO);
+ if (arg2 == 0) {
+ tcg_out_opc_br(s, OPC_BLEZ, 0, arg1);
+ } else {
+ tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg2, arg1);
+ tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO);
+ }
break;
case TCG_COND_LEU:
tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg2, arg1);
tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO);
break;
case TCG_COND_GT:
- tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg2, arg1);
- tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO);
+ if (arg2 == 0) {
+ tcg_out_opc_br(s, OPC_BGTZ, 0, arg1);
+ } else {
+ tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg2, arg1);
+ tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO);
+ }
break;
case TCG_COND_GTU:
tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg2, arg1);
@@ -631,8 +667,9 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, int arg1,
/* XXX: we implement it at the target level to avoid having to
handle cross-basic-block temporaries */
-static void tcg_out_brcond2(TCGContext *s, TCGCond cond, int arg1,
- int arg2, int arg3, int arg4, int label_index)
+static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGArg arg1,
+ TCGArg arg2, TCGArg arg3, TCGArg arg4,
+ int label_index)
{
void *label_ptr;
@@ -694,8 +731,70 @@ static void tcg_out_brcond2(TCGContext *s, TCGCond cond, int arg1,
reloc_pc16(label_ptr, (tcg_target_long) s->code_ptr);
}
-static void tcg_out_setcond(TCGContext *s, TCGCond cond, int ret,
- int arg1, int arg2)
+static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret,
+ TCGArg c1, TCGArg c2, TCGArg v)
+{
+ switch (cond) {
+ case TCG_COND_EQ:
+ if (c1 == 0) {
+ tcg_out_opc_reg(s, OPC_MOVZ, ret, v, c2);
+ } else if (c2 == 0) {
+ tcg_out_opc_reg(s, OPC_MOVZ, ret, v, c1);
+ } else {
+ tcg_out_opc_reg(s, OPC_XOR, TCG_REG_AT, c1, c2);
+ tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT);
+ }
+ break;
+ case TCG_COND_NE:
+ if (c1 == 0) {
+ tcg_out_opc_reg(s, OPC_MOVN, ret, v, c2);
+ } else if (c2 == 0) {
+ tcg_out_opc_reg(s, OPC_MOVN, ret, v, c1);
+ } else {
+ tcg_out_opc_reg(s, OPC_XOR, TCG_REG_AT, c1, c2);
+ tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT);
+ }
+ break;
+ case TCG_COND_LT:
+ tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c1, c2);
+ tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT);
+ break;
+ case TCG_COND_LTU:
+ tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c1, c2);
+ tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT);
+ break;
+ case TCG_COND_GE:
+ tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c1, c2);
+ tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT);
+ break;
+ case TCG_COND_GEU:
+ tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c1, c2);
+ tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT);
+ break;
+ case TCG_COND_LE:
+ tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c2, c1);
+ tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT);
+ break;
+ case TCG_COND_LEU:
+ tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c2, c1);
+ tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT);
+ break;
+ case TCG_COND_GT:
+ tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c2, c1);
+ tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT);
+ break;
+ case TCG_COND_GTU:
+ tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c2, c1);
+ tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT);
+ break;
+ default:
+ tcg_abort();
+ break;
+ }
+}
+
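MOVN/MOVZ only write the destination when the test register is non-zero (resp. zero), leaving it untouched otherwise; that is why the constraint row added below ties the output to v with "0". A sketch of the MOVN case:

    #include <stdint.h>

    /* MOVN rd, rs, rt: if (rt != 0) rd = rs; MOVZ is the rt == 0 dual. */
    static uint32_t movn_model(uint32_t rd, uint32_t rs, uint32_t rt)
    {
        return rt != 0 ? rs : rd;
    }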
+static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
+ TCGArg arg1, TCGArg arg2)
{
switch (cond) {
case TCG_COND_EQ:
@@ -754,8 +853,8 @@ static void tcg_out_setcond(TCGContext *s, TCGCond cond, int ret,
/* XXX: we implement it at the target level to avoid having to
handle cross-basic-block temporaries */
-static void tcg_out_setcond2(TCGContext *s, TCGCond cond, int ret,
- int arg1, int arg2, int arg3, int arg4)
+static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret,
+ TCGArg arg1, TCGArg arg2, TCGArg arg3, TCGArg arg4)
{
switch (cond) {
case TCG_COND_EQ:
@@ -842,18 +941,17 @@ static const void * const qemu_st_helpers[4] = {
static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
int opc)
{
- int addr_regl, addr_meml;
- int data_regl, data_regh, data_reg1, data_reg2;
- int mem_index, s_bits;
+ TCGReg addr_regl, data_regl, data_regh, data_reg1, data_reg2;
#if defined(CONFIG_SOFTMMU)
void *label1_ptr, *label2_ptr;
int arg_num;
-#endif
-#if TARGET_LONG_BITS == 64
-# if defined(CONFIG_SOFTMMU)
+ int mem_index, s_bits;
+ int addr_meml;
+# if TARGET_LONG_BITS == 64
uint8_t *label3_ptr;
+ TCGReg addr_regh;
+ int addr_memh;
# endif
- int addr_regh, addr_memh;
#endif
data_regl = *args++;
if (opc == 3)
@@ -861,11 +959,22 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
else
data_regh = 0;
addr_regl = *args++;
-#if TARGET_LONG_BITS == 64
+#if defined(CONFIG_SOFTMMU)
+# if TARGET_LONG_BITS == 64
addr_regh = *args++;
-#endif
+# if defined(TCG_TARGET_WORDS_BIGENDIAN)
+ addr_memh = 0;
+ addr_meml = 4;
+# else
+ addr_memh = 4;
+ addr_meml = 0;
+# endif
+# else
+ addr_meml = 0;
+# endif
mem_index = *args;
s_bits = opc & 3;
+#endif
if (opc == 3) {
#if defined(TCG_TARGET_WORDS_BIGENDIAN)
@@ -879,18 +988,6 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
data_reg1 = data_regl;
data_reg2 = 0;
}
-#if TARGET_LONG_BITS == 64
-# if defined(TCG_TARGET_WORDS_BIGENDIAN)
- addr_memh = 0;
- addr_meml = 4;
-# else
- addr_memh = 4;
- addr_meml = 0;
-# endif
-#else
- addr_meml = 0;
-#endif
-
#if defined(CONFIG_SOFTMMU)
tcg_out_opc_sa(s, OPC_SRL, TCG_REG_A0, addr_regl, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_A0, TCG_REG_A0, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
@@ -1029,50 +1126,56 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
int opc)
{
- int addr_regl, addr_meml;
- int data_regl, data_regh, data_reg1, data_reg2;
- int mem_index, s_bits;
+ TCGReg addr_regl, data_regl, data_regh, data_reg1, data_reg2;
#if defined(CONFIG_SOFTMMU)
uint8_t *label1_ptr, *label2_ptr;
int arg_num;
+ int mem_index, s_bits;
+ int addr_meml;
#endif
#if TARGET_LONG_BITS == 64
# if defined(CONFIG_SOFTMMU)
uint8_t *label3_ptr;
+ TCGReg addr_regh;
+ int addr_memh;
# endif
- int addr_regh, addr_memh;
#endif
-
data_regl = *args++;
if (opc == 3) {
data_regh = *args++;
-#if defined(TCG_TARGET_WORDS_BIGENDIAN)
- data_reg1 = data_regh;
- data_reg2 = data_regl;
-#else
- data_reg1 = data_regl;
- data_reg2 = data_regh;
-#endif
} else {
- data_reg1 = data_regl;
- data_reg2 = 0;
data_regh = 0;
}
addr_regl = *args++;
-#if TARGET_LONG_BITS == 64
+#if defined(CONFIG_SOFTMMU)
+# if TARGET_LONG_BITS == 64
addr_regh = *args++;
-# if defined(TCG_TARGET_WORDS_BIGENDIAN)
+# if defined(TCG_TARGET_WORDS_BIGENDIAN)
addr_memh = 0;
addr_meml = 4;
-# else
+# else
addr_memh = 4;
addr_meml = 0;
-# endif
-#else
+# endif
+# else
addr_meml = 0;
-#endif
+# endif
mem_index = *args;
s_bits = opc;
+#endif
+
+ if (opc == 3) {
+#if defined(TCG_TARGET_WORDS_BIGENDIAN)
+ data_reg1 = data_regh;
+ data_reg2 = data_regl;
+#else
+ data_reg1 = data_regl;
+ data_reg2 = data_regh;
+#endif
+ } else {
+ data_reg1 = data_regl;
+ data_reg2 = 0;
+ }
#if defined(CONFIG_SOFTMMU)
tcg_out_opc_sa(s, OPC_SRL, TCG_REG_A0, addr_regl, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
@@ -1157,7 +1260,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
break;
case 1:
if (TCG_NEED_BSWAP) {
- tcg_out_bswap16(s, TCG_REG_T0, data_reg1);
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_T0, data_reg1, 0xffff);
+ tcg_out_bswap16(s, TCG_REG_T0, TCG_REG_T0);
tcg_out_opc_imm(s, OPC_SH, TCG_REG_T0, TCG_REG_A0, 0);
} else {
tcg_out_opc_imm(s, OPC_SH, data_reg1, TCG_REG_A0, 0);
@@ -1377,6 +1481,31 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_opc_reg(s, OPC_SRLV, args[0], args[2], args[1]);
}
break;
+ case INDEX_op_rotl_i32:
+ if (const_args[2]) {
+ tcg_out_opc_sa(s, OPC_ROTR, args[0], args[1], 0x20 - args[2]);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_AT, 32);
+ tcg_out_opc_reg(s, OPC_SUBU, TCG_REG_AT, TCG_REG_AT, args[2]);
+ tcg_out_opc_reg(s, OPC_ROTRV, args[0], TCG_REG_AT, args[1]);
+ }
+ break;
+ case INDEX_op_rotr_i32:
+ if (const_args[2]) {
+ tcg_out_opc_sa(s, OPC_ROTR, args[0], args[1], args[2]);
+ } else {
+ tcg_out_opc_reg(s, OPC_ROTRV, args[0], args[2], args[1]);
+ }
+ break;
+
+ /* The bswap routines do not work on non-R2 CPUs. In that case
+ we let TCG generate the corresponding code. */
+ case INDEX_op_bswap16_i32:
+ tcg_out_bswap16(s, args[0], args[1]);
+ break;
+ case INDEX_op_bswap32_i32:
+ tcg_out_bswap32(s, args[0], args[1]);
+ break;
case INDEX_op_ext8s_i32:
tcg_out_ext8s(s, args[0], args[1]);
@@ -1385,6 +1514,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_ext16s(s, args[0], args[1]);
break;
+ case INDEX_op_deposit_i32:
+ tcg_out_opc_imm(s, OPC_INS, args[0], args[2],
+ ((args[3] + args[4] - 1) << 11) | (args[3] << 6));
+ break;
+
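The "immediate" passed to tcg_out_opc_imm here is really the packed INS bit-field descriptor: msb (= ofs + len - 1) goes in bits 15:11 and lsb (= ofs) in bits 10:6. A sketch of the packing:

    static int ins_fields(int ofs, int len)
    {
        int lsb = ofs;
        int msb = ofs + len - 1;
        return (msb << 11) | (lsb << 6);
    }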
+
case INDEX_op_brcond_i32:
tcg_out_brcond(s, args[2], args[0], args[1], args[3]);
break;
@@ -1392,6 +1526,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_brcond2(s, args[4], args[0], args[1], args[2], args[3], args[5]);
break;
+ case INDEX_op_movcond_i32:
+ tcg_out_movcond(s, args[5], args[0], args[1], args[2], args[3]);
+ break;
+
case INDEX_op_setcond_i32:
tcg_out_setcond(s, args[3], args[0], args[1], args[2]);
break;
@@ -1453,34 +1591,42 @@ static const TCGTargetOpDef mips_op_defs[] = {
{ INDEX_op_st16_i32, { "rZ", "r" } },
{ INDEX_op_st_i32, { "rZ", "r" } },
- { INDEX_op_add_i32, { "r", "rZ", "rJZ" } },
+ { INDEX_op_add_i32, { "r", "rZ", "rJ" } },
{ INDEX_op_mul_i32, { "r", "rZ", "rZ" } },
{ INDEX_op_mulu2_i32, { "r", "r", "rZ", "rZ" } },
{ INDEX_op_div_i32, { "r", "rZ", "rZ" } },
{ INDEX_op_divu_i32, { "r", "rZ", "rZ" } },
{ INDEX_op_rem_i32, { "r", "rZ", "rZ" } },
{ INDEX_op_remu_i32, { "r", "rZ", "rZ" } },
- { INDEX_op_sub_i32, { "r", "rZ", "rJZ" } },
+ { INDEX_op_sub_i32, { "r", "rZ", "rJ" } },
- { INDEX_op_and_i32, { "r", "rZ", "rIZ" } },
+ { INDEX_op_and_i32, { "r", "rZ", "rI" } },
{ INDEX_op_nor_i32, { "r", "rZ", "rZ" } },
{ INDEX_op_not_i32, { "r", "rZ" } },
{ INDEX_op_or_i32, { "r", "rZ", "rIZ" } },
{ INDEX_op_xor_i32, { "r", "rZ", "rIZ" } },
- { INDEX_op_shl_i32, { "r", "rZ", "riZ" } },
- { INDEX_op_shr_i32, { "r", "rZ", "riZ" } },
- { INDEX_op_sar_i32, { "r", "rZ", "riZ" } },
+ { INDEX_op_shl_i32, { "r", "rZ", "ri" } },
+ { INDEX_op_shr_i32, { "r", "rZ", "ri" } },
+ { INDEX_op_sar_i32, { "r", "rZ", "ri" } },
+ { INDEX_op_rotr_i32, { "r", "rZ", "ri" } },
+ { INDEX_op_rotl_i32, { "r", "rZ", "ri" } },
+
+ { INDEX_op_bswap16_i32, { "r", "r" } },
+ { INDEX_op_bswap32_i32, { "r", "r" } },
{ INDEX_op_ext8s_i32, { "r", "rZ" } },
{ INDEX_op_ext16s_i32, { "r", "rZ" } },
+ { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
+
{ INDEX_op_brcond_i32, { "rZ", "rZ" } },
+ { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rZ", "0" } },
{ INDEX_op_setcond_i32, { "r", "rZ", "rZ" } },
{ INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rZ", "rZ" } },
- { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJZ", "rJZ" } },
- { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJZ", "rJZ" } },
+ { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } },
+ { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } },
{ INDEX_op_brcond2_i32, { "rZ", "rZ", "rZ", "rZ" } },
#if TARGET_LONG_BITS == 32
@@ -1520,7 +1666,6 @@ static int tcg_target_callee_save_regs[] = {
TCG_REG_S5,
TCG_REG_S6,
TCG_REG_S7,
- TCG_REG_GP,
TCG_REG_FP,
TCG_REG_RA, /* should be last for ABI compliance */
};
@@ -1530,11 +1675,15 @@ static void tcg_target_qemu_prologue(TCGContext *s)
{
int i, frame_size;
- /* reserve some stack space */
+ /* reserve some stack space, also for TCG temps. */
frame_size = ARRAY_SIZE(tcg_target_callee_save_regs) * 4
- + TCG_STATIC_CALL_ARGS_SIZE;
+ + TCG_STATIC_CALL_ARGS_SIZE
+ + CPU_TEMP_BUF_NLONGS * sizeof(long);
frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
~(TCG_TARGET_STACK_ALIGN - 1);
+ tcg_set_frame(s, TCG_REG_SP, ARRAY_SIZE(tcg_target_callee_save_regs) * 4
+ + TCG_STATIC_CALL_ARGS_SIZE,
+ CPU_TEMP_BUF_NLONGS * sizeof(long));
/* TB prologue */
tcg_out_addi(s, TCG_REG_SP, -frame_size);
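The frame computed above, as standalone arithmetic (a sketch mirroring the code, not an ABI reference): callee-saved registers, the outgoing-call area, and the TCG temp buffer, rounded up to the stack alignment:

    static int frame_bytes(int nregs, int call_area, int ntemps, int align)
    {
        int size = nregs * 4 + call_area + ntemps * (int)sizeof(long);
        return (size + align - 1) & ~(align - 1);   /* align: power of two */
    }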
@@ -1586,8 +1735,7 @@ static void tcg_target_init(TCGContext *s)
tcg_regset_set_reg(s->reserved_regs, TCG_REG_T0); /* internal use */
tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA); /* return address */
tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); /* stack pointer */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP); /* global pointer */
tcg_add_target_add_op_defs(mips_op_defs);
- tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf),
- CPU_TEMP_BUF_NLONGS * sizeof(long));
}
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index 1c6193180..7020d6584 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -80,16 +80,34 @@ typedef enum {
#define TCG_TARGET_HAS_div_i32 1
#define TCG_TARGET_HAS_not_i32 1
#define TCG_TARGET_HAS_nor_i32 1
-#define TCG_TARGET_HAS_rot_i32 0
#define TCG_TARGET_HAS_ext8s_i32 1
#define TCG_TARGET_HAS_ext16s_i32 1
-#define TCG_TARGET_HAS_bswap32_i32 0
-#define TCG_TARGET_HAS_bswap16_i32 0
#define TCG_TARGET_HAS_andc_i32 0
#define TCG_TARGET_HAS_orc_i32 0
#define TCG_TARGET_HAS_eqv_i32 0
#define TCG_TARGET_HAS_nand_i32 0
+
+/* optional instructions only implemented on MIPS4, MIPS32 and Loongson 2 */
+#if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \
+ defined(_MIPS_ARCH_LOONGSON2E) || defined(_MIPS_ARCH_LOONGSON2F) || \
+ defined(_MIPS_ARCH_MIPS4)
+#define TCG_TARGET_HAS_movcond_i32 1
+#else
+#define TCG_TARGET_HAS_movcond_i32 0
+#endif
+
+/* optional instructions only implemented on MIPS32R2 */
+#if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+#define TCG_TARGET_HAS_bswap16_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_rot_i32 1
+#define TCG_TARGET_HAS_deposit_i32 1
+#else
+#define TCG_TARGET_HAS_bswap16_i32 0
+#define TCG_TARGET_HAS_bswap32_i32 0
+#define TCG_TARGET_HAS_rot_i32 0
#define TCG_TARGET_HAS_deposit_i32 0
+#endif
/* optional instructions automatically implemented */
#define TCG_TARGET_HAS_neg_i32 0 /* sub rd, zero, rt */
diff --git a/tcg/optimize.c b/tcg/optimize.c
index fba0ed959..35532a1e0 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -39,8 +39,6 @@ typedef enum {
TCG_TEMP_UNDEF = 0,
TCG_TEMP_CONST,
TCG_TEMP_COPY,
- TCG_TEMP_HAS_COPY,
- TCG_TEMP_ANY
} tcg_temp_state;
struct tcg_temp_info {
@@ -52,39 +50,19 @@ struct tcg_temp_info {
static struct tcg_temp_info temps[TCG_MAX_TEMPS];
-/* Reset TEMP's state to TCG_TEMP_ANY. If TEMP was a representative of some
- class of equivalent temp's, a new representative should be chosen in this
- class. */
-static void reset_temp(TCGArg temp, int nb_temps, int nb_globals)
+/* Reset TEMP's state to TCG_TEMP_UNDEF. If TEMP only had one copy, remove
+ the copy flag from the remaining temp. */
+static void reset_temp(TCGArg temp)
{
- int i;
- TCGArg new_base = (TCGArg)-1;
- if (temps[temp].state == TCG_TEMP_HAS_COPY) {
- for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
- if (i >= nb_globals) {
- temps[i].state = TCG_TEMP_HAS_COPY;
- new_base = i;
- break;
- }
- }
- for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
- if (new_base == (TCGArg)-1) {
- temps[i].state = TCG_TEMP_ANY;
- } else {
- temps[i].val = new_base;
- }
+ if (temps[temp].state == TCG_TEMP_COPY) {
+ if (temps[temp].prev_copy == temps[temp].next_copy) {
+ temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF;
+ } else {
+ temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
+ temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
}
- temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
- temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
- } else if (temps[temp].state == TCG_TEMP_COPY) {
- temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
- temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
- new_base = temps[temp].val;
- }
- temps[temp].state = TCG_TEMP_ANY;
- if (new_base != (TCGArg)-1 && temps[new_base].next_copy == new_base) {
- temps[new_base].state = TCG_TEMP_ANY;
}
+ temps[temp].state = TCG_TEMP_UNDEF;
}
static int op_bits(TCGOpcode op)
@@ -107,36 +85,83 @@ static TCGOpcode op_to_movi(TCGOpcode op)
}
}
-static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args, TCGArg dst,
- TCGArg src, int nb_temps, int nb_globals)
+static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
{
- reset_temp(dst, nb_temps, nb_globals);
- assert(temps[src].state != TCG_TEMP_COPY);
- /* Don't try to copy if one of temps is a global or either one
- is local and another is register */
- if (src >= nb_globals && dst >= nb_globals &&
- tcg_arg_is_local(s, src) == tcg_arg_is_local(s, dst)) {
- assert(temps[src].state != TCG_TEMP_CONST);
- if (temps[src].state != TCG_TEMP_HAS_COPY) {
- temps[src].state = TCG_TEMP_HAS_COPY;
+ TCGArg i;
+
+ /* If this is already a global, we can't do better. */
+ if (temp < s->nb_globals) {
+ return temp;
+ }
+
+ /* Search for a global first. */
+ for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
+ if (i < s->nb_globals) {
+ return i;
+ }
+ }
+
+ /* If it is a temp, search for a temp local. */
+ if (!s->temps[temp].temp_local) {
+ for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
+ if (s->temps[i].temp_local) {
+ return i;
+ }
+ }
+ }
+
+ /* Failed to find a better representation; return the same temp. */
+ return temp;
+}
+
+static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
+{
+ TCGArg i;
+
+ if (arg1 == arg2) {
+ return true;
+ }
+
+ if (temps[arg1].state != TCG_TEMP_COPY
+ || temps[arg2].state != TCG_TEMP_COPY) {
+ return false;
+ }
+
+ for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
+ if (i == arg2) {
+ return true;
+ }
+ }
+
+ return false;
+}
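The prev_copy/next_copy fields form a circular doubly linked list through all copies of a value. A standalone sketch of the two list operations the rewritten pass relies on (unlinking, as in reset_temp, and the membership walk, as in temps_are_copies); the struct and field names here are illustrative, not QEMU's:

    #include <stdbool.h>

    struct node { int next_copy, prev_copy; bool is_copy; };

    /* Unlink I from its ring; a two-element ring dissolves entirely. */
    static void unlink_copy(struct node *t, int i)
    {
        if (t[i].is_copy) {
            if (t[i].prev_copy == t[i].next_copy) {
                t[t[i].next_copy].is_copy = false;
            } else {
                t[t[i].next_copy].prev_copy = t[i].prev_copy;
                t[t[i].prev_copy].next_copy = t[i].next_copy;
            }
            t[i].is_copy = false;
        }
    }

    /* Two temps are copies iff one is reachable from the other's ring. */
    static bool on_same_ring(const struct node *t, int a, int b)
    {
        int i;
        if (a == b) {
            return true;
        }
        if (!t[a].is_copy || !t[b].is_copy) {
            return false;
        }
        for (i = t[a].next_copy; i != a; i = t[i].next_copy) {
            if (i == b) {
                return true;
            }
        }
        return false;
    }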
+
+static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args,
+ TCGArg dst, TCGArg src)
+{
+ reset_temp(dst);
+ assert(temps[src].state != TCG_TEMP_CONST);
+
+ if (s->temps[src].type == s->temps[dst].type) {
+ if (temps[src].state != TCG_TEMP_COPY) {
+ temps[src].state = TCG_TEMP_COPY;
temps[src].next_copy = src;
temps[src].prev_copy = src;
}
temps[dst].state = TCG_TEMP_COPY;
- temps[dst].val = src;
temps[dst].next_copy = temps[src].next_copy;
temps[dst].prev_copy = src;
temps[temps[dst].next_copy].prev_copy = dst;
temps[src].next_copy = dst;
}
+
gen_args[0] = dst;
gen_args[1] = src;
}
-static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val,
- int nb_temps, int nb_globals)
+static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val)
{
- reset_temp(dst, nb_temps, nb_globals);
+ reset_temp(dst);
temps[dst].state = TCG_TEMP_CONST;
temps[dst].val = val;
gen_args[0] = dst;
@@ -267,58 +292,88 @@ static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
return res;
}
+/* Return 2 if the condition can't be simplified at translation time,
+ otherwise the result of the condition (0 or 1). */
static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
TCGArg y, TCGCond c)
{
- switch (op_bits(op)) {
- case 32:
+ if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) {
+ switch (op_bits(op)) {
+ case 32:
+ switch (c) {
+ case TCG_COND_EQ:
+ return (uint32_t)temps[x].val == (uint32_t)temps[y].val;
+ case TCG_COND_NE:
+ return (uint32_t)temps[x].val != (uint32_t)temps[y].val;
+ case TCG_COND_LT:
+ return (int32_t)temps[x].val < (int32_t)temps[y].val;
+ case TCG_COND_GE:
+ return (int32_t)temps[x].val >= (int32_t)temps[y].val;
+ case TCG_COND_LE:
+ return (int32_t)temps[x].val <= (int32_t)temps[y].val;
+ case TCG_COND_GT:
+ return (int32_t)temps[x].val > (int32_t)temps[y].val;
+ case TCG_COND_LTU:
+ return (uint32_t)temps[x].val < (uint32_t)temps[y].val;
+ case TCG_COND_GEU:
+ return (uint32_t)temps[x].val >= (uint32_t)temps[y].val;
+ case TCG_COND_LEU:
+ return (uint32_t)temps[x].val <= (uint32_t)temps[y].val;
+ case TCG_COND_GTU:
+ return (uint32_t)temps[x].val > (uint32_t)temps[y].val;
+ }
+ break;
+ case 64:
+ switch (c) {
+ case TCG_COND_EQ:
+ return (uint64_t)temps[x].val == (uint64_t)temps[y].val;
+ case TCG_COND_NE:
+ return (uint64_t)temps[x].val != (uint64_t)temps[y].val;
+ case TCG_COND_LT:
+ return (int64_t)temps[x].val < (int64_t)temps[y].val;
+ case TCG_COND_GE:
+ return (int64_t)temps[x].val >= (int64_t)temps[y].val;
+ case TCG_COND_LE:
+ return (int64_t)temps[x].val <= (int64_t)temps[y].val;
+ case TCG_COND_GT:
+ return (int64_t)temps[x].val > (int64_t)temps[y].val;
+ case TCG_COND_LTU:
+ return (uint64_t)temps[x].val < (uint64_t)temps[y].val;
+ case TCG_COND_GEU:
+ return (uint64_t)temps[x].val >= (uint64_t)temps[y].val;
+ case TCG_COND_LEU:
+ return (uint64_t)temps[x].val <= (uint64_t)temps[y].val;
+ case TCG_COND_GTU:
+ return (uint64_t)temps[x].val > (uint64_t)temps[y].val;
+ }
+ break;
+ }
+ } else if (temps_are_copies(x, y)) {
switch (c) {
- case TCG_COND_EQ:
- return (uint32_t)x == (uint32_t)y;
- case TCG_COND_NE:
- return (uint32_t)x != (uint32_t)y;
- case TCG_COND_LT:
- return (int32_t)x < (int32_t)y;
- case TCG_COND_GE:
- return (int32_t)x >= (int32_t)y;
- case TCG_COND_LE:
- return (int32_t)x <= (int32_t)y;
case TCG_COND_GT:
- return (int32_t)x > (int32_t)y;
case TCG_COND_LTU:
- return (uint32_t)x < (uint32_t)y;
- case TCG_COND_GEU:
- return (uint32_t)x >= (uint32_t)y;
- case TCG_COND_LEU:
- return (uint32_t)x <= (uint32_t)y;
+ case TCG_COND_LT:
case TCG_COND_GTU:
- return (uint32_t)x > (uint32_t)y;
- }
- break;
- case 64:
- switch (c) {
- case TCG_COND_EQ:
- return (uint64_t)x == (uint64_t)y;
case TCG_COND_NE:
- return (uint64_t)x != (uint64_t)y;
- case TCG_COND_LT:
- return (int64_t)x < (int64_t)y;
+ return 0;
case TCG_COND_GE:
- return (int64_t)x >= (int64_t)y;
+ case TCG_COND_GEU:
case TCG_COND_LE:
- return (int64_t)x <= (int64_t)y;
- case TCG_COND_GT:
- return (int64_t)x > (int64_t)y;
+ case TCG_COND_LEU:
+ case TCG_COND_EQ:
+ return 1;
+ }
+ } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) {
+ switch (c) {
case TCG_COND_LTU:
- return (uint64_t)x < (uint64_t)y;
+ return 0;
case TCG_COND_GEU:
- return (uint64_t)x >= (uint64_t)y;
- case TCG_COND_LEU:
- return (uint64_t)x <= (uint64_t)y;
- case TCG_COND_GTU:
- return (uint64_t)x > (uint64_t)y;
+ return 1;
+ default:
+ return 2;
}
- break;
+ } else {
+ return 2;
}
fprintf(stderr,
@@ -327,7 +382,6 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
tcg_abort();
}
-
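The 0/1/2 return convention separates "folds to false", "folds to true" and "unknown until run time". A reduced sketch for the unsigned-less-than case only, including the new y == 0 special case; the signature is simplified from the real helper, which reads the constants out of the temps[] array:

    #include <stdbool.h>
    #include <stdint.h>

    enum { FOLD_FALSE = 0, FOLD_TRUE = 1, FOLD_UNKNOWN = 2 };

    static int fold_ltu(bool x_const, uint64_t x, bool y_const, uint64_t y)
    {
        if (x_const && y_const) {
            return x < y ? FOLD_TRUE : FOLD_FALSE;
        }
        if (y_const && y == 0) {
            return FOLD_FALSE;   /* no unsigned value is below zero */
        }
        return FOLD_UNKNOWN;     /* keep the setcond/brcond as emitted */
    }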
/* Propagate constants and copies, fold constant expressions. */
static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
TCGArg *args, TCGOpDef *tcg_op_defs)
@@ -337,12 +391,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
const TCGOpDef *def;
TCGArg *gen_args;
TCGArg tmp;
+ TCGCond cond;
+
/* Array VALS has an element for each temp.
If this temp holds a constant then its value is kept in VALS' element.
- If this temp is a copy of other ones then this equivalence class'
- representative is kept in VALS' element.
- If this temp is neither copy nor constant then corresponding VALS'
- element is unused. */
+ If this temp is a copy of other temps, then the other copies are
+ reachable through the doubly linked circular list. */
nb_temps = s->nb_temps;
nb_globals = s->nb_globals;
@@ -354,11 +408,18 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
op = gen_opc_buf[op_index];
def = &tcg_op_defs[op];
/* Do copy propagation */
- if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS))) {
- assert(op != INDEX_op_call);
+ if (op == INDEX_op_call) {
+ int nb_oargs = args[0] >> 16;
+ int nb_iargs = args[0] & 0xffff;
+ for (i = nb_oargs + 1; i < nb_oargs + nb_iargs + 1; i++) {
+ if (temps[args[i]].state == TCG_TEMP_COPY) {
+ args[i] = find_better_copy(s, args[i]);
+ }
+ }
+ } else {
for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
if (temps[args[i]].state == TCG_TEMP_COPY) {
- args[i] = temps[args[i]].val;
+ args[i] = find_better_copy(s, args[i]);
}
}
}
@@ -373,7 +434,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
CASE_OP_32_64(eqv):
CASE_OP_32_64(nand):
CASE_OP_32_64(nor):
- if (temps[args[1]].state == TCG_TEMP_CONST) {
+ /* Prefer the constant in the second argument, and then the form
+ "op a, a, b", which is better handled on non-RISC hosts. */
+ if (temps[args[1]].state == TCG_TEMP_CONST || (args[0] == args[2]
+ && temps[args[2]].state != TCG_TEMP_CONST)) {
tmp = args[1];
args[1] = args[2];
args[2] = tmp;
@@ -397,6 +461,25 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
args[3] = tcg_swap_cond(args[3]);
}
break;
+ CASE_OP_32_64(movcond):
+ cond = args[5];
+ if (temps[args[1]].state == TCG_TEMP_CONST
+ && temps[args[2]].state != TCG_TEMP_CONST) {
+ tmp = args[1];
+ args[1] = args[2];
+ args[2] = tmp;
+ cond = tcg_swap_cond(cond);
+ }
+ /* For movcond, we canonicalize the "false" input reg to match
+ the destination reg so that the tcg backend can implement
+ a "move if true" operation. */
+ if (args[0] == args[3]) {
+ tmp = args[3];
+ args[3] = args[4];
+ args[4] = tmp;
+ cond = tcg_invert_cond(cond);
+ }
+ args[5] = cond;
default:
break;
}
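Spelled out in plain C, the second swap above: when the destination aliases the "true" operand, exchanging the value operands and inverting the condition leaves the destination aliasing the "false" operand, so a backend only ever needs "move v into dest when cond holds". The two-value cond enum is a stand-in for TCGCond:

    enum cond { COND_LT, COND_GE };

    static enum cond invert_cond(enum cond c)
    {
        return c == COND_LT ? COND_GE : COND_LT;
    }

    static void canon_movcond(int dest, int *vtrue, int *vfalse, enum cond *c)
    {
        if (dest == *vtrue) {    /* dest would clobber the "true" input */
            int t = *vtrue;
            *vtrue = *vfalse;
            *vfalse = t;
            *c = invert_cond(*c);
        }
    }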
@@ -411,7 +494,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
if (temps[args[1]].state == TCG_TEMP_CONST
&& temps[args[1]].val == 0) {
gen_opc_buf[op_index] = op_to_movi(op);
- tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals);
+ tcg_opt_gen_movi(gen_args, args[0], 0);
args += 3;
gen_args += 2;
continue;
@@ -438,14 +521,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
}
if (temps[args[2]].state == TCG_TEMP_CONST
&& temps[args[2]].val == 0) {
- if ((temps[args[0]].state == TCG_TEMP_COPY
- && temps[args[0]].val == args[1])
- || args[0] == args[1]) {
+ if (temps_are_copies(args[0], args[1])) {
gen_opc_buf[op_index] = INDEX_op_nop;
} else {
gen_opc_buf[op_index] = op_to_mov(op);
- tcg_opt_gen_mov(s, gen_args, args[0], args[1],
- nb_temps, nb_globals);
+ tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
gen_args += 2;
}
args += 3;
@@ -463,7 +543,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
if ((temps[args[2]].state == TCG_TEMP_CONST
&& temps[args[2]].val == 0)) {
gen_opc_buf[op_index] = op_to_movi(op);
- tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals);
+ tcg_opt_gen_movi(gen_args, args[0], 0);
args += 3;
gen_args += 2;
continue;
@@ -477,13 +557,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
switch (op) {
CASE_OP_32_64(or):
CASE_OP_32_64(and):
- if (args[1] == args[2]) {
- if (args[1] == args[0]) {
+ if (temps_are_copies(args[1], args[2])) {
+ if (temps_are_copies(args[0], args[1])) {
gen_opc_buf[op_index] = INDEX_op_nop;
} else {
gen_opc_buf[op_index] = op_to_mov(op);
- tcg_opt_gen_mov(s, gen_args, args[0], args[1], nb_temps,
- nb_globals);
+ tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
gen_args += 2;
}
args += 3;
@@ -494,21 +573,34 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
break;
}
+ /* Simplify expression for "op r, a, a => movi r, 0" cases */
+ switch (op) {
+ CASE_OP_32_64(sub):
+ CASE_OP_32_64(xor):
+ if (temps_are_copies(args[1], args[2])) {
+ gen_opc_buf[op_index] = op_to_movi(op);
+ tcg_opt_gen_movi(gen_args, args[0], 0);
+ gen_args += 2;
+ args += 3;
+ continue;
+ }
+ break;
+ default:
+ break;
+ }
+
/* Propagate constants through copy operations and do constant
folding. Constants will be substituted to arguments by register
allocator where needed and possible. Also detect copies. */
switch (op) {
CASE_OP_32_64(mov):
- if ((temps[args[1]].state == TCG_TEMP_COPY
- && temps[args[1]].val == args[0])
- || args[0] == args[1]) {
+ if (temps_are_copies(args[0], args[1])) {
args += 2;
gen_opc_buf[op_index] = INDEX_op_nop;
break;
}
if (temps[args[1]].state != TCG_TEMP_CONST) {
- tcg_opt_gen_mov(s, gen_args, args[0], args[1],
- nb_temps, nb_globals);
+ tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
gen_args += 2;
args += 2;
break;
@@ -520,7 +612,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
args[1] = temps[args[1]].val;
/* fallthrough */
CASE_OP_32_64(movi):
- tcg_opt_gen_movi(gen_args, args[0], args[1], nb_temps, nb_globals);
+ tcg_opt_gen_movi(gen_args, args[0], args[1]);
gen_args += 2;
args += 2;
break;
@@ -535,9 +627,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
if (temps[args[1]].state == TCG_TEMP_CONST) {
gen_opc_buf[op_index] = op_to_movi(op);
tmp = do_constant_folding(op, temps[args[1]].val, 0);
- tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
+ tcg_opt_gen_movi(gen_args, args[0], tmp);
} else {
- reset_temp(args[0], nb_temps, nb_globals);
+ reset_temp(args[0]);
gen_args[0] = args[0];
gen_args[1] = args[1];
}
@@ -565,10 +657,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
gen_opc_buf[op_index] = op_to_movi(op);
tmp = do_constant_folding(op, temps[args[1]].val,
temps[args[2]].val);
- tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
+ tcg_opt_gen_movi(gen_args, args[0], tmp);
gen_args += 2;
} else {
- reset_temp(args[0], nb_temps, nb_globals);
+ reset_temp(args[0]);
gen_args[0] = args[0];
gen_args[1] = args[1];
gen_args[2] = args[2];
@@ -576,16 +668,34 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
}
args += 3;
break;
- CASE_OP_32_64(setcond):
+ CASE_OP_32_64(deposit):
if (temps[args[1]].state == TCG_TEMP_CONST
&& temps[args[2]].state == TCG_TEMP_CONST) {
gen_opc_buf[op_index] = op_to_movi(op);
- tmp = do_constant_folding_cond(op, temps[args[1]].val,
- temps[args[2]].val, args[3]);
- tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
+ tmp = ((1ull << args[4]) - 1);
+ tmp = (temps[args[1]].val & ~(tmp << args[3]))
+ | ((temps[args[2]].val & tmp) << args[3]);
+ tcg_opt_gen_movi(gen_args, args[0], tmp);
+ gen_args += 2;
+ } else {
+ reset_temp(args[0]);
+ gen_args[0] = args[0];
+ gen_args[1] = args[1];
+ gen_args[2] = args[2];
+ gen_args[3] = args[3];
+ gen_args[4] = args[4];
+ gen_args += 5;
+ }
+ args += 5;
+ break;
+ CASE_OP_32_64(setcond):
+ tmp = do_constant_folding_cond(op, args[1], args[2], args[3]);
+ if (tmp != 2) {
+ gen_opc_buf[op_index] = op_to_movi(op);
+ tcg_opt_gen_movi(gen_args, args[0], tmp);
gen_args += 2;
} else {
- reset_temp(args[0], nb_temps, nb_globals);
+ reset_temp(args[0]);
gen_args[0] = args[0];
gen_args[1] = args[1];
gen_args[2] = args[2];
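The deposit folding above, as a standalone helper: insert the low LEN bits of B into A at bit position POS. As in the diff, LEN < 64 is assumed so the shift is well defined:

    #include <stdint.h>

    static uint64_t deposit64(uint64_t a, uint64_t b, unsigned pos,
                              unsigned len)
    {
        uint64_t mask = (1ull << len) - 1;
        return (a & ~(mask << pos)) | ((b & mask) << pos);
    }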
@@ -595,10 +705,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
args += 4;
break;
CASE_OP_32_64(brcond):
- if (temps[args[0]].state == TCG_TEMP_CONST
- && temps[args[1]].state == TCG_TEMP_CONST) {
- if (do_constant_folding_cond(op, temps[args[0]].val,
- temps[args[1]].val, args[2])) {
+ tmp = do_constant_folding_cond(op, args[0], args[1], args[2]);
+ if (tmp != 2) {
+ if (tmp) {
memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
gen_opc_buf[op_index] = INDEX_op_br;
gen_args[0] = args[3];
@@ -608,7 +717,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
}
} else {
memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
- reset_temp(args[0], nb_temps, nb_globals);
+ reset_temp(args[0]);
gen_args[0] = args[0];
gen_args[1] = args[1];
gen_args[2] = args[2];
@@ -617,15 +726,41 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
}
args += 4;
break;
+ CASE_OP_32_64(movcond):
+ tmp = do_constant_folding_cond(op, args[1], args[2], args[5]);
+ if (tmp != 2) {
+ if (temps_are_copies(args[0], args[4-tmp])) {
+ gen_opc_buf[op_index] = INDEX_op_nop;
+ } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) {
+ gen_opc_buf[op_index] = op_to_movi(op);
+ tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val);
+ gen_args += 2;
+ } else {
+ gen_opc_buf[op_index] = op_to_mov(op);
+ tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]);
+ gen_args += 2;
+ }
+ } else {
+ reset_temp(args[0]);
+ gen_args[0] = args[0];
+ gen_args[1] = args[1];
+ gen_args[2] = args[2];
+ gen_args[3] = args[3];
+ gen_args[4] = args[4];
+ gen_args[5] = args[5];
+ gen_args += 6;
+ }
+ args += 6;
+ break;
case INDEX_op_call:
nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) {
for (i = 0; i < nb_globals; i++) {
- reset_temp(i, nb_temps, nb_globals);
+ reset_temp(i);
}
}
for (i = 0; i < (args[0] >> 16); i++) {
- reset_temp(args[i + 1], nb_temps, nb_globals);
+ reset_temp(args[i + 1]);
}
i = nb_call_args + 3;
while (i) {
@@ -635,21 +770,17 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
i--;
}
break;
- case INDEX_op_set_label:
- case INDEX_op_jmp:
- case INDEX_op_br:
- memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
- for (i = 0; i < def->nb_args; i++) {
- *gen_args = *args;
- args++;
- gen_args++;
- }
- break;
default:
/* Default case: we know nothing about the operation, so no
- propagation is done. We only trash output args. */
- for (i = 0; i < def->nb_oargs; i++) {
- reset_temp(args[i], nb_temps, nb_globals);
+ propagation is done. We trash everything if the operation
+ is the end of a basic block, otherwise we only trash the
+ output args. */
+ if (def->flags & TCG_OPF_BB_END) {
+ memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+ } else {
+ for (i = 0; i < def->nb_oargs; i++) {
+ reset_temp(args[i]);
+ }
}
for (i = 0; i < def->nb_args; i++) {
gen_args[i] = args[i];
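A compact restatement of the new default case, with an illustrative temp-state struct: an op flagged as ending a basic block invalidates everything the pass has learned, while any other unknown op invalidates only its outputs:

    #include <string.h>

    #define OPF_BB_END 1             /* illustrative flag bit */

    struct temp_info { int state; }; /* 0 == undefined, as above */

    static void forget(struct temp_info *temps, int nb_temps, int flags,
                       const int *oargs, int nb_oargs)
    {
        if (flags & OPF_BB_END) {
            memset(temps, 0, nb_temps * sizeof(*temps));
        } else {
            int i;
            for (i = 0; i < nb_oargs; i++) {
                temps[oargs[i]].state = 0;
            }
        }
    }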
diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index 26c4b33e6..90c275d69 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -221,12 +221,6 @@ static void patch_reloc(uint8_t *code_ptr, int type,
}
}
-/* maximum number of register used for input function arguments */
-static int tcg_target_get_call_iarg_regs_count(int flags)
-{
- return ARRAY_SIZE (tcg_target_call_iarg_regs);
-}
-
/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
@@ -390,6 +384,7 @@ static int tcg_target_const_match(tcg_target_long val,
#define ORC XO31(412)
#define EQV XO31(284)
#define NAND XO31(476)
+#define ISEL XO31( 15)
#define LBZX XO31( 87)
#define LHZX XO31(279)
@@ -1269,6 +1264,72 @@ static void tcg_out_setcond2 (TCGContext *s, const TCGArg *args,
);
}
+static void tcg_out_movcond (TCGContext *s, TCGCond cond,
+ TCGArg dest,
+ TCGArg c1, TCGArg c2,
+ TCGArg v1, TCGArg v2,
+ int const_c2)
+{
+ tcg_out_cmp (s, cond, c1, c2, const_c2, 7);
+
+ if (1) {
+ /* At least here on a 7447A, bit twiddling hacks are outperformed
+ by jumpy code (the testing was not scientific). */
+ if (dest == v2) {
+ cond = tcg_invert_cond (cond);
+ v2 = v1;
+ }
+ else {
+ if (dest != v1) {
+ tcg_out_mov (s, TCG_TYPE_I32, dest, v1);
+ }
+ }
+ /* Branch forward over one insn */
+ tcg_out32 (s, tcg_to_bc[cond] | 8);
+ tcg_out_mov (s, TCG_TYPE_I32, dest, v2);
+ }
+ else {
+ /* isel version; the "if (1)" above should be replaced once a way
+ is found to detect availability of isel on the underlying
+ hardware. */
+ int tab, bc;
+
+ switch (cond) {
+ case TCG_COND_EQ:
+ tab = TAB (dest, v1, v2);
+ bc = CR_EQ;
+ break;
+ case TCG_COND_NE:
+ tab = TAB (dest, v2, v1);
+ bc = CR_EQ;
+ break;
+ case TCG_COND_LTU:
+ case TCG_COND_LT:
+ tab = TAB (dest, v1, v2);
+ bc = CR_LT;
+ break;
+ case TCG_COND_GEU:
+ case TCG_COND_GE:
+ tab = TAB (dest, v2, v1);
+ bc = CR_LT;
+ break;
+ case TCG_COND_LEU:
+ case TCG_COND_LE:
+ tab = TAB (dest, v2, v1);
+ bc = CR_GT;
+ break;
+ case TCG_COND_GTU:
+ case TCG_COND_GT:
+ tab = TAB (dest, v1, v2);
+ bc = CR_GT;
+ break;
+ default:
+ tcg_abort ();
+ }
+ tcg_out32 (s, ISEL | tab | ((bc + 28) << 6));
+ }
+}
+
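A C-level view of the branchy sequence just added, ignoring the dest == v2 case (which the code handles first by inverting the condition): dest is preloaded with the "true" value and a taken branch skips the single move of the "false" value:

    static int movcond_branchy(int cond_holds, int dest, int v1, int v2)
    {
        if (dest != v1) {
            dest = v1;           /* unconditional mov before the branch */
        }
        if (!cond_holds) {       /* tcg_to_bc[cond] | 8 skips one insn */
            dest = v2;
        }
        return dest;
    }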
static void tcg_out_brcond (TCGContext *s, TCGCond cond,
TCGArg arg1, TCGArg arg2, int const_arg2,
int label_index)
@@ -1826,6 +1887,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
);
break;
+ case INDEX_op_movcond_i32:
+ tcg_out_movcond (s, args[5], args[0],
+ args[1], args[2],
+ args[3], args[4],
+ const_args[2]);
+ break;
+
default:
tcg_dump_ops (s);
tcg_abort ();
@@ -1922,6 +1990,7 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ INDEX_op_ext16u_i32, { "r", "r" } },
{ INDEX_op_deposit_i32, { "r", "0", "r" } },
+ { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "r" } },
{ -1 },
};
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 2f37fd289..3259d898a 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -92,6 +92,7 @@ typedef enum {
#define TCG_TARGET_HAS_nand_i32 1
#define TCG_TARGET_HAS_nor_i32 1
#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_movcond_i32 1
#define TCG_AREG0 TCG_REG_R27
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 337cd419f..19944bc42 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -208,12 +208,6 @@ static void patch_reloc (uint8_t *code_ptr, int type,
}
}
-/* maximum number of register used for input function arguments */
-static int tcg_target_get_call_iarg_regs_count (int flags)
-{
- return ARRAY_SIZE (tcg_target_call_iarg_regs);
-}
-
/* parse target specific constraints */
static int target_parse_constraint (TCGArgConstraint *ct, const char **pct_str)
{
diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h
index 97eec0843..57569e893 100644
--- a/tcg/ppc64/tcg-target.h
+++ b/tcg/ppc64/tcg-target.h
@@ -83,6 +83,7 @@ typedef enum {
#define TCG_TARGET_HAS_nand_i32 0
#define TCG_TARGET_HAS_nor_i32 0
#define TCG_TARGET_HAS_deposit_i32 0
+#define TCG_TARGET_HAS_movcond_i32 0
#define TCG_TARGET_HAS_div_i64 1
#define TCG_TARGET_HAS_rot_i64 0
@@ -103,6 +104,7 @@ typedef enum {
#define TCG_TARGET_HAS_nand_i64 0
#define TCG_TARGET_HAS_nor_i64 0
#define TCG_TARGET_HAS_deposit_i64 0
+#define TCG_TARGET_HAS_movcond_i64 0
#define TCG_AREG0 TCG_REG_R27
diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index aac11d9b6..3b90605fb 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -356,11 +356,6 @@ static void patch_reloc(uint8_t *code_ptr, int type,
}
}
-static int tcg_target_get_call_iarg_regs_count(int flags)
-{
- return sizeof(tcg_target_call_iarg_regs) / sizeof(int);
-}
-
/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index 4f7dfaba5..ed55c331c 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -63,6 +63,7 @@ typedef enum TCGReg {
#define TCG_TARGET_HAS_nand_i32 0
#define TCG_TARGET_HAS_nor_i32 0
#define TCG_TARGET_HAS_deposit_i32 0
+#define TCG_TARGET_HAS_movcond_i32 0
#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_div2_i64 1
@@ -84,6 +85,7 @@ typedef enum TCGReg {
#define TCG_TARGET_HAS_nand_i64 0
#define TCG_TARGET_HAS_nor_i64 0
#define TCG_TARGET_HAS_deposit_i64 0
+#define TCG_TARGET_HAS_movcond_i64 0
#endif
#define TCG_TARGET_HAS_GUEST_BASE
diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index baed3b49f..8fd7f86de 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -59,7 +59,15 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
};
#endif
-#define ARG_OFFSET 1
+/* Define some temporary registers. T2 is used for constant generation. */
+#define TCG_REG_T1 TCG_REG_G1
+#define TCG_REG_T2 TCG_REG_O7
+
+#ifdef CONFIG_USE_GUEST_BASE
+# define TCG_GUEST_BASE_REG TCG_REG_I5
+#else
+# define TCG_GUEST_BASE_REG TCG_REG_G0
+#endif
static const int tcg_target_reg_alloc_order[] = {
TCG_REG_L0,
@@ -70,11 +78,25 @@ static const int tcg_target_reg_alloc_order[] = {
TCG_REG_L5,
TCG_REG_L6,
TCG_REG_L7,
+
TCG_REG_I0,
TCG_REG_I1,
TCG_REG_I2,
TCG_REG_I3,
TCG_REG_I4,
+ TCG_REG_I5,
+
+ TCG_REG_G2,
+ TCG_REG_G3,
+ TCG_REG_G4,
+ TCG_REG_G5,
+
+ TCG_REG_O0,
+ TCG_REG_O1,
+ TCG_REG_O2,
+ TCG_REG_O3,
+ TCG_REG_O4,
+ TCG_REG_O5,
};
static const int tcg_target_call_iarg_regs[6] = {
@@ -133,12 +155,6 @@ static void patch_reloc(uint8_t *code_ptr, int type,
}
}
-/* maximum number of register used for input function arguments */
-static inline int tcg_target_get_call_iarg_regs_count(int flags)
-{
- return 6;
-}
-
/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
@@ -157,7 +173,6 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2);
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_O3);
break;
case 'I':
ct->ct |= TCG_CT_CONST_S11;
@@ -236,7 +251,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
#define ARITH_XOR (INSN_OP(2) | INSN_OP3(0x03))
#define ARITH_SUB (INSN_OP(2) | INSN_OP3(0x04))
#define ARITH_SUBCC (INSN_OP(2) | INSN_OP3(0x14))
-#define ARITH_ADDX (INSN_OP(2) | INSN_OP3(0x10))
+#define ARITH_ADDX (INSN_OP(2) | INSN_OP3(0x08))
#define ARITH_SUBX (INSN_OP(2) | INSN_OP3(0x0c))
#define ARITH_UMUL (INSN_OP(2) | INSN_OP3(0x0a))
#define ARITH_UDIV (INSN_OP(2) | INSN_OP3(0x0e))
@@ -288,6 +303,16 @@ static inline int tcg_target_const_match(tcg_target_long val,
#define ASI_PRIMARY_LITTLE 0x88
#endif
+#define LDUH_LE (LDUHA | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define LDSH_LE (LDSHA | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define LDUW_LE (LDUWA | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define LDSW_LE (LDSWA | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define LDX_LE (LDXA | INSN_ASI(ASI_PRIMARY_LITTLE))
+
+#define STH_LE (STHA | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define STW_LE (STWA | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define STX_LE (STXA | INSN_ASI(ASI_PRIMARY_LITTLE))
+
static inline void tcg_out_arith(TCGContext *s, int rd, int rs1, int rs2,
int op)
{
@@ -353,71 +378,50 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type,
tcg_out_sethi(s, ret, ~arg);
tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR);
} else {
- tcg_out_movi_imm32(s, TCG_REG_I4, arg >> (TCG_TARGET_REG_BITS / 2));
- tcg_out_arithi(s, TCG_REG_I4, TCG_REG_I4, 32, SHIFT_SLLX);
- tcg_out_movi_imm32(s, ret, arg);
- tcg_out_arith(s, ret, ret, TCG_REG_I4, ARITH_OR);
+ tcg_out_movi_imm32(s, ret, arg >> (TCG_TARGET_REG_BITS / 2));
+ tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
+ tcg_out_movi_imm32(s, TCG_REG_T2, arg);
+ tcg_out_arith(s, ret, ret, TCG_REG_T2, ARITH_OR);
}
}
-static inline void tcg_out_ld_raw(TCGContext *s, int ret,
- tcg_target_long arg)
-{
- tcg_out_sethi(s, ret, arg);
- tcg_out32(s, LDUW | INSN_RD(ret) | INSN_RS1(ret) |
- INSN_IMM13(arg & 0x3ff));
-}
-
-static inline void tcg_out_ld_ptr(TCGContext *s, int ret,
- tcg_target_long arg)
+static inline void tcg_out_ldst_rr(TCGContext *s, int data, int a1,
+ int a2, int op)
{
- if (!check_fit_tl(arg, 10))
- tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ffULL);
- if (TCG_TARGET_REG_BITS == 64) {
- tcg_out32(s, LDX | INSN_RD(ret) | INSN_RS1(ret) |
- INSN_IMM13(arg & 0x3ff));
- } else {
- tcg_out32(s, LDUW | INSN_RD(ret) | INSN_RS1(ret) |
- INSN_IMM13(arg & 0x3ff));
- }
+ tcg_out32(s, op | INSN_RD(data) | INSN_RS1(a1) | INSN_RS2(a2));
}
-static inline void tcg_out_ldst(TCGContext *s, int ret, int addr, int offset, int op)
+static inline void tcg_out_ldst(TCGContext *s, int ret, int addr,
+ int offset, int op)
{
- if (check_fit_tl(offset, 13))
+ if (check_fit_tl(offset, 13)) {
tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(addr) |
INSN_IMM13(offset));
- else {
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I5, offset);
- tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(TCG_REG_I5) |
- INSN_RS2(addr));
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, offset);
+ tcg_out_ldst_rr(s, ret, addr, TCG_REG_T1, op);
}
}
-static inline void tcg_out_ldst_asi(TCGContext *s, int ret, int addr,
- int offset, int op, int asi)
-{
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I5, offset);
- tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(TCG_REG_I5) |
- INSN_ASI(asi) | INSN_RS2(addr));
-}
-
static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
TCGReg arg1, tcg_target_long arg2)
{
- if (type == TCG_TYPE_I32)
- tcg_out_ldst(s, ret, arg1, arg2, LDUW);
- else
- tcg_out_ldst(s, ret, arg1, arg2, LDX);
+ tcg_out_ldst(s, ret, arg1, arg2, (type == TCG_TYPE_I32 ? LDUW : LDX));
}
static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
TCGReg arg1, tcg_target_long arg2)
{
- if (type == TCG_TYPE_I32)
- tcg_out_ldst(s, arg, arg1, arg2, STW);
- else
- tcg_out_ldst(s, arg, arg1, arg2, STX);
+ tcg_out_ldst(s, arg, arg1, arg2, (type == TCG_TYPE_I32 ? STW : STX));
+}
+
+static inline void tcg_out_ld_ptr(TCGContext *s, int ret,
+ tcg_target_long arg)
+{
+ if (!check_fit_tl(arg, 10)) {
+ tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ff);
+ }
+ tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, arg & 0x3ff);
}
static inline void tcg_out_sety(TCGContext *s, int rs)
@@ -436,20 +440,21 @@ static inline void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
if (check_fit_tl(val, 13))
tcg_out_arithi(s, reg, reg, val, ARITH_ADD);
else {
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I5, val);
- tcg_out_arith(s, reg, reg, TCG_REG_I5, ARITH_ADD);
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, val);
+ tcg_out_arith(s, reg, reg, TCG_REG_T1, ARITH_ADD);
}
}
}
-static inline void tcg_out_andi(TCGContext *s, int reg, tcg_target_long val)
+static inline void tcg_out_andi(TCGContext *s, int rd, int rs,
+ tcg_target_long val)
{
if (val != 0) {
if (check_fit_tl(val, 13))
- tcg_out_arithi(s, reg, reg, val, ARITH_AND);
+ tcg_out_arithi(s, rd, rs, val, ARITH_AND);
else {
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_I5, val);
- tcg_out_arith(s, reg, reg, TCG_REG_I5, ARITH_AND);
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T1, val);
+ tcg_out_arith(s, rd, rs, TCG_REG_T1, ARITH_AND);
}
}
}
@@ -461,8 +466,8 @@ static void tcg_out_div32(TCGContext *s, int rd, int rs1,
if (uns) {
tcg_out_sety(s, TCG_REG_G0);
} else {
- tcg_out_arithi(s, TCG_REG_I5, rs1, 31, SHIFT_SRA);
- tcg_out_sety(s, TCG_REG_I5);
+ tcg_out_arithi(s, TCG_REG_T1, rs1, 31, SHIFT_SRA);
+ tcg_out_sety(s, TCG_REG_T1);
}
tcg_out_arithc(s, rd, rs1, val2, val2const,
@@ -477,30 +482,33 @@ static inline void tcg_out_nop(TCGContext *s)
static void tcg_out_branch_i32(TCGContext *s, int opc, int label_index)
{
TCGLabel *l = &s->labels[label_index];
+ uint32_t off22;
if (l->has_value) {
- tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x2)
- | INSN_OFF22(l->u.value - (unsigned long)s->code_ptr)));
+ off22 = INSN_OFF22(l->u.value - (unsigned long)s->code_ptr);
} else {
+ /* Make sure to preserve destinations during retranslation. */
+ off22 = *(uint32_t *)s->code_ptr & INSN_OFF22(-1);
tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP22, label_index, 0);
- tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x2) | 0));
}
+ tcg_out32(s, INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x2) | off22);
}
#if TCG_TARGET_REG_BITS == 64
static void tcg_out_branch_i64(TCGContext *s, int opc, int label_index)
{
TCGLabel *l = &s->labels[label_index];
+ uint32_t off19;
if (l->has_value) {
- tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x1) |
- (0x5 << 19) |
- INSN_OFF19(l->u.value - (unsigned long)s->code_ptr)));
+ off19 = INSN_OFF19(l->u.value - (unsigned long)s->code_ptr);
} else {
+ /* Make sure to preserve destinations during retranslation. */
+ off19 = *(uint32_t *)s->code_ptr & INSN_OFF19(-1);
tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, label_index, 0);
- tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x1) |
- (0x5 << 19) | 0));
}
+ tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x1) |
+ (0x5 << 19) | off19));
}
#endif
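Both retranslation fixes rely on the code buffer still holding the instruction generated last time around. A sketch of the idea for the 22-bit displacement form; OFF22 is a local stand-in that masks a byte displacement into the instruction's offset field:

    #include <stdint.h>

    #define OFF22(x)  ((((uint32_t)(x)) >> 2) & 0x3fffff)

    static uint32_t emit_branch(uint32_t opcode, const uint32_t *old_insn,
                                long disp, int label_resolved)
    {
        /* Reuse the old offset bits while the label is unresolved, so a
           branch patched during a previous translation keeps its target. */
        uint32_t off = label_resolved ? OFF22(disp)
                                      : (*old_insn & OFF22(-1));
        return opcode | off;
    }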
@@ -608,8 +616,8 @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGArg ret,
case TCG_COND_GTU:
case TCG_COND_GEU:
if (c2const && c2 != 0) {
- tcg_out_movi_imm13(s, TCG_REG_I5, c2);
- c2 = TCG_REG_I5;
+ tcg_out_movi_imm13(s, TCG_REG_T1, c2);
+ c2 = TCG_REG_T1;
}
t = c1, c1 = c2, c2 = t, c2const = 0;
cond = tcg_swap_cond(cond);
@@ -621,18 +629,10 @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGArg ret,
default:
tcg_out_cmp(s, c1, c2, c2const);
-#if defined(__sparc_v9__) || defined(__sparc_v8plus__)
- tcg_out_movi_imm13(s, ret, 0);
- tcg_out32 (s, ARITH_MOVCC | INSN_RD(ret)
- | INSN_RS1(tcg_cond_to_bcond[cond])
- | MOVCC_ICC | INSN_IMM11(1));
-#else
- t = gen_new_label();
- tcg_out_branch_i32(s, INSN_COND(tcg_cond_to_bcond[cond], 1), t);
- tcg_out_movi_imm13(s, ret, 1);
tcg_out_movi_imm13(s, ret, 0);
- tcg_out_label(s, t, s->code_ptr);
-#endif
+ tcg_out32(s, ARITH_MOVCC | INSN_RD(ret)
+ | INSN_RS1(tcg_cond_to_bcond[cond])
+ | MOVCC_ICC | INSN_IMM11(1));
return;
}
@@ -664,15 +664,15 @@ static void tcg_out_setcond2_i32(TCGContext *s, TCGCond cond, TCGArg ret,
switch (cond) {
case TCG_COND_EQ:
- tcg_out_setcond_i32(s, TCG_COND_EQ, TCG_REG_I5, al, bl, blconst);
+ tcg_out_setcond_i32(s, TCG_COND_EQ, TCG_REG_T1, al, bl, blconst);
tcg_out_setcond_i32(s, TCG_COND_EQ, ret, ah, bh, bhconst);
- tcg_out_arith(s, ret, ret, TCG_REG_I5, ARITH_AND);
+ tcg_out_arith(s, ret, ret, TCG_REG_T1, ARITH_AND);
break;
case TCG_COND_NE:
- tcg_out_setcond_i32(s, TCG_COND_NE, TCG_REG_I5, al, bl, blconst);
+ tcg_out_setcond_i32(s, TCG_COND_NE, TCG_REG_T1, al, bl, blconst);
tcg_out_setcond_i32(s, TCG_COND_NE, ret, ah, bh, bhconst);
- tcg_out_arith(s, ret, ret, TCG_REG_I5, ARITH_OR);
+ tcg_out_arith(s, ret, ret, TCG_REG_T1, ARITH_OR);
break;
default:
@@ -695,14 +695,36 @@ static void tcg_out_setcond2_i32(TCGContext *s, TCGCond cond, TCGArg ret,
/* Generate global QEMU prologue and epilogue code */
static void tcg_target_qemu_prologue(TCGContext *s)
{
- tcg_set_frame(s, TCG_REG_I6, TCG_TARGET_CALL_STACK_OFFSET,
- CPU_TEMP_BUF_NLONGS * (int)sizeof(long));
+ int tmp_buf_size, frame_size;
+
+ /* The TCG temp buffer is at the top of the frame, immediately
+ below the frame pointer. */
+ tmp_buf_size = CPU_TEMP_BUF_NLONGS * (int)sizeof(long);
+ tcg_set_frame(s, TCG_REG_I6, TCG_TARGET_STACK_BIAS - tmp_buf_size,
+ tmp_buf_size);
+
+ /* TCG_TARGET_CALL_STACK_OFFSET includes the stack bias, but is
+ otherwise the minimal frame usable by callees. */
+ frame_size = TCG_TARGET_CALL_STACK_OFFSET - TCG_TARGET_STACK_BIAS;
+ frame_size += TCG_STATIC_CALL_ARGS_SIZE + tmp_buf_size;
+ frame_size += TCG_TARGET_STACK_ALIGN - 1;
+ frame_size &= -TCG_TARGET_STACK_ALIGN;
tcg_out32(s, SAVE | INSN_RD(TCG_REG_O6) | INSN_RS1(TCG_REG_O6) |
- INSN_IMM13(-(TCG_TARGET_STACK_MINFRAME +
- CPU_TEMP_BUF_NLONGS * (int)sizeof(long))));
+ INSN_IMM13(-frame_size));
+
+#ifdef CONFIG_USE_GUEST_BASE
+ if (GUEST_BASE != 0) {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE);
+ tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
+ }
+#endif
+
tcg_out32(s, JMPL | INSN_RD(TCG_REG_G0) | INSN_RS1(TCG_REG_I1) |
INSN_RS2(TCG_REG_G0));
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_I0);
+ /* delay slot */
+ tcg_out_nop(s);
+
+ /* No epilogue required. We issue ret + restore directly in the TB. */
}
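The prologue's frame-size arithmetic in isolation, with illustrative SPARC64 numbers (2047-byte stack bias, 16-byte alignment, 6 x 8 bytes of outgoing argument slots, 128 longs of temp buffer); none of these constants are dictated by the diff itself:

    #include <stdio.h>

    int main(void)
    {
        const int stack_bias        = 2047;        /* V9 bias, illustrative */
        const int call_stack_offset = 2047 + 176;  /* bias + minimal frame */
        const int static_call_args  = 6 * 8;
        const int tmp_buf           = 128 * (int)sizeof(long);
        const int align             = 16;

        int frame = call_stack_offset - stack_bias;
        frame += static_call_args + tmp_buf;
        frame = (frame + align - 1) & -align;      /* round up to alignment */

        printf("save %%sp, -%d, %%sp\n", frame);
        return 0;
    }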
#if defined(CONFIG_SOFTMMU)
@@ -726,418 +748,309 @@ static const void * const qemu_st_helpers[4] = {
helper_stl_mmu,
helper_stq_mmu,
};
-#endif
-#if TARGET_LONG_BITS == 32
-#define TARGET_LD_OP LDUW
-#else
-#define TARGET_LD_OP LDX
-#endif
+/* Perform the TLB load and compare.
-#if defined(CONFIG_SOFTMMU)
-#if HOST_LONG_BITS == 32
-#define TARGET_ADDEND_LD_OP LDUW
+ Inputs:
+ ADDRLO_IDX contains the index into ARGS of the low part of the
+ address; the high part of the address is at ADDRLO_IDX+1.
+
+ MEM_INDEX and S_BITS are the memory context and log2 size of the load.
+
+ WHICH is the offset into the CPUTLBEntry structure of the slot to read.
+ This should be offsetof addr_read or addr_write.
+
+ The result of the TLB comparison is in %[ix]cc. The sanitized address
+ is in the returned register, maybe %o0. The TLB addend is in %o1. */
+
+static int tcg_out_tlb_load(TCGContext *s, int addrlo_idx, int mem_index,
+ int s_bits, const TCGArg *args, int which)
+{
+ const int addrlo = args[addrlo_idx];
+ const int r0 = TCG_REG_O0;
+ const int r1 = TCG_REG_O1;
+ const int r2 = TCG_REG_O2;
+ int addr = addrlo;
+ int tlb_ofs;
+
+ if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) {
+ /* Assemble the 64-bit address in R0. */
+ tcg_out_arithi(s, r0, addrlo, 0, SHIFT_SRL);
+ tcg_out_arithi(s, r1, args[addrlo_idx + 1], 32, SHIFT_SLLX);
+ tcg_out_arith(s, r0, r0, r1, ARITH_OR);
+ }
+
+ /* Shift the page number down to tlb-entry. */
+ tcg_out_arithi(s, r1, addrlo,
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS, SHIFT_SRL);
+
+ /* Mask out the page offset, except for the required alignment. */
+ tcg_out_andi(s, r0, addr, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
+
+ /* Compute tlb index, modulo tlb size. */
+ tcg_out_andi(s, r1, r1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
+
+ /* Relative to the current ENV. */
+ tcg_out_arith(s, r1, TCG_AREG0, r1, ARITH_ADD);
+
+ /* Find a base address that can load both tlb comparator and addend. */
+ tlb_ofs = offsetof(CPUArchState, tlb_table[mem_index][0]);
+ if (!check_fit_tl(tlb_ofs + sizeof(CPUTLBEntry), 13)) {
+ tcg_out_addi(s, r1, tlb_ofs);
+ tlb_ofs = 0;
+ }
+
+ /* Load the tlb comparator and the addend. */
+ tcg_out_ld(s, TCG_TYPE_TL, r2, r1, tlb_ofs + which);
+ tcg_out_ld(s, TCG_TYPE_PTR, r1, r1, tlb_ofs+offsetof(CPUTLBEntry, addend));
+
+ /* subcc arg0, arg2, %g0 */
+ tcg_out_cmp(s, r0, r2, 0);
+
+ /* If the guest address must be zero-extended, do so now. */
+ if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {
+ tcg_out_arithi(s, r0, addrlo, 0, SHIFT_SRL);
+ return r0;
+ }
+ return addrlo;
+}
+#endif /* CONFIG_SOFTMMU */
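The address arithmetic tcg_out_tlb_load encodes, written out in C. The constants are illustrative assumptions (4K pages, 256-entry TLB, 32-byte CPUTLBEntry), not values fixed by the patch:

    #include <stdint.h>

    #define TARGET_PAGE_BITS    12
    #define CPU_TLB_ENTRY_BITS   5
    #define CPU_TLB_SIZE       256

    /* Byte offset of the matching entry within one mmu-index table,
       i.e. the srl/and pair at the top of tcg_out_tlb_load. */
    static unsigned tlb_entry_ofs(uint64_t vaddr)
    {
        unsigned ofs = (unsigned)(vaddr >> (TARGET_PAGE_BITS
                                            - CPU_TLB_ENTRY_BITS));
        return ofs & ((CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
    }

    /* Value compared against the entry: the page number plus any
       misaligned low bits, which force a miss for unaligned accesses. */
    static uint64_t tlb_compare_value(uint64_t vaddr, int s_bits)
    {
        uint64_t page_mask = ~(((uint64_t)1 << TARGET_PAGE_BITS) - 1);
        return vaddr & (page_mask | (((uint64_t)1 << s_bits) - 1));
    }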
+
+static const int qemu_ld_opc[8] = {
+#ifdef TARGET_WORDS_BIGENDIAN
+ LDUB, LDUH, LDUW, LDX, LDSB, LDSH, LDSW, LDX
#else
-#define TARGET_ADDEND_LD_OP LDX
-#endif
+ LDUB, LDUH_LE, LDUW_LE, LDX_LE, LDSB, LDSH_LE, LDSW_LE, LDX_LE
#endif
+};
-#ifdef __arch64__
-#define HOST_LD_OP LDX
-#define HOST_ST_OP STX
-#define HOST_SLL_OP SHIFT_SLLX
-#define HOST_SRA_OP SHIFT_SRAX
+static const int qemu_st_opc[4] = {
+#ifdef TARGET_WORDS_BIGENDIAN
+ STB, STH, STW, STX
#else
-#define HOST_LD_OP LDUW
-#define HOST_ST_OP STW
-#define HOST_SLL_OP SHIFT_SLL
-#define HOST_SRA_OP SHIFT_SRA
+ STB, STH_LE, STW_LE, STX_LE
#endif
+};
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
- int opc)
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int sizeop)
{
- int addr_reg, data_reg, arg0, arg1, arg2, mem_index, s_bits;
+ int addrlo_idx = 1, datalo, datahi, addr_reg;
#if defined(CONFIG_SOFTMMU)
- uint32_t *label1_ptr, *label2_ptr;
+ int memi_idx, memi, s_bits, n;
+ uint32_t *label_ptr[2];
#endif
- data_reg = *args++;
- addr_reg = *args++;
- mem_index = *args;
- s_bits = opc & 3;
-
- arg0 = TCG_REG_O0;
- arg1 = TCG_REG_O1;
- arg2 = TCG_REG_O2;
+ datahi = datalo = args[0];
+ if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
+ datahi = args[1];
+ addrlo_idx = 2;
+ }
#if defined(CONFIG_SOFTMMU)
- /* srl addr_reg, x, arg1 */
- tcg_out_arithi(s, arg1, addr_reg, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS,
- SHIFT_SRL);
- /* and addr_reg, x, arg0 */
- tcg_out_arithi(s, arg0, addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1),
- ARITH_AND);
-
- /* and arg1, x, arg1 */
- tcg_out_andi(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
+ memi_idx = addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS);
+ memi = args[memi_idx];
+ s_bits = sizeop & 3;
+
+ addr_reg = tcg_out_tlb_load(s, addrlo_idx, memi, s_bits, args,
+ offsetof(CPUTLBEntry, addr_read));
+
+ if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
+ int reg64;
+
+ /* bne,pn %[xi]cc, label0 */
+ label_ptr[0] = (uint32_t *)s->code_ptr;
+ tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_NE, 0) | INSN_OP2(0x1)
+ | ((TARGET_LONG_BITS == 64) << 21)));
+
+ /* TLB Hit. */
+ /* Load all 64-bits into an O/G register. */
+ reg64 = (datalo < 16 ? datalo : TCG_REG_O0);
+ tcg_out_ldst_rr(s, reg64, addr_reg, TCG_REG_O1, qemu_ld_opc[sizeop]);
+
+ /* Move the two 32-bit pieces into the destination registers. */
+ tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX);
+ if (reg64 != datalo) {
+ tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64);
+ }
- /* add arg1, x, arg1 */
- tcg_out_addi(s, arg1, offsetof(CPUArchState,
- tlb_table[mem_index][0].addr_read));
+ /* b,a,pt label1 */
+ label_ptr[1] = (uint32_t *)s->code_ptr;
+ tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x1)
+ | (1 << 29) | (1 << 19)));
+ } else {
+ /* The fast path is exactly one insn. Thus we can perform the
+ entire TLB Hit in the (annulled) delay slot of the branch
+ over the TLB Miss case. */
+
+ /* beq,a,pt %[xi]cc, label0 */
+ label_ptr[0] = NULL;
+ label_ptr[1] = (uint32_t *)s->code_ptr;
+ tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1)
+ | ((TARGET_LONG_BITS == 64) << 21)
+ | (1 << 29) | (1 << 19)));
+ /* delay slot */
+ tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_O1, qemu_ld_opc[sizeop]);
+ }
- /* add env, arg1, arg1 */
- tcg_out_arith(s, arg1, TCG_AREG0, arg1, ARITH_ADD);
+ /* TLB Miss. */
- /* ld [arg1], arg2 */
- tcg_out32(s, TARGET_LD_OP | INSN_RD(arg2) | INSN_RS1(arg1) |
- INSN_RS2(TCG_REG_G0));
+ if (label_ptr[0]) {
+ *label_ptr[0] |= INSN_OFF19((unsigned long)s->code_ptr -
+ (unsigned long)label_ptr[0]);
+ }
+ n = 0;
+ tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[n++], TCG_AREG0);
+ if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+ tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++],
+ args[addrlo_idx + 1]);
+ }
+ tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++],
+ args[addrlo_idx]);
- /* subcc arg0, arg2, %g0 */
- tcg_out_arith(s, TCG_REG_G0, arg0, arg2, ARITH_SUBCC);
-
- /* will become:
- be label1
- or
- be,pt %xcc label1 */
- label1_ptr = (uint32_t *)s->code_ptr;
- tcg_out32(s, 0);
-
- /* mov (delay slot) */
- tcg_out_mov(s, TCG_TYPE_PTR, arg0, addr_reg);
-
- /* mov */
- tcg_out_movi(s, TCG_TYPE_I32, arg1, mem_index);
- /* XXX/FIXME: suboptimal */
- tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
- tcg_target_call_iarg_regs[2]);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
- tcg_target_call_iarg_regs[1]);
- tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
- tcg_target_call_iarg_regs[0]);
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
- TCG_AREG0);
-
- /* XXX: move that code at the end of the TB */
/* qemu_ld_helper[s_bits](arg0, arg1) */
tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_ld_helpers[s_bits]
- (tcg_target_ulong)s->code_ptr) >> 2)
& 0x3fffffff));
- /* Store AREG0 in stack to avoid ugly glibc bugs that mangle
- global registers */
- // delay slot
- tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
- TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
- sizeof(long), HOST_ST_OP);
- tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
- TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
- sizeof(long), HOST_LD_OP);
-
- /* data_reg = sign_extend(arg0) */
- switch(opc) {
+ /* delay slot */
+ tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[n], memi);
+
+ n = tcg_target_call_oarg_regs[0];
+ /* datalo = sign_extend(arg0) */
+ switch (sizeop) {
case 0 | 4:
- /* sll arg0, 24/56, data_reg */
- tcg_out_arithi(s, data_reg, arg0, (int)sizeof(tcg_target_long) * 8 - 8,
- HOST_SLL_OP);
- /* sra data_reg, 24/56, data_reg */
- tcg_out_arithi(s, data_reg, data_reg,
- (int)sizeof(tcg_target_long) * 8 - 8, HOST_SRA_OP);
+ /* Recall that SRA sign extends from bit 31 through bit 63. */
+ tcg_out_arithi(s, datalo, n, 24, SHIFT_SLL);
+ tcg_out_arithi(s, datalo, datalo, 24, SHIFT_SRA);
break;
case 1 | 4:
- /* sll arg0, 16/48, data_reg */
- tcg_out_arithi(s, data_reg, arg0,
- (int)sizeof(tcg_target_long) * 8 - 16, HOST_SLL_OP);
- /* sra data_reg, 16/48, data_reg */
- tcg_out_arithi(s, data_reg, data_reg,
- (int)sizeof(tcg_target_long) * 8 - 16, HOST_SRA_OP);
+ tcg_out_arithi(s, datalo, n, 16, SHIFT_SLL);
+ tcg_out_arithi(s, datalo, datalo, 16, SHIFT_SRA);
break;
case 2 | 4:
- /* sll arg0, 32, data_reg */
- tcg_out_arithi(s, data_reg, arg0, 32, HOST_SLL_OP);
- /* sra data_reg, 32, data_reg */
- tcg_out_arithi(s, data_reg, data_reg, 32, HOST_SRA_OP);
+ tcg_out_arithi(s, datalo, n, 0, SHIFT_SRA);
break;
+ case 3:
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_out_mov(s, TCG_TYPE_REG, datahi, n);
+ tcg_out_mov(s, TCG_TYPE_REG, datalo, n + 1);
+ break;
+ }
+ /* FALLTHRU */
case 0:
case 1:
case 2:
- case 3:
default:
/* mov */
- tcg_out_mov(s, TCG_TYPE_REG, data_reg, arg0);
+ tcg_out_mov(s, TCG_TYPE_REG, datalo, n);
break;
}
- /* will become:
- ba label2 */
- label2_ptr = (uint32_t *)s->code_ptr;
- tcg_out32(s, 0);
-
- /* nop (delay slot */
- tcg_out_nop(s);
-
- /* label1: */
-#if TARGET_LONG_BITS == 32
- /* be label1 */
- *label1_ptr = (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x2) |
- INSN_OFF22((unsigned long)s->code_ptr -
- (unsigned long)label1_ptr));
-#else
- /* be,pt %xcc label1 */
- *label1_ptr = (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1) |
- (0x5 << 19) | INSN_OFF19((unsigned long)s->code_ptr -
- (unsigned long)label1_ptr));
-#endif
-
- /* ld [arg1 + x], arg1 */
- tcg_out_ldst(s, arg1, arg1, offsetof(CPUTLBEntry, addend) -
- offsetof(CPUTLBEntry, addr_read), TARGET_ADDEND_LD_OP);
-
-#if TARGET_LONG_BITS == 32
- /* and addr_reg, x, arg0 */
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_I5, 0xffffffff);
- tcg_out_arith(s, arg0, addr_reg, TCG_REG_I5, ARITH_AND);
- /* add arg0, arg1, arg0 */
- tcg_out_arith(s, arg0, arg0, arg1, ARITH_ADD);
+ *label_ptr[1] |= INSN_OFF19((unsigned long)s->code_ptr -
+ (unsigned long)label_ptr[1]);
#else
- /* add addr_reg, arg1, arg0 */
- tcg_out_arith(s, arg0, addr_reg, arg1, ARITH_ADD);
-#endif
+ addr_reg = args[addrlo_idx];
+ if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {
+ tcg_out_arithi(s, TCG_REG_T1, addr_reg, 0, SHIFT_SRL);
+ addr_reg = TCG_REG_T1;
+ }
+ if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
+ int reg64 = (datalo < 16 ? datalo : TCG_REG_O0);
-#else
- arg0 = addr_reg;
-#endif
+ tcg_out_ldst_rr(s, reg64, addr_reg,
+ (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0),
+ qemu_ld_opc[sizeop]);
- switch(opc) {
- case 0:
- /* ldub [arg0], data_reg */
- tcg_out_ldst(s, data_reg, arg0, 0, LDUB);
- break;
- case 0 | 4:
- /* ldsb [arg0], data_reg */
- tcg_out_ldst(s, data_reg, arg0, 0, LDSB);
- break;
- case 1:
-#ifdef TARGET_WORDS_BIGENDIAN
- /* lduh [arg0], data_reg */
- tcg_out_ldst(s, data_reg, arg0, 0, LDUH);
-#else
- /* lduha [arg0] ASI_PRIMARY_LITTLE, data_reg */
- tcg_out_ldst_asi(s, data_reg, arg0, 0, LDUHA, ASI_PRIMARY_LITTLE);
-#endif
- break;
- case 1 | 4:
-#ifdef TARGET_WORDS_BIGENDIAN
- /* ldsh [arg0], data_reg */
- tcg_out_ldst(s, data_reg, arg0, 0, LDSH);
-#else
- /* ldsha [arg0] ASI_PRIMARY_LITTLE, data_reg */
- tcg_out_ldst_asi(s, data_reg, arg0, 0, LDSHA, ASI_PRIMARY_LITTLE);
-#endif
- break;
- case 2:
-#ifdef TARGET_WORDS_BIGENDIAN
- /* lduw [arg0], data_reg */
- tcg_out_ldst(s, data_reg, arg0, 0, LDUW);
-#else
- /* lduwa [arg0] ASI_PRIMARY_LITTLE, data_reg */
- tcg_out_ldst_asi(s, data_reg, arg0, 0, LDUWA, ASI_PRIMARY_LITTLE);
-#endif
- break;
- case 2 | 4:
-#ifdef TARGET_WORDS_BIGENDIAN
- /* ldsw [arg0], data_reg */
- tcg_out_ldst(s, data_reg, arg0, 0, LDSW);
-#else
- /* ldswa [arg0] ASI_PRIMARY_LITTLE, data_reg */
- tcg_out_ldst_asi(s, data_reg, arg0, 0, LDSWA, ASI_PRIMARY_LITTLE);
-#endif
- break;
- case 3:
-#ifdef TARGET_WORDS_BIGENDIAN
- /* ldx [arg0], data_reg */
- tcg_out_ldst(s, data_reg, arg0, 0, LDX);
-#else
- /* ldxa [arg0] ASI_PRIMARY_LITTLE, data_reg */
- tcg_out_ldst_asi(s, data_reg, arg0, 0, LDXA, ASI_PRIMARY_LITTLE);
-#endif
- break;
- default:
- tcg_abort();
+ tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX);
+ if (reg64 != datalo) {
+ tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64);
+ }
+ } else {
+ tcg_out_ldst_rr(s, datalo, addr_reg,
+ (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0),
+ qemu_ld_opc[sizeop]);
}
-
-#if defined(CONFIG_SOFTMMU)
- /* label2: */
- *label2_ptr = (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x2) |
- INSN_OFF22((unsigned long)s->code_ptr -
- (unsigned long)label2_ptr));
-#endif
+#endif /* CONFIG_SOFTMMU */
}
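The hi/lo register pairing used above for 64-bit accesses on a 32-bit host, restated as plain C: a 64-bit load lands in one 64-bit-capable register and is split with a 32-bit right shift, and a store first reassembles the pair, as in the srl/sllx/or sequence in tcg_out_qemu_st below:

    #include <stdint.h>

    static void split64(uint64_t v, uint32_t *hi, uint32_t *lo)
    {
        *hi = (uint32_t)(v >> 32);   /* SHIFT_SRLX by 32 */
        *lo = (uint32_t)v;
    }

    static uint64_t join64(uint32_t hi, uint32_t lo)
    {
        return ((uint64_t)hi << 32) | lo;
    }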
-static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
- int opc)
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int sizeop)
{
- int addr_reg, data_reg, arg0, arg1, arg2, mem_index, s_bits;
+ int addrlo_idx = 1, datalo, datahi, addr_reg;
#if defined(CONFIG_SOFTMMU)
- uint32_t *label1_ptr, *label2_ptr;
+ int memi_idx, memi, n;
+ uint32_t *label_ptr;
#endif
- data_reg = *args++;
- addr_reg = *args++;
- mem_index = *args;
-
- s_bits = opc;
-
- arg0 = TCG_REG_O0;
- arg1 = TCG_REG_O1;
- arg2 = TCG_REG_O2;
+ datahi = datalo = args[0];
+ if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
+ datahi = args[1];
+ addrlo_idx = 2;
+ }
#if defined(CONFIG_SOFTMMU)
- /* srl addr_reg, x, arg1 */
- tcg_out_arithi(s, arg1, addr_reg, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS,
- SHIFT_SRL);
-
- /* and addr_reg, x, arg0 */
- tcg_out_arithi(s, arg0, addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1),
- ARITH_AND);
-
- /* and arg1, x, arg1 */
- tcg_out_andi(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
-
- /* add arg1, x, arg1 */
- tcg_out_addi(s, arg1, offsetof(CPUArchState,
- tlb_table[mem_index][0].addr_write));
-
- /* add env, arg1, arg1 */
- tcg_out_arith(s, arg1, TCG_AREG0, arg1, ARITH_ADD);
+ memi_idx = addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS);
+ memi = args[memi_idx];
+
+ addr_reg = tcg_out_tlb_load(s, addrlo_idx, memi, sizeop, args,
+ offsetof(CPUTLBEntry, addr_write));
+
+ if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
+ /* Reconstruct the full 64-bit value. */
+ tcg_out_arithi(s, TCG_REG_T1, datalo, 0, SHIFT_SRL);
+ tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX);
+ tcg_out_arith(s, TCG_REG_O2, TCG_REG_T1, TCG_REG_O2, ARITH_OR);
+ datalo = TCG_REG_O2;
+ }
- /* ld [arg1], arg2 */
- tcg_out32(s, TARGET_LD_OP | INSN_RD(arg2) | INSN_RS1(arg1) |
- INSN_RS2(TCG_REG_G0));
+ /* The fast path is exactly one insn. Thus we can perform the entire
+ TLB Hit in the (annulled) delay slot of the branch over TLB Miss. */
+ /* beq,a,pt %[xi]cc, label0 */
+ label_ptr = (uint32_t *)s->code_ptr;
+ tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1)
+ | ((TARGET_LONG_BITS == 64) << 21)
+ | (1 << 29) | (1 << 19)));
+ /* delay slot */
+ tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_O1, qemu_st_opc[sizeop]);
+
+ /* TLB Miss. */
+
+ n = 0;
+ tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[n++], TCG_AREG0);
+ if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+ tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++],
+ args[addrlo_idx + 1]);
+ }
+ tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++],
+ args[addrlo_idx]);
+ if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
+ tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], datahi);
+ }
+ tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], datalo);
- /* subcc arg0, arg2, %g0 */
- tcg_out_arith(s, TCG_REG_G0, arg0, arg2, ARITH_SUBCC);
-
- /* will become:
- be label1
- or
- be,pt %xcc label1 */
- label1_ptr = (uint32_t *)s->code_ptr;
- tcg_out32(s, 0);
-
- /* mov (delay slot) */
- tcg_out_mov(s, TCG_TYPE_PTR, arg0, addr_reg);
-
- /* mov */
- tcg_out_mov(s, TCG_TYPE_REG, arg1, data_reg);
-
- /* mov */
- tcg_out_movi(s, TCG_TYPE_I32, arg2, mem_index);
-
- /* XXX/FIXME: suboptimal */
- tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
- tcg_target_call_iarg_regs[2]);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
- tcg_target_call_iarg_regs[1]);
- tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
- tcg_target_call_iarg_regs[0]);
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
- TCG_AREG0);
- /* XXX: move that code at the end of the TB */
/* qemu_st_helper[s_bits](arg0, arg1, arg2) */
- tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_st_helpers[s_bits]
+ tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_st_helpers[sizeop]
- (tcg_target_ulong)s->code_ptr) >> 2)
& 0x3fffffff));
- /* Store AREG0 in stack to avoid ugly glibc bugs that mangle
- global registers */
- // delay slot
- tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
- TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
- sizeof(long), HOST_ST_OP);
- tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
- TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
- sizeof(long), HOST_LD_OP);
-
- /* will become:
- ba label2 */
- label2_ptr = (uint32_t *)s->code_ptr;
- tcg_out32(s, 0);
-
- /* nop (delay slot) */
- tcg_out_nop(s);
-
-#if TARGET_LONG_BITS == 32
- /* be label1 */
- *label1_ptr = (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x2) |
- INSN_OFF22((unsigned long)s->code_ptr -
- (unsigned long)label1_ptr));
-#else
- /* be,pt %xcc label1 */
- *label1_ptr = (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1) |
- (0x5 << 19) | INSN_OFF19((unsigned long)s->code_ptr -
- (unsigned long)label1_ptr));
-#endif
-
- /* ld [arg1 + x], arg1 */
- tcg_out_ldst(s, arg1, arg1, offsetof(CPUTLBEntry, addend) -
- offsetof(CPUTLBEntry, addr_write), TARGET_ADDEND_LD_OP);
-
-#if TARGET_LONG_BITS == 32
- /* and addr_reg, x, arg0 */
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_I5, 0xffffffff);
- tcg_out_arith(s, arg0, addr_reg, TCG_REG_I5, ARITH_AND);
- /* add arg0, arg1, arg0 */
- tcg_out_arith(s, arg0, arg0, arg1, ARITH_ADD);
-#else
- /* add addr_reg, arg1, arg0 */
- tcg_out_arith(s, arg0, addr_reg, arg1, ARITH_ADD);
-#endif
+ /* delay slot */
+ tcg_out_movi(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n], memi);
+ *label_ptr |= INSN_OFF19((unsigned long)s->code_ptr -
+ (unsigned long)label_ptr);
#else
- arg0 = addr_reg;
-#endif
-
- switch(opc) {
- case 0:
- /* stb data_reg, [arg0] */
- tcg_out_ldst(s, data_reg, arg0, 0, STB);
- break;
- case 1:
-#ifdef TARGET_WORDS_BIGENDIAN
- /* sth data_reg, [arg0] */
- tcg_out_ldst(s, data_reg, arg0, 0, STH);
-#else
- /* stha data_reg, [arg0] ASI_PRIMARY_LITTLE */
- tcg_out_ldst_asi(s, data_reg, arg0, 0, STHA, ASI_PRIMARY_LITTLE);
-#endif
- break;
- case 2:
-#ifdef TARGET_WORDS_BIGENDIAN
- /* stw data_reg, [arg0] */
- tcg_out_ldst(s, data_reg, arg0, 0, STW);
-#else
- /* stwa data_reg, [arg0] ASI_PRIMARY_LITTLE */
- tcg_out_ldst_asi(s, data_reg, arg0, 0, STWA, ASI_PRIMARY_LITTLE);
-#endif
- break;
- case 3:
-#ifdef TARGET_WORDS_BIGENDIAN
- /* stx data_reg, [arg0] */
- tcg_out_ldst(s, data_reg, arg0, 0, STX);
-#else
- /* stxa data_reg, [arg0] ASI_PRIMARY_LITTLE */
- tcg_out_ldst_asi(s, data_reg, arg0, 0, STXA, ASI_PRIMARY_LITTLE);
-#endif
- break;
- default:
- tcg_abort();
+ addr_reg = args[addrlo_idx];
+ if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {
+ tcg_out_arithi(s, TCG_REG_T1, addr_reg, 0, SHIFT_SRL);
+ addr_reg = TCG_REG_T1;
}
-
-#if defined(CONFIG_SOFTMMU)
- /* label2: */
- *label2_ptr = (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x2) |
- INSN_OFF22((unsigned long)s->code_ptr -
- (unsigned long)label2_ptr));
-#endif
+ if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
+ tcg_out_arithi(s, TCG_REG_T1, datalo, 0, SHIFT_SRL);
+ tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX);
+ tcg_out_arith(s, TCG_REG_O2, TCG_REG_T1, TCG_REG_O2, ARITH_OR);
+ datalo = TCG_REG_O2;
+ }
+ tcg_out_ldst_rr(s, datalo, addr_reg,
+ (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0),
+ qemu_st_opc[sizeop]);
+#endif /* CONFIG_SOFTMMU */
}
static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
@@ -1156,39 +1069,33 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
case INDEX_op_goto_tb:
if (s->tb_jmp_offset) {
/* direct jump method */
- tcg_out_sethi(s, TCG_REG_I5, args[0] & 0xffffe000);
- tcg_out32(s, JMPL | INSN_RD(TCG_REG_G0) | INSN_RS1(TCG_REG_I5) |
- INSN_IMM13((args[0] & 0x1fff)));
+ uint32_t old_insn = *(uint32_t *)s->code_ptr;
s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
+ /* Make sure to preserve links during retranslation. */
+ tcg_out32(s, CALL | (old_insn & ~INSN_OP(-1)));
} else {
/* indirect jump method */
- tcg_out_ld_ptr(s, TCG_REG_I5, (tcg_target_long)(s->tb_next + args[0]));
- tcg_out32(s, JMPL | INSN_RD(TCG_REG_G0) | INSN_RS1(TCG_REG_I5) |
+ tcg_out_ld_ptr(s, TCG_REG_T1,
+ (tcg_target_long)(s->tb_next + args[0]));
+ tcg_out32(s, JMPL | INSN_RD(TCG_REG_G0) | INSN_RS1(TCG_REG_T1) |
INSN_RS2(TCG_REG_G0));
}
tcg_out_nop(s);
s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
break;
case INDEX_op_call:
- if (const_args[0])
+ if (const_args[0]) {
tcg_out32(s, CALL | ((((tcg_target_ulong)args[0]
- (tcg_target_ulong)s->code_ptr) >> 2)
& 0x3fffffff));
- else {
- tcg_out_ld_ptr(s, TCG_REG_I5,
+ } else {
+ tcg_out_ld_ptr(s, TCG_REG_T1,
(tcg_target_long)(s->tb_next + args[0]));
- tcg_out32(s, JMPL | INSN_RD(TCG_REG_O7) | INSN_RS1(TCG_REG_I5) |
+ tcg_out32(s, JMPL | INSN_RD(TCG_REG_O7) | INSN_RS1(TCG_REG_T1) |
INSN_RS2(TCG_REG_G0));
}
- /* Store AREG0 in stack to avoid ugly glibc bugs that mangle
- global registers */
- // delay slot
- tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
- TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
- sizeof(long), HOST_ST_OP);
- tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
- TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
- sizeof(long), HOST_LD_OP);
+ /* delay slot */
+ tcg_out_nop(s);
break;
case INDEX_op_jmp:
case INDEX_op_br:
@@ -1260,13 +1167,16 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
goto gen_arith;
case INDEX_op_shl_i32:
c = SHIFT_SLL;
- goto gen_arith;
+ do_shift32:
+ /* Limit immediate shift count lest we create an illegal insn. */
+ tcg_out_arithc(s, args[0], args[1], args[2] & 31, const_args[2], c);
+ break;
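
The masking above keeps constant shift counts inside the instruction's immediate field: 5 bits for the 32-bit sll/srl/sra forms, 6 bits for the 64-bit X forms. A minimal sketch of the clamp (plain C, stated as an assumption about the encoding rather than the emitter itself):

    /* 31 for the 32-bit shifts, 63 for the sllx/srlx/srax forms. */
    static int clamp_shift_count(int count, int reg_bits)
    {
        return count & (reg_bits - 1);
    }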
case INDEX_op_shr_i32:
c = SHIFT_SRL;
- goto gen_arith;
+ goto do_shift32;
case INDEX_op_sar_i32:
c = SHIFT_SRA;
- goto gen_arith;
+ goto do_shift32;
case INDEX_op_mul_i32:
c = ARITH_UMUL;
goto gen_arith;
@@ -1287,11 +1197,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
case INDEX_op_rem_i32:
case INDEX_op_remu_i32:
- tcg_out_div32(s, TCG_REG_I5, args[1], args[2], const_args[2],
+ tcg_out_div32(s, TCG_REG_T1, args[1], args[2], const_args[2],
opc == INDEX_op_remu_i32);
- tcg_out_arithc(s, TCG_REG_I5, TCG_REG_I5, args[2], const_args[2],
+ tcg_out_arithc(s, TCG_REG_T1, TCG_REG_T1, args[2], const_args[2],
ARITH_UMUL);
- tcg_out_arith(s, args[0], args[1], TCG_REG_I5, ARITH_SUB);
+ tcg_out_arith(s, args[0], args[1], TCG_REG_T1, ARITH_SUB);
break;
case INDEX_op_brcond_i32:
@@ -1356,6 +1266,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
tcg_out_qemu_ld(s, args, 2 | 4);
break;
#endif
+ case INDEX_op_qemu_ld64:
+ tcg_out_qemu_ld(s, args, 3);
+ break;
case INDEX_op_qemu_st8:
tcg_out_qemu_st(s, args, 0);
break;
@@ -1365,6 +1278,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
case INDEX_op_qemu_st32:
tcg_out_qemu_st(s, args, 2);
break;
+ case INDEX_op_qemu_st64:
+ tcg_out_qemu_st(s, args, 3);
+ break;
#if TCG_TARGET_REG_BITS == 64
case INDEX_op_movi_i64:
@@ -1381,13 +1297,16 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
break;
case INDEX_op_shl_i64:
c = SHIFT_SLLX;
- goto gen_arith;
+ do_shift64:
+ /* Limit immediate shift count lest we create an illegal insn. */
+ tcg_out_arithc(s, args[0], args[1], args[2] & 63, const_args[2], c);
+ break;
case INDEX_op_shr_i64:
c = SHIFT_SRLX;
- goto gen_arith;
+ goto do_shift64;
case INDEX_op_sar_i64:
c = SHIFT_SRAX;
- goto gen_arith;
+ goto do_shift64;
case INDEX_op_mul_i64:
c = ARITH_MULX;
goto gen_arith;
@@ -1399,11 +1318,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
goto gen_arith;
case INDEX_op_rem_i64:
case INDEX_op_remu_i64:
- tcg_out_arithc(s, TCG_REG_I5, args[1], args[2], const_args[2],
+ tcg_out_arithc(s, TCG_REG_T1, args[1], args[2], const_args[2],
opc == INDEX_op_rem_i64 ? ARITH_SDIVX : ARITH_UDIVX);
- tcg_out_arithc(s, TCG_REG_I5, TCG_REG_I5, args[2], const_args[2],
+ tcg_out_arithc(s, TCG_REG_T1, TCG_REG_T1, args[2], const_args[2],
ARITH_MULX);
- tcg_out_arith(s, args[0], args[1], TCG_REG_I5, ARITH_SUB);
+ tcg_out_arith(s, args[0], args[1], TCG_REG_T1, ARITH_SUB);
break;
case INDEX_op_ext32s_i64:
if (const_args[1]) {
@@ -1429,13 +1348,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
args[2], const_args[2]);
break;
- case INDEX_op_qemu_ld64:
- tcg_out_qemu_ld(s, args, 3);
- break;
- case INDEX_op_qemu_st64:
- tcg_out_qemu_st(s, args, 3);
- break;
-
#endif
gen_arith:
tcg_out_arithc(s, args[0], args[1], args[2], const_args[2], c);
@@ -1500,20 +1412,6 @@ static const TCGTargetOpDef sparc_op_defs[] = {
{ INDEX_op_mulu2_i32, { "r", "r", "r", "rJ" } },
#endif
- { INDEX_op_qemu_ld8u, { "r", "L" } },
- { INDEX_op_qemu_ld8s, { "r", "L" } },
- { INDEX_op_qemu_ld16u, { "r", "L" } },
- { INDEX_op_qemu_ld16s, { "r", "L" } },
- { INDEX_op_qemu_ld32, { "r", "L" } },
-#if TCG_TARGET_REG_BITS == 64
- { INDEX_op_qemu_ld32u, { "r", "L" } },
- { INDEX_op_qemu_ld32s, { "r", "L" } },
-#endif
-
- { INDEX_op_qemu_st8, { "L", "L" } },
- { INDEX_op_qemu_st16, { "L", "L" } },
- { INDEX_op_qemu_st32, { "L", "L" } },
-
#if TCG_TARGET_REG_BITS == 64
{ INDEX_op_mov_i64, { "r", "r" } },
{ INDEX_op_movi_i64, { "r" } },
@@ -1528,8 +1426,6 @@ static const TCGTargetOpDef sparc_op_defs[] = {
{ INDEX_op_st16_i64, { "r", "r" } },
{ INDEX_op_st32_i64, { "r", "r" } },
{ INDEX_op_st_i64, { "r", "r" } },
- { INDEX_op_qemu_ld64, { "L", "L" } },
- { INDEX_op_qemu_st64, { "L", "L" } },
{ INDEX_op_add_i64, { "r", "r", "rJ" } },
{ INDEX_op_mul_i64, { "r", "r", "rJ" } },
@@ -1557,6 +1453,47 @@ static const TCGTargetOpDef sparc_op_defs[] = {
{ INDEX_op_brcond_i64, { "r", "rJ" } },
{ INDEX_op_setcond_i64, { "r", "r", "rJ" } },
#endif
+
+#if TCG_TARGET_REG_BITS == 64
+ { INDEX_op_qemu_ld8u, { "r", "L" } },
+ { INDEX_op_qemu_ld8s, { "r", "L" } },
+ { INDEX_op_qemu_ld16u, { "r", "L" } },
+ { INDEX_op_qemu_ld16s, { "r", "L" } },
+ { INDEX_op_qemu_ld32, { "r", "L" } },
+ { INDEX_op_qemu_ld32u, { "r", "L" } },
+ { INDEX_op_qemu_ld32s, { "r", "L" } },
+ { INDEX_op_qemu_ld64, { "r", "L" } },
+
+ { INDEX_op_qemu_st8, { "L", "L" } },
+ { INDEX_op_qemu_st16, { "L", "L" } },
+ { INDEX_op_qemu_st32, { "L", "L" } },
+ { INDEX_op_qemu_st64, { "L", "L" } },
+#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
+ { INDEX_op_qemu_ld8u, { "r", "L" } },
+ { INDEX_op_qemu_ld8s, { "r", "L" } },
+ { INDEX_op_qemu_ld16u, { "r", "L" } },
+ { INDEX_op_qemu_ld16s, { "r", "L" } },
+ { INDEX_op_qemu_ld32, { "r", "L" } },
+ { INDEX_op_qemu_ld64, { "r", "r", "L" } },
+
+ { INDEX_op_qemu_st8, { "L", "L" } },
+ { INDEX_op_qemu_st16, { "L", "L" } },
+ { INDEX_op_qemu_st32, { "L", "L" } },
+ { INDEX_op_qemu_st64, { "L", "L", "L" } },
+#else
+ { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
+ { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
+ { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
+ { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
+ { INDEX_op_qemu_ld32, { "r", "L", "L" } },
+ { INDEX_op_qemu_ld64, { "L", "L", "L", "L" } },
+
+ { INDEX_op_qemu_st8, { "L", "L", "L" } },
+ { INDEX_op_qemu_st16, { "L", "L", "L" } },
+ { INDEX_op_qemu_st32, { "L", "L", "L" } },
+ { INDEX_op_qemu_st64, { "L", "L", "L", "L" } },
+#endif
+
{ -1 },
};
@@ -1583,25 +1520,23 @@ static void tcg_target_init(TCGContext *s)
(1 << TCG_REG_O7));
tcg_regset_clear(s->reserved_regs);
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_G0);
-#if TCG_TARGET_REG_BITS == 64
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_I4); // for internal use
-#endif
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_I5); // for internal use
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_I6);
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_I7);
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_O6);
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_O7);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_G0); /* zero */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_G6); /* reserved for OS */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_G7); /* thread pointer */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_I6); /* frame pointer */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_I7); /* return address */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_O6); /* stack pointer */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_T1); /* for internal use */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_T2); /* for internal use */
+
tcg_add_target_add_op_defs(sparc_op_defs);
}
#if TCG_TARGET_REG_BITS == 64
# define ELF_HOST_MACHINE EM_SPARCV9
-#elif defined(__sparc_v8plus__)
+#else
# define ELF_HOST_MACHINE EM_SPARC32PLUS
# define ELF_HOST_FLAGS EF_SPARC_32PLUS
-#else
-# define ELF_HOST_MACHINE EM_SPARC
#endif
typedef struct {
@@ -1657,3 +1592,18 @@ void tcg_register_jit(void *buf, size_t buf_size)
tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
+
+void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr)
+{
+ uint32_t *ptr = (uint32_t *)jmp_addr;
+ tcg_target_long disp = (tcg_target_long)(addr - jmp_addr) >> 2;
+
+ /* We can reach the entire address space on a 32-bit host. On a
+ 64-bit host the code_gen_buffer can't be larger than 2GB. */
+ if (TCG_TARGET_REG_BITS == 64 && !check_fit_tl(disp, 30)) {
+ tcg_abort();
+ }
+
+ *ptr = CALL | (disp & 0x3fffffff);
+ flush_icache_range(jmp_addr, jmp_addr + 4);
+}
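
The patching above relies on the SPARC CALL format: the low 30 bits hold the sign-extended word displacement (target - pc) >> 2. A hypothetical helper (not part of this patch) that decodes a patched slot back to its target, useful for checking the round trip:

    #include <stdint.h>

    static uintptr_t call_target(uintptr_t jmp_addr)
    {
        uint32_t insn = *(uint32_t *)jmp_addr;
        /* Shift out the 2-bit opcode, then sign-extend the 30-bit
           word displacement. */
        int32_t disp = (int32_t)(insn << 2) >> 2;
        return jmp_addr + ((intptr_t)disp << 2);
    }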
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index 0ea87bef7..6314ffb30 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -66,22 +66,19 @@ typedef enum {
#define TCG_CT_CONST_S13 0x200
/* used for function call generation */
-#define TCG_REG_CALL_STACK TCG_REG_I6
-#ifdef __arch64__
-// Reserve space for AREG0
-#define TCG_TARGET_STACK_MINFRAME (176 + 4 * (int)sizeof(long) + \
- TCG_STATIC_CALL_ARGS_SIZE)
-#define TCG_TARGET_CALL_STACK_OFFSET (2047 - 16)
-#define TCG_TARGET_STACK_ALIGN 16
+#define TCG_REG_CALL_STACK TCG_REG_O6
+
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_TARGET_STACK_BIAS 2047
+#define TCG_TARGET_STACK_ALIGN 16
+#define TCG_TARGET_CALL_STACK_OFFSET (128 + 6*8 + TCG_TARGET_STACK_BIAS)
#else
-// AREG0 + one word for alignment
-#define TCG_TARGET_STACK_MINFRAME (92 + (2 + 1) * (int)sizeof(long) + \
- TCG_STATIC_CALL_ARGS_SIZE)
-#define TCG_TARGET_CALL_STACK_OFFSET TCG_TARGET_STACK_MINFRAME
-#define TCG_TARGET_STACK_ALIGN 8
+#define TCG_TARGET_STACK_BIAS 0
+#define TCG_TARGET_STACK_ALIGN 8
+#define TCG_TARGET_CALL_STACK_OFFSET (64 + 4 + 6*4)
#endif
-#ifdef __arch64__
+#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_EXTEND_ARGS 1
#endif
@@ -102,6 +99,7 @@ typedef enum {
#define TCG_TARGET_HAS_nand_i32 0
#define TCG_TARGET_HAS_nor_i32 0
#define TCG_TARGET_HAS_deposit_i32 0
+#define TCG_TARGET_HAS_movcond_i32 0
#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_div_i64 1
@@ -123,15 +121,12 @@ typedef enum {
#define TCG_TARGET_HAS_nand_i64 0
#define TCG_TARGET_HAS_nor_i64 0
#define TCG_TARGET_HAS_deposit_i64 0
+#define TCG_TARGET_HAS_movcond_i64 0
#endif
-#ifdef CONFIG_SOLARIS
-#define TCG_AREG0 TCG_REG_G2
-#elif defined(__sparc_v9__)
-#define TCG_AREG0 TCG_REG_G5
-#else
-#define TCG_AREG0 TCG_REG_G6
-#endif
+#define TCG_TARGET_HAS_GUEST_BASE
+
+#define TCG_AREG0 TCG_REG_I0
static inline void flush_icache_range(tcg_target_ulong start,
tcg_target_ulong stop)
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 169d3b2b0..bd93fe4f0 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -518,18 +518,34 @@ static inline void tcg_gen_and_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
}
}
-static inline void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+static inline void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
{
- /* some cases can be optimized here */
- if (arg2 == 0) {
+ TCGv_i32 t0;
+ /* Some cases can be optimized here. */
+ switch (arg2) {
+ case 0:
tcg_gen_movi_i32(ret, 0);
- } else if (arg2 == 0xffffffff) {
+ return;
+ case 0xffffffffu:
tcg_gen_mov_i32(ret, arg1);
- } else {
- TCGv_i32 t0 = tcg_const_i32(arg2);
- tcg_gen_and_i32(ret, arg1, t0);
- tcg_temp_free_i32(t0);
- }
+ return;
+ case 0xffu:
+ /* Don't recurse with tcg_gen_ext8u_i32. */
+ if (TCG_TARGET_HAS_ext8u_i32) {
+ tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg1);
+ return;
+ }
+ break;
+ case 0xffffu:
+ if (TCG_TARGET_HAS_ext16u_i32) {
+ tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg1);
+ return;
+ }
+ break;
+ }
+ t0 = tcg_const_i32(arg2);
+ tcg_gen_and_i32(ret, arg1, t0);
+ tcg_temp_free_i32(t0);
}
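
With the specializations above, the common masks map directly onto extension or move opcodes instead of materializing a constant temporary. A usage sketch ('t' is a hypothetical TCGv_i32):

    tcg_gen_andi_i32(t, t, 0xff);    /* ext8u_i32 when the target has it */
    tcg_gen_andi_i32(t, t, 0xffff);  /* ext16u_i32 when the target has it */
    tcg_gen_andi_i32(t, t, 0);       /* folds to movi_i32 t, 0 */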
static inline void tcg_gen_or_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
@@ -543,9 +559,9 @@ static inline void tcg_gen_or_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
static inline void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
{
- /* some cases can be optimized here */
- if (arg2 == 0xffffffff) {
- tcg_gen_movi_i32(ret, 0xffffffff);
+ /* Some cases can be optimized here. */
+ if (arg2 == -1) {
+ tcg_gen_movi_i32(ret, -1);
} else if (arg2 == 0) {
tcg_gen_mov_i32(ret, arg1);
} else {
@@ -566,9 +582,12 @@ static inline void tcg_gen_xor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
static inline void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
{
- /* some cases can be optimized here */
+ /* Some cases can be optimized here. */
if (arg2 == 0) {
tcg_gen_mov_i32(ret, arg1);
+ } else if (arg2 == -1 && TCG_TARGET_HAS_not_i32) {
+ /* Don't recurse with tcg_gen_not_i32. */
+ tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg1);
} else {
TCGv_i32 t0 = tcg_const_i32(arg2);
tcg_gen_xor_i32(ret, arg1, t0);
@@ -1120,9 +1139,38 @@ static inline void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
}
}
-static inline void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+static inline void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
{
- TCGv_i64 t0 = tcg_const_i64(arg2);
+ TCGv_i64 t0;
+ /* Some cases can be optimized here. */
+ switch (arg2) {
+ case 0:
+ tcg_gen_movi_i64(ret, 0);
+ return;
+ case 0xffffffffffffffffull:
+ tcg_gen_mov_i64(ret, arg1);
+ return;
+ case 0xffull:
+ /* Don't recurse with tcg_gen_ext8u_i64. */
+ if (TCG_TARGET_HAS_ext8u_i64) {
+ tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg1);
+ return;
+ }
+ break;
+ case 0xffffu:
+ if (TCG_TARGET_HAS_ext16u_i64) {
+ tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg1);
+ return;
+ }
+ break;
+ case 0xffffffffull:
+ if (TCG_TARGET_HAS_ext32u_i64) {
+ tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg1);
+ return;
+ }
+ break;
+ }
+ t0 = tcg_const_i64(arg2);
tcg_gen_and_i64(ret, arg1, t0);
tcg_temp_free_i64(t0);
}
@@ -1138,9 +1186,16 @@ static inline void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
static inline void tcg_gen_ori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
{
- TCGv_i64 t0 = tcg_const_i64(arg2);
- tcg_gen_or_i64(ret, arg1, t0);
- tcg_temp_free_i64(t0);
+ /* Some cases can be optimized here. */
+ if (arg2 == -1) {
+ tcg_gen_movi_i64(ret, -1);
+ } else if (arg2 == 0) {
+ tcg_gen_mov_i64(ret, arg1);
+ } else {
+ TCGv_i64 t0 = tcg_const_i64(arg2);
+ tcg_gen_or_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+ }
}
static inline void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
@@ -1154,9 +1209,17 @@ static inline void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
static inline void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
{
- TCGv_i64 t0 = tcg_const_i64(arg2);
- tcg_gen_xor_i64(ret, arg1, t0);
- tcg_temp_free_i64(t0);
+ /* Some cases can be optimized here. */
+ if (arg2 == 0) {
+ tcg_gen_mov_i64(ret, arg1);
+ } else if (arg2 == -1 && TCG_TARGET_HAS_not_i64) {
+ /* Don't recurse with tcg_gen_not_i64. */
+ tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg1);
+ } else {
+ TCGv_i64 t0 = tcg_const_i64(arg2);
+ tcg_gen_xor_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+ }
}
static inline void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
@@ -1746,36 +1809,6 @@ static inline void tcg_gen_discard_i64(TCGv_i64 arg)
#endif
}
-static inline void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low, TCGv_i32 high)
-{
-#if TCG_TARGET_REG_BITS == 32
- tcg_gen_mov_i32(TCGV_LOW(dest), low);
- tcg_gen_mov_i32(TCGV_HIGH(dest), high);
-#else
- TCGv_i64 tmp = tcg_temp_new_i64();
- /* This extension is only needed for type correctness.
- We may be able to do better given target specific information. */
- tcg_gen_extu_i32_i64(tmp, high);
- tcg_gen_shli_i64(tmp, tmp, 32);
- tcg_gen_extu_i32_i64(dest, low);
- tcg_gen_or_i64(dest, dest, tmp);
- tcg_temp_free_i64(tmp);
-#endif
-}
-
-static inline void tcg_gen_concat32_i64(TCGv_i64 dest, TCGv_i64 low, TCGv_i64 high)
-{
-#if TCG_TARGET_REG_BITS == 32
- tcg_gen_concat_i32_i64(dest, TCGV_LOW(low), TCGV_LOW(high));
-#else
- TCGv_i64 tmp = tcg_temp_new_i64();
- tcg_gen_ext32u_i64(dest, low);
- tcg_gen_shli_i64(tmp, high, 32);
- tcg_gen_or_i64(dest, dest, tmp);
- tcg_temp_free_i64(tmp);
-#endif
-}
-
static inline void tcg_gen_andc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
{
if (TCG_TARGET_HAS_andc_i32) {
@@ -2048,6 +2081,10 @@ static inline void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1,
uint32_t mask;
TCGv_i32 t1;
+ tcg_debug_assert(ofs < 32);
+ tcg_debug_assert(len <= 32);
+ tcg_debug_assert(ofs + len <= 32);
+
if (ofs == 0 && len == 32) {
tcg_gen_mov_i32(ret, arg2);
return;
@@ -2079,6 +2116,10 @@ static inline void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1,
uint64_t mask;
TCGv_i64 t1;
+ tcg_debug_assert(ofs < 64);
+ tcg_debug_assert(len <= 64);
+ tcg_debug_assert(ofs + len <= 64);
+
if (ofs == 0 && len == 64) {
tcg_gen_mov_i64(ret, arg2);
return;
@@ -2118,6 +2159,102 @@ static inline void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1,
tcg_temp_free_i64(t1);
}
+static inline void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low,
+ TCGv_i32 high)
+{
+#if TCG_TARGET_REG_BITS == 32
+ tcg_gen_mov_i32(TCGV_LOW(dest), low);
+ tcg_gen_mov_i32(TCGV_HIGH(dest), high);
+#else
+ TCGv_i64 tmp = tcg_temp_new_i64();
+ /* These extensions are only needed for type correctness.
+ We may be able to do better given target specific information. */
+ tcg_gen_extu_i32_i64(tmp, high);
+ tcg_gen_extu_i32_i64(dest, low);
+ /* If deposit is available, use it. Otherwise use the extra
+ knowledge that we have of the zero-extensions above. */
+ if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(32, 32)) {
+ tcg_gen_deposit_i64(dest, dest, tmp, 32, 32);
+ } else {
+ tcg_gen_shli_i64(tmp, tmp, 32);
+ tcg_gen_or_i64(dest, dest, tmp);
+ }
+ tcg_temp_free_i64(tmp);
+#endif
+}
+
+static inline void tcg_gen_concat32_i64(TCGv_i64 dest, TCGv_i64 low,
+ TCGv_i64 high)
+{
+ tcg_gen_deposit_i64(dest, low, high, 32, 32);
+}
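
Lowering tcg_gen_concat32_i64 to a single deposit makes the intended semantics explicit. In plain C terms the result is (a sketch of the value computed, not TCG code):

    static uint64_t concat32(uint64_t low, uint64_t high)
    {
        /* deposit(low, high, ofs=32, len=32) */
        return (low & 0xffffffffull) | (high << 32);
    }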
+
+static inline void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret,
+ TCGv_i32 c1, TCGv_i32 c2,
+ TCGv_i32 v1, TCGv_i32 v2)
+{
+ if (TCG_TARGET_HAS_movcond_i32) {
+ tcg_gen_op6i_i32(INDEX_op_movcond_i32, ret, c1, c2, v1, v2, cond);
+ } else {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ tcg_gen_setcond_i32(cond, t0, c1, c2);
+ tcg_gen_neg_i32(t0, t0);
+ tcg_gen_and_i32(t1, v1, t0);
+ tcg_gen_andc_i32(ret, v2, t0);
+ tcg_gen_or_i32(ret, ret, t1);
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+ }
+}
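
The fallback path above is the classic branchless select: the setcond result is widened to an all-ones/all-zeros mask, then and/andc/or pick between the two values. The same trick in plain C (a sketch, not TCG code):

    static uint32_t select32(int cond_true, uint32_t v1, uint32_t v2)
    {
        uint32_t mask = -(uint32_t)(cond_true != 0);  /* 0 or 0xffffffff */
        return (v1 & mask) | (v2 & ~mask);            /* and/andc/or */
    }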
+
+static inline void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret,
+ TCGv_i64 c1, TCGv_i64 c2,
+ TCGv_i64 v1, TCGv_i64 v2)
+{
+#if TCG_TARGET_REG_BITS == 32
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ tcg_gen_op6i_i32(INDEX_op_setcond2_i32, t0,
+ TCGV_LOW(c1), TCGV_HIGH(c1),
+ TCGV_LOW(c2), TCGV_HIGH(c2), cond);
+
+ if (TCG_TARGET_HAS_movcond_i32) {
+ tcg_gen_movi_i32(t1, 0);
+ tcg_gen_movcond_i32(TCG_COND_NE, TCGV_LOW(ret), t0, t1,
+ TCGV_LOW(v1), TCGV_LOW(v2));
+ tcg_gen_movcond_i32(TCG_COND_NE, TCGV_HIGH(ret), t0, t1,
+ TCGV_HIGH(v1), TCGV_HIGH(v2));
+ } else {
+ tcg_gen_neg_i32(t0, t0);
+
+ tcg_gen_and_i32(t1, TCGV_LOW(v1), t0);
+ tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(v2), t0);
+ tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(ret), t1);
+
+ tcg_gen_and_i32(t1, TCGV_HIGH(v1), t0);
+ tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(v2), t0);
+ tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), t1);
+ }
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+#else
+ if (TCG_TARGET_HAS_movcond_i64) {
+ tcg_gen_op6i_i64(INDEX_op_movcond_i64, ret, c1, c2, v1, v2, cond);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ tcg_gen_setcond_i64(cond, t0, c1, c2);
+ tcg_gen_neg_i64(t0, t0);
+ tcg_gen_and_i64(t1, v1, t0);
+ tcg_gen_andc_i64(ret, v2, t0);
+ tcg_gen_or_i64(ret, ret, t1);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ }
+#endif
+}
+
/***************************************/
/* QEMU specific operations. Their type depends on the QEMU CPU
type. */
@@ -2166,8 +2303,15 @@ static inline void tcg_gen_exit_tb(tcg_target_long val)
tcg_gen_op1i(INDEX_op_exit_tb, val);
}
-static inline void tcg_gen_goto_tb(int idx)
+static inline void tcg_gen_goto_tb(unsigned idx)
{
+ /* We only support two chained exits. */
+ tcg_debug_assert(idx <= 1);
+#ifdef CONFIG_DEBUG_TCG
+ /* Verify that we haven't seen this numbered exit before. */
+ tcg_debug_assert((tcg_ctx.goto_tb_issue_mask & (1 << idx)) == 0);
+ tcg_ctx.goto_tb_issue_mask |= 1 << idx;
+#endif
tcg_gen_op1i(INDEX_op_goto_tb, idx);
}
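
A translator is expected to use each of the two chained exits at most once per TB, which is exactly what the CONFIG_DEBUG_TCG mask checks. A hypothetical end-of-TB sequence (gen_set_pc, dest_pc, next_pc and tb are illustrative names, not from this patch):

    tcg_gen_goto_tb(0);                        /* chainable exit, slot 0 */
    gen_set_pc(dest_pc);                       /* hypothetical helper */
    tcg_gen_exit_tb((tcg_target_long)tb + 0);

    tcg_gen_goto_tb(1);                        /* chainable exit, slot 1 */
    gen_set_pc(next_pc);                       /* hypothetical helper */
    tcg_gen_exit_tb((tcg_target_long)tb + 1);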
@@ -2434,6 +2578,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
#define tcg_gen_deposit_tl tcg_gen_deposit_i64
#define tcg_const_tl tcg_const_i64
#define tcg_const_local_tl tcg_const_local_i64
+#define tcg_gen_movcond_tl tcg_gen_movcond_i64
#else
#define tcg_gen_movi_tl tcg_gen_movi_i32
#define tcg_gen_mov_tl tcg_gen_mov_i32
@@ -2505,6 +2650,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
#define tcg_gen_deposit_tl tcg_gen_deposit_i32
#define tcg_const_tl tcg_const_i32
#define tcg_const_local_tl tcg_const_local_i32
+#define tcg_gen_movcond_tl tcg_gen_movcond_i32
#endif
#if TCG_TARGET_REG_BITS == 32
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index 8e06d03b1..dbb0e3916 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -36,7 +36,7 @@ DEF(nopn, 0, 0, 1, 0) /* variable number of parameters */
DEF(discard, 1, 0, 0, 0)
-DEF(set_label, 0, 0, 1, 0)
+DEF(set_label, 0, 0, 1, TCG_OPF_BB_END)
DEF(call, 0, 1, 2, TCG_OPF_SIDE_EFFECTS) /* variable number of parameters */
DEF(jmp, 0, 1, 0, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
DEF(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
@@ -51,6 +51,7 @@ DEF(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
DEF(mov_i32, 1, 1, 0, 0)
DEF(movi_i32, 1, 0, 1, 0)
DEF(setcond_i32, 1, 2, 1, 0)
+DEF(movcond_i32, 1, 4, 1, IMPL(TCG_TARGET_HAS_movcond_i32))
/* load/store */
DEF(ld8u_i32, 1, 1, 1, 0)
DEF(ld8s_i32, 1, 1, 1, 0)
@@ -107,6 +108,7 @@ DEF(nor_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nor_i32))
DEF(mov_i64, 1, 1, 0, IMPL64)
DEF(movi_i64, 1, 0, 1, IMPL64)
DEF(setcond_i64, 1, 2, 1, IMPL64)
+DEF(movcond_i64, 1, 4, 1, IMPL64 | IMPL(TCG_TARGET_HAS_movcond_i64))
/* load/store */
DEF(ld8u_i64, 1, 1, 1, IMPL64)
DEF(ld8s_i64, 1, 1, 1, IMPL64)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index a4e7f42c7..c069e44a0 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -89,7 +89,6 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
tcg_target_long arg2);
static int tcg_target_const_match(tcg_target_long val,
const TCGArgConstraint *arg_ct);
-static int tcg_target_get_call_iarg_regs_count(int flags);
TCGOpDef tcg_op_defs[] = {
#define DEF(s, oargs, iargs, cargs, flags) { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags },
@@ -299,6 +298,10 @@ void tcg_func_start(TCGContext *s)
s->nb_labels = 0;
s->current_frame_offset = s->frame_start;
+#ifdef CONFIG_DEBUG_TCG
+ s->goto_tb_issue_mask = 0;
+#endif
+
gen_opc_ptr = gen_opc_buf;
gen_opparam_ptr = gen_opparam_buf;
}
@@ -937,11 +940,7 @@ void tcg_dump_ops(TCGContext *s)
args[nb_oargs + i]));
}
}
- } else if (c == INDEX_op_movi_i32
-#if TCG_TARGET_REG_BITS == 64
- || c == INDEX_op_movi_i64
-#endif
- ) {
+ } else if (c == INDEX_op_movi_i32 || c == INDEX_op_movi_i64) {
tcg_target_ulong val;
TCGHelperInfo *th;
@@ -991,17 +990,13 @@ void tcg_dump_ops(TCGContext *s)
}
switch (c) {
case INDEX_op_brcond_i32:
-#if TCG_TARGET_REG_BITS == 32
- case INDEX_op_brcond2_i32:
-#elif TCG_TARGET_REG_BITS == 64
- case INDEX_op_brcond_i64:
-#endif
case INDEX_op_setcond_i32:
-#if TCG_TARGET_REG_BITS == 32
+ case INDEX_op_movcond_i32:
+ case INDEX_op_brcond2_i32:
case INDEX_op_setcond2_i32:
-#elif TCG_TARGET_REG_BITS == 64
+ case INDEX_op_brcond_i64:
case INDEX_op_setcond_i64:
-#endif
+ case INDEX_op_movcond_i64:
if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) {
qemu_log(",%s", cond_name[args[k++]]);
} else {
@@ -1297,11 +1292,6 @@ static void tcg_liveness_analysis(TCGContext *s)
args--;
}
break;
- case INDEX_op_set_label:
- args--;
- /* mark end of basic block */
- tcg_la_bb_end(s, dead_temps);
- break;
case INDEX_op_debug_insn_start:
args -= def->nb_args;
break;
@@ -1463,7 +1453,8 @@ static void temp_allocate_frame(TCGContext *s, int temp)
{
TCGTemp *ts;
ts = &s->temps[temp];
-#ifndef __sparc_v9__ /* Sparc64 stack is accessed with offset of 2047 */
+#if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
+ /* The Sparc64 stack is accessed with an offset of 2047. */
s->current_frame_offset = (s->current_frame_offset +
(tcg_target_long)sizeof(tcg_target_long) - 1) &
~(sizeof(tcg_target_long) - 1);
@@ -1866,7 +1857,7 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
flags = args[nb_oargs + nb_iargs];
- nb_regs = tcg_target_get_call_iarg_regs_count(flags);
+ nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
if (nb_regs > nb_params)
nb_regs = nb_params;
@@ -2108,16 +2099,12 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf,
#endif
switch(opc) {
case INDEX_op_mov_i32:
-#if TCG_TARGET_REG_BITS == 64
case INDEX_op_mov_i64:
-#endif
dead_args = s->op_dead_args[op_index];
tcg_reg_alloc_mov(s, def, args, dead_args);
break;
case INDEX_op_movi_i32:
-#if TCG_TARGET_REG_BITS == 64
case INDEX_op_movi_i64:
-#endif
tcg_reg_alloc_movi(s, args);
break;
case INDEX_op_debug_insn_start:
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 7a72729f3..af7464a65 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -79,6 +79,7 @@ typedef uint64_t TCGRegSet;
#define TCG_TARGET_HAS_nand_i64 0
#define TCG_TARGET_HAS_nor_i64 0
#define TCG_TARGET_HAS_deposit_i64 0
+#define TCG_TARGET_HAS_movcond_i64 0
#endif
#ifndef TCG_TARGET_deposit_i32_valid
@@ -343,7 +344,7 @@ struct TCGContext {
/* goto_tb support */
uint8_t *code_buf;
- unsigned long *tb_next;
+ uintptr_t *tb_next;
uint16_t *tb_next_offset;
uint16_t *tb_jmp_offset; /* != NULL if USE_DIRECT_JUMP */
@@ -389,6 +390,7 @@ struct TCGContext {
#ifdef CONFIG_DEBUG_TCG
int temps_in_use;
+ int goto_tb_issue_mask;
#endif
};
@@ -459,11 +461,6 @@ static inline TCGv_i64 tcg_temp_local_new_i64(void)
void tcg_temp_free_i64(TCGv_i64 arg);
char *tcg_get_arg_str_i64(TCGContext *s, char *buf, int buf_size, TCGv_i64 arg);
-static inline bool tcg_arg_is_local(TCGContext *s, TCGArg arg)
-{
- return s->temps[arg].temp_local;
-}
-
#if defined(CONFIG_DEBUG_TCG)
/* If you call tcg_clear_temp_count() at the start of a section of
* code which is not supposed to leak any TCG temporaries, then
@@ -534,6 +531,15 @@ do {\
abort();\
} while (0)
+#ifdef CONFIG_DEBUG_TCG
+# define tcg_debug_assert(X) do { assert(X); } while (0)
+#elif QEMU_GNUC_PREREQ(4, 5)
+# define tcg_debug_assert(X) \
+ do { if (!(X)) { __builtin_unreachable(); } } while (0)
+#else
+# define tcg_debug_assert(X) do { (void)(X); } while (0)
+#endif
+
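+
A usage sketch of the three-way macro above (illustrative only): under CONFIG_DEBUG_TCG it is a real assert(); on GCC >= 4.5 release builds the false path becomes __builtin_unreachable(), an optimizer hint; otherwise the condition is evaluated and discarded.

    /* In a release build on GCC >= 4.5, the compiler may drop any
       idx > 1 handling after this assertion. */
    tcg_debug_assert(idx <= 1);
    tcg_gen_op1i(INDEX_op_goto_tb, idx);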
void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs);
#if TCG_TARGET_REG_BITS == 32
diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c
index 003244cb0..3f4a24bb8 100644
--- a/tcg/tci/tcg-target.c
+++ b/tcg/tci/tcg-target.c
@@ -863,12 +863,6 @@ static int tcg_target_const_match(tcg_target_long val,
return arg_ct->ct & TCG_CT_CONST;
}
-/* Maximum number of register used for input function arguments. */
-static int tcg_target_get_call_iarg_regs_count(int flags)
-{
- return ARRAY_SIZE(tcg_target_call_iarg_regs);
-}
-
static void tcg_target_init(TCGContext *s)
{
#if defined(CONFIG_DEBUG_TCG_INTERPRETER)
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index 30a0f2159..6d894953a 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -75,6 +75,7 @@
#define TCG_TARGET_HAS_not_i32 1
#define TCG_TARGET_HAS_orc_i32 0
#define TCG_TARGET_HAS_rot_i32 1
+#define TCG_TARGET_HAS_movcond_i32 0
#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_bswap16_i64 1
@@ -98,6 +99,7 @@
#define TCG_TARGET_HAS_not_i64 1
#define TCG_TARGET_HAS_orc_i64 0
#define TCG_TARGET_HAS_rot_i64 1
+#define TCG_TARGET_HAS_movcond_i64 0
#endif /* TCG_TARGET_REG_BITS == 64 */
/* Offset to user memory in user mode. */