mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
YJIT: use shorter encoding for mov(r64,imm) when unambiguous (#5081)
* YJIT: use shorter encoding for mov(r64,imm) when unambiguous Previously, for small constants such as `mov(RAX, imm_opnd(Qundef))`, we emit an instruction with an 8-byte immediate. This form commonly gets the `movabs` mnemonic. In 64-bit mode, 32-bit operands get zero extended to 64-bit to fill the register, so when the immediate is small enough, we can save 4 bytes by using the `mov` variant that takes a 32-bit immediate and does a zero extension. Not implemented with this change: there is an imm32 variant of `mov` that does sign extension, which we could also use. When the constant is negative, we fall back to the `movabs` form. In railsbench, this change yields roughly a 12% code size reduction for the outlined block. Co-authored-by: Jemma Issroff <jemmaissroff@gmail.com> * [ci skip] comment edit. Please squash. Co-authored-by: Jemma Issroff <jemmaissroff@gmail.com>
This commit is contained in:
parent
82ae9b092c
commit
91a9062626
Notes:
git
2021-11-06 04:44:50 +09:00
Merged-By: maximecb <maximecb@ruby-lang.org>
2 changed files with 39 additions and 7 deletions
|
@ -182,10 +182,20 @@ void run_assembler_tests(void)
|
|||
// mov
|
||||
cb_set_pos(cb, 0); mov(cb, EAX, imm_opnd(7)); check_bytes(cb, "B807000000");
|
||||
cb_set_pos(cb, 0); mov(cb, EAX, imm_opnd(-3)); check_bytes(cb, "B8FDFFFFFF");
|
||||
cb_set_pos(cb, 0); mov(cb, R15, imm_opnd(3)); check_bytes(cb, "49BF0300000000000000");
|
||||
cb_set_pos(cb, 0); mov(cb, R15, imm_opnd(3)); check_bytes(cb, "41BF03000000");
|
||||
cb_set_pos(cb, 0); mov(cb, EAX, EBX); check_bytes(cb, "89D8");
|
||||
cb_set_pos(cb, 0); mov(cb, EAX, ECX); check_bytes(cb, "89C8");
|
||||
cb_set_pos(cb, 0); mov(cb, EDX, mem_opnd(32, RBX, 128)); check_bytes(cb, "8B9380000000");
|
||||
|
||||
// Test `mov rax, 3` => `mov eax, 3` optimization
|
||||
cb_set_pos(cb, 0); mov(cb, R8, imm_opnd(0x34)); check_bytes(cb, "41B834000000");
|
||||
cb_set_pos(cb, 0); mov(cb, R8, imm_opnd(0x80000000)); check_bytes(cb, "49B80000008000000000");
|
||||
cb_set_pos(cb, 0); mov(cb, R8, imm_opnd(-1)); check_bytes(cb, "49B8FFFFFFFFFFFFFFFF");
|
||||
|
||||
cb_set_pos(cb, 0); mov(cb, RAX, imm_opnd(0x34)); check_bytes(cb, "B834000000");
|
||||
cb_set_pos(cb, 0); mov(cb, RAX, imm_opnd(0x80000000)); check_bytes(cb, "48B80000008000000000");
|
||||
cb_set_pos(cb, 0); mov(cb, RAX, imm_opnd(-52)); check_bytes(cb, "48B8CCFFFFFFFFFFFFFF");
|
||||
cb_set_pos(cb, 0); mov(cb, RAX, imm_opnd(-1)); check_bytes(cb, "48B8FFFFFFFFFFFFFFFF");
|
||||
/*
|
||||
test(
|
||||
delegate void (CodeBlock cb) { cb.mov(X86Opnd(AL), X86Opnd(8, RCX, 0, 1, RDX)); },
|
||||
|
|
34
yjit_asm.c
34
yjit_asm.c
|
@ -1259,6 +1259,15 @@ void lea(codeblock_t *cb, x86opnd_t dst, x86opnd_t src)
|
|||
cb_write_rm(cb, false, true, dst, src, 0xFF, 1, 0x8D);
|
||||
}
|
||||
|
||||
// Does this number fit in 32 bits and stay the same if you zero extend it to 64 bits?
|
||||
// If the sign bit is clear, sign extension and zero extension yield the same
|
||||
// result.
|
||||
static bool
|
||||
zero_extendable_32bit(uint64_t number)
|
||||
{
|
||||
// True only when the value fits in 32 bits AND bit 31 is clear, i.e. the
// value is at most 0x7FFFFFFF. Values with bit 31 set (e.g. 0x80000000)
// are rejected even though they fit in 32 bits.
return number <= UINT32_MAX && (number & (1ull << 31ull)) == 0;
|
||||
}
|
||||
|
||||
/// mov - Data move operation
|
||||
void mov(codeblock_t *cb, x86opnd_t dst, x86opnd_t src)
|
||||
{
|
||||
|
@ -1275,14 +1284,27 @@ void mov(codeblock_t *cb, x86opnd_t dst, x86opnd_t src)
|
|||
unsig_imm_size(src.as.imm) <= dst.num_bits
|
||||
);
|
||||
|
||||
if (dst.num_bits == 16)
|
||||
cb_write_byte(cb, 0x66);
|
||||
if (rex_needed(dst) || dst.num_bits == 64)
|
||||
cb_write_rex(cb, dst.num_bits == 64, 0, 0, dst.as.reg.reg_no);
|
||||
// In case the source immediate could be zero extended to be 64
|
||||
// bit, we can use the 32-bit operands version of the instruction.
|
||||
// For example, we can turn mov(rax, 0x34) into the equivalent
|
||||
// mov(eax, 0x34).
|
||||
if (dst.num_bits == 64 && zero_extendable_32bit(src.as.unsig_imm)) {
|
||||
if (rex_needed(dst))
|
||||
cb_write_rex(cb, false, 0, 0, dst.as.reg.reg_no);
|
||||
cb_write_opcode(cb, 0xB8, dst);
|
||||
cb_write_int(cb, src.as.imm, 32);
|
||||
}
|
||||
else {
|
||||
if (dst.num_bits == 16)
|
||||
cb_write_byte(cb, 0x66);
|
||||
|
||||
cb_write_opcode(cb, (dst.num_bits == 8)? 0xB0:0xB8, dst);
|
||||
if (rex_needed(dst) || dst.num_bits == 64)
|
||||
cb_write_rex(cb, dst.num_bits == 64, 0, 0, dst.as.reg.reg_no);
|
||||
|
||||
cb_write_int(cb, src.as.imm, dst.num_bits);
|
||||
cb_write_opcode(cb, (dst.num_bits == 8)? 0xB0:0xB8, dst);
|
||||
|
||||
cb_write_int(cb, src.as.imm, dst.num_bits);
|
||||
}
|
||||
}
|
||||
|
||||
// M + Imm
|
||||
|
|
Loading…
Add table
Reference in a new issue