diff --git a/yjit/src/asm/arm64/arg/inst_offset.rs b/yjit/src/asm/arm64/arg/inst_offset.rs new file mode 100644 index 0000000000..f4a6bc73a0 --- /dev/null +++ b/yjit/src/asm/arm64/arg/inst_offset.rs @@ -0,0 +1,47 @@ +/// There are a lot of instructions in the AArch64 architecture that take an +/// offset in terms of number of instructions. Usually they are jump +/// instructions or instructions that load a value relative to the current PC. +/// +/// This struct is used to mark those locations instead of a generic operand in +/// order to give better clarity to the developer when reading the AArch64 +/// backend code. It also helps to clarify that everything is in terms of a +/// number of instructions and not a number of bytes (i.e., the offset is the +/// number of bytes divided by 4). +#[derive(Copy, Clone)] +pub struct InstructionOffset(i32); + +impl InstructionOffset { + /// Create a new instruction offset. + pub fn from_insns(insns: i32) -> Self { + InstructionOffset(insns) + } + + /// Create a new instruction offset from a number of bytes. + pub fn from_bytes(bytes: i32) -> Self { + assert_eq!(bytes % 4, 0, "Byte offset must be a multiple of 4"); + InstructionOffset(bytes / 4) + } +} + +impl From<i32> for InstructionOffset { + /// Convert an i32 into an instruction offset. + fn from(value: i32) -> Self { + InstructionOffset(value) + } +} + +impl From<InstructionOffset> for i32 { + /// Convert an instruction offset into a number of instructions as an i32. + fn from(offset: InstructionOffset) -> Self { + offset.0 + } +} + +impl From<InstructionOffset> for i64 { + /// Convert an instruction offset into a number of instructions as an i64. + /// This is useful for when we're checking how many bits this offset fits + /// into. 
+ fn from(offset: InstructionOffset) -> Self { + offset.0.into() + } +} diff --git a/yjit/src/asm/arm64/arg/mod.rs b/yjit/src/asm/arm64/arg/mod.rs index 9bf4a8ea13..7eb37834f9 100644 --- a/yjit/src/asm/arm64/arg/mod.rs +++ b/yjit/src/asm/arm64/arg/mod.rs @@ -3,6 +3,7 @@ mod bitmask_imm; mod condition; +mod inst_offset; mod sf; mod shifted_imm; mod sys_reg; @@ -10,6 +11,7 @@ mod truncate; pub use bitmask_imm::BitmaskImmediate; pub use condition::Condition; +pub use inst_offset::InstructionOffset; pub use sf::Sf; pub use shifted_imm::ShiftedImmediate; pub use sys_reg::SystemRegister; diff --git a/yjit/src/asm/arm64/inst/branch_cond.rs b/yjit/src/asm/arm64/inst/branch_cond.rs index c489bacef0..4338cf0f4f 100644 --- a/yjit/src/asm/arm64/inst/branch_cond.rs +++ b/yjit/src/asm/arm64/inst/branch_cond.rs @@ -1,4 +1,4 @@ -use super::super::arg::{Condition, truncate_imm}; +use super::super::arg::{Condition, InstructionOffset, truncate_imm}; /// The struct that represents an A64 conditional branch instruction that can be /// encoded. @@ -14,14 +14,14 @@ pub struct BranchCond { cond: u8, /// The instruction offset from this instruction to branch to. 
- imm19: i32 + offset: InstructionOffset } impl BranchCond { /// B.cond /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/B-cond--Branch-conditionally- - pub fn bcond(cond: u8, imm19: i32) -> Self { - Self { cond, imm19 } + pub fn bcond(cond: u8, offset: InstructionOffset) -> Self { + Self { cond, offset } } } @@ -34,7 +34,7 @@ impl From<BranchCond> for u32 { 0 | (1 << 30) | (FAMILY << 26) - | (truncate_imm::<_, 19>(inst.imm19) << 5) + | (truncate_imm::<_, 19>(inst.offset) << 5) | (inst.cond as u32) } } @@ -53,25 +53,25 @@ mod tests { #[test] fn test_b_eq() { - let result: u32 = BranchCond::bcond(Condition::EQ, 32).into(); + let result: u32 = BranchCond::bcond(Condition::EQ, 32.into()).into(); assert_eq!(0x54000400, result); } #[test] fn test_b_vs() { - let result: u32 = BranchCond::bcond(Condition::VS, 32).into(); + let result: u32 = BranchCond::bcond(Condition::VS, 32.into()).into(); assert_eq!(0x54000406, result); } #[test] fn test_b_eq_max() { - let result: u32 = BranchCond::bcond(Condition::EQ, (1 << 18) - 1).into(); + let result: u32 = BranchCond::bcond(Condition::EQ, ((1 << 18) - 1).into()).into(); assert_eq!(0x547fffe0, result); } #[test] fn test_b_eq_min() { - let result: u32 = BranchCond::bcond(Condition::EQ, -(1 << 18)).into(); + let result: u32 = BranchCond::bcond(Condition::EQ, (-(1 << 18)).into()).into(); assert_eq!(0x54800000, result); } } diff --git a/yjit/src/asm/arm64/inst/call.rs b/yjit/src/asm/arm64/inst/call.rs index 32d924f799..74debac7f7 100644 --- a/yjit/src/asm/arm64/inst/call.rs +++ b/yjit/src/asm/arm64/inst/call.rs @@ -1,4 +1,4 @@ -use super::super::arg::truncate_imm; +use super::super::arg::{InstructionOffset, truncate_imm}; /// The operation to perform for this instruction. enum Op { @@ -20,8 +20,8 @@ enum Op { /// +-------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+ /// pub struct Call { - /// The PC-relative offset to jump to (which will be multiplied by 4). 
- imm26: i32, + /// The PC-relative offset to jump to in terms of number of instructions. + offset: InstructionOffset, /// The operation to perform for this instruction. op: Op @@ -30,14 +30,14 @@ pub struct Call { impl Call { /// B /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/B--Branch- - pub fn b(imm26: i32) -> Self { - Self { imm26, op: Op::Branch } + pub fn b(offset: InstructionOffset) -> Self { + Self { offset, op: Op::Branch } } /// BL /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/BL--Branch-with-Link-?lang=en - pub fn bl(imm26: i32) -> Self { - Self { imm26, op: Op::BranchWithLink } + pub fn bl(offset: InstructionOffset) -> Self { + Self { offset, op: Op::BranchWithLink } } } @@ -50,7 +50,7 @@ impl From<Call> for u32 { 0 | ((inst.op as u32) << 31) | (FAMILY << 26) - | truncate_imm::<_, 26>(inst.imm26) + | truncate_imm::<_, 26>(inst.offset) } } @@ -68,37 +68,37 @@ mod tests { #[test] fn test_bl() { - let result: u32 = Call::bl(0).into(); + let result: u32 = Call::bl(0.into()).into(); assert_eq!(0x94000000, result); } #[test] fn test_bl_positive() { - let result: u32 = Call::bl(256).into(); + let result: u32 = Call::bl(256.into()).into(); assert_eq!(0x94000100, result); } #[test] fn test_bl_negative() { - let result: u32 = Call::bl(-256).into(); + let result: u32 = Call::bl((-256).into()).into(); assert_eq!(0x97ffff00, result); } #[test] fn test_b() { - let result: u32 = Call::b(0).into(); + let result: u32 = Call::b(0.into()).into(); assert_eq!(0x14000000, result); } #[test] fn test_b_positive() { - let result: u32 = Call::b((1 << 25) - 1).into(); + let result: u32 = Call::b(((1 << 25) - 1).into()).into(); assert_eq!(0x15ffffff, result); } #[test] fn test_b_negative() { - let result: u32 = Call::b(-(1 << 25)).into(); + let result: u32 = Call::b((-(1 << 25)).into()).into(); assert_eq!(0x16000000, result); } } diff --git a/yjit/src/asm/arm64/inst/load_literal.rs b/yjit/src/asm/arm64/inst/load_literal.rs 
index c5ab09713c..3eade205c8 100644 --- a/yjit/src/asm/arm64/inst/load_literal.rs +++ b/yjit/src/asm/arm64/inst/load_literal.rs @@ -1,4 +1,4 @@ -use super::super::arg::truncate_imm; +use super::super::arg::{InstructionOffset, truncate_imm}; /// The size of the operands being operated on. enum Opc { @@ -32,7 +32,7 @@ pub struct LoadLiteral { rt: u8, /// The PC-relative number of instructions to load the value from. - imm19: i32, + offset: InstructionOffset, /// The size of the operands being operated on. opc: Opc @@ -41,8 +41,8 @@ pub struct LoadLiteral { impl LoadLiteral { /// LDR (load literal) /// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--literal---Load-Register--literal--?lang=en - pub fn ldr_literal(rt: u8, imm19: i32, num_bits: u8) -> Self { - Self { rt, imm19, opc: num_bits.into() } + pub fn ldr_literal(rt: u8, offset: InstructionOffset, num_bits: u8) -> Self { + Self { rt, offset, opc: num_bits.into() } } } @@ -56,7 +56,7 @@ impl From<LoadLiteral> for u32 { 0 | ((inst.opc as u32) << 30) | (1 << 28) | (FAMILY << 25) - | (truncate_imm::<_, 19>(inst.imm19) << 5) + | (truncate_imm::<_, 19>(inst.offset) << 5) | (inst.rt as u32) } } @@ -75,14 +75,14 @@ mod tests { #[test] fn test_ldr_positive() { - let inst = LoadLiteral::ldr_literal(0, 5, 64); + let inst = LoadLiteral::ldr_literal(0, 5.into(), 64); let result: u32 = inst.into(); assert_eq!(0x580000a0, result); } #[test] fn test_ldr_negative() { - let inst = LoadLiteral::ldr_literal(0, -5, 64); + let inst = LoadLiteral::ldr_literal(0, (-5).into(), 64); let result: u32 = inst.into(); assert_eq!(0x58ffff60, result); } diff --git a/yjit/src/asm/arm64/mod.rs b/yjit/src/asm/arm64/mod.rs index b73b3125e2..420151c6d1 100644 --- a/yjit/src/asm/arm64/mod.rs +++ b/yjit/src/asm/arm64/mod.rs @@ -190,15 +190,9 @@ pub const fn b_offset_fits_bits(offset: i64) -> bool { } /// B - branch without link (offset is number of instructions to jump) -pub fn b(cb: &mut CodeBlock, imm26: A64Opnd) { - let bytes: [u8; 
4] = match imm26 { - A64Opnd::Imm(imm26) => { - assert!(b_offset_fits_bits(imm26), "The immediate operand must be 26 bits or less."); - - Call::b(imm26 as i32).into() - }, - _ => panic!("Invalid operand combination to b instruction.") - }; +pub fn b(cb: &mut CodeBlock, offset: InstructionOffset) { + assert!(b_offset_fits_bits(offset.into()), "The immediate operand must be 26 bits or less."); + let bytes: [u8; 4] = Call::b(offset).into(); cb.write_bytes(&bytes); } @@ -208,33 +202,21 @@ pub fn b(cb: &mut CodeBlock, imm26: A64Opnd) { /// value into a register first, then use the b.cond instruction to skip past a /// direct jump. pub const fn bcond_offset_fits_bits(offset: i64) -> bool { - imm_fits_bits(offset, 21) && (offset & 0b11 == 0) + imm_fits_bits(offset, 19) } /// B.cond - branch to target if condition is true -pub fn bcond(cb: &mut CodeBlock, cond: u8, byte_offset: A64Opnd) { - let bytes: [u8; 4] = match byte_offset { - A64Opnd::Imm(imm) => { - assert!(bcond_offset_fits_bits(imm), "The immediate operand must be 21 bits or less and be aligned to a 2-bit boundary."); - - BranchCond::bcond(cond, (imm / 4) as i32).into() - }, - _ => panic!("Invalid operand combination to bcond instruction."), - }; +pub fn bcond(cb: &mut CodeBlock, cond: u8, offset: InstructionOffset) { + assert!(bcond_offset_fits_bits(offset.into()), "The offset must be 19 bits or less."); + let bytes: [u8; 4] = BranchCond::bcond(cond, offset).into(); cb.write_bytes(&bytes); } /// BL - branch with link (offset is number of instructions to jump) -pub fn bl(cb: &mut CodeBlock, imm26: A64Opnd) { - let bytes: [u8; 4] = match imm26 { - A64Opnd::Imm(imm26) => { - assert!(b_offset_fits_bits(imm26), "The immediate operand must be 26 bits or less."); - - Call::bl(imm26 as i32).into() - }, - _ => panic!("Invalid operand combination to bl instruction.") - }; +pub fn bl(cb: &mut CodeBlock, offset: InstructionOffset) { + assert!(b_offset_fits_bits(offset.into()), "The offset must be 26 bits or less."); + let 
bytes: [u8; 4] = Call::bl(offset).into(); cb.write_bytes(&bytes); } @@ -413,7 +395,7 @@ pub fn ldr(cb: &mut CodeBlock, rt: A64Opnd, rn: A64Opnd, rm: A64Opnd) { } /// LDR - load a PC-relative memory address into a register -pub fn ldr_literal(cb: &mut CodeBlock, rt: A64Opnd, rn: i32) { +pub fn ldr_literal(cb: &mut CodeBlock, rt: A64Opnd, rn: InstructionOffset) { let bytes: [u8; 4] = match rt { A64Opnd::Reg(rt) => { LoadLiteral::ldr_literal(rt.reg_no, rn, rt.num_bits).into() @@ -1087,45 +1069,52 @@ mod tests { #[test] fn test_bcond() { - check_bytes("01200054", |cb| bcond(cb, Condition::NE, A64Opnd::new_imm(0x400))); + let offset = InstructionOffset::from_insns(0x100); + check_bytes("01200054", |cb| bcond(cb, Condition::NE, offset)); } #[test] fn test_b() { - check_bytes("ffffff15", |cb| b(cb, A64Opnd::new_imm((1 << 25) - 1))); + let offset = InstructionOffset::from_insns((1 << 25) - 1); + check_bytes("ffffff15", |cb| b(cb, offset)); } #[test] #[should_panic] fn test_b_too_big() { // There are 26 bits available - check_bytes("", |cb| b(cb, A64Opnd::new_imm(1 << 25))); + let offset = InstructionOffset::from_insns(1 << 25); + check_bytes("", |cb| b(cb, offset)); } #[test] #[should_panic] fn test_b_too_small() { // There are 26 bits available - check_bytes("", |cb| b(cb, A64Opnd::new_imm(-(1 << 25) - 1))); + let offset = InstructionOffset::from_insns(-(1 << 25) - 1); + check_bytes("", |cb| b(cb, offset)); } #[test] fn test_bl() { - check_bytes("00000096", |cb| bl(cb, A64Opnd::new_imm(-(1 << 25)))); + let offset = InstructionOffset::from_insns(-(1 << 25)); + check_bytes("00000096", |cb| bl(cb, offset)); } #[test] #[should_panic] fn test_bl_too_big() { // There are 26 bits available - check_bytes("", |cb| bl(cb, A64Opnd::new_imm(1 << 25))); + let offset = InstructionOffset::from_insns(1 << 25); + check_bytes("", |cb| bl(cb, offset)); } #[test] #[should_panic] fn test_bl_too_small() { // There are 26 bits available - check_bytes("", |cb| bl(cb, A64Opnd::new_imm(-(1 << 25) 
- 1))); + let offset = InstructionOffset::from_insns(-(1 << 25) - 1); + check_bytes("", |cb| bl(cb, offset)); } #[test] @@ -1200,7 +1189,7 @@ mod tests { #[test] fn test_ldr_literal() { - check_bytes("40010058", |cb| ldr_literal(cb, X0, 10)); + check_bytes("40010058", |cb| ldr_literal(cb, X0, 10.into())); } #[test] diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 32db0ab3dc..446332788a 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -589,13 +589,15 @@ impl Assembler Target::CodePtr(dst_ptr) => { let dst_addr = dst_ptr.into_i64(); let src_addr = cb.get_write_ptr().into_i64(); - let offset = dst_addr - src_addr; - let num_insns = if bcond_offset_fits_bits(offset) { + let num_insns = if bcond_offset_fits_bits((dst_addr - src_addr) / 4) { // If the jump offset fits into the conditional jump as // an immediate value and it's properly aligned, then we - // can use the b.cond instruction directly. - bcond(cb, CONDITION, A64Opnd::new_imm(offset)); + // can use the b.cond instruction directly. We're safe + // to use as i32 here since we already checked that it + // fits. + let bytes = (dst_addr - src_addr) as i32; + bcond(cb, CONDITION, InstructionOffset::from_bytes(bytes)); // Here we're going to return 1 because we've only // written out 1 instruction. @@ -604,12 +606,12 @@ impl Assembler // Otherwise, we need to load the address into a // register and use the branch register instruction. let dst_addr = dst_ptr.into_u64(); - let load_insns: i64 = emit_load_size(dst_addr).into(); + let load_insns: i32 = emit_load_size(dst_addr).into(); // We're going to write out the inverse condition so // that if it doesn't match it will skip over the // instructions used for branching. 
- bcond(cb, Condition::inverse(CONDITION), A64Opnd::new_imm((load_insns + 2) * 4)); + bcond(cb, Condition::inverse(CONDITION), (load_insns + 2).into()); emit_load_value(cb, Assembler::SCRATCH0, dst_addr); br(cb, Assembler::SCRATCH0); @@ -630,7 +632,8 @@ impl Assembler // offset. We're going to assume we can fit into a single // b.cond instruction. It will panic otherwise. cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| { - bcond(cb, CONDITION, A64Opnd::new_imm(dst_addr - (src_addr - 4))); + let bytes: i32 = (dst_addr - (src_addr - 4)).try_into().unwrap(); + bcond(cb, CONDITION, InstructionOffset::from_bytes(bytes)); }); }, Target::FunPtr(_) => unreachable!() @@ -756,8 +759,8 @@ impl Assembler // references to GC'd Value operands. If the value // being loaded is a heap object, we'll report that // back out to the gc_offsets list. - ldr_literal(cb, out.into(), 2); - b(cb, A64Opnd::new_imm(1 + (SIZEOF_VALUE as i64) / 4)); + ldr_literal(cb, out.into(), 2.into()); + b(cb, InstructionOffset::from_bytes(4 + (SIZEOF_VALUE as i32))); cb.write_bytes(&value.as_u64().to_le_bytes()); let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32); @@ -844,14 +847,11 @@ impl Assembler // The offset to the call target in bytes let src_addr = cb.get_write_ptr().into_i64(); let dst_addr = target.unwrap_fun_ptr() as i64; - let offset = dst_addr - src_addr; - // The offset in instruction count for BL's immediate - let offset = offset / 4; // Use BL if the offset is short enough to encode as an immediate. // Otherwise, use BLR with a register. 
- if b_offset_fits_bits(offset) { - bl(cb, A64Opnd::new_imm(offset)); + if b_offset_fits_bits((dst_addr - src_addr) / 4) { + bl(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32)); } else { emit_load_value(cb, Self::SCRATCH0, dst_addr as u64); blr(cb, Self::SCRATCH0); @@ -875,19 +875,22 @@ impl Assembler let src_addr = cb.get_write_ptr().into_i64(); let dst_addr = dst_ptr.into_i64(); - // The offset between the two instructions in bytes. - // Note that when we encode this into a b - // instruction, we'll divide by 4 because it accepts - // the number of instructions to jump over. - let offset = dst_addr - src_addr; - let offset = offset / 4; - // If the offset is short enough, then we'll use the // branch instruction. Otherwise, we'll move the // destination into a register and use the branch // register instruction. - let num_insns = emit_load_value(cb, Self::SCRATCH0, dst_addr as u64); - br(cb, Self::SCRATCH0); + let num_insns = if b_offset_fits_bits((dst_addr - src_addr) / 4) { + b(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32)); + 0 + } else { + let num_insns = emit_load_value(cb, Self::SCRATCH0, dst_addr as u64); + br(cb, Self::SCRATCH0); + num_insns + }; + + // Make sure it's always a consistent number of + // instructions in case it gets patched and has to + // use the other branch. for _ in num_insns..4 { nop(cb); } @@ -899,7 +902,8 @@ impl Assembler // to assume we can fit into a single b instruction. // It will panic otherwise. cb.label_ref(*label_idx, 4, |cb, src_addr, dst_addr| { - b(cb, A64Opnd::new_imm((dst_addr - (src_addr - 4)) / 4)); + let bytes: i32 = (dst_addr - (src_addr - 4)).try_into().unwrap(); + b(cb, InstructionOffset::from_bytes(bytes)); }); }, _ => unreachable!()