/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "jni_macro_assembler_arm_vixl.h"

#include <iostream>
#include <type_traits>

#include "entrypoints/quick/quick_entrypoints.h"
#include "lock_word.h"
#include "thread.h"

using namespace vixl::aarch32;  // NOLINT(build/namespaces)
namespace vixl32 = vixl::aarch32;

using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;

namespace art {
namespace arm {

#ifdef ___
#error "ARM Assembler macro already defined."
#else
#define ___ asm_.GetVIXLAssembler()->
#endif

// The AAPCS requires 8-byte alignment. This is not as strict as the Managed ABI stack alignment.
static constexpr size_t kAapcsStackAlignment = 8u;
static_assert(kAapcsStackAlignment < kStackAlignment);

// STRD immediate can encode any 4-byte aligned offset smaller than this cutoff.
static constexpr size_t kStrdOffsetCutoff = 1024u;

// ADD sp, imm can encode 4-byte aligned immediate smaller than this cutoff.
static constexpr size_t kAddSpImmCutoff = 1024u;

vixl::aarch32::Register AsVIXLRegister(ArmManagedRegister reg) {
  CHECK(reg.IsCoreRegister());
  return vixl::aarch32::Register(reg.RegId());
}

static inline vixl::aarch32::SRegister AsVIXLSRegister(ArmManagedRegister reg) {
  CHECK(reg.IsSRegister());
  return vixl::aarch32::SRegister(reg.RegId() - kNumberOfCoreRegIds);
}

static inline vixl::aarch32::DRegister AsVIXLDRegister(ArmManagedRegister reg) {
  CHECK(reg.IsDRegister());
  return vixl::aarch32::DRegister(reg.RegId() - kNumberOfCoreRegIds - kNumberOfSRegIds);
}

static inline vixl::aarch32::Register AsVIXLRegisterPairLow(ArmManagedRegister reg) {
  return vixl::aarch32::Register(reg.AsRegisterPairLow());
}

static inline vixl::aarch32::Register AsVIXLRegisterPairHigh(ArmManagedRegister reg) {
  return vixl::aarch32::Register(reg.AsRegisterPairHigh());
}

void ArmVIXLJNIMacroAssembler::FinalizeCode() {
  asm_.FinalizeCode();
}

static constexpr size_t kFramePointerSize = static_cast<size_t>(kArmPointerSize);
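// An illustrative sketch (not emitted verbatim) of the prologue built below for a
// normal native method, assuming the ArtMethod* arrives in r0 and the core saves
// include LR:
//
//   push  {r5-r8, r10, r11, lr}   // Spill callee saves; CFI offsets recorded.
//   sub   sp, sp, #<pad>          // Grow the frame to `frame_size`.
//   str   r0, [sp]                // Spill the ArtMethod* at the bottom of the frame.
//
// The exact register list and padding depend on the `callee_save_regs` and
// `frame_size` supplied by the caller.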
void ArmVIXLJNIMacroAssembler::BuildFrame(size_t frame_size,
                                          ManagedRegister method_reg,
                                          ArrayRef<const ManagedRegister> callee_save_regs) {
  // If we're creating an actual frame with the method, enforce managed stack alignment,
  // otherwise only the native stack alignment.
  if (method_reg.IsNoRegister()) {
    CHECK_ALIGNED_PARAM(frame_size, kAapcsStackAlignment);
  } else {
    CHECK_ALIGNED_PARAM(frame_size, kStackAlignment);
  }

  // Push callee saves and link register.
  RegList core_spill_mask = 0;
  uint32_t fp_spill_mask = 0;
  for (const ManagedRegister& reg : callee_save_regs) {
    if (reg.AsArm().IsCoreRegister()) {
      core_spill_mask |= 1 << reg.AsArm().AsCoreRegister();
    } else {
      fp_spill_mask |= 1 << reg.AsArm().AsSRegister();
    }
  }
  if (core_spill_mask == (1u << lr.GetCode()) &&
      fp_spill_mask == 0u &&
      frame_size == 2 * kFramePointerSize &&
      !method_reg.IsRegister()) {
    // Special case: Only LR to push and one word to skip. Do this with a single
    // 16-bit PUSH instruction by arbitrarily pushing r3 (without CFI for r3).
    core_spill_mask |= 1u << r3.GetCode();
    ___ Push(RegisterList(core_spill_mask));
    cfi().AdjustCFAOffset(2 * kFramePointerSize);
    cfi().RelOffset(DWARFReg(lr), kFramePointerSize);
  } else if (core_spill_mask != 0u) {
    ___ Push(RegisterList(core_spill_mask));
    cfi().AdjustCFAOffset(POPCOUNT(core_spill_mask) * kFramePointerSize);
    cfi().RelOffsetForMany(DWARFReg(r0), 0, core_spill_mask, kFramePointerSize);
  }
  if (fp_spill_mask != 0) {
    uint32_t first = CTZ(fp_spill_mask);
    // Check that list is contiguous.
    DCHECK_EQ(fp_spill_mask >> CTZ(fp_spill_mask), ~0u >> (32 - POPCOUNT(fp_spill_mask)));
    ___ Vpush(SRegisterList(vixl32::SRegister(first), POPCOUNT(fp_spill_mask)));
    cfi().AdjustCFAOffset(POPCOUNT(fp_spill_mask) * kFramePointerSize);
    cfi().RelOffsetForMany(DWARFReg(s0), 0, fp_spill_mask, kFramePointerSize);
  }

  // Increase frame to required size.
  int pushed_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask);
  // Must at least have space for Method* if we're going to spill it.
  CHECK_GE(frame_size, (pushed_values + (method_reg.IsRegister() ? 1u : 0u)) * kFramePointerSize);
  IncreaseFrameSize(frame_size - pushed_values * kFramePointerSize);  // handles CFI as well.

  if (method_reg.IsRegister()) {
    // Write out Method*.
    CHECK(r0.Is(AsVIXLRegister(method_reg.AsArm())));
    asm_.StoreToOffset(kStoreWord, r0, sp, 0);
  }
}
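// Where possible, the epilogue below folds the return into the restoring POP by
// popping the spilled LR value directly into PC, roughly (illustrative only):
//
//   add   sp, sp, #<pad>          // Drop the local part of the frame.
//   pop   {r5-r8, r10, r11, pc}   // Restore callee saves and return in one go.
//
// When read barrier code must run after the restores, LR is popped normally and
// the function returns with a separate BX LR.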
void ArmVIXLJNIMacroAssembler::RemoveFrame(size_t frame_size,
                                           ArrayRef<const ManagedRegister> callee_save_regs,
                                           bool may_suspend) {
  CHECK_ALIGNED(frame_size, kAapcsStackAlignment);

  // Compute callee saves to pop.
  RegList core_spill_mask = 0u;
  uint32_t fp_spill_mask = 0u;
  for (const ManagedRegister& reg : callee_save_regs) {
    if (reg.AsArm().IsCoreRegister()) {
      core_spill_mask |= 1u << reg.AsArm().AsCoreRegister();
    } else {
      fp_spill_mask |= 1u << reg.AsArm().AsSRegister();
    }
  }

  // Pop LR to PC unless we need to emit some read barrier code just before returning.
  bool emit_code_before_return =
      (kEmitCompilerReadBarrier && kUseBakerReadBarrier) &&
      (may_suspend || (kIsDebugBuild && emit_run_time_checks_in_debug_mode_));
  if ((core_spill_mask & (1u << lr.GetCode())) != 0u && !emit_code_before_return) {
    DCHECK_EQ(core_spill_mask & (1u << pc.GetCode()), 0u);
    core_spill_mask ^= (1u << lr.GetCode()) | (1u << pc.GetCode());
  }

  // If there are no FP registers to pop and we pop PC, we can avoid emitting any CFI.
  if (fp_spill_mask == 0u && (core_spill_mask & (1u << pc.GetCode())) != 0u) {
    if (frame_size == POPCOUNT(core_spill_mask) * kFramePointerSize) {
      // Just pop all registers and avoid CFI.
      ___ Pop(RegisterList(core_spill_mask));
      return;
    } else if (frame_size == 8u && core_spill_mask == (1u << pc.GetCode())) {
      // Special case: One word to ignore and one to pop to PC. We are free to clobber the
      // caller-save register r3 on return, so use a 16-bit POP instruction and avoid CFI.
      ___ Pop(RegisterList((1u << r3.GetCode()) | (1u << pc.GetCode())));
      return;
    }
  }

  // We shall need to adjust CFI and restore it after the frame exit sequence.
  cfi().RememberState();

  // Decrease frame to start of callee saves.
  size_t pop_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask);
  CHECK_GE(frame_size, pop_values * kFramePointerSize);
  DecreaseFrameSize(frame_size - (pop_values * kFramePointerSize));  // handles CFI as well.

  // Pop FP callee saves.
  if (fp_spill_mask != 0u) {
    uint32_t first = CTZ(fp_spill_mask);
    // Check that list is contiguous.
    DCHECK_EQ(fp_spill_mask >> CTZ(fp_spill_mask), ~0u >> (32 - POPCOUNT(fp_spill_mask)));
    ___ Vpop(SRegisterList(vixl32::SRegister(first), POPCOUNT(fp_spill_mask)));
    cfi().AdjustCFAOffset(-kFramePointerSize * POPCOUNT(fp_spill_mask));
    cfi().RestoreMany(DWARFReg(s0), fp_spill_mask);
  }

  // Pop core callee saves.
  if (core_spill_mask != 0u) {
    if (IsPowerOfTwo(core_spill_mask) &&
        core_spill_mask != (1u << pc.GetCode()) &&
        WhichPowerOf2(core_spill_mask) >= 8) {
      // FIXME(vixl): vixl fails to transform a pop with single high register
      // to a post-index STR (also known as POP encoding T3) and emits the LDMIA
      // (also known as POP encoding T2) which is UNPREDICTABLE for 1 register.
      // So we have to explicitly do the transformation here. Bug: 178048807
      vixl32::Register reg(WhichPowerOf2(core_spill_mask));
      ___ Ldr(reg, MemOperand(sp, kFramePointerSize, PostIndex));
    } else {
      ___ Pop(RegisterList(core_spill_mask));
    }
    if ((core_spill_mask & (1u << pc.GetCode())) == 0u) {
      cfi().AdjustCFAOffset(-kFramePointerSize * POPCOUNT(core_spill_mask));
      cfi().RestoreMany(DWARFReg(r0), core_spill_mask);
    }
  }

  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    if (may_suspend) {
      // The method may be suspended; refresh the Marking Register.
      ___ Ldr(mr, MemOperand(tr, Thread::IsGcMarkingOffset<kArmPointerSize>().Int32Value()));
    } else {
      // The method shall not be suspended; no need to refresh the Marking Register.

      // The Marking Register is a callee-save register, and thus has been
      // preserved by native code following the AAPCS calling convention.

      // The following condition is a compile-time one, so it does not have a run-time cost.
      if (kIsDebugBuild) {
        // The following condition is a run-time one; it is executed after the
        // previous compile-time test, to avoid penalizing non-debug builds.
        if (emit_run_time_checks_in_debug_mode_) {
          // Emit a run-time check verifying that the Marking Register is up-to-date.
          UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
          vixl32::Register temp = temps.Acquire();
          // Ensure we are not clobbering a callee-save register that was restored before.
          DCHECK_EQ(core_spill_mask & (1 << temp.GetCode()), 0)
              << "core_spill_mask should not contain scratch register R" << temp.GetCode();
          asm_.GenerateMarkingRegisterCheck(temp);
        }
      }
    }
  }

  // Return to LR.
  if ((core_spill_mask & (1u << pc.GetCode())) == 0u) {
    ___ Bx(vixl32::lr);
  }

  // The CFI should be restored for any code that follows the exit block.
  cfi().RestoreState();
  cfi().DefCFAOffset(frame_size);
}
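// The frame size adjustments below keep the CFI in sync with the emitted code.
// For example, IncreaseFrameSize(16u) emits `sub sp, sp, #16` (AddConstant with a
// negated immediate) and records a matching `.cfi_adjust_cfa_offset 16`.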
void ArmVIXLJNIMacroAssembler::IncreaseFrameSize(size_t adjust) {
  if (adjust != 0u) {
    asm_.AddConstant(sp, -adjust);
    cfi().AdjustCFAOffset(adjust);
  }
}

void ArmVIXLJNIMacroAssembler::DecreaseFrameSize(size_t adjust) {
  if (adjust != 0u) {
    asm_.AddConstant(sp, adjust);
    cfi().AdjustCFAOffset(-adjust);
  }
}

ManagedRegister ArmVIXLJNIMacroAssembler::CoreRegisterWithSize(ManagedRegister src, size_t size) {
  DCHECK(src.AsArm().IsCoreRegister());
  DCHECK_EQ(size, 4u);
  return src;
}

void ArmVIXLJNIMacroAssembler::Store(FrameOffset dest, ManagedRegister m_src, size_t size) {
  Store(ArmManagedRegister::FromCoreRegister(SP), MemberOffset(dest.Int32Value()), m_src, size);
}

void ArmVIXLJNIMacroAssembler::Store(ManagedRegister m_base,
                                     MemberOffset offs,
                                     ManagedRegister m_src,
                                     size_t size) {
  ArmManagedRegister base = m_base.AsArm();
  ArmManagedRegister src = m_src.AsArm();
  if (src.IsNoRegister()) {
    CHECK_EQ(0u, size);
  } else if (src.IsCoreRegister()) {
    CHECK_EQ(4u, size);
    UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
    temps.Exclude(AsVIXLRegister(src));
    asm_.StoreToOffset(kStoreWord, AsVIXLRegister(src), AsVIXLRegister(base), offs.Int32Value());
  } else if (src.IsRegisterPair()) {
    CHECK_EQ(8u, size);
    ___ Strd(AsVIXLRegisterPairLow(src),
             AsVIXLRegisterPairHigh(src),
             MemOperand(AsVIXLRegister(base), offs.Int32Value()));
  } else if (src.IsSRegister()) {
    CHECK_EQ(4u, size);
    asm_.StoreSToOffset(AsVIXLSRegister(src), AsVIXLRegister(base), offs.Int32Value());
  } else {
    CHECK_EQ(8u, size);
    CHECK(src.IsDRegister()) << src;
    asm_.StoreDToOffset(AsVIXLDRegister(src), AsVIXLRegister(base), offs.Int32Value());
  }
}

void ArmVIXLJNIMacroAssembler::StoreRef(FrameOffset dest, ManagedRegister msrc) {
  vixl::aarch32::Register src = AsVIXLRegister(msrc.AsArm());
  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
  temps.Exclude(src);
  asm_.StoreToOffset(kStoreWord, src, sp, dest.Int32Value());
}

void ArmVIXLJNIMacroAssembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) {
  vixl::aarch32::Register src = AsVIXLRegister(msrc.AsArm());
  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
  temps.Exclude(src);
  asm_.StoreToOffset(kStoreWord, src, sp, dest.Int32Value());
}

void ArmVIXLJNIMacroAssembler::StoreSpanning(FrameOffset dest,
                                             ManagedRegister msrc,
                                             FrameOffset in_off) {
  vixl::aarch32::Register src = AsVIXLRegister(msrc.AsArm());
  asm_.StoreToOffset(kStoreWord, src, sp, dest.Int32Value());
  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
  vixl32::Register scratch = temps.Acquire();
  asm_.LoadFromOffset(kLoadWord, scratch, sp, in_off.Int32Value());
  asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value() + 4);
}

void ArmVIXLJNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src) {
  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
  vixl32::Register scratch = temps.Acquire();
  asm_.LoadFromOffset(kLoadWord, scratch, sp, src.Int32Value());
  asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value());
}

void ArmVIXLJNIMacroAssembler::CopyRef(FrameOffset dest,
                                       ManagedRegister base,
                                       MemberOffset offs,
                                       bool unpoison_reference) {
  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
  vixl32::Register scratch = temps.Acquire();
  asm_.LoadFromOffset(kLoadWord, scratch, AsVIXLRegister(base.AsArm()), offs.Int32Value());
  if (unpoison_reference) {
    asm_.MaybeUnpoisonHeapReference(scratch);
  }
  asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value());
}
void ArmVIXLJNIMacroAssembler::LoadRef(ManagedRegister mdest,
                                       ManagedRegister mbase,
                                       MemberOffset offs,
                                       bool unpoison_reference) {
  vixl::aarch32::Register dest = AsVIXLRegister(mdest.AsArm());
  vixl::aarch32::Register base = AsVIXLRegister(mbase.AsArm());
  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
  temps.Exclude(dest, base);
  asm_.LoadFromOffset(kLoadWord, dest, base, offs.Int32Value());
  if (unpoison_reference) {
    asm_.MaybeUnpoisonHeapReference(dest);
  }
}

void ArmVIXLJNIMacroAssembler::LoadRef(ManagedRegister dest ATTRIBUTE_UNUSED,
                                       FrameOffset src ATTRIBUTE_UNUSED) {
  UNIMPLEMENTED(FATAL);
}

void ArmVIXLJNIMacroAssembler::LoadRawPtr(ManagedRegister dest ATTRIBUTE_UNUSED,
                                          ManagedRegister base ATTRIBUTE_UNUSED,
                                          Offset offs ATTRIBUTE_UNUSED) {
  UNIMPLEMENTED(FATAL);
}

void ArmVIXLJNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm) {
  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
  vixl32::Register scratch = temps.Acquire();
  asm_.LoadImmediate(scratch, imm);
  asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value());
}

void ArmVIXLJNIMacroAssembler::Load(ManagedRegister m_dst, FrameOffset src, size_t size) {
  return Load(m_dst.AsArm(), sp, src.Int32Value(), size);
}

void ArmVIXLJNIMacroAssembler::Load(ManagedRegister m_dst,
                                    ManagedRegister m_base,
                                    MemberOffset offs,
                                    size_t size) {
  return Load(m_dst.AsArm(), AsVIXLRegister(m_base.AsArm()), offs.Int32Value(), size);
}

void ArmVIXLJNIMacroAssembler::LoadFromThread(ManagedRegister m_dst,
                                              ThreadOffset32 src,
                                              size_t size) {
  return Load(m_dst.AsArm(), tr, src.Int32Value(), size);
}

void ArmVIXLJNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset32 offs) {
  vixl::aarch32::Register dest = AsVIXLRegister(mdest.AsArm());
  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
  temps.Exclude(dest);
  asm_.LoadFromOffset(kLoadWord, dest, tr, offs.Int32Value());
}

void ArmVIXLJNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset32 thr_offs) {
  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
  vixl32::Register scratch = temps.Acquire();
  asm_.LoadFromOffset(kLoadWord, scratch, tr, thr_offs.Int32Value());
  asm_.StoreToOffset(kStoreWord, scratch, sp, fr_offs.Int32Value());
}

void ArmVIXLJNIMacroAssembler::CopyRawPtrToThread(ThreadOffset32 thr_offs ATTRIBUTE_UNUSED,
                                                  FrameOffset fr_offs ATTRIBUTE_UNUSED,
                                                  ManagedRegister mscratch ATTRIBUTE_UNUSED) {
  UNIMPLEMENTED(FATAL);
}

void ArmVIXLJNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset32 thr_offs,
                                                        FrameOffset fr_offs) {
  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
  vixl32::Register scratch = temps.Acquire();
  asm_.AddConstant(scratch, sp, fr_offs.Int32Value());
  asm_.StoreToOffset(kStoreWord, scratch, tr, thr_offs.Int32Value());
}

void ArmVIXLJNIMacroAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs) {
  asm_.StoreToOffset(kStoreWord, sp, tr, thr_offs.Int32Value());
}

void ArmVIXLJNIMacroAssembler::SignExtend(ManagedRegister mreg ATTRIBUTE_UNUSED,
                                          size_t size ATTRIBUTE_UNUSED) {
  UNIMPLEMENTED(FATAL) << "no sign extension necessary for arm";
}

void ArmVIXLJNIMacroAssembler::ZeroExtend(ManagedRegister mreg ATTRIBUTE_UNUSED,
                                          size_t size ATTRIBUTE_UNUSED) {
  UNIMPLEMENTED(FATAL) << "no zero extension necessary for arm";
}

static inline bool IsCoreRegisterOrPair(ArmManagedRegister reg) {
  return reg.IsCoreRegister() || reg.IsRegisterPair();
}

static inline bool NoSpillGap(const ArgumentLocation& loc1, const ArgumentLocation& loc2) {
  DCHECK(!loc1.IsRegister());
  DCHECK(!loc2.IsRegister());
  uint32_t loc1_offset = loc1.GetFrameOffset().Uint32Value();
  uint32_t loc2_offset = loc2.GetFrameOffset().Uint32Value();
  return loc1_offset + loc1.GetSize() == loc2_offset;
}
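// Example: a 4-byte argument spilled to [sp, #8] followed by another spilled to
// [sp, #12] has no gap, so the helpers below may merge the two stores into a
// single STRD/STM/VSTM chunk.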
static inline uint32_t GetSRegisterNumber(ArmManagedRegister reg) {
  if (reg.IsSRegister()) {
    return static_cast<uint32_t>(reg.AsSRegister());
  } else {
    DCHECK(reg.IsDRegister());
    return 2u * static_cast<uint32_t>(reg.AsDRegister());
  }
}

// Get the number of locations to spill together.
static inline size_t GetSpillChunkSize(ArrayRef<ArgumentLocation> dests,
                                       ArrayRef<ArgumentLocation> srcs,
                                       size_t start) {
  DCHECK_LT(start, dests.size());
  DCHECK_ALIGNED(dests[start].GetFrameOffset().Uint32Value(), 4u);
  const ArgumentLocation& first_src = srcs[start];
  DCHECK(first_src.IsRegister());
  ArmManagedRegister first_src_reg = first_src.GetRegister().AsArm();
  size_t end = start + 1u;
  if (IsCoreRegisterOrPair(first_src_reg)) {
    while (end != dests.size() &&
           NoSpillGap(dests[end - 1u], dests[end]) &&
           srcs[end].IsRegister() &&
           IsCoreRegisterOrPair(srcs[end].GetRegister().AsArm())) {
      ++end;
    }
  } else {
    DCHECK(first_src_reg.IsSRegister() || first_src_reg.IsDRegister());
    uint32_t next_sreg = GetSRegisterNumber(first_src_reg) + first_src.GetSize() / kSRegSizeInBytes;
    while (end != dests.size() &&
           NoSpillGap(dests[end - 1u], dests[end]) &&
           srcs[end].IsRegister() &&
           !IsCoreRegisterOrPair(srcs[end].GetRegister().AsArm()) &&
           GetSRegisterNumber(srcs[end].GetRegister().AsArm()) == next_sreg) {
      next_sreg += srcs[end].GetSize() / kSRegSizeInBytes;
      ++end;
    }
  }
  return end - start;
}

static inline uint32_t GetCoreRegisterMask(ArmManagedRegister reg) {
  if (reg.IsCoreRegister()) {
    return 1u << static_cast<uint32_t>(reg.AsCoreRegister());
  } else {
    DCHECK(reg.IsRegisterPair());
    DCHECK_LT(reg.AsRegisterPairLow(), reg.AsRegisterPairHigh());
    return (1u << static_cast<uint32_t>(reg.AsRegisterPairLow())) |
           (1u << static_cast<uint32_t>(reg.AsRegisterPairHigh()));
  }
}

static inline uint32_t GetCoreRegisterMask(ArrayRef<ArgumentLocation> srcs) {
  uint32_t mask = 0u;
  for (const ArgumentLocation& loc : srcs) {
    DCHECK(loc.IsRegister());
    mask |= GetCoreRegisterMask(loc.GetRegister().AsArm());
  }
  return mask;
}

static inline bool UseStrdForChunk(ArrayRef<ArgumentLocation> srcs, size_t start, size_t length) {
  DCHECK_GE(length, 2u);
  DCHECK(srcs[start].IsRegister());
  DCHECK(srcs[start + 1u].IsRegister());
  // The destination may not be 8B aligned (but it is 4B aligned).
  // Allow arbitrary destination offset, macro assembler will use a temp if needed.
  // Note: T32 allows unrelated registers in STRD. (A32 does not.)
  return length == 2u &&
         srcs[start].GetRegister().AsArm().IsCoreRegister() &&
         srcs[start + 1u].GetRegister().AsArm().IsCoreRegister();
}

static inline bool UseVstrForChunk(ArrayRef<ArgumentLocation> srcs, size_t start, size_t length) {
  DCHECK_GE(length, 2u);
  DCHECK(srcs[start].IsRegister());
  DCHECK(srcs[start + 1u].IsRegister());
  // The destination may not be 8B aligned (but it is 4B aligned).
  // Allow arbitrary destination offset, macro assembler will use a temp if needed.
  return length == 2u &&
         srcs[start].GetRegister().AsArm().IsSRegister() &&
         srcs[start + 1u].GetRegister().AsArm().IsSRegister() &&
         IsAligned<2u>(static_cast<uint32_t>(srcs[start].GetRegister().AsArm().AsSRegister()));
}
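// MoveArguments() shuffles managed-ABI arguments into their native-ABI locations
// in four phases:
//   1. Convert reference arguments held in registers to `jobject` values in place.
//   2. Spill register arguments that belong on the stack, coalescing adjacent
//      slots into STRD/VSTR/STM/VSTM chunks where possible.
//   3. Copy incoming stack arguments to their outgoing stack slots, pairing
//      adjacent words into LDRD/STRD where possible.
//   4. Fill the remaining destination registers, ordering core register moves
//      so that no not-yet-moved source is clobbered.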
void ArmVIXLJNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests,
                                             ArrayRef<ArgumentLocation> srcs,
                                             ArrayRef<FrameOffset> refs) {
  size_t arg_count = dests.size();
  DCHECK_EQ(arg_count, srcs.size());
  DCHECK_EQ(arg_count, refs.size());

  // Convert reference registers to `jobject` values.
  // TODO: Delay this for references that are copied to another register.
  for (size_t i = 0; i != arg_count; ++i) {
    if (refs[i] != kInvalidReferenceOffset && srcs[i].IsRegister()) {
      // Note: We can clobber `srcs[i]` here as the register cannot hold more than one argument.
      ManagedRegister src_i_reg = srcs[i].GetRegister();
      CreateJObject(src_i_reg, refs[i], src_i_reg, /*null_allowed=*/ i != 0u);
    }
  }

  // Native ABI is soft-float, so all destinations should be core registers or stack offsets.
  // And register locations should be first, followed by stack locations.
  auto is_register = [](const ArgumentLocation& loc) { return loc.IsRegister(); };
  DCHECK(std::is_partitioned(dests.begin(), dests.end(), is_register));
  size_t num_reg_dests =
      std::distance(dests.begin(), std::partition_point(dests.begin(), dests.end(), is_register));

  // Collect registers to move. No need to record FP regs as destinations are only core regs.
  uint32_t src_regs = 0u;
  uint32_t dest_regs = 0u;
  uint32_t same_regs = 0u;
  for (size_t i = 0; i != num_reg_dests; ++i) {
    const ArgumentLocation& src = srcs[i];
    const ArgumentLocation& dest = dests[i];
    DCHECK(dest.IsRegister() && IsCoreRegisterOrPair(dest.GetRegister().AsArm()));
    if (src.IsRegister() && IsCoreRegisterOrPair(src.GetRegister().AsArm())) {
      if (src.GetRegister().Equals(dest.GetRegister())) {
        same_regs |= GetCoreRegisterMask(src.GetRegister().AsArm());
        continue;
      }
      src_regs |= GetCoreRegisterMask(src.GetRegister().AsArm());
    }
    dest_regs |= GetCoreRegisterMask(dest.GetRegister().AsArm());
  }

  // Spill register arguments to stack slots.
  for (size_t i = num_reg_dests; i != arg_count; ) {
    const ArgumentLocation& src = srcs[i];
    if (!src.IsRegister()) {
      ++i;
      continue;
    }
    const ArgumentLocation& dest = dests[i];
    DCHECK_EQ(src.GetSize(), dest.GetSize());  // Even for references.
    DCHECK(!dest.IsRegister());
    uint32_t frame_offset = dest.GetFrameOffset().Uint32Value();
    size_t chunk_size = GetSpillChunkSize(dests, srcs, i);
    DCHECK_NE(chunk_size, 0u);
    if (chunk_size == 1u) {
      Store(dest.GetFrameOffset(), src.GetRegister(), dest.GetSize());
    } else if (UseStrdForChunk(srcs, i, chunk_size)) {
      ___ Strd(AsVIXLRegister(srcs[i].GetRegister().AsArm()),
               AsVIXLRegister(srcs[i + 1u].GetRegister().AsArm()),
               MemOperand(sp, frame_offset));
    } else if (UseVstrForChunk(srcs, i, chunk_size)) {
      size_t sreg = GetSRegisterNumber(src.GetRegister().AsArm());
      DCHECK_ALIGNED(sreg, 2u);
      ___ Vstr(vixl32::DRegister(sreg / 2u), MemOperand(sp, frame_offset));
    } else {
      UseScratchRegisterScope temps2(asm_.GetVIXLAssembler());
      vixl32::Register base_reg;
      if (frame_offset == 0u) {
        base_reg = sp;
      } else {
        base_reg = temps2.Acquire();
        ___ Add(base_reg, sp, frame_offset);
      }
      ArmManagedRegister src_reg = src.GetRegister().AsArm();
      if (IsCoreRegisterOrPair(src_reg)) {
        uint32_t core_reg_mask = GetCoreRegisterMask(srcs.SubArray(i, chunk_size));
        ___ Stm(base_reg, NO_WRITE_BACK, RegisterList(core_reg_mask));
      } else {
        uint32_t start_sreg = GetSRegisterNumber(src_reg);
        const ArgumentLocation& last_dest = dests[i + chunk_size - 1u];
        uint32_t total_size =
            last_dest.GetFrameOffset().Uint32Value() + last_dest.GetSize() - frame_offset;
        if (IsAligned<2u>(start_sreg) &&
            IsAligned<kDRegSizeInBytes>(frame_offset) &&
            IsAligned<kDRegSizeInBytes>(total_size)) {
          uint32_t dreg_count = total_size / kDRegSizeInBytes;
          DRegisterList dreg_list(vixl32::DRegister(start_sreg / 2u), dreg_count);
          ___ Vstm(F64, base_reg, NO_WRITE_BACK, dreg_list);
        } else {
          uint32_t sreg_count = total_size / kSRegSizeInBytes;
          SRegisterList sreg_list(vixl32::SRegister(start_sreg), sreg_count);
          ___ Vstm(F32, base_reg, NO_WRITE_BACK, sreg_list);
        }
      }
    }
    i += chunk_size;
  }
  // Copy incoming stack arguments to outgoing stack arguments.
  // Registers r0-r3 are argument registers for both managed and native ABI and r4
  // is a scratch register in managed ABI but also a hidden argument register for
  // @CriticalNative call. We can use these registers as temporaries for copying
  // stack arguments as long as they do not currently hold live values.
  // TODO: Use the callee-save scratch registers instead to avoid using calling
  // convention knowledge in the assembler. This would require reordering the
  // argument move with pushing the IRT frame where those registers are used.
  uint32_t copy_temp_regs = ((1u << 5) - 1u) & ~(same_regs | src_regs);
  if ((dest_regs & (1u << R4)) != 0) {
    // For @CriticalNative, R4 shall hold the hidden argument but it is available
    // for use as a temporary at this point. However, it may be the only available
    // register, so we shall use IP as the second temporary if needed.
    // We do not need to worry about `CreateJObject` for @CriticalNative.
    DCHECK_NE(copy_temp_regs, 0u);
    DCHECK(std::all_of(refs.begin(),
                       refs.end(),
                       [](FrameOffset r) { return r == kInvalidReferenceOffset; }));
  } else {
    // For normal native and @FastNative, R4 and at least one of R0-R3 should be
    // available because there are only 3 destination registers R1-R3 where the
    // source registers can be moved. The R0 shall be filled by the `JNIEnv*`
    // argument later. We need to keep IP available for `CreateJObject()`.
    DCHECK_GE(POPCOUNT(copy_temp_regs), 2);
  }
  vixl32::Register copy_temp1 = vixl32::Register(LeastSignificantBit(copy_temp_regs));
  copy_temp_regs ^= 1u << copy_temp1.GetCode();
  vixl32::Register copy_xtemp = (copy_temp_regs != 0u)
      ? vixl32::Register(LeastSignificantBit(copy_temp_regs))
      : vixl32::Register();
  for (size_t i = num_reg_dests; i != arg_count; ++i) {
    if (srcs[i].IsRegister()) {
      continue;
    }
    FrameOffset src_offset = srcs[i].GetFrameOffset();
    DCHECK_ALIGNED(src_offset.Uint32Value(), 4u);
    FrameOffset dest_offset = dests[i].GetFrameOffset();
    DCHECK_ALIGNED(dest_offset.Uint32Value(), 4u);
    // Look for opportunities to move 2 words at a time with LDRD/STRD
    // when the source types are word-sized.
    if (srcs[i].GetSize() == 4u &&
        i + 1u != arg_count &&
        !srcs[i + 1u].IsRegister() &&
        srcs[i + 1u].GetSize() == 4u &&
        NoSpillGap(srcs[i], srcs[i + 1u]) &&
        NoSpillGap(dests[i], dests[i + 1u]) &&
        dest_offset.Uint32Value() < kStrdOffsetCutoff) {
      UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
      vixl32::Register copy_temp2 = copy_xtemp.IsValid() ? copy_xtemp : temps.Acquire();
      ___ Ldrd(copy_temp1, copy_temp2, MemOperand(sp, src_offset.Uint32Value()));
      if (refs[i] != kInvalidReferenceOffset) {
        ArmManagedRegister m_copy_temp1 = ArmManagedRegister::FromCoreRegister(
            enum_cast<Register>(copy_temp1.GetCode()));
        CreateJObject(m_copy_temp1, refs[i], m_copy_temp1, /*null_allowed=*/ i != 0u);
      }
      if (refs[i + 1u] != kInvalidReferenceOffset) {
        ArmManagedRegister m_copy_temp2 = ArmManagedRegister::FromCoreRegister(
            enum_cast<Register>(copy_temp2.GetCode()));
        CreateJObject(m_copy_temp2, refs[i + 1u], m_copy_temp2, /*null_allowed=*/ true);
      }
      ___ Strd(copy_temp1, copy_temp2, MemOperand(sp, dest_offset.Uint32Value()));
      ++i;
    } else if (dests[i].GetSize() == 8u && dest_offset.Uint32Value() < kStrdOffsetCutoff) {
      UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
      vixl32::Register copy_temp2 = copy_xtemp.IsValid() ? copy_xtemp : temps.Acquire();
      ___ Ldrd(copy_temp1, copy_temp2, MemOperand(sp, src_offset.Uint32Value()));
      ___ Strd(copy_temp1, copy_temp2, MemOperand(sp, dest_offset.Uint32Value()));
    } else if (refs[i] != kInvalidReferenceOffset) {
      // Do not use the `CreateJObject()` overload for stack target as it generates
      // worse code than explicitly using a low register temporary.
      ___ Ldr(copy_temp1, MemOperand(sp, src_offset.Uint32Value()));
      ArmManagedRegister m_copy_temp1 = ArmManagedRegister::FromCoreRegister(
          enum_cast<Register>(copy_temp1.GetCode()));
      CreateJObject(m_copy_temp1, refs[i], m_copy_temp1, /*null_allowed=*/ i != 0u);
      ___ Str(copy_temp1, MemOperand(sp, dest_offset.Uint32Value()));
    } else {
      Copy(dest_offset, src_offset, dests[i].GetSize());
    }
  }

  // Fill destination registers from source core registers.
  // There should be no cycles, so this algorithm should make progress.
  while (src_regs != 0u) {
    uint32_t old_src_regs = src_regs;
    for (size_t i = 0; i != num_reg_dests; ++i) {
      DCHECK(dests[i].IsRegister() && IsCoreRegisterOrPair(dests[i].GetRegister().AsArm()));
      if (!srcs[i].IsRegister() || !IsCoreRegisterOrPair(srcs[i].GetRegister().AsArm())) {
        continue;
      }
      uint32_t dest_reg_mask = GetCoreRegisterMask(dests[i].GetRegister().AsArm());
      if ((dest_reg_mask & dest_regs) == 0u) {
        continue;  // Equals source, or already filled in one of previous iterations.
      }
      // There are no partial overlaps of 8-byte arguments, otherwise we would have to
      // tweak this check; Move() can deal with partial overlap for historical reasons.
      if ((dest_reg_mask & src_regs) != 0u) {
        continue;  // Cannot clobber this register yet.
      }
      Move(dests[i].GetRegister(), srcs[i].GetRegister(), dests[i].GetSize());
      uint32_t src_reg_mask = GetCoreRegisterMask(srcs[i].GetRegister().AsArm());
      DCHECK_EQ(src_regs & src_reg_mask, src_reg_mask);
      src_regs &= ~src_reg_mask;  // Allow clobbering the source register or pair.
      dest_regs &= ~dest_reg_mask;  // Destination register or pair was filled.
    }
    CHECK_NE(old_src_regs, src_regs);
    DCHECK_EQ(0u, src_regs & ~old_src_regs);
  }

  // Now fill destination registers from FP registers or stack slots, looking for
  // opportunities to use LDRD/VMOV to fill 2 registers with one instruction.
  for (size_t i = 0, j; i != num_reg_dests; i = j) {
    j = i + 1u;
    DCHECK(dests[i].IsRegister());
    ArmManagedRegister dest_reg = dests[i].GetRegister().AsArm();
    DCHECK(IsCoreRegisterOrPair(dest_reg));
    if (srcs[i].IsRegister() && IsCoreRegisterOrPair(srcs[i].GetRegister().AsArm())) {
      DCHECK_EQ(GetCoreRegisterMask(dests[i].GetRegister().AsArm()) & dest_regs, 0u);
      continue;  // Equals destination or moved above.
    }
    DCHECK_NE(GetCoreRegisterMask(dest_reg) & dest_regs, 0u);
    if (dests[i].GetSize() == 4u) {
      // Find next register to load.
      while (j != num_reg_dests &&
             (srcs[j].IsRegister() && IsCoreRegisterOrPair(srcs[j].GetRegister().AsArm()))) {
        DCHECK_EQ(GetCoreRegisterMask(dests[j].GetRegister().AsArm()) & dest_regs, 0u);
        ++j;  // Equals destination or moved above.
      }
      if (j != num_reg_dests && dests[j].GetSize() == 4u) {
        if (!srcs[i].IsRegister() && !srcs[j].IsRegister() && NoSpillGap(srcs[i], srcs[j])) {
          ___ Ldrd(AsVIXLRegister(dests[i].GetRegister().AsArm()),
                   AsVIXLRegister(dests[j].GetRegister().AsArm()),
                   MemOperand(sp, srcs[i].GetFrameOffset().Uint32Value()));
          if (refs[i] != kInvalidReferenceOffset) {
            DCHECK_EQ(refs[i], srcs[i].GetFrameOffset());
            CreateJObject(dest_reg, refs[i], dest_reg, /*null_allowed=*/ i != 0u);
          }
          if (refs[j] != kInvalidReferenceOffset) {
            DCHECK_EQ(refs[j], srcs[j].GetFrameOffset());
            ManagedRegister dest_j_reg = dests[j].GetRegister();
            CreateJObject(dest_j_reg, refs[j], dest_j_reg, /*null_allowed=*/ true);
          }
          ++j;
          continue;
        }
        if (srcs[i].IsRegister() && srcs[j].IsRegister()) {
          uint32_t first_sreg = GetSRegisterNumber(srcs[i].GetRegister().AsArm());
          if (IsAligned<2u>(first_sreg) &&
              first_sreg + 1u == GetSRegisterNumber(srcs[j].GetRegister().AsArm())) {
            ___ Vmov(AsVIXLRegister(dest_reg),
                     AsVIXLRegister(dests[j].GetRegister().AsArm()),
                     vixl32::DRegister(first_sreg / 2u));
            ++j;
            continue;
          }
        }
      }
    }
    if (srcs[i].IsRegister()) {
      Move(dests[i].GetRegister(), srcs[i].GetRegister(), dests[i].GetSize());
    } else if (refs[i] != kInvalidReferenceOffset) {
      CreateJObject(dest_reg, refs[i], ManagedRegister::NoRegister(), /*null_allowed=*/ i != 0u);
    } else {
      Load(dest_reg, srcs[i].GetFrameOffset(), dests[i].GetSize());
    }
  }
}
void ArmVIXLJNIMacroAssembler::Move(ManagedRegister mdst,
                                    ManagedRegister msrc,
                                    size_t size ATTRIBUTE_UNUSED) {
  ArmManagedRegister dst = mdst.AsArm();
  if (kIsDebugBuild) {
    // Check that the destination is not a scratch register.
    UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
    if (dst.IsCoreRegister()) {
      CHECK(!temps.IsAvailable(AsVIXLRegister(dst)));
    } else if (dst.IsDRegister()) {
      CHECK(!temps.IsAvailable(AsVIXLDRegister(dst)));
    } else if (dst.IsSRegister()) {
      CHECK(!temps.IsAvailable(AsVIXLSRegister(dst)));
    } else {
      CHECK(dst.IsRegisterPair()) << dst;
      CHECK(!temps.IsAvailable(AsVIXLRegisterPairLow(dst)));
      CHECK(!temps.IsAvailable(AsVIXLRegisterPairHigh(dst)));
    }
  }
  ArmManagedRegister src = msrc.AsArm();
  if (!dst.Equals(src)) {
    if (dst.IsCoreRegister()) {
      if (src.IsCoreRegister()) {
        ___ Mov(AsVIXLRegister(dst), AsVIXLRegister(src));
      } else {
        CHECK(src.IsSRegister()) << src;
        ___ Vmov(AsVIXLRegister(dst), AsVIXLSRegister(src));
      }
    } else if (dst.IsDRegister()) {
      if (src.IsDRegister()) {
        ___ Vmov(F64, AsVIXLDRegister(dst), AsVIXLDRegister(src));
      } else {
        // VMOV Dn, Rlo, Rhi (Dn = {Rlo, Rhi})
        CHECK(src.IsRegisterPair()) << src;
        ___ Vmov(AsVIXLDRegister(dst), AsVIXLRegisterPairLow(src), AsVIXLRegisterPairHigh(src));
      }
    } else if (dst.IsSRegister()) {
      if (src.IsSRegister()) {
        ___ Vmov(F32, AsVIXLSRegister(dst), AsVIXLSRegister(src));
      } else {
        // VMOV Sn, Rn (Sn = Rn)
        CHECK(src.IsCoreRegister()) << src;
        ___ Vmov(AsVIXLSRegister(dst), AsVIXLRegister(src));
      }
    } else {
      CHECK(dst.IsRegisterPair()) << dst;
      if (src.IsRegisterPair()) {
        // Ensure that the first move doesn't clobber the input of the second.
        if (src.AsRegisterPairHigh() != dst.AsRegisterPairLow()) {
          ___ Mov(AsVIXLRegisterPairLow(dst), AsVIXLRegisterPairLow(src));
          ___ Mov(AsVIXLRegisterPairHigh(dst), AsVIXLRegisterPairHigh(src));
        } else {
          ___ Mov(AsVIXLRegisterPairHigh(dst), AsVIXLRegisterPairHigh(src));
          ___ Mov(AsVIXLRegisterPairLow(dst), AsVIXLRegisterPairLow(src));
        }
      } else {
        CHECK(src.IsDRegister()) << src;
        ___ Vmov(AsVIXLRegisterPairLow(dst), AsVIXLRegisterPairHigh(dst), AsVIXLDRegister(src));
      }
    }
  }
}

void ArmVIXLJNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src, size_t size) {
  DCHECK(size == 4 || size == 8) << size;
  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
  vixl32::Register scratch = temps.Acquire();
  if (size == 4) {
    asm_.LoadFromOffset(kLoadWord, scratch, sp, src.Int32Value());
    asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value());
  } else if (size == 8) {
    asm_.LoadFromOffset(kLoadWord, scratch, sp, src.Int32Value());
    asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value());
    asm_.LoadFromOffset(kLoadWord, scratch, sp, src.Int32Value() + 4);
    asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value() + 4);
  }
}

void ArmVIXLJNIMacroAssembler::Copy(FrameOffset dest ATTRIBUTE_UNUSED,
                                    ManagedRegister src_base ATTRIBUTE_UNUSED,
                                    Offset src_offset ATTRIBUTE_UNUSED,
                                    ManagedRegister mscratch ATTRIBUTE_UNUSED,
                                    size_t size ATTRIBUTE_UNUSED) {
  UNIMPLEMENTED(FATAL);
}

void ArmVIXLJNIMacroAssembler::Copy(ManagedRegister dest_base ATTRIBUTE_UNUSED,
                                    Offset dest_offset ATTRIBUTE_UNUSED,
                                    FrameOffset src ATTRIBUTE_UNUSED,
                                    ManagedRegister mscratch ATTRIBUTE_UNUSED,
                                    size_t size ATTRIBUTE_UNUSED) {
  UNIMPLEMENTED(FATAL);
}

void ArmVIXLJNIMacroAssembler::Copy(FrameOffset dst ATTRIBUTE_UNUSED,
                                    FrameOffset src_base ATTRIBUTE_UNUSED,
                                    Offset src_offset ATTRIBUTE_UNUSED,
                                    ManagedRegister mscratch ATTRIBUTE_UNUSED,
                                    size_t size ATTRIBUTE_UNUSED) {
  UNIMPLEMENTED(FATAL);
}

void ArmVIXLJNIMacroAssembler::Copy(ManagedRegister dest ATTRIBUTE_UNUSED,
                                    Offset dest_offset ATTRIBUTE_UNUSED,
                                    ManagedRegister src ATTRIBUTE_UNUSED,
                                    Offset src_offset ATTRIBUTE_UNUSED,
                                    ManagedRegister mscratch ATTRIBUTE_UNUSED,
                                    size_t size ATTRIBUTE_UNUSED) {
  UNIMPLEMENTED(FATAL);
}

void ArmVIXLJNIMacroAssembler::Copy(FrameOffset dst ATTRIBUTE_UNUSED,
                                    Offset dest_offset ATTRIBUTE_UNUSED,
                                    FrameOffset src ATTRIBUTE_UNUSED,
                                    Offset src_offset ATTRIBUTE_UNUSED,
                                    ManagedRegister scratch ATTRIBUTE_UNUSED,
                                    size_t size ATTRIBUTE_UNUSED) {
  UNIMPLEMENTED(FATAL);
}
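// CreateJObject() materializes the `jobject` for a spilled reference: a null
// reference stays null, anything else becomes the address of the stack slot
// holding the reference. A sketch of the logic below (not the emitted code):
//
//   out_reg = (in_reg == null) ? null : (sp + spilled_reference_offset)
//
// The conditional selection is emitted as a short IT-block sequence.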
void ArmVIXLJNIMacroAssembler::CreateJObject(ManagedRegister mout_reg,
                                             FrameOffset spilled_reference_offset,
                                             ManagedRegister min_reg,
                                             bool null_allowed) {
  vixl::aarch32::Register out_reg = AsVIXLRegister(mout_reg.AsArm());
  vixl::aarch32::Register in_reg =
      min_reg.AsArm().IsNoRegister() ? vixl::aarch32::Register() : AsVIXLRegister(min_reg.AsArm());
  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
  temps.Exclude(out_reg);
  if (null_allowed) {
    // Null values get a jobject value null. Otherwise, the jobject is
    // the address of the spilled reference.
    // e.g. out_reg = (handle == 0) ? 0 : (SP+spilled_reference_offset)
    if (!in_reg.IsValid()) {
      asm_.LoadFromOffset(kLoadWord, out_reg, sp, spilled_reference_offset.Int32Value());
      in_reg = out_reg;
    }

    if (out_reg.IsLow() && spilled_reference_offset.Uint32Value() < kAddSpImmCutoff) {
      // There is a 16-bit "ADD Rd, SP, <imm8>" instruction we can use in IT-block.
      if (out_reg.Is(in_reg)) {
        ___ Cmp(in_reg, 0);
      } else {
        ___ Movs(out_reg, in_reg);
      }
      ExactAssemblyScope guard(asm_.GetVIXLAssembler(),
                               2 * vixl32::k16BitT32InstructionSizeInBytes);
      ___ it(ne);
      ___ add(ne, Narrow, out_reg, sp, spilled_reference_offset.Int32Value());
    } else {
      vixl32::Register addr_reg = out_reg.Is(in_reg) ? temps.Acquire() : out_reg;
      vixl32::Register cond_mov_src_reg = out_reg.Is(in_reg) ? addr_reg : in_reg;
      vixl32::Condition cond = out_reg.Is(in_reg) ? ne : eq;
      ___ Add(addr_reg, sp, spilled_reference_offset.Int32Value());
      ___ Cmp(in_reg, 0);
      ExactAssemblyScope guard(asm_.GetVIXLAssembler(),
                               2 * vixl32::k16BitT32InstructionSizeInBytes);
      ___ it(cond);
      ___ mov(cond, Narrow, out_reg, cond_mov_src_reg);
    }
  } else {
    asm_.AddConstant(out_reg, sp, spilled_reference_offset.Int32Value());
  }
}

void ArmVIXLJNIMacroAssembler::CreateJObject(FrameOffset out_off,
                                             FrameOffset spilled_reference_offset,
                                             bool null_allowed) {
  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
  vixl32::Register scratch = temps.Acquire();
  if (null_allowed) {
    asm_.LoadFromOffset(kLoadWord, scratch, sp, spilled_reference_offset.Int32Value());
    // Null values get a jobject value null. Otherwise, the jobject is
    // the address of the spilled reference.
    // e.g. scratch = (scratch == 0) ? 0 : (SP+spilled_reference_offset)
    ___ Cmp(scratch, 0);

    // FIXME: Using 32-bit T32 instruction in IT-block is deprecated.
    if (asm_.ShifterOperandCanHold(ADD, spilled_reference_offset.Int32Value())) {
      ExactAssemblyScope guard(asm_.GetVIXLAssembler(),
                               2 * vixl32::kMaxInstructionSizeInBytes,
                               CodeBufferCheckScope::kMaximumSize);
      ___ it(ne, 0x8);
      asm_.AddConstantInIt(scratch, sp, spilled_reference_offset.Int32Value(), ne);
    } else {
      // TODO: Implement this (old arm assembler would have crashed here).
      UNIMPLEMENTED(FATAL);
    }
  } else {
    asm_.AddConstant(scratch, sp, spilled_reference_offset.Int32Value());
  }
  asm_.StoreToOffset(kStoreWord, scratch, sp, out_off.Int32Value());
}

void ArmVIXLJNIMacroAssembler::VerifyObject(ManagedRegister src ATTRIBUTE_UNUSED,
                                            bool could_be_null ATTRIBUTE_UNUSED) {
  // TODO: not validating references.
}

void ArmVIXLJNIMacroAssembler::VerifyObject(FrameOffset src ATTRIBUTE_UNUSED,
                                            bool could_be_null ATTRIBUTE_UNUSED) {
  // TODO: not validating references.
}

void ArmVIXLJNIMacroAssembler::Jump(ManagedRegister mbase, Offset offset) {
  vixl::aarch32::Register base = AsVIXLRegister(mbase.AsArm());
  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
  vixl32::Register scratch = temps.Acquire();
  asm_.LoadFromOffset(kLoadWord, scratch, base, offset.Int32Value());
  ___ Bx(scratch);
}

void ArmVIXLJNIMacroAssembler::Call(ManagedRegister mbase, Offset offset) {
  vixl::aarch32::Register base = AsVIXLRegister(mbase.AsArm());
  asm_.LoadFromOffset(kLoadWord, lr, base, offset.Int32Value());
  ___ Blx(lr);
  // TODO: place reference map on call.
}
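// Calls through the Thread* follow the same pattern as Call() above: load the
// target from a known offset off the thread register, then branch and link, i.e.
//   ldr lr, [tr, #offset]
//   blx lr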
void ArmVIXLJNIMacroAssembler::CallFromThread(ThreadOffset32 offset) {
  // Call *(TR + offset)
  asm_.LoadFromOffset(kLoadWord, lr, tr, offset.Int32Value());
  ___ Blx(lr);
  // TODO: place reference map on call
}

void ArmVIXLJNIMacroAssembler::GetCurrentThread(ManagedRegister dest) {
  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
  temps.Exclude(AsVIXLRegister(dest.AsArm()));
  ___ Mov(AsVIXLRegister(dest.AsArm()), tr);
}

void ArmVIXLJNIMacroAssembler::GetCurrentThread(FrameOffset dest_offset) {
  asm_.StoreToOffset(kStoreWord, tr, sp, dest_offset.Int32Value());
}
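// The two thread-state transition helpers below implement lock-free state changes
// with LDREX/STREX retry loops. A rough C-like sketch of the "runnable to native"
// transition (release ordering; any set flag diverts to the slow path):
//
//   do {
//     old = load_exclusive(&self->flags);
//     if (old != kRunnableStateValue) goto slow_path;  // Flags set or wrong state.
//     barrier();  // DMB ISH for the "release" semantics.
//   } while (!store_exclusive(&self->flags, kNativeStateValue));
//   self->held_mutexes[kMutatorLock] = nullptr;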
void ArmVIXLJNIMacroAssembler::TryToTransitionFromRunnableToNative(
    JNIMacroLabel* label, ArrayRef<const ManagedRegister> scratch_regs) {
  constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative);
  constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable);
  constexpr ThreadOffset32 thread_flags_offset = Thread::ThreadFlagsOffset<kArmPointerSize>();
  constexpr ThreadOffset32 thread_held_mutex_mutator_lock_offset =
      Thread::HeldMutexOffset<kArmPointerSize>(kMutatorLock);

  DCHECK_GE(scratch_regs.size(), 2u);
  vixl32::Register scratch = AsVIXLRegister(scratch_regs[0].AsArm());
  vixl32::Register scratch2 = AsVIXLRegister(scratch_regs[1].AsArm());

  // CAS release, old_value = kRunnableStateValue, new_value = kNativeStateValue, no flags.
  vixl32::Label retry;
  ___ Bind(&retry);
  ___ Ldrex(scratch, MemOperand(tr, thread_flags_offset.Int32Value()));
  ___ Mov(scratch2, kNativeStateValue);
  // If any flags are set, go to the slow path.
  ___ Cmp(scratch, kRunnableStateValue);
  ___ B(ne, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
  ___ Dmb(DmbOptions::ISH);  // Memory barrier "any-store" for the "release" operation.
  ___ Strex(scratch, scratch2, MemOperand(tr, thread_flags_offset.Int32Value()));
  ___ Cmp(scratch, 0);
  ___ B(ne, &retry);

  // Clear `self->tlsPtr_.held_mutexes[kMutatorLock]`; `scratch` holds 0 at this point.
  ___ Str(scratch, MemOperand(tr, thread_held_mutex_mutator_lock_offset.Int32Value()));
}

void ArmVIXLJNIMacroAssembler::TryToTransitionFromNativeToRunnable(
    JNIMacroLabel* label,
    ArrayRef<const ManagedRegister> scratch_regs,
    ManagedRegister return_reg) {
  constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative);
  constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable);
  constexpr ThreadOffset32 thread_flags_offset = Thread::ThreadFlagsOffset<kArmPointerSize>();
  constexpr ThreadOffset32 thread_held_mutex_mutator_lock_offset =
      Thread::HeldMutexOffset<kArmPointerSize>(kMutatorLock);
  constexpr ThreadOffset32 thread_mutator_lock_offset =
      Thread::MutatorLockOffset<kArmPointerSize>();

  // There must be at least two scratch registers.
  DCHECK_GE(scratch_regs.size(), 2u);
  DCHECK(!scratch_regs[0].AsArm().Overlaps(return_reg.AsArm()));
  vixl32::Register scratch = AsVIXLRegister(scratch_regs[0].AsArm());
  DCHECK(!scratch_regs[1].AsArm().Overlaps(return_reg.AsArm()));
  vixl32::Register scratch2 = AsVIXLRegister(scratch_regs[1].AsArm());

  // CAS acquire, old_value = kNativeStateValue, new_value = kRunnableStateValue, no flags.
  vixl32::Label retry;
  ___ Bind(&retry);
  ___ Ldrex(scratch, MemOperand(tr, thread_flags_offset.Int32Value()));
  // If any flags are set, or the state is not Native, go to the slow path.
  // (While the thread can theoretically transition between different Suspended states,
  // it would be very unexpected to see a state other than Native at this point.)
  ___ Eors(scratch2, scratch, kNativeStateValue);
  ___ B(ne, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
  static_assert(kRunnableStateValue == 0u);
  ___ Strex(scratch, scratch2, MemOperand(tr, thread_flags_offset.Int32Value()));
  ___ Cmp(scratch, 0);
  ___ B(ne, &retry);
  ___ Dmb(DmbOptions::ISH);  // Memory barrier "load-any" for the "acquire" operation.

  // Set `self->tlsPtr_.held_mutexes[kMutatorLock]` to the mutator lock.
  ___ Ldr(scratch, MemOperand(tr, thread_mutator_lock_offset.Int32Value()));
  ___ Str(scratch, MemOperand(tr, thread_held_mutex_mutator_lock_offset.Int32Value()));
}

void ArmVIXLJNIMacroAssembler::SuspendCheck(JNIMacroLabel* label) {
  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
  vixl32::Register scratch = temps.Acquire();
  asm_.LoadFromOffset(
      kLoadWord, scratch, tr, Thread::ThreadFlagsOffset<kArmPointerSize>().Int32Value());
  ___ Tst(scratch, Thread::SuspendOrCheckpointRequestFlags());
  ___ BPreferNear(ne, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
  // TODO: think about using CBNZ here.
}

void ArmVIXLJNIMacroAssembler::ExceptionPoll(JNIMacroLabel* label) {
  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
  vixl32::Register scratch = temps.Acquire();
  asm_.LoadFromOffset(
      kLoadWord, scratch, tr, Thread::ExceptionOffset<kArmPointerSize>().Int32Value());
  ___ Cmp(scratch, 0);
  ___ BPreferNear(ne, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
  // TODO: think about using CBNZ here.
}

void ArmVIXLJNIMacroAssembler::DeliverPendingException() {
  // Pass exception object as argument.
  // Don't care about preserving r0 as this won't return.
  // Note: The scratch register from `ExceptionPoll()` may have been clobbered.
  asm_.LoadFromOffset(
      kLoadWord, r0, tr, Thread::ExceptionOffset<kArmPointerSize>().Int32Value());
  ___ Ldr(lr,
          MemOperand(tr,
                     QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, pDeliverException).Int32Value()));
  ___ Blx(lr);
}

std::unique_ptr<JNIMacroLabel> ArmVIXLJNIMacroAssembler::CreateLabel() {
  return std::unique_ptr<JNIMacroLabel>(new ArmVIXLJNIMacroLabel());
}

void ArmVIXLJNIMacroAssembler::Jump(JNIMacroLabel* label) {
  CHECK(label != nullptr);
  ___ B(ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
}
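// TestGcMarking() branches on `Thread::is_gc_marking`. With Baker read barriers
// the value is already cached in the dedicated marking register (MR), so no load
// is needed; otherwise it is loaded from the Thread* on demand.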
void ArmVIXLJNIMacroAssembler::TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) {
  CHECK(label != nullptr);

  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
  vixl32::Register test_reg;
  DCHECK_EQ(Thread::IsGcMarkingSize(), 4u);
  DCHECK(kUseReadBarrier);
  if (kUseBakerReadBarrier) {
    // TestGcMarking() is used in the JNI stub entry when the marking register is up to date.
    if (kIsDebugBuild && emit_run_time_checks_in_debug_mode_) {
      vixl32::Register temp = temps.Acquire();
      asm_.GenerateMarkingRegisterCheck(temp);
    }
    test_reg = mr;
  } else {
    test_reg = temps.Acquire();
    ___ Ldr(test_reg, MemOperand(tr, Thread::IsGcMarkingOffset<kArmPointerSize>().Int32Value()));
  }
  switch (cond) {
    case JNIMacroUnaryCondition::kZero:
      ___ CompareAndBranchIfZero(test_reg, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
      break;
    case JNIMacroUnaryCondition::kNotZero:
      ___ CompareAndBranchIfNonZero(test_reg, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
      break;
    default:
      LOG(FATAL) << "Not implemented unary condition: " << static_cast<int>(cond);
      UNREACHABLE();
  }
}

void ArmVIXLJNIMacroAssembler::TestMarkBit(ManagedRegister mref,
                                           JNIMacroLabel* label,
                                           JNIMacroUnaryCondition cond) {
  DCHECK(kUseBakerReadBarrier);
  vixl32::Register ref = AsVIXLRegister(mref.AsArm());
  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
  vixl32::Register scratch = temps.Acquire();
  ___ Ldr(scratch, MemOperand(ref, mirror::Object::MonitorOffset().SizeValue()));
  static_assert(LockWord::kMarkBitStateSize == 1u);
  ___ Tst(scratch, LockWord::kMarkBitStateMaskShifted);
  switch (cond) {
    case JNIMacroUnaryCondition::kZero:
      ___ B(eq, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
      break;
    case JNIMacroUnaryCondition::kNotZero:
      ___ B(ne, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
      break;
    default:
      LOG(FATAL) << "Not implemented unary condition: " << static_cast<int>(cond);
      UNREACHABLE();
  }
}

void ArmVIXLJNIMacroAssembler::Bind(JNIMacroLabel* label) {
  CHECK(label != nullptr);
  ___ Bind(ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
}

void ArmVIXLJNIMacroAssembler::MemoryBarrier(ManagedRegister scratch ATTRIBUTE_UNUSED) {
  UNIMPLEMENTED(FATAL);
}

void ArmVIXLJNIMacroAssembler::Load(ArmManagedRegister dest,
                                    vixl32::Register base,
                                    int32_t offset,
                                    size_t size) {
  if (dest.IsNoRegister()) {
    CHECK_EQ(0u, size) << dest;
  } else if (dest.IsCoreRegister()) {
    vixl::aarch32::Register dst = AsVIXLRegister(dest);
    CHECK(!dst.Is(sp)) << dest;
    UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
    temps.Exclude(dst);
    if (size == 1u) {
      ___ Ldrb(dst, MemOperand(base, offset));
    } else {
      CHECK_EQ(4u, size) << dest;
      ___ Ldr(dst, MemOperand(base, offset));
    }
  } else if (dest.IsRegisterPair()) {
    CHECK_EQ(8u, size) << dest;
    ___ Ldr(AsVIXLRegisterPairLow(dest), MemOperand(base, offset));
    ___ Ldr(AsVIXLRegisterPairHigh(dest), MemOperand(base, offset + 4));
  } else if (dest.IsSRegister()) {
    ___ Vldr(AsVIXLSRegister(dest), MemOperand(base, offset));
  } else {
    CHECK(dest.IsDRegister()) << dest;
    ___ Vldr(AsVIXLDRegister(dest), MemOperand(base, offset));
  }
}

}  // namespace arm
}  // namespace art