//===- subzero/src/IceTargetLoweringX8632.h - x86-32 lowering ---*- C++ -*-===// // // The Subzero Code Generator // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// /// /// \file /// \brief Declares the TargetLoweringX8632 class, which implements the /// TargetLowering interface for the x86-32 architecture. /// //===----------------------------------------------------------------------===// #ifndef SUBZERO_SRC_ICETARGETLOWERINGX8632_H #define SUBZERO_SRC_ICETARGETLOWERINGX8632_H #include "IceAssemblerX8632.h" #include "IceDefs.h" #include "IceInst.h" #include "IceInstX8632.h" #include "IceRegistersX8632.h" #include "IceSwitchLowering.h" #include "IceTargetLoweringX86.h" #include "IceTargetLoweringX86RegClass.h" #include "IceUtils.h" #include #include #include namespace Ice { namespace X8632 { using namespace ::Ice::X86; constexpr Type WordType = IceType_i32; class BoolFoldingEntry { BoolFoldingEntry(const BoolFoldingEntry &) = delete; public: BoolFoldingEntry() = default; explicit BoolFoldingEntry(Inst *I); BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default; /// Instr is the instruction producing the i1-type variable of interest. Inst *Instr = nullptr; /// IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr). bool IsComplex = false; /// IsLiveOut is initialized conservatively to true, and is set to false when /// we encounter an instruction that ends Var's live range. We disable the /// folding optimization when Var is live beyond this basic block. Note that /// if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will /// always be true and the folding optimization will never be performed. bool IsLiveOut = true; // NumUses counts the number of times Var is used as a source operand in the // basic block. 
If IsComplex is true and there is more than one use of Var, // then the folding optimization is disabled for Var. uint32_t NumUses = 0; }; class BoolFolding { public: enum BoolFoldingProducerKind { PK_None, // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative. PK_Icmp32, PK_Icmp64, PK_Fcmp, PK_Trunc, PK_Arith // A flag-setting arithmetic instruction. }; /// Currently the actual enum values are not used (other than CK_None), but we /// go ahead and produce them anyway for symmetry with the /// BoolFoldingProducerKind. enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext }; private: BoolFolding(const BoolFolding &) = delete; BoolFolding &operator=(const BoolFolding &) = delete; public: BoolFolding() = default; static BoolFoldingProducerKind getProducerKind(const Inst *Instr); static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr); static bool hasComplexLowering(const Inst *Instr); static bool isValidFolding(BoolFoldingProducerKind ProducerKind, BoolFoldingConsumerKind ConsumerKind); void init(CfgNode *Node); const Inst *getProducerFor(const Operand *Opnd) const; void dump(const Cfg *Func) const; private: /// Returns true if Producers contains a valid entry for the given VarNum. bool containsValid(SizeT VarNum) const { auto Element = Producers.find(VarNum); return Element != Producers.end() && Element->second.Instr != nullptr; } void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; } void invalidateProducersOnStore(const Inst *Instr); /// Producers maps Variable::Number to a BoolFoldingEntry. 
CfgUnorderedMap Producers; }; class TargetX8632 : public TargetX86 { TargetX8632() = delete; TargetX8632(const TargetX8632 &) = delete; TargetX8632 &operator=(const TargetX8632 &) = delete; friend class BoolFolding; public: using BrCond = CondX86::BrCond; using CmppsCond = CondX86::CmppsCond; using SegmentRegisters = X86OperandMem::SegmentRegisters; using InstX86Br = Insts::Br; using InstX86FakeRMW = Insts::FakeRMW; using InstX86Label = Insts::Label; ~TargetX8632() override = default; static void staticInit(GlobalContext *Ctx); static bool shouldBePooled(const Constant *C); static ::Ice::Type getPointerType(); void translateOm1() override; void translateO2() override; void doLoadOpt(); bool doBranchOpt(Inst *I, const CfgNode *NextNode) override; SizeT getNumRegisters() const override { return RegisterSet::Reg_NUM; } Inst *createLoweredMove(Variable *Dest, Variable *SrcVar) override { if (isVectorType(Dest->getType())) { return Insts::Movp::create(Func, Dest, SrcVar); } return Insts::Mov::create(Func, Dest, SrcVar); (void)Dest; (void)SrcVar; return nullptr; } Variable *getPhysicalRegister(RegNumT RegNum, Type Ty = IceType_void) override; const char *getRegName(RegNumT RegNum, Type Ty) const override; static const char *getRegClassName(RegClass C) { auto ClassNum = static_cast(C); assert(ClassNum < RCX86_NUM); switch (ClassNum) { default: assert(C < RC_Target); return regClassString(C); case RCX86_Is64To8: return "i64to8"; // 64-bit GPR truncable to i8 case RCX86_Is32To8: return "i32to8"; // 32-bit GPR truncable to i8 case RCX86_Is16To8: return "i16to8"; // 16-bit GPR truncable to i8 case RCX86_IsTrunc8Rcvr: return "i8from"; // 8-bit GPR truncable from wider GPRs case RCX86_IsAhRcvr: return "i8fromah"; // 8-bit GPR that ah can be assigned to } } SmallBitVector getRegisterSet(RegSetMask Include, RegSetMask Exclude) const override; const SmallBitVector & getRegistersForVariable(const Variable *Var) const override { RegClass RC = Var->getRegClass(); 
assert(static_cast(RC) < RCX86_NUM); return TypeToRegisterSet[RC]; } const SmallBitVector & getAllRegistersForVariable(const Variable *Var) const override { RegClass RC = Var->getRegClass(); assert(static_cast(RC) < RCX86_NUM); return TypeToRegisterSetUnfiltered[RC]; } const SmallBitVector &getAliasesForRegister(RegNumT Reg) const override { Reg.assertIsValid(); return RegisterAliases[Reg]; } bool hasFramePointer() const override { return IsEbpBasedFrame; } void setHasFramePointer() override { IsEbpBasedFrame = true; } RegNumT getStackReg() const override { return RegX8632::Reg_esp; } RegNumT getFrameReg() const override { return RegX8632::Reg_ebp; } RegNumT getFrameOrStackReg() const override { // If the stack pointer needs to be aligned, then the frame pointer is // unaligned, so always use the stack pointer. if (needsStackPointerAlignment()) return getStackReg(); return IsEbpBasedFrame ? getFrameReg() : getStackReg(); } size_t typeWidthInBytesOnStack(Type Ty) const override { // Round up to the next multiple of WordType bytes. const uint32_t WordSizeInBytes = typeWidthInBytes(WordType); return Utils::applyAlignment(typeWidthInBytes(Ty), WordSizeInBytes); } uint32_t getStackAlignment() const override { return X86_STACK_ALIGNMENT_BYTES; } bool needsStackPointerAlignment() const override { // If the ABI's stack alignment is smaller than the vector size (16 bytes), // use the (realigned) stack pointer for addressing any stack variables. return X86_STACK_ALIGNMENT_BYTES < 16; } void reserveFixedAllocaArea(size_t Size, size_t Align) override { FixedAllocaSizeBytes = Size; assert(llvm::isPowerOf2_32(Align)); FixedAllocaAlignBytes = Align; PrologEmitsFixedAllocas = true; } /// Returns the (negative) offset from ebp/rbp where the fixed Allocas start. 
int32_t getFrameFixedAllocaOffset() const override { return FixedAllocaSizeBytes - (SpillAreaSizeBytes - maxOutArgsSizeBytes()); } virtual uint32_t maxOutArgsSizeBytes() const override { return MaxOutArgsSizeBytes; } virtual void updateMaxOutArgsSizeBytes(uint32_t Size) { MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, Size); } bool shouldSplitToVariable64On32(Type Ty) const override { return Ty == IceType_i64; } SizeT getMinJumpTableSize() const override { return 4; } void emitVariable(const Variable *Var) const override; void emit(const ConstantInteger32 *C) const final; void emit(const ConstantInteger64 *C) const final; void emit(const ConstantFloat *C) const final; void emit(const ConstantDouble *C) const final; void emit(const ConstantUndef *C) const final; void emit(const ConstantRelocatable *C) const final; void initNodeForLowering(CfgNode *Node) override; Operand *loOperand(Operand *Operand); Operand *hiOperand(Operand *Operand); void addProlog(CfgNode *Node) override; void finishArgumentLowering(Variable *Arg, Variable *FramePtr, size_t BasicFrameOffset, size_t StackAdjBytes, size_t &InArgsSizeBytes); void addEpilog(CfgNode *Node) override; Operand *legalizeUndef(Operand *From, RegNumT RegNum = RegNumT()); protected: void postLower() override; void lowerAlloca(const InstAlloca *Instr) override; void lowerArguments() override; void lowerArithmetic(const InstArithmetic *Instr) override; void lowerAssign(const InstAssign *Instr) override; void lowerBr(const InstBr *Instr) override; void lowerBreakpoint(const InstBreakpoint *Instr) override; void lowerCall(const InstCall *Instr) override; void lowerCast(const InstCast *Instr) override; void lowerExtractElement(const InstExtractElement *Instr) override; void lowerFcmp(const InstFcmp *Instr) override; void lowerIcmp(const InstIcmp *Instr) override; void lowerIntrinsic(const InstIntrinsic *Instr) override; void lowerInsertElement(const InstInsertElement *Instr) override; void lowerLoad(const InstLoad *Instr) 
override; void lowerPhi(const InstPhi *Instr) override; void lowerRet(const InstRet *Instr) override; void lowerSelect(const InstSelect *Instr) override; void lowerShuffleVector(const InstShuffleVector *Instr) override; void lowerStore(const InstStore *Instr) override; void lowerSwitch(const InstSwitch *Instr) override; void lowerUnreachable(const InstUnreachable *Instr) override; void lowerOther(const Inst *Instr) override; void lowerRMW(const InstX86FakeRMW *RMW); void prelowerPhis() override; uint32_t getCallStackArgumentsSizeBytes(const CfgVector &ArgTypes, Type ReturnType); uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) override; void genTargetHelperCallFor(Inst *Instr) override; /// OptAddr wraps all the possible operands that an x86 address might have. struct OptAddr { Variable *Base = nullptr; Variable *Index = nullptr; uint16_t Shift = 0; int32_t Offset = 0; ConstantRelocatable *Relocatable = nullptr; }; // Builds information for a canonical address expresion: // (Base, Index, Shift) X86OperandMem *computeAddressOpt(const Inst *Instr, Type MemType, Operand *Addr); void doAddressOptOther() override; void doAddressOptLoad() override; void doAddressOptStore() override; void doAddressOptLoadSubVector() override; void doAddressOptStoreSubVector() override; void doMockBoundsCheck(Operand *Opnd) override; /// Naive lowering of cmpxchg. void lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr, Operand *Expected, Operand *Desired); /// Attempt a more optimized lowering of cmpxchg. Returns true if optimized. bool tryOptimizedCmpxchgCmpBr(Variable *DestPrev, Operand *Ptr, Operand *Expected, Operand *Desired); void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr, Operand *Val); void lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, Operand *FirstVal, Operand *SecondVal); /// Load from memory for a given type. void typedLoad(Type Ty, Variable *Dest, Variable *Base, Constant *Offset); /// Store to memory for a given type. 
void typedStore(Type Ty, Variable *Value, Variable *Base, Constant *Offset);
/// Copy memory of given type from Src to Dest using OffsetAmt on both.
void copyMemory(Type Ty, Variable *Dest, Variable *Src, int32_t OffsetAmt);
/// Replace some calls to memcpy with inline instructions.
void lowerMemcpy(Operand *Dest, Operand *Src, Operand *Count);
/// Replace some calls to memmove with inline instructions.
void lowerMemmove(Operand *Dest, Operand *Src, Operand *Count);
/// Replace some calls to memset with inline instructions.
void lowerMemset(Operand *Dest, Operand *Val, Operand *Count);

/// Lower an indirect jump adding sandboxing when needed.
void lowerIndirectJump(Variable *JumpTarget);

/// Check the comparison is in [Min,Max]. The flags register will be modified
/// with:
///   - below equal, if in range
///   - above, set if not in range
/// The index into the range is returned.
Operand *lowerCmpRange(Operand *Comparison, uint64_t Min, uint64_t Max);
/// Lowering of a cluster of switch cases. If the case is not matched control
/// will pass to the default label provided. If the default label is nullptr
/// then control will fall through to the next instruction. DoneCmp should be
/// true if the flags contain the result of a comparison with the Comparison.
void lowerCaseCluster(const CaseCluster &Case, Operand *Src0, bool DoneCmp,
                      CfgNode *DefaultLabel = nullptr);

/// Pointer to a two-operand (Variable *, Operand *) member helper of this
/// class; expandAtomicRMWAsCmpxchg() takes one for each of op_lo and op_hi.
using LowerBinOp = void (TargetX8632::*)(Variable *, Operand *);
void expandAtomicRMWAsCmpxchg(LowerBinOp op_lo, LowerBinOp op_hi,
                              Variable *Dest, Operand *Ptr, Operand *Val);

void eliminateNextVectorSextInstruction(Variable *SignExtendedResult);

void emitStackProbe(size_t StackSizeBytes);

/// Emit just the call instruction (without argument or return variable
/// processing), sandboxing if needed.
Inst *emitCallToTarget(Operand *CallTarget, Variable *ReturnReg,
                       size_t NumVariadicFpArgs = 0);
/// Materialize the moves needed to return a value of the specified type.
Variable *moveReturnValueToRegister(Operand *Value, Type ReturnType);

/// Emit a jump table to the constant pool.
void emitJumpTable(const Cfg *Func,
                   const InstJumpTable *JumpTable) const override;

/// Emit a fake use of esp to make sure esp stays alive for the entire
/// function. Otherwise some esp adjustments get dead-code eliminated.
// NOTE(review): the Context.insert call below carries no explicit template
// argument in this copy; per the comment above it is presumably a fake-use
// instruction -- confirm against upstream.
void keepEspLiveAtExit() {
  Variable *esp =
      Func->getTarget()->getPhysicalRegister(getStackReg(), WordType);
  Context.insert(esp);
}

/// Operand legalization helpers. To deal with address mode constraints, the
/// helpers will create a new Operand and emit instructions that guarantee
/// that the Operand kind is one of those indicated by the LegalMask (a
/// bitmask of allowed kinds). If the input Operand is known to already meet
/// the constraints, it may be simply returned as the result, without creating
/// any new instructions or operands.
enum OperandLegalization {
  Legal_None = 0,
  Legal_Reg = 1 << 0, // physical register, not stack location
  Legal_Imm = 1 << 1,
  Legal_Mem = 1 << 2, // includes [eax+4*ecx] as well as [esp+12]
  Legal_Rematerializable = 1 << 3,
  Legal_AddrAbs = 1 << 4, // ConstantRelocatable doesn't have to add RebasePtr
  // Every kind except Legal_Rematerializable and Legal_AddrAbs.
  Legal_Default = ~(Legal_Rematerializable | Legal_AddrAbs)
  // TODO(stichnot): Figure out whether this default works for x86-64.
};
/// Bitmask of OperandLegalization values.
using LegalMask = uint32_t;
Operand *legalize(Operand *From, LegalMask Allowed = Legal_Default,
                  RegNumT RegNum = RegNumT());
Variable *legalizeToReg(Operand *From, RegNumT RegNum = RegNumT());
/// Legalize the first source operand for use in the cmp instruction.
Operand *legalizeSrc0ForCmp(Operand *Src0, Operand *Src1);
/// Turn a pointer operand into a memory operand that can be used by a real
/// load/store operation. Legalizes the operand as well. This is a nop if the
/// operand is already a legal memory operand.
X86OperandMem *formMemoryOperand(Operand *Ptr, Type Ty,
                                 bool DoLegalize = true);

Variable *makeReg(Type Ty, RegNumT RegNum = RegNumT());
static Type stackSlotType();

/// Default MaxSize argument for largestTypeInSize() and
/// firstTypeThatFitsSize().
static constexpr uint32_t NoSizeLimit = 0;
/// Returns the largest type which is equal to or larger than Size bytes. The
/// type is suitable for copying memory i.e. a load and store will be a single
/// instruction (for example x86 will get f64 not i64).
// NOTE(review): given the function name, "equal to or smaller than Size
// bytes" may be what is meant above -- confirm against the implementation.
static Type largestTypeInSize(uint32_t Size, uint32_t MaxSize = NoSizeLimit);
/// Returns the smallest type which is equal to or larger than Size bytes. If
/// one doesn't exist then the largest type smaller than Size bytes is
/// returned. The type is suitable for memory copies as described at
/// largestTypeInSize.
static Type firstTypeThatFitsSize(uint32_t Size,
                                  uint32_t MaxSize = NoSizeLimit);

Variable *copyToReg8(Operand *Src, RegNumT RegNum = RegNumT());
Variable *copyToReg(Operand *Src, RegNumT RegNum = RegNumT());

/// Returns a register containing all zeros, without affecting the FLAGS
/// register, using the best instruction for the type.
Variable *makeZeroedRegister(Type Ty, RegNumT RegNum = RegNumT());

/// \name Returns a vector in a register with the given constant entries.
/// @{
Variable *makeVectorOfZeros(Type Ty, RegNumT RegNum = RegNumT());
Variable *makeVectorOfOnes(Type Ty, RegNumT RegNum = RegNumT());
Variable *makeVectorOfMinusOnes(Type Ty, RegNumT RegNum = RegNumT());
Variable *makeVectorOfHighOrderBits(Type Ty, RegNumT RegNum = RegNumT());
Variable *makeVectorOfFabsMask(Type Ty, RegNumT RegNum = RegNumT());
/// @}

/// Return a memory operand corresponding to a stack allocated Variable.
X86OperandMem *getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
                                            uint32_t Offset = 0);

/// The following are helpers that insert lowered x86 instructions with
/// minimal syntactic overhead, so that the lowering code can look as close to
/// assembly as practical.
// NOTE(review): every Context.insert(...) call in this run is written without
// an explicit template argument in this copy, so nothing here selects which
// instruction class is inserted; the same applies to the bare llvm::dyn_cast.
// The arguments were presumably lost -- confirm against upstream.
void _adc(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _adc_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
  Context.insert(DestSrc0, Src1);
}
void _add(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _add_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
  Context.insert(DestSrc0, Src1);
}
void _addps(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _addss(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _add_sp(Operand *Adjustment);
void _and(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _andnps(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _andps(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _and_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
  Context.insert(DestSrc0, Src1);
}
void _blendvps(Variable *Dest, Operand *Src0, Operand *Src1) {
  Context.insert(Dest, Src0, Src1);
}
// Branch helpers. The three CfgNode-targeted overloads pass InstX86Br::Far;
// the label-targeted overload defaults Kind to InstX86Br::Near.
void _br(BrCond Condition, CfgNode *TargetTrue, CfgNode *TargetFalse) {
  Context.insert(TargetTrue, TargetFalse, Condition, InstX86Br::Far);
}
void _br(CfgNode *Target) { Context.insert(Target, InstX86Br::Far); }
void _br(BrCond Condition, CfgNode *Target) {
  Context.insert(Target, Condition, InstX86Br::Far);
}
void _br(BrCond Condition, InstX86Label *Label,
         InstX86Br::Mode Kind = InstX86Br::Near) {
  Context.insert(Label, Condition, Kind);
}
void _bsf(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _bsr(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _bswap(Variable *SrcDest) { Context.insert(SrcDest); }
void _cbwdq(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _cmov(Variable *Dest, Operand *Src0, BrCond Condition) {
  Context.insert(Dest, Src0, Condition);
}
void _cmp(Operand *Src0, Operand *Src1) { Context.insert(Src0, Src1); }
void _cmpps(Variable *Dest, Operand *Src0, CmppsCond Condition) {
  Context.insert(Dest, Src0, Condition);
}
// Inserts the cmpxchg itself, then two follow-up instructions on Eax with a
// _set_dest_redefined() call between them.
void _cmpxchg(Operand *DestOrAddr, Variable *Eax, Variable *Desired,
              bool Locked) {
  Context.insert(DestOrAddr, Eax, Desired, Locked);
  // Mark eax as possibly modified by cmpxchg.
  Context.insert(Eax, llvm::dyn_cast(DestOrAddr));
  _set_dest_redefined();
  Context.insert(Eax);
}
// Same pattern as _cmpxchg, applied to Edx and then to Eax.
void _cmpxchg8b(X86OperandMem *Addr, Variable *Edx, Variable *Eax,
                Variable *Ecx, Variable *Ebx, bool Locked) {
  Context.insert(Addr, Edx, Eax, Ecx, Ebx, Locked);
  // Mark edx, and eax as possibly modified by cmpxchg8b.
  Context.insert(Edx);
  _set_dest_redefined();
  Context.insert(Edx);
  Context.insert(Eax);
  _set_dest_redefined();
  Context.insert(Eax);
}
void _cvt(Variable *Dest, Operand *Src0, Insts::Cvt::CvtVariant Variant) {
  Context.insert(Dest, Src0, Variant);
}
void _round(Variable *Dest, Operand *Src0, Operand *Imm) {
  Context.insert(Dest, Src0, Imm);
}
void _div(Variable *Dest, Operand *Src0, Operand *Src1) {
  Context.insert(Dest, Src0, Src1);
}
void _divps(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _divss(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _fld(Operand *Src0) { Context.insert(Src0); }
void _fstp(Variable *Dest) { Context.insert(Dest); }
void _idiv(Variable *Dest, Operand *Src0, Operand *Src1) {
  Context.insert(Dest, Src0, Src1);
}
void _imul(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _imul_imm(Variable *Dest, Operand *Src0, Constant *Imm) {
  Context.insert(Dest, Src0, Imm);
}
void _insertps(Variable *Dest, Operand *Src0, Operand *Src1) {
  Context.insert(Dest, Src0, Src1);
}
void _int3() { Context.insert(); }
void _jmp(Operand *Target) { Context.insert(Target); }
void _lea(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _link_bp();
void _push_reg(RegNumT RegNum);
void _pop_reg(RegNumT RegNum);
void _mfence() { Context.insert(); }
/// Moves can be used to redefine registers, creating "partial kills" for
/// liveness. Mark where moves are used in this way.
// NOTE(review): every Context.insert(...) call in this run is written without
// an explicit template argument in this copy, so nothing here selects which
// instruction class is inserted; the same applies to the bare llvm::dyn_cast.
// The arguments were presumably lost -- confirm against upstream.
void _redefined(Inst *MovInst, bool IsRedefinition = true) {
  if (IsRedefinition)
    MovInst->setDestRedefined();
}
/// If Dest=nullptr is passed in, then a new variable is created, marked as
/// infinite register allocation weight, and returned through the in/out Dest
/// argument.
Insts::Mov *_mov(Variable *&Dest, Operand *Src0, RegNumT RegNum = RegNumT()) {
  // The new variable takes its type from Src0.
  if (Dest == nullptr)
    Dest = makeReg(Src0->getType(), RegNum);
  return Context.insert(Dest, Src0);
}
void _mov_sp(Operand *NewValue);
Insts::Movp *_movp(Variable *Dest, Operand *Src0) {
  return Context.insert(Dest, Src0);
}
void _movd(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _movq(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _movss(Variable *Dest, Variable *Src0) { Context.insert(Dest, Src0); }
void _movsx(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
Insts::Movzx *_movzx(Variable *Dest, Operand *Src0) {
  return Context.insert(Dest, Src0);
}
void _maxss(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _minss(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _maxps(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _minps(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _mul(Variable *Dest, Variable *Src0, Operand *Src1) {
  Context.insert(Dest, Src0, Src1);
}
void _mulps(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _mulss(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _neg(Variable *SrcDest) { Context.insert(SrcDest); }
void _nop(SizeT Variant) { Context.insert(Variant); }
void _or(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _orps(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _or_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
  Context.insert(DestSrc0, Src1);
}
void _padd(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _padds(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _paddus(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _pand(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _pandn(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _pblendvb(Variable *Dest, Operand *Src0, Operand *Src1) {
  Context.insert(Dest, Src0, Src1);
}
void _pcmpeq(Variable *Dest, Operand *Src0,
             Type ArithmeticTypeOverride = IceType_void) {
  Context.insert(Dest, Src0, ArithmeticTypeOverride);
}
void _pcmpgt(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _pextr(Variable *Dest, Operand *Src0, Operand *Src1) {
  Context.insert(Dest, Src0, Src1);
}
void _pinsr(Variable *Dest, Operand *Src0, Operand *Src1) {
  Context.insert(Dest, Src0, Src1);
}
void _pmull(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _pmulhw(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _pmulhuw(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _pmaddwd(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _pmuludq(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _pop(Variable *Dest) { Context.insert(Dest); }
void _por(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _punpckl(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _punpckh(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _packss(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _packus(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _pshufb(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _pshufd(Variable *Dest, Operand *Src0, Operand *Src1) {
  Context.insert(Dest, Src0, Src1);
}
void _psll(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _psra(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _psrl(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _psub(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _psubs(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _psubus(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _push(Operand *Src0) { Context.insert(Src0); }
void _pxor(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _ret(Variable *Src0 = nullptr) { Context.insert(Src0); }
void _rol(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _round(Variable *Dest, Operand *Src, Constant *Imm) {
  Context.insert(Dest, Src, Imm);
}
void _sar(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _sbb(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _sbb_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
  Context.insert(DestSrc0, Src1);
}
void _setcc(Variable *Dest, BrCond Condition) {
  Context.insert(Dest, Condition);
}
void _shl(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _shld(Variable *Dest, Variable *Src0, Operand *Src1) {
  Context.insert(Dest, Src0, Src1);
}
void _shr(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _shrd(Variable *Dest, Variable *Src0, Operand *Src1) {
  Context.insert(Dest, Src0, Src1);
}
void _shufps(Variable *Dest, Operand *Src0, Operand *Src1) {
  Context.insert(Dest, Src0, Src1);
}
void _movmsk(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _sqrt(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _store(Operand *Value, X86Operand *Mem) { Context.insert(Value, Mem); }
void _storep(Variable *Value, X86OperandMem *Mem) {
  Context.insert(Value, Mem);
}
void _storeq(Operand *Value, X86OperandMem *Mem) {
  Context.insert(Value, Mem);
}
void _stored(Operand *Value, X86OperandMem *Mem) {
  Context.insert(Value, Mem);
}
void _sub(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _sub_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
  Context.insert(DestSrc0, Src1);
}
void _sub_sp(Operand *Adjustment);
void _subps(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _subss(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _test(Operand *Src0, Operand *Src1) { Context.insert(Src0, Src1); }
void _ucomiss(Operand *Src0, Operand *Src1) { Context.insert(Src0, Src1); }
void _ud2() { Context.insert(); }
void _unlink_bp();
void _xadd(Operand *Dest, Variable *Src, bool Locked) {
  Context.insert(Dest, Src, Locked);
  // The xadd exchanges Dest and Src (modifying Src). Model that update with
  // a FakeDef followed by a FakeUse.
  Context.insert(Src, llvm::dyn_cast(Dest));
  _set_dest_redefined();
  Context.insert(Src);
}
void _xchg(Operand *Dest, Variable *Src) {
  Context.insert(Dest, Src);
  // The xchg modifies Dest and Src -- model that update with a
  // FakeDef/FakeUse.
  Context.insert(Src, llvm::dyn_cast(Dest));
  _set_dest_redefined();
  Context.insert(Src);
}
void _xor(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _xorps(Variable *Dest, Operand *Src0) { Context.insert(Dest, Src0); }
void _xor_rmw(X86OperandMem *DestSrc0, Operand *Src1) {
  Context.insert(DestSrc0, Src1);
}

// The IACA markers are inserted only when BuildDefs::minimal() is false.
void _iaca_start() {
  if (!BuildDefs::minimal())
    Context.insert();
}
void _iaca_end() {
  if (!BuildDefs::minimal())
    Context.insert();
}

/// This class helps wrap IACA markers around the code generated by the
/// current scope. It means you don't need to put an end before each return.
class ScopedIacaMark { ScopedIacaMark(const ScopedIacaMark &) = delete; ScopedIacaMark &operator=(const ScopedIacaMark &) = delete; public: ScopedIacaMark(TargetX8632 *Lowering) : Lowering(Lowering) { Lowering->_iaca_start(); } ~ScopedIacaMark() { end(); } void end() { if (!Lowering) return; Lowering->_iaca_end(); Lowering = nullptr; } private: TargetX8632 *Lowering; }; bool optimizeScalarMul(Variable *Dest, Operand *Src0, int32_t Src1); void findRMW(); static uint32_t applyStackAlignment(uint32_t Value); bool IsEbpBasedFrame = false; #if defined(_WIN32) // Windows 32-bit only guarantees 4 byte stack alignment static constexpr uint32_t X86_STACK_ALIGNMENT_BYTES = 4; #else /// Stack alignment guaranteed by the System V ABI. static constexpr uint32_t X86_STACK_ALIGNMENT_BYTES = 16; #endif /// Stack alignment required by the currently lowered function. size_t RequiredStackAlignment = X86_STACK_ALIGNMENT_BYTES; size_t SpillAreaSizeBytes = 0; size_t FixedAllocaSizeBytes = 0; size_t FixedAllocaAlignBytes = 0; bool PrologEmitsFixedAllocas = false; uint32_t MaxOutArgsSizeBytes = 0; static std::array TypeToRegisterSet; static std::array TypeToRegisterSetUnfiltered; static std::array RegisterAliases; SmallBitVector RegsUsed; std::array PhysicalRegisters; // RebasePtr is a Variable that holds the Rebasing pointer (if any) for the // current sandboxing type. Variable *RebasePtr = nullptr; private: void lowerShift64(InstArithmetic::OpKind Op, Operand *Src0Lo, Operand *Src0Hi, Operand *Src1Lo, Variable *DestLo, Variable *DestHi); /// Emit the code for a combined operation and consumer instruction, or set /// the destination variable of the operation if Consumer == nullptr. 
void lowerIcmpAndConsumer(const InstIcmp *Icmp, const Inst *Consumer);
void lowerFcmpAndConsumer(const InstFcmp *Fcmp, const Inst *Consumer);
void lowerArithAndConsumer(const InstArithmetic *Arith, const Inst *Consumer);

/// Emit a setcc instruction if Consumer == nullptr; otherwise emit a
/// specialized version of Consumer.
void setccOrConsumer(BrCond Condition, Variable *Dest, const Inst *Consumer);

/// Emit a mov [1|0] instruction if Consumer == nullptr; otherwise emit a
/// specialized version of Consumer.
void movOrConsumer(bool IcmpResult, Variable *Dest, const Inst *Consumer);

/// Emit the code for instructions with a vector type.
void lowerIcmpVector(const InstIcmp *Icmp);
void lowerFcmpVector(const InstFcmp *Icmp);
void lowerSelectVector(const InstSelect *Instr);

/// Helpers for select lowering.
void lowerSelectMove(Variable *Dest, BrCond Cond, Operand *SrcT,
                     Operand *SrcF);
void lowerSelectIntMove(Variable *Dest, BrCond Cond, Operand *SrcT,
                        Operand *SrcF);
/// Generic helper to move an arbitrary type from Src to Dest.
void lowerMove(Variable *Dest, Operand *Src, bool IsRedefinition);

/// Optimizations for idiom recognition.
bool lowerOptimizeFcmpSelect(const InstFcmp *Fcmp, const InstSelect *Select);

/// lowerIcmp64 handles 64-bit icmp lowering.
void lowerIcmp64(const InstIcmp *Icmp, const Inst *Consumer); BoolFolding FoldingInfo; /// Helpers for lowering ShuffleVector /// @{ Variable *lowerShuffleVector_AllFromSameSrc(Operand *Src, SizeT Index0, SizeT Index1, SizeT Index2, SizeT Index3); static constexpr SizeT IGNORE_INDEX = 0x80000000u; Variable *lowerShuffleVector_TwoFromSameSrc(Operand *Src0, SizeT Index0, SizeT Index1, Operand *Src1, SizeT Index2, SizeT Index3); static constexpr SizeT UNIFIED_INDEX_0 = 0; static constexpr SizeT UNIFIED_INDEX_1 = 2; Variable *lowerShuffleVector_UnifyFromDifferentSrcs(Operand *Src0, SizeT Index0, Operand *Src1, SizeT Index1); static constexpr SizeT CLEAR_ALL_BITS = 0x80; SizeT PshufbMaskCount = 0; GlobalString lowerShuffleVector_NewMaskName(); ConstantRelocatable *lowerShuffleVector_CreatePshufbMask( int8_t Idx0, int8_t Idx1, int8_t Idx2, int8_t Idx3, int8_t Idx4, int8_t Idx5, int8_t Idx6, int8_t Idx7, int8_t Idx8, int8_t Idx9, int8_t Idx10, int8_t Idx11, int8_t Idx12, int8_t Idx13, int8_t Idx14, int8_t Idx15); void lowerShuffleVector_UsingPshufb(Variable *Dest, Operand *Src0, Operand *Src1, int8_t Idx0, int8_t Idx1, int8_t Idx2, int8_t Idx3, int8_t Idx4, int8_t Idx5, int8_t Idx6, int8_t Idx7, int8_t Idx8, int8_t Idx9, int8_t Idx10, int8_t Idx11, int8_t Idx12, int8_t Idx13, int8_t Idx14, int8_t Idx15); /// @} /// The following table summarizes the logic for lowering the fcmp /// instruction. There is one table entry for each of the 16 conditions. /// /// The first four columns describe the case when the operands are floating /// point scalar values. A comment in lowerFcmp() describes the lowering /// template. In the most general case, there is a compare followed by two /// conditional branches, because some fcmp conditions don't map to a single /// x86 conditional branch. However, in many cases it is possible to swap the /// operands in the comparison and have a single conditional branch. 
Since /// it's quite tedious to validate the table by hand, good execution tests are /// helpful. /// /// The last two columns describe the case when the operands are vectors of /// floating point values. For most fcmp conditions, there is a clear mapping /// to a single x86 cmpps instruction variant. Some fcmp conditions require /// special code to handle and these are marked in the table with a /// Cmpps_Invalid predicate. /// {@ static const struct TableFcmpType { uint32_t Default; bool SwapScalarOperands; CondX86::BrCond C1, C2; bool SwapVectorOperands; CondX86::CmppsCond Predicate; } TableFcmp[]; static const size_t TableFcmpSize; /// @} /// The following table summarizes the logic for lowering the icmp instruction /// for i32 and narrower types. Each icmp condition has a clear mapping to an /// x86 conditional branch instruction. /// {@ static const struct TableIcmp32Type { CondX86::BrCond Mapping; } TableIcmp32[]; static const size_t TableIcmp32Size; /// @} /// The following table summarizes the logic for lowering the icmp instruction /// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and /// conditional branches are needed. For the other conditions, three separate /// conditional branches are needed. 
/// {@ static const struct TableIcmp64Type { CondX86::BrCond C1, C2, C3; } TableIcmp64[]; static const size_t TableIcmp64Size; /// @} static CondX86::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { assert(static_cast(Cond) < TableIcmp32Size); return TableIcmp32[Cond].Mapping; } public: static std::unique_ptr<::Ice::TargetLowering> create(Cfg *Func) { return makeUnique(Func); } std::unique_ptr<::Ice::Assembler> createAssembler() const override { return makeUnique(); } private: ENABLE_MAKE_UNIQUE; explicit TargetX8632(Cfg *Func); }; class TargetDataX8632 final : public TargetDataLowering { TargetDataX8632() = delete; TargetDataX8632(const TargetDataX8632 &) = delete; TargetDataX8632 &operator=(const TargetDataX8632 &) = delete; public: ~TargetDataX8632() override = default; static std::unique_ptr create(GlobalContext *Ctx) { return makeUnique(Ctx); } void lowerGlobals(const VariableDeclarationList &Vars, const std::string &SectionSuffix) override; void lowerConstants() override; void lowerJumpTables() override; private: ENABLE_MAKE_UNIQUE; explicit TargetDataX8632(GlobalContext *Ctx) : TargetDataLowering(Ctx) {} template static void emitConstantPool(GlobalContext *Ctx); }; class TargetHeaderX86 : public TargetHeaderLowering { TargetHeaderX86() = delete; TargetHeaderX86(const TargetHeaderX86 &) = delete; TargetHeaderX86 &operator=(const TargetHeaderX86 &) = delete; public: ~TargetHeaderX86() = default; static std::unique_ptr create(GlobalContext *Ctx) { return makeUnique(Ctx); } private: ENABLE_MAKE_UNIQUE; explicit TargetHeaderX86(GlobalContext *Ctx) : TargetHeaderLowering(Ctx) {} }; } // end of namespace X8632 } // end of namespace Ice #endif // SUBZERO_SRC_ICETARGETLOWERINGX8632_H