Diffstat (limited to 'llvm/lib/Target/AArch64')
-rw-r--r--  llvm/lib/Target/AArch64/AArch64.td                              |   9
-rw-r--r--  llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp                   |  13
-rw-r--r--  llvm/lib/Target/AArch64/AArch64Combine.td                       |  20
-rw-r--r--  llvm/lib/Target/AArch64/AArch64FastISel.cpp                     | 529
-rw-r--r--  llvm/lib/Target/AArch64/AArch64FrameLowering.cpp                |  28
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp                 |   5
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp                 | 232
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.h                   |   3
-rw-r--r--  llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp                 |   6
-rw-r--r--  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td                  |  28
-rw-r--r--  llvm/lib/Target/AArch64/AArch64SchedA55.td                      |   1
-rw-r--r--  llvm/lib/Target/AArch64/AArch64Subtarget.h                      |   9
-rw-r--r--  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp          | 121
-rw-r--r--  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h            |  27
-rw-r--r--  llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp          |   2
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp        |  25
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h          |   8
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp    |  60
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp          |  51
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h            |   5
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp  |  30
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp  | 107
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp   |  49
-rw-r--r--  llvm/lib/Target/AArch64/SVEInstrFormats.td                      |  58
-rw-r--r--  llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp                    |  39
25 files changed, 918 insertions, 547 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index bdf2e517deda..133a6b16e979 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -147,12 +147,12 @@ def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
def FeatureZCZeroingGP : SubtargetFeature<"zcz-gp", "HasZeroCycleZeroingGP", "true",
"Has zero-cycle zeroing instructions for generic registers">;
-def FeatureZCZeroingFP : SubtargetFeature<"zcz-fp", "HasZeroCycleZeroingFP", "true",
- "Has zero-cycle zeroing instructions for FP registers">;
+def FeatureNoZCZeroingFP : SubtargetFeature<"no-zcz-fp", "HasZeroCycleZeroingFP", "false",
+ "Has no zero-cycle zeroing instructions for FP registers">;
def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
"Has zero-cycle zeroing instructions",
- [FeatureZCZeroingGP, FeatureZCZeroingFP]>;
+ [FeatureZCZeroingGP]>;
/// ... but the floating-point version doesn't quite work in rare cases on older
/// CPUs.
@@ -915,8 +915,7 @@ def ProcExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
FeatureLSLFast,
FeaturePerfMon,
FeaturePostRAScheduler,
- FeaturePredictableSelectIsExpensive,
- FeatureZCZeroingFP]>;
+ FeaturePredictableSelectIsExpensive]>;
def ProcExynosM4 : SubtargetFeature<"exynosm4", "ARMProcFamily", "ExynosM3",
"Samsung Exynos-M4 processors",
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 9b757f7aba5e..3373e6c91b7f 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -1091,17 +1091,16 @@ void AArch64AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI) {
void AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) {
Register DestReg = MI.getOperand(0).getReg();
if (STI->hasZeroCycleZeroingFP() && !STI->hasZeroCycleZeroingFPWorkaround()) {
- // Convert H/S/D register to corresponding Q register
+ // Convert H/S register to corresponding D register
if (AArch64::H0 <= DestReg && DestReg <= AArch64::H31)
- DestReg = AArch64::Q0 + (DestReg - AArch64::H0);
+ DestReg = AArch64::D0 + (DestReg - AArch64::H0);
else if (AArch64::S0 <= DestReg && DestReg <= AArch64::S31)
- DestReg = AArch64::Q0 + (DestReg - AArch64::S0);
- else {
+ DestReg = AArch64::D0 + (DestReg - AArch64::S0);
+ else
assert(AArch64::D0 <= DestReg && DestReg <= AArch64::D31);
- DestReg = AArch64::Q0 + (DestReg - AArch64::D0);
- }
+
MCInst MOVI;
- MOVI.setOpcode(AArch64::MOVIv2d_ns);
+ MOVI.setOpcode(AArch64::MOVID);
MOVI.addOperand(MCOperand::createReg(DestReg));
MOVI.addOperand(MCOperand::createImm(0));
EmitToStreamer(*OutStreamer, MOVI);
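
A quick aside on the EmitFMov0 hunk above: rewriting an H or S register to the D register with the same index relies on each register class being numbered consecutively, so the index survives a simple offset translation. Below is a minimal, self-contained C++ sketch of that remapping idea; the enum values are invented for illustration and are not the real AArch64 register numbers.

#include <cassert>

// Hypothetical, consecutively numbered register ids (not LLVM's enums).
enum Reg { H0 = 100, H31 = 131, S0 = 200, S31 = 231, D0 = 300, D31 = 331 };

// Map an H/S/D register to the D register with the same index.
unsigned toDRegister(unsigned R) {
  if (H0 <= R && R <= H31)
    return D0 + (R - H0);           // h<n> -> d<n>
  if (S0 <= R && R <= S31)
    return D0 + (R - S0);           // s<n> -> d<n>
  assert(D0 <= R && R <= D31 && "expected an H, S or D register");
  return R;                         // already a D register
}

int main() {
  assert(toDRegister(H0 + 3) == D0 + 3);    // h3  -> d3
  assert(toDRegister(S0 + 7) == D0 + 7);    // s7  -> d7
  assert(toDRegister(D0 + 12) == D0 + 12);  // d12 stays d12
  return 0;
}
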
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index d5ea2d3eee98..07608fc56990 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -127,7 +127,14 @@ def adjust_icmp_imm : GICombineRule <
(apply [{ applyAdjustICmpImmAndPred(*${root}, ${matchinfo}, B, Observer); }])
>;
-def icmp_lowering : GICombineGroup<[adjust_icmp_imm]>;
+def swap_icmp_operands : GICombineRule <
+ (defs root:$root),
+ (match (wip_match_opcode G_ICMP):$root,
+ [{ return trySwapICmpOperands(*${root}, MRI); }]),
+ (apply [{ applySwapICmpOperands(*${root}, Observer); }])
+>;
+
+def icmp_lowering : GICombineGroup<[adjust_icmp_imm, swap_icmp_operands]>;
def extractvecelt_pairwise_add_matchdata : GIDefMatchData<"std::tuple<unsigned, LLT, Register>">;
def extractvecelt_pairwise_add : GICombineRule<
@@ -154,6 +161,14 @@ def build_vector_to_dup : GICombineRule<
def build_vector_lowering : GICombineGroup<[build_vector_to_dup]>;
+def bitfield_extract_from_sext_inreg : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$info),
+ (match (wip_match_opcode G_SEXT_INREG):$root,
+ [{ return matchBitfieldExtractFromSExtInReg(*${root}, MRI, ${info}); }]),
+ (apply [{ return Helper.applyBuildFn(*${root}, ${info}); }])>;
+
+def form_bitfield_extract : GICombineGroup<[bitfield_extract_from_sext_inreg]>;
+
// Post-legalization combines which should happen at all optimization levels.
// (E.g. ones that facilitate matching for the selector) For example, matching
// pseudos.
@@ -172,6 +187,7 @@ def AArch64PostLegalizerCombinerHelper
hoist_logic_op_with_same_opcode_hands,
redundant_and, xor_of_and_with_same_reg,
extractvecelt_pairwise_add, redundant_or,
- mul_const, redundant_sext_inreg]> {
+ mul_const, redundant_sext_inreg,
+ form_bitfield_extract]> {
let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
}
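
A note on the new swap_icmp_operands rule above: the .td change only registers the rule; the trySwapICmpOperands/applySwapICmpOperands helpers live in C++ and are not part of this diff. Whatever heuristic they use, commuting `a pred b` into `b pred' a` must use the swapped predicate (GT<->LT, GE<->LE, and the unsigned analogues), not the inverted one. The following self-contained C++ sketch checks that invariant with a hypothetical predicate enum standing in for the real one.

#include <cassert>

enum Pred { EQ, NE, SGT, SGE, SLT, SLE, UGT, UGE, ULT, ULE };

// Predicate to use after swapping the two compare operands.
Pred swapPred(Pred P) {
  switch (P) {
  case SGT: return SLT;
  case SGE: return SLE;
  case SLT: return SGT;
  case SLE: return SGE;
  case UGT: return ULT;
  case UGE: return ULE;
  case ULT: return UGT;
  case ULE: return UGE;
  default:  return P;               // EQ and NE are symmetric
  }
}

bool eval(Pred P, int A, int B) {
  unsigned UA = (unsigned)A, UB = (unsigned)B;
  switch (P) {
  case EQ:  return A == B;
  case NE:  return A != B;
  case SGT: return A > B;
  case SGE: return A >= B;
  case SLT: return A < B;
  case SLE: return A <= B;
  case UGT: return UA > UB;
  case UGE: return UA >= UB;
  case ULT: return UA < UB;
  case ULE: return UA <= UB;
  }
  return false;
}

int main() {
  // `A pred B` must agree with `B swapPred(pred) A` for every input.
  for (int A = -3; A <= 3; ++A)
    for (int B = -3; B <= 3; ++B)
      for (int P = EQ; P <= ULE; ++P)
        assert(eval((Pred)P, A, B) == eval(swapPred((Pred)P), B, A));
  return 0;
}
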
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index ecc68ccda03d..95b5699552b0 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -195,34 +195,32 @@ private:
const Value *Cond);
bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
bool optimizeSelect(const SelectInst *SI);
- std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
+ unsigned getRegForGEPIndex(const Value *Idx);
// Emit helper routines.
unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
const Value *RHS, bool SetFlags = false,
bool WantResult = true, bool IsZExt = false);
unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
- bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
- bool SetFlags = false, bool WantResult = true);
+ unsigned RHSReg, bool SetFlags = false,
+ bool WantResult = true);
unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
- bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
+ uint64_t Imm, bool SetFlags = false,
bool WantResult = true);
unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
- bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
- AArch64_AM::ShiftExtendType ShiftType,
+ unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
uint64_t ShiftImm, bool SetFlags = false,
bool WantResult = true);
unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
- bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
- AArch64_AM::ShiftExtendType ExtType,
- uint64_t ShiftImm, bool SetFlags = false,
+ unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
+ uint64_t ShiftImm, bool SetFlags = false,
bool WantResult = true);
// Emit functions.
bool emitCompareAndBranch(const BranchInst *BI);
bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
- bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
+ bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
MachineMemOperand *MMO = nullptr);
@@ -235,42 +233,34 @@ private:
unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
bool SetFlags = false, bool WantResult = true,
bool IsZExt = false);
- unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
+ unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
bool SetFlags = false, bool WantResult = true,
bool IsZExt = false);
- unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
- unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
- unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
- unsigned RHSReg, bool RHSIsKill,
+ unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
+ bool WantResult = true);
+ unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
bool WantResult = true);
unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
const Value *RHS);
unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
- bool LHSIsKill, uint64_t Imm);
+ uint64_t Imm);
unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
- bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
- uint64_t ShiftImm);
- unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
- unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
- unsigned Op1, bool Op1IsKill);
- unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
- unsigned Op1, bool Op1IsKill);
- unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
- unsigned Op1, bool Op1IsKill);
- unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
- unsigned Op1Reg, bool Op1IsKill);
- unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
- uint64_t Imm, bool IsZExt = true);
- unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
- unsigned Op1Reg, bool Op1IsKill);
- unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
- uint64_t Imm, bool IsZExt = true);
- unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
- unsigned Op1Reg, bool Op1IsKill);
- unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
- uint64_t Imm, bool IsZExt = false);
+ unsigned RHSReg, uint64_t ShiftImm);
+ unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
+ unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
+ unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
+ unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
+ unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
+ unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
+ bool IsZExt = true);
+ unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
+ unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
+ bool IsZExt = true);
+ unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
+ unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
+ bool IsZExt = false);
unsigned materializeInt(const ConstantInt *CI, MVT VT);
unsigned materializeFP(const ConstantFP *CFP, MVT VT);
@@ -414,8 +404,8 @@ unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
}
- // For the MachO large code model materialize the FP constant in code.
- if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
+ // For the large code model materialize the FP constant in code.
+ if (TM.getCodeModel() == CodeModel::Large) {
unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
const TargetRegisterClass *RC = Is64Bit ?
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
@@ -554,7 +544,7 @@ unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
bool Is64Bit = (VT == MVT::f64);
unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
- return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
+ return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
}
/// Check if the multiply is by a power-of-2 constant.
@@ -764,9 +754,7 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
unsigned Reg = getRegForValue(LHS);
if (!Reg)
return false;
- bool RegIsKill = hasTrivialKill(LHS);
- Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
- AArch64::sub_32);
+ Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
Addr.setOffsetReg(Reg);
return true;
}
@@ -862,9 +850,7 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
unsigned Reg = getRegForValue(LHS);
if (!Reg)
return false;
- bool RegIsKill = hasTrivialKill(LHS);
- Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
- AArch64::sub_32);
+ Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
Addr.setOffsetReg(Reg);
return true;
}
@@ -1064,26 +1050,22 @@ bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
if (Addr.getExtendType() == AArch64_AM::SXTW ||
Addr.getExtendType() == AArch64_AM::UXTW )
ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
- /*TODO:IsKill=*/false, Addr.getOffsetReg(),
- /*TODO:IsKill=*/false, Addr.getExtendType(),
+ Addr.getOffsetReg(), Addr.getExtendType(),
Addr.getShift());
else
ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
- /*TODO:IsKill=*/false, Addr.getOffsetReg(),
- /*TODO:IsKill=*/false, AArch64_AM::LSL,
+ Addr.getOffsetReg(), AArch64_AM::LSL,
Addr.getShift());
} else {
if (Addr.getExtendType() == AArch64_AM::UXTW)
ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
- /*Op0IsKill=*/false, Addr.getShift(),
- /*IsZExt=*/true);
+ Addr.getShift(), /*IsZExt=*/true);
else if (Addr.getExtendType() == AArch64_AM::SXTW)
ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
- /*Op0IsKill=*/false, Addr.getShift(),
- /*IsZExt=*/false);
+ Addr.getShift(), /*IsZExt=*/false);
else
ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
- /*Op0IsKill=*/false, Addr.getShift());
+ Addr.getShift());
}
if (!ResultReg)
return false;
@@ -1100,7 +1082,7 @@ bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
unsigned ResultReg;
if (Addr.getReg())
// Try to fold the immediate into the add instruction.
- ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
+ ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
else
ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
@@ -1199,7 +1181,6 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
unsigned LHSReg = getRegForValue(LHS);
if (!LHSReg)
return 0;
- bool LHSIsKill = hasTrivialKill(LHS);
if (NeedExtend)
LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
@@ -1208,15 +1189,14 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
if (C->isNegative())
- ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
- SetFlags, WantResult);
+ ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
+ WantResult);
else
- ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
+ ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
WantResult);
} else if (const auto *C = dyn_cast<Constant>(RHS))
if (C->isNullValue())
- ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
- WantResult);
+ ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
if (ResultReg)
return ResultReg;
@@ -1230,17 +1210,14 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
unsigned RHSReg = getRegForValue(SI->getOperand(0));
if (!RHSReg)
return 0;
- bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
- return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
- RHSIsKill, ExtendType, C->getZExtValue(),
- SetFlags, WantResult);
+ return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType,
+ C->getZExtValue(), SetFlags, WantResult);
}
unsigned RHSReg = getRegForValue(RHS);
if (!RHSReg)
return 0;
- bool RHSIsKill = hasTrivialKill(RHS);
- return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
- ExtendType, 0, SetFlags, WantResult);
+ return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
+ SetFlags, WantResult);
}
// Check if the mul can be folded into the instruction.
@@ -1258,10 +1235,8 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
unsigned RHSReg = getRegForValue(MulLHS);
if (!RHSReg)
return 0;
- bool RHSIsKill = hasTrivialKill(MulLHS);
- ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
- RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
- WantResult);
+ ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
+ ShiftVal, SetFlags, WantResult);
if (ResultReg)
return ResultReg;
}
@@ -1283,10 +1258,8 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
unsigned RHSReg = getRegForValue(SI->getOperand(0));
if (!RHSReg)
return 0;
- bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
- ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
- RHSIsKill, ShiftType, ShiftVal, SetFlags,
- WantResult);
+ ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
+ ShiftVal, SetFlags, WantResult);
if (ResultReg)
return ResultReg;
}
@@ -1297,18 +1270,15 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
unsigned RHSReg = getRegForValue(RHS);
if (!RHSReg)
return 0;
- bool RHSIsKill = hasTrivialKill(RHS);
if (NeedExtend)
RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
- return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
- SetFlags, WantResult);
+ return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
}
unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
- bool LHSIsKill, unsigned RHSReg,
- bool RHSIsKill, bool SetFlags,
+ unsigned RHSReg, bool SetFlags,
bool WantResult) {
assert(LHSReg && RHSReg && "Invalid register number.");
@@ -1339,14 +1309,14 @@ unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(LHSReg, getKillRegState(LHSIsKill))
- .addReg(RHSReg, getKillRegState(RHSIsKill));
+ .addReg(LHSReg)
+ .addReg(RHSReg);
return ResultReg;
}
unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
- bool LHSIsKill, uint64_t Imm,
- bool SetFlags, bool WantResult) {
+ uint64_t Imm, bool SetFlags,
+ bool WantResult) {
assert(LHSReg && "Invalid register number.");
if (RetVT != MVT::i32 && RetVT != MVT::i64)
@@ -1383,15 +1353,14 @@ unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
const MCInstrDesc &II = TII.get(Opc);
LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(LHSReg, getKillRegState(LHSIsKill))
+ .addReg(LHSReg)
.addImm(Imm)
.addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
return ResultReg;
}
unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
- bool LHSIsKill, unsigned RHSReg,
- bool RHSIsKill,
+ unsigned RHSReg,
AArch64_AM::ShiftExtendType ShiftType,
uint64_t ShiftImm, bool SetFlags,
bool WantResult) {
@@ -1426,15 +1395,14 @@ unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(LHSReg, getKillRegState(LHSIsKill))
- .addReg(RHSReg, getKillRegState(RHSIsKill))
+ .addReg(LHSReg)
+ .addReg(RHSReg)
.addImm(getShifterImm(ShiftType, ShiftImm));
return ResultReg;
}
unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
- bool LHSIsKill, unsigned RHSReg,
- bool RHSIsKill,
+ unsigned RHSReg,
AArch64_AM::ShiftExtendType ExtType,
uint64_t ShiftImm, bool SetFlags,
bool WantResult) {
@@ -1471,8 +1439,8 @@ unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(LHSReg, getKillRegState(LHSIsKill))
- .addReg(RHSReg, getKillRegState(RHSIsKill))
+ .addReg(LHSReg)
+ .addReg(RHSReg)
.addImm(getArithExtendImm(ExtType, ShiftImm));
return ResultReg;
}
@@ -1505,9 +1473,8 @@ bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
IsZExt) != 0;
}
-bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
- uint64_t Imm) {
- return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
+bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
+ return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
/*SetFlags=*/true, /*WantResult=*/false) != 0;
}
@@ -1525,24 +1492,22 @@ bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
unsigned LHSReg = getRegForValue(LHS);
if (!LHSReg)
return false;
- bool LHSIsKill = hasTrivialKill(LHS);
if (UseImm) {
unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
- .addReg(LHSReg, getKillRegState(LHSIsKill));
+ .addReg(LHSReg);
return true;
}
unsigned RHSReg = getRegForValue(RHS);
if (!RHSReg)
return false;
- bool RHSIsKill = hasTrivialKill(RHS);
unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
- .addReg(LHSReg, getKillRegState(LHSIsKill))
- .addReg(RHSReg, getKillRegState(RHSIsKill));
+ .addReg(LHSReg)
+ .addReg(RHSReg);
return true;
}
@@ -1557,13 +1522,12 @@ unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
/// First try to emit an add with an immediate operand using emitAddSub_ri. If
/// that fails, then try to materialize the immediate into a register and use
/// emitAddSub_rr instead.
-unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
- int64_t Imm) {
+unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
unsigned ResultReg;
if (Imm < 0)
- ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
+ ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
else
- ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
+ ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
if (ResultReg)
return ResultReg;
@@ -1572,7 +1536,7 @@ unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
if (!CReg)
return 0;
- ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
+ ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
return ResultReg;
}
@@ -1583,20 +1547,17 @@ unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
}
unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
- bool LHSIsKill, unsigned RHSReg,
- bool RHSIsKill, bool WantResult) {
- return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
- RHSIsKill, /*SetFlags=*/true, WantResult);
+ unsigned RHSReg, bool WantResult) {
+ return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
+ /*SetFlags=*/true, WantResult);
}
unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
- bool LHSIsKill, unsigned RHSReg,
- bool RHSIsKill,
+ unsigned RHSReg,
AArch64_AM::ShiftExtendType ShiftType,
uint64_t ShiftImm, bool WantResult) {
- return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
- RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
- WantResult);
+ return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
+ ShiftImm, /*SetFlags=*/true, WantResult);
}
unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
@@ -1619,12 +1580,11 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
unsigned LHSReg = getRegForValue(LHS);
if (!LHSReg)
return 0;
- bool LHSIsKill = hasTrivialKill(LHS);
unsigned ResultReg = 0;
if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
uint64_t Imm = C->getZExtValue();
- ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
+ ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
}
if (ResultReg)
return ResultReg;
@@ -1645,9 +1605,7 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
unsigned RHSReg = getRegForValue(MulLHS);
if (!RHSReg)
return 0;
- bool RHSIsKill = hasTrivialKill(MulLHS);
- ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
- RHSIsKill, ShiftVal);
+ ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
if (ResultReg)
return ResultReg;
}
@@ -1661,9 +1619,7 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
unsigned RHSReg = getRegForValue(SI->getOperand(0));
if (!RHSReg)
return 0;
- bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
- ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
- RHSIsKill, ShiftVal);
+ ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
if (ResultReg)
return ResultReg;
}
@@ -1672,20 +1628,18 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
unsigned RHSReg = getRegForValue(RHS);
if (!RHSReg)
return 0;
- bool RHSIsKill = hasTrivialKill(RHS);
MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
- ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
+ ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
- ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
+ ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
}
return ResultReg;
}
unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
- unsigned LHSReg, bool LHSIsKill,
- uint64_t Imm) {
+ unsigned LHSReg, uint64_t Imm) {
static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
"ISD nodes are not consecutive!");
static const unsigned OpcTable[3][2] = {
@@ -1720,18 +1674,17 @@ unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
return 0;
unsigned ResultReg =
- fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
+ fastEmitInst_ri(Opc, RC, LHSReg,
AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
- ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
+ ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
}
return ResultReg;
}
unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
- unsigned LHSReg, bool LHSIsKill,
- unsigned RHSReg, bool RHSIsKill,
+ unsigned LHSReg, unsigned RHSReg,
uint64_t ShiftImm) {
static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
"ISD nodes are not consecutive!");
@@ -1763,18 +1716,18 @@ unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
break;
}
unsigned ResultReg =
- fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
+ fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
- ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
+ ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
}
return ResultReg;
}
-unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
+unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg,
uint64_t Imm) {
- return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
+ return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
}
unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
@@ -1895,7 +1848,7 @@ unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
// Loading an i1 requires special handling.
if (VT == MVT::i1) {
- unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
+ unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
assert(ANDReg && "Unexpected AND instruction emission failure.");
ResultReg = ANDReg;
}
@@ -2049,7 +2002,6 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
removeDeadCode(I, std::next(I));
} else
ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
- /*IsKill=*/true,
AArch64::sub_32);
}
updateValueMap(I, ResultReg);
@@ -2157,7 +2109,7 @@ bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
// Storing an i1 requires special handling.
if (VTIsi1 && SrcReg != AArch64::WZR) {
- unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
+ unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
assert(ANDReg && "Unexpected AND instruction emission failure.");
SrcReg = ANDReg;
}
@@ -2390,11 +2342,9 @@ bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
unsigned SrcReg = getRegForValue(LHS);
if (!SrcReg)
return false;
- bool SrcIsKill = hasTrivialKill(LHS);
if (BW == 64 && !Is64Bit)
- SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
- AArch64::sub_32);
+ SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
if ((BW < 32) && !IsBitTest)
SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
@@ -2403,7 +2353,7 @@ bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
MachineInstrBuilder MIB =
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
- .addReg(SrcReg, getKillRegState(SrcIsKill));
+ .addReg(SrcReg);
if (IsBitTest)
MIB.addImm(TestBit);
MIB.addMBB(TBB);
@@ -2521,7 +2471,6 @@ bool AArch64FastISel::selectBranch(const Instruction *I) {
unsigned CondReg = getRegForValue(BI->getCondition());
if (CondReg == 0)
return false;
- bool CondRegIsKill = hasTrivialKill(BI->getCondition());
// i1 conditions come as i32 values, test the lowest bit with tb(n)z.
unsigned Opcode = AArch64::TBNZW;
@@ -2534,7 +2483,7 @@ bool AArch64FastISel::selectBranch(const Instruction *I) {
unsigned ConstrainedCondReg
= constrainOperandRegClass(II, CondReg, II.getNumDefs());
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
+ .addReg(ConstrainedCondReg)
.addImm(0)
.addMBB(TBB);
@@ -2684,19 +2633,16 @@ bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
unsigned Src1Reg = getRegForValue(Src1Val);
if (!Src1Reg)
return false;
- bool Src1IsKill = hasTrivialKill(Src1Val);
unsigned Src2Reg = getRegForValue(Src2Val);
if (!Src2Reg)
return false;
- bool Src2IsKill = hasTrivialKill(Src2Val);
- if (NeedExtraOp) {
- Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
- Src1IsKill = true;
- }
+ if (NeedExtraOp)
+ Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
+
unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
- Src1IsKill, Src2Reg, Src2IsKill);
+ Src2Reg);
updateValueMap(SI, ResultReg);
return true;
}
@@ -2768,9 +2714,6 @@ bool AArch64FastISel::selectSelect(const Instruction *I) {
unsigned SrcReg = getRegForValue(FoldSelect);
if (!SrcReg)
return false;
- unsigned UseReg = lookUpRegForValue(SI);
- if (UseReg)
- MRI.clearKillFlags(UseReg);
updateValueMap(I, SrcReg);
return true;
@@ -2799,7 +2742,6 @@ bool AArch64FastISel::selectSelect(const Instruction *I) {
unsigned CondReg = getRegForValue(Cond);
if (!CondReg)
return false;
- bool CondIsKill = hasTrivialKill(Cond);
const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
CondReg = constrainOperandRegClass(II, CondReg, 1);
@@ -2807,26 +2749,20 @@ bool AArch64FastISel::selectSelect(const Instruction *I) {
// Emit a TST instruction (ANDS wzr, reg, #imm).
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
AArch64::WZR)
- .addReg(CondReg, getKillRegState(CondIsKill))
+ .addReg(CondReg)
.addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
}
unsigned Src1Reg = getRegForValue(SI->getTrueValue());
- bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
-
unsigned Src2Reg = getRegForValue(SI->getFalseValue());
- bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
if (!Src1Reg || !Src2Reg)
return false;
- if (ExtraCC != AArch64CC::AL) {
- Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
- Src2IsKill, ExtraCC);
- Src2IsKill = true;
- }
- unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
- Src2IsKill, CC);
+ if (ExtraCC != AArch64CC::AL)
+ Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
+
+ unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
updateValueMap(I, ResultReg);
return true;
}
@@ -2911,7 +2847,6 @@ bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
unsigned SrcReg = getRegForValue(I->getOperand(0));
if (!SrcReg)
return false;
- bool SrcIsKill = hasTrivialKill(I->getOperand(0));
EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
@@ -2921,7 +2856,6 @@ bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
if (!SrcReg)
return false;
- SrcIsKill = true;
}
unsigned Opc;
@@ -2937,8 +2871,7 @@ bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
}
- unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
- SrcIsKill);
+ unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
updateValueMap(I, ResultReg);
return true;
}
@@ -3491,7 +3424,7 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
while (Depth--) {
DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
- SrcReg, /*IsKill=*/true, 0);
+ SrcReg, 0);
assert(DestReg && "Unexpected LDR instruction emission failure.");
SrcReg = DestReg;
}
@@ -3637,10 +3570,9 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
unsigned SrcReg = getRegForValue(II->getOperand(0));
if (!SrcReg)
return false;
- bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addReg(SrcReg, getKillRegState(SrcRegIsKill));
+ .addReg(SrcReg);
updateValueMap(II, ResultReg);
return true;
}
@@ -3663,9 +3595,8 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
unsigned Op0Reg = getRegForValue(II->getOperand(0));
if (!Op0Reg)
return false;
- bool Op0IsKill = hasTrivialKill(II->getOperand(0));
- unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
+ unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);
if (!ResultReg)
return false;
@@ -3742,33 +3673,26 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
unsigned LHSReg = getRegForValue(LHS);
if (!LHSReg)
return false;
- bool LHSIsKill = hasTrivialKill(LHS);
unsigned RHSReg = getRegForValue(RHS);
if (!RHSReg)
return false;
- bool RHSIsKill = hasTrivialKill(RHS);
if (VT == MVT::i32) {
- MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
- unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
- /*IsKill=*/false, 32);
- MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
- AArch64::sub_32);
- ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
- AArch64::sub_32);
- emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
- AArch64_AM::ASR, 31, /*WantResult=*/false);
+ MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
+ unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg, 32);
+ MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
+ ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, AArch64::sub_32);
+ emitSubs_rs(VT, ShiftReg, MulReg, AArch64_AM::ASR, 31,
+ /*WantResult=*/false);
} else {
assert(VT == MVT::i64 && "Unexpected value type.");
// LHSReg and RHSReg cannot be killed by this Mul, since they are
// reused in the next instruction.
- MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
- /*IsKill=*/false);
- unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
- RHSReg, RHSIsKill);
- emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
- AArch64_AM::ASR, 63, /*WantResult=*/false);
+ MulReg = emitMul_rr(VT, LHSReg, RHSReg);
+ unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
+ emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,
+ /*WantResult=*/false);
}
break;
}
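
The smul.with.overflow hunk above keeps the existing strategy and only drops the kill-flag arguments: for i32 it forms the full 64-bit product with SMULL, then the SUBS ... ASR #31 compares the high 32 bits of the product with the sign of its low 32 bits, which differ exactly when the multiply overflows. A standalone C++ sketch of that check, written against plain integers rather than the FastISel helpers:

#include <cassert>
#include <cstdint>

// Does a 32-bit signed multiply overflow? Mirrors the i32 path above:
// take the exact 64-bit product, then compare its high half with the
// sign-extension of its low half.
bool smul32Overflows(int32_t A, int32_t B) {
  int64_t Prod = (int64_t)A * (int64_t)B;   // SMULL: exact 64-bit product
  int32_t Hi = (int32_t)(Prod >> 32);       // high 32 bits (the LSR #32)
  int32_t LoSign = (int32_t)Prod >> 31;     // sign of the low half (ASR #31)
  return Hi != LoSign;                      // what the SUBS above compares
}

int main() {
  assert(!smul32Overflows(46340, 46340));   //  2147395600 fits in i32
  assert(smul32Overflows(46341, 46341));    //  2147488281 does not
  assert(!smul32Overflows(-46341, 46340));  // -2147441940 still fits
  return 0;
}
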
@@ -3777,30 +3701,23 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
unsigned LHSReg = getRegForValue(LHS);
if (!LHSReg)
return false;
- bool LHSIsKill = hasTrivialKill(LHS);
unsigned RHSReg = getRegForValue(RHS);
if (!RHSReg)
return false;
- bool RHSIsKill = hasTrivialKill(RHS);
if (VT == MVT::i32) {
- MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
- emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
- /*IsKill=*/false, AArch64_AM::LSR, 32,
+ MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
+ emitSubs_rs(MVT::i64, AArch64::XZR, MulReg, AArch64_AM::LSR, 32,
/*WantResult=*/false);
- MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
- AArch64::sub_32);
+ MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
} else {
assert(VT == MVT::i64 && "Unexpected value type.");
// LHSReg and RHSReg cannot be killed by this Mul, since they are
// reused in the next instruction.
- MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
- /*IsKill=*/false);
- unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
- RHSReg, RHSIsKill);
- emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
- /*IsKill=*/false, /*WantResult=*/false);
+ MulReg = emitMul_rr(VT, LHSReg, RHSReg);
+ unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
+ emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
}
break;
}
@@ -3816,8 +3733,8 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
return false;
ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
- AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
- /*IsKill=*/true, getInvertedCondCode(CC));
+ AArch64::WZR, AArch64::WZR,
+ getInvertedCondCode(CC));
(void)ResultReg2;
assert((ResultReg1 + 1) == ResultReg2 &&
"Nonconsecutive result registers.");
@@ -3917,7 +3834,7 @@ bool AArch64FastISel::selectRet(const Instruction *I) {
// "Callee" (i.e. value producer) zero extends pointers at function
// boundary.
if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
- SrcReg = emitAnd_ri(MVT::i64, SrcReg, false, 0xffffffff);
+ SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
// Make the copy.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -3959,7 +3876,6 @@ bool AArch64FastISel::selectTrunc(const Instruction *I) {
unsigned SrcReg = getRegForValue(Op);
if (!SrcReg)
return false;
- bool SrcIsKill = hasTrivialKill(Op);
// If we're truncating from i64 to a smaller non-legal type then generate an
// AND. Otherwise, we know the high bits are undefined and a truncate only
@@ -3984,16 +3900,16 @@ bool AArch64FastISel::selectTrunc(const Instruction *I) {
break;
}
// Issue an extract_subreg to get the lower 32-bits.
- unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
+ unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
AArch64::sub_32);
// Create the AND instruction which performs the actual truncation.
- ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
+ ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
assert(ResultReg && "Unexpected AND instruction emission failure.");
} else {
ResultReg = createResultReg(&AArch64::GPR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg)
- .addReg(SrcReg, getKillRegState(SrcIsKill));
+ .addReg(SrcReg);
}
updateValueMap(I, ResultReg);
@@ -4009,7 +3925,7 @@ unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
DestVT = MVT::i32;
if (IsZExt) {
- unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
+ unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
assert(ResultReg && "Unexpected AND instruction emission failure.");
if (DestVT == MVT::i64) {
// We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
@@ -4029,12 +3945,11 @@ unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
return 0;
}
return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
- /*TODO:IsKill=*/false, 0, 0);
+ 0, 0);
}
}
-unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
- unsigned Op1, bool Op1IsKill) {
+unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
unsigned Opc, ZReg;
switch (RetVT.SimpleTy) {
default: return 0;
@@ -4049,32 +3964,27 @@ unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
const TargetRegisterClass *RC =
(RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
- return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
- /*IsKill=*/ZReg, true);
+ return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
}
-unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
- unsigned Op1, bool Op1IsKill) {
+unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
if (RetVT != MVT::i64)
return 0;
return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
- Op0, Op0IsKill, Op1, Op1IsKill,
- AArch64::XZR, /*IsKill=*/true);
+ Op0, Op1, AArch64::XZR);
}
-unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
- unsigned Op1, bool Op1IsKill) {
+unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
if (RetVT != MVT::i64)
return 0;
return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
- Op0, Op0IsKill, Op1, Op1IsKill,
- AArch64::XZR, /*IsKill=*/true);
+ Op0, Op1, AArch64::XZR);
}
-unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
- unsigned Op1Reg, bool Op1IsKill) {
+unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg,
+ unsigned Op1Reg) {
unsigned Opc = 0;
bool NeedTrunc = false;
uint64_t Mask = 0;
@@ -4088,20 +3998,17 @@ unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
const TargetRegisterClass *RC =
(RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
- if (NeedTrunc) {
- Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
- Op1IsKill = true;
- }
- unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
- Op1IsKill);
if (NeedTrunc)
- ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
+ Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
+
+ unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
+ if (NeedTrunc)
+ ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
return ResultReg;
}
unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
- bool Op0IsKill, uint64_t Shift,
- bool IsZExt) {
+ uint64_t Shift, bool IsZExt) {
assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
"Unexpected source/return type pair.");
assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
@@ -4123,7 +4030,7 @@ unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
unsigned ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg)
- .addReg(Op0, getKillRegState(Op0IsKill));
+ .addReg(Op0);
return ResultReg;
} else
return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
@@ -4171,16 +4078,15 @@ unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(AArch64::SUBREG_TO_REG), TmpReg)
.addImm(0)
- .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op0)
.addImm(AArch64::sub_32);
Op0 = TmpReg;
- Op0IsKill = true;
}
- return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
+ return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
}
-unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
- unsigned Op1Reg, bool Op1IsKill) {
+unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg,
+ unsigned Op1Reg) {
unsigned Opc = 0;
bool NeedTrunc = false;
uint64_t Mask = 0;
@@ -4195,20 +4101,17 @@ unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
const TargetRegisterClass *RC =
(RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
if (NeedTrunc) {
- Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
- Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
- Op0IsKill = Op1IsKill = true;
+ Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
+ Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
}
- unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
- Op1IsKill);
+ unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
if (NeedTrunc)
- ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
+ ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
return ResultReg;
}
unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
- bool Op0IsKill, uint64_t Shift,
- bool IsZExt) {
+ uint64_t Shift, bool IsZExt) {
assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
"Unexpected source/return type pair.");
assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
@@ -4230,7 +4133,7 @@ unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
unsigned ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg)
- .addReg(Op0, getKillRegState(Op0IsKill));
+ .addReg(Op0);
return ResultReg;
} else
return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
@@ -4274,7 +4177,6 @@ unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
if (!Op0)
return 0;
- Op0IsKill = true;
SrcVT = RetVT;
SrcBits = SrcVT.getSizeInBits();
IsZExt = true;
@@ -4292,16 +4194,15 @@ unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(AArch64::SUBREG_TO_REG), TmpReg)
.addImm(0)
- .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op0)
.addImm(AArch64::sub_32);
Op0 = TmpReg;
- Op0IsKill = true;
}
- return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
+ return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
}
-unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
- unsigned Op1Reg, bool Op1IsKill) {
+unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg,
+ unsigned Op1Reg) {
unsigned Opc = 0;
bool NeedTrunc = false;
uint64_t Mask = 0;
@@ -4317,19 +4218,16 @@ unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
(RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
if (NeedTrunc) {
Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
- Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
- Op0IsKill = Op1IsKill = true;
+ Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
}
- unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
- Op1IsKill);
+ unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
if (NeedTrunc)
- ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
+ ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
return ResultReg;
}
unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
- bool Op0IsKill, uint64_t Shift,
- bool IsZExt) {
+ uint64_t Shift, bool IsZExt) {
assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
"Unexpected source/return type pair.");
assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
@@ -4351,7 +4249,7 @@ unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
unsigned ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg)
- .addReg(Op0, getKillRegState(Op0IsKill));
+ .addReg(Op0);
return ResultReg;
} else
return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
@@ -4401,12 +4299,11 @@ unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(AArch64::SUBREG_TO_REG), TmpReg)
.addImm(0)
- .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op0)
.addImm(AArch64::sub_32);
Op0 = TmpReg;
- Op0IsKill = true;
}
- return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
+ return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
}
unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
@@ -4467,7 +4364,7 @@ unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
const TargetRegisterClass *RC =
(DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
- return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
+ return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
}
static bool isZExtLoad(const MachineInstr *LI) {
@@ -4590,7 +4487,6 @@ bool AArch64FastISel::selectIntExt(const Instruction *I) {
unsigned SrcReg = getRegForValue(I->getOperand(0));
if (!SrcReg)
return false;
- bool SrcIsKill = hasTrivialKill(I->getOperand(0));
// Try to optimize already sign-/zero-extended values from function arguments.
bool IsZExt = isa<ZExtInst>(I);
@@ -4601,17 +4497,10 @@ bool AArch64FastISel::selectIntExt(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(AArch64::SUBREG_TO_REG), ResultReg)
.addImm(0)
- .addReg(SrcReg, getKillRegState(SrcIsKill))
+ .addReg(SrcReg)
.addImm(AArch64::sub_32);
SrcReg = ResultReg;
}
- // Conservatively clear all kill flags from all uses, because we are
- // replacing a sign-/zero-extend instruction at IR level with a nop at MI
- // level. The result of the instruction at IR level might have been
- // trivially dead, which is now not longer true.
- unsigned UseReg = lookUpRegForValue(I);
- if (UseReg)
- MRI.clearKillFlags(UseReg);
updateValueMap(I, SrcReg);
return true;
@@ -4651,23 +4540,18 @@ bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
unsigned Src0Reg = getRegForValue(I->getOperand(0));
if (!Src0Reg)
return false;
- bool Src0IsKill = hasTrivialKill(I->getOperand(0));
unsigned Src1Reg = getRegForValue(I->getOperand(1));
if (!Src1Reg)
return false;
- bool Src1IsKill = hasTrivialKill(I->getOperand(1));
const TargetRegisterClass *RC =
(DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
- unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
- Src1Reg, /*IsKill=*/false);
+ unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
assert(QuotReg && "Unexpected DIV instruction emission failure.");
// The remainder is computed as numerator - (quotient * denominator) using the
// MSUB instruction.
- unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
- Src1Reg, Src1IsKill, Src0Reg,
- Src0IsKill);
+ unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
updateValueMap(I, ResultReg);
return true;
}
@@ -4715,10 +4599,9 @@ bool AArch64FastISel::selectMul(const Instruction *I) {
unsigned Src0Reg = getRegForValue(Src0);
if (!Src0Reg)
return false;
- bool Src0IsKill = hasTrivialKill(Src0);
unsigned ResultReg =
- emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
+ emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);
if (ResultReg) {
updateValueMap(I, ResultReg);
@@ -4729,14 +4612,12 @@ bool AArch64FastISel::selectMul(const Instruction *I) {
unsigned Src0Reg = getRegForValue(I->getOperand(0));
if (!Src0Reg)
return false;
- bool Src0IsKill = hasTrivialKill(I->getOperand(0));
unsigned Src1Reg = getRegForValue(I->getOperand(1));
if (!Src1Reg)
return false;
- bool Src1IsKill = hasTrivialKill(I->getOperand(1));
- unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
+ unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);
if (!ResultReg)
return false;
@@ -4782,18 +4663,17 @@ bool AArch64FastISel::selectShift(const Instruction *I) {
unsigned Op0Reg = getRegForValue(Op0);
if (!Op0Reg)
return false;
- bool Op0IsKill = hasTrivialKill(Op0);
switch (I->getOpcode()) {
default: llvm_unreachable("Unexpected instruction.");
case Instruction::Shl:
- ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
+ ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
break;
case Instruction::AShr:
- ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
+ ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
break;
case Instruction::LShr:
- ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
+ ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
break;
}
if (!ResultReg)
@@ -4806,24 +4686,22 @@ bool AArch64FastISel::selectShift(const Instruction *I) {
unsigned Op0Reg = getRegForValue(I->getOperand(0));
if (!Op0Reg)
return false;
- bool Op0IsKill = hasTrivialKill(I->getOperand(0));
unsigned Op1Reg = getRegForValue(I->getOperand(1));
if (!Op1Reg)
return false;
- bool Op1IsKill = hasTrivialKill(I->getOperand(1));
unsigned ResultReg = 0;
switch (I->getOpcode()) {
default: llvm_unreachable("Unexpected instruction.");
case Instruction::Shl:
- ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
+ ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
break;
case Instruction::AShr:
- ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
+ ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
break;
case Instruction::LShr:
- ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
+ ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
break;
}
@@ -4865,9 +4743,8 @@ bool AArch64FastISel::selectBitCast(const Instruction *I) {
unsigned Op0Reg = getRegForValue(I->getOperand(0));
if (!Op0Reg)
return false;
- bool Op0IsKill = hasTrivialKill(I->getOperand(0));
- unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
+ unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
if (!ResultReg)
return false;
@@ -4930,10 +4807,9 @@ bool AArch64FastISel::selectSDiv(const Instruction *I) {
unsigned Src0Reg = getRegForValue(I->getOperand(0));
if (!Src0Reg)
return false;
- bool Src0IsKill = hasTrivialKill(I->getOperand(0));
if (cast<BinaryOperator>(I)->isExact()) {
- unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
+ unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
if (!ResultReg)
return false;
updateValueMap(I, ResultReg);
@@ -4941,12 +4817,12 @@ bool AArch64FastISel::selectSDiv(const Instruction *I) {
}
int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
- unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
+ unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
if (!AddReg)
return false;
// (Src0 < 0) ? Pow2 - 1 : 0;
- if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
+ if (!emitICmp_ri(VT, Src0Reg, 0))
return false;
unsigned SelectOpc;
@@ -4958,9 +4834,8 @@ bool AArch64FastISel::selectSDiv(const Instruction *I) {
SelectOpc = AArch64::CSELWr;
RC = &AArch64::GPR32RegClass;
}
- unsigned SelectReg =
- fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
- Src0IsKill, AArch64CC::LT);
+ unsigned SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
+ AArch64CC::LT);
if (!SelectReg)
return false;
@@ -4969,10 +4844,10 @@ bool AArch64FastISel::selectSDiv(const Instruction *I) {
unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
unsigned ResultReg;
if (C.isNegative())
- ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
- SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
+ ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
+ AArch64_AM::ASR, Lg2);
else
- ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
+ ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);
if (!ResultReg)
return false;
@@ -4984,23 +4859,20 @@ bool AArch64FastISel::selectSDiv(const Instruction *I) {
/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
/// have to duplicate it for AArch64, because otherwise we would fail during the
/// sign-extend emission.
-std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
+unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
unsigned IdxN = getRegForValue(Idx);
if (IdxN == 0)
// Unhandled operand. Halt "fast" selection and bail.
- return std::pair<unsigned, bool>(0, false);
-
- bool IdxNIsKill = hasTrivialKill(Idx);
+ return 0;
// If the index is smaller or larger than intptr_t, truncate or extend it.
MVT PtrVT = TLI.getPointerTy(DL);
EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
if (IdxVT.bitsLT(PtrVT)) {
IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
- IdxNIsKill = true;
} else if (IdxVT.bitsGT(PtrVT))
llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
- return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
+ return IdxN;
}
/// This is mostly a copy of the existing FastISel GEP code, but we have to
@@ -5014,7 +4886,6 @@ bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
unsigned N = getRegForValue(I->getOperand(0));
if (!N)
return false;
- bool NIsKill = hasTrivialKill(I->getOperand(0));
// Keep a running tab of the total offset to coalesce multiple N = N + Offset
// into a single N = N + TotalOffset.
@@ -5041,18 +4912,15 @@ bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
continue;
}
if (TotalOffs) {
- N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
+ N = emitAdd_ri_(VT, N, TotalOffs);
if (!N)
return false;
- NIsKill = true;
TotalOffs = 0;
}
// N = N + Idx * ElementSize;
uint64_t ElementSize = DL.getTypeAllocSize(Ty);
- std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
- unsigned IdxN = Pair.first;
- bool IdxNIsKill = Pair.second;
+ unsigned IdxN = getRegForGEPIndex(Idx);
if (!IdxN)
return false;
@@ -5060,18 +4928,17 @@ bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
if (!C)
return false;
- IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
+ IdxN = emitMul_rr(VT, IdxN, C);
if (!IdxN)
return false;
- IdxNIsKill = true;
}
- N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
+ N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
if (!N)
return false;
}
}
if (TotalOffs) {
- N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
+ N = emitAdd_ri_(VT, N, TotalOffs);
if (!N)
return false;
}
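
The selectGetElementPtr changes above keep a running TotalOffs for constant indices and only emit a multiply/add for variable ones. A minimal standalone model of that address arithmetic (plain C++, not FastISel code; GepStep and its fields are invented for illustration):

#include <cstdint>
#include <vector>

struct GepStep {
  bool IsConstant;     // compile-time constant index?
  int64_t Index;       // index value (constant, or the current register value)
  int64_t ElementSize; // DL.getTypeAllocSize(Ty) for this level
};

int64_t computeGepAddress(int64_t Base, const std::vector<GepStep> &Steps) {
  int64_t N = Base, TotalOffs = 0;
  for (const GepStep &S : Steps) {
    if (S.IsConstant) {
      TotalOffs += S.Index * S.ElementSize; // coalesced into a single add
      continue;
    }
    if (TotalOffs) { // flush the pending constant offset first
      N += TotalOffs;
      TotalOffs = 0;
    }
    N += S.Index * S.ElementSize; // N = N + Idx * ElementSize
  }
  return TotalOffs ? N + TotalOffs : N;
}
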
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index f5df1c5e2929..06cc68155c37 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -252,7 +252,7 @@ bool AArch64FrameLowering::homogeneousPrologEpilog(
// Bail on stack adjustment needed on return for simplicity.
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
- if (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF))
+ if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF))
return false;
if (Exit && getArgumentPopSize(MF, *Exit))
return false;
@@ -363,7 +363,7 @@ bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
return true;
if (MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
MFI.hasStackMap() || MFI.hasPatchPoint() ||
- RegInfo->needsStackRealignment(MF))
+ RegInfo->hasStackRealignment(MF))
return true;
// With large callframes around we may need to use FP to access the scavenging
// emergency spillslot.
@@ -616,7 +616,7 @@ bool AArch64FrameLowering::canUseAsPrologue(
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
// Don't need a scratch register if we're not going to re-align the stack.
- if (!RegInfo->needsStackRealignment(*MF))
+ if (!RegInfo->hasStackRealignment(*MF))
return true;
// Otherwise, we can use any block as long as it has a scratch register
// available.
@@ -678,7 +678,7 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
if (MFI.hasVarSizedObjects())
return false;
- if (RegInfo->needsStackRealignment(MF))
+ if (RegInfo->hasStackRealignment(MF))
return false;
// This isn't strictly necessary, but it simplifies things a bit since the
@@ -1375,7 +1375,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
if (NumBytes) {
// Alignment is required for the parent frame, not the funclet
const bool NeedsRealignment =
- !IsFunclet && RegInfo->needsStackRealignment(MF);
+ !IsFunclet && RegInfo->hasStackRealignment(MF);
unsigned scratchSPReg = AArch64::SP;
if (NeedsRealignment) {
@@ -1981,13 +1981,13 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
// Argument access should always use the FP.
if (isFixed) {
UseFP = hasFP(MF);
- } else if (isCSR && RegInfo->needsStackRealignment(MF)) {
+ } else if (isCSR && RegInfo->hasStackRealignment(MF)) {
// References to the CSR area must use FP if we're re-aligning the stack
// since the dynamically-sized alignment padding is between the SP/BP and
// the CSR area.
assert(hasFP(MF) && "Re-aligned stack must have frame pointer");
UseFP = true;
- } else if (hasFP(MF) && !RegInfo->needsStackRealignment(MF)) {
+ } else if (hasFP(MF) && !RegInfo->hasStackRealignment(MF)) {
// If the FPOffset is negative and we're producing a signed immediate, we
// have to keep in mind that the available offset range for negative
// offsets is smaller than for positive ones. If an offset is available
@@ -2029,9 +2029,10 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
}
}
- assert(((isFixed || isCSR) || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
- "In the presence of dynamic stack pointer realignment, "
- "non-argument/CSR objects cannot be accessed through the frame pointer");
+ assert(
+ ((isFixed || isCSR) || !RegInfo->hasStackRealignment(MF) || !UseFP) &&
+ "In the presence of dynamic stack pointer realignment, "
+ "non-argument/CSR objects cannot be accessed through the frame pointer");
if (isSVE) {
StackOffset FPOffset =
@@ -2041,10 +2042,9 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
StackOffset::get(MFI.getStackSize() - AFI->getCalleeSavedStackSize(),
ObjectOffset);
// Always use the FP for SVE spills if available and beneficial.
- if (hasFP(MF) &&
- (SPOffset.getFixed() ||
- FPOffset.getScalable() < SPOffset.getScalable() ||
- RegInfo->needsStackRealignment(MF))) {
+ if (hasFP(MF) && (SPOffset.getFixed() ||
+ FPOffset.getScalable() < SPOffset.getScalable() ||
+ RegInfo->hasStackRealignment(MF))) {
FrameReg = RegInfo->getFrameRegister(MF);
return FPOffset;
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 94b5d7718d0c..f70eee603706 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -1339,6 +1339,11 @@ bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
SDValue Ops[] = { Base, Offset, Chain };
SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
MVT::Other, Ops);
+
+ // Transfer memoperands.
+ MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
+
// Either way, we're replacing the node, so tell the caller that.
SDValue LoadedVal = SDValue(Res, 1);
if (InsertTo64) {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 5ab8d8a5d6f1..718fc8b7c1d0 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -344,6 +344,18 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setCondCodeAction(ISD::SETUGT, VT, Expand);
setCondCodeAction(ISD::SETUEQ, VT, Expand);
setCondCodeAction(ISD::SETUNE, VT, Expand);
+
+ setOperationAction(ISD::FREM, VT, Expand);
+ setOperationAction(ISD::FPOW, VT, Expand);
+ setOperationAction(ISD::FPOWI, VT, Expand);
+ setOperationAction(ISD::FCOS, VT, Expand);
+ setOperationAction(ISD::FSIN, VT, Expand);
+ setOperationAction(ISD::FSINCOS, VT, Expand);
+ setOperationAction(ISD::FEXP, VT, Expand);
+ setOperationAction(ISD::FEXP2, VT, Expand);
+ setOperationAction(ISD::FLOG, VT, Expand);
+ setOperationAction(ISD::FLOG2, VT, Expand);
+ setOperationAction(ISD::FLOG10, VT, Expand);
}
}
@@ -1135,6 +1147,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
+ setOperationAction(ISD::STEP_VECTOR, VT, Custom);
setOperationAction(ISD::MULHU, VT, Expand);
setOperationAction(ISD::MULHS, VT, Expand);
@@ -1167,6 +1180,13 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
MVT::nxv4f32, MVT::nxv2f64}) {
+ for (auto InnerVT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16,
+ MVT::nxv2f32, MVT::nxv4f32, MVT::nxv2f64}) {
+ // Avoid marking truncating FP stores as legal to prevent the
+ // DAGCombiner from creating unsupported truncating stores.
+ setTruncStoreAction(VT, InnerVT, Expand);
+ }
+
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
@@ -1387,6 +1407,20 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
// We use EXTRACT_SUBVECTOR to "cast" a scalable vector to a fixed length one.
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+ if (VT.isFloatingPoint()) {
+ setCondCodeAction(ISD::SETO, VT, Expand);
+ setCondCodeAction(ISD::SETOLT, VT, Expand);
+ setCondCodeAction(ISD::SETLT, VT, Expand);
+ setCondCodeAction(ISD::SETOLE, VT, Expand);
+ setCondCodeAction(ISD::SETLE, VT, Expand);
+ setCondCodeAction(ISD::SETULT, VT, Expand);
+ setCondCodeAction(ISD::SETULE, VT, Expand);
+ setCondCodeAction(ISD::SETUGE, VT, Expand);
+ setCondCodeAction(ISD::SETUGT, VT, Expand);
+ setCondCodeAction(ISD::SETUEQ, VT, Expand);
+ setCondCodeAction(ISD::SETUNE, VT, Expand);
+ }
+
// Lower fixed length vector operations to scalable equivalents.
setOperationAction(ISD::ABS, VT, Custom);
setOperationAction(ISD::ADD, VT, Custom);
@@ -1399,6 +1433,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FADD, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::FCEIL, VT, Custom);
setOperationAction(ISD::FDIV, VT, Custom);
setOperationAction(ISD::FFLOOR, VT, Custom);
@@ -1420,6 +1455,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::OR, VT, Custom);
setOperationAction(ISD::SDIV, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
@@ -1442,6 +1478,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
@@ -2123,6 +2160,24 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
// Lowering Code
//===----------------------------------------------------------------------===//
+/// isZerosVector - Check whether SDNode N is a zero-filled vector.
+static bool isZerosVector(const SDNode *N) {
+ // Look through a bit convert.
+ while (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0).getNode();
+
+ if (ISD::isConstantSplatVectorAllZeros(N))
+ return true;
+
+ if (N->getOpcode() != AArch64ISD::DUP)
+ return false;
+
+ auto Opnd0 = N->getOperand(0);
+ auto *CINT = dyn_cast<ConstantSDNode>(Opnd0);
+ auto *CFP = dyn_cast<ConstantFPSDNode>(Opnd0);
+ return (CINT && CINT->isNullValue()) || (CFP && CFP->isZero());
+}
+
/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
/// CC
static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) {
@@ -3894,9 +3949,13 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(2));
}
case Intrinsic::aarch64_neon_sdot:
- case Intrinsic::aarch64_neon_udot: {
- unsigned Opcode = IntNo == Intrinsic::aarch64_neon_udot ? AArch64ISD::UDOT
- : AArch64ISD::SDOT;
+ case Intrinsic::aarch64_neon_udot:
+ case Intrinsic::aarch64_sve_sdot:
+ case Intrinsic::aarch64_sve_udot: {
+ unsigned Opcode = (IntNo == Intrinsic::aarch64_neon_udot ||
+ IntNo == Intrinsic::aarch64_sve_udot)
+ ? AArch64ISD::UDOT
+ : AArch64ISD::SDOT;
return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
}
@@ -4402,6 +4461,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::SPLAT_VECTOR:
return LowerSPLAT_VECTOR(Op, DAG);
+ case ISD::STEP_VECTOR:
+ return LowerSTEP_VECTOR(Op, DAG);
case ISD::EXTRACT_SUBVECTOR:
return LowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::INSERT_SUBVECTOR:
@@ -5107,11 +5168,11 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
const Function &CallerF = MF.getFunction();
CallingConv::ID CallerCC = CallerF.getCallingConv();
- // If this function uses the C calling convention but has an SVE signature,
- // then it preserves more registers and should assume the SVE_VectorCall CC.
+ // Functions using the C or Fast calling convention that have an SVE signature
+ // preserve more registers and should assume the SVE_VectorCall CC.
// The check for matching callee-saved regs will determine whether it is
// eligible for TCO.
- if (CallerCC == CallingConv::C &&
+ if ((CallerCC == CallingConv::C || CallerCC == CallingConv::Fast) &&
AArch64RegisterInfo::hasSVEArgsOrReturn(&MF))
CallerCC = CallingConv::AArch64_SVE_VectorCall;
@@ -5304,7 +5365,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// Check callee args/returns for SVE registers and set calling convention
// accordingly.
- if (CallConv == CallingConv::C) {
+ if (CallConv == CallingConv::C || CallConv == CallingConv::Fast) {
bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){
return Out.VT.isScalableVector();
});
@@ -6994,6 +7055,17 @@ SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
return DAG.getNode(ISD::VSELECT, DL, Ty, SplatPred, TVal, FVal);
}
+ if (useSVEForFixedLengthVectorVT(Ty)) {
+ // FIXME: Ideally this would be the same as above using i1 types, however
+ // for the moment we can't deal with fixed i1 vector types properly, so
+ // instead extend the predicate to a result type sized integer vector.
+ MVT SplatValVT = MVT::getIntegerVT(Ty.getScalarSizeInBits());
+ MVT PredVT = MVT::getVectorVT(SplatValVT, Ty.getVectorElementCount());
+ SDValue SplatVal = DAG.getSExtOrTrunc(CCVal, DL, SplatValVT);
+ SDValue SplatPred = DAG.getNode(ISD::SPLAT_VECTOR, DL, PredVT, SplatVal);
+ return DAG.getNode(ISD::VSELECT, DL, Ty, SplatPred, TVal, FVal);
+ }
+
// Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
// instruction.
if (ISD::isOverflowIntrOpRes(CCVal)) {
@@ -9049,6 +9121,20 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
return GenerateTBL(Op, ShuffleMask, DAG);
}
+SDValue AArch64TargetLowering::LowerSTEP_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+ assert(VT.isScalableVector() &&
+ "Only expect scalable vectors for STEP_VECTOR");
+ assert(VT.getScalarType() != MVT::i1 &&
+ "Vectors of i1 types not supported for STEP_VECTOR");
+
+ SDValue StepVal = Op.getOperand(0);
+ SDValue Zero = DAG.getConstant(0, dl, StepVal.getValueType());
+ return DAG.getNode(AArch64ISD::INDEX_VECTOR, dl, VT, Zero, StepVal);
+}
+
SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -9663,10 +9749,10 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
}
if (i > 0)
isOnlyLowElement = false;
- if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
+ if (!isIntOrFPConstant(V))
isConstant = false;
- if (isa<ConstantSDNode>(V) || isa<ConstantFPSDNode>(V)) {
+ if (isIntOrFPConstant(V)) {
++NumConstantLanes;
if (!ConstantValue.getNode())
ConstantValue = V;
@@ -9691,7 +9777,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
// Convert BUILD_VECTOR where all elements but the lowest are undef into
// SCALAR_TO_VECTOR, except for when we have a single-element constant vector
// as SimplifyDemandedBits will just turn that back into BUILD_VECTOR.
- if (isOnlyLowElement && !(NumElts == 1 && isa<ConstantSDNode>(Value))) {
+ if (isOnlyLowElement && !(NumElts == 1 && isIntOrFPConstant(Value))) {
LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: only low element used, creating 1 "
"SCALAR_TO_VECTOR node\n");
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
@@ -9832,7 +9918,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
- if (!isa<ConstantSDNode>(V) && !isa<ConstantFPSDNode>(V))
+ if (!isIntOrFPConstant(V))
// Note that type legalization likely mucked about with the VT of the
// source operand, so we may have to convert it here before inserting.
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, V, LaneIdx);
@@ -9932,6 +10018,9 @@ SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
+ if (useSVEForFixedLengthVectorVT(Op.getValueType()))
+ return LowerFixedLengthInsertVectorElt(Op, DAG);
+
// Check for non-constant or out of range lane.
EVT VT = Op.getOperand(0).getValueType();
ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(2));
@@ -9967,8 +10056,11 @@ AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
- // Check for non-constant or out of range lane.
EVT VT = Op.getOperand(0).getValueType();
+ if (useSVEForFixedLengthVectorVT(VT))
+ return LowerFixedLengthExtractVectorElt(Op, DAG);
+
+ // Check for non-constant or out of range lane.
ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(1));
if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
return SDValue();
@@ -10372,11 +10464,8 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
SelectionDAG &DAG) const {
- if (Op.getValueType().isScalableVector()) {
- if (Op.getOperand(0).getValueType().isFloatingPoint())
- return Op;
+ if (Op.getValueType().isScalableVector())
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SETCC_MERGE_ZERO);
- }
if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType()))
return LowerFixedLengthVectorSetccToSVE(Op, DAG);
@@ -13280,7 +13369,7 @@ static SDValue performAddDotCombine(SDNode *N, SelectionDAG &DAG) {
auto isZeroDot = [](SDValue Dot) {
return (Dot.getOpcode() == AArch64ISD::UDOT ||
Dot.getOpcode() == AArch64ISD::SDOT) &&
- ISD::isBuildVectorAllZeros(Dot.getOperand(0).getNode());
+ isZerosVector(Dot.getOperand(0).getNode());
};
if (!isZeroDot(Dot))
std::swap(Dot, A);
@@ -13911,78 +14000,7 @@ static SDValue performExtendCombine(SDNode *N,
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD);
}
-
- // This is effectively a custom type legalization for AArch64.
- //
- // Type legalization will split an extend of a small, legal, type to a larger
- // illegal type by first splitting the destination type, often creating
- // illegal source types, which then get legalized in isel-confusing ways,
- // leading to really terrible codegen. E.g.,
- // %result = v8i32 sext v8i8 %value
- // becomes
- // %losrc = extract_subreg %value, ...
- // %hisrc = extract_subreg %value, ...
- // %lo = v4i32 sext v4i8 %losrc
- // %hi = v4i32 sext v4i8 %hisrc
- // Things go rapidly downhill from there.
- //
- // For AArch64, the [sz]ext vector instructions can only go up one element
- // size, so we can, e.g., extend from i8 to i16, but to go from i8 to i32
- // take two instructions.
- //
- // This implies that the most efficient way to do the extend from v8i8
- // to two v4i32 values is to first extend the v8i8 to v8i16, then do
- // the normal splitting to happen for the v8i16->v8i32.
-
- // This is pre-legalization to catch some cases where the default
- // type legalization will create ill-tempered code.
- if (!DCI.isBeforeLegalizeOps())
- return SDValue();
-
- // We're only interested in cleaning things up for non-legal vector types
- // here. If both the source and destination are legal, things will just
- // work naturally without any fiddling.
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT ResVT = N->getValueType(0);
- if (!ResVT.isVector() || TLI.isTypeLegal(ResVT))
- return SDValue();
- // If the vector type isn't a simple VT, it's beyond the scope of what
- // we're worried about here. Let legalization do its thing and hope for
- // the best.
- SDValue Src = N->getOperand(0);
- EVT SrcVT = Src->getValueType(0);
- if (!ResVT.isSimple() || !SrcVT.isSimple())
- return SDValue();
-
- // If the source VT is a 64-bit fixed or scalable vector, we can play games
- // and get the better results we want.
- if (SrcVT.getSizeInBits().getKnownMinSize() != 64)
- return SDValue();
-
- unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
- ElementCount SrcEC = SrcVT.getVectorElementCount();
- SrcVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize * 2), SrcEC);
- SDLoc DL(N);
- Src = DAG.getNode(N->getOpcode(), DL, SrcVT, Src);
-
- // Now split the rest of the operation into two halves, each with a 64
- // bit source.
- EVT LoVT, HiVT;
- SDValue Lo, Hi;
- LoVT = HiVT = ResVT.getHalfNumVectorElementsVT(*DAG.getContext());
-
- EVT InNVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(),
- LoVT.getVectorElementCount());
- Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
- DAG.getConstant(0, DL, MVT::i64));
- Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
- DAG.getConstant(InNVT.getVectorMinNumElements(), DL, MVT::i64));
- Lo = DAG.getNode(N->getOpcode(), DL, LoVT, Lo);
- Hi = DAG.getNode(N->getOpcode(), DL, HiVT, Hi);
-
- // Now combine the parts back together so we still have a single result
- // like the combiner expects.
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
+ return SDValue();
}
static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
@@ -15213,7 +15231,8 @@ static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
}
}
- if (N0.getOpcode() != ISD::SETCC || CCVT.getVectorNumElements() != 1 ||
+ if (N0.getOpcode() != ISD::SETCC ||
+ CCVT.getVectorElementCount() != ElementCount::getFixed(1) ||
CCVT.getVectorElementType() != MVT::i1)
return SDValue();
@@ -17221,6 +17240,35 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorTruncateToSVE(
return convertFromScalableVector(DAG, VT, Val);
}
+SDValue AArch64TargetLowering::LowerFixedLengthExtractVectorElt(
+ SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ EVT InVT = Op.getOperand(0).getValueType();
+ assert(InVT.isFixedLengthVector() && "Expected fixed length vector type!");
+
+ SDLoc DL(Op);
+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
+ SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0));
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Op.getOperand(1));
+}
+
+SDValue AArch64TargetLowering::LowerFixedLengthInsertVectorElt(
+ SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
+
+ SDLoc DL(Op);
+ EVT InVT = Op.getOperand(0).getValueType();
+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
+ SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0));
+
+ auto ScalableRes = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT, Op0,
+ Op.getOperand(1), Op.getOperand(2));
+
+ return convertFromScalableVector(DAG, VT, ScalableRes);
+}
+
// Convert vector operation 'Op' to an equivalent predicated operation whereby
// the original operation's type is used to construct a suitable predicate.
// NOTE: The results for inactive lanes are undefined.
@@ -17437,10 +17485,6 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorSetccToSVE(
assert(Op.getValueType() == InVT.changeTypeToInteger() &&
"Expected integer result of the same bit length as the inputs!");
- // Expand floating point vector comparisons.
- if (InVT.isFloatingPoint())
- return SDValue();
-
auto Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
auto Op2 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);
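
One of the hunks above lowers SELECT for SVE-backed fixed-length vectors by sign-extending the scalar i1 condition to an element-sized integer, splatting it, and issuing a VSELECT. A scalar model of that splat-and-select trick (standalone C++; the four 32-bit lanes are an assumption for illustration):

#include <array>
#include <cstddef>
#include <cstdint>

std::array<uint32_t, 4> selectViaSplatMask(bool Cond,
                                           const std::array<uint32_t, 4> &T,
                                           const std::array<uint32_t, 4> &F) {
  uint32_t Mask = Cond ? 0xFFFFFFFFu : 0u; // sext i1 -> all-ones / all-zeros
  std::array<uint32_t, 4> R{};
  for (std::size_t I = 0; I < R.size(); ++I)
    R[I] = (T[I] & Mask) | (F[I] & ~Mask); // lane-wise select on the splat
  return R;
}
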
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 1264d6779924..63df22326150 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -936,6 +936,7 @@ private:
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTEP_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp,
bool OverrideNEON = false) const;
@@ -987,6 +988,8 @@ private:
SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
SelectionDAG &DAG) const;
+ SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const;
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const override;
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index f90856d14b2f..9f1b791e18b5 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -382,7 +382,7 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
// stack needs to be dynamically re-aligned, the base pointer is the only
// reliable way to reference the locals.
if (MFI.hasVarSizedObjects() || MF.hasEHFunclets()) {
- if (needsStackRealignment(MF))
+ if (hasStackRealignment(MF))
return true;
if (MF.getSubtarget<AArch64Subtarget>().hasSVE()) {
@@ -437,7 +437,7 @@ AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
assert((!MF.getSubtarget<AArch64Subtarget>().hasSVE() ||
AFI->hasCalculatedStackSizeSVE()) &&
"Expected SVE area to be calculated by this point");
- return TFI.hasFP(MF) && !needsStackRealignment(MF) && !AFI->getStackSizeSVE();
+ return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->getStackSizeSVE();
}
bool AArch64RegisterInfo::requiresFrameIndexScavenging(
@@ -761,7 +761,7 @@ unsigned AArch64RegisterInfo::getLocalAddressRegister(
const auto &MFI = MF.getFrameInfo();
if (!MF.hasEHFunclets() && !MFI.hasVarSizedObjects())
return AArch64::SP;
- else if (needsStackRealignment(MF))
+ else if (hasStackRealignment(MF))
return getBaseRegister();
return getFrameRegister(MF);
}
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 63a53cc0c8f1..df4e2cd44623 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -262,18 +262,6 @@ def index_vector : SDNode<"AArch64ISD::INDEX_VECTOR", SDT_IndexVector, []>;
def reinterpret_cast : SDNode<"AArch64ISD::REINTERPRET_CAST", SDTUnaryOp>;
-def setoge_or_setge : PatFrags<(ops node:$lhs, node:$rhs),
- [(setoge node:$lhs, node:$rhs),
- (setge node:$lhs, node:$rhs)]>;
-def setogt_or_setgt : PatFrags<(ops node:$lhs, node:$rhs),
- [(setogt node:$lhs, node:$rhs),
- (setgt node:$lhs, node:$rhs)]>;
-def setoeq_or_seteq : PatFrags<(ops node:$lhs, node:$rhs),
- [(setoeq node:$lhs, node:$rhs),
- (seteq node:$lhs, node:$rhs)]>;
-def setone_or_setne : PatFrags<(ops node:$lhs, node:$rhs),
- [(setone node:$lhs, node:$rhs),
- (setne node:$lhs, node:$rhs)]>;
def AArch64mul_p_oneuse : PatFrag<(ops node:$pred, node:$src1, node:$src2),
(AArch64mul_p node:$pred, node:$src1, node:$src2), [{
return N->hasOneUse();
@@ -365,8 +353,8 @@ let Predicates = [HasSVE] in {
defm SDIV_ZPZZ : sve_int_bin_pred_sd<AArch64sdiv_p>;
defm UDIV_ZPZZ : sve_int_bin_pred_sd<AArch64udiv_p>;
- defm SDOT_ZZZ : sve_intx_dot<0b0, "sdot", int_aarch64_sve_sdot>;
- defm UDOT_ZZZ : sve_intx_dot<0b1, "udot", int_aarch64_sve_udot>;
+ defm SDOT_ZZZ : sve_intx_dot<0b0, "sdot", AArch64sdot>;
+ defm UDOT_ZZZ : sve_intx_dot<0b1, "udot", AArch64udot>;
defm SDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b0, "sdot", int_aarch64_sve_sdot_lane>;
defm UDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b1, "udot", int_aarch64_sve_udot_lane>;
@@ -1252,11 +1240,11 @@ let Predicates = [HasSVE] in {
defm CMPLO_PPzZI : sve_int_ucmp_vi<0b10, "cmplo", SETULT, SETUGT>;
defm CMPLS_PPzZI : sve_int_ucmp_vi<0b11, "cmpls", SETULE, SETUGE>;
- defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", int_aarch64_sve_fcmpge, setoge_or_setge>;
- defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", int_aarch64_sve_fcmpgt, setogt_or_setgt>;
- defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", int_aarch64_sve_fcmpeq, setoeq_or_seteq>;
- defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", int_aarch64_sve_fcmpne, setone_or_setne>;
- defm FCMUO_PPzZZ : sve_fp_3op_p_pd_cc<0b100, "fcmuo", int_aarch64_sve_fcmpuo, setuo>;
+ defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", int_aarch64_sve_fcmpge, SETOGE, SETGE, SETOLE, SETLE>;
+ defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", int_aarch64_sve_fcmpgt, SETOGT, SETGT, SETOLT, SETLT>;
+ defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", int_aarch64_sve_fcmpeq, SETOEQ, SETEQ, SETOEQ, SETEQ>;
+ defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", int_aarch64_sve_fcmpne, SETONE, SETNE, SETONE, SETNE>;
+ defm FCMUO_PPzZZ : sve_fp_3op_p_pd_cc<0b100, "fcmuo", int_aarch64_sve_fcmpuo, SETUO, SETUO, SETUO, SETUO>;
defm FACGE_PPzZZ : sve_fp_3op_p_pd<0b101, "facge", int_aarch64_sve_facge>;
defm FACGT_PPzZZ : sve_fp_3op_p_pd<0b111, "facgt", int_aarch64_sve_facgt>;
@@ -2288,8 +2276,6 @@ let Predicates = [HasSVE] in {
(EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), dsub)>;
def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)),
(EXTRACT_SUBREG (DUP_ZZI_H ZPR:$vec, sve_elm_idx_extdup_h:$index), hsub)>;
- def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)),
- (EXTRACT_SUBREG (DUP_ZZI_H ZPR:$vec, sve_elm_idx_extdup_h:$index), hsub)>;
def : Pat<(f16 (vector_extract (nxv4f16 ZPR:$vec), sve_elm_idx_extdup_s:$index)),
(EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), hsub)>;
def : Pat<(f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)),
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA55.td b/llvm/lib/Target/AArch64/AArch64SchedA55.td
index ff7766f2caec..0015c27228f6 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA55.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA55.td
@@ -339,5 +339,4 @@ def : InstRW<[CortexA55WriteFSqrtHP], (instregex "^.*SQRT.*16$")>;
def : InstRW<[CortexA55WriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
def : InstRW<[CortexA55WriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
-def A55RCU : RetireControlUnit<64, 0>;
}
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 8fe2f125982f..ce5a0128e622 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -196,9 +196,14 @@ protected:
// HasZeroCycleZeroing - Has zero-cycle zeroing instructions.
bool HasZeroCycleZeroing = false;
bool HasZeroCycleZeroingGP = false;
- bool HasZeroCycleZeroingFP = false;
bool HasZeroCycleZeroingFPWorkaround = false;
+  // It is generally beneficial to rewrite "fmov s0, wzr" as "movi d0, #0",
+  // since movi is at least as efficient across all cores. Newer cores can
+  // eliminate the fmov early, making the two equivalent there, but this is
+  // not true for all implementations.
+ bool HasZeroCycleZeroingFP = true;
+
// StrictAlign - Disallow unaligned memory accesses.
bool StrictAlign = false;
@@ -557,7 +562,7 @@ public:
bool enableEarlyIfConversion() const override;
- bool enableAdvancedRASplitCost() const override { return true; }
+ bool enableAdvancedRASplitCost() const override { return false; }
std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 23b6978edac1..148239b3d789 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -212,7 +212,7 @@ AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
return TTI::PSK_Software;
}
-unsigned
+InstructionCost
AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) {
auto *RetTy = ICA.getReturnType();
@@ -260,6 +260,19 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return LT.first;
break;
}
+ case Intrinsic::experimental_stepvector: {
+ unsigned Cost = 1; // Cost of the `index' instruction
+ auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
+ // Legalisation of illegal vectors involves an `index' instruction plus
+ // (LT.first - 1) vector adds.
+ if (LT.first > 1) {
+ Type *LegalVTy = EVT(LT.second).getTypeForEVT(RetTy->getContext());
+ unsigned AddCost =
+ getArithmeticInstrCost(Instruction::Add, LegalVTy, CostKind);
+ Cost += AddCost * (LT.first - 1);
+ }
+ return Cost;
+ }
default:
break;
}
@@ -378,6 +391,23 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
+ // Truncations on nxvmiN
+ { ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i16, 1 },
+ { ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i32, 1 },
+ { ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i64, 1 },
+ { ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i16, 1 },
+ { ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i32, 1 },
+ { ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i64, 2 },
+ { ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i16, 1 },
+ { ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i32, 3 },
+ { ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i64, 5 },
+ { ISD::TRUNCATE, MVT::nxv2i16, MVT::nxv2i32, 1 },
+ { ISD::TRUNCATE, MVT::nxv2i32, MVT::nxv2i64, 1 },
+ { ISD::TRUNCATE, MVT::nxv4i16, MVT::nxv4i32, 1 },
+ { ISD::TRUNCATE, MVT::nxv4i32, MVT::nxv4i64, 2 },
+ { ISD::TRUNCATE, MVT::nxv8i16, MVT::nxv8i32, 3 },
+ { ISD::TRUNCATE, MVT::nxv8i32, MVT::nxv8i64, 6 },
+
// The number of shll instructions for the extension.
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
@@ -459,6 +489,23 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
{ ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2 },
+ // Lowering scalable
+ { ISD::FP_TO_SINT, MVT::nxv2i32, MVT::nxv2f32, 1 },
+ { ISD::FP_TO_SINT, MVT::nxv4i32, MVT::nxv4f32, 1 },
+ { ISD::FP_TO_SINT, MVT::nxv2i64, MVT::nxv2f64, 1 },
+ { ISD::FP_TO_UINT, MVT::nxv2i32, MVT::nxv2f32, 1 },
+ { ISD::FP_TO_UINT, MVT::nxv4i32, MVT::nxv4f32, 1 },
+ { ISD::FP_TO_UINT, MVT::nxv2i64, MVT::nxv2f64, 1 },
+
+
+ // Complex, from nxv2f32 legal type is nxv2i32 (no cost) or nxv2i64 (1 ext)
+ { ISD::FP_TO_SINT, MVT::nxv2i64, MVT::nxv2f32, 2 },
+ { ISD::FP_TO_SINT, MVT::nxv2i16, MVT::nxv2f32, 1 },
+ { ISD::FP_TO_SINT, MVT::nxv2i8, MVT::nxv2f32, 1 },
+ { ISD::FP_TO_UINT, MVT::nxv2i64, MVT::nxv2f32, 2 },
+ { ISD::FP_TO_UINT, MVT::nxv2i16, MVT::nxv2f32, 1 },
+ { ISD::FP_TO_UINT, MVT::nxv2i8, MVT::nxv2f32, 1 },
+
// Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
{ ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
@@ -466,6 +513,75 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
{ ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 },
+
+ // Complex, from nxv2f64: legal type is nxv2i32, 1 narrowing => ~2.
+ { ISD::FP_TO_SINT, MVT::nxv2i32, MVT::nxv2f64, 2 },
+ { ISD::FP_TO_SINT, MVT::nxv2i16, MVT::nxv2f64, 2 },
+ { ISD::FP_TO_SINT, MVT::nxv2i8, MVT::nxv2f64, 2 },
+ { ISD::FP_TO_UINT, MVT::nxv2i32, MVT::nxv2f64, 2 },
+ { ISD::FP_TO_UINT, MVT::nxv2i16, MVT::nxv2f64, 2 },
+ { ISD::FP_TO_UINT, MVT::nxv2i8, MVT::nxv2f64, 2 },
+
+ // Complex, from nxv4f32 legal type is nxv4i16, 1 narrowing => ~2
+ { ISD::FP_TO_SINT, MVT::nxv4i16, MVT::nxv4f32, 2 },
+ { ISD::FP_TO_SINT, MVT::nxv4i8, MVT::nxv4f32, 2 },
+ { ISD::FP_TO_UINT, MVT::nxv4i16, MVT::nxv4f32, 2 },
+ { ISD::FP_TO_UINT, MVT::nxv4i8, MVT::nxv4f32, 2 },
+
+ // Complex, from nxv8f64: legal type is nxv8i32, 1 narrowing => ~2.
+ { ISD::FP_TO_SINT, MVT::nxv8i32, MVT::nxv8f64, 2 },
+ { ISD::FP_TO_SINT, MVT::nxv8i16, MVT::nxv8f64, 2 },
+ { ISD::FP_TO_SINT, MVT::nxv8i8, MVT::nxv8f64, 2 },
+ { ISD::FP_TO_UINT, MVT::nxv8i32, MVT::nxv8f64, 2 },
+ { ISD::FP_TO_UINT, MVT::nxv8i16, MVT::nxv8f64, 2 },
+ { ISD::FP_TO_UINT, MVT::nxv8i8, MVT::nxv8f64, 2 },
+
+ // Complex, from nxv4f64: legal type is nxv4i32, 1 narrowing => ~2.
+ { ISD::FP_TO_SINT, MVT::nxv4i32, MVT::nxv4f64, 2 },
+ { ISD::FP_TO_SINT, MVT::nxv4i16, MVT::nxv4f64, 2 },
+ { ISD::FP_TO_SINT, MVT::nxv4i8, MVT::nxv4f64, 2 },
+ { ISD::FP_TO_UINT, MVT::nxv4i32, MVT::nxv4f64, 2 },
+ { ISD::FP_TO_UINT, MVT::nxv4i16, MVT::nxv4f64, 2 },
+ { ISD::FP_TO_UINT, MVT::nxv4i8, MVT::nxv4f64, 2 },
+
+ // Complex, from nxv8f32: legal type is nxv8i32 (no cost) or nxv8i64 (1 ext).
+ { ISD::FP_TO_SINT, MVT::nxv8i64, MVT::nxv8f32, 2 },
+ { ISD::FP_TO_SINT, MVT::nxv8i16, MVT::nxv8f32, 3 },
+ { ISD::FP_TO_SINT, MVT::nxv8i8, MVT::nxv8f32, 1 },
+ { ISD::FP_TO_UINT, MVT::nxv8i64, MVT::nxv8f32, 2 },
+ { ISD::FP_TO_UINT, MVT::nxv8i16, MVT::nxv8f32, 1 },
+ { ISD::FP_TO_UINT, MVT::nxv8i8, MVT::nxv8f32, 1 },
+
+ // Truncate from nxvmf32 to nxvmf16.
+ { ISD::FP_ROUND, MVT::nxv2f16, MVT::nxv2f32, 1 },
+ { ISD::FP_ROUND, MVT::nxv4f16, MVT::nxv4f32, 1 },
+ { ISD::FP_ROUND, MVT::nxv8f16, MVT::nxv8f32, 3 },
+
+ // Truncate from nxvmf64 to nxvmf16.
+ { ISD::FP_ROUND, MVT::nxv2f16, MVT::nxv2f64, 1 },
+ { ISD::FP_ROUND, MVT::nxv4f16, MVT::nxv4f64, 3 },
+ { ISD::FP_ROUND, MVT::nxv8f16, MVT::nxv8f64, 7 },
+
+ // Truncate from nxvmf64 to nxvmf32.
+ { ISD::FP_ROUND, MVT::nxv2f32, MVT::nxv2f64, 1 },
+ { ISD::FP_ROUND, MVT::nxv4f32, MVT::nxv4f64, 3 },
+ { ISD::FP_ROUND, MVT::nxv8f32, MVT::nxv8f64, 6 },
+
+ // Extend from nxvmf16 to nxvmf32.
+ { ISD::FP_EXTEND, MVT::nxv2f32, MVT::nxv2f16, 1},
+ { ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4f16, 1},
+ { ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8f16, 2},
+
+ // Extend from nxvmf16 to nxvmf64.
+ { ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f16, 1},
+ { ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f16, 2},
+ { ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f16, 4},
+
+ // Extend from nxvmf32 to nxvmf64.
+ { ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f32, 1},
+ { ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f32, 2},
+ { ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f32, 6},
+
};
if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
@@ -537,7 +653,8 @@ int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
}
unsigned AArch64TTIImpl::getCFInstrCost(unsigned Opcode,
- TTI::TargetCostKind CostKind) {
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) {
if (CostKind != TTI::TCK_RecipThroughput)
return Opcode == Instruction::PHI ? 0 : 1;
assert(CostKind == TTI::TCK_RecipThroughput && "unexpected CostKind");
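
The experimental_stepvector cost added above is one `index' instruction plus one vector add per extra legal part when the requested type splits. A small sketch of that formula (hypothetical helper, not an LLVM API):

// Cost = 1 INDEX + (NumLegalParts - 1) ADDs, mirroring the computation in
// getIntrinsicInstrCost above.
unsigned stepVectorCost(unsigned NumLegalParts, unsigned AddCost) {
  unsigned Cost = 1;
  if (NumLegalParts > 1)
    Cost += AddCost * (NumLegalParts - 1);
  return Cost;
}

For example, an illegal nxv8i32 step vector that splits into two legal nxv4i32 parts would cost 1 + AddCost.
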
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index afb470592c8b..7a6cfd36fcc3 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -97,18 +97,22 @@ public:
return 31;
}
- unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
- TTI::TargetCostKind CostKind);
-
- unsigned getRegisterBitWidth(bool Vector) const {
- if (Vector) {
+ InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind);
+
+ TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
+ switch (K) {
+ case TargetTransformInfo::RGK_Scalar:
+ return TypeSize::getFixed(64);
+ case TargetTransformInfo::RGK_FixedWidthVector:
if (ST->hasSVE())
- return std::max(ST->getMinSVEVectorSizeInBits(), 128u);
- if (ST->hasNEON())
- return 128;
- return 0;
+ return TypeSize::getFixed(
+ std::max(ST->getMinSVEVectorSizeInBits(), 128u));
+ return TypeSize::getFixed(ST->hasNEON() ? 128 : 0);
+ case TargetTransformInfo::RGK_ScalableVector:
+ return TypeSize::getScalable(ST->hasSVE() ? 128 : 0);
}
- return 64;
+ llvm_unreachable("Unsupported register kind");
}
unsigned getMinVectorRegisterBitWidth() {
@@ -135,7 +139,8 @@ public:
int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
unsigned Index);
- unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);
+ unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index e495003e3972..9141e786977a 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -2906,6 +2906,7 @@ static const struct Extension {
{"mte", {AArch64::FeatureMTE}},
{"memtag", {AArch64::FeatureMTE}},
{"tlb-rmi", {AArch64::FeatureTLB_RMI}},
+ {"pan", {AArch64::FeaturePAN}},
{"pan-rwv", {AArch64::FeaturePAN_RWV}},
{"ccpp", {AArch64::FeatureCCPP}},
{"rcpc", {AArch64::FeatureRCPC}},
@@ -2921,7 +2922,6 @@ static const struct Extension {
{"pauth", {AArch64::FeaturePAuth}},
{"flagm", {AArch64::FeatureFlagM}},
// FIXME: Unsupported extensions
- {"pan", {}},
{"lor", {}},
{"rdma", {}},
{"profile", {}},
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
index 7c543028af9f..a1392ccb59e6 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
@@ -29,10 +29,31 @@ AArch64GISelUtils::getAArch64VectorSplat(const MachineInstr &MI,
return RegOrConstant(Src);
}
-Optional<int64_t> AArch64GISelUtils::getAArch64VectorSplatScalar(
- const MachineInstr &MI, const MachineRegisterInfo &MRI) {
+Optional<int64_t>
+AArch64GISelUtils::getAArch64VectorSplatScalar(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
auto Splat = getAArch64VectorSplat(MI, MRI);
if (!Splat || Splat->isReg())
return None;
return Splat->getCst();
}
+
+bool AArch64GISelUtils::isCMN(const MachineInstr *MaybeSub,
+ const CmpInst::Predicate &Pred,
+ const MachineRegisterInfo &MRI) {
+ // Match:
+ //
+ // %sub = G_SUB 0, %y
+ // %cmp = G_ICMP eq/ne, %sub, %z
+ //
+ // Or
+ //
+ // %sub = G_SUB 0, %y
+ // %cmp = G_ICMP eq/ne, %z, %sub
+ if (!MaybeSub || MaybeSub->getOpcode() != TargetOpcode::G_SUB ||
+ !CmpInst::isEquality(Pred))
+ return false;
+ auto MaybeZero =
+ getConstantVRegValWithLookThrough(MaybeSub->getOperand(1).getReg(), MRI);
+ return MaybeZero && MaybeZero->Value.getZExtValue() == 0;
+}
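
The new isCMN helper is restricted to equality predicates: cmp z, (0 - y) and cmn z, y give the same zero result because z == 0 - y exactly when z + y == 0 in modular arithmetic, while the carry/overflow flags need not agree, so ordered predicates are not folded. A tiny standalone check of the value identity (plain C++, illustrative only):

#include <cstdint>

// z == (0 - y)  <=>  (z + y) == 0 in 32-bit modular arithmetic.
bool eqViaSub(uint32_t Z, uint32_t Y) { return Z == 0u - Y; }
bool eqViaCmn(uint32_t Z, uint32_t Y) { return Z + Y == 0u; }
// The two predicates agree for every pair of 32-bit values.
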
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h
index b1e575d4e4d6..142d999ef05a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h
@@ -15,9 +15,12 @@
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/Register.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/IR/InstrTypes.h"
#include <cstdint>
namespace llvm {
+
namespace AArch64GISelUtils {
/// \returns true if \p C is a legal immediate operand for an arithmetic
@@ -36,6 +39,11 @@ Optional<RegOrConstant> getAArch64VectorSplat(const MachineInstr &MI,
Optional<int64_t> getAArch64VectorSplatScalar(const MachineInstr &MI,
const MachineRegisterInfo &MRI);
+/// \returns true if \p MaybeSub and \p Pred are part of a CMN tree for an
+/// integer compare.
+bool isCMN(const MachineInstr *MaybeSub, const CmpInst::Predicate &Pred,
+ const MachineRegisterInfo &MRI);
+
} // namespace AArch64GISelUtils
} // namespace llvm
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 68c2e1e95048..7160432884fe 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -18,6 +18,7 @@
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
+#include "AArch64GlobalISelUtils.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/Optional.h"
@@ -1796,7 +1797,7 @@ bool AArch64InstructionSelector::selectVectorAshrLshr(
NegOpc = AArch64::NEGv8i16;
} else if (Ty == LLT::vector(16, 8)) {
Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
- NegOpc = AArch64::NEGv8i16;
+ NegOpc = AArch64::NEGv16i8;
} else if (Ty == LLT::vector(8, 8)) {
Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
NegOpc = AArch64::NEGv8i8;
@@ -2305,6 +2306,30 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
MachineIRBuilder MIB(I);
switch (Opcode) {
+ case TargetOpcode::G_SBFX:
+ case TargetOpcode::G_UBFX: {
+ static const unsigned OpcTable[2][2] = {
+ {AArch64::UBFMWri, AArch64::UBFMXri},
+ {AArch64::SBFMWri, AArch64::SBFMXri}};
+ bool IsSigned = Opcode == TargetOpcode::G_SBFX;
+ unsigned Size = Ty.getSizeInBits();
+ unsigned Opc = OpcTable[IsSigned][Size == 64];
+ auto Cst1 =
+ getConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
+ assert(Cst1 && "Should have gotten a constant for src 1?");
+ auto Cst2 =
+ getConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
+ assert(Cst2 && "Should have gotten a constant for src 2?");
+ auto LSB = Cst1->Value.getZExtValue();
+ auto Width = Cst2->Value.getZExtValue();
+ MachineIRBuilder MIB(I);
+ auto BitfieldInst =
+ MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
+ .addImm(LSB)
+ .addImm(Width);
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
+ }
case TargetOpcode::G_BRCOND:
return selectCompareBranch(I, MF, MRI);
@@ -4553,37 +4578,10 @@ MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
//
// cmn z, y
- // Helper lambda to detect the subtract followed by the compare.
- // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
- auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
- if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
- return false;
-
- // Need to make sure NZCV is the same at the end of the transformation.
- if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
- return false;
-
- // We want to match against SUBs.
- if (DefMI->getOpcode() != TargetOpcode::G_SUB)
- return false;
-
- // Make sure that we're getting
- // x = G_SUB 0, y
- auto ValAndVReg =
- getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
- if (!ValAndVReg || ValAndVReg->Value != 0)
- return false;
-
- // This can safely be represented as a CMN.
- return true;
- };
-
// Check if the RHS or LHS of the G_ICMP is defined by a SUB
MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
- CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
- const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);
-
+ auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
// Given this:
//
// x = G_SUB 0, y
@@ -4592,7 +4590,7 @@ MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
// Produce this:
//
// cmn y, z
- if (IsCMN(LHSDef, CC))
+ if (isCMN(LHSDef, P, MRI))
return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
// Same idea here, but with the RHS of the compare instead:
@@ -4605,7 +4603,7 @@ MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
// Produce this:
//
// cmn z, y
- if (IsCMN(RHSDef, CC))
+ if (isCMN(RHSDef, P, MRI))
return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
// Given this:
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 83ffe09612bb..d2b7b566cf85 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -647,6 +647,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.legalForCartesianProduct(
{s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
.scalarize(1);
+ getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
.legalIf([=](const LegalityQuery &Query) {
@@ -681,7 +682,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();
- getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
+ getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
+ .libcall();
getActionDefinitionsBuilder(G_ABS).lowerIf(
[=](const LegalityQuery &Query) { return Query.Types[0].isScalar(); });
@@ -689,16 +691,33 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_VECREDUCE_FADD)
// We only have FADDP to do reduction-like operations. Lower the rest.
.legalFor({{s32, v2s32}, {s64, v2s64}})
+ .clampMaxNumElements(1, s64, 2)
+ .clampMaxNumElements(1, s32, 2)
.lower();
getActionDefinitionsBuilder(G_VECREDUCE_ADD)
.legalFor(
{{s8, v16s8}, {s16, v8s16}, {s32, v4s32}, {s32, v2s32}, {s64, v2s64}})
+ .clampMaxNumElements(1, s64, 2)
+ .clampMaxNumElements(1, s32, 4)
.lower();
getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT})
.lowerIf([=](const LegalityQuery &Q) { return Q.Types[0].isScalar(); });
+ getActionDefinitionsBuilder({G_FSHL, G_FSHR}).lower();
+
+ getActionDefinitionsBuilder(G_ROTR)
+ .legalFor({{s32, s64}, {s64, s64}})
+ .customIf([=](const LegalityQuery &Q) {
+ return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
+ })
+ .lower();
+ getActionDefinitionsBuilder(G_ROTL).lower();
+
+ getActionDefinitionsBuilder({G_SBFX, G_UBFX})
+ .customFor({{s32, s32}, {s64, s64}});
+
computeTables();
verify(*ST.getInstrInfo());
}
@@ -725,11 +744,33 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
case TargetOpcode::G_TRUNC:
return legalizeVectorTrunc(MI, Helper);
+ case TargetOpcode::G_SBFX:
+ case TargetOpcode::G_UBFX:
+ return legalizeBitfieldExtract(MI, MRI, Helper);
+ case TargetOpcode::G_ROTR:
+ return legalizeRotate(MI, MRI, Helper);
}
llvm_unreachable("expected switch to return");
}
+bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ LegalizerHelper &Helper) const {
+  // To allow imported patterns to match, ensure that the rotate amount is
+  // widened to 64 bits with an extension.
+ Register AmtReg = MI.getOperand(2).getReg();
+ LLT AmtTy = MRI.getType(AmtReg);
+ (void)AmtTy;
+ assert(AmtTy.isScalar() && "Expected a scalar rotate");
+ assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
+ auto NewAmt = Helper.MIRBuilder.buildSExt(LLT::scalar(64), AmtReg);
+ Helper.Observer.changingInstr(MI);
+ MI.getOperand(2).setReg(NewAmt.getReg(0));
+ Helper.Observer.changedInstr(MI);
+ return true;
+}
+
static void extractParts(Register Reg, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder, LLT Ty, int NumParts,
SmallVectorImpl<Register> &VRegs) {
@@ -944,3 +985,11 @@ bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
MI.eraseFromParent();
return true;
}
+
+bool AArch64LegalizerInfo::legalizeBitfieldExtract(
+ MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
+ // Only legal if we can select immediate forms.
+ // TODO: Lower this otherwise.
+ return getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
+ getConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
+}
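
legalizeRotate above widens a sub-64-bit rotate amount so the imported selection patterns, which expect a 64-bit amount register, can match. A compact model of the rotate being produced (standalone C++; assumes 32-bit data and that the hardware reduces the amount modulo the register width):

#include <cstdint>

// 32-bit rotate-right with the (already widened) amount reduced modulo 32.
uint32_t rotr32(uint32_t X, uint64_t Amt64) {
  unsigned Amt = static_cast<unsigned>(Amt64 & 31);
  return Amt ? (X >> Amt) | (X << (32 - Amt)) : X;
}
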
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
index 8217e37c8512..5d78dc64a2f1 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
namespace llvm {
@@ -47,6 +48,10 @@ private:
MachineIRBuilder &MIRBuilder,
GISelChangeObserver &Observer) const;
bool legalizeVectorTrunc(MachineInstr &MI, LegalizerHelper &Helper) const;
+ bool legalizeBitfieldExtract(MachineInstr &MI, MachineRegisterInfo &MRI,
+ LegalizerHelper &Helper) const;
+ bool legalizeRotate(MachineInstr &MI, MachineRegisterInfo &MRI,
+ LegalizerHelper &Helper) const;
const AArch64Subtarget *ST;
};
} // End llvm namespace.
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index 66a5747e3031..4bfbcb5c419b 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -36,6 +37,7 @@
#define DEBUG_TYPE "aarch64-postlegalizer-combiner"
using namespace llvm;
+using namespace MIPatternMatch;
/// This combine tries do what performExtractVectorEltCombine does in SDAG.
/// Rewrite for pairwise fadd pattern
@@ -238,6 +240,34 @@ bool applyAArch64MulConstCombine(
return true;
}
+/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
+static bool matchBitfieldExtractFromSExtInReg(
+ MachineInstr &MI, MachineRegisterInfo &MRI,
+ std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ int64_t Width = MI.getOperand(2).getImm();
+ LLT Ty = MRI.getType(Src);
+ assert((Ty == LLT::scalar(32) || Ty == LLT::scalar(64)) &&
+ "Unexpected type for G_SEXT_INREG?");
+ Register ShiftSrc;
+ int64_t ShiftImm;
+ if (!mi_match(
+ Src, MRI,
+ m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
+ m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
+ return false;
+ if (ShiftImm < 0 || ShiftImm + Width > Ty.getSizeInBits())
+ return false;
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto Cst1 = B.buildConstant(Ty, ShiftImm);
+ auto Cst2 = B.buildConstant(Ty, ShiftImm + Width - 1);
+ B.buildInstr(TargetOpcode::G_SBFX, {Dst}, {ShiftSrc, Cst1, Cst2});
+ };
+ return true;
+}
+
#define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AArch64GenPostLegalizeGICombiner.inc"
#undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
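
matchBitfieldExtractFromSExtInReg above rewrites G_SEXT_INREG of a right shift into G_SBFX, provided the shift amount plus the width stays inside the register, which is exactly the bound it checks. The semantics being matched, as a standalone sketch (plain C++; ubfx/sbfx here are illustrative helpers, not LLVM functions):

#include <cstdint>

// Zero-extract Width bits of X starting at bit LSB (1 <= Width <= 64).
uint64_t ubfx(uint64_t X, unsigned LSB, unsigned Width) {
  uint64_t Mask = Width == 64 ? ~0ULL : (1ULL << Width) - 1;
  return (X >> LSB) & Mask;
}

// The same field, sign-extended from Width bits; equivalent to
// sext_inreg(lshr(X, LSB), Width) when LSB + Width <= 64.
int64_t sbfx(uint64_t X, unsigned LSB, unsigned Width) {
  uint64_t Field = ubfx(X, LSB, Width);
  uint64_t Sign = 1ULL << (Width - 1);
  return static_cast<int64_t>((Field ^ Sign) - Sign);
}
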
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 130416a04c6d..558cd239f6f7 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -735,6 +735,113 @@ static bool applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
return true;
}
+/// \returns how many instructions would be saved by folding a G_ICMP's shift
+/// and/or extension operations.
+static unsigned getCmpOperandFoldingProfit(Register CmpOp,
+ const MachineRegisterInfo &MRI) {
+ // No instructions to save if there's more than one use or no uses.
+ if (!MRI.hasOneNonDBGUse(CmpOp))
+ return 0;
+
+ // FIXME: This is duplicated with the selector. (See: selectShiftedRegister)
+ auto IsSupportedExtend = [&](const MachineInstr &MI) {
+ if (MI.getOpcode() == TargetOpcode::G_SEXT_INREG)
+ return true;
+ if (MI.getOpcode() != TargetOpcode::G_AND)
+ return false;
+ auto ValAndVReg =
+ getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ if (!ValAndVReg)
+ return false;
+ uint64_t Mask = ValAndVReg->Value.getZExtValue();
+ return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
+ };
+
+ MachineInstr *Def = getDefIgnoringCopies(CmpOp, MRI);
+ if (IsSupportedExtend(*Def))
+ return 1;
+
+ unsigned Opc = Def->getOpcode();
+ if (Opc != TargetOpcode::G_SHL && Opc != TargetOpcode::G_ASHR &&
+ Opc != TargetOpcode::G_LSHR)
+ return 0;
+
+ auto MaybeShiftAmt =
+ getConstantVRegValWithLookThrough(Def->getOperand(2).getReg(), MRI);
+ if (!MaybeShiftAmt)
+ return 0;
+ uint64_t ShiftAmt = MaybeShiftAmt->Value.getZExtValue();
+ MachineInstr *ShiftLHS =
+ getDefIgnoringCopies(Def->getOperand(1).getReg(), MRI);
+
+ // Check if we can fold an extend and a shift.
+ // FIXME: This is duplicated with the selector. (See:
+ // selectArithExtendedRegister)
+ if (IsSupportedExtend(*ShiftLHS))
+ return (ShiftAmt <= 4) ? 2 : 1;
+
+ LLT Ty = MRI.getType(Def->getOperand(0).getReg());
+ if (Ty.isVector())
+ return 0;
+ unsigned ShiftSize = Ty.getSizeInBits();
+ if ((ShiftSize == 32 && ShiftAmt <= 31) ||
+ (ShiftSize == 64 && ShiftAmt <= 63))
+ return 1;
+ return 0;
+}
+
+/// \returns true if it would be profitable to swap the LHS and RHS of a G_ICMP
+/// instruction \p MI.
+static bool trySwapICmpOperands(MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ assert(MI.getOpcode() == TargetOpcode::G_ICMP);
+  // Swap the operands if it would introduce a profitable folding opportunity
+  // (e.g. a shift + extend).
+ //
+ // For example:
+ // lsl w13, w11, #1
+ // cmp w13, w12
+ // can be turned into:
+ // cmp w12, w11, lsl #1
+
+ // Don't swap if there's a constant on the RHS, because we know we can fold
+ // that.
+ Register RHS = MI.getOperand(3).getReg();
+ auto RHSCst = getConstantVRegValWithLookThrough(RHS, MRI);
+ if (RHSCst && isLegalArithImmed(RHSCst->Value.getSExtValue()))
+ return false;
+
+ Register LHS = MI.getOperand(2).getReg();
+ auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+ auto GetRegForProfit = [&](Register Reg) {
+ MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
+ return isCMN(Def, Pred, MRI) ? Def->getOperand(2).getReg() : Reg;
+ };
+
+  // We don't have a constant on the RHS. If we swap the LHS and RHS of the
+ // compare, would we be able to fold more instructions?
+ Register TheLHS = GetRegForProfit(LHS);
+ Register TheRHS = GetRegForProfit(RHS);
+
+ // If the LHS is more likely to give us a folding opportunity, then swap the
+ // LHS and RHS.
+ return (getCmpOperandFoldingProfit(TheLHS, MRI) >
+ getCmpOperandFoldingProfit(TheRHS, MRI));
+}
+
+static bool applySwapICmpOperands(MachineInstr &MI,
+ GISelChangeObserver &Observer) {
+ auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+ Register LHS = MI.getOperand(2).getReg();
+ Register RHS = MI.getOperand(3).getReg();
+ Observer.changedInstr(MI);
+ MI.getOperand(1).setPredicate(CmpInst::getSwappedPredicate(Pred));
+ MI.getOperand(2).setReg(RHS);
+ MI.getOperand(3).setReg(LHS);
+ Observer.changedInstr(MI);
+ return true;
+}
+
#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
#include "AArch64GenPostLegalizeGILowering.inc"
#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
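The swap in applySwapICmpOperands is only sound because the predicate is swapped along with the operands. A minimal standalone sketch of that invariant, under a reduced predicate set (the Pred enum and the swapped/icmp helpers below are illustrative, not the LLVM CmpInst API):

#include <cassert>
#include <cstdint>

enum class Pred { SLT, SGT, SLE, SGE, EQ, NE };

// Mirrors the idea of CmpInst::getSwappedPredicate for this reduced set.
static Pred swapped(Pred P) {
  switch (P) {
  case Pred::SLT: return Pred::SGT;
  case Pred::SGT: return Pred::SLT;
  case Pred::SLE: return Pred::SGE;
  case Pred::SGE: return Pred::SLE;
  default:        return P; // EQ and NE are symmetric.
  }
}

static bool icmp(Pred P, int64_t L, int64_t R) {
  switch (P) {
  case Pred::SLT: return L < R;
  case Pred::SGT: return L > R;
  case Pred::SLE: return L <= R;
  case Pred::SGE: return L >= R;
  case Pred::EQ:  return L == R;
  case Pred::NE:  return L != R;
  }
  return false;
}

int main() {
  const Pred Preds[] = {Pred::SLT, Pred::SGT, Pred::SLE,
                        Pred::SGE, Pred::EQ,  Pred::NE};
  // Swapping operands and predicate together never changes the result, so the
  // lowering is free to pick whichever order folds more instructions.
  for (Pred P : Preds)
    for (int64_t L = -2; L <= 2; ++L)
      for (int64_t R = -2; R <= 2; ++R)
        assert(icmp(P, L, R) == icmp(swapped(P), R, L));
  return 0;
}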
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
index 26029b4db11f..4efc63ea68b7 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
@@ -217,6 +217,46 @@ static bool applyFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
return true;
}
+/// Replace a G_MEMSET storing a value of 0 with a G_BZERO instruction if it
+/// is supported and beneficial to do so.
+///
+/// \note This only applies on Darwin.
+///
+/// \returns true if \p MI was replaced with a G_BZERO.
+static bool tryEmitBZero(MachineInstr &MI, MachineIRBuilder &MIRBuilder,
+ bool MinSize) {
+ assert(MI.getOpcode() == TargetOpcode::G_MEMSET);
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+ auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
+ if (!TLI.getLibcallName(RTLIB::BZERO))
+ return false;
+ auto Zero = getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI);
+ if (!Zero || Zero->Value.getSExtValue() != 0)
+ return false;
+
+ // It's not faster to use bzero rather than memset for sizes <= 256.
+ // However, it *does* save us a mov from wzr, so if we're going for
+ // minsize, use bzero even if it's slower.
+ if (!MinSize) {
+ // If the size is known, check it. If it is not known, assume using bzero is
+ // better.
+ if (auto Size =
+ getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI)) {
+ if (Size->Value.getSExtValue() <= 256)
+ return false;
+ }
+ }
+
+ MIRBuilder.setInstrAndDebugLoc(MI);
+ MIRBuilder
+ .buildInstr(TargetOpcode::G_BZERO, {},
+ {MI.getOperand(0), MI.getOperand(2)})
+ .addImm(MI.getOperand(3).getImm())
+ .addMemOperand(*MI.memoperands_begin());
+ MI.eraseFromParent();
+ return true;
+}
+
class AArch64PreLegalizerCombinerHelperState {
protected:
CombinerHelper &Helper;
@@ -263,7 +303,8 @@ bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
if (Generated.tryCombineAll(Observer, MI, B))
return true;
- switch (MI.getOpcode()) {
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
case TargetOpcode::G_CONCAT_VECTORS:
return Helper.tryCombineConcatVectors(MI);
case TargetOpcode::G_SHUFFLE_VECTOR:
@@ -275,7 +316,11 @@ bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
// heuristics decide.
unsigned MaxLen = EnableOpt ? 0 : 32;
// Try to inline memcpy type calls if optimizations are enabled.
- return !EnableMinSize ? Helper.tryCombineMemCpyFamily(MI, MaxLen) : false;
+ if (!EnableMinSize && Helper.tryCombineMemCpyFamily(MI, MaxLen))
+ return true;
+ if (Opc == TargetOpcode::G_MEMSET)
+ return tryEmitBZero(MI, B, EnableMinSize);
+ return false;
}
}
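For reference, the size/minsize policy encoded in tryEmitBZero can be summarised by a small standalone predicate. This is only a sketch of the heuristic described in the comments above, and shouldUseBZero is a hypothetical name, not an LLVM API:

#include <cassert>
#include <cstdint>
#include <optional>

// Hypothetical helper mirroring the policy in tryEmitBZero (not an LLVM API).
static bool shouldUseBZero(bool TargetHasBZero,
                           std::optional<int64_t> StoredValue,
                           std::optional<int64_t> Size, bool MinSize) {
  if (!TargetHasBZero)
    return false;           // no bzero libcall available on this target
  if (!StoredValue || *StoredValue != 0)
    return false;           // bzero can only store zeros
  if (MinSize)
    return true;            // saves the mov from wzr even when slower
  // Not faster than memset for known sizes <= 256; unknown sizes get bzero.
  return !Size || *Size > 256;
}

int main() {
  assert(!shouldUseBZero(true, 42, 1024, false));       // non-zero store value
  assert(!shouldUseBZero(true, 0, 128, false));         // small known size
  assert(shouldUseBZero(true, 0, 128, true));           // minsize still wins
  assert(shouldUseBZero(true, 0, std::nullopt, false)); // unknown size
  assert(shouldUseBZero(true, 0, 1024, false));         // large known size
  return 0;
}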
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 178c83b98599..2d3aa10b8c1e 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -1019,6 +1019,30 @@ multiclass sve_int_perm_dup_i<string asm> {
(!cast<Instruction>(NAME # _D) ZPR64:$Zd, FPR64asZPR:$Dn, 0), 2>;
def : InstAlias<"mov $Zd, $Qn",
(!cast<Instruction>(NAME # _Q) ZPR128:$Zd, FPR128asZPR:$Qn, 0), 2>;
+
+ // Duplicate extracted element of vector into all vector elements
+ def : Pat<(nxv16i8 (AArch64dup (i32 (vector_extract (nxv16i8 ZPR:$vec), sve_elm_idx_extdup_b:$index)))),
+ (!cast<Instruction>(NAME # _B) ZPR:$vec, sve_elm_idx_extdup_b:$index)>;
+ def : Pat<(nxv8i16 (AArch64dup (i32 (vector_extract (nxv8i16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
+ (!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
+ def : Pat<(nxv4i32 (AArch64dup (i32 (vector_extract (nxv4i32 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
+ (!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
+ def : Pat<(nxv2i64 (AArch64dup (i64 (vector_extract (nxv2i64 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
+ (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
+ def : Pat<(nxv8f16 (AArch64dup (f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
+ (!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
+ def : Pat<(nxv8bf16 (AArch64dup (bf16 (vector_extract (nxv8bf16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
+ (!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
+ def : Pat<(nxv4f16 (AArch64dup (f16 (vector_extract (nxv4f16 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
+ (!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
+ def : Pat<(nxv2f16 (AArch64dup (f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
+ (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
+ def : Pat<(nxv4f32 (AArch64dup (f32 (vector_extract (nxv4f32 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
+ (!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
+ def : Pat<(nxv2f32 (AArch64dup (f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
+ (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
+ def : Pat<(nxv2f64 (AArch64dup (f64 (vector_extract (nxv2f64 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
+ (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
}
class sve_int_perm_tbl<bits<2> sz8_64, bits<2> opc, string asm, ZPRRegOp zprty,
@@ -2828,10 +2852,8 @@ multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm,
let Inst{19-16} = Zm;
}
- def : Pat<(nxv4i32 (op nxv4i32:$Op1, nxv16i8:$Op2, nxv16i8:$Op3, (i32 VectorIndexS32b_timm:$idx))),
- (!cast<Instruction>(NAME # _S) $Op1, $Op2, $Op3, VectorIndexS32b_timm:$idx)>;
- def : Pat<(nxv2i64 (op nxv2i64:$Op1, nxv8i16:$Op2, nxv8i16:$Op3, (i32 VectorIndexD32b_timm:$idx))),
- (!cast<Instruction>(NAME # _D) $Op1, $Op2, $Op3, VectorIndexD32b_timm:$idx)>;
+ def : SVE_4_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv16i8, nxv16i8, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME # _S)>;
+ def : SVE_4_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv8i16, nxv8i16, i32, VectorIndexD32b_timm, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -4685,20 +4707,22 @@ multiclass sve_fp_3op_p_pd<bits<3> opc, string asm, SDPatternOperator op> {
}
multiclass sve_fp_3op_p_pd_cc<bits<3> opc, string asm, SDPatternOperator op,
- SDPatternOperator op_nopred>
+ CondCode cc1, CondCode cc2,
+ CondCode invcc1, CondCode invcc2>
: sve_fp_3op_p_pd<opc, asm, op> {
- def : SVE_2_Op_AllActive_Pat<nxv8i1, op_nopred, nxv8f16, nxv8f16,
- !cast<Instruction>(NAME # _H), PTRUE_H>;
- def : SVE_2_Op_AllActive_Pat<nxv4i1, op_nopred, nxv4f16, nxv4f16,
- !cast<Instruction>(NAME # _H), PTRUE_S>;
- def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2f16, nxv2f16,
- !cast<Instruction>(NAME # _H), PTRUE_D>;
- def : SVE_2_Op_AllActive_Pat<nxv4i1, op_nopred, nxv4f32, nxv4f32,
- !cast<Instruction>(NAME # _S), PTRUE_S>;
- def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2f32, nxv2f32,
- !cast<Instruction>(NAME # _S), PTRUE_D>;
- def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2f64, nxv2f64,
- !cast<Instruction>(NAME # _D), PTRUE_D>;
+ defm : SVE_SETCC_Pat<cc1, invcc1, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_SETCC_Pat<cc1, invcc1, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_SETCC_Pat<cc1, invcc1, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_SETCC_Pat<cc1, invcc1, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_SETCC_Pat<cc1, invcc1, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_SETCC_Pat<cc1, invcc1, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
+
+ defm : SVE_SETCC_Pat<cc2, invcc2, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_SETCC_Pat<cc2, invcc2, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_SETCC_Pat<cc2, invcc2, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_SETCC_Pat<cc2, invcc2, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_SETCC_Pat<cc2, invcc2, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_SETCC_Pat<cc2, invcc2, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
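The new sve_int_perm_dup_i patterns above select the indexed DUP instruction for a splat of an extracted lane. A minimal fixed-width model of that semantics (dupLane is an illustrative helper, and real SVE vectors are scalable, so the std::array here is purely for exposition):

#include <array>
#include <cassert>
#include <cstddef>

// Broadcast lane Index of Vec into every lane, as the indexed DUP does.
template <typename T, std::size_t N>
static std::array<T, N> dupLane(const std::array<T, N> &Vec,
                                std::size_t Index) {
  std::array<T, N> Out{};
  Out.fill(Vec[Index]);
  return Out;
}

int main() {
  const std::array<int, 4> V{1, 2, 3, 4};
  for (int X : dupLane(V, 2))
    assert(X == 3); // every output lane holds the extracted element
  return 0;
}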
diff --git a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
index 6b8cb786bb6c..e90740030460 100644
--- a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
+++ b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
@@ -78,6 +78,7 @@ private:
static bool optimizeConvertFromSVBool(IntrinsicInst *I);
static bool optimizePTest(IntrinsicInst *I);
static bool optimizeVectorMul(IntrinsicInst *I);
+ static bool optimizeTBL(IntrinsicInst *I);
static bool processPhiNode(IntrinsicInst *I);
};
@@ -437,6 +438,41 @@ bool SVEIntrinsicOpts::optimizeVectorMul(IntrinsicInst *I) {
return Changed;
}
+bool SVEIntrinsicOpts::optimizeTBL(IntrinsicInst *I) {
+ assert(I->getIntrinsicID() == Intrinsic::aarch64_sve_tbl &&
+ "Unexpected opcode");
+
+ auto *OpVal = I->getOperand(0);
+ auto *OpIndices = I->getOperand(1);
+ VectorType *VTy = cast<VectorType>(I->getType());
+
+  // Check whether OpIndices is an aarch64_sve_dup_x intrinsic call whose
+  // constant splat value is smaller than the minimum element count of the
+  // result.
+ auto *DupXIntrI = dyn_cast<IntrinsicInst>(OpIndices);
+ if (!DupXIntrI || DupXIntrI->getIntrinsicID() != Intrinsic::aarch64_sve_dup_x)
+ return false;
+
+ auto *SplatValue = dyn_cast<ConstantInt>(DupXIntrI->getOperand(0));
+ if (!SplatValue ||
+ SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
+ return false;
+
+  // Convert sve_tbl(OpVal, sve_dup_x(SplatValue)) to
+ // splat_vector(extractelement(OpVal, SplatValue)) for further optimization.
+ LLVMContext &Ctx = I->getContext();
+ IRBuilder<> Builder(Ctx);
+ Builder.SetInsertPoint(I);
+ auto *Extract = Builder.CreateExtractElement(OpVal, SplatValue);
+ auto *VectorSplat =
+ Builder.CreateVectorSplat(VTy->getElementCount(), Extract);
+
+ I->replaceAllUsesWith(VectorSplat);
+ I->eraseFromParent();
+ if (DupXIntrI->use_empty())
+ DupXIntrI->eraseFromParent();
+ return true;
+}
+
bool SVEIntrinsicOpts::optimizeConvertFromSVBool(IntrinsicInst *I) {
assert(I->getIntrinsicID() == Intrinsic::aarch64_sve_convert_from_svbool &&
"Unexpected opcode");
@@ -507,6 +543,8 @@ bool SVEIntrinsicOpts::optimizeIntrinsic(Instruction *I) {
case Intrinsic::aarch64_sve_ptest_first:
case Intrinsic::aarch64_sve_ptest_last:
return optimizePTest(IntrI);
+ case Intrinsic::aarch64_sve_tbl:
+ return optimizeTBL(IntrI);
default:
return false;
}
@@ -560,6 +598,7 @@ bool SVEIntrinsicOpts::runOnModule(Module &M) {
case Intrinsic::aarch64_sve_ptrue:
case Intrinsic::aarch64_sve_mul:
case Intrinsic::aarch64_sve_fmul:
+ case Intrinsic::aarch64_sve_tbl:
for (User *U : F.users())
Functions.insert(cast<Instruction>(U)->getFunction());
break;
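The rewrite in optimizeTBL relies on the fact that a table lookup whose index vector is a splat of a constant I (with I smaller than the element count) simply broadcasts lane I of the data operand. A small fixed-width sketch of that equivalence follows; tbl here is a scalar model with illustrative names, not the SVE intrinsic, and out-of-range indices read as zero, as TBL defines them:

#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>

// Scalar model of a vector table lookup: lane L of the result is Vec[Idx[L]],
// or zero when the index is out of range.
template <std::size_t N>
static std::array<uint32_t, N> tbl(const std::array<uint32_t, N> &Vec,
                                   const std::array<uint32_t, N> &Idx) {
  std::array<uint32_t, N> Out{};
  for (std::size_t L = 0; L < N; ++L)
    Out[L] = Idx[L] < N ? Vec[Idx[L]] : 0;
  return Out;
}

int main() {
  const std::array<uint32_t, 4> Vec{10, 20, 30, 40};
  for (uint32_t I = 0; I < 4; ++I) {
    std::array<uint32_t, 4> Splat{}, Expect{};
    Splat.fill(I);                     // dup_x(I): every index lane is I
    Expect.fill(Vec[I]);               // splat(extractelement(Vec, I))
    assert(tbl(Vec, Splat) == Expect); // the rewrite preserves the result
  }
  return 0;
}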