author    Jessica Paquette <jpaquette@apple.com>  2020-11-30 17:21:21 -0800
committer Jessica Paquette <jpaquette@apple.com>  2020-12-01 15:45:14 -0800
commit    6c3fa97d8a628541c82d8981aabefcb2dcb29f17 (patch)
tree      d6a8647ffe8dc41a49d5d93e847400f24ce6cf67
parent    [NFC][AMDGPU] AMDGPU code object V4 ABI documentation (diff)
[AArch64][GlobalISel] Select Bcc when it's better than TB(N)Z
Instead of falling back to selecting TB(N)Z when we fail to select an
optimized compare against 0, select Bcc instead.

Also simplify selectCompareBranch a little while we're here, because the
logic was kind of hard to follow.

At -O0, this is a 0.1% geomean code size improvement for CTMark.

A simple example of where this can kick in is here:
https://godbolt.org/z/4rra6P

In the example above, GlobalISel currently produces a subs, cset, and tbnz.
SelectionDAG, on the other hand, just emits a compare and b.le.

Differential Revision: https://reviews.llvm.org/D92358
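As a hedged illustration of the kind of input involved (the godbolt link above
holds the actual example; the snippet below is an assumption, not taken from
the patch), a signed compare against zero whose predicate is not eq/ne cannot
be folded into CB(N)Z or TB(N)Z, so with this change the selector emits a
compare plus Bcc rather than the old subs/cset/tbnz sequence:

    // Hypothetical example (not from the patch). The condition lowers to a
    // G_ICMP (sgt) against 0 feeding a G_BRCOND; since sgt is not an equality
    // predicate, it cannot become CBZ/CBNZ or TBZ/TBNZ, so the selector now
    // produces a compare followed by a conditional Bcc branch instead of
    // subs + cset + tbnz.
    int pick(int x) {
      if (x > 0)
        return 1;
      return 2;
    }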
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp  100
-rw-r--r--  llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir       6
-rw-r--r--  llvm/test/CodeGen/AArch64/GlobalISel/tbnz-slt.mir               6
3 files changed, 60 insertions, 52 deletions
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 6691bf068042..3dba92eea3d3 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -257,6 +257,11 @@ private:
MachineBasicBlock *DstMBB,
MachineIRBuilder &MIB) const;
+ /// Emit a CB(N)Z instruction which branches to \p DestMBB.
+ MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
+ MachineBasicBlock *DestMBB,
+ MachineIRBuilder &MIB) const;
+
// Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
// We use these manually instead of using the importer since it doesn't
// support SDNodeXForm.
@@ -1394,9 +1399,7 @@ bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
// Only support EQ and NE. If we have LT, then it *is* possible to fold, but
// we don't want to do this. When we have an AND and LT, we need a TST/ANDS,
// so folding would be redundant.
- if (Pred != CmpInst::Predicate::ICMP_EQ &&
- Pred != CmpInst::Predicate::ICMP_NE)
- return false;
+ assert(ICmpInst::isEquality(Pred) && "Expected only eq/ne?");
// Check if the AND has a constant on its RHS which we can use as a mask.
// If it's a power of 2, then it's the same as checking a specific bit.
@@ -1415,6 +1418,27 @@ bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
return true;
}
+MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
+ bool IsNegative,
+ MachineBasicBlock *DestMBB,
+ MachineIRBuilder &MIB) const {
+ assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+ assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
+ AArch64::GPRRegBankID &&
+ "Expected GPRs only?");
+ auto Ty = MRI.getType(CompareReg);
+ unsigned Width = Ty.getSizeInBits();
+ assert(!Ty.isVector() && "Expected scalar only?");
+ assert(Width <= 64 && "Expected width to be at most 64?");
+ static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
+ {AArch64::CBNZW, AArch64::CBNZX}};
+ unsigned Opc = OpcTable[IsNegative][Width == 64];
+ auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
+ constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
+ return &*BranchMI;
+}
+
bool AArch64InstructionSelector::selectCompareBranch(
MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
@@ -1477,51 +1501,39 @@ bool AArch64InstructionSelector::selectCompareBranch(
}
}
- if (!VRegAndVal) {
- std::swap(RHS, LHS);
- VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
- LHSMI = getDefIgnoringCopies(LHS, MRI);
- }
+ // Attempt to handle commutative condition codes. Right now, that's only
+ // eq/ne.
+ if (ICmpInst::isEquality(Pred)) {
+ if (!VRegAndVal) {
+ std::swap(RHS, LHS);
+ VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
+ LHSMI = getDefIgnoringCopies(LHS, MRI);
+ }
- if (!VRegAndVal || VRegAndVal->Value != 0) {
- // If we can't select a CBZ then emit a cmp + Bcc.
- auto Pred =
- static_cast<CmpInst::Predicate>(CCMI->getOperand(1).getPredicate());
- emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
- CCMI->getOperand(1), MIB);
- const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(Pred);
- MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
- I.eraseFromParent();
- return true;
- }
+ if (VRegAndVal && VRegAndVal->Value == 0) {
+ // If there's a G_AND feeding into this branch, try to fold it away by
+ // emitting a TB(N)Z instead.
+ if (tryOptAndIntoCompareBranch(LHSMI, VRegAndVal->Value, Pred, DestMBB,
+ MIB)) {
+ I.eraseFromParent();
+ return true;
+ }
- // Try to emit a TB(N)Z for an eq or ne condition.
- if (tryOptAndIntoCompareBranch(LHSMI, VRegAndVal->Value, Pred, DestMBB,
- MIB)) {
- I.eraseFromParent();
- return true;
+ // Otherwise, try to emit a CB(N)Z instead.
+ auto LHSTy = MRI.getType(LHS);
+ if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
+ emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
+ I.eraseFromParent();
+ return true;
+ }
+ }
}
- const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
- if (RB.getID() != AArch64::GPRRegBankID)
- return false;
- if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
- return false;
-
- const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
- unsigned CBOpc = 0;
- if (CmpWidth <= 32)
- CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
- else if (CmpWidth == 64)
- CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
- else
- return false;
-
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
- .addUse(LHS)
- .addMBB(DestMBB)
- .constrainAllUses(TII, TRI, RBI);
-
+ // Couldn't optimize. Emit a compare + bcc.
+ emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
+ CCMI->getOperand(1), MIB);
+ const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(Pred);
+ MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
I.eraseFromParent();
return true;
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir
index bb6ba25d06f7..154f00b96de3 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir
@@ -143,8 +143,7 @@ body: |
; CHECK: liveins: $w0
; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
; CHECK: [[ANDSWri:%[0-9]+]]:gpr32 = ANDSWri [[COPY]], 0, implicit-def $nzcv
- ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
- ; CHECK: TBNZW [[CSINCWr]], 0, %bb.1
+ ; CHECK: Bcc 11, %bb.1, implicit $nzcv
; CHECK: B %bb.0
; CHECK: bb.1:
; CHECK: RET_ReallyLR
@@ -176,8 +175,7 @@ body: |
; CHECK: liveins: $w0
; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
; CHECK: [[ANDSWri:%[0-9]+]]:gpr32 = ANDSWri [[COPY]], 0, implicit-def $nzcv
- ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv
- ; CHECK: TBNZW [[CSINCWr]], 0, %bb.1
+ ; CHECK: Bcc 12, %bb.1, implicit $nzcv
; CHECK: B %bb.0
; CHECK: bb.1:
; CHECK: RET_ReallyLR
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/tbnz-slt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/tbnz-slt.mir
index 2be18832a0e5..d8f962cdfb76 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/tbnz-slt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/tbnz-slt.mir
@@ -100,8 +100,7 @@ body: |
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
; CHECK: %copy:gpr64 = COPY $x0
; CHECK: [[ANDSXri:%[0-9]+]]:gpr64 = ANDSXri %copy, 8000, implicit-def $nzcv
- ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
- ; CHECK: TBNZW %cmp, 0, %bb.1
+ ; CHECK: Bcc 11, %bb.1, implicit $nzcv
; CHECK: B %bb.0
; CHECK: bb.1:
; CHECK: RET_ReallyLR
@@ -133,8 +132,7 @@ body: |
; CHECK: %copy:gpr64 = COPY $x0
; CHECK: %zero:gpr64 = COPY $xzr
; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %zero, %copy, implicit-def $nzcv
- ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
- ; CHECK: TBNZW %cmp, 0, %bb.1
+ ; CHECK: Bcc 11, %bb.1, implicit $nzcv
; CHECK: B %bb.0
; CHECK: bb.1:
; CHECK: RET_ReallyLR