author     Jessica Paquette <jpaquette@apple.com>    2020-11-30 17:21:21 -0800
committer  Jessica Paquette <jpaquette@apple.com>    2020-12-01 15:45:14 -0800
commit     6c3fa97d8a628541c82d8981aabefcb2dcb29f17
tree       d6a8647ffe8dc41a49d5d93e847400f24ce6cf67
parent     [NFC][AMDGPU] AMDGPU code object V4 ABI documentation
[AArch64][GlobalISel] Select Bcc when it's better than TB(N)Z
When we fail to select an optimized compare against 0, select Bcc instead of
falling back to TB(N)Z.
Also simplify selectCompareBranch a little while we're here, since the logic
was hard to follow.
At -O0, this is a 0.1% geomean code size improvement for CTMark.
A simple example of where this can kick in is here:
https://godbolt.org/z/4rra6P
In the example above, GlobalISel currently produces a subs, cset, and tbnz.
SelectionDAG, on the other hand, just emits a compare and b.le.
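For reference, a compare of roughly this shape reproduces the pattern (a hedged C++ reconstruction; the exact godbolt source is not copied here, and the function name is made up):

    // Hypothetical example: a signed compare against 0 that cannot be lowered
    // to a lone CB(N)Z or TB(N)Z. Before this patch, GlobalISel materialized
    // the boolean with subs + cset and branched on bit 0 with tbnz, while
    // SelectionDAG emitted cmp + b.le directly.
    int pick(int x) {
      if (x > 0)
        return 10;
      return 20;
    }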
Differential Revision: https://reviews.llvm.org/D92358
3 files changed, 60 insertions, 52 deletions
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 6691bf068042..3dba92eea3d3 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -257,6 +257,11 @@ private:
                            MachineBasicBlock *DstMBB,
                            MachineIRBuilder &MIB) const;
 
+  /// Emit a CB(N)Z instruction which branches to \p DestMBB.
+  MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
+                        MachineBasicBlock *DestMBB,
+                        MachineIRBuilder &MIB) const;
+
   // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
   // We use these manually instead of using the importer since it doesn't
   // support SDNodeXForm.
@@ -1394,9 +1399,7 @@ bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
   // Only support EQ and NE. If we have LT, then it *is* possible to fold, but
   // we don't want to do this. When we have an AND and LT, we need a TST/ANDS,
   // so folding would be redundant.
-  if (Pred != CmpInst::Predicate::ICMP_EQ &&
-      Pred != CmpInst::Predicate::ICMP_NE)
-    return false;
+  assert(ICmpInst::isEquality(Pred) && "Expected only eq/ne?");
 
   // Check if the AND has a constant on its RHS which we can use as a mask.
   // If it's a power of 2, then it's the same as checking a specific bit.
@@ -1415,6 +1418,27 @@ bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
   return true;
 }
 
+MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
+                                                  bool IsNegative,
+                                                  MachineBasicBlock *DestMBB,
+                                                  MachineIRBuilder &MIB) const {
+  assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
+  MachineRegisterInfo &MRI = *MIB.getMRI();
+  assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
+             AArch64::GPRRegBankID &&
+         "Expected GPRs only?");
+  auto Ty = MRI.getType(CompareReg);
+  unsigned Width = Ty.getSizeInBits();
+  assert(!Ty.isVector() && "Expected scalar only?");
+  assert(Width <= 64 && "Expected width to be at most 64?");
+  static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
+                                          {AArch64::CBNZW, AArch64::CBNZX}};
+  unsigned Opc = OpcTable[IsNegative][Width == 64];
+  auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
+  constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
+  return &*BranchMI;
+}
+
 bool AArch64InstructionSelector::selectCompareBranch(
     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
@@ -1477,51 +1501,39 @@ bool AArch64InstructionSelector::selectCompareBranch(
     }
   }
 
-  if (!VRegAndVal) {
-    std::swap(RHS, LHS);
-    VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
-    LHSMI = getDefIgnoringCopies(LHS, MRI);
-  }
+  // Attempt to handle commutative condition codes. Right now, that's only
+  // eq/ne.
+  if (ICmpInst::isEquality(Pred)) {
+    if (!VRegAndVal) {
+      std::swap(RHS, LHS);
+      VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
+      LHSMI = getDefIgnoringCopies(LHS, MRI);
+    }
 
-  if (!VRegAndVal || VRegAndVal->Value != 0) {
-    // If we can't select a CBZ then emit a cmp + Bcc.
-    auto Pred =
-        static_cast<CmpInst::Predicate>(CCMI->getOperand(1).getPredicate());
-    emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
-                       CCMI->getOperand(1), MIB);
-    const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(Pred);
-    MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
-    I.eraseFromParent();
-    return true;
-  }
+    if (VRegAndVal && VRegAndVal->Value == 0) {
+      // If there's a G_AND feeding into this branch, try to fold it away by
+      // emitting a TB(N)Z instead.
+      if (tryOptAndIntoCompareBranch(LHSMI, VRegAndVal->Value, Pred, DestMBB,
+                                     MIB)) {
+        I.eraseFromParent();
+        return true;
+      }
 
-  // Try to emit a TB(N)Z for an eq or ne condition.
-  if (tryOptAndIntoCompareBranch(LHSMI, VRegAndVal->Value, Pred, DestMBB,
-                                 MIB)) {
-    I.eraseFromParent();
-    return true;
+      // Otherwise, try to emit a CB(N)Z instead.
+      auto LHSTy = MRI.getType(LHS);
+      if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
+        emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
+        I.eraseFromParent();
+        return true;
+      }
+    }
   }
 
-  const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
-  if (RB.getID() != AArch64::GPRRegBankID)
-    return false;
-  if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
-    return false;
-
-  const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
-  unsigned CBOpc = 0;
-  if (CmpWidth <= 32)
-    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
-  else if (CmpWidth == 64)
-    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
-  else
-    return false;
-
-  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
-      .addUse(LHS)
-      .addMBB(DestMBB)
-      .constrainAllUses(TII, TRI, RBI);
-
+  // Couldn't optimize. Emit a compare + bcc.
+  emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
+                     CCMI->getOperand(1), MIB);
+  const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(Pred);
+  MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
   I.eraseFromParent();
   return true;
 }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir
index bb6ba25d06f7..154f00b96de3 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir
@@ -143,8 +143,7 @@ body: |
   ; CHECK: liveins: $w0
   ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
   ; CHECK: [[ANDSWri:%[0-9]+]]:gpr32 = ANDSWri [[COPY]], 0, implicit-def $nzcv
-  ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
-  ; CHECK: TBNZW [[CSINCWr]], 0, %bb.1
+  ; CHECK: Bcc 11, %bb.1, implicit $nzcv
   ; CHECK: B %bb.0
   ; CHECK: bb.1:
   ; CHECK: RET_ReallyLR
@@ -176,8 +175,7 @@ body: |
   ; CHECK: liveins: $w0
   ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
   ; CHECK: [[ANDSWri:%[0-9]+]]:gpr32 = ANDSWri [[COPY]], 0, implicit-def $nzcv
-  ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv
-  ; CHECK: TBNZW [[CSINCWr]], 0, %bb.1
+  ; CHECK: Bcc 12, %bb.1, implicit $nzcv
   ; CHECK: B %bb.0
   ; CHECK: bb.1:
   ; CHECK: RET_ReallyLR
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/tbnz-slt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/tbnz-slt.mir
index 2be18832a0e5..d8f962cdfb76 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/tbnz-slt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/tbnz-slt.mir
@@ -100,8 +100,7 @@ body: |
   ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
   ; CHECK: %copy:gpr64 = COPY $x0
   ; CHECK: [[ANDSXri:%[0-9]+]]:gpr64 = ANDSXri %copy, 8000, implicit-def $nzcv
-  ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
-  ; CHECK: TBNZW %cmp, 0, %bb.1
+  ; CHECK: Bcc 11, %bb.1, implicit $nzcv
   ; CHECK: B %bb.0
   ; CHECK: bb.1:
   ; CHECK: RET_ReallyLR
@@ -133,8 +132,7 @@ body: |
   ; CHECK: %copy:gpr64 = COPY $x0
   ; CHECK: %zero:gpr64 = COPY $xzr
   ; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %zero, %copy, implicit-def $nzcv
-  ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
-  ; CHECK: TBNZW %cmp, 0, %bb.1
+  ; CHECK: Bcc 11, %bb.1, implicit $nzcv
   ; CHECK: B %bb.0
   ; CHECK: bb.1:
   ; CHECK: RET_ReallyLR
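As an aside on the emitCBZ helper above: the OpcTable lookup replaces the old if/else chain over CmpWidth and Pred with a single indexed read. A minimal standalone sketch of that idiom, using stand-in opcode constants rather than the real AArch64 enum values:

    #include <cstdio>

    // Stand-in opcodes for illustration; the real values are AArch64::CBZW,
    // AArch64::CBZX, AArch64::CBNZW, and AArch64::CBNZX.
    enum Opcode { CBZW, CBZX, CBNZW, CBNZX };

    // Row selects CBZ vs. CBNZ, column selects the 32- vs. 64-bit form,
    // mirroring OpcTable[IsNegative][Width == 64] in emitCBZ.
    static Opcode selectCBZOpcode(bool IsNegative, unsigned Width) {
      static const Opcode Table[2][2] = {{CBZW, CBZX}, {CBNZW, CBNZX}};
      return Table[IsNegative][Width == 64];
    }

    int main() {
      // Prints 3, i.e. CBNZX: the 64-bit compare-nonzero-and-branch form.
      std::printf("%d\n", selectCBZOpcode(/*IsNegative=*/true, /*Width=*/64));
      return 0;
    }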