aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlbion Fung <albion.fung@ibm.com>2021-03-24 15:57:27 -0400
committerAlbion Fung <conanap@lep82435v.canlab.ibm.com>2021-03-24 15:59:59 -0400
commite29bb074c62ce46343c2d6c77a463e5c22939f03 (patch)
treeab78bc6587fb3f9579b1ef5ac206ebb59154f2ae /llvm/lib/Target/PowerPC
parentAdd a progress class that can track long running operations in LLDB. (diff)
downloadllvm-project-e29bb074c62ce46343c2d6c77a463e5c22939f03.tar.gz
llvm-project-e29bb074c62ce46343c2d6c77a463e5c22939f03.tar.bz2
llvm-project-e29bb074c62ce46343c2d6c77a463e5c22939f03.zip
[PowerPC] Exploit xxsplti32dx (constant materialization) for scalars
This patch exploits the xxsplti32dx instruction available on Power10 in place of constant pool loads where xxspltidp would not be able to, usually because the immediate cannot fit into 32 bits. Differential Revision: https://reviews.llvm.org/D95458
Diffstat (limited to 'llvm/lib/Target/PowerPC')
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.cpp18
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.h1
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrInfo.cpp1
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrInfo.td24
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrPrefix.td34
5 files changed, 66 insertions, 12 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 5e004c4522b3..65ca0d81fc2a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -8820,6 +8820,18 @@ bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
return Success;
}
+// Nondestructive check for convertToNonDenormSingle.
+bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
+ // Convert a copy so ArgAPFloat is left unmodified. Returns true only if the
+ // value converts to IEEE single without losing info and is not denormal.
+ APFloat APFloatToConvert = ArgAPFloat;
+ bool LosesInfo = true;
+ APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
+ &LosesInfo);
+
+ return (!LosesInfo && !APFloatToConvert.isDenormal());
+}
+
// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
@@ -16115,10 +16127,8 @@ bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
case MVT::f32:
case MVT::f64:
if (Subtarget.hasPrefixInstrs()) {
- // With prefixed instructions, we can materialize anything that can be
- // represented with a 32-bit immediate, not just positive zero.
- APFloat APFloatOfImm = Imm;
- return convertToNonDenormSingle(APFloatOfImm);
+ // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
+ return true;
}
LLVM_FALLTHROUGH;
case MVT::ppcf128:
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index f69b5aceccdc..9e85d6f51014 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1332,6 +1332,7 @@ namespace llvm {
bool convertToNonDenormSingle(APInt &ArgAPInt);
bool convertToNonDenormSingle(APFloat &ArgAPFloat);
+ bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat);
} // end namespace llvm
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 867bbd62b294..6887f098ba1b 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1108,6 +1108,7 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
case PPC::XXLXORspz:
case PPC::XXLXORdpz:
case PPC::XXLEQVOnes:
+ case PPC::XXSPLTI32DX:
case PPC::V_SET0B:
case PPC::V_SET0H:
case PPC::V_SET0:
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index bdd3728df7b9..aa820eee4dcc 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -399,6 +399,30 @@ def getFPAs32BitInt : SDNodeXForm<fpimm, [{
SDLoc(N), MVT::i32);
}]>;
+// Matches an FP immediate that is not +0.0 and that cannot be converted to a
+// non-denormal single precision value without losing info. Immediates that can
+// be converted are exploited by XXSPLTIDP; the rest use XXSPLTI32DX.
+def nzFPImmAsi64 : PatLeaf<(fpimm), [{
+ APFloat APFloatOfN = N->getValueAPF();
+ return !N->isExactlyValue(+0.0) && !checkConvertToNonDenormSingle(APFloatOfN);
+}]>;
+
+// Get the upper 32 bits (63:32) of the FP immediate's bit pattern as an i32.
+def getFPAs64BitIntHi : SDNodeXForm<fpimm, [{
+ APFloat APFloatOfN = N->getValueAPF();
+ uint32_t Hi = (uint32_t)((APFloatOfN.bitcastToAPInt().getZExtValue() &
+ 0xFFFFFFFF00000000LL) >> 32);
+ return CurDAG->getTargetConstant(Hi, SDLoc(N), MVT::i32);
+}]>;
+
+// Get the lower 32 bits (31:0) of the FP immediate's bit pattern as an i32.
+def getFPAs64BitIntLo : SDNodeXForm<fpimm, [{
+ APFloat APFloatOfN = N->getValueAPF();
+ uint32_t Lo = (uint32_t)(APFloatOfN.bitcastToAPInt().getZExtValue() &
+ 0xFFFFFFFF);
+ return CurDAG->getTargetConstant(Lo, SDLoc(N), MVT::i32);
+}]>;
+
def imm34 : PatLeaf<(imm), [{
return isInt<34>(N->getSExtValue());
}]>;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index 7f12a404dc04..9a6631c08555 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1867,14 +1867,6 @@ let Predicates = [PrefixInstrs] in {
"xxspltidp $XT, $IMM32", IIC_VecGeneral,
[(set v2f64:$XT,
(PPCxxspltidp i32:$IMM32))]>;
- def XXSPLTI32DX :
- 8RR_DForm_IMM32_XT6_IX<32, 0, (outs vsrc:$XT),
- (ins vsrc:$XTi, u1imm:$IX, i32imm:$IMM32),
- "xxsplti32dx $XT, $IX, $IMM32", IIC_VecGeneral,
- [(set v2i64:$XT,
- (PPCxxsplti32dx v2i64:$XTi, i32:$IX,
- i32:$IMM32))]>,
- RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
def XXPERMX :
8RR_XX4Form_IMM3_XTABC6<34, 0, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB,
vsrc:$XC, u3imm:$UIM),
@@ -1898,6 +1890,19 @@ let Predicates = [PrefixInstrs] in {
IIC_VecGeneral, []>;
}
+// XXSPLTI32DX needs extra flags to make sure the compiler does not attempt
+// to spill part of the instruction when the values are similar.
+let isReMaterializable = 1, isMoveImm = 1, Predicates = [PrefixInstrs] in {
+ def XXSPLTI32DX :
+ 8RR_DForm_IMM32_XT6_IX<32, 0, (outs vsrc:$XT),
+ (ins vsrc:$XTi, u1imm:$IX, i32imm:$IMM32),
+ "xxsplti32dx $XT, $IX, $IMM32", IIC_VecGeneral,
+ [(set v2i64:$XT,
+ (PPCxxsplti32dx v2i64:$XTi, i32:$IX,
+ i32:$IMM32))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+}
+
let Predicates = [IsISA3_1] in {
def SETBC : XForm_XT5_BI5<31, 384, (outs gprc:$RT), (ins crbitrc:$BI),
"setbc $RT, $BI", IIC_IntCompare, []>;
@@ -2623,6 +2628,19 @@ let AddedComplexity = 400, Predicates = [PrefixInstrs] in {
(COPY_TO_REGCLASS (XXSPLTIDP (getFPAs32BitInt fpimm:$A)),
VSFRC)>;
+// Replace constant pool loads of scalar FP immediates with an XXSPLTI32DX pair.
+def : Pat<(f32 nzFPImmAsi64:$A),
+ (COPY_TO_REGCLASS (XXSPLTI32DX (XXSPLTI32DX(IMPLICIT_DEF), 0,
+ (getFPAs64BitIntHi $A)),
+ 1, (getFPAs64BitIntLo $A)),
+ VSRC)>;
+
+def : Pat<(f64 nzFPImmAsi64:$A),
+ (COPY_TO_REGCLASS (XXSPLTI32DX (XXSPLTI32DX (IMPLICIT_DEF), 0,
+ (getFPAs64BitIntHi $A)),
+ 1, (getFPAs64BitIntLo $A)),
+ VSRC)>;
+
// Anonymous patterns for XXEVAL
// AND
// and(A, B, C)