diff options
author | LemonBoy <thatlemon@gmail.com> | 2021-03-17 16:59:55 +0100 |
---|---|---|
committer | Tom Stellard <tstellar@redhat.com> | 2021-03-31 12:10:37 -0700 |
commit | 31001be371e8f2c74470e727e54503fb2aabec8b (patch) | |
tree | 93ea8be722a9712ea312b2f9a9c77749193a27cb | |
parent | [RISCV][MC] Fix nf encoding for vector ld/st whole register (diff) | |
download | llvm-project-31001be371e8f2c74470e727e54503fb2aabec8b.tar.gz llvm-project-31001be371e8f2c74470e727e54503fb2aabec8b.tar.bz2 llvm-project-31001be371e8f2c74470e727e54503fb2aabec8b.zip |
[LoopVectorize] Refine hasIrregularType predicate (tag: llvmorg-12.0.0-rc4)
The `hasIrregularType` predicate checks whether an array of N values of type Ty is "bitcast-compatible" with a <N x Ty> vector.
The previous check returned invalid results in some cases where there is padding between the array elements: e.g., a 4-element array of u7 values was considered compatible with <4 x u7>, even though the vector only loads/stores 28 bits instead of the 32 bits the array occupies.
The problem causes LLVM to generate incorrect code for some targets: on AArch64 the vector loads/stores are lowered in terms of ubfx/bfi, effectively losing the top (N * padding) bits.
Reviewed By: lebedev.ri
Differential Revision: https://reviews.llvm.org/D97465
(cherry picked from commit 4f024938e4c932feba4d28573ec4522106f8d879)
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 22 | ||||
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/irregular_type.ll | 27 |
2 files changed, 34 insertions, 15 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index d36e078444bc..b456a97aa4ec 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -372,19 +372,11 @@ static Type *getMemInstValueType(Value *I) { /// A helper function that returns true if the given type is irregular. The /// type is irregular if its allocated size doesn't equal the store size of an -/// element of the corresponding vector type at the given vectorization factor. -static bool hasIrregularType(Type *Ty, const DataLayout &DL, ElementCount VF) { - // Determine if an array of VF elements of type Ty is "bitcast compatible" - // with a <VF x Ty> vector. - if (VF.isVector()) { - auto *VectorTy = VectorType::get(Ty, VF); - return TypeSize::get(VF.getKnownMinValue() * - DL.getTypeAllocSize(Ty).getFixedValue(), - VF.isScalable()) != DL.getTypeStoreSize(VectorTy); - } - - // If the vectorization factor is one, we just check if an array of type Ty - // requires padding between elements. +/// element of the corresponding vector type. +static bool hasIrregularType(Type *Ty, const DataLayout &DL) { + // Determine if an array of N elements of type Ty is "bitcast compatible" + // with a <N x Ty> vector. + // This is only true if there is no padding between the array elements. return DL.getTypeAllocSizeInBits(Ty) != DL.getTypeSizeInBits(Ty); } @@ -5212,7 +5204,7 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened( // requires padding and will be scalarized. auto &DL = I->getModule()->getDataLayout(); auto *ScalarTy = getMemInstValueType(I); - if (hasIrregularType(ScalarTy, DL, VF)) + if (hasIrregularType(ScalarTy, DL)) return false; // Check if masking is required. @@ -5259,7 +5251,7 @@ bool LoopVectorizationCostModel::memoryInstructionCanBeWidened( // requires padding and will be scalarized. auto &DL = I->getModule()->getDataLayout(); auto *ScalarTy = LI ? 
LI->getType() : SI->getValueOperand()->getType(); - if (hasIrregularType(ScalarTy, DL, VF)) + if (hasIrregularType(ScalarTy, DL)) return false; return true; diff --git a/llvm/test/Transforms/LoopVectorize/irregular_type.ll b/llvm/test/Transforms/LoopVectorize/irregular_type.ll new file mode 100644 index 000000000000..167a1a101e6f --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/irregular_type.ll @@ -0,0 +1,27 @@ +; RUN: opt %s -loop-vectorize -force-vector-width=4 -S | FileCheck %s + +; Ensure the array loads/stores are not optimized into vector operations when +; the element type has padding bits. + +; CHECK: foo +; CHECK: vector.body +; CHECK-NOT: load <4 x i7> +; CHECK-NOT: store <4 x i7> +; CHECK: for.body +define void @foo(i7* %a, i64 %n) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i7, i7* %a, i64 %indvars.iv + %0 = load i7, i7* %arrayidx, align 1 + %sub = add nuw nsw i7 %0, 0 + store i7 %sub, i7* %arrayidx, align 1 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %cmp = icmp eq i64 %indvars.iv.next, %n + br i1 %cmp, label %for.exit, label %for.body + +for.exit: + ret void +} |