1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
|
From 72da952b396f43d2ce6ca20a0f0d75212fea6b28 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Mon, 1 Oct 2018 07:08:41 +0000
Subject: [PATCH] [X86] Stop X86DomainReassignment from creating copies between
GR8/GR16 physical registers and k-registers.
We can only copy between a k-register and a GR32/GR64 register.
This patch detects that the copy will be illegal and prevents the domain reassignment from happening for that closure.
This probably isn't the best fix, and we should probably figure out how to handle this correctly.
Fixes PR38803.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@343443 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Target/X86/X86DomainReassignment.cpp | 21 +++++++++++
test/CodeGen/X86/pr38803.ll | 48 ++++++++++++++++++++++++
2 files changed, 69 insertions(+)
create mode 100644 test/CodeGen/X86/pr38803.ll
diff --git a/lib/Target/X86/X86DomainReassignment.cpp b/lib/Target/X86/X86DomainReassignment.cpp
index 5196446b39e..62588e9509d 100644
--- a/lib/Target/X86/X86DomainReassignment.cpp
+++ b/lib/Target/X86/X86DomainReassignment.cpp
@@ -217,6 +217,27 @@ public:
InstrCOPYReplacer(unsigned SrcOpcode, RegDomain DstDomain, unsigned DstOpcode)
: InstrReplacer(SrcOpcode, DstOpcode), DstDomain(DstDomain) {}
+ bool isLegal(const MachineInstr *MI,
+ const TargetInstrInfo *TII) const override {
+ if (!InstrConverterBase::isLegal(MI, TII))
+ return false;
+
+ // Don't allow copies to/from GR8/GR16 physical registers.
+ // FIXME: Is there some better way to support this?
+ unsigned DstReg = MI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg) &&
+ (X86::GR8RegClass.contains(DstReg) ||
+ X86::GR16RegClass.contains(DstReg)))
+ return false;
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ (X86::GR8RegClass.contains(SrcReg) ||
+ X86::GR16RegClass.contains(SrcReg)))
+ return false;
+
+ return true;
+ }
+
double getExtraCost(const MachineInstr *MI,
MachineRegisterInfo *MRI) const override {
assert(MI->getOpcode() == TargetOpcode::COPY && "Expected a COPY");
diff --git a/test/CodeGen/X86/pr38803.ll b/test/CodeGen/X86/pr38803.ll
new file mode 100644
index 00000000000..a1ca27d89b0
--- /dev/null
+++ b/test/CodeGen/X86/pr38803.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mcpu=skylake-avx512 -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+@b = local_unnamed_addr global i32 0, align 4
+@c = local_unnamed_addr global i32 0, align 4
+@d = local_unnamed_addr global float 0.000000e+00, align 4
+
+define float @_Z3fn2v() {
+; CHECK-LABEL: _Z3fn2v:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: callq _Z1av
+; CHECK-NEXT: # kill: def $al killed $al def $eax
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss %xmm0, %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: cmpl $0, {{.*}}(%rip)
+; CHECK-NEXT: je .LBB0_2
+; CHECK-NEXT: # %bb.1: # %if.then
+; CHECK-NEXT: vcvtsi2ssl {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vmovss %xmm2, %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vmovss %xmm1, {{.*}}(%rip)
+; CHECK-NEXT: .LBB0_2: # %if.end
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %call = tail call zeroext i1 @_Z1av()
+ %cond = select i1 %call, float 7.500000e-01, float 0.000000e+00
+ %0 = load i32, i32* @c, align 4
+ %tobool2 = icmp eq i32 %0, 0
+ br i1 %tobool2, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %1 = load i32, i32* @b, align 4
+ %2 = sitofp i32 %1 to float
+ %conv5 = select i1 %call, float 0.000000e+00, float %2
+ store float %conv5, float* @d, align 4
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret float %cond
+}
+
+declare zeroext i1 @_Z1av()
--
2.19.0
|