about summary refs log tree commit diff
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll | 117
1 files changed, 117 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
index b201066cb7a4..b9237bc5babe 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
@@ -3,12 +3,18 @@
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx801 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX900 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX906 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
define amdgpu_ps i32 @scalar_xnor_i32_one_use(i32 inreg %a, i32 inreg %b) {
; GCN-LABEL: scalar_xnor_i32_one_use:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_xnor_b32 s0, s0, s1
; GCN-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: scalar_xnor_i32_one_use:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_xnor_b32 s0, s0, s1
+; GFX10-NEXT: ; return to shader part epilog
entry:
%xor = xor i32 %a, %b
%r0.val = xor i32 %xor, -1
@@ -47,6 +53,12 @@ define amdgpu_ps i32 @scalar_xnor_v2i16_one_use(<2 x i16> inreg %a, <2 x i16> in
; GFX906-NEXT: s_xor_b32 s0, s0, s1
; GFX906-NEXT: s_xor_b32 s0, s0, -1
; GFX906-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: scalar_xnor_v2i16_one_use:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_xor_b32 s0, s0, s1
+; GFX10-NEXT: s_xor_b32 s0, s0, -1
+; GFX10-NEXT: ; return to shader part epilog
entry:
%xor = xor <2 x i16> %a, %b
%r0.val = xor <2 x i16> %xor, <i16 -1, i16 -1>
@@ -62,6 +74,14 @@ define amdgpu_ps <2 x i32> @scalar_xnor_i32_mul_use(i32 inreg %a, i32 inreg %b)
; GCN-NEXT: s_add_i32 s1, s1, s0
; GCN-NEXT: s_mov_b32 s0, s2
; GCN-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: scalar_xnor_i32_mul_use:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_xor_b32 s1, s0, s1
+; GFX10-NEXT: s_not_b32 s2, s1
+; GFX10-NEXT: s_add_i32 s1, s1, s0
+; GFX10-NEXT: s_mov_b32 s0, s2
+; GFX10-NEXT: ; return to shader part epilog
entry:
%xor = xor i32 %a, %b
%r0.val = xor i32 %xor, -1
@@ -76,6 +96,11 @@ define amdgpu_ps i64 @scalar_xnor_i64_one_use(i64 inreg %a, i64 inreg %b) {
; GCN: ; %bb.0:
; GCN-NEXT: s_xnor_b64 s[0:1], s[0:1], s[2:3]
; GCN-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: scalar_xnor_i64_one_use:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_xnor_b64 s[0:1], s[0:1], s[2:3]
+; GFX10-NEXT: ; return to shader part epilog
%xor = xor i64 %a, %b
%r0.val = xor i64 %xor, -1
ret i64 %r0.val
@@ -138,6 +163,14 @@ define amdgpu_ps i64 @scalar_xnor_v4i16_one_use(<4 x i16> inreg %a, <4 x i16> in
; GFX906-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
; GFX906-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
; GFX906-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: scalar_xnor_v4i16_one_use:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_mov_b32 s4, -1
+; GFX10-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
+; GFX10-NEXT: s_mov_b32 s5, s4
+; GFX10-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
+; GFX10-NEXT: ; return to shader part epilog
%xor = xor <4 x i16> %a, %b
%ret = xor <4 x i16> %xor, <i16 -1, i16 -1, i16 -1, i16 -1>
%cast = bitcast <4 x i16> %ret to i64
@@ -157,6 +190,19 @@ define amdgpu_ps <2 x i64> @scalar_xnor_i64_mul_use(i64 inreg %a, i64 inreg %b)
; GCN-NEXT: s_mov_b32 s0, s4
; GCN-NEXT: s_mov_b32 s1, s5
; GCN-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: scalar_xnor_i64_mul_use:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_xor_b64 s[2:3], s[0:1], s[2:3]
+; GFX10-NEXT: s_not_b64 s[4:5], s[2:3]
+; GFX10-NEXT: s_add_u32 s2, s2, s0
+; GFX10-NEXT: s_cselect_b32 s0, 1, 0
+; GFX10-NEXT: s_and_b32 s0, s0, 1
+; GFX10-NEXT: s_cmp_lg_u32 s0, 0
+; GFX10-NEXT: s_mov_b32 s0, s4
+; GFX10-NEXT: s_addc_u32 s3, s3, s1
+; GFX10-NEXT: s_mov_b32 s1, s5
+; GFX10-NEXT: ; return to shader part epilog
%xor = xor i64 %a, %b
%r0.val = xor i64 %xor, -1
%r1.val = add i64 %xor, %a
@@ -192,6 +238,13 @@ define i32 @vector_xnor_i32_one_use(i32 %a, i32 %b) {
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: v_xnor_b32_e32 v0, v0, v1
; GFX906-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: vector_xnor_i32_one_use:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_xor3_b32 v0, v0, v1, -1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
%xor = xor i32 %a, %b
%r = xor i32 %xor, -1
@@ -207,6 +260,16 @@ define i64 @vector_xnor_i64_one_use(i64 %a, i64 %b) {
; GCN-NEXT: v_xor_b32_e32 v0, -1, v0
; GCN-NEXT: v_xor_b32_e32 v1, -1, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: vector_xnor_i64_one_use:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_xor_b32_e32 v0, v0, v2
+; GFX10-NEXT: v_xor_b32_e32 v1, v1, v3
+; GFX10-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX10-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
%xor = xor i64 %a, %b
%r = xor i64 %xor, -1
@@ -236,6 +299,11 @@ define amdgpu_ps float @xnor_s_v_i32_one_use(i32 inreg %s, i32 %v) {
; GFX906: ; %bb.0:
; GFX906-NEXT: v_xnor_b32_e32 v0, s0, v0
; GFX906-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: xnor_s_v_i32_one_use:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_xor3_b32 v0, s0, v0, -1
+; GFX10-NEXT: ; return to shader part epilog
%xor = xor i32 %s, %v
%d = xor i32 %xor, -1
%cast = bitcast i32 %d to float
@@ -265,6 +333,11 @@ define amdgpu_ps float @xnor_v_s_i32_one_use(i32 inreg %s, i32 %v) {
; GFX906: ; %bb.0:
; GFX906-NEXT: v_xnor_b32_e64 v0, v0, s0
; GFX906-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: xnor_v_s_i32_one_use:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_xor3_b32 v0, v0, s0, -1
+; GFX10-NEXT: ; return to shader part epilog
%xor = xor i32 %v, %s
%d = xor i32 %xor, -1
%cast = bitcast i32 %d to float
@@ -307,6 +380,15 @@ define amdgpu_ps <2 x float> @xnor_i64_s_v_one_use(i64 inreg %a, i64 %b64) {
; GFX906-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX906-NEXT: v_xor_b32_e32 v1, -1, v1
; GFX906-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: xnor_i64_s_v_one_use:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
+; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX10-NEXT: v_xor_b32_e32 v1, s1, v1
+; GFX10-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX10-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX10-NEXT: ; return to shader part epilog
entry:
%b = shl i64 %b64, 29
%xor = xor i64 %a, %b
@@ -351,6 +433,15 @@ define amdgpu_ps <2 x float> @xnor_i64_v_s_one_use(i64 inreg %a, i64 %b64) {
; GFX906-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX906-NEXT: v_xor_b32_e32 v1, -1, v1
; GFX906-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: xnor_i64_v_s_one_use:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
+; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX10-NEXT: v_xor_b32_e32 v1, s1, v1
+; GFX10-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX10-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX10-NEXT: ; return to shader part epilog
%b = shl i64 %b64, 29
%xor = xor i64 %b, %a
%r0.val = xor i64 %xor, -1
@@ -385,6 +476,13 @@ define i32 @vector_xor_na_b_i32_one_use(i32 %a, i32 %b) {
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: v_xnor_b32_e32 v0, v0, v1
; GFX906-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: vector_xor_na_b_i32_one_use:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_xor3_b32 v0, v0, -1, v1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
%na = xor i32 %a, -1
%r = xor i32 %na, %b
@@ -418,6 +516,13 @@ define i32 @vector_xor_a_nb_i32_one_use(i32 %a, i32 %b) {
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: v_xnor_b32_e32 v0, v1, v0
; GFX906-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: vector_xor_a_nb_i32_one_use:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_xor3_b32 v0, v1, -1, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
%nb = xor i32 %b, -1
%r = xor i32 %a, %nb
@@ -430,6 +535,12 @@ define amdgpu_ps <2 x i32> @scalar_xor_a_nb_i64_one_use(i64 inreg %a, i64 inreg
; GCN-NEXT: s_not_b64 s[2:3], s[2:3]
; GCN-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
; GCN-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: scalar_xor_a_nb_i64_one_use:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_not_b64 s[2:3], s[2:3]
+; GFX10-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
+; GFX10-NEXT: ; return to shader part epilog
entry:
%nb = xor i64 %b, -1
%r0.val = xor i64 %a, %nb
@@ -443,6 +554,12 @@ define amdgpu_ps <2 x i32> @scalar_xor_na_b_i64_one_use(i64 inreg %a, i64 inreg
; GCN-NEXT: s_not_b64 s[0:1], s[0:1]
; GCN-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
; GCN-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: scalar_xor_na_b_i64_one_use:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_not_b64 s[0:1], s[0:1]
+; GFX10-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
+; GFX10-NEXT: ; return to shader part epilog
entry:
%na = xor i64 %a, -1
%r0.val = xor i64 %na, %b