; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -early-live-intervals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

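; Check that llvm.amdgcn.set.inactive.i32 lowers to an exec-mask flip: active
; lanes keep %in in v0, and the v_mov executed under the inverted exec mask
; writes the inactive-lane value 42.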
define amdgpu_kernel void @set_inactive(i32 addrspace(1)* %out, i32 %in) {
; GCN-LABEL: set_inactive:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s4, s[0:1], 0x2c
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v0, s4
; GCN-NEXT:    s_not_b64 exec, exec
; GCN-NEXT:    v_mov_b32_e32 v0, 42
; GCN-NEXT:    s_not_b64 exec, exec
; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GCN-NEXT:    s_endpgm
  %tmp = call i32 @llvm.amdgcn.set.inactive.i32(i32 %in, i32 42) #0
  store i32 %tmp, i32 addrspace(1)* %out
  ret void
}

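; Same check for the 64-bit variant: the inactive value 0 is written to both
; halves of the register pair v[0:1] under the inverted exec mask.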
define amdgpu_kernel void @set_inactive_64(i64 addrspace(1)* %out, i64 %in) {
; GCN-LABEL: set_inactive_64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; GCN-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NEXT:    s_mov_b32 s6, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v0, s2
; GCN-NEXT:    s_mov_b32 s4, s0
; GCN-NEXT:    s_mov_b32 s5, s1
; GCN-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NEXT:    s_not_b64 exec, exec
; GCN-NEXT:    v_mov_b32_e32 v0, 0
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_not_b64 exec, exec
; GCN-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GCN-NEXT:    s_endpgm
  %tmp = call i64 @llvm.amdgcn.set.inactive.i64(i64 %in, i64 0) #0
  store i64 %tmp, i64 addrspace(1)* %out
  ret void
}

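; Check SCC handling around the set.inactive expansion: s_not_b64 exec, exec
; also writes SCC, so the compare feeding the branch must not have its result
; clobbered; here the s_cmp_lg_u32 ends up after the exec-mask flips.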
define amdgpu_kernel void @set_inactive_scc(i32 addrspace(1)* %out, i32 %in, <4 x i32> inreg %desc) {
; GCN-LABEL: set_inactive_scc:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
; GCN-NEXT:    s_load_dword s2, s[0:1], 0x2c
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_buffer_load_dword s3, s[4:7], 0x0
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT:    v_mov_b32_e32 v0, s2
; GCN-NEXT:    s_not_b64 exec, exec
; GCN-NEXT:    v_mov_b32_e32 v0, 42
; GCN-NEXT:    s_not_b64 exec, exec
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_cmp_lg_u32 s3, 56
; GCN-NEXT:    s_mov_b64 s[2:3], -1
; GCN-NEXT:    s_cbranch_scc1 .LBB2_3
; GCN-NEXT:  ; %bb.1: ; %Flow
; GCN-NEXT:    s_andn2_b64 vcc, exec, s[2:3]
; GCN-NEXT:    s_cbranch_vccz .LBB2_4
; GCN-NEXT:  .LBB2_2: ; %.exit
; GCN-NEXT:    s_endpgm
; GCN-NEXT:  .LBB2_3: ; %.one
; GCN-NEXT:    v_add_u32_e32 v1, vcc, 1, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    buffer_store_dword v1, off, s[0:3], 0
; GCN-NEXT:    s_mov_b64 s[2:3], 0
; GCN-NEXT:    s_cbranch_execnz .LBB2_2
; GCN-NEXT:  .LBB2_4: ; %.zero
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GCN-NEXT:    s_endpgm
  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 0, i32 0)
  %cmp = icmp eq i32 %val, 56
  %tmp = call i32 @llvm.amdgcn.set.inactive.i32(i32 %in, i32 42) #0
  br i1 %cmp, label %.zero, label %.one

.zero:
  store i32 %tmp, i32 addrspace(1)* %out
  br label %.exit

.one:
  %tmp.1 = add i32 %tmp, 1
  store i32 %tmp.1, i32 addrspace(1)* %out
  br label %.exit

.exit:
  ret void
}

declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #0
declare i64 @llvm.amdgcn.set.inactive.i64(i64, i64) #0
declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32)

attributes #0 = { convergent readnone }