1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck %s
3; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck %s
4
5; Check for optimizing the passed implicit workitem ID based on the
6; required group size. This should avoid a few bit packing operations.
7
; External function called by every kernel in this file. It is only declared
; here; its attribute group #0 is defined near the end of the file.
8declare hidden void @callee() #0
9
; Required work-group size !0 = {1, 64, 64}: only the first (x) dimension has
; size 1. The asserted code builds v31 from v1 shifted left by 10 and v2
; shifted left by 20 before the call; the incoming value of v0 is never read
; (v0 is only used as a temporary for the shifted v2).
; The %out argument is not referenced by the body.
10define amdgpu_kernel void @known_x_0(i32 addrspace(1)* %out) !reqd_work_group_size !0 {
11; CHECK-LABEL: known_x_0:
12; CHECK:       ; %bb.0:
13; CHECK-NEXT:    s_add_u32 flat_scratch_lo, s6, s9
14; CHECK-NEXT:    s_addc_u32 flat_scratch_hi, s7, 0
15; CHECK-NEXT:    s_add_u32 s0, s0, s9
16; CHECK-NEXT:    v_lshlrev_b32_e32 v0, 20, v2
17; CHECK-NEXT:    s_addc_u32 s1, s1, 0
18; CHECK-NEXT:    v_lshl_or_b32 v31, v1, 10, v0
19; CHECK-NEXT:    s_mov_b32 s32, 0
20; CHECK-NEXT:    s_getpc_b64 s[4:5]
21; CHECK-NEXT:    s_add_u32 s4, s4, callee@rel32@lo+4
22; CHECK-NEXT:    s_addc_u32 s5, s5, callee@rel32@hi+12
23; CHECK-NEXT:    s_swappc_b64 s[30:31], s[4:5]
24; CHECK-NEXT:    s_endpgm
25  call void @callee()
26  ret void
27}
28; CHECK: .amdhsa_system_vgpr_workitem_id 2
29
; Required work-group size !1 = {64, 1, 64}: only the second (y) dimension has
; size 1. The asserted code needs a single v_lshl_or_b32 to build v31 from v0
; and v2 shifted left by 20; v1 is never read.
; The %out argument is not referenced by the body.
30define amdgpu_kernel void @known_y_0(i32 addrspace(1)* %out) !reqd_work_group_size !1 {
31; CHECK-LABEL: known_y_0:
32; CHECK:       ; %bb.0:
33; CHECK-NEXT:    s_add_u32 flat_scratch_lo, s6, s9
34; CHECK-NEXT:    s_addc_u32 flat_scratch_hi, s7, 0
35; CHECK-NEXT:    s_add_u32 s0, s0, s9
36; CHECK-NEXT:    s_addc_u32 s1, s1, 0
37; CHECK-NEXT:    v_lshl_or_b32 v31, v2, 20, v0
38; CHECK-NEXT:    s_mov_b32 s32, 0
39; CHECK-NEXT:    s_getpc_b64 s[4:5]
40; CHECK-NEXT:    s_add_u32 s4, s4, callee@rel32@lo+4
41; CHECK-NEXT:    s_addc_u32 s5, s5, callee@rel32@hi+12
42; CHECK-NEXT:    s_swappc_b64 s[30:31], s[4:5]
43; CHECK-NEXT:    s_endpgm
44  call void @callee()
45  ret void
46}
47; CHECK: .amdhsa_system_vgpr_workitem_id 2
48
; Required work-group size !2 = {64, 64, 1}: only the third (z) dimension has
; size 1. The asserted code builds v31 from v0 and v1 shifted left by 10 with a
; single v_lshl_or_b32; v2 is never read, and the kernel descriptor assertion
; that follows this function expects workitem_id 1 rather than 2.
; The %out argument is not referenced by the body.
49define amdgpu_kernel void @known_z_0(i32 addrspace(1)* %out) !reqd_work_group_size !2 {
50; CHECK-LABEL: known_z_0:
51; CHECK:       ; %bb.0:
52; CHECK-NEXT:    s_add_u32 flat_scratch_lo, s6, s9
53; CHECK-NEXT:    s_addc_u32 flat_scratch_hi, s7, 0
54; CHECK-NEXT:    s_add_u32 s0, s0, s9
55; CHECK-NEXT:    s_addc_u32 s1, s1, 0
56; CHECK-NEXT:    v_lshl_or_b32 v31, v1, 10, v0
57; CHECK-NEXT:    s_mov_b32 s32, 0
58; CHECK-NEXT:    s_getpc_b64 s[4:5]
59; CHECK-NEXT:    s_add_u32 s4, s4, callee@rel32@lo+4
60; CHECK-NEXT:    s_addc_u32 s5, s5, callee@rel32@hi+12
61; CHECK-NEXT:    s_swappc_b64 s[30:31], s[4:5]
62; CHECK-NEXT:    s_endpgm
63  call void @callee()
64  ret void
65}
66; CHECK: .amdhsa_system_vgpr_workitem_id 1
67
; Required work-group size !3 = {64, 1, 1}: the second and third (y, z)
; dimensions both have size 1. The asserted code passes v31 as a plain copy of
; v0 with no shift or OR, and the kernel descriptor assertion that follows
; this function expects workitem_id 0.
; The %out argument is not referenced by the body.
68define amdgpu_kernel void @known_yz_0(i32 addrspace(1)* %out) !reqd_work_group_size !3 {
69; CHECK-LABEL: known_yz_0:
70; CHECK:       ; %bb.0:
71; CHECK-NEXT:    s_add_u32 flat_scratch_lo, s6, s9
72; CHECK-NEXT:    s_addc_u32 flat_scratch_hi, s7, 0
73; CHECK-NEXT:    s_add_u32 s0, s0, s9
74; CHECK-NEXT:    s_addc_u32 s1, s1, 0
75; CHECK-NEXT:    v_mov_b32_e32 v31, v0
76; CHECK-NEXT:    s_mov_b32 s32, 0
77; CHECK-NEXT:    s_getpc_b64 s[4:5]
78; CHECK-NEXT:    s_add_u32 s4, s4, callee@rel32@lo+4
79; CHECK-NEXT:    s_addc_u32 s5, s5, callee@rel32@hi+12
80; CHECK-NEXT:    s_swappc_b64 s[30:31], s[4:5]
81; CHECK-NEXT:    s_endpgm
82  call void @callee()
83  ret void
84}
85; CHECK: .amdhsa_system_vgpr_workitem_id 0
86
; Required work-group size !4 = {1, 64, 1}: the first and third (x, z)
; dimensions both have size 1. The asserted code sets v31 to v1 shifted left
; by 10 and reads neither v0 nor v2; the kernel descriptor assertion that
; follows this function expects workitem_id 1.
; The %out argument is not referenced by the body.
87define amdgpu_kernel void @known_xz_0(i32 addrspace(1)* %out) !reqd_work_group_size !4 {
88; CHECK-LABEL: known_xz_0:
89; CHECK:       ; %bb.0:
90; CHECK-NEXT:    s_add_u32 flat_scratch_lo, s6, s9
91; CHECK-NEXT:    s_addc_u32 flat_scratch_hi, s7, 0
92; CHECK-NEXT:    s_add_u32 s0, s0, s9
93; CHECK-NEXT:    s_addc_u32 s1, s1, 0
94; CHECK-NEXT:    v_lshlrev_b32_e32 v31, 10, v1
95; CHECK-NEXT:    s_mov_b32 s32, 0
96; CHECK-NEXT:    s_getpc_b64 s[4:5]
97; CHECK-NEXT:    s_add_u32 s4, s4, callee@rel32@lo+4
98; CHECK-NEXT:    s_addc_u32 s5, s5, callee@rel32@hi+12
99; CHECK-NEXT:    s_swappc_b64 s[30:31], s[4:5]
100; CHECK-NEXT:    s_endpgm
101  call void @callee()
102  ret void
103}
104; CHECK: .amdhsa_system_vgpr_workitem_id 1
105
106
; Required work-group size !5 = {1, 1, 1}: every dimension has size 1. The
; asserted code sets v31 to the constant 0 and reads none of v0, v1 or v2; the
; kernel descriptor assertion that follows this function expects workitem_id 0.
; The %out argument is not referenced by the body.
107define amdgpu_kernel void @known_xyz_0(i32 addrspace(1)* %out) !reqd_work_group_size !5 {
108; CHECK-LABEL: known_xyz_0:
109; CHECK:       ; %bb.0:
110; CHECK-NEXT:    s_add_u32 flat_scratch_lo, s6, s9
111; CHECK-NEXT:    s_addc_u32 flat_scratch_hi, s7, 0
112; CHECK-NEXT:    s_add_u32 s0, s0, s9
113; CHECK-NEXT:    s_addc_u32 s1, s1, 0
114; CHECK-NEXT:    v_mov_b32_e32 v31, 0
115; CHECK-NEXT:    s_mov_b32 s32, 0
116; CHECK-NEXT:    s_getpc_b64 s[4:5]
117; CHECK-NEXT:    s_add_u32 s4, s4, callee@rel32@lo+4
118; CHECK-NEXT:    s_addc_u32 s5, s5, callee@rel32@hi+12
119; CHECK-NEXT:    s_swappc_b64 s[30:31], s[4:5]
120; CHECK-NEXT:    s_endpgm
121  call void @callee()
122  ret void
123}
124; CHECK: .amdhsa_system_vgpr_workitem_id 0
125
; Attribute group applied to the @callee declaration only (the kernels carry no
; attribute group). It lists "amdgpu-no-*" entries for the dispatch id,
; dispatch ptr, implicitarg ptr, LDS kernel id, queue ptr and the three
; workgroup ids. No workitem-id entry appears in the list -- presumably so the
; packed workitem ID stays the one implicit input these tests exercise.
126attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" }
127
; Operands of the !reqd_work_group_size attachments used by the kernels in
; this file. Each node holds three sizes in (x, y, z) order, which matches the
; kernel names: a dimension named in "known_<dims>_0" has size 1 here and every
; other dimension has size 64.
128!0 = !{i32 1, i32 64, i32 64} ; used by known_x_0
129!1 = !{i32 64, i32 1, i32 64} ; used by known_y_0
130!2 = !{i32 64, i32 64, i32 1} ; used by known_z_0
131!3 = !{i32 64, i32 1, i32 1} ; used by known_yz_0
132!4 = !{i32 1, i32 64, i32 1} ; used by known_xz_0
133!5 = !{i32 1, i32 1, i32 1} ; used by known_xyz_0
134