;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefixes=GCN,VI %s

; v_interp: pixel shader that splits its <2 x float> argument into an i/j
; pair, runs interp.p1 followed by interp.p2 for two channels of attribute 0,
; reads one more value through interp.mov, and exports the results.
; The inreg i32 argument (%3) is the last operand of every interp call; the
; checks below expect an SGPR to be copied into m0 (presumably the one holding
; %3) and expect that no s_wqm is emitted.
; GCN-LABEL: {{^}}v_interp:
; GCN-NOT: s_wqm
; GCN: s_mov_b32 m0, s{{[0-9]+}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.x{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.y{{$}}
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.y{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p0, attr0.x{{$}}
define amdgpu_ps void @v_interp(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x float>) {
main_body:
  ; The two lanes of the vector argument are the inputs to p1 and p2.
  %i = extractelement <2 x float> %4, i32 0
  %j = extractelement <2 x float> %4, i32 1
  ; First i32 operand 0, attribute 0. %p1_0 is not used by anything below.
  %p0_0 = call float @llvm.amdgcn.interp.p1(float %i, i32 0, i32 0, i32 %3)
  %p1_0 = call float @llvm.amdgcn.interp.p2(float %p0_0, float %j, i32 0, i32 0, i32 %3)
  ; First i32 operand 1, attribute 0 (expected to print as attr0.y).
  %p0_1 = call float @llvm.amdgcn.interp.p1(float %i, i32 1, i32 0, i32 %3)
  %p1_1 = call float @llvm.amdgcn.interp.p2(float %p0_1, float %j, i32 1, i32 0, i32 %3)
  ; First operand 2 is expected to print as parameter p0.
  %const = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %3)
  %w = fadd float %p1_1, %const
  ; The export consumes the interpolated values so they stay live.
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %p0_0, float %p0_0, float %p1_1, float %w)
  ret void
}

; v_interp_p1: exercises interp.p1 over a range of immediate operands while
; the last operand is the constant 256, which the checks expect to be
; materialized into m0 as 0x100.
; Judging from the expected output, the first i32 operand selects the channel
; suffix and the second selects the attribute number (operands 1, 2 are
; expected as attr2.y). Channel value 4 is expected to print as ".x", and
; attribute numbers 63 and 64 are both expected to print unchanged.
; GCN-LABEL: {{^}}v_interp_p1:
; GCN: s_movk_i32 m0, 0x100
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.x{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.y{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.z{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.w{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.x{{$}}

; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr1.x{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr2.y{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr3.z{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr4.w{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr63.w{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr64.w{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr64.x{{$}}
define amdgpu_ps void @v_interp_p1(float %i) {
  ; Attribute 0 with channel operand 0 through 4.
  %p0_0 = call float @llvm.amdgcn.interp.p1(float %i, i32 0, i32 0, i32 256)
  %p0_1 = call float @llvm.amdgcn.interp.p1(float %i, i32 1, i32 0, i32 256)
  %p0_2 = call float @llvm.amdgcn.interp.p1(float %i, i32 2, i32 0, i32 256)
  %p0_3 = call float @llvm.amdgcn.interp.p1(float %i, i32 3, i32 0, i32 256)
  %p0_4 = call float @llvm.amdgcn.interp.p1(float %i, i32 4, i32 0, i32 256)
  ; Channel and attribute varied together, up to attribute 64.
  %p0_5 = call float @llvm.amdgcn.interp.p1(float %i, i32 0, i32 1, i32 256)
  %p0_6 = call float @llvm.amdgcn.interp.p1(float %i, i32 1, i32 2, i32 256)
  %p0_7 = call float @llvm.amdgcn.interp.p1(float %i, i32 2, i32 3, i32 256)
  %p0_8 = call float @llvm.amdgcn.interp.p1(float %i, i32 3, i32 4, i32 256)
  %p0_9 = call float @llvm.amdgcn.interp.p1(float %i, i32 3, i32 63, i32 256)
  %p0_10 = call float @llvm.amdgcn.interp.p1(float %i, i32 3, i32 64, i32 256)
  %p0_11 = call float @llvm.amdgcn.interp.p1(float %i, i32 4, i32 64, i32 256)

  ; Every result is written with a volatile store, presumably so that none of
  ; the calls above can be dropped as unused.
  store volatile float %p0_0, float addrspace(1)* undef
  store volatile float %p0_1, float addrspace(1)* undef
  store volatile float %p0_2, float addrspace(1)* undef
  store volatile float %p0_3, float addrspace(1)* undef
  store volatile float %p0_4, float addrspace(1)* undef
  store volatile float %p0_5, float addrspace(1)* undef
  store volatile float %p0_6, float addrspace(1)* undef
  store volatile float %p0_7, float addrspace(1)* undef
  store volatile float %p0_8, float addrspace(1)* undef
  store volatile float %p0_9, float addrspace(1)* undef
  store volatile float %p0_10, float addrspace(1)* undef
  store volatile float %p0_11, float addrspace(1)* undef
  ret void
}

; v_interp_p2: exercises interp.p2 over a range of immediate operands while
; the last operand is the constant 256, which the checks expect to be
; materialized into m0 as 0x100.
; The first i32 operand appears to select the channel suffix and the second
; the attribute number; channel value 4 is expected to print as ".x".
; NOTE(review): %p2_5 uses attribute 1, yet no line below expects attr1.x and
; there are three attr0.x lines. One of them may have been meant as attr1.x;
; confirm against the actual llc output before changing it.
; GCN-LABEL: {{^}}v_interp_p2:
; GCN: s_movk_i32 m0, 0x100
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.x{{$}}
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.y{{$}}
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.z{{$}}
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.w{{$}}
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.x{{$}}
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.x{{$}}
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr63.x{{$}}
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr64.x{{$}}
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr64.x{{$}}
define amdgpu_ps void @v_interp_p2(float %x, float %j) {
  ; Attribute 0 with channel operand 0 through 4.
  %p2_0 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 0, i32 0, i32 256)
  %p2_1 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 1, i32 0, i32 256)
  %p2_2 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 2, i32 0, i32 256)
  %p2_3 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 3, i32 0, i32 256)
  %p2_4 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 4, i32 0, i32 256)

  ; Attribute operand 1, 63 and 64; the last call also uses channel value 4.
  %p2_5 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 0, i32 1, i32 256)
  %p2_6 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 0, i32 63, i32 256)
  %p2_7 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 0, i32 64, i32 256)
  %p2_8 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 4, i32 64, i32 256)

  ; Every result is written with a volatile store, presumably so that none of
  ; the calls above can be dropped as unused.
  store volatile float %p2_0, float addrspace(1)* undef
  store volatile float %p2_1, float addrspace(1)* undef
  store volatile float %p2_2, float addrspace(1)* undef
  store volatile float %p2_3, float addrspace(1)* undef
  store volatile float %p2_4, float addrspace(1)* undef
  store volatile float %p2_5, float addrspace(1)* undef
  store volatile float %p2_6, float addrspace(1)* undef
  store volatile float %p2_7, float addrspace(1)* undef
  store volatile float %p2_8, float addrspace(1)* undef
  ret void
}

; v_interp_mov: exercises interp.mov over a range of immediate operands while
; the last operand is the constant 256, which the checks expect to be
; materialized into m0 as 0x100.
; Per the expected output, the first operand selects the parameter: 0, 1 and 2
; print as p10, p20 and p0, and other values (3, 8, 10) print as
; invalid_param_<n>. The second operand appears to select the channel suffix
; and the third the attribute number.
; The float parameters %x and %j are not used in the body.
; NOTE(review): %mov_4 uses channel operand 1, but the corresponding line in
; the second group expects attr0.x rather than attr0.y; confirm against the
; actual llc output before changing it.
; GCN-LABEL: {{^}}v_interp_mov:
; GCN: s_movk_i32 m0, 0x100
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p10, attr0.x{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p20, attr0.x{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p0, attr0.x{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, invalid_param_3, attr0.x{{$}}

; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p10, attr0.x{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p10, attr0.z{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p10, attr0.w{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p10, attr0.x{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, invalid_param_8, attr0.x{{$}}

; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p10, attr63.y{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p10, attr64.y{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, invalid_param_3, attr64.y{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, invalid_param_10, attr64.x{{$}}
define amdgpu_ps void @v_interp_mov(float %x, float %j) {
  ; Parameter operand 0 through 3 on channel 0 of attribute 0.
  %mov_0 = call float @llvm.amdgcn.interp.mov(i32 0, i32 0, i32 0, i32 256)
  %mov_1 = call float @llvm.amdgcn.interp.mov(i32 1, i32 0, i32 0, i32 256)
  %mov_2 = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 256)
  %mov_3 = call float @llvm.amdgcn.interp.mov(i32 3, i32 0, i32 0, i32 256)

  ; Channel operand 1 through 4 on attribute 0; the last call also uses
  ; parameter value 8.
  %mov_4 = call float @llvm.amdgcn.interp.mov(i32 0, i32 1, i32 0, i32 256)
  %mov_5 = call float @llvm.amdgcn.interp.mov(i32 0, i32 2, i32 0, i32 256)
  %mov_6 = call float @llvm.amdgcn.interp.mov(i32 0, i32 3, i32 0, i32 256)
  %mov_7 = call float @llvm.amdgcn.interp.mov(i32 0, i32 4, i32 0, i32 256)
  %mov_8 = call float @llvm.amdgcn.interp.mov(i32 8, i32 4, i32 0, i32 256)

  ; Attribute operand 63 and 64, combined with parameter values 3 and 10.
  %mov_9 = call float @llvm.amdgcn.interp.mov(i32 0, i32 1, i32 63, i32 256)
  %mov_10 = call float @llvm.amdgcn.interp.mov(i32 0, i32 1, i32 64, i32 256)
  %mov_11 = call float @llvm.amdgcn.interp.mov(i32 3, i32 1, i32 64, i32 256)
  %mov_12 = call float @llvm.amdgcn.interp.mov(i32 10, i32 4, i32 64, i32 256)

  ; Every result is written with a volatile store, presumably so that none of
  ; the calls above can be dropped as unused.
  store volatile float %mov_0, float addrspace(1)* undef
  store volatile float %mov_1, float addrspace(1)* undef
  store volatile float %mov_2, float addrspace(1)* undef
  store volatile float %mov_3, float addrspace(1)* undef

  store volatile float %mov_4, float addrspace(1)* undef
  store volatile float %mov_5, float addrspace(1)* undef
  store volatile float %mov_6, float addrspace(1)* undef
  store volatile float %mov_7, float addrspace(1)* undef
  store volatile float %mov_8, float addrspace(1)* undef

  store volatile float %mov_9, float addrspace(1)* undef
  store volatile float %mov_10, float addrspace(1)* undef
  store volatile float %mov_11, float addrspace(1)* undef
  store volatile float %mov_12, float addrspace(1)* undef
  ret void
}

; v_interp_readnone: two stores of 0.0 to local memory (address space 3), at
; %lds and at %lds + 4 floats, with an interp.mov call placed between them.
; The checks expect both stores to come out as one ds_write2_b32 with
; offset1:4, i.e. the call in the middle must not keep them apart. They also
; expect m0 to be set to 0 (the last operand of the interp.mov call) and then
; to -1 ahead of the ds write.
; SI won't merge ds memory operations, because of the signed offset bug, so
; we only have check lines for VI.
; VI-LABEL: {{^}}v_interp_readnone:
; VI: s_mov_b32 m0, 0
; VI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; VI-DAG: v_interp_mov_f32 v{{[0-9]+}}, p0, attr0.x{{$}}
; VI: s_mov_b32 m0, -1{{$}}
; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
define amdgpu_ps void @v_interp_readnone(float addrspace(3)* %lds) {
  ; First local-memory store, before the interp call.
  store float 0.0, float addrspace(3)* %lds
  %tmp1 = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 0)
  ; Second local-memory store, four floats past the first one.
  %tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
  store float 0.0, float addrspace(3)* %tmp2
  ; The export consumes the interp.mov result so the call stays live.
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp1, float %tmp1, float %tmp1, float %tmp1)
  ret void
}

; Intrinsics used by the tests above. The three interp intrinsics share
; attribute group #0 (nounwind readnone); the export intrinsic carries no
; attribute group.
; In the tests above the trailing i32 operand of each interp intrinsic is the
; value expected in m0; the roles of the other i32 operands (channel,
; attribute, parameter) are inferred from the expected output and should be
; confirmed against the intrinsic definitions.

; Function Attrs: nounwind readnone
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0

; Function Attrs: nounwind readnone
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0

; Function Attrs: nounwind readnone
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #0

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { nounwind readnone }
