; Codegen test for the llvm.amdgcn.exp.f32 and llvm.amdgcn.exp.i32 intrinsics.
; Check prefixes used below: GCN is shared by all three invocations, PREGFX11
; by tonga and gfx1010, GFX10 by gfx1010 only, and GFX11 by gfx1100 only.
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefixes=GCN,PREGFX11 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX10,PREGFX11 %s
; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX11 %s

; Intrinsics under test. Judging by the checks in this file, the export
; operands are (target, enable mask, src0, src1, src2, src3, done, vm).
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1
declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32) #2

; Enable mask 0 (f32): every source is expected to print as "off", both
; without and with the done bit.
; GCN-LABEL: {{^}}test_export_zeroes_f32:
; GCN: exp mrt0 off, off, off, off{{$}}
; GCN: exp mrt0 off, off, off, off done{{$}}
define amdgpu_kernel void @test_export_zeroes_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 true, i1 false)
  ret void
}

; FIXME: Should not set up registers for the unused source registers.

; Enable mask 1 (f32): only src0 is expected in the export; the checks still
; accept all four constants being materialized (see the FIXME above).
; GCN-LABEL: {{^}}test_export_en_src0_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 [[SRC0]], off, off, off done{{$}}
define amdgpu_kernel void @test_export_en_src0_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; Enable mask 2 (f32): only src1 is expected in the export.
; GCN-LABEL: {{^}}test_export_en_src1_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 off, [[SRC1]], off, off done{{$}}
define amdgpu_kernel void @test_export_en_src1_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; Enable mask 4 (f32): only src2 is expected in the export.
; GCN-LABEL: {{^}}test_export_en_src2_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 off, off, [[SRC2]], off done{{$}}
define amdgpu_kernel void @test_export_en_src2_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; Enable mask 8 (f32): only src3 is expected in the export.
; GCN-LABEL: {{^}}test_export_en_src3_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 off, off, off, [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_en_src3_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; Enable mask 3 (f32): src0 and src1 are expected, src2 and src3 off.
; GCN-LABEL: {{^}}test_export_en_src0_src1_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 [[SRC0]], [[SRC1]], off, off done{{$}}
define amdgpu_kernel void @test_export_en_src0_src1_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 0, i32 3, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; Enable mask 5 (f32): src0 and src2 are expected, src1 and src3 off.
; GCN-LABEL: {{^}}test_export_en_src0_src2_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 [[SRC0]], off, [[SRC2]], off done{{$}}
define amdgpu_kernel void @test_export_en_src0_src2_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 0, i32 5, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; Enable mask 9 (f32): src0 and src3 are expected, first without and then
; with the done bit.
; GCN-LABEL: {{^}}test_export_en_src0_src3_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]]{{$}}
; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_en_src0_src3_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; Enable mask 15 (f32): all four sources are expected, first without and then
; with the done bit.
; GCN-LABEL: {{^}}test_export_en_src0_src1_src2_src3_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_en_src0_src1_src2_src3_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; Target 7 (f32) is expected to print as mrt7; the same 0.5 constant feeds all
; four sources, so a single register is expected in every slot.
; GCN-LABEL: {{^}}test_export_mrt7_f32:
; GCN-DAG: v_mov_b32_e32 [[VHALF:v[0-9]+]], 0.5
; GCN: exp mrt7 [[VHALF]], [[VHALF]], [[VHALF]], [[VHALF]]{{$}}
; GCN: exp mrt7 [[VHALF]], [[VHALF]], [[VHALF]], [[VHALF]] done{{$}}
define amdgpu_kernel void @test_export_mrt7_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 7, i32 15, float 0.5, float 0.5, float 0.5, float 0.5, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 7, i32 15, float 0.5, float 0.5, float 0.5, float 0.5, i1 true, i1 false)
  ret void
}

; Target 8 (f32) is expected to print as mrtz.
; GCN-LABEL: {{^}}test_export_z_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_z_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 8, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 8, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; Target 9 (f32) is expected to print as "null" on the pre-GFX11 runs; only
; the function label is checked for the gfx1100 run.
; GCN-LABEL: {{^}}test_export_null_f32:
; PREGFX11-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; PREGFX11: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; PREGFX11: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_null_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 9, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 9, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; Target 10 (f32) is expected to print as invalid_target_10.
; GCN-LABEL: {{^}}test_export_reserved10_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_reserved10_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 10, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 10, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; Target 11 (f32) is expected to print as invalid_target_11.
; GCN-LABEL: {{^}}test_export_reserved11_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_reserved11_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 11, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 11, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; Target 12 (f32) is expected to print as pos0.
; GCN-LABEL: {{^}}test_export_pos0_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_pos0_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; Target 15 (f32) is expected to print as pos3.
; GCN-LABEL: {{^}}test_export_pos3_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_pos3_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 15, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 15, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; Target 32 (f32) is expected to print as param0 on the pre-GFX11 runs; only
; the function label is checked for the gfx1100 run.
; GCN-LABEL: {{^}}test_export_param0_f32:
; PREGFX11-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; PREGFX11: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; PREGFX11: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_param0_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; Target 63 (f32) is expected to print as param31 on the pre-GFX11 runs; only
; the function label is checked for the gfx1100 run.
; GCN-LABEL: {{^}}test_export_param31_f32:
; PREGFX11-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; PREGFX11: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; PREGFX11: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_param31_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 63, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 63, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; vm operand set (f32): the pre-GFX11 runs expect a trailing "vm" on the
; export, while the gfx1100 run expects the export to be printed without it.
; GCN-LABEL: {{^}}test_export_vm_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; PREGFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] vm{{$}}
; PREGFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done vm{{$}}
; GFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_vm_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 true)
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 true)
  ret void
}

; Enable mask 0 (i32): every source is expected to print as "off", both
; without and with the done bit.
; GCN-LABEL: {{^}}test_export_zeroes_i32:
; GCN: exp mrt0 off, off, off, off{{$}}
; GCN: exp mrt0 off, off, off, off done{{$}}
define amdgpu_kernel void @test_export_zeroes_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 true, i1 false)
  ret void
}

; FIXME: Should not set up registers for the unused source registers.

; Enable mask 1 (i32): only src0 is expected in the export; the checks still
; accept all four constants being materialized (see the FIXME above).
; GCN-LABEL: {{^}}test_export_en_src0_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 [[SRC0]], off, off, off done{{$}}
define amdgpu_kernel void @test_export_en_src0_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 0, i32 1, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}

; Enable mask 2 (i32): only src1 is expected in the export.
; GCN-LABEL: {{^}}test_export_en_src1_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 off, [[SRC1]], off, off done{{$}}
define amdgpu_kernel void @test_export_en_src1_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 0, i32 2, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}

; Enable mask 4 (i32): only src2 is expected in the export.
; GCN-LABEL: {{^}}test_export_en_src2_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 off, off, [[SRC2]], off done{{$}}
define amdgpu_kernel void @test_export_en_src2_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 0, i32 4, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}

; Enable mask 8 (i32): only src3 is expected in the export.
; GCN-LABEL: {{^}}test_export_en_src3_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 off, off, off, [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_en_src3_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 0, i32 8, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}

; Enable mask 3 (i32): src0 and src1 are expected, src2 and src3 off.
; GCN-LABEL: {{^}}test_export_en_src0_src1_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 [[SRC0]], [[SRC1]], off, off done{{$}}
define amdgpu_kernel void @test_export_en_src0_src1_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 0, i32 3, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}

; Enable mask 5 (i32): src0 and src2 are expected, src1 and src3 off.
; GCN-LABEL: {{^}}test_export_en_src0_src2_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 [[SRC0]], off, [[SRC2]], off done{{$}}
define amdgpu_kernel void @test_export_en_src0_src2_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 0, i32 5, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}

; Enable mask 9 (i32): src0 and src3 are expected, first without and then
; with the done bit.
; GCN-LABEL: {{^}}test_export_en_src0_src3_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]]{{$}}
; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_en_src0_src3_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 0, i32 9, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 0, i32 9, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}

; Enable mask 15 (i32): all four sources are expected, first without and then
; with the done bit.
; GCN-LABEL: {{^}}test_export_en_src0_src1_src2_src3_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_en_src0_src1_src2_src3_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}

; Target 7 (i32) is expected to print as mrt7; the same constant 5 feeds all
; four sources, so a single register is expected in every slot.
; GCN-LABEL: {{^}}test_export_mrt7_i32:
; GCN-DAG: v_mov_b32_e32 [[VFIVE:v[0-9]+]], 5
; GCN: exp mrt7 [[VFIVE]], [[VFIVE]], [[VFIVE]], [[VFIVE]]{{$}}
; GCN: exp mrt7 [[VFIVE]], [[VFIVE]], [[VFIVE]], [[VFIVE]] done{{$}}
define amdgpu_kernel void @test_export_mrt7_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 7, i32 15, i32 5, i32 5, i32 5, i32 5, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 7, i32 15, i32 5, i32 5, i32 5, i32 5, i1 true, i1 false)
  ret void
}

; Target 8 (i32) is expected to print as mrtz.
; GCN-LABEL: {{^}}test_export_z_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_z_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 8, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 8, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}

; Target 9 (i32) is expected to print as "null" on the pre-GFX11 runs; only
; the function label is checked for the gfx1100 run.
; GCN-LABEL: {{^}}test_export_null_i32:
; PREGFX11-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; PREGFX11: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; PREGFX11: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_null_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 9, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 9, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}

; Target 10 (i32) is expected to print as invalid_target_10.
; GCN-LABEL: {{^}}test_export_reserved10_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_reserved10_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 10, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 10, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}

; Target 11 (i32) is expected to print as invalid_target_11.
; GCN-LABEL: {{^}}test_export_reserved11_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_reserved11_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 11, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 11, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}

; Target 12 (i32) is expected to print as pos0.
; GCN-LABEL: {{^}}test_export_pos0_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_pos0_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 12, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 12, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}

; Target 15 (i32) is expected to print as pos3.
; GCN-LABEL: {{^}}test_export_pos3_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_pos3_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 15, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 15, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}

; Target 32 (i32) is expected to print as param0 on the pre-GFX11 runs; only
; the function label is checked for the gfx1100 run.
; GCN-LABEL: {{^}}test_export_param0_i32:
; PREGFX11-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; PREGFX11: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; PREGFX11: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_param0_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 32, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 32, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}

; Target 63 (i32) is expected to print as param31 on the pre-GFX11 runs; only
; the function label is checked for the gfx1100 run.
; GCN-LABEL: {{^}}test_export_param31_i32:
; PREGFX11-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; PREGFX11: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; PREGFX11: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_param31_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 63, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 63, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}

; vm operand set (i32): the pre-GFX11 runs expect a trailing "vm" on the
; export, while the gfx1100 run expects the export to be printed without it.
; GCN-LABEL: {{^}}test_export_vm_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; PREGFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] vm{{$}}
; PREGFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done vm{{$}}
; GFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_vm_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 true)
  call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 true)
  ret void
}

; Export (done and vm both clear) inside a conditionally executed block of a
; pixel shader; the checks expect an s_cbranch_execz followed by an exp.
; GCN-LABEL: {{^}}test_if_export_f32:
; GCN: s_cbranch_execz
; GCN: exp
define amdgpu_ps void @test_if_export_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
  %cc = icmp eq i32 %flag, 0
  br i1 %cc, label %end, label %exp

exp:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 false)
  br label %end

end:
  ret void
}

; Same shape as test_if_export_f32, but with the vm operand set on the export.
; GCN-LABEL: {{^}}test_if_export_vm_f32:
; GCN: s_cbranch_execz
; GCN: exp
define amdgpu_ps void @test_if_export_vm_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
  %cc = icmp eq i32 %flag, 0
  br i1 %cc, label %end, label %exp

exp:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 true)
  br label %end

end:
  ret void
}

; Same shape as test_if_export_f32, but with the done operand set on the
; export.
; GCN-LABEL: {{^}}test_if_export_done_f32:
; GCN: s_cbranch_execz
; GCN: exp
define amdgpu_ps void @test_if_export_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
  %cc = icmp eq i32 %flag, 0
  br i1 %cc, label %end, label %exp

exp:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 false)
  br label %end

end:
  ret void
}

; Same shape as test_if_export_f32, but with both the done and vm operands set
; on the export.
; GCN-LABEL: {{^}}test_if_export_vm_done_f32:
; GCN: s_cbranch_execz
; GCN: exp
define amdgpu_ps void @test_if_export_vm_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
  %cc = icmp eq i32 %flag, 0
  br i1 %cc, label %end, label %exp

exp:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
  br label %end

end:
  ret void
}

; Two parameter exports that share x and y, with the second source computation
; placed between them in the IR. The pre-GFX11 checks expect the two exp
; instructions to be emitted on adjacent lines.
; GCN-LABEL: {{^}}test_export_clustering:
; PREGFX11-DAG: v_mov_b32_e32 [[W0:v[0-9]+]], 0
; PREGFX11-DAG: v_mov_b32_e32 [[W1:v[0-9]+]], 1.0
; PREGFX11-DAG: v_mov_b32_e32 [[X:v[0-9]+]], s0
; PREGFX11-DAG: v_mov_b32_e32 [[Y:v[0-9]+]], s1
; PREGFX11-DAG: v_add_f32_e{{32|64}} [[Z0:v[0-9]+]]
; PREGFX11-DAG: v_sub_f32_e{{32|64}} [[Z1:v[0-9]+]]
; PREGFX11: exp param0 [[X]], [[Y]], [[Z0]], [[W0]]{{$}}
; PREGFX11-NEXT: exp param1 [[X]], [[Y]], [[Z1]], [[W1]] done{{$}}
define amdgpu_kernel void @test_export_clustering(float %x, float %y) #0 {
  %z0 = fadd float %x, %y
  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %x, float %y, float %z0, float 0.0, i1 false, i1 false)
  %z1 = fsub float %y, %x
  call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %x, float %y, float %z1, float 1.0, i1 true, i1 false)
  ret void
}

; The IR issues the param0 export before the pos0 export. The pre-GFX11 checks
; expect pos0 to be emitted first, with no s_waitcnt between it and param0.
; GCN-LABEL: {{^}}test_export_pos_before_param:
; PREGFX11: exp pos0
; PREGFX11-NOT: s_waitcnt
; PREGFX11: exp param0
define amdgpu_kernel void @test_export_pos_before_param(float %x, float %y) #0 {
  %z0 = fadd float %x, %y
  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
  %z1 = fsub float %y, %x
  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %z1, i1 true, i1 false)
  ret void
}

; Same as test_export_pos_before_param but with target 16, which the gfx1010
; run expects to print as pos4. Only the gfx1010 run checks more than the label.
; GCN-LABEL: {{^}}test_export_pos4_before_param:
; GFX10: exp pos4
; GFX10-NOT: s_waitcnt
; GFX10: exp param0
define amdgpu_kernel void @test_export_pos4_before_param(float %x, float %y) #0 {
  %z0 = fadd float %x, %y
  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
  %z1 = fsub float %y, %x
  call void @llvm.amdgcn.exp.f32(i32 16, i32 15, float 0.0, float 0.0, float 0.0, float %z1, i1 true, i1 false)
  ret void
}

; The IR issues param0..param2 and then pos0..pos2. The pre-GFX11 checks expect
; the three position exports first, then the three parameter exports, each
; group in its original relative order and no s_waitcnt between pos2 and param0.
; GCN-LABEL: {{^}}test_export_pos_before_param_ordered:
; PREGFX11: exp pos0
; PREGFX11: exp pos1
; PREGFX11: exp pos2
; PREGFX11-NOT: s_waitcnt
; PREGFX11: exp param0
; PREGFX11: exp param1
; PREGFX11: exp param2
define amdgpu_kernel void @test_export_pos_before_param_ordered(float %x, float %y) #0 {
  %z0 = fadd float %x, %y
  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 34, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
  %z1 = fsub float %y, %x
  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %z1, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 13, i32 15, float 0.0, float 0.0, float 0.0, float %z1, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 14, i32 15, float 0.0, float 0.0, float 0.0, float %z1, i1 true, i1 false)
  ret void
}

; Two parameter exports, then a buffer load whose result feeds the pos0
; export. The pre-GFX11 checks expect pos0 to be followed directly by param0
; and param1 on the next two lines.
; GCN-LABEL: {{^}}test_export_pos_before_param_across_load:
; PREGFX11: exp pos0
; PREGFX11-NEXT: exp param0
; PREGFX11-NEXT: exp param1
define amdgpu_kernel void @test_export_pos_before_param_across_load(i32 %idx) #0 {
  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float 1.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float 0.5, i1 false, i1 false)
  %load = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> undef, i32 %idx, i32 0, i32 0)
  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %load, i1 true, i1 false)
  ret void
}

; Store to one of two private allocas (chosen by %idx), export pos0, then load
; from %data0 and use the loaded value in two parameter exports. The pre-GFX11
; checks expect buffer_store, buffer_load, and then the pos0, param0 and param1
; exports, in that order.
; GCN-LABEL: {{^}}test_export_across_store_load:
; PREGFX11: buffer_store
; PREGFX11: buffer_load
; PREGFX11: exp pos0
; PREGFX11: exp param0
; PREGFX11: exp param1
define amdgpu_kernel void @test_export_across_store_load(i32 %idx, float %v) #0 {
  %data0 = alloca <4 x float>, align 8, addrspace(5)
  %data1 = alloca <4 x float>, align 8, addrspace(5)
  %cmp = icmp eq i32 %idx, 1
  %data = select i1 %cmp, <4 x float> addrspace(5)* %data0, <4 x float> addrspace(5)* %data1
  %sptr = getelementptr inbounds <4 x float>, <4 x float> addrspace(5)* %data, i32 0, i32 0
  store float %v, float addrspace(5)* %sptr, align 8
  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float 1.0, i1 true, i1 false)
  %ptr0 = getelementptr inbounds <4 x float>, <4 x float> addrspace(5)* %data0, i32 0, i32 0
  %load0 = load float, float addrspace(5)* %ptr0, align 8
  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
  ret void
}

; Attribute groups: #0 is applied to every test function, #1 to the two export
; intrinsic declarations, and #2 to the buffer load declaration.
attributes #0 = { nounwind }
attributes #1 = { nounwind inaccessiblememonly }
attributes #2 = { nounwind readnone }