;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=CHECK -check-prefix=SICI
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefix=CHECK -check-prefix=VI

; Loads a four-component value three times from the same resource descriptor:
; once with both trailing i1 operands clear, once with the fourth intrinsic
; operand set (expected to print as "glc") and once with the fifth operand set
; (expected to print as "slc"). All three results are returned in one
; aggregate, so the expected destinations are v[0:3], v[4:7] and v[8:11].
;CHECK-LABEL: {{^}}buffer_load:
;CHECK: buffer_load_format_xyzw v[0:3], off, s[0:3], 0
;CHECK: buffer_load_format_xyzw v[4:7], off, s[0:3], 0 glc
;CHECK: buffer_load_format_xyzw v[8:11], off, s[0:3], 0 slc
define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load(<4 x i32> inreg) {
main_body:
  %data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 0, i1 0, i1 0)
  %data_glc = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 0, i1 1, i1 0)
  %data_slc = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 0, i1 0, i1 1)
  %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} undef, <4 x float> %data, 0
  %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %data_glc, 1
  %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %data_slc, 2
  ret {<4 x float>, <4 x float>, <4 x float>} %r2
}

; A small constant passed as the third intrinsic operand (42) is expected to be
; folded into the instruction's immediate "offset:" field, with no address
; register ("off") and a zero scalar offset.
;CHECK-LABEL: {{^}}buffer_load_immoffs:
;CHECK: buffer_load_format_xyzw v[0:3], off, s[0:3], 0 offset:42
define amdgpu_ps <4 x float> @buffer_load_immoffs(<4 x i32> inreg) {
main_body:
  %data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 42, i1 0, i1 0)
  ret <4 x float> %data
}

; Constant offsets that the checks do not expect to fit in the immediate field
; alone: 4152 (0x1038), 36856 (0x8ff8) and 36864 (0x9000).
;
; For the verde run the first offset is expected to be materialised in a VGPR
; and used with "offen"; the other two loads are only required to use some VGPR
; with "offen".
;
; For the tonga run each offset is expected to be split into a scalar offset
; plus an immediate:
;   4152  = 60     + 4092  (inline scalar constant)
;   36856 = 0x7ffc + 4092  (scalar offset set up with s_movk_i32)
;   36864 = 0x8ffc + 4     (scalar offset set up with s_mov_b32)
; The tonga checks are order-independent (DAG form).
;CHECK-LABEL: {{^}}buffer_load_immoffs_large:
;SICI: v_mov_b32_e32 [[VOFS:v[0-9]+]], 0x1038
;SICI: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[VOFS]], s[0:3], 0 offen
;SICI: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, s[0:3], 0 offen
;VI-DAG: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 60 offset:4092
;VI-DAG: s_movk_i32 [[OFS1:s[0-9]+]], 0x7ffc
;VI-DAG: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS1]] offset:4092
;SICI: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, s[0:3], 0 offen
;VI-DAG: s_mov_b32 [[OFS2:s[0-9]+]], 0x8ffc
;VI-DAG: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS2]] offset:4
define amdgpu_ps <4 x float> @buffer_load_immoffs_large(<4 x i32> inreg) {
main_body:
  %d.0 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 4152, i1 0, i1 0)
  %d.1 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 36856, i1 0, i1 0)
  %d.2 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 36864, i1 0, i1 0)
  ; Sum the three results so that none of the loads is dead.
  %d.3 = fadd <4 x float> %d.0, %d.1
  %data = fadd <4 x float> %d.2, %d.3
  ret <4 x float> %data
}

; Two loads whose constant offsets are close together: 4160 = 0xffc + 68 and
; 4176 = 0xffc + 84. For the tonga run the checks expect a single scalar offset
; register holding 0xffc to be shared by both loads, with no further s_mov
; between them. The verde run only verifies that the function label is emitted.
;CHECK-LABEL: {{^}}buffer_load_immoffs_reuse:
;VI: s_movk_i32 [[OFS:s[0-9]+]], 0xffc
;VI: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS]] offset:68
;VI-NOT: s_mov
;VI: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS]] offset:84
define amdgpu_ps <4 x float> @buffer_load_immoffs_reuse(<4 x i32> inreg) {
main_body:
  %d.0 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 4160, i1 0, i1 0)
  %d.1 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 4176, i1 0, i1 0)
  ; Combine both results so that neither load is dead.
  %data = fadd <4 x float> %d.0, %d.1
  ret <4 x float> %data
}

; A variable second intrinsic operand (with a zero third operand) is expected
; to select the "idxen" addressing form, using v0 as the address register.
;CHECK-LABEL: {{^}}buffer_load_idx:
;CHECK: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen
define amdgpu_ps <4 x float> @buffer_load_idx(<4 x i32> inreg, i32) {
main_body:
  %data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 %1, i32 0, i1 0, i1 0)
  ret <4 x float> %data
}

; A variable third intrinsic operand (with a zero second operand) is expected
; to select the "offen" addressing form, using v0 as the address register.
;CHECK-LABEL: {{^}}buffer_load_ofs:
;CHECK: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 offen
define amdgpu_ps <4 x float> @buffer_load_ofs(<4 x i32> inreg, i32) {
main_body:
  %data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 %1, i1 0, i1 0)
  ret <4 x float> %data
}

; The offset operand is a variable plus the constant 60. The check expects the
; add to disappear: the variable part stays in v0 ("offen") and the constant is
; folded into the immediate "offset:60" field.
;CHECK-LABEL: {{^}}buffer_load_ofs_imm:
;CHECK: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 offen offset:60
define amdgpu_ps <4 x float> @buffer_load_ofs_imm(<4 x i32> inreg, i32) {
main_body:
  %ofs = add i32 %1, 60
  %data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 %ofs, i1 0, i1 0)
  ret <4 x float> %data
}

; Both the second and the third intrinsic operands are variable. The check
; expects a single load that takes the register pair v[0:1] with both "idxen"
; and "offen" set; the incoming arguments are already in that order.
;CHECK-LABEL: {{^}}buffer_load_both:
;CHECK: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
define amdgpu_ps <4 x float> @buffer_load_both(<4 x i32> inreg, i32, i32) {
main_body:
  %data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 %1, i32 %2, i1 0, i1 0)
  ret <4 x float> %data
}

; Same as buffer_load_both, but the two variable operands are passed to the
; intrinsic in swapped order (%2 as the second operand, %1 as the third). The
; checks expect one copy of v0 into v2 so that the pair v[1:2] can be used as
; the address operand.
;CHECK-LABEL: {{^}}buffer_load_both_reversed:
;CHECK: v_mov_b32_e32 v2, v0
;CHECK: buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], 0 idxen offen
define amdgpu_ps <4 x float> @buffer_load_both_reversed(<4 x i32> inreg, i32, i32) {
main_body:
  %data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 %2, i32 %1, i1 0, i1 0)
  ret <4 x float> %data
}

; The scalar float overload of the intrinsic is expected to select the
; single-component instruction, writing only v0.
;CHECK-LABEL: {{^}}buffer_load_x:
;CHECK: buffer_load_format_x v0, off, s[0:3], 0
define amdgpu_ps float @buffer_load_x(<4 x i32> inreg %rsrc) {
main_body:
  %data = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %rsrc, i32 0, i32 0, i1 0, i1 0)
  ret float %data
}

; The <2 x float> overload of the intrinsic is expected to select the
; two-component instruction, writing v[0:1].
;CHECK-LABEL: {{^}}buffer_load_xy:
;CHECK: buffer_load_format_xy v[0:1], off, s[0:3], 0
define amdgpu_ps <2 x float> @buffer_load_xy(<4 x i32> inreg %rsrc) {
main_body:
  %data = call <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 0, i32 0, i1 0, i1 0)
  ret <2 x float> %data
}

; Declarations of the three intrinsic overloads used by the functions in this
; file (float, <2 x float> and <4 x float> results). Each takes a <4 x i32>
; resource descriptor, two i32 operands and two i1 flags; in the tests above
; the i32 operands drive the idxen/offen/offset fields and the i1 flags print
; as glc and slc respectively.
declare float @llvm.amdgcn.buffer.load.format.f32(<4 x i32>, i32, i32, i1, i1) #0
declare <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32>, i32, i32, i1, i1) #0
declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #0

attributes #0 = { nounwind readonly }