;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=VERDE %s
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s

; Three xyzw format stores of <4 x float> data through the same resource, with
; every i32 operand zero except the last one, which is 0, 1 and 2 in turn.
; The expected instructions carry no modifier, glc and slc respectively, and
; no s_waitcnt may be emitted before the first store.
;CHECK-LABEL: {{^}}buffer_store:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_format_xyzw v[0:3], {{v[0-9]+}}, s[0:3], 0 idxen
;CHECK: buffer_store_format_xyzw v[4:7], {{v[0-9]+}}, s[0:3], 0 idxen glc
;CHECK: buffer_store_format_xyzw v[8:11], {{v[0-9]+}}, s[0:3], 0 idxen slc
define amdgpu_ps void @buffer_store(<4 x i32> inreg, <4 x float>, <4 x float>, <4 x float>) {
main_body:
  call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 0, i32 0, i32 0)
  call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %2, <4 x i32> %0, i32 0, i32 0, i32 0, i32 1)
  call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %3, <4 x i32> %0, i32 0, i32 0, i32 0, i32 2)
  ret void
}

; A constant 42 in the fourth operand of the store is expected to be emitted
; as the immediate "offset:42" on the instruction.
;CHECK-LABEL: {{^}}buffer_store_immoffs:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_format_xyzw v[0:3], {{v[0-9]+}}, s[0:3], 0 idxen offset:42
define amdgpu_ps void @buffer_store_immoffs(<4 x i32> inreg, <4 x float>) {
main_body:
  call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 42, i32 0, i32 0)
  ret void
}

; A variable value in the third operand of the store is expected to be used
; directly as the single address register (v4) with idxen.
;CHECK-LABEL: {{^}}buffer_store_idx:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_format_xyzw v[0:3], v4, s[0:3], 0 idxen
define amdgpu_ps void @buffer_store_idx(<4 x i32> inreg, <4 x float>, i32) {
main_body:
  call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 %2, i32 0, i32 0, i32 0)
  ret void
}

; A variable value in the fourth operand of the store, with the third operand
; left at zero, is expected to produce a two-register address (v[4:5]) with
; both idxen and offen set.
;CHECK-LABEL: {{^}}buffer_store_ofs:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 idxen offen
define amdgpu_ps void @buffer_store_ofs(<4 x i32> inreg, <4 x float>, i32) {
main_body:
  call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 %2, i32 0, i32 0)
  ret void
}

; Variable values in both the third and fourth operands, passed in the same
; order as the function arguments: the incoming register pair v[4:5] is
; expected to be used as the address without any extra instruction being
; required by the checks.
;CHECK-LABEL: {{^}}buffer_store_both:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 idxen offen
define amdgpu_ps void @buffer_store_both(<4 x i32> inreg, <4 x float>, i32, i32) {
main_body:
  call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 %2, i32 %3, i32 0, i32 0)
  ret void
}

; Same as buffer_store_both, but the two i32 arguments are passed to the
; intrinsic in swapped order (%3 then %2). A v_mov_b32_e32 v6, v4 is expected
; so that the address can be formed as the register pair v[5:6].
;CHECK-LABEL: {{^}}buffer_store_both_reversed:
;CHECK: v_mov_b32_e32 v6, v4
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_format_xyzw v[0:3], v[5:6], s[0:3], 0 idxen offen
define amdgpu_ps void @buffer_store_both_reversed(<4 x i32> inreg, <4 x float>, i32, i32) {
main_body:
  call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 %3, i32 %2, i32 0, i32 0)
  ret void
}

; Store, then load through the same resource, then store the loaded value.
; The default-prefix checks expect no s_waitcnt before the first store and an
; s_waitcnt vmcnt(0) between the load and the final store that consumes its
; result. The run using the VERDE prefix only looks for s_waitcnt expcnt(0).
; NOTE(review): the expcnt wait is presumably needed because the load writes
; v[0:3] while the first store still reads those registers -- confirm.
;
; Ideally, the register allocator would avoid the wait here
;
;CHECK-LABEL: {{^}}buffer_store_wait:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_format_xyzw v[0:3], v4, s[0:3], 0 idxen
;VERDE: s_waitcnt expcnt(0)
;CHECK: buffer_load_format_xyzw v[0:3], v5, s[0:3], 0 idxen
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_store_format_xyzw v[0:3], v6, s[0:3], 0 idxen
define amdgpu_ps void @buffer_store_wait(<4 x i32> inreg, <4 x float>, i32, i32, i32) {
main_body:
  call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 %2, i32 0, i32 0, i32 0)
  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 %3, i32 0, i32 0, i32 0)
  call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %data, <4 x i32> %0, i32 %4, i32 0, i32 0, i32 0)
  ret void
}

; Scalar float data with a variable index: expects the single-component
; buffer_store_format_x with data in v0 and the index in v1.
;CHECK-LABEL: {{^}}buffer_store_x1:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_format_x v0, v1, s[0:3], 0 idxen
define amdgpu_ps void @buffer_store_x1(<4 x i32> inreg %rsrc, float %data, i32 %index) {
main_body:
  call void @llvm.amdgcn.struct.buffer.store.format.f32(float %data, <4 x i32> %rsrc, i32 %index, i32 0, i32 0, i32 0)
  ret void
}

; Integer variant of buffer_store_x1: i32 data goes through the .i32 overload
; of the intrinsic and is expected to select the same buffer_store_format_x
; instruction.
;CHECK-LABEL: {{^}}buffer_store_x1_i32:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_format_x v0, v1, s[0:3], 0 idxen
define amdgpu_ps void @buffer_store_x1_i32(<4 x i32> inreg %rsrc, i32 %data, i32 %index) {
main_body:
  call void @llvm.amdgcn.struct.buffer.store.format.i32(i32 %data, <4 x i32> %rsrc, i32 %index, i32 0, i32 0, i32 0)
  ret void
}

; Two-component <2 x float> data with a variable index: expects
; buffer_store_format_xy with data in v[0:1] and the index in v2.
;CHECK-LABEL: {{^}}buffer_store_x2:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_format_xy v[0:1], v2, s[0:3], 0 idxen
define amdgpu_ps void @buffer_store_x2(<4 x i32> inreg %rsrc, <2 x float> %data, i32 %index) {
main_body:
  call void @llvm.amdgcn.struct.buffer.store.format.v2f32(<2 x float> %data, <4 x i32> %rsrc, i32 %index, i32 0, i32 0, i32 0)
  ret void
}

; Declarations of the intrinsic overloads called by the tests in this file,
; followed by the attribute groups they reference: the stores use #0
; (nounwind) and the load uses #1 (nounwind readonly).
declare void @llvm.amdgcn.struct.buffer.store.format.f32(float, <4 x i32>, i32, i32, i32, i32) #0
declare void @llvm.amdgcn.struct.buffer.store.format.v2f32(<2 x float>, <4 x i32>, i32, i32, i32, i32) #0
declare void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) #0
declare void @llvm.amdgcn.struct.buffer.store.format.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
declare <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32, i32) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
