; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck %s
; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga < %s | FileCheck %s
; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=CHECK,HSA %s

; Disable optimizations in case there are optimizations added that
; specialize away generic pointer accesses.
;
; These testcases might become useless when there are optimizations to
; remove generic pointers.
;
; Every test below casts a pointer to addrspace(4) and accesses memory through
; the casted pointer; the checks expect a flat_* instruction of matching width.

;------------------------------------------------------------------------------
; Stores through a pointer cast from addrspace(1) to addrspace(4).
;------------------------------------------------------------------------------

; 32-bit store. Besides the opcode, this also checks that the pointer pair and
; the data loaded into SGPRs are copied to VGPRs and that exactly those VGPRs
; are the operands of the flat store.
; CHECK-LABEL: {{^}}store_flat_i32:
; CHECK-DAG: s_load_dwordx2 s{{\[}}[[LO_SREG:[0-9]+]]:[[HI_SREG:[0-9]+]]],
; CHECK-DAG: s_load_dword s[[SDATA:[0-9]+]],
; CHECK: s_waitcnt lgkmcnt(0)
; CHECK-DAG: v_mov_b32_e32 v[[DATA:[0-9]+]], s[[SDATA]]
; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]]
; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]]
; CHECK: flat_store_dword v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}, v[[DATA]]
define void @store_flat_i32(i32 addrspace(1)* %gptr, i32 %x) #0 {
  %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
  store i32 %x, i32 addrspace(4)* %fptr, align 4
  ret void
}

; 64-bit store: expects the two-dword flat store.
; CHECK-LABEL: {{^}}store_flat_i64:
; CHECK: flat_store_dwordx2
define void @store_flat_i64(i64 addrspace(1)* %gptr, i64 %x) #0 {
  %fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
  store i64 %x, i64 addrspace(4)* %fptr, align 8
  ret void
}

; <4 x i32> store: expects the four-dword flat store.
; CHECK-LABEL: {{^}}store_flat_v4i32:
; CHECK: flat_store_dwordx4
define void @store_flat_v4i32(<4 x i32> addrspace(1)* %gptr, <4 x i32> %x) #0 {
  %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
  store <4 x i32> %x, <4 x i32> addrspace(4)* %fptr, align 16
  ret void
}

; i32 truncated to i16 before the store: expects the 16-bit flat store.
; CHECK-LABEL: {{^}}store_flat_trunc_i16:
; CHECK: flat_store_short
define void @store_flat_trunc_i16(i16 addrspace(1)* %gptr, i32 %x) #0 {
  %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
  %y = trunc i32 %x to i16
  store i16 %y, i16 addrspace(4)* %fptr, align 2
  ret void
}

; i32 truncated to i8 before the store: expects the 8-bit flat store.
; CHECK-LABEL: {{^}}store_flat_trunc_i8:
; CHECK: flat_store_byte
define void @store_flat_trunc_i8(i8 addrspace(1)* %gptr, i32 %x) #0 {
  %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
  %y = trunc i32 %x to i8
  store i8 %y, i8 addrspace(4)* %fptr, align 2
  ret void
}

;------------------------------------------------------------------------------
; Loads through a pointer cast from addrspace(1) to addrspace(4). The loaded
; value is written back to %out through an ordinary addrspace(1) store.
;------------------------------------------------------------------------------

; 32-bit load.
; CHECK-LABEL: {{^}}load_flat_i32:
; CHECK: flat_load_dword
define void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
  %fload = load i32, i32 addrspace(4)* %fptr, align 4
  store i32 %fload, i32 addrspace(1)* %out, align 4
  ret void
}

; 64-bit load: expects the two-dword flat load.
; CHECK-LABEL: {{^}}load_flat_i64:
; CHECK: flat_load_dwordx2
define void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
  %fload = load i64, i64 addrspace(4)* %fptr, align 8
  store i64 %fload, i64 addrspace(1)* %out, align 8
  ret void
}

; <4 x i32> load: expects the four-dword flat load.
; CHECK-LABEL: {{^}}load_flat_v4i32:
; CHECK: flat_load_dwordx4
define void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
  %fload = load <4 x i32>, <4 x i32> addrspace(4)* %fptr, align 32
  store <4 x i32> %fload, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; i8 load followed by sext to i32: expects the signed byte flat load.
; CHECK-LABEL: {{^}}sextload_flat_i8:
; CHECK: flat_load_sbyte
define void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
  %fload = load i8, i8 addrspace(4)* %fptr, align 4
  %ext = sext i8 %fload to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; i8 load followed by zext to i32: expects the unsigned byte flat load.
; CHECK-LABEL: {{^}}zextload_flat_i8:
; CHECK: flat_load_ubyte
define void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
  %fload = load i8, i8 addrspace(4)* %fptr, align 4
  %ext = zext i8 %fload to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; i16 load followed by sext to i32: expects the signed short flat load.
; CHECK-LABEL: {{^}}sextload_flat_i16:
; CHECK: flat_load_sshort
define void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
  %fload = load i16, i16 addrspace(4)* %fptr, align 4
  %ext = sext i16 %fload to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; i16 load followed by zext to i32: expects the unsigned short flat load.
; CHECK-LABEL: {{^}}zextload_flat_i16:
; CHECK: flat_load_ushort
define void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
  %fload = load i16, i16 addrspace(4)* %fptr, align 4
  %ext = zext i16 %fload to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

;------------------------------------------------------------------------------
; Accesses to an alloca through a pointer cast to addrspace(4). The accesses
; are volatile so that they are not removed even though the result is unused.
;------------------------------------------------------------------------------

; An align-1 i32 load: four byte-sized flat loads are expected.
; CHECK-LABEL: {{^}}flat_scratch_unaligned_load:
; CHECK: flat_load_ubyte
; CHECK: flat_load_ubyte
; CHECK: flat_load_ubyte
; CHECK: flat_load_ubyte
define void @flat_scratch_unaligned_load() {
  %scratch = alloca i32
  %fptr = addrspacecast i32* %scratch to i32 addrspace(4)*
  %ld = load volatile i32, i32 addrspace(4)* %fptr, align 1
  ret void
}

; An align-1 i32 store: four byte-sized flat stores are expected.
; CHECK-LABEL: {{^}}flat_scratch_unaligned_store:
; CHECK: flat_store_byte
; CHECK: flat_store_byte
; CHECK: flat_store_byte
; CHECK: flat_store_byte
define void @flat_scratch_unaligned_store() {
  %scratch = alloca i32
  %fptr = addrspacecast i32* %scratch to i32 addrspace(4)*
  store volatile i32 0, i32 addrspace(4)* %fptr, align 1
  ret void
}

; A <2 x i32> load from the alloca: two dword flat loads are expected.
; CHECK-LABEL: {{^}}flat_scratch_multidword_load:
; HSA: flat_load_dword
; HSA: flat_load_dword
; FIXME: These tests are broken for os = mesa3d, because it doesn't initialize
; flat_scr. Until that is fixed, the instructions are only checked under the
; HSA prefix, i.e. for the amdhsa invocation.
define void @flat_scratch_multidword_load() {
  %scratch = alloca <2 x i32>
  %fptr = addrspacecast <2 x i32>* %scratch to <2 x i32> addrspace(4)*
  %ld = load volatile <2 x i32>, <2 x i32> addrspace(4)* %fptr
  ret void
}

; A <2 x i32> store to the alloca: two dword flat stores are expected.
; CHECK-LABEL: {{^}}flat_scratch_multidword_store:
; HSA: flat_store_dword
; HSA: flat_store_dword
; FIXME: These tests are broken for os = mesa3d, because it doesn't initialize
; flat_scr. Until that is fixed, the instructions are only checked under the
; HSA prefix, i.e. for the amdhsa invocation.
define void @flat_scratch_multidword_store() {
  %scratch = alloca <2 x i32>
  %fptr = addrspacecast <2 x i32>* %scratch to <2 x i32> addrspace(4)*
  store volatile <2 x i32> zeroinitializer, <2 x i32> addrspace(4)* %fptr
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind convergent }
attributes #3 = { nounwind readnone }