; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s

; Trivial optimization of generic addressing

; CHECK-LABEL: @load_global_from_flat(
; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(1)*
; CHECK-NEXT: %tmp1 = load float, float addrspace(1)* %tmp0
; CHECK-NEXT: ret float %tmp1
define float @load_global_from_flat(float* %generic_scalar) #0 {
  %tmp0 = addrspacecast float* %generic_scalar to float addrspace(1)*
  %tmp1 = load float, float addrspace(1)* %tmp0
  ret float %tmp1
}

; CHECK-LABEL: @load_constant_from_flat(
; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(4)*
; CHECK-NEXT: %tmp1 = load float, float addrspace(4)* %tmp0
; CHECK-NEXT: ret float %tmp1
define float @load_constant_from_flat(float* %generic_scalar) #0 {
  %tmp0 = addrspacecast float* %generic_scalar to float addrspace(4)*
  %tmp1 = load float, float addrspace(4)* %tmp0
  ret float %tmp1
}

; CHECK-LABEL: @load_group_from_flat(
; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(3)*
; CHECK-NEXT: %tmp1 = load float, float addrspace(3)* %tmp0
; CHECK-NEXT: ret float %tmp1
define float @load_group_from_flat(float* %generic_scalar) #0 {
  %tmp0 = addrspacecast float* %generic_scalar to float addrspace(3)*
  %tmp1 = load float, float addrspace(3)* %tmp0
  ret float %tmp1
}

; CHECK-LABEL: @load_private_from_flat(
; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(5)*
; CHECK-NEXT: %tmp1 = load float, float addrspace(5)* %tmp0
; CHECK-NEXT: ret float %tmp1
define float @load_private_from_flat(float* %generic_scalar) #0 {
  %tmp0 = addrspacecast float* %generic_scalar to float addrspace(5)*
  %tmp1 = load float, float addrspace(5)* %tmp0
  ret float %tmp1
}

; CHECK-LABEL: @store_global_from_flat(
; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(1)*
; CHECK-NEXT: store float 0.000000e+00, float addrspace(1)* %tmp0
define amdgpu_kernel void @store_global_from_flat(float* %generic_scalar) #0 {
  %tmp0 = addrspacecast float* %generic_scalar to float addrspace(1)*
  store float 0.0, float addrspace(1)* %tmp0
  ret void
}

; CHECK-LABEL: @store_group_from_flat(
; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(3)*
; CHECK-NEXT: store float 0.000000e+00, float addrspace(3)* %tmp0
define amdgpu_kernel void @store_group_from_flat(float* %generic_scalar) #0 {
  %tmp0 = addrspacecast float* %generic_scalar to float addrspace(3)*
  store float 0.0, float addrspace(3)* %tmp0
  ret void
}

; CHECK-LABEL: @store_private_from_flat(
; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(5)*
; CHECK-NEXT: store float 0.000000e+00, float addrspace(5)* %tmp0
define amdgpu_kernel void @store_private_from_flat(float* %generic_scalar) #0 {
  %tmp0 = addrspacecast float* %generic_scalar to float addrspace(5)*
  store float 0.0, float addrspace(5)* %tmp0
  ret void
}

; optimized to global load/store.
; CHECK-LABEL: @load_store_global(
; CHECK-NEXT: %val = load i32, i32 addrspace(1)* %input, align 4
; CHECK-NEXT: store i32 %val, i32 addrspace(1)* %output, align 4
; CHECK-NEXT: ret void
define amdgpu_kernel void @load_store_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 {
  %tmp0 = addrspacecast i32 addrspace(1)* %input to i32*
  %tmp1 = addrspacecast i32 addrspace(1)* %output to i32*
  %val = load i32, i32* %tmp0, align 4
  store i32 %val, i32* %tmp1, align 4
  ret void
}

; Optimized to group load/store.
; CHECK-LABEL: @load_store_group(
; CHECK-NEXT: %val = load i32, i32 addrspace(3)* %input, align 4
; CHECK-NEXT: store i32 %val, i32 addrspace(3)* %output, align 4
; CHECK-NEXT: ret void
define amdgpu_kernel void @load_store_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 {
  %tmp0 = addrspacecast i32 addrspace(3)* %input to i32*
  %tmp1 = addrspacecast i32 addrspace(3)* %output to i32*
  %val = load i32, i32* %tmp0, align 4
  store i32 %val, i32* %tmp1, align 4
  ret void
}

; Optimized to private load/store.
; CHECK-LABEL: @load_store_private(
; CHECK-NEXT: %val = load i32, i32 addrspace(5)* %input, align 4
; CHECK-NEXT: store i32 %val, i32 addrspace(5)* %output, align 4
; CHECK-NEXT: ret void
define amdgpu_kernel void @load_store_private(i32 addrspace(5)* nocapture %input, i32 addrspace(5)* nocapture %output) #0 {
  %tmp0 = addrspacecast i32 addrspace(5)* %input to i32*
  %tmp1 = addrspacecast i32 addrspace(5)* %output to i32*
  %val = load i32, i32* %tmp0, align 4
  store i32 %val, i32* %tmp1, align 4
  ret void
}

; No optimization. flat load/store.
; CHECK-LABEL: @load_store_flat(
; CHECK-NEXT: %val = load i32, i32* %input, align 4
; CHECK-NEXT: store i32 %val, i32* %output, align 4
; CHECK-NEXT: ret void
define amdgpu_kernel void @load_store_flat(i32* nocapture %input, i32* nocapture %output) #0 {
  %val = load i32, i32* %input, align 4
  store i32 %val, i32* %output, align 4
  ret void
}

; CHECK-LABEL: @store_addrspacecast_ptr_value(
; CHECK: %cast = addrspacecast i32 addrspace(1)* %input to i32*
; CHECK-NEXT: store i32* %cast, i32* addrspace(1)* %output, align 4
define amdgpu_kernel void @store_addrspacecast_ptr_value(i32 addrspace(1)* nocapture %input, i32* addrspace(1)* nocapture %output) #0 {
  %cast = addrspacecast i32 addrspace(1)* %input to i32*
  store i32* %cast, i32* addrspace(1)* %output, align 4
  ret void
}

; CHECK-LABEL: @atomicrmw_add_global_to_flat(
; CHECK-NEXT: %ret = atomicrmw add i32 addrspace(1)* %global.ptr, i32 %y seq_cst
define i32 @atomicrmw_add_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %y) #0 {
  %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
  %ret = atomicrmw add i32* %cast, i32 %y seq_cst
  ret i32 %ret
}

; CHECK-LABEL: @atomicrmw_add_group_to_flat(
; CHECK-NEXT: %ret = atomicrmw add i32 addrspace(3)* %group.ptr, i32 %y seq_cst
define i32 @atomicrmw_add_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %y) #0 {
  %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
  %ret = atomicrmw add i32* %cast, i32 %y seq_cst
  ret i32 %ret
}

; CHECK-LABEL: @cmpxchg_global_to_flat(
; CHECK: %ret = cmpxchg i32 addrspace(1)* %global.ptr, i32 %cmp, i32 %val seq_cst monotonic
define { i32, i1 } @cmpxchg_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %cmp, i32 %val) #0 {
  %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
  %ret = cmpxchg i32* %cast, i32 %cmp, i32 %val seq_cst monotonic
  ret { i32, i1 } %ret
}

; CHECK-LABEL: @cmpxchg_group_to_flat(
; CHECK: %ret = cmpxchg i32 addrspace(3)* %group.ptr, i32 %cmp, i32 %val seq_cst monotonic
define { i32, i1 } @cmpxchg_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %cmp, i32 %val) #0 {
  %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
  %ret = cmpxchg i32* %cast, i32 %cmp, i32 %val seq_cst monotonic
  ret { i32, i1 } %ret
}

; Not pointer operand
; CHECK-LABEL: @cmpxchg_group_to_flat_wrong_operand(
; CHECK: %cast.cmp = addrspacecast i32 addrspace(3)* %cmp.ptr to i32*
; CHECK: %ret = cmpxchg i32* addrspace(3)* %cas.ptr, i32* %cast.cmp, i32* %val seq_cst monotonic
define { i32*, i1 } @cmpxchg_group_to_flat_wrong_operand(i32* addrspace(3)* %cas.ptr, i32 addrspace(3)* %cmp.ptr, i32* %val) #0 {
  %cast.cmp = addrspacecast i32 addrspace(3)* %cmp.ptr to i32*
  %ret = cmpxchg i32* addrspace(3)* %cas.ptr, i32* %cast.cmp, i32* %val seq_cst monotonic
  ret { i32*, i1 } %ret
}

; Null pointer in local addr space
; CHECK-LABEL: @local_nullptr
; CHECK: icmp ne i8 addrspace(3)* %a, addrspacecast (i8 addrspace(5)* null to i8 addrspace(3)*)
; CHECK-NOT: i8 addrspace(3)* null
define void @local_nullptr(i32 addrspace(1)* nocapture %results, i8 addrspace(3)* %a) {
entry:
  %tobool = icmp ne i8 addrspace(3)* %a, addrspacecast (i8 addrspace(5)* null to i8 addrspace(3)*)
  %conv = zext i1 %tobool to i32
  store i32 %conv, i32 addrspace(1)* %results, align 4
  ret void
}

attributes #0 = { nounwind }