; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s

; Checks that the amdgpu-annotate-kernel-features pass detects uses of
; group (LDS, addrspace(3)) pointers hidden inside constant expressions
; (addrspacecast, GEP-of-cast, ptrtoint, casts folded back to group).

declare void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* nocapture, i32 addrspace(4)* nocapture, i32, i1) #0

@lds.i32 = unnamed_addr addrspace(3) global i32 undef, align 4
@lds.arr = unnamed_addr addrspace(3) global [256 x i32] undef, align 4

@global.i32 = unnamed_addr addrspace(1) global i32 undef, align 4
@global.arr = unnamed_addr addrspace(1) global [256 x i32] undef, align 4

;.
; HSA: @[[LDS_I32:[a-zA-Z0-9_$"\\.-]+]] = unnamed_addr addrspace(3) global i32 undef, align 4
; HSA: @[[LDS_ARR:[a-zA-Z0-9_$"\\.-]+]] = unnamed_addr addrspace(3) global [256 x i32] undef, align 4
; HSA: @[[GLOBAL_I32:[a-zA-Z0-9_$"\\.-]+]] = unnamed_addr addrspace(1) global i32 undef, align 4
; HSA: @[[GLOBAL_ARR:[a-zA-Z0-9_$"\\.-]+]] = unnamed_addr addrspace(1) global [256 x i32] undef, align 4
;.
; Per the attribute checks at the bottom of the file: kernels touching only
; global/flat memory keep #[[ATTR1]] = { nounwind }, while any (constexpr)
; cast involving a group pointer gets #[[ATTR2]], which adds "amdgpu-queue-ptr".

define amdgpu_kernel void @store_cast_0_flat_to_group_addrspacecast() #1 {
; HSA-LABEL: define {{[^@]+}}@store_cast_0_flat_to_group_addrspacecast
; HSA-SAME: () #[[ATTR1:[0-9]+]] {
; HSA-NEXT:    store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*), align 4
; HSA-NEXT:    ret void
;
  store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*)
  ret void
}

define amdgpu_kernel void @store_cast_0_group_to_flat_addrspacecast() #1 {
; HSA-LABEL: define {{[^@]+}}@store_cast_0_group_to_flat_addrspacecast
; HSA-SAME: () #[[ATTR2:[0-9]+]] {
; HSA-NEXT:    store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), align 4
; HSA-NEXT:    ret void
;
  store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*)
  ret void
}

define amdgpu_kernel void @store_constant_cast_group_gv_to_flat() #1 {
; HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_to_flat
; HSA-SAME: () #[[ATTR2]] {
; HSA-NEXT:    store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds.i32 to i32 addrspace(4)*), align 4
; HSA-NEXT:    ret void
;
  store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds.i32 to i32 addrspace(4)*)
  ret void
}

define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat() #1 {
; HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat
; HSA-SAME: () #[[ATTR2]] {
; HSA-NEXT:    store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4
; HSA-NEXT:    ret void
;
  store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8)
  ret void
}

define amdgpu_kernel void @store_constant_cast_global_gv_to_flat() #1 {
; HSA-LABEL: define {{[^@]+}}@store_constant_cast_global_gv_to_flat
; HSA-SAME: () #[[ATTR1]] {
; HSA-NEXT:    store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global.i32 to i32 addrspace(4)*), align 4
; HSA-NEXT:    ret void
;
  store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global.i32 to i32 addrspace(4)*)
  ret void
}

define amdgpu_kernel void @store_constant_cast_global_gv_gep_to_flat() #1 {
; HSA-LABEL: define {{[^@]+}}@store_constant_cast_global_gv_gep_to_flat
; HSA-SAME: () #[[ATTR1]] {
; HSA-NEXT:    store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(1)* @global.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4
; HSA-NEXT:    ret void
;
  store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(1)* @global.arr to [256 x i32] addrspace(4)*), i64 0, i64 8)
  ret void
}

define amdgpu_kernel void @load_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
; HSA-LABEL: define {{[^@]+}}@load_constant_cast_group_gv_gep_to_flat
; HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
; HSA-NEXT:    [[VAL:%.*]] = load i32, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[OUT]], align 4
; HSA-NEXT:    ret void
;
  %val = load i32, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8)
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @atomicrmw_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
; HSA-LABEL: define {{[^@]+}}@atomicrmw_constant_cast_group_gv_gep_to_flat
; HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
; HSA-NEXT:    [[VAL:%.*]] = atomicrmw add i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 1 seq_cst, align 4
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[OUT]], align 4
; HSA-NEXT:    ret void
;
  %val = atomicrmw add i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 1 seq_cst
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @cmpxchg_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
; HSA-LABEL: define {{[^@]+}}@cmpxchg_constant_cast_group_gv_gep_to_flat
; HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
; HSA-NEXT:    [[VAL:%.*]] = cmpxchg i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst, align 4
; HSA-NEXT:    [[VAL0:%.*]] = extractvalue { i32, i1 } [[VAL]], 0
; HSA-NEXT:    store i32 [[VAL0]], i32 addrspace(1)* [[OUT]], align 4
; HSA-NEXT:    ret void
;
  %val = cmpxchg i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst
  %val0 = extractvalue { i32, i1 } %val, 0
  store i32 %val0, i32 addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @memcpy_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
; HSA-LABEL: define {{[^@]+}}@memcpy_constant_cast_group_gv_gep_to_flat
; HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
; HSA-NEXT:    call void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* align 4 [[OUT]], i32 addrspace(4)* align 4 getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 32, i1 false)
; HSA-NEXT:    ret void
;
  call void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* align 4 %out, i32 addrspace(4)* align 4 getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 32, i1 false)
  ret void
}

; Can't just search the pointer value
define amdgpu_kernel void @store_value_constant_cast_lds_gv_gep_to_flat(i32 addrspace(4)* addrspace(1)* %out) #1 {
; HSA-LABEL: define {{[^@]+}}@store_value_constant_cast_lds_gv_gep_to_flat
; HSA-SAME: (i32 addrspace(4)* addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
; HSA-NEXT:    store i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 addrspace(4)* addrspace(1)* [[OUT]], align 8
; HSA-NEXT:    ret void
;
  store i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 addrspace(4)* addrspace(1)* %out
  ret void
}

; Can't just search pointer types
define amdgpu_kernel void @store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat(i64 addrspace(1)* %out) #1 {
; HSA-LABEL: define {{[^@]+}}@store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat
; HSA-SAME: (i64 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
; HSA-NEXT:    store i64 ptrtoint (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i64), i64 addrspace(1)* [[OUT]], align 4
; HSA-NEXT:    ret void
;
  store i64 ptrtoint (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i64), i64 addrspace(1)* %out
  ret void
}

; Cast group to flat, do GEP, cast back to group
define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat_to_group() #1 {
; HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat_to_group
; HSA-SAME: () #[[ATTR2]] {
; HSA-NEXT:    store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*), align 4
; HSA-NEXT:    ret void
;
  store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*)
  ret void
}

define i32 addrspace(3)* @ret_constant_cast_group_gv_gep_to_flat_to_group() #1 {
; HSA-LABEL: define {{[^@]+}}@ret_constant_cast_group_gv_gep_to_flat_to_group
; HSA-SAME: () #[[ATTR2]] {
; HSA-NEXT:    ret i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*)
;
  ret i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*)
}

attributes #0 = { argmemonly nounwind }
attributes #1 = { nounwind }
;.
; HSA: attributes #[[ATTR0:[0-9]+]] = { argmemonly nofree nounwind willreturn }
; HSA: attributes #[[ATTR1]] = { nounwind }
; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-queue-ptr" }
;.