; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s

; Verifies that the AMDGPUAnnotateKernelFeatures pass attaches the expected
; "amdgpu-*" feature attributes to each function, driven by which intrinsics
; (workgroup/workitem IDs, dispatch/queue/kernarg pointers, is.shared/is.private),
; which addrspacecasts, and which stack objects (allocas) the function uses.
; The attribute groups assigned by the pass are checked in the --check-globals
; section at the bottom of the file.

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"

declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0

declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
declare i32 @llvm.amdgcn.workitem.id.z() #0

declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0

declare i1 @llvm.amdgcn.is.shared(i8* nocapture) #2
declare i1 @llvm.amdgcn.is.private(i8* nocapture) #2

; workgroup.id.x needs no extra attribute (ATTR1 is plain nounwind).
define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_x
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1:[0-9]+]] {
; HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_y
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR2:[0-9]+]] {
; HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; Multiple uses of the same intrinsic should produce the same single attribute.
define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@multi_use_tgid_y
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR2]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_x_y
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR2]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_z
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR3:[0-9]+]] {
; HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_x_z
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR3]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_y_z
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR4:[0-9]+]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_x_y_z
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR4]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    [[VAL2:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL2]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val2 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  ret void
}

; workitem.id.x needs no extra attribute, same group as use_tgid_x.
define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tidig_x
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tidig_y
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR5:[0-9]+]] {
; HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val = call i32 @llvm.amdgcn.workitem.id.y()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tidig_z
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR6:[0-9]+]] {
; HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val = call i32 @llvm.amdgcn.workitem.id.z()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tidig_x_tgid_x
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.x()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; Mixing workitem and workgroup intrinsics merges both attributes (ATTR7).
define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tidig_y_tgid_y
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR7:[0-9]+]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workitem.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tidig_x_y_z
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR8:[0-9]+]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; HSA-NEXT:    [[VAL2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL2]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_all_workitems
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR9:[0-9]+]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; HSA-NEXT:    [[VAL2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; HSA-NEXT:    [[VAL3:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
; HSA-NEXT:    [[VAL4:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    [[VAL5:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL2]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL3]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL4]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL5]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  %val3 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val4 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val5 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  store volatile i32 %val3, i32 addrspace(1)* %ptr
  store volatile i32 %val4, i32 addrspace(1)* %ptr
  store volatile i32 %val5, i32 addrspace(1)* %ptr
  ret void
}

; Implicit-argument pointer intrinsics each get their own attribute.
define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_dispatch_ptr
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR10:[0-9]+]] {
; HSA-NEXT:    [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
; HSA-NEXT:    [[BC:%.*]] = bitcast i8 addrspace(4)* [[DISPATCH_PTR]] to i32 addrspace(4)*
; HSA-NEXT:    [[VAL:%.*]] = load i32, i32 addrspace(4)* [[BC]], align 4
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
  %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_queue_ptr
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR11:[0-9]+]] {
; HSA-NEXT:    [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
; HSA-NEXT:    [[BC:%.*]] = bitcast i8 addrspace(4)* [[DISPATCH_PTR]] to i32 addrspace(4)*
; HSA-NEXT:    [[VAL:%.*]] = load i32, i32 addrspace(4)* [[BC]], align 4
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
  %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_kernarg_segment_ptr
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR12:[0-9]+]] {
; HSA-NEXT:    [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[BC:%.*]] = bitcast i8 addrspace(4)* [[DISPATCH_PTR]] to i32 addrspace(4)*
; HSA-NEXT:    [[VAL:%.*]] = load i32, i32 addrspace(4)* [[BC]], align 4
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
  %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; Casts from group (3) or private (5) to flat require the queue pointer
; (the apertures are read from it), so these get the same group as
; use_queue_ptr (ATTR11).
define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast
; HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR11]] {
; HSA-NEXT:    [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32*
; HSA-NEXT:    store volatile i32 0, i32* [[STOF]], align 4
; HSA-NEXT:    ret void
;
  %stof = addrspacecast i32 addrspace(3)* %ptr to i32*
  store volatile i32 0, i32* %stof
  ret void
}

define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_private_to_flat_addrspacecast
; HSA-SAME: (i32 addrspace(5)* [[PTR:%.*]]) #[[ATTR11]] {
; HSA-NEXT:    [[STOF:%.*]] = addrspacecast i32 addrspace(5)* [[PTR]] to i32*
; HSA-NEXT:    store volatile i32 0, i32* [[STOF]], align 4
; HSA-NEXT:    ret void
;
  %stof = addrspacecast i32 addrspace(5)* %ptr to i32*
  store volatile i32 0, i32* %stof
  ret void
}

; Casts *from* flat do not need the queue pointer.
define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_flat_to_group_addrspacecast
; HSA-SAME: (i32* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[FTOS:%.*]] = addrspacecast i32* [[PTR]] to i32 addrspace(3)*
; HSA-NEXT:    store volatile i32 0, i32 addrspace(3)* [[FTOS]], align 4
; HSA-NEXT:    ret void
;
  %ftos = addrspacecast i32* %ptr to i32 addrspace(3)*
  store volatile i32 0, i32 addrspace(3)* %ftos
  ret void
}

define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_flat_to_private_addrspacecast
; HSA-SAME: (i32* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[FTOS:%.*]] = addrspacecast i32* [[PTR]] to i32 addrspace(5)*
; HSA-NEXT:    store volatile i32 0, i32 addrspace(5)* [[FTOS]], align 4
; HSA-NEXT:    ret void
;
  %ftos = addrspacecast i32* %ptr to i32 addrspace(5)*
  store volatile i32 0, i32 addrspace(5)* %ftos
  ret void
}

; No-op addrspacecast should not use queue ptr
define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_global_to_flat_addrspacecast
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[STOF:%.*]] = addrspacecast i32 addrspace(1)* [[PTR]] to i32*
; HSA-NEXT:    store volatile i32 0, i32* [[STOF]], align 4
; HSA-NEXT:    ret void
;
  %stof = addrspacecast i32 addrspace(1)* %ptr to i32*
  store volatile i32 0, i32* %stof
  ret void
}

define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_constant_to_flat_addrspacecast
; HSA-SAME: (i32 addrspace(4)* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[STOF:%.*]] = addrspacecast i32 addrspace(4)* [[PTR]] to i32*
; HSA-NEXT:    [[LD:%.*]] = load volatile i32, i32* [[STOF]], align 4
; HSA-NEXT:    ret void
;
  %stof = addrspacecast i32 addrspace(4)* %ptr to i32*
  %ld = load volatile i32, i32* %stof
  ret void
}

define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_flat_to_global_addrspacecast
; HSA-SAME: (i32* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[FTOS:%.*]] = addrspacecast i32* [[PTR]] to i32 addrspace(1)*
; HSA-NEXT:    store volatile i32 0, i32 addrspace(1)* [[FTOS]], align 4
; HSA-NEXT:    ret void
;
  %ftos = addrspacecast i32* %ptr to i32 addrspace(1)*
  store volatile i32 0, i32 addrspace(1)* %ftos
  ret void
}

define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_flat_to_constant_addrspacecast
; HSA-SAME: (i32* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[FTOS:%.*]] = addrspacecast i32* [[PTR]] to i32 addrspace(4)*
; HSA-NEXT:    [[LD:%.*]] = load volatile i32, i32 addrspace(4)* [[FTOS]], align 4
; HSA-NEXT:    ret void
;
  %ftos = addrspacecast i32* %ptr to i32 addrspace(4)*
  %ld = load volatile i32, i32 addrspace(4)* %ftos
  ret void
}

; is.shared/is.private also need the queue pointer (aperture checks).
define amdgpu_kernel void @use_is_shared(i8* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_is_shared
; HSA-SAME: (i8* [[PTR:%.*]]) #[[ATTR11]] {
; HSA-NEXT:    [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(i8* [[PTR]])
; HSA-NEXT:    [[EXT:%.*]] = zext i1 [[IS_SHARED]] to i32
; HSA-NEXT:    store i32 [[EXT]], i32 addrspace(1)* undef, align 4
; HSA-NEXT:    ret void
;
  %is.shared = call i1 @llvm.amdgcn.is.shared(i8* %ptr)
  %ext = zext i1 %is.shared to i32
  store i32 %ext, i32 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @use_is_private(i8* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_is_private
; HSA-SAME: (i8* [[PTR:%.*]]) #[[ATTR11]] {
; HSA-NEXT:    [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(i8* [[PTR]])
; HSA-NEXT:    [[EXT:%.*]] = zext i1 [[IS_PRIVATE]] to i32
; HSA-NEXT:    store i32 [[EXT]], i32 addrspace(1)* undef, align 4
; HSA-NEXT:    ret void
;
  %is.private = call i1 @llvm.amdgcn.is.private(i8* %ptr)
  %ext = zext i1 %is.private to i32
  store i32 %ext, i32 addrspace(1)* undef
  ret void
}

; Any alloca (in the entry block or not, kernel or plain function) marks the
; function with "amdgpu-stack-objects" (ATTR13).
define amdgpu_kernel void @use_alloca() #1 {
; HSA-LABEL: define {{[^@]+}}@use_alloca
; HSA-SAME: () #[[ATTR13:[0-9]+]] {
; HSA-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
; HSA-NEXT:    store i32 0, i32 addrspace(5)* [[ALLOCA]], align 4
; HSA-NEXT:    ret void
;
  %alloca = alloca i32, addrspace(5)
  store i32 0, i32 addrspace(5)* %alloca
  ret void
}

define amdgpu_kernel void @use_alloca_non_entry_block() #1 {
; HSA-LABEL: define {{[^@]+}}@use_alloca_non_entry_block
; HSA-SAME: () #[[ATTR13]] {
; HSA-NEXT:  entry:
; HSA-NEXT:    br label [[BB:%.*]]
; HSA:       bb:
; HSA-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
; HSA-NEXT:    store i32 0, i32 addrspace(5)* [[ALLOCA]], align 4
; HSA-NEXT:    ret void
;
entry:
  br label %bb

bb:
  %alloca = alloca i32, addrspace(5)
  store i32 0, i32 addrspace(5)* %alloca
  ret void
}

define void @use_alloca_func() #1 {
; HSA-LABEL: define {{[^@]+}}@use_alloca_func
; HSA-SAME: () #[[ATTR13]] {
; HSA-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
; HSA-NEXT:    store i32 0, i32 addrspace(5)* [[ALLOCA]], align 4
; HSA-NEXT:    ret void
;
  %alloca = alloca i32, addrspace(5)
  store i32 0, i32 addrspace(5)* %alloca
  ret void
}

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind }
; FIX: group #2 is referenced by the @llvm.amdgcn.is.shared/is.private
; declarations above but had no definition, which makes the module invalid
; IR (an undefined attribute-group reference fails to parse). Define it to
; match the other intrinsic declarations' group.
attributes #2 = { nounwind readnone speculatable }

;.
; HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
; HSA: attributes #[[ATTR1]] = { nounwind }
; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-work-group-id-y" }
; HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-work-group-id-z" }
; HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" }
; HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-work-item-id-y" }
; HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-work-item-id-z" }
; HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" }
; HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
; HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
; HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-dispatch-ptr" }
; HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-queue-ptr" }
; HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-kernarg-segment-ptr" }
; HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-stack-objects" }
;.