; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=HSA,AKF_HSA %s
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor < %s | FileCheck -check-prefixes=HSA,ATTRIBUTOR_HSA %s

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"

declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0

declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
declare i32 @llvm.amdgcn.workitem.id.z() #0

declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0

declare i1 @llvm.amdgcn.is.shared(i8* nocapture) #2
declare i1 @llvm.amdgcn.is.private(i8* nocapture) #2

define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_x
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1:[0-9]+]] {
; HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_y
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR2:[0-9]+]] {
; HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@multi_use_tgid_y
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR2]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_x_y
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR2]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_z
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR3:[0-9]+]] {
; HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_x_z
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR3]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_y_z
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR4:[0-9]+]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tgid_x_y_z
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR4]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    [[VAL2:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL2]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val2 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tidig_x
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tidig_y
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR5:[0-9]+]] {
; HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val = call i32 @llvm.amdgcn.workitem.id.y()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tidig_z
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR6:[0-9]+]] {
; HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val = call i32 @llvm.amdgcn.workitem.id.z()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tidig_x_tgid_x
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.x()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tidig_y_tgid_y
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR7:[0-9]+]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workitem.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_tidig_x_y_z
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR8:[0-9]+]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; HSA-NEXT:    [[VAL2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL2]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_all_workitems
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR9:[0-9]+]] {
; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; HSA-NEXT:    [[VAL2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; HSA-NEXT:    [[VAL3:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
; HSA-NEXT:    [[VAL4:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; HSA-NEXT:    [[VAL5:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
; HSA-NEXT:    store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL2]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL3]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL4]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    store volatile i32 [[VAL5]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  %val3 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val4 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val5 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  store volatile i32 %val3, i32 addrspace(1)* %ptr
  store volatile i32 %val4, i32 addrspace(1)* %ptr
  store volatile i32 %val5, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_dispatch_ptr
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR10:[0-9]+]] {
; HSA-NEXT:    [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
; HSA-NEXT:    [[BC:%.*]] = bitcast i8 addrspace(4)* [[DISPATCH_PTR]] to i32 addrspace(4)*
; HSA-NEXT:    [[VAL:%.*]] = load i32, i32 addrspace(4)* [[BC]], align 4
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
  %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_queue_ptr
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR11:[0-9]+]] {
; HSA-NEXT:    [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
; HSA-NEXT:    [[BC:%.*]] = bitcast i8 addrspace(4)* [[DISPATCH_PTR]] to i32 addrspace(4)*
; HSA-NEXT:    [[VAL:%.*]] = load i32, i32 addrspace(4)* [[BC]], align 4
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
  %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_kernarg_segment_ptr
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR12:[0-9]+]] {
; HSA-NEXT:    [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[BC:%.*]] = bitcast i8 addrspace(4)* [[DISPATCH_PTR]] to i32 addrspace(4)*
; HSA-NEXT:    [[VAL:%.*]] = load i32, i32 addrspace(4)* [[BC]], align 4
; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
; HSA-NEXT:    ret void
;
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
  %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast
; HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR11]] {
; HSA-NEXT:    [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32*
; HSA-NEXT:    store volatile i32 0, i32* [[STOF]], align 4
; HSA-NEXT:    ret void
;
  %stof = addrspacecast i32 addrspace(3)* %ptr to i32*
  store volatile i32 0, i32* %stof
  ret void
}

define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_private_to_flat_addrspacecast
; HSA-SAME: (i32 addrspace(5)* [[PTR:%.*]]) #[[ATTR11]] {
; HSA-NEXT:    [[STOF:%.*]] = addrspacecast i32 addrspace(5)* [[PTR]] to i32*
; HSA-NEXT:    store volatile i32 0, i32* [[STOF]], align 4
; HSA-NEXT:    ret void
;
  %stof = addrspacecast i32 addrspace(5)* %ptr to i32*
  store volatile i32 0, i32* %stof
  ret void
}

define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_flat_to_group_addrspacecast
; HSA-SAME: (i32* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[FTOS:%.*]] = addrspacecast i32* [[PTR]] to i32 addrspace(3)*
; HSA-NEXT:    store volatile i32 0, i32 addrspace(3)* [[FTOS]], align 4
; HSA-NEXT:    ret void
;
  %ftos = addrspacecast i32* %ptr to i32 addrspace(3)*
  store volatile i32 0, i32 addrspace(3)* %ftos
  ret void
}

define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_flat_to_private_addrspacecast
; HSA-SAME: (i32* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[FTOS:%.*]] = addrspacecast i32* [[PTR]] to i32 addrspace(5)*
; HSA-NEXT:    store volatile i32 0, i32 addrspace(5)* [[FTOS]], align 4
; HSA-NEXT:    ret void
;
  %ftos = addrspacecast i32* %ptr to i32 addrspace(5)*
  store volatile i32 0, i32 addrspace(5)* %ftos
  ret void
}

; No-op addrspacecast should not use queue ptr
define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_global_to_flat_addrspacecast
; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[STOF:%.*]] = addrspacecast i32 addrspace(1)* [[PTR]] to i32*
; HSA-NEXT:    store volatile i32 0, i32* [[STOF]], align 4
; HSA-NEXT:    ret void
;
  %stof = addrspacecast i32 addrspace(1)* %ptr to i32*
  store volatile i32 0, i32* %stof
  ret void
}

define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_constant_to_flat_addrspacecast
; HSA-SAME: (i32 addrspace(4)* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[STOF:%.*]] = addrspacecast i32 addrspace(4)* [[PTR]] to i32*
; HSA-NEXT:    [[LD:%.*]] = load volatile i32, i32* [[STOF]], align 4
; HSA-NEXT:    ret void
;
  %stof = addrspacecast i32 addrspace(4)* %ptr to i32*
  %ld = load volatile i32, i32* %stof
  ret void
}

define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_flat_to_global_addrspacecast
; HSA-SAME: (i32* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[FTOS:%.*]] = addrspacecast i32* [[PTR]] to i32 addrspace(1)*
; HSA-NEXT:    store volatile i32 0, i32 addrspace(1)* [[FTOS]], align 4
; HSA-NEXT:    ret void
;
  %ftos = addrspacecast i32* %ptr to i32 addrspace(1)*
  store volatile i32 0, i32 addrspace(1)* %ftos
  ret void
}

define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 {
; HSA-LABEL: define {{[^@]+}}@use_flat_to_constant_addrspacecast
; HSA-SAME: (i32* [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT:    [[FTOS:%.*]] = addrspacecast i32* [[PTR]] to i32 addrspace(4)*
; HSA-NEXT:    [[LD:%.*]] = load volatile i32, i32 addrspace(4)* [[FTOS]], align 4
; HSA-NEXT:    ret void
;
  %ftos = addrspacecast i32* %ptr to i32 addrspace(4)*
  %ld = load volatile i32, i32 addrspace(4)* %ftos
  ret void
}

define amdgpu_kernel void @use_is_shared(i8* %ptr) #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@use_is_shared
; AKF_HSA-SAME: (i8* [[PTR:%.*]]) #[[ATTR11]] {
; AKF_HSA-NEXT:    [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(i8* [[PTR]])
; AKF_HSA-NEXT:    [[EXT:%.*]] = zext i1 [[IS_SHARED]] to i32
; AKF_HSA-NEXT:    store i32 [[EXT]], i32 addrspace(1)* undef, align 4
; AKF_HSA-NEXT:    ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_is_shared
; ATTRIBUTOR_HSA-SAME: (i8* [[PTR:%.*]]) #[[ATTR1]] {
; ATTRIBUTOR_HSA-NEXT:    ret void
;
  %is.shared = call i1 @llvm.amdgcn.is.shared(i8* %ptr)
  %ext = zext i1 %is.shared to i32
  store i32 %ext, i32 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @use_is_private(i8* %ptr) #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@use_is_private
; AKF_HSA-SAME: (i8* [[PTR:%.*]]) #[[ATTR11]] {
; AKF_HSA-NEXT:    [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(i8* [[PTR]])
; AKF_HSA-NEXT:    [[EXT:%.*]] = zext i1 [[IS_PRIVATE]] to i32
; AKF_HSA-NEXT:    store i32 [[EXT]], i32 addrspace(1)* undef, align 4
; AKF_HSA-NEXT:    ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_is_private
; ATTRIBUTOR_HSA-SAME: (i8* [[PTR:%.*]]) #[[ATTR1]] {
; ATTRIBUTOR_HSA-NEXT:    ret void
;
  %is.private = call i1 @llvm.amdgcn.is.private(i8* %ptr)
  %ext = zext i1 %is.private to i32
  store i32 %ext, i32 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @use_alloca() #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@use_alloca
; AKF_HSA-SAME: () #[[ATTR13:[0-9]+]] {
; AKF_HSA-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
; AKF_HSA-NEXT:    store i32 0, i32 addrspace(5)* [[ALLOCA]], align 4
; AKF_HSA-NEXT:    ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_alloca
; ATTRIBUTOR_HSA-SAME: () #[[ATTR13:[0-9]+]] {
; ATTRIBUTOR_HSA-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
; ATTRIBUTOR_HSA-NEXT:    ret void
;
  %alloca = alloca i32, addrspace(5)
  store i32 0, i32 addrspace(5)* %alloca
  ret void
}

define amdgpu_kernel void @use_alloca_non_entry_block() #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@use_alloca_non_entry_block
; AKF_HSA-SAME: () #[[ATTR13]] {
; AKF_HSA-NEXT:  entry:
; AKF_HSA-NEXT:    br label [[BB:%.*]]
; AKF_HSA:       bb:
; AKF_HSA-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
; AKF_HSA-NEXT:    store i32 0, i32 addrspace(5)* [[ALLOCA]], align 4
; AKF_HSA-NEXT:    ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_alloca_non_entry_block
; ATTRIBUTOR_HSA-SAME: () #[[ATTR13]] {
; ATTRIBUTOR_HSA-NEXT:  entry:
; ATTRIBUTOR_HSA-NEXT:    br label [[BB:%.*]]
; ATTRIBUTOR_HSA:       bb:
; ATTRIBUTOR_HSA-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
; ATTRIBUTOR_HSA-NEXT:    ret void
;
entry:
  br label %bb

bb:
  %alloca = alloca i32, addrspace(5)
  store i32 0, i32 addrspace(5)* %alloca
  ret void
}

define void @use_alloca_func() #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@use_alloca_func
; AKF_HSA-SAME: () #[[ATTR13]] {
; AKF_HSA-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
; AKF_HSA-NEXT:    store i32 0, i32 addrspace(5)* [[ALLOCA]], align 4
; AKF_HSA-NEXT:    ret void
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_alloca_func
; ATTRIBUTOR_HSA-SAME: () #[[ATTR13]] {
; ATTRIBUTOR_HSA-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
; ATTRIBUTOR_HSA-NEXT:    ret void
;
  %alloca = alloca i32, addrspace(5)
  store i32 0, i32 addrspace(5)* %alloca
  ret void
}

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind }

;.
; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
; AKF_HSA: attributes #[[ATTR1]] = { nounwind }
; AKF_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-work-group-id-y" }
; AKF_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-work-group-id-z" }
; AKF_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" }
; AKF_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-work-item-id-y" }
; AKF_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-work-item-id-z" }
; AKF_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" }
; AKF_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
; AKF_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
; AKF_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-dispatch-ptr" }
; AKF_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-queue-ptr" }
; AKF_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-kernarg-segment-ptr" }
; AKF_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-stack-objects" }
;.
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-work-group-id-y" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-work-group-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-work-item-id-y" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-dispatch-ptr" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-queue-ptr" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-kernarg-segment-ptr" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-stack-objects" "uniform-work-group-size"="false" }
;.