; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89 %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s

; Checks the AMDGPU code generated for return values of callable (non-kernel)
; functions: scalar integers with and without zeroext/signext, floating point
; values, vectors, and structs. The three llc invocations above cover hawaii,
; fiji and gfx900; most expectations are shared between all of them.

; ---- i1 returns -------------------------------------------------------------
; The plain and zeroext variants expect only the byte load; the signext variant
; additionally expects a 1-bit signed bitfield extract before the return.

; GCN-LABEL: {{^}}i1_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_setpc_b64
define i1 @i1_func_void() #0 {
  %val = load i1, i1 addrspace(1)* undef
  ret i1 %val
}

; FIXME: Missing and?
; GCN-LABEL: {{^}}i1_zeroext_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_setpc_b64
define zeroext i1 @i1_zeroext_func_void() #0 {
  %val = load i1, i1 addrspace(1)* undef
  ret i1 %val
}

; GCN-LABEL: {{^}}i1_signext_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1{{$}}
; GCN-NEXT: s_setpc_b64
define signext i1 @i1_signext_func_void() #0 {
  %val = load i1, i1 addrspace(1)* undef
  ret i1 %val
}

; ---- i8 returns -------------------------------------------------------------
; The extension is expected to be folded into the load: unsigned byte load for
; the plain/zeroext variants, signed byte load for signext.

; GCN-LABEL: {{^}}i8_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_setpc_b64
define i8 @i8_func_void() #0 {
  %val = load i8, i8 addrspace(1)* undef
  ret i8 %val
}

; GCN-LABEL: {{^}}i8_zeroext_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_setpc_b64
define zeroext i8 @i8_zeroext_func_void() #0 {
  %val = load i8, i8 addrspace(1)* undef
  ret i8 %val
}

; GCN-LABEL: {{^}}i8_signext_func_void:
; GCN: buffer_load_sbyte v0, off
; GCN-NEXT: s_setpc_b64
define signext i8 @i8_signext_func_void() #0 {
  %val = load i8, i8 addrspace(1)* undef
  ret i8 %val
}

; ---- i16 returns ------------------------------------------------------------
; Same pattern as i8, using the short load variants.

; GCN-LABEL: {{^}}i16_func_void:
; GCN: buffer_load_ushort v0, off
; GCN-NEXT: s_setpc_b64
define i16 @i16_func_void() #0 {
  %val = load i16, i16 addrspace(1)* undef
  ret i16 %val
}

; GCN-LABEL: {{^}}i16_zeroext_func_void:
; GCN: buffer_load_ushort v0, off
; GCN-NEXT: s_setpc_b64
define zeroext i16 @i16_zeroext_func_void() #0 {
  %val = load i16, i16 addrspace(1)* undef
  ret i16 %val
}

; GCN-LABEL: {{^}}i16_signext_func_void:
; GCN: buffer_load_sshort v0, off
; GCN-NEXT: s_setpc_b64
define signext i16 @i16_signext_func_void() #0 {
  %val = load i16, i16 addrspace(1)* undef
  ret i16 %val
}

; ---- i32 and wider scalar integer returns -----------------------------------

; GCN-LABEL: {{^}}i32_func_void:
; GCN: buffer_load_dword v0, off
; GCN-NEXT: s_setpc_b64
define i32 @i32_func_void() #0 {
  %val = load i32, i32 addrspace(1)* undef
  ret i32 %val
}

; i48 is expected in v0 (low dword) and v1 (upper 16 bits); only the load of
; the upper part changes with the extension attribute.

; GCN-LABEL: {{^}}i48_func_void:
; GCN: buffer_load_dword v0, off
; GCN-NEXT: buffer_load_ushort v1, off
; GCN-NEXT: s_setpc_b64
define i48 @i48_func_void() #0 {
  %val = load i48, i48 addrspace(1)* undef, align 8
  ret i48 %val
}

; GCN-LABEL: {{^}}i48_zeroext_func_void:
; GCN: buffer_load_dword v0, off
; GCN-NEXT: buffer_load_ushort v1, off
; GCN-NEXT: s_setpc_b64
define zeroext i48 @i48_zeroext_func_void() #0 {
  %val = load i48, i48 addrspace(1)* undef, align 8
  ret i48 %val
}

; GCN-LABEL: {{^}}i48_signext_func_void:
; GCN: buffer_load_dword v0, off
; GCN-NEXT: buffer_load_sshort v1, off
; GCN-NEXT: s_setpc_b64
define signext i48 @i48_signext_func_void() #0 {
  %val = load i48, i48 addrspace(1)* undef, align 8
  ret i48 %val
}

; The i63 tests return their argument. Without an extension attribute nothing
; should be emitted between the s_waitcnt and the return; the zeroext/signext
; variants below expect the same frame with their extension code in between.

; GCN-LABEL: {{^}}i63_func_void:
; GCN: s_waitcnt
; GCN-NEXT: s_setpc_b64
define i63 @i63_func_void(i63 %val) #0 {
  ret i63 %val
}

; GCN-LABEL: {{^}}i63_zeroext_func_void:
; GCN: s_waitcnt
; GCN-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
; GCN-NEXT: s_setpc_b64
define zeroext i63 @i63_zeroext_func_void(i63 %val) #0 {
  ret i63 %val
}

; Sign extension is a 64-bit shift left by one followed by an arithmetic shift
; right by one; hawaii and the newer targets use different opcodes for it.

; GCN-LABEL: {{^}}i63_signext_func_void:
; GCN: s_waitcnt
; CI-NEXT: v_lshl_b64 v[0:1], v[0:1], 1
; CI-NEXT: v_ashr_i64 v[0:1], v[0:1], 1

; GFX89-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
; GFX89-NEXT: v_ashrrev_i64 v[0:1], 1, v[0:1]

; GCN-NEXT: s_setpc_b64
define signext i63 @i63_signext_func_void(i63 %val) #0 {
  ret i63 %val
}

; GCN-LABEL: {{^}}i64_func_void:
; GCN: buffer_load_dwordx2 v[0:1], off
; GCN-NEXT: s_setpc_b64
define i64 @i64_func_void() #0 {
  %val = load i64, i64 addrspace(1)* undef
  ret i64 %val
}

; GCN-LABEL: {{^}}i65_func_void:
; GCN-DAG: buffer_load_dwordx2 v[0:1], off
; GCN-DAG: buffer_load_ubyte v2, off
; GCN-NEXT: s_setpc_b64
define i65 @i65_func_void() #0 {
  %val = load i65, i65 addrspace(1)* undef
  ret i65 %val
}

; ---- Floating point returns -------------------------------------------------

; GCN-LABEL: {{^}}f32_func_void:
; GCN: buffer_load_dword v0, off, s[4:7], 0
; GCN-NEXT: s_setpc_b64
define float @f32_func_void() #0 {
  %val = load float, float addrspace(1)* undef
  ret float %val
}

; GCN-LABEL: {{^}}f64_func_void:
; GCN: buffer_load_dwordx2 v[0:1], off
; GCN-NEXT: s_setpc_b64
define double @f64_func_void() #0 {
  %val = load double, double addrspace(1)* undef
  ret double %val
}

; GCN-LABEL: {{^}}v2f64_func_void:
; GCN: buffer_load_dwordx4 v[0:3], off
; GCN-NEXT: s_setpc_b64
define <2 x double> @v2f64_func_void() #0 {
  %val = load <2 x double>, <2 x double> addrspace(1)* undef
  ret <2 x double> %val
}

; ---- 32-bit element vector returns ------------------------------------------
; Results are expected in consecutive VGPRs starting at v0. Where several loads
; are needed their relative order is not checked (-DAG).

; GCN-LABEL: {{^}}v2i32_func_void:
; GCN: buffer_load_dwordx2 v[0:1], off
; GCN-NEXT: s_setpc_b64
define <2 x i32> @v2i32_func_void() #0 {
  %val = load <2 x i32>, <2 x i32> addrspace(1)* undef
  ret <2 x i32> %val
}

; GCN-LABEL: {{^}}v3i32_func_void:
; GCN: buffer_load_dwordx3 v[0:2], off
; GCN-NEXT: s_setpc_b64
define <3 x i32> @v3i32_func_void() #0 {
  %val = load <3 x i32>, <3 x i32> addrspace(1)* undef
  ret <3 x i32> %val
}

; GCN-LABEL: {{^}}v4i32_func_void:
; GCN: buffer_load_dwordx4 v[0:3], off
; GCN-NEXT: s_setpc_b64
define <4 x i32> @v4i32_func_void() #0 {
  %val = load <4 x i32>, <4 x i32> addrspace(1)* undef
  ret <4 x i32> %val
}

; GCN-LABEL: {{^}}v5i32_func_void:
; GCN-DAG: buffer_load_dword v4, off
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-NEXT: s_setpc_b64
define <5 x i32> @v5i32_func_void() #0 {
  %val = load volatile <5 x i32>, <5 x i32> addrspace(1)* undef
  ret <5 x i32> %val
}

; From here on most tests first load the data pointer with a volatile load from
; an undef addrspace(4) address and then load the returned value through it.

; GCN-LABEL: {{^}}v8i32_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-NEXT: s_setpc_b64
define <8 x i32> @v8i32_func_void() #0 {
  %ptr = load volatile <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef
  %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr
  ret <8 x i32> %val
}

; GCN-LABEL: {{^}}v16i32_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN-DAG: buffer_load_dwordx4 v[12:15], off
; GCN-NEXT: s_setpc_b64
define <16 x i32> @v16i32_func_void() #0 {
  %ptr = load volatile <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef
  %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr
  ret <16 x i32> %val
}

; GCN-LABEL: {{^}}v32i32_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN-DAG: buffer_load_dwordx4 v[12:15], off
; GCN-DAG: buffer_load_dwordx4 v[16:19], off
; GCN-DAG: buffer_load_dwordx4 v[20:23], off
; GCN-DAG: buffer_load_dwordx4 v[24:27], off
; GCN-DAG: buffer_load_dwordx4 v[28:31], off
; GCN-NEXT: s_setpc_b64
define <32 x i32> @v32i32_func_void() #0 {
  %ptr = load volatile <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
  %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr
  ret <32 x i32> %val
}

; ---- 64-bit element vector returns ------------------------------------------

; GCN-LABEL: {{^}}v2i64_func_void:
; GCN: buffer_load_dwordx4 v[0:3], off
; GCN-NEXT: s_setpc_b64
define <2 x i64> @v2i64_func_void() #0 {
  %val = load <2 x i64>, <2 x i64> addrspace(1)* undef
  ret <2 x i64> %val
}

; GCN-LABEL: {{^}}v3i64_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-NEXT: s_setpc_b64
define <3 x i64> @v3i64_func_void() #0 {
  %ptr = load volatile <3 x i64> addrspace(1)*, <3 x i64> addrspace(1)* addrspace(4)* undef
  %val = load <3 x i64>, <3 x i64> addrspace(1)* %ptr
  ret <3 x i64> %val
}

; The two loads are independent of each other, so their order is left open
; like in the neighbouring vector tests.

; GCN-LABEL: {{^}}v4i64_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-NEXT: s_setpc_b64
define <4 x i64> @v4i64_func_void() #0 {
  %ptr = load volatile <4 x i64> addrspace(1)*, <4 x i64> addrspace(1)* addrspace(4)* undef
  %val = load <4 x i64>, <4 x i64> addrspace(1)* %ptr
  ret <4 x i64> %val
}

; GCN-LABEL: {{^}}v5i64_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN-NEXT: s_setpc_b64
define <5 x i64> @v5i64_func_void() #0 {
  %ptr = load volatile <5 x i64> addrspace(1)*, <5 x i64> addrspace(1)* addrspace(4)* undef
  %val = load <5 x i64>, <5 x i64> addrspace(1)* %ptr
  ret <5 x i64> %val
}

; GCN-LABEL: {{^}}v8i64_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN-DAG: buffer_load_dwordx4 v[12:15], off
; GCN-NEXT: s_setpc_b64
define <8 x i64> @v8i64_func_void() #0 {
  %ptr = load volatile <8 x i64> addrspace(1)*, <8 x i64> addrspace(1)* addrspace(4)* undef
  %val = load <8 x i64>, <8 x i64> addrspace(1)* %ptr
  ret <8 x i64> %val
}

; GCN-LABEL: {{^}}v16i64_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN-DAG: buffer_load_dwordx4 v[12:15], off
; GCN-DAG: buffer_load_dwordx4 v[16:19], off
; GCN-DAG: buffer_load_dwordx4 v[20:23], off
; GCN-DAG: buffer_load_dwordx4 v[24:27], off
; GCN-DAG: buffer_load_dwordx4 v[28:31], off
; GCN-NEXT: s_setpc_b64
define <16 x i64> @v16i64_func_void() #0 {
  %ptr = load volatile <16 x i64> addrspace(1)*, <16 x i64> addrspace(1)* addrspace(4)* undef
  %val = load <16 x i64>, <16 x i64> addrspace(1)* %ptr
  ret <16 x i64> %val
}

; ---- 16-bit element vector returns ------------------------------------------
; Only the gfx900 output is checked for these.

; GCN-LABEL: {{^}}v2i16_func_void:
; GFX9: buffer_load_dword v0, off
; GFX9-NEXT: s_setpc_b64
define <2 x i16> @v2i16_func_void() #0 {
  %val = load <2 x i16>, <2 x i16> addrspace(1)* undef
  ret <2 x i16> %val
}

; GCN-LABEL: {{^}}v3i16_func_void:
; GFX9: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off
; GFX9-NEXT: s_setpc_b64
define <3 x i16> @v3i16_func_void() #0 {
  %val = load <3 x i16>, <3 x i16> addrspace(1)* undef
  ret <3 x i16> %val
}

; GCN-LABEL: {{^}}v4i16_func_void:
; GFX9: buffer_load_dwordx2 v[0:1], off
; GFX9-NEXT: s_setpc_b64
define <4 x i16> @v4i16_func_void() #0 {
  %val = load <4 x i16>, <4 x i16> addrspace(1)* undef
  ret <4 x i16> %val
}

; GCN-LABEL: {{^}}v4f16_func_void:
; GFX9: buffer_load_dwordx2 v[0:1], off
; GFX9-NEXT: s_setpc_b64
define <4 x half> @v4f16_func_void() #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* undef
  ret <4 x half> %val
}

; FIXME: Mixing buffer and global
; FIXME: Should not scalarize
; GCN-LABEL: {{^}}v5i16_func_void:
; GFX9: buffer_load_dwordx2 v[0:1]
; GFX9-NEXT: global_load_short_d16 v2
; GFX9-NEXT: s_setpc_b64
define <5 x i16> @v5i16_func_void() #0 {
  %ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(4)* undef
  %val = load <5 x i16>, <5 x i16> addrspace(1)* %ptr
  ret <5 x i16> %val
}

; GCN-LABEL: {{^}}v8i16_func_void:
; GFX9-DAG: buffer_load_dwordx4 v[0:3], off
; GFX9-NEXT: s_setpc_b64
define <8 x i16> @v8i16_func_void() #0 {
  %ptr = load volatile <8 x i16> addrspace(1)*, <8 x i16> addrspace(1)* addrspace(4)* undef
  %val = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
  ret <8 x i16> %val
}

; The two loads are independent of each other, so their order is left open
; like in the 32-bit and 64-bit element vector tests.

; GCN-LABEL: {{^}}v16i16_func_void:
; GFX9-DAG: buffer_load_dwordx4 v[0:3], off
; GFX9-DAG: buffer_load_dwordx4 v[4:7], off
; GFX9-NEXT: s_setpc_b64
define <16 x i16> @v16i16_func_void() #0 {
  %ptr = load volatile <16 x i16> addrspace(1)*, <16 x i16> addrspace(1)* addrspace(4)* undef
  %val = load <16 x i16>, <16 x i16> addrspace(1)* %ptr
  ret <16 x i16> %val
}

; ---- 8-bit element vector returns -------------------------------------------
; Each byte currently ends up in its own VGPR (see the FIXMEs), so the checks
; only look for the highest registers / the unpacking shifts.

; FIXME: Should pack
; GCN-LABEL: {{^}}v16i8_func_void:
; GCN-DAG: v12
; GCN-DAG: v13
; GCN-DAG: v14
; GCN-DAG: v15
define <16 x i8> @v16i8_func_void() #0 {
  %ptr = load volatile <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef
  %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
  ret <16 x i8> %val
}

; FIXME: Should pack
; GCN-LABEL: {{^}}v4i8_func_void:
; GCN: buffer_load_dword v0
; GCN-DAG: v_lshrrev_b32_e32 v1, 8, v0
; GCN-DAG: v_lshrrev_b32_e32 v2, 16, v0
; GCN-DAG: v_lshrrev_b32_e32 v3, 24, v0
; GCN: s_setpc_b64
define <4 x i8> @v4i8_func_void() #0 {
  %ptr = load volatile <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(4)* undef
  %val = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
  ret <4 x i8> %val
}

; ---- Struct returns ---------------------------------------------------------
; A small struct returned by value is expected in registers (i8 in v0, i32 in
; v1); the explicit sret variant stores both fields through the pointer in v0.

; GCN-LABEL: {{^}}struct_i8_i32_func_void:
; GCN-DAG: buffer_load_dword v1
; GCN-DAG: buffer_load_ubyte v0
; GCN-NEXT: s_setpc_b64
define {i8, i32} @struct_i8_i32_func_void() #0 {
  %val = load { i8, i32 }, { i8, i32 } addrspace(1)* undef
  ret { i8, i32 } %val
}

; GCN-LABEL: {{^}}void_func_sret_struct_i8_i32:
; GCN: buffer_load_ubyte [[VAL0:v[0-9]+]]
; GCN: buffer_load_dword [[VAL1:v[0-9]+]]
; GCN: buffer_store_byte [[VAL0]], v0, s[0:3], 0 offen{{$}}
; GCN: buffer_store_dword [[VAL1]], v0, s[0:3], 0 offen offset:4{{$}}
define void @void_func_sret_struct_i8_i32({ i8, i32 } addrspace(5)* sret %arg0) #0 {
  %val0 = load volatile i8, i8 addrspace(1)* undef
  %val1 = load volatile i32, i32 addrspace(1)* undef
  %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0
  %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 1
  store i8 %val0, i8 addrspace(5)* %gep0
  store i32 %val1, i32 addrspace(5)* %gep1
  ret void
}

; FIXME: Should be able to fold offsets in all of these pre-gfx9. Call
; lowering introduces an extra CopyToReg/CopyFromReg obscuring the
; AssertZext inserted. Not using it introduces the spills.

; The following three returns hold 33 dwords each. They are expected to be
; written to memory with one dword store per element, all addressed relative
; to v0. Only the gfx900 output is checked.

; GCN-LABEL: {{^}}v33i32_func_void:
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:4{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:8{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:12{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:16{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:20{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:24{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:28{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:32{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:36{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:40{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:44{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:48{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:52{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:56{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:60{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:64{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:68{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:72{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:76{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:80{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:84{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:88{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:92{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:96{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:100{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:104{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:108{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:112{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:116{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:120{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:124{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:128{{$}}
; GFX9-NEXT: s_setpc_b64
define <33 x i32> @v33i32_func_void() #0 {
  %ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(4)* undef
  %val = load <33 x i32>, <33 x i32> addrspace(1)* %ptr
  ret <33 x i32> %val
}

; GCN-LABEL: {{^}}struct_v32i32_i32_func_void:
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:4{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:8{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:12{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:16{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:20{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:24{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:28{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:32{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:36{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:40{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:44{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:48{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:52{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:56{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:60{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:64{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:68{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:72{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:76{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:80{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:84{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:88{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:92{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:96{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:100{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:104{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:108{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:112{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:116{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:120{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:124{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:128{{$}}
; GFX9-NEXT: s_setpc_b64
define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 {
  %ptr = load volatile { <32 x i32>, i32 } addrspace(1)*, { <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef
  %val = load { <32 x i32>, i32 }, { <32 x i32>, i32 } addrspace(1)* %ptr
  ret { <32 x i32>, i32 } %val
}

; With the i32 first, the vector part is expected to start at offset 128.

; GCN-LABEL: {{^}}struct_i32_v32i32_func_void:
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:128{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:132{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:136{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:140{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:144{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:148{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:152{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:156{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:160{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:164{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:168{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:172{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:176{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:180{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:184{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:188{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:192{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:196{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:200{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:204{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:208{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:212{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:216{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:220{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:224{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:228{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:232{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:236{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:240{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:244{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:248{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:252{{$}}
; GFX9-NEXT: s_setpc_b64
define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 {
  %ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(4)* undef
  %val = load { i32, <32 x i32> }, { i32, <32 x i32> } addrspace(1)* %ptr
  ret { i32, <32 x i32> } %val
}

; Make sure the last struct component is returned in v3, not v4.
; GCN-LABEL: {{^}}v3i32_struct_func_void_wasted_reg:
; GCN: ds_read_b32 v0,
; GCN: ds_read_b32 v1,
; GCN: ds_read_b32 v2,
; GCN: ds_read_b32 v3,
define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 {
  ; Four volatile addrspace(3) loads provide the three vector lanes and the
  ; trailing scalar; the checks above expect them in v0 through v3.
  %lane0 = load volatile i32, i32 addrspace(3)* undef
  %lane1 = load volatile i32, i32 addrspace(3)* undef
  %lane2 = load volatile i32, i32 addrspace(3)* undef
  %tail = load volatile i32, i32 addrspace(3)* undef

  ; Assemble the <3 x i32> one lane at a time.
  %vec.x = insertelement <3 x i32> undef, i32 %lane0, i32 0
  %vec.xy = insertelement <3 x i32> %vec.x, i32 %lane1, i32 1
  %vec.xyz = insertelement <3 x i32> %vec.xy, i32 %lane2, i32 2

  ; Wrap the vector and the scalar into the returned aggregate.
  %agg.vec = insertvalue { <3 x i32>, i32 } undef, <3 x i32> %vec.xyz, 0
  %agg.full = insertvalue { <3 x i32>, i32 } %agg.vec, i32 %tail, 1
  ret { <3 x i32>, i32 } %agg.full
}

; GCN-LABEL: {{^}}v3f32_struct_func_void_wasted_reg:
; GCN: ds_read_b32 v0,
; GCN: ds_read_b32 v1,
; GCN: ds_read_b32 v2,
; GCN: ds_read_b32 v3,
define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 {
  ; Same shape as the integer variant, with float lanes and an i32 tail.
  %lane0 = load volatile float, float addrspace(3)* undef
  %lane1 = load volatile float, float addrspace(3)* undef
  %lane2 = load volatile float, float addrspace(3)* undef
  %tail = load volatile i32, i32 addrspace(3)* undef

  ; Assemble the <3 x float> one lane at a time.
  %vec.x = insertelement <3 x float> undef, float %lane0, i32 0
  %vec.xy = insertelement <3 x float> %vec.x, float %lane1, i32 1
  %vec.xyz = insertelement <3 x float> %vec.xy, float %lane2, i32 2

  ; Wrap the vector and the scalar into the returned aggregate.
  %agg.vec = insertvalue { <3 x float>, i32 } undef, <3 x float> %vec.xyz, 0
  %agg.full = insertvalue { <3 x float>, i32 } %agg.vec, i32 %tail, 1
  ret { <3 x float>, i32 } %agg.full
}

; GCN-LABEL: {{^}}void_func_sret_max_known_zero_bits:
; GCN: v_lshrrev_b32_e32 [[LSHR16:v[0-9]+]], 16, v0
; GCN: ds_write_b32 {{v[0-9]+}}, [[LSHR16]]

; GCN: v_mov_b32_e32 [[HIGH_BITS:v[0-9]+]], 0
; GCN: ds_write_b32 {{v[0-9]+}}, [[HIGH_BITS]]
; GCN-NEXT: ds_write_b32 {{v[0-9]+}}, [[HIGH_BITS]]
define void @void_func_sret_max_known_zero_bits(i8 addrspace(5)* sret %arg0) #0 {
  ; Reinterpret the sret pointer as an integer so its bits can be shifted.
  %addr = ptrtoint i8 addrspace(5)* %arg0 to i32

  ; Per the checks above, the shift by 16 is expected to survive as a real
  ; shift, while the shifts by 17 and 18 are expected to become constant 0.
  %shr16 = lshr i32 %addr, 16
  %shr17 = lshr i32 %addr, 17
  %shr18 = lshr i32 %addr, 18

  ; Volatile stores keep all three results alive and in order.
  store volatile i32 %shr16, i32 addrspace(3)* undef
  store volatile i32 %shr17, i32 addrspace(3)* undef
  store volatile i32 %shr18, i32 addrspace(3)* undef
  ret void
}

attributes #0 = { nounwind }