; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89 %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s

; Checks the registers and instructions used to return values from functions,
; across scalar, vector and aggregate return types. Three subtargets are
; exercised by the commands at the top of the file (hawaii, fiji and gfx900);
; fiji and gfx900 are run with flat-for-global disabled.
;
; Most tests load the returned value from an undef pointer so that the only
; interesting codegen is the load into the return register(s), the wait, and
; the s_setpc_b64 return.

; --- i1 returns ---
; The value is loaded as an unsigned byte into v0. Only the signext variant
; expects an explicit extension (v_bfe_i32) before returning.

; GCN-LABEL: {{^}}i1_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define i1 @i1_func_void() #0 {
  %val = load i1, i1 addrspace(1)* undef
  ret i1 %val
}

; FIXME: Missing and?
; GCN-LABEL: {{^}}i1_zeroext_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define zeroext i1 @i1_zeroext_func_void() #0 {
  %val = load i1, i1 addrspace(1)* undef
  ret i1 %val
}

; GCN-LABEL: {{^}}i1_signext_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1{{$}}
; GCN-NEXT: s_setpc_b64
define signext i1 @i1_signext_func_void() #0 {
  %val = load i1, i1 addrspace(1)* undef
  ret i1 %val
}

; --- i8 returns ---
; The signext variant is expected to select the signed byte load; the plain
; and zeroext variants use the unsigned byte load.

; GCN-LABEL: {{^}}i8_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define i8 @i8_func_void() #0 {
  %val = load i8, i8 addrspace(1)* undef
  ret i8 %val
}

; GCN-LABEL: {{^}}i8_zeroext_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define zeroext i8 @i8_zeroext_func_void() #0 {
  %val = load i8, i8 addrspace(1)* undef
  ret i8 %val
}

; GCN-LABEL: {{^}}i8_signext_func_void:
; GCN: buffer_load_sbyte v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define signext i8 @i8_signext_func_void() #0 {
  %val = load i8, i8 addrspace(1)* undef
  ret i8 %val
}

; --- i16 returns ---
; Same pattern as i8, using the 16-bit loads.

; GCN-LABEL: {{^}}i16_func_void:
; GCN: buffer_load_ushort v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define i16 @i16_func_void() #0 {
  %val = load i16, i16 addrspace(1)* undef
  ret i16 %val
}

; GCN-LABEL: {{^}}i16_zeroext_func_void:
; GCN: buffer_load_ushort v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define zeroext i16 @i16_zeroext_func_void() #0 {
  %val = load i16, i16 addrspace(1)* undef
  ret i16 %val
}

; GCN-LABEL: {{^}}i16_signext_func_void:
; GCN: buffer_load_sshort v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define signext i16 @i16_signext_func_void() #0 {
  %val = load i16, i16 addrspace(1)* undef
  ret i16 %val
}

; GCN-LABEL: {{^}}i32_func_void:
; GCN: buffer_load_dword v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define i32 @i32_func_void() #0 {
  %val = load i32, i32 addrspace(1)* undef
  ret i32 %val
}

; --- i48 returns ---
; Expected as a dword load into v0 plus a 16-bit load into v1; signext only
; changes the second load to its signed form.

; GCN-LABEL: {{^}}i48_func_void:
; GCN: buffer_load_dword v0, off
; GCN-NEXT: buffer_load_ushort v1, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define i48 @i48_func_void() #0 {
  %val = load i48, i48 addrspace(1)* undef, align 8
  ret i48 %val
}

; GCN-LABEL: {{^}}i48_zeroext_func_void:
; GCN: buffer_load_dword v0, off
; GCN-NEXT: buffer_load_ushort v1, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define zeroext i48 @i48_zeroext_func_void() #0 {
  %val = load i48, i48 addrspace(1)* undef, align 8
  ret i48 %val
}

; GCN-LABEL: {{^}}i48_signext_func_void:
; GCN: buffer_load_dword v0, off
; GCN-NEXT: buffer_load_sshort v1, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define signext i48 @i48_signext_func_void() #0 {
  %val = load i48, i48 addrspace(1)* undef, align 8
  ret i48 %val
}

; --- i63 returns ---
; Unlike the tests above, the value is taken as an argument and returned
; unchanged, so the checks cover only the extension applied to v[0:1]:
; nothing for the plain variant, a mask of the top bit of v1 for zeroext, and
; a shift-left / arithmetic-shift-right pair for signext (with different
; mnemonics on hawaii versus fiji and gfx900).

; GCN-LABEL: {{^}}i63_func_void:
; GCN: s_waitcnt
; GCN-NEXT: s_setpc_b64
define i63 @i63_func_void(i63 %val) #0 {
  ret i63 %val
}

; GCN-LABEL: {{^}}i63_zeroext_func_void:
; GCN: s_waitcnt
; GCN-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
; GCN-NEXT: s_setpc_b64
define zeroext i63 @i63_zeroext_func_void(i63 %val) #0 {
  ret i63 %val
}

; GCN-LABEL: {{^}}i63_signext_func_void:
; GCN: s_waitcnt
; CI-NEXT: v_lshl_b64 v[0:1], v[0:1], 1
; CI-NEXT: v_ashr_i64 v[0:1], v[0:1], 1

; GFX89-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
; GFX89-NEXT: v_ashrrev_i64 v[0:1], 1, v[0:1]

; GCN-NEXT: s_setpc_b64
define signext i63 @i63_signext_func_void(i63 %val) #0 {
  ret i63 %val
}

; GCN-LABEL: {{^}}i64_func_void:
; GCN: buffer_load_dwordx2 v[0:1], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define i64 @i64_func_void() #0 {
  %val = load i64, i64 addrspace(1)* undef
  ret i64 %val
}

; i65 needs a third register: v[0:1] for the low 64 bits and a byte load into
; v2. The two loads may appear in either order.
; GCN-LABEL: {{^}}i65_func_void:
; GCN-DAG: buffer_load_dwordx2 v[0:1], off
; GCN-DAG: buffer_load_ubyte v2, off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define i65 @i65_func_void() #0 {
  %val = load i65, i65 addrspace(1)* undef
  ret i65 %val
}

; --- Floating-point returns ---

; GCN-LABEL: {{^}}f32_func_void:
; GCN: buffer_load_dword v0, off, s[4:7], 0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define float @f32_func_void() #0 {
  %val = load float, float addrspace(1)* undef
  ret float %val
}

; GCN-LABEL: {{^}}f64_func_void:
; GCN: buffer_load_dwordx2 v[0:1], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define double @f64_func_void() #0 {
  %val = load double, double addrspace(1)* undef
  ret double %val
}

; GCN-LABEL: {{^}}v2f64_func_void:
; GCN: buffer_load_dwordx4 v[0:3], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <2 x double> @v2f64_func_void() #0 {
  %val = load <2 x double>, <2 x double> addrspace(1)* undef
  ret <2 x double> %val
}

; --- Vectors of i32 ---
; Each element is expected in consecutive VGPRs starting at v0. From
; <8 x i32> upward the tests first load the source pointer with a volatile
; load from an undef addrspace(4) pointer, then load the vector through it.

; GCN-LABEL: {{^}}v2i32_func_void:
; GCN: buffer_load_dwordx2 v[0:1], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <2 x i32> @v2i32_func_void() #0 {
  %val = load <2 x i32>, <2 x i32> addrspace(1)* undef
  ret <2 x i32> %val
}

; GCN-LABEL: {{^}}v3i32_func_void:
; GCN: buffer_load_dwordx3 v[0:2], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <3 x i32> @v3i32_func_void() #0 {
  %val = load <3 x i32>, <3 x i32> addrspace(1)* undef
  ret <3 x i32> %val
}

; GCN-LABEL: {{^}}v4i32_func_void:
; GCN: buffer_load_dwordx4 v[0:3], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <4 x i32> @v4i32_func_void() #0 {
  %val = load <4 x i32>, <4 x i32> addrspace(1)* undef
  ret <4 x i32> %val
}

; GCN-LABEL: {{^}}v5i32_func_void:
; GCN-DAG: buffer_load_dword v4, off
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <5 x i32> @v5i32_func_void() #0 {
  %val = load volatile <5 x i32>, <5 x i32> addrspace(1)* undef
  ret <5 x i32> %val
}

; GCN-LABEL: {{^}}v8i32_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <8 x i32> @v8i32_func_void() #0 {
  %ptr = load volatile <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef
  %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr
  ret <8 x i32> %val
}

; GCN-LABEL: {{^}}v16i32_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN-DAG: buffer_load_dwordx4 v[12:15], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <16 x i32> @v16i32_func_void() #0 {
  %ptr = load volatile <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef
  %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr
  ret <16 x i32> %val
}

; GCN-LABEL: {{^}}v32i32_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN-DAG: buffer_load_dwordx4 v[12:15], off
; GCN-DAG: buffer_load_dwordx4 v[16:19], off
; GCN-DAG: buffer_load_dwordx4 v[20:23], off
; GCN-DAG: buffer_load_dwordx4 v[24:27], off
; GCN-DAG: buffer_load_dwordx4 v[28:31], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <32 x i32> @v32i32_func_void() #0 {
  %ptr = load volatile <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
  %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr
  ret <32 x i32> %val
}

; --- Vectors of i64 ---
; Each i64 element occupies a pair of consecutive VGPRs.

; GCN-LABEL: {{^}}v2i64_func_void:
; GCN: buffer_load_dwordx4 v[0:3], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <2 x i64> @v2i64_func_void() #0 {
  %val = load <2 x i64>, <2 x i64> addrspace(1)* undef
  ret <2 x i64> %val
}

; GCN-LABEL: {{^}}v3i64_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx2 v[4:5], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <3 x i64> @v3i64_func_void() #0 {
  %ptr = load volatile <3 x i64> addrspace(1)*, <3 x i64> addrspace(1)* addrspace(4)* undef
  %val = load <3 x i64>, <3 x i64> addrspace(1)* %ptr
  ret <3 x i64> %val
}

; GCN-LABEL: {{^}}v4i64_func_void:
; GCN: buffer_load_dwordx4 v[0:3], off
; GCN: buffer_load_dwordx4 v[4:7], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <4 x i64> @v4i64_func_void() #0 {
  %ptr = load volatile <4 x i64> addrspace(1)*, <4 x i64> addrspace(1)* addrspace(4)* undef
  %val = load <4 x i64>, <4 x i64> addrspace(1)* %ptr
  ret <4 x i64> %val
}

; GCN-LABEL: {{^}}v5i64_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <5 x i64> @v5i64_func_void() #0 {
  %ptr = load volatile <5 x i64> addrspace(1)*, <5 x i64> addrspace(1)* addrspace(4)* undef
  %val = load <5 x i64>, <5 x i64> addrspace(1)* %ptr
  ret <5 x i64> %val
}

; GCN-LABEL: {{^}}v8i64_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN-DAG: buffer_load_dwordx4 v[12:15], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <8 x i64> @v8i64_func_void() #0 {
  %ptr = load volatile <8 x i64> addrspace(1)*, <8 x i64> addrspace(1)* addrspace(4)* undef
  %val = load <8 x i64>, <8 x i64> addrspace(1)* %ptr
  ret <8 x i64> %val
}

; GCN-LABEL: {{^}}v16i64_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN-DAG: buffer_load_dwordx4 v[12:15], off
; GCN-DAG: buffer_load_dwordx4 v[16:19], off
; GCN-DAG: buffer_load_dwordx4 v[20:23], off
; GCN-DAG: buffer_load_dwordx4 v[24:27], off
; GCN-DAG: buffer_load_dwordx4 v[28:31], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <16 x i64> @v16i64_func_void() #0 {
  %ptr = load volatile <16 x i64> addrspace(1)*, <16 x i64> addrspace(1)* addrspace(4)* undef
  %val = load <16 x i64>, <16 x i64> addrspace(1)* %ptr
  ret <16 x i64> %val
}

; --- Vectors of 16-bit elements ---
; The instruction checks in this group apply to the gfx900 run only; the
; other runs only verify that the function label is present. The expected
; loads place two 16-bit elements in each 32-bit register.

; GCN-LABEL: {{^}}v2i16_func_void:
; GFX9: buffer_load_dword v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <2 x i16> @v2i16_func_void() #0 {
  %val = load <2 x i16>, <2 x i16> addrspace(1)* undef
  ret <2 x i16> %val
}

; GCN-LABEL: {{^}}v3i16_func_void:
; GFX9: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <3 x i16> @v3i16_func_void() #0 {
  %val = load <3 x i16>, <3 x i16> addrspace(1)* undef
  ret <3 x i16> %val
}

; GCN-LABEL: {{^}}v4i16_func_void:
; GFX9: buffer_load_dwordx2 v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <4 x i16> @v4i16_func_void() #0 {
  %val = load <4 x i16>, <4 x i16> addrspace(1)* undef
  ret <4 x i16> %val
}

; GCN-LABEL: {{^}}v4f16_func_void:
; GFX9: buffer_load_dwordx2 v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <4 x half> @v4f16_func_void() #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* undef
  ret <4 x half> %val
}

; FIXME: Mixing buffer and global
; FIXME: Should not scalarize
; GCN-LABEL: {{^}}v5i16_func_void:
; GFX9: buffer_load_dwordx2 v[0:1]
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: global_load_short_d16 v2
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64
define <5 x i16> @v5i16_func_void() #0 {
  %ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(4)* undef
  %val = load <5 x i16>, <5 x i16> addrspace(1)* %ptr
  ret <5 x i16> %val
}

; GCN-LABEL: {{^}}v8i16_func_void:
; GFX9-DAG: buffer_load_dwordx4 v[0:3], off
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <8 x i16> @v8i16_func_void() #0 {
  %ptr = load volatile <8 x i16> addrspace(1)*, <8 x i16> addrspace(1)* addrspace(4)* undef
  %val = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
  ret <8 x i16> %val
}

; GCN-LABEL: {{^}}v16i16_func_void:
; GFX9: buffer_load_dwordx4 v[0:3], off
; GFX9: buffer_load_dwordx4 v[4:7], off
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <16 x i16> @v16i16_func_void() #0 {
  %ptr = load volatile <16 x i16> addrspace(1)*, <16 x i16> addrspace(1)* addrspace(4)* undef
  %val = load <16 x i16>, <16 x i16> addrspace(1)* %ptr
  ret <16 x i16> %val
}

; --- Vectors of i8 ---
; The <16 x i8> test only checks that registers v12 through v15 are mentioned
; somewhere in the function, i.e. that one register per element is in use.

; FIXME: Should pack
; GCN-LABEL: {{^}}v16i8_func_void:
; GCN-DAG: v12
; GCN-DAG: v13
; GCN-DAG: v14
; GCN-DAG: v15
define <16 x i8> @v16i8_func_void() #0 {
  %ptr = load volatile <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef
  %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
  ret <16 x i8> %val
}

; The <4 x i8> value is loaded as a single dword and then split into v1..v3
; with right shifts by 8, 16 and 24.
; FIXME: Should pack
; GCN-LABEL: {{^}}v4i8_func_void:
; GCN: buffer_load_dword v0
; GCN-DAG: v_lshrrev_b32_e32 v1, 8, v0
; GCN-DAG: v_lshrrev_b32_e32 v2, 16, v0
; GCN-DAG: v_lshrrev_b32_e32 v3, 24, v0
; GCN: s_setpc_b64
define <4 x i8> @v4i8_func_void() #0 {
  %ptr = load volatile <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(4)* undef
  %val = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
  ret <4 x i8> %val
}

; --- Small struct returns ---
; Returned directly: the i8 field is expected in v0 and the i32 field in v1.

; GCN-LABEL: {{^}}struct_i8_i32_func_void:
; GCN-DAG: buffer_load_dword v1
; GCN-DAG: buffer_load_ubyte v0
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define {i8, i32} @struct_i8_i32_func_void() #0 {
  %val = load { i8, i32 }, { i8, i32 } addrspace(1)* undef
  ret { i8, i32 } %val
}

; Explicit sret argument: the two loaded values are stored relative to v0, at
; offset 0 (byte store) and offset 4 (dword store).
; GCN-LABEL: {{^}}void_func_sret_struct_i8_i32:
; GCN: buffer_load_ubyte [[VAL0:v[0-9]+]]
; GCN: buffer_load_dword [[VAL1:v[0-9]+]]
; GCN: buffer_store_byte [[VAL0]], v0, s[0:3], 0 offen{{$}}
; GCN: buffer_store_dword [[VAL1]], v0, s[0:3], 0 offen offset:4{{$}}
define void @void_func_sret_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i32 }) %arg0) #0 {
  %val0 = load volatile i8, i8 addrspace(1)* undef
  %val1 = load volatile i32, i32 addrspace(1)* undef
  %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0
  %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 1
  store i8 %val0, i8 addrspace(5)* %gep0
  store i32 %val1, i32 addrspace(5)* %gep1
  ret void
}

; --- Returns of more than 32 dwords ---
; The following three tests return 33 dwords each. On gfx900 the checks
; expect 33 dword stores addressed relative to v0 (with immediate offsets)
; rather than a return in registers.
; NOTE(review): presumably v0 holds a hidden return-slot pointer supplied by
; the caller - confirm against the calling-convention lowering.

; FIXME: Should be able to fold offsets in all of these pre-gfx9. Call
; lowering introduces an extra CopyToReg/CopyFromReg obscuring the
; AssertZext inserted. Not using it introduces the spills.

; GCN-LABEL: {{^}}v33i32_func_void:
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:4{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:8{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:12{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:16{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:20{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:24{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:28{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:32{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:36{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:40{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:44{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:48{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:52{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:56{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:60{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:64{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:68{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:72{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:76{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:80{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:84{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:88{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:92{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:96{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:100{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:104{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:108{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:112{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:116{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:120{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:124{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:128{{$}}
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <33 x i32> @v33i32_func_void() #0 {
  %ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(4)* undef
  %val = load <33 x i32>, <33 x i32> addrspace(1)* %ptr
  ret <33 x i32> %val
}

; Vector first, scalar last: the expected store offsets run contiguously from
; 0 to 128.
; GCN-LABEL: {{^}}struct_v32i32_i32_func_void:
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:4{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:8{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:12{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:16{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:20{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:24{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:28{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:32{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:36{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:40{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:44{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:48{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:52{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:56{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:60{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:64{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:68{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:72{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:76{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:80{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:84{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:88{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:92{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:96{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:100{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:104{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:108{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:112{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:116{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:120{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:124{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:128{{$}}
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 {
  %ptr = load volatile { <32 x i32>, i32 } addrspace(1)*, { <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef
  %val = load { <32 x i32>, i32 }, { <32 x i32>, i32 } addrspace(1)* %ptr
  ret { <32 x i32>, i32 } %val
}

; Scalar first, vector last: the expected offsets are 0 for the i32 and then
; 128 through 252 for the vector elements, leaving a gap after the scalar.
; GCN-LABEL: {{^}}struct_i32_v32i32_func_void:
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:128{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:132{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:136{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:140{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:144{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:148{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:152{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:156{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:160{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:164{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:168{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:172{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:176{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:180{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:184{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:188{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:192{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:196{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:200{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:204{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:208{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:212{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:216{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:220{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:224{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:228{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:232{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:236{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:240{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:244{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:248{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:252{{$}}
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 {
  %ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(4)* undef
  %val = load { i32, <32 x i32> }, { i32, <32 x i32> } addrspace(1)* %ptr
  ret { i32, <32 x i32> } %val
}

; --- Struct of a 3-element vector plus a scalar ---
; The four values are read with separate volatile LDS loads (addrspace(3)) so
; that each one can be matched to its destination register.

; Make sure the last struct component is returned in v3, not v4.
; GCN-LABEL: {{^}}v3i32_struct_func_void_wasted_reg:
; GCN: ds_read_b32 v0,
; GCN: ds_read_b32 v1,
; GCN: ds_read_b32 v2,
; GCN: ds_read_b32 v3,
define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 {
  %load0 = load volatile i32, i32 addrspace(3)* undef
  %load1 = load volatile i32, i32 addrspace(3)* undef
  %load2 = load volatile i32, i32 addrspace(3)* undef
  %load3 = load volatile i32, i32 addrspace(3)* undef

  %insert.0 = insertelement <3 x i32> undef, i32 %load0, i32 0
  %insert.1 = insertelement <3 x i32> %insert.0, i32 %load1, i32 1
  %insert.2 = insertelement <3 x i32> %insert.1, i32 %load2, i32 2
  %insert.3 = insertvalue { <3 x i32>, i32 } undef, <3 x i32> %insert.2, 0
  %insert.4 = insertvalue { <3 x i32>, i32 } %insert.3, i32 %load3, 1
  ret { <3 x i32>, i32 } %insert.4
}

; Same layout check as above with a <3 x float> vector component.
; GCN-LABEL: {{^}}v3f32_struct_func_void_wasted_reg:
; GCN: ds_read_b32 v0,
; GCN: ds_read_b32 v1,
; GCN: ds_read_b32 v2,
; GCN: ds_read_b32 v3,
define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 {
  %load0 = load volatile float, float addrspace(3)* undef
  %load1 = load volatile float, float addrspace(3)* undef
  %load2 = load volatile float, float addrspace(3)* undef
  %load3 = load volatile i32, i32 addrspace(3)* undef

  %insert.0 = insertelement <3 x float> undef, float %load0, i32 0
  %insert.1 = insertelement <3 x float> %insert.0, float %load1, i32 1
  %insert.2 = insertelement <3 x float> %insert.1, float %load2, i32 2
  %insert.3 = insertvalue { <3 x float>, i32 } undef, <3 x float> %insert.2, 0
  %insert.4 = insertvalue { <3 x float>, i32 } %insert.3, i32 %load3, 1
  ret { <3 x float>, i32 } %insert.4
}

; The sret pointer is converted to an integer and shifted right by 16, 17 and
; 18. The checks expect the shift by 16 to be computed and stored, while the
; other two stores write a register that was set to the constant 0.
; NOTE(review): presumably this pins how many high bits of the sret pointer
; the backend treats as known zero - confirm against the lowering code.
; GCN-LABEL: {{^}}void_func_sret_max_known_zero_bits:
; GCN: v_lshrrev_b32_e32 [[LSHR16:v[0-9]+]], 16, v0
; GCN: ds_write_b32 {{v[0-9]+}}, [[LSHR16]]

; GCN: v_mov_b32_e32 [[HIGH_BITS:v[0-9]+]], 0
; GCN: ds_write_b32 {{v[0-9]+}}, [[HIGH_BITS]]
; GCN-NEXT: ds_write_b32 {{v[0-9]+}}, [[HIGH_BITS]]
define void @void_func_sret_max_known_zero_bits(i8 addrspace(5)* sret(i8) %arg0) #0 {
  %arg0.int = ptrtoint i8 addrspace(5)* %arg0 to i32

  %lshr0 = lshr i32 %arg0.int, 16
  %lshr1 = lshr i32 %arg0.int, 17
  %lshr2 = lshr i32 %arg0.int, 18

  store volatile i32 %lshr0, i32 addrspace(3)* undef
  store volatile i32 %lshr1, i32 addrspace(3)* undef
  store volatile i32 %lshr2, i32 addrspace(3)* undef
  ret void
}

attributes #0 = { nounwind }