; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89 %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s

; Checks the code generated for functions that return scalar, vector and
; struct values. Most functions load the returned value from an undef
; address so that the only interesting instructions are the load, the wait
; and the return.

; i1 returns, plain and with zeroext/signext.

; GCN-LABEL: {{^}}i1_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define i1 @i1_func_void() #0 {
  %val = load i1, i1 addrspace(1)* undef
  ret i1 %val
}

; FIXME: Missing and?
; GCN-LABEL: {{^}}i1_zeroext_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define zeroext i1 @i1_zeroext_func_void() #0 {
  %val = load i1, i1 addrspace(1)* undef
  ret i1 %val
}

; GCN-LABEL: {{^}}i1_signext_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1{{$}}
; GCN-NEXT: s_setpc_b64
define signext i1 @i1_signext_func_void() #0 {
  %val = load i1, i1 addrspace(1)* undef
  ret i1 %val
}

; i8 returns, plain and with zeroext/signext.

; GCN-LABEL: {{^}}i8_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define i8 @i8_func_void() #0 {
  %val = load i8, i8 addrspace(1)* undef
  ret i8 %val
}

; GCN-LABEL: {{^}}i8_zeroext_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define zeroext i8 @i8_zeroext_func_void() #0 {
  %val = load i8, i8 addrspace(1)* undef
  ret i8 %val
}

; GCN-LABEL: {{^}}i8_signext_func_void:
; GCN: buffer_load_sbyte v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define signext i8 @i8_signext_func_void() #0 {
  %val = load i8, i8 addrspace(1)* undef
  ret i8 %val
}

; i16 returns, plain and with zeroext/signext.

; GCN-LABEL: {{^}}i16_func_void:
; GCN: buffer_load_ushort v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define i16 @i16_func_void() #0 {
  %val = load i16, i16 addrspace(1)* undef
  ret i16 %val
}

; GCN-LABEL: {{^}}i16_zeroext_func_void:
; GCN: buffer_load_ushort v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define zeroext i16 @i16_zeroext_func_void() #0 {
  %val = load i16, i16 addrspace(1)* undef
  ret i16 %val
}

; GCN-LABEL: {{^}}i16_signext_func_void:
; GCN: buffer_load_sshort v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define signext i16 @i16_signext_func_void() #0 {
  %val = load i16, i16 addrspace(1)* undef
  ret i16 %val
}

; GCN-LABEL: {{^}}i32_func_void:
; GCN: buffer_load_dword v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define i32 @i32_func_void() #0 {
  %val = load i32, i32 addrspace(1)* undef
  ret i32 %val
}

; i48 returns; the checks expect a dword load into v0 and a 16-bit load
; into v1.

; GCN-LABEL: {{^}}i48_func_void:
; GCN: buffer_load_dword v0, off
; GCN-NEXT: buffer_load_ushort v1, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define i48 @i48_func_void() #0 {
  %val = load i48, i48 addrspace(1)* undef, align 8
  ret i48 %val
}

; GCN-LABEL: {{^}}i48_zeroext_func_void:
; GCN: buffer_load_dword v0, off
; GCN-NEXT: buffer_load_ushort v1, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define zeroext i48 @i48_zeroext_func_void() #0 {
  %val = load i48, i48 addrspace(1)* undef, align 8
  ret i48 %val
}

; GCN-LABEL: {{^}}i48_signext_func_void:
; GCN: buffer_load_dword v0, off
; GCN-NEXT: buffer_load_sshort v1, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define signext i48 @i48_signext_func_void() #0 {
  %val = load i48, i48 addrspace(1)* undef, align 8
  ret i48 %val
}

; The i63 cases return their argument instead of a loaded value, so the
; checks only cover the extension of the returned register pair.

; GCN-LABEL: {{^}}i63_func_void:
; GCN: s_waitcnt
; GCN-NEXT: s_setpc_b64
define i63 @i63_func_void(i63 %val) #0 {
  ret i63 %val
}

; GCN-LABEL: {{^}}i63_zeroext_func_void:
; GCN: s_waitcnt
; GCN-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
; GCN-NEXT: s_setpc_b64
define zeroext i63 @i63_zeroext_func_void(i63 %val) #0 {
  ret i63 %val
}

; GCN-LABEL: {{^}}i63_signext_func_void:
; GCN: s_waitcnt
; CI-NEXT: v_lshl_b64 v[0:1], v[0:1], 1
; CI-NEXT: v_ashr_i64 v[0:1], v[0:1], 1

; GFX89-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
; GFX89-NEXT: v_ashrrev_i64 v[0:1], 1, v[0:1]

; GCN-NEXT: s_setpc_b64
define signext i63 @i63_signext_func_void(i63 %val) #0 {
  ret i63 %val
}

; GCN-LABEL: {{^}}i64_func_void:
; GCN: buffer_load_dwordx2 v[0:1], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define i64 @i64_func_void() #0 {
  %val = load i64, i64 addrspace(1)* undef
  ret i64 %val
}

; GCN-LABEL: {{^}}i65_func_void:
; GCN-DAG: buffer_load_dwordx2 v[0:1], off
; GCN-DAG: buffer_load_ubyte v2, off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define i65 @i65_func_void() #0 {
  %val = load i65, i65 addrspace(1)* undef
  ret i65 %val
}

; Floating-point scalar and vector returns.

; GCN-LABEL: {{^}}f32_func_void:
; GCN: buffer_load_dword v0, off, s[4:7], 0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define float @f32_func_void() #0 {
  %val = load float, float addrspace(1)* undef
  ret float %val
}

; GCN-LABEL: {{^}}f64_func_void:
; GCN: buffer_load_dwordx2 v[0:1], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define double @f64_func_void() #0 {
  %val = load double, double addrspace(1)* undef
  ret double %val
}

; GCN-LABEL: {{^}}v2f64_func_void:
; GCN: buffer_load_dwordx4 v[0:3], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <2 x double> @v2f64_func_void() #0 {
  %val = load <2 x double>, <2 x double> addrspace(1)* undef
  ret <2 x double> %val
}

; Vectors of i32.

; GCN-LABEL: {{^}}v2i32_func_void:
; GCN: buffer_load_dwordx2 v[0:1], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <2 x i32> @v2i32_func_void() #0 {
  %val = load <2 x i32>, <2 x i32> addrspace(1)* undef
  ret <2 x i32> %val
}

; GCN-LABEL: {{^}}v3i32_func_void:
; GCN: buffer_load_dwordx3 v[0:2], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <3 x i32> @v3i32_func_void() #0 {
  %val = load <3 x i32>, <3 x i32> addrspace(1)* undef
  ret <3 x i32> %val
}

; GCN-LABEL: {{^}}v4i32_func_void:
; GCN: buffer_load_dwordx4 v[0:3], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <4 x i32> @v4i32_func_void() #0 {
  %val = load <4 x i32>, <4 x i32> addrspace(1)* undef
  ret <4 x i32> %val
}

; GCN-LABEL: {{^}}v5i32_func_void:
; GCN-DAG: buffer_load_dword v4, off
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <5 x i32> @v5i32_func_void() #0 {
  %val = load volatile <5 x i32>, <5 x i32> addrspace(1)* undef
  ret <5 x i32> %val
}

; From here on most tests first load the pointer with a volatile load and
; then load the returned value through it.

; GCN-LABEL: {{^}}v8i32_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <8 x i32> @v8i32_func_void() #0 {
  %ptr = load volatile <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef
  %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr
  ret <8 x i32> %val
}

; GCN-LABEL: {{^}}v16i32_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN-DAG: buffer_load_dwordx4 v[12:15], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <16 x i32> @v16i32_func_void() #0 {
  %ptr = load volatile <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef
  %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr
  ret <16 x i32> %val
}

; GCN-LABEL: {{^}}v32i32_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN-DAG: buffer_load_dwordx4 v[12:15], off
; GCN-DAG: buffer_load_dwordx4 v[16:19], off
; GCN-DAG: buffer_load_dwordx4 v[20:23], off
; GCN-DAG: buffer_load_dwordx4 v[24:27], off
; GCN-DAG: buffer_load_dwordx4 v[28:31], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <32 x i32> @v32i32_func_void() #0 {
  %ptr = load volatile <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
  %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr
  ret <32 x i32> %val
}

; Vectors of i64.

; GCN-LABEL: {{^}}v2i64_func_void:
; GCN: buffer_load_dwordx4 v[0:3], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <2 x i64> @v2i64_func_void() #0 {
  %val = load <2 x i64>, <2 x i64> addrspace(1)* undef
  ret <2 x i64> %val
}

; GCN-LABEL: {{^}}v3i64_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx2 v[4:5], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <3 x i64> @v3i64_func_void() #0 {
  %ptr = load volatile <3 x i64> addrspace(1)*, <3 x i64> addrspace(1)* addrspace(4)* undef
  %val = load <3 x i64>, <3 x i64> addrspace(1)* %ptr
  ret <3 x i64> %val
}

; GCN-LABEL: {{^}}v4i64_func_void:
; GCN: buffer_load_dwordx4 v[0:3], off
; GCN: buffer_load_dwordx4 v[4:7], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <4 x i64> @v4i64_func_void() #0 {
  %ptr = load volatile <4 x i64> addrspace(1)*, <4 x i64> addrspace(1)* addrspace(4)* undef
  %val = load <4 x i64>, <4 x i64> addrspace(1)* %ptr
  ret <4 x i64> %val
}

; GCN-LABEL: {{^}}v5i64_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <5 x i64> @v5i64_func_void() #0 {
  %ptr = load volatile <5 x i64> addrspace(1)*, <5 x i64> addrspace(1)* addrspace(4)* undef
  %val = load <5 x i64>, <5 x i64> addrspace(1)* %ptr
  ret <5 x i64> %val
}

; GCN-LABEL: {{^}}v8i64_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN-DAG: buffer_load_dwordx4 v[12:15], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <8 x i64> @v8i64_func_void() #0 {
  %ptr = load volatile <8 x i64> addrspace(1)*, <8 x i64> addrspace(1)* addrspace(4)* undef
  %val = load <8 x i64>, <8 x i64> addrspace(1)* %ptr
  ret <8 x i64> %val
}

; GCN-LABEL: {{^}}v16i64_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN-DAG: buffer_load_dwordx4 v[12:15], off
; GCN-DAG: buffer_load_dwordx4 v[16:19], off
; GCN-DAG: buffer_load_dwordx4 v[20:23], off
; GCN-DAG: buffer_load_dwordx4 v[24:27], off
; GCN-DAG: buffer_load_dwordx4 v[28:31], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <16 x i64> @v16i64_func_void() #0 {
  %ptr = load volatile <16 x i64> addrspace(1)*, <16 x i64> addrspace(1)* addrspace(4)* undef
  %val = load <16 x i64>, <16 x i64> addrspace(1)* %ptr
  ret <16 x i64> %val
}

; Vectors of 16-bit elements. Only the gfx900 output is checked beyond the
; function label.

; GCN-LABEL: {{^}}v2i16_func_void:
; GFX9: buffer_load_dword v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <2 x i16> @v2i16_func_void() #0 {
  %val = load <2 x i16>, <2 x i16> addrspace(1)* undef
  ret <2 x i16> %val
}

; GCN-LABEL: {{^}}v3i16_func_void:
; GFX9: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <3 x i16> @v3i16_func_void() #0 {
  %val = load <3 x i16>, <3 x i16> addrspace(1)* undef
  ret <3 x i16> %val
}

; GCN-LABEL: {{^}}v4i16_func_void:
; GFX9: buffer_load_dwordx2 v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <4 x i16> @v4i16_func_void() #0 {
  %val = load <4 x i16>, <4 x i16> addrspace(1)* undef
  ret <4 x i16> %val
}

; GCN-LABEL: {{^}}v4f16_func_void:
; GFX9: buffer_load_dwordx2 v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <4 x half> @v4f16_func_void() #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* undef
  ret <4 x half> %val
}

; FIXME: Mixing buffer and global
; FIXME: Should not scalarize
; GCN-LABEL: {{^}}v5i16_func_void:
; GFX9: buffer_load_dwordx4 v[0:3]
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64
define <5 x i16> @v5i16_func_void() #0 {
  %ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(4)* undef
  %val = load <5 x i16>, <5 x i16> addrspace(1)* %ptr
  ret <5 x i16> %val
}

; GCN-LABEL: {{^}}v8i16_func_void:
; GFX9-DAG: buffer_load_dwordx4 v[0:3], off
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <8 x i16> @v8i16_func_void() #0 {
  %ptr = load volatile <8 x i16> addrspace(1)*, <8 x i16> addrspace(1)* addrspace(4)* undef
  %val = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
  ret <8 x i16> %val
}

; GCN-LABEL: {{^}}v16i16_func_void:
; GFX9: buffer_load_dwordx4 v[0:3], off
; GFX9: buffer_load_dwordx4 v[4:7], off
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <16 x i16> @v16i16_func_void() #0 {
  %ptr = load volatile <16 x i16> addrspace(1)*, <16 x i16> addrspace(1)* addrspace(4)* undef
  %val = load <16 x i16>, <16 x i16> addrspace(1)* %ptr
  ret <16 x i16> %val
}

; Vectors of i8.

; FIXME: Should pack
; GCN-LABEL: {{^}}v16i8_func_void:
; GCN-DAG: v12
; GCN-DAG: v13
; GCN-DAG: v14
; GCN-DAG: v15
define <16 x i8> @v16i8_func_void() #0 {
  %ptr = load volatile <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef
  %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
  ret <16 x i8> %val
}

; FIXME: Should pack
; GCN-LABEL: {{^}}v4i8_func_void:
; GCN: buffer_load_dword v0
; GCN-DAG: v_lshrrev_b32_e32 v1, 8, v0
; GCN-DAG: v_lshrrev_b32_e32 v2, 16, v0
; GCN-DAG: v_lshrrev_b32_e32 v3, 24, v0
; GCN: s_setpc_b64
define <4 x i8> @v4i8_func_void() #0 {
  %ptr = load volatile <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(4)* undef
  %val = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
  ret <4 x i8> %val
}

; Small struct returned by value; the checks expect the i8 in v0 and the
; i32 in v1.

; GCN-LABEL: {{^}}struct_i8_i32_func_void:
; GCN-DAG: buffer_load_dword v1
; GCN-DAG: buffer_load_ubyte v0
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define {i8, i32} @struct_i8_i32_func_void() #0 {
  %val = load { i8, i32 }, { i8, i32 } addrspace(1)* undef
  ret { i8, i32 } %val
}

; The same struct written through an explicit sret pointer; the checks
; expect both stores to be addressed through v0.

; GCN-LABEL: {{^}}void_func_sret_struct_i8_i32:
; GCN: buffer_load_ubyte [[VAL0:v[0-9]+]]
; GCN: buffer_load_dword [[VAL1:v[0-9]+]]
; GCN: buffer_store_byte [[VAL0]], v0, s[0:3], 0 offen{{$}}
; GCN: buffer_store_dword [[VAL1]], v0, s[0:3], 0 offen offset:4{{$}}
define void @void_func_sret_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i32 }) %arg0) #0 {
  %val0 = load volatile i8, i8 addrspace(1)* undef
  %val1 = load volatile i32, i32 addrspace(1)* undef
  %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0
  %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 1
  store i8 %val0, i8 addrspace(5)* %gep0
  store i32 %val1, i32 addrspace(5)* %gep1
  ret void
}

; FIXME: Should be able to fold offsets in all of these pre-gfx9. Call
; lowering introduces an extra CopyToReg/CopyFromReg obscuring the
; AssertZext inserted. Not using it introduces the spills.

; Returns of 33 dwords. The gfx900 checks expect every dword to be written
; with buffer_store_dword through the address held in v0, at offsets 0
; through 128.

; GCN-LABEL: {{^}}v33i32_func_void:
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:4{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:8{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:12{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:16{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:20{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:24{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:28{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:32{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:36{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:40{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:44{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:48{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:52{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:56{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:60{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:64{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:68{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:72{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:76{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:80{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:84{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:88{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:92{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:96{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:100{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:104{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:108{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:112{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:116{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:120{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:124{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:128{{$}}
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <33 x i32> @v33i32_func_void() #0 {
  %ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(4)* undef
  %val = load <33 x i32>, <33 x i32> addrspace(1)* %ptr
  ret <33 x i32> %val
}

; Struct of a 32-dword vector followed by an i32; the expected store
; offsets again run from 0 through 128.

; GCN-LABEL: {{^}}struct_v32i32_i32_func_void:
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:4{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:8{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:12{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:16{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:20{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:24{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:28{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:32{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:36{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:40{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:44{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:48{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:52{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:56{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:60{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:64{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:68{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:72{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:76{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:80{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:84{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:88{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:92{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:96{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:100{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:104{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:108{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:112{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:116{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:120{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:124{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:128{{$}}
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 {
  %ptr = load volatile { <32 x i32>, i32 } addrspace(1)*, { <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef
  %val = load { <32 x i32>, i32 }, { <32 x i32>, i32 } addrspace(1)* %ptr
  ret { <32 x i32>, i32 }%val
}

; Struct with the i32 first; the expected stores are one at offset 0 and
; the remaining 32 at offsets 128 through 252.

; GCN-LABEL: {{^}}struct_i32_v32i32_func_void:
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:128{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:132{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:136{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:140{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:144{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:148{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:152{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:156{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:160{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:164{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:168{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:172{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:176{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:180{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:184{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:188{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:192{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:196{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:200{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:204{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:208{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:212{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:216{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:220{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:224{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:228{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:232{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:236{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:240{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:244{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:248{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:252{{$}}
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 {
  %ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(4)* undef
  %val = load { i32, <32 x i32> }, { i32, <32 x i32> } addrspace(1)* %ptr
  ret { i32, <32 x i32> }%val
}

; Make sure the last struct component is returned in v3, not v4.
; GCN-LABEL: {{^}}v3i32_struct_func_void_wasted_reg:
; GCN: ds_read_b32 v0,
; GCN: ds_read_b32 v1,
; GCN: ds_read_b32 v2,
; GCN: ds_read_b32 v3,
define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 {
  %load0 = load volatile i32, i32 addrspace(3)* undef
  %load1 = load volatile i32, i32 addrspace(3)* undef
  %load2 = load volatile i32, i32 addrspace(3)* undef
  %load3 = load volatile i32, i32 addrspace(3)* undef

  %insert.0 = insertelement <3 x i32> undef, i32 %load0, i32 0
  %insert.1 = insertelement <3 x i32> %insert.0, i32 %load1, i32 1
  %insert.2 = insertelement <3 x i32> %insert.1, i32 %load2, i32 2
  %insert.3 = insertvalue { <3 x i32>, i32 } undef, <3 x i32> %insert.2, 0
  %insert.4 = insertvalue { <3 x i32>, i32 } %insert.3, i32 %load3, 1
  ret { <3 x i32>, i32 } %insert.4
}

; Same shape with a <3 x float> first member; the trailing i32 is again
; expected in v3.

; GCN-LABEL: {{^}}v3f32_struct_func_void_wasted_reg:
; GCN: ds_read_b32 v0,
; GCN: ds_read_b32 v1,
; GCN: ds_read_b32 v2,
; GCN: ds_read_b32 v3,
define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 {
  %load0 = load volatile float, float addrspace(3)* undef
  %load1 = load volatile float, float addrspace(3)* undef
  %load2 = load volatile float, float addrspace(3)* undef
  %load3 = load volatile i32, i32 addrspace(3)* undef

  %insert.0 = insertelement <3 x float> undef, float %load0, i32 0
  %insert.1 = insertelement <3 x float> %insert.0, float %load1, i32 1
  %insert.2 = insertelement <3 x float> %insert.1, float %load2, i32 2
  %insert.3 = insertvalue { <3 x float>, i32 } undef, <3 x float> %insert.2, 0
  %insert.4 = insertvalue { <3 x float>, i32 } %insert.3, i32 %load3, 1
  ret { <3 x float>, i32 } %insert.4
}

; Shifts the integer value of the sret pointer right by 16, 17 and 18 and
; stores each result. The checks expect the shift by 16 to be computed from
; v0 and the two larger shifts to be stored as a constant 0.

; GCN-LABEL: {{^}}void_func_sret_max_known_zero_bits:
; GCN: v_lshrrev_b32_e32 [[LSHR16:v[0-9]+]], 16, v0
; GCN: ds_write_b32 {{v[0-9]+}}, [[LSHR16]]

; GCN: v_mov_b32_e32 [[HIGH_BITS:v[0-9]+]], 0
; GCN: ds_write_b32 {{v[0-9]+}}, [[HIGH_BITS]]
; GCN-NEXT: ds_write_b32 {{v[0-9]+}}, [[HIGH_BITS]]
define void @void_func_sret_max_known_zero_bits(i8 addrspace(5)* sret(i8) %arg0) #0 {
  %arg0.int = ptrtoint i8 addrspace(5)* %arg0 to i32

  %lshr0 = lshr i32 %arg0.int, 16
  %lshr1 = lshr i32 %arg0.int, 17
  %lshr2 = lshr i32 %arg0.int, 18

  store volatile i32 %lshr0, i32 addrspace(3)* undef
  store volatile i32 %lshr1, i32 addrspace(3)* undef
  store volatile i32 %lshr2, i32 addrspace(3)* undef
  ret void
}

attributes #0 = { nounwind }