; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89 %s
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s

; Each function below loads a value from an undef addrspace(1) pointer and
; returns it. The checks pin the load opcode, the result register (v0) and
; the s_setpc_b64 that ends the function.

; Plain i1 return: only a bare s_waitcnt is required between load and return.
; GCN-LABEL: {{^}}i1_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define i1 @i1_func_void() #0 {
  %val = load i1, i1 addrspace(1)* undef
  ret i1 %val
}

; FIXME: Missing and?
; GCN-LABEL: {{^}}i1_zeroext_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define zeroext i1 @i1_zeroext_func_void() #0 {
  %val = load i1, i1 addrspace(1)* undef
  ret i1 %val
}

; signext i1: the loaded byte is sign-extended from bit 0 with v_bfe_i32.
; GCN-LABEL: {{^}}i1_signext_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1{{$}}
; GCN-NEXT: s_setpc_b64
define signext i1 @i1_signext_func_void() #0 {
  %val = load i1, i1 addrspace(1)* undef
  ret i1 %val
}

; GCN-LABEL: {{^}}i8_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define i8 @i8_func_void() #0 {
  %val = load i8, i8 addrspace(1)* undef
  ret i8 %val
}

; GCN-LABEL: {{^}}i8_zeroext_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define zeroext i8 @i8_zeroext_func_void() #0 {
  %val = load i8, i8 addrspace(1)* undef
  ret i8 %val
}

; signext i8: the extension is expected to fold into a signed byte load.
; GCN-LABEL: {{^}}i8_signext_func_void:
; GCN: buffer_load_sbyte v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define signext i8 @i8_signext_func_void() #0 {
  %val = load i8, i8 addrspace(1)* undef
  ret i8 %val
}

; GCN-LABEL: {{^}}i16_func_void:
; GCN: buffer_load_ushort v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define i16 @i16_func_void() #0 {
  %val = load i16, i16 addrspace(1)* undef
  ret i16 %val
}

; GCN-LABEL: {{^}}i16_zeroext_func_void:
; GCN: buffer_load_ushort v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define zeroext i16 @i16_zeroext_func_void() #0 {
  %val = load i16, i16 addrspace(1)* undef
  ret i16 %val
}

; signext i16: the extension is expected to fold into a signed short load.
; GCN-LABEL: {{^}}i16_signext_func_void:
; GCN: buffer_load_sshort v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define signext i16 @i16_signext_func_void() #0 {
  %val = load i16, i16 addrspace(1)* undef
  ret i16 %val
}

; GCN-LABEL: {{^}}i32_func_void:
; GCN: buffer_load_dword v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define i32 @i32_func_void() #0 {
  %val = load i32, i32 addrspace(1)* undef
  ret i32 %val
}

; 64-bit scalars are expected in the register pair v[0:1].
; GCN-LABEL: {{^}}i64_func_void:
; GCN: buffer_load_dwordx2 v[0:1], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define i64 @i64_func_void() #0 {
  %val = load i64, i64 addrspace(1)* undef
  ret i64 %val
}

; This check additionally pins the resource descriptor registers s[8:11].
; GCN-LABEL: {{^}}f32_func_void:
; GCN: buffer_load_dword v0, off, s[8:11], 0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define float @f32_func_void() #0 {
  %val = load float, float addrspace(1)* undef
  ret float %val
}

; GCN-LABEL: {{^}}f64_func_void:
; GCN: buffer_load_dwordx2 v[0:1], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define double @f64_func_void() #0 {
  %val = load double, double addrspace(1)* undef
  ret double %val
}

; GCN-LABEL: {{^}}v2i32_func_void:
; GCN: buffer_load_dwordx2 v[0:1], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <2 x i32> @v2i32_func_void() #0 {
  %val = load <2 x i32>, <2 x i32> addrspace(1)* undef
  ret <2 x i32> %val
}

; The 3-element vector is expected to be loaded with a full dwordx4.
; GCN-LABEL: {{^}}v3i32_func_void:
; GCN: buffer_load_dwordx4 v[0:3], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <3 x i32> @v3i32_func_void() #0 {
  %val = load <3 x i32>, <3 x i32> addrspace(1)* undef
  ret <3 x i32> %val
}

; GCN-LABEL: {{^}}v4i32_func_void:
; GCN: buffer_load_dwordx4 v[0:3], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <4 x i32> @v4i32_func_void() #0 {
  %val = load <4 x i32>, <4 x i32> addrspace(1)* undef
  ret <4 x i32> %val
}

; The two loads may appear in either order (DAG-style checks).
; GCN-LABEL: {{^}}v5i32_func_void:
; GCN-DAG: buffer_load_dword v4, off
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <5 x i32> @v5i32_func_void() #0 {
  %val = load volatile <5 x i32>, <5 x i32> addrspace(1)* undef
  ret <5 x i32> %val
}

; From here on most tests first load (volatile) the data pointer from an
; undef addrspace(2) pointer and then load the returned value through it.

; GCN-LABEL: {{^}}v8i32_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <8 x i32> @v8i32_func_void() #0 {
  %ptr = load volatile <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(2)* undef
  %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr
  ret <8 x i32> %val
}

; GCN-LABEL: {{^}}v16i32_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN-DAG: buffer_load_dwordx4 v[12:15], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <16 x i32> @v16i32_func_void() #0 {
  %ptr = load volatile <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(2)* undef
  %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr
  ret <16 x i32> %val
}

; 32 dwords are still expected entirely in registers, v0 through v31.
; GCN-LABEL: {{^}}v32i32_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN-DAG: buffer_load_dwordx4 v[12:15], off
; GCN-DAG: buffer_load_dwordx4 v[16:19], off
; GCN-DAG: buffer_load_dwordx4 v[20:23], off
; GCN-DAG: buffer_load_dwordx4 v[24:27], off
; GCN-DAG: buffer_load_dwordx4 v[28:31], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <32 x i32> @v32i32_func_void() #0 {
  %ptr = load volatile <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(2)* undef
  %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr
  ret <32 x i32> %val
}

; GCN-LABEL: {{^}}v2i64_func_void:
; GCN: buffer_load_dwordx4 v[0:3], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <2 x i64> @v2i64_func_void() #0 {
  %val = load <2 x i64>, <2 x i64> addrspace(1)* undef
  ret <2 x i64> %val
}

; GCN-LABEL: {{^}}v3i64_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <3 x i64> @v3i64_func_void() #0 {
  %ptr = load volatile <3 x i64> addrspace(1)*, <3 x i64> addrspace(1)* addrspace(2)* undef
  %val = load <3 x i64>, <3 x i64> addrspace(1)* %ptr
  ret <3 x i64> %val
}

; Unlike the neighbouring tests, the two loads here are checked in order.
; GCN-LABEL: {{^}}v4i64_func_void:
; GCN: buffer_load_dwordx4 v[0:3], off
; GCN: buffer_load_dwordx4 v[4:7], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <4 x i64> @v4i64_func_void() #0 {
  %ptr = load volatile <4 x i64> addrspace(1)*, <4 x i64> addrspace(1)* addrspace(2)* undef
  %val = load <4 x i64>, <4 x i64> addrspace(1)* %ptr
  ret <4 x i64> %val
}

; GCN-LABEL: {{^}}v5i64_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <5 x i64> @v5i64_func_void() #0 {
  %ptr = load volatile <5 x i64> addrspace(1)*, <5 x i64> addrspace(1)* addrspace(2)* undef
  %val = load <5 x i64>, <5 x i64> addrspace(1)* %ptr
  ret <5 x i64> %val
}

; GCN-LABEL: {{^}}v8i64_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN-DAG: buffer_load_dwordx4 v[12:15], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <8 x i64> @v8i64_func_void() #0 {
  %ptr = load volatile <8 x i64> addrspace(1)*, <8 x i64> addrspace(1)* addrspace(2)* undef
  %val = load <8 x i64>, <8 x i64> addrspace(1)* %ptr
  ret <8 x i64> %val
}

; GCN-LABEL: {{^}}v16i64_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN-DAG: buffer_load_dwordx4 v[12:15], off
; GCN-DAG: buffer_load_dwordx4 v[16:19], off
; GCN-DAG: buffer_load_dwordx4 v[20:23], off
; GCN-DAG: buffer_load_dwordx4 v[24:27], off
; GCN-DAG: buffer_load_dwordx4 v[28:31], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <16 x i64> @v16i64_func_void() #0 {
  %ptr = load volatile <16 x i64> addrspace(1)*, <16 x i64> addrspace(1)* addrspace(2)* undef
  %val = load <16 x i64>, <16 x i64> addrspace(1)* %ptr
  ret <16 x i64> %val
}

; The i16 vector tests only verify the body for the gfx900 run; the other
; runs just match the function label.

; GCN-LABEL: {{^}}v2i16_func_void:
; GFX9: buffer_load_dword v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <2 x i16> @v2i16_func_void() #0 {
  %val = load <2 x i16>, <2 x i16> addrspace(1)* undef
  ret <2 x i16> %val
}

; GCN-LABEL: {{^}}v3i16_func_void:
; GFX9: buffer_load_dwordx2 v[0:1], off
; GFX9: s_waitcnt vmcnt(0)
; GFX9: v_lshrrev_b32
; GFX9: s_setpc_b64
define <3 x i16> @v3i16_func_void() #0 {
  %val = load <3 x i16>, <3 x i16> addrspace(1)* undef
  ret <3 x i16> %val
}

; GCN-LABEL: {{^}}v4i16_func_void:
; GFX9: buffer_load_dwordx2 v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <4 x i16> @v4i16_func_void() #0 {
  %val = load <4 x i16>, <4 x i16> addrspace(1)* undef
  ret <4 x i16> %val
}

; FIXME: Should not scalarize
; GCN-LABEL: {{^}}v5i16_func_void:
; GFX9: buffer_load_dwordx2 v[0:1]
; GFX9: buffer_load_ushort v4
; GFX9: v_lshrrev_b32_e32 v3, 16, v1
; GFX9: v_mov_b32_e32 v2, v1
; GFX9: v_lshrrev_b32_e32 v1, 16, v0
; GCN: s_setpc_b64
define <5 x i16> @v5i16_func_void() #0 {
  %ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(2)* undef
  %val = load <5 x i16>, <5 x i16> addrspace(1)* %ptr
  ret <5 x i16> %val
}

; GCN-LABEL: {{^}}v8i16_func_void:
; GFX9-DAG: buffer_load_dwordx4 v[0:3], off
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <8 x i16> @v8i16_func_void() #0 {
  %ptr = load volatile <8 x i16> addrspace(1)*, <8 x i16> addrspace(1)* addrspace(2)* undef
  %val = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
  ret <8 x i16> %val
}

; GCN-LABEL: {{^}}v16i16_func_void:
; GFX9: buffer_load_dwordx4 v[0:3], off
; GFX9: buffer_load_dwordx4 v[4:7], off
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <16 x i16> @v16i16_func_void() #0 {
  %ptr = load volatile <16 x i16> addrspace(1)*, <16 x i16> addrspace(1)* addrspace(2)* undef
  %val = load <16 x i16>, <16 x i16> addrspace(1)* %ptr
  ret <16 x i16> %val
}

; The checks below only require that registers v12 through v15 are mentioned
; somewhere in the function, in any order.
; FIXME: Should pack
; GCN-LABEL: {{^}}v16i8_func_void:
; GCN-DAG: v12
; GCN-DAG: v13
; GCN-DAG: v14
; GCN-DAG: v15
define <16 x i8> @v16i8_func_void() #0 {
  %ptr = load volatile <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(2)* undef
  %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
  ret <16 x i8> %val
}

; One dword is loaded and split into four byte results in v0 through v3; the
; extraction of byte 1 differs between the hawaii run and the newer targets.
; FIXME: Should pack
; GCN-LABEL: {{^}}v4i8_func_void:
; GCN: buffer_load_dword v0
; GCN-DAG: v_lshrrev_b32_e32 v2, 16, v0
; GCN-DAG: v_lshrrev_b32_e32 v3, 24, v0
; CI-DAG: v_bfe_u32 v1, v0, 8, 8
; GFX89-DAG: v_lshrrev_b16_e32 v1, 8, v0
; GCN: s_setpc_b64
define <4 x i8> @v4i8_func_void() #0 {
  %ptr = load volatile <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(2)* undef
  %val = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
  ret <4 x i8> %val
}

; A small struct is returned member by member: the i8 in v0, the i32 in v1.
; GCN-LABEL: {{^}}struct_i8_i32_func_void:
; GCN-DAG: buffer_load_dword v1
; GCN-DAG: buffer_load_ubyte v0
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define {i8, i32} @struct_i8_i32_func_void() #0 {
  %val = load { i8, i32 }, { i8, i32 } addrspace(1)* undef
  ret { i8, i32 } %val
}

; Explicit sret: both members are stored through the pointer argument, which
; the checks expect in v0 (byte at offset 0, dword at offset 4).
; GCN-LABEL: {{^}}void_func_sret_struct_i8_i32:
; GCN: buffer_load_ubyte [[VAL0:v[0-9]+]]
; GCN: buffer_load_dword [[VAL1:v[0-9]+]]
; GCN: buffer_store_byte [[VAL0]], v0, s[0:3], s4 offen{{$}}
; GCN: buffer_store_dword [[VAL1]], v0, s[0:3], s4 offen offset:4{{$}}
define void @void_func_sret_struct_i8_i32({ i8, i32 } addrspace(5)* sret %arg0) #0 {
  %val0 = load volatile i8, i8 addrspace(1)* undef
  %val1 = load volatile i32, i32 addrspace(1)* undef
  %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0
  %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 1
  store i8 %val0, i8 addrspace(5)* %gep0
  store i32 %val1, i32 addrspace(5)* %gep1
  ret void
}

; FIXME: Should be able to fold offsets in all of these pre-gfx9. Call
; lowering introduces an extra CopyToReg/CopyFromReg obscuring the
; AssertZext inserted. Not using it introduces the spills.
; The remaining tests return 33 dwords' worth of data. The checks (gfx900 run
; only) expect every dword of the result to be written with
; buffer_store_dword through the address held in v0 instead of being left in
; registers -- presumably because the value no longer fits the return
; registers; confirm against the calling-convention lowering.

; GCN-LABEL: {{^}}v33i32_func_void:
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:4{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:8{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:12{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:16{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:20{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:24{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:28{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:32{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:36{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:40{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:44{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:48{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:52{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:56{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:60{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:64{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:68{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:72{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:76{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:80{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:84{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:88{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:92{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:96{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:100{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:104{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:108{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:112{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:116{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:120{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:124{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:128{{$}}
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <33 x i32> @v33i32_func_void() #0 {
  %ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(2)* undef
  %val = load <33 x i32>, <33 x i32> addrspace(1)* %ptr
  ret <33 x i32> %val
}

; Vector first, scalar last: the trailing i32 is expected at offset 128.
; GCN-LABEL: {{^}}struct_v32i32_i32_func_void:
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:4{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:8{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:12{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:16{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:20{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:24{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:28{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:32{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:36{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:40{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:44{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:48{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:52{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:56{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:60{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:64{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:68{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:72{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:76{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:80{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:84{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:88{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:92{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:96{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:100{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:104{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:108{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:112{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:116{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:120{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:124{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:128{{$}}
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 {
  %ptr = load volatile { <32 x i32>, i32 } addrspace(1)*, { <32 x i32>, i32 } addrspace(1)* addrspace(2)* undef
  %val = load { <32 x i32>, i32 }, { <32 x i32>, i32 } addrspace(1)* %ptr
  ret { <32 x i32>, i32 } %val
}

; Scalar first, vector last: the i32 is expected at offset 0 and the vector
; elements at offsets 128 through 252, leaving offsets 4..124 unwritten.
; GCN-LABEL: {{^}}struct_i32_v32i32_func_void:
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:128{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:132{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:136{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:140{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:144{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:148{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:152{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:156{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:160{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:164{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:168{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:172{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:176{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:180{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:184{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:188{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:192{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:196{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:200{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:204{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:208{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:212{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:216{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:220{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:224{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:228{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:232{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:236{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:240{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:244{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:248{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:252{{$}}
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 {
  %ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(2)* undef
  %val = load { i32, <32 x i32> }, { i32, <32 x i32> } addrspace(1)* %ptr
  ret { i32, <32 x i32> } %val
}

; Attribute group shared by every function in this file.
attributes #0 = { nounwind }