; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89 %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s

; Most functions below load a value of one particular type and return it.
; The check lines pin the load instruction(s) and the registers the result is
; expected in, followed by the s_setpc_b64 that ends the function. The tests
; at the end of this group (33 dwords and larger) instead check stores through
; the pointer passed in v0.

; GCN-LABEL: {{^}}i1_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define i1 @i1_func_void() #0 {
  %val = load i1, i1 addrspace(1)* undef
  ret i1 %val
}

; FIXME: Missing and?
; GCN-LABEL: {{^}}i1_zeroext_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define zeroext i1 @i1_zeroext_func_void() #0 {
  %val = load i1, i1 addrspace(1)* undef
  ret i1 %val
}

; GCN-LABEL: {{^}}i1_signext_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1{{$}}
; GCN-NEXT: s_setpc_b64
define signext i1 @i1_signext_func_void() #0 {
  %val = load i1, i1 addrspace(1)* undef
  ret i1 %val
}

; GCN-LABEL: {{^}}i8_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define i8 @i8_func_void() #0 {
  %val = load i8, i8 addrspace(1)* undef
  ret i8 %val
}

; GCN-LABEL: {{^}}i8_zeroext_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define zeroext i8 @i8_zeroext_func_void() #0 {
  %val = load i8, i8 addrspace(1)* undef
  ret i8 %val
}

; GCN-LABEL: {{^}}i8_signext_func_void:
; GCN: buffer_load_sbyte v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define signext i8 @i8_signext_func_void() #0 {
  %val = load i8, i8 addrspace(1)* undef
  ret i8 %val
}

; GCN-LABEL: {{^}}i16_func_void:
; GCN: buffer_load_ushort v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define i16 @i16_func_void() #0 {
  %val = load i16, i16 addrspace(1)* undef
  ret i16 %val
}

; GCN-LABEL: {{^}}i16_zeroext_func_void:
; GCN: buffer_load_ushort v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define zeroext i16 @i16_zeroext_func_void() #0 {
  %val = load i16, i16 addrspace(1)* undef
  ret i16 %val
}

; GCN-LABEL: {{^}}i16_signext_func_void:
; GCN: buffer_load_sshort v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define signext i16 @i16_signext_func_void() #0 {
  %val = load i16, i16 addrspace(1)* undef
  ret i16 %val
}

; GCN-LABEL: {{^}}i32_func_void:
; GCN: buffer_load_dword v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define i32 @i32_func_void() #0 {
  %val = load i32, i32 addrspace(1)* undef
  ret i32 %val
}

; GCN-LABEL: {{^}}i64_func_void:
; GCN: buffer_load_dwordx2 v[0:1], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define i64 @i64_func_void() #0 {
  %val = load i64, i64 addrspace(1)* undef
  ret i64 %val
}

; NOTE(review): unlike the neighbouring tests, the load check below also pins
; the trailing operands `s[8:11], 0`; confirm that matching the exact SGPR
; range is intentional.
; GCN-LABEL: {{^}}f32_func_void:
; GCN: buffer_load_dword v0, off, s[8:11], 0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define float @f32_func_void() #0 {
  %val = load float, float addrspace(1)* undef
  ret float %val
}

; GCN-LABEL: {{^}}f64_func_void:
; GCN: buffer_load_dwordx2 v[0:1], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define double @f64_func_void() #0 {
  %val = load double, double addrspace(1)* undef
  ret double %val
}

; GCN-LABEL: {{^}}v2f64_func_void:
; GCN: buffer_load_dwordx4 v[0:3], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <2 x double> @v2f64_func_void() #0 {
  %val = load <2 x double>, <2 x double> addrspace(1)* undef
  ret <2 x double> %val
}

; GCN-LABEL: {{^}}v2i32_func_void:
; GCN: buffer_load_dwordx2 v[0:1], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <2 x i32> @v2i32_func_void() #0 {
  %val = load <2 x i32>, <2 x i32> addrspace(1)* undef
  ret <2 x i32> %val
}

; GCN-LABEL: {{^}}v3i32_func_void:
; GCN: buffer_load_dwordx4 v[0:3], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <3 x i32> @v3i32_func_void() #0 {
  %val = load <3 x i32>, <3 x i32> addrspace(1)* undef
  ret <3 x i32> %val
}

; GCN-LABEL: {{^}}v4i32_func_void:
; GCN: buffer_load_dwordx4 v[0:3], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <4 x i32> @v4i32_func_void() #0 {
  %val = load <4 x i32>, <4 x i32> addrspace(1)* undef
  ret <4 x i32> %val
}

; GCN-LABEL: {{^}}v5i32_func_void:
; GCN-DAG: buffer_load_dword v4, off
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <5 x i32> @v5i32_func_void() #0 {
  %val = load volatile <5 x i32>, <5 x i32> addrspace(1)* undef
  ret <5 x i32> %val
}

; GCN-LABEL: {{^}}v8i32_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <8 x i32> @v8i32_func_void() #0 {
  %ptr = load volatile <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef
  %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr
  ret <8 x i32> %val
}

; GCN-LABEL: {{^}}v16i32_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN-DAG: buffer_load_dwordx4 v[12:15], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <16 x i32> @v16i32_func_void() #0 {
  %ptr = load volatile <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef
  %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr
  ret <16 x i32> %val
}

; GCN-LABEL: {{^}}v32i32_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN-DAG: buffer_load_dwordx4 v[12:15], off
; GCN-DAG: buffer_load_dwordx4 v[16:19], off
; GCN-DAG: buffer_load_dwordx4 v[20:23], off
; GCN-DAG: buffer_load_dwordx4 v[24:27], off
; GCN-DAG: buffer_load_dwordx4 v[28:31], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <32 x i32> @v32i32_func_void() #0 {
  %ptr = load volatile <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
  %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr
  ret <32 x i32> %val
}

; GCN-LABEL: {{^}}v2i64_func_void:
; GCN: buffer_load_dwordx4 v[0:3], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <2 x i64> @v2i64_func_void() #0 {
  %val = load <2 x i64>, <2 x i64> addrspace(1)* undef
  ret <2 x i64> %val
}

; GCN-LABEL: {{^}}v3i64_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <3 x i64> @v3i64_func_void() #0 {
  %ptr = load volatile <3 x i64> addrspace(1)*, <3 x i64> addrspace(1)* addrspace(4)* undef
  %val = load <3 x i64>, <3 x i64> addrspace(1)* %ptr
  ret <3 x i64> %val
}

; GCN-LABEL: {{^}}v4i64_func_void:
; GCN: buffer_load_dwordx4 v[0:3], off
; GCN: buffer_load_dwordx4 v[4:7], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <4 x i64> @v4i64_func_void() #0 {
  %ptr = load volatile <4 x i64> addrspace(1)*, <4 x i64> addrspace(1)* addrspace(4)* undef
  %val = load <4 x i64>, <4 x i64> addrspace(1)* %ptr
  ret <4 x i64> %val
}

; GCN-LABEL: {{^}}v5i64_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <5 x i64> @v5i64_func_void() #0 {
  %ptr = load volatile <5 x i64> addrspace(1)*, <5 x i64> addrspace(1)* addrspace(4)* undef
  %val = load <5 x i64>, <5 x i64> addrspace(1)* %ptr
  ret <5 x i64> %val
}

; GCN-LABEL: {{^}}v8i64_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN-DAG: buffer_load_dwordx4 v[12:15], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <8 x i64> @v8i64_func_void() #0 {
  %ptr = load volatile <8 x i64> addrspace(1)*, <8 x i64> addrspace(1)* addrspace(4)* undef
  %val = load <8 x i64>, <8 x i64> addrspace(1)* %ptr
  ret <8 x i64> %val
}

; GCN-LABEL: {{^}}v16i64_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN-DAG: buffer_load_dwordx4 v[12:15], off
; GCN-DAG: buffer_load_dwordx4 v[16:19], off
; GCN-DAG: buffer_load_dwordx4 v[20:23], off
; GCN-DAG: buffer_load_dwordx4 v[24:27], off
; GCN-DAG: buffer_load_dwordx4 v[28:31], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <16 x i64> @v16i64_func_void() #0 {
  %ptr = load volatile <16 x i64> addrspace(1)*, <16 x i64> addrspace(1)* addrspace(4)* undef
  %val = load <16 x i64>, <16 x i64> addrspace(1)* %ptr
  ret <16 x i64> %val
}

; GCN-LABEL: {{^}}v2i16_func_void:
; GFX9: buffer_load_dword v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <2 x i16> @v2i16_func_void() #0 {
  %val = load <2 x i16>, <2 x i16> addrspace(1)* undef
  ret <2 x i16> %val
}

; GCN-LABEL: {{^}}v3i16_func_void:
; GFX9: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <3 x i16> @v3i16_func_void() #0 {
  %val = load <3 x i16>, <3 x i16> addrspace(1)* undef
  ret <3 x i16> %val
}

; GCN-LABEL: {{^}}v4i16_func_void:
; GFX9: buffer_load_dwordx2 v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <4 x i16> @v4i16_func_void() #0 {
  %val = load <4 x i16>, <4 x i16> addrspace(1)* undef
  ret <4 x i16> %val
}

; GCN-LABEL: {{^}}v4f16_func_void:
; GFX9: buffer_load_dwordx2 v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <4 x half> @v4f16_func_void() #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* undef
  ret <4 x half> %val
}

; FIXME: Mixing buffer and global
; FIXME: Should not scalarize
; GCN-LABEL: {{^}}v5i16_func_void:
; GFX9: buffer_load_dwordx2 v[0:1]
; GFX9-NEXT: global_load_short_d16 v2
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64
define <5 x i16> @v5i16_func_void() #0 {
  %ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(4)* undef
  %val = load <5 x i16>, <5 x i16> addrspace(1)* %ptr
  ret <5 x i16> %val
}

; GCN-LABEL: {{^}}v8i16_func_void:
; GFX9-DAG: buffer_load_dwordx4 v[0:3], off
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <8 x i16> @v8i16_func_void() #0 {
  %ptr = load volatile <8 x i16> addrspace(1)*, <8 x i16> addrspace(1)* addrspace(4)* undef
  %val = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
  ret <8 x i16> %val
}

; GCN-LABEL: {{^}}v16i16_func_void:
; GFX9: buffer_load_dwordx4 v[0:3], off
; GFX9: buffer_load_dwordx4 v[4:7], off
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <16 x i16> @v16i16_func_void() #0 {
  %ptr = load volatile <16 x i16> addrspace(1)*, <16 x i16> addrspace(1)* addrspace(4)* undef
  %val = load <16 x i16>, <16 x i16> addrspace(1)* %ptr
  ret <16 x i16> %val
}

; FIXME: Should pack
; GCN-LABEL: {{^}}v16i8_func_void:
; GCN-DAG: v12
; GCN-DAG: v13
; GCN-DAG: v14
; GCN-DAG: v15
define <16 x i8> @v16i8_func_void() #0 {
  %ptr = load volatile <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef
  %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
  ret <16 x i8> %val
}

; FIXME: Should pack
; GCN-LABEL: {{^}}v4i8_func_void:
; GCN: buffer_load_dword v0
; GCN-DAG: v_lshrrev_b32_e32 v1, 8, v0
; GCN-DAG: v_lshrrev_b32_e32 v2, 16, v0
; GCN-DAG: v_lshrrev_b32_e32 v3, 24, v0
; GCN: s_setpc_b64
define <4 x i8> @v4i8_func_void() #0 {
  %ptr = load volatile <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(4)* undef
  %val = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
  ret <4 x i8> %val
}

; GCN-LABEL: {{^}}struct_i8_i32_func_void:
; GCN-DAG: buffer_load_dword v1
; GCN-DAG: buffer_load_ubyte v0
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define {i8, i32} @struct_i8_i32_func_void() #0 {
  %val = load { i8, i32 }, { i8, i32 } addrspace(1)* undef
  ret { i8, i32 } %val
}

; GCN-LABEL: {{^}}void_func_sret_struct_i8_i32:
; GCN: buffer_load_ubyte [[VAL0:v[0-9]+]]
; GCN: buffer_load_dword [[VAL1:v[0-9]+]]
; GCN: buffer_store_byte [[VAL0]], v0, s[0:3], s4 offen{{$}}
; GCN: buffer_store_dword [[VAL1]], v0, s[0:3], s4 offen offset:4{{$}}
define void @void_func_sret_struct_i8_i32({ i8, i32 } addrspace(5)* sret %arg0) #0 {
  %val0 = load volatile i8, i8 addrspace(1)* undef
  %val1 = load volatile i32, i32 addrspace(1)* undef
  %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0
  %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 1
  store i8 %val0, i8 addrspace(5)* %gep0
  store i32 %val1, i32 addrspace(5)* %gep1
  ret void
}

; FIXME: Should be able to fold offsets in all of these pre-gfx9. Call
; lowering introduces an extra CopyToReg/CopyFromReg obscuring the
; AssertZext inserted. Not using it introduces the spills.

; GCN-LABEL: {{^}}v33i32_func_void:
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:4{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:8{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:12{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:16{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:20{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:24{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:28{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:32{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:36{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:40{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:44{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:48{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:52{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:56{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:60{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:64{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:68{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:72{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:76{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:80{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:84{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:88{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:92{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:96{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:100{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:104{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:108{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:112{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:116{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:120{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:124{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:128{{$}}
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <33 x i32> @v33i32_func_void() #0 {
  %ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(4)* undef
  %val = load <33 x i32>, <33 x i32> addrspace(1)* %ptr
  ret <33 x i32> %val
}

; GCN-LABEL: {{^}}struct_v32i32_i32_func_void:
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:4{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:8{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:12{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:16{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:20{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:24{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:28{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:32{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:36{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:40{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:44{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:48{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:52{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:56{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:60{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:64{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:68{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:72{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:76{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:80{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:84{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:88{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:92{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:96{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:100{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:104{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:108{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:112{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:116{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:120{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:124{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:128{{$}}
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 {
  %ptr = load volatile { <32 x i32>, i32 } addrspace(1)*, { <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef
  %val = load { <32 x i32>, i32 }, { <32 x i32>, i32 } addrspace(1)* %ptr
  ret { <32 x i32>, i32 }%val
}

; GCN-LABEL: {{^}}struct_i32_v32i32_func_void:
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:128{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:132{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:136{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:140{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:144{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:148{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:152{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:156{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:160{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:164{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:168{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:172{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:176{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:180{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:184{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:188{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:192{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:196{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:200{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:204{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:208{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:212{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:216{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:220{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:224{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:228{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:232{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:236{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:240{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:244{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:248{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:252{{$}}
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 {
  %ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(4)* undef
  %val = load { i32, <32 x i32> }, { i32, <32 x i32> } addrspace(1)* %ptr
  ret { i32, <32 x i32> }%val
}

; Make sure the last struct component is returned in v3, not v4.
; The <3 x i32> member is assembled from three volatile addrspace(3) loads and
; the trailing i32 member comes from a fourth; the checks expect the four
; ds_read_b32 results in v0, v1, v2 and v3 respectively.
; GCN-LABEL: {{^}}v3i32_struct_func_void_wasted_reg:
; GCN: ds_read_b32 v0,
; GCN: ds_read_b32 v1,
; GCN: ds_read_b32 v2,
; GCN: ds_read_b32 v3,
define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 {
  %load0 = load volatile i32, i32 addrspace(3)* undef
  %load1 = load volatile i32, i32 addrspace(3)* undef
  %load2 = load volatile i32, i32 addrspace(3)* undef
  %load3 = load volatile i32, i32 addrspace(3)* undef

  %insert.0 = insertelement <3 x i32> undef, i32 %load0, i32 0
  %insert.1 = insertelement <3 x i32> %insert.0, i32 %load1, i32 1
  %insert.2 = insertelement <3 x i32> %insert.1, i32 %load2, i32 2
  %insert.3 = insertvalue { <3 x i32>, i32 } undef, <3 x i32> %insert.2, 0
  %insert.4 = insertvalue { <3 x i32>, i32 } %insert.3, i32 %load3, 1
  ret { <3 x i32>, i32 } %insert.4
}

; Same shape as the test above, with a <3 x float> vector member; the trailing
; struct member is still an i32.
; GCN-LABEL: {{^}}v3f32_struct_func_void_wasted_reg:
; GCN: ds_read_b32 v0,
; GCN: ds_read_b32 v1,
; GCN: ds_read_b32 v2,
; GCN: ds_read_b32 v3,
define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 {
  %load0 = load volatile float, float addrspace(3)* undef
  %load1 = load volatile float, float addrspace(3)* undef
  %load2 = load volatile float, float addrspace(3)* undef
  %load3 = load volatile i32, i32 addrspace(3)* undef

  %insert.0 = insertelement <3 x float> undef, float %load0, i32 0
  %insert.1 = insertelement <3 x float> %insert.0, float %load1, i32 1
  %insert.2 = insertelement <3 x float> %insert.1, float %load2, i32 2
  %insert.3 = insertvalue { <3 x float>, i32 } undef, <3 x float> %insert.2, 0
  %insert.4 = insertvalue { <3 x float>, i32 } %insert.3, i32 %load3, 1
  ret { <3 x float>, i32 } %insert.4
}

; Attribute group referenced as #0 by the function definitions in this file.
attributes #0 = { nounwind }