; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89 %s
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s

; Return-value lowering for small integer types. Every function below loads a
; value from global memory (addrspace(1)) and returns it. The checks pin the
; load opcode, the result register (v0), and that only the wait for the load
; sits between the load and the return (s_setpc_b64).

; Plain i1 return, no extension attribute. The wait is pinned to vmcnt(0) so
; this case is checked as strictly as the zeroext/signext variants below.
; GCN-LABEL: {{^}}i1_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define i1 @i1_func_void() #0 {
  %val = load i1, i1 addrspace(1)* undef
  ret i1 %val
}

; zeroext i1 return. The checks currently expect no masking instruction after
; the unsigned byte load (see the FIXME).
; FIXME: Missing and?
; GCN-LABEL: {{^}}i1_zeroext_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define zeroext i1 @i1_zeroext_func_void() #0 {
  %val = load i1, i1 addrspace(1)* undef
  ret i1 %val
}

; signext i1 return. A 1-bit signed bitfield extract of bit 0 is expected
; between the wait and the return.
; GCN-LABEL: {{^}}i1_signext_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1{{$}}
; GCN-NEXT: s_setpc_b64
define signext i1 @i1_signext_func_void() #0 {
  %val = load i1, i1 addrspace(1)* undef
  ret i1 %val
}

; Plain i8 return: unsigned byte load straight into v0.
; GCN-LABEL: {{^}}i8_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define i8 @i8_func_void() #0 {
  %val = load i8, i8 addrspace(1)* undef
  ret i8 %val
}

; zeroext i8 return: the unsigned byte load is expected to be sufficient.
; GCN-LABEL: {{^}}i8_zeroext_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define zeroext i8 @i8_zeroext_func_void() #0 {
  %val = load i8, i8 addrspace(1)* undef
  ret i8 %val
}

; signext i8 return: the extension is expected to be folded into the load
; (buffer_load_sbyte) with no separate extend instruction.
; GCN-LABEL: {{^}}i8_signext_func_void:
; GCN: buffer_load_sbyte v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define signext i8 @i8_signext_func_void() #0 {
  %val = load i8, i8 addrspace(1)* undef
  ret i8 %val
}

; Plain i16 return: unsigned short load straight into v0.
; GCN-LABEL: {{^}}i16_func_void:
; GCN: buffer_load_ushort v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define i16 @i16_func_void() #0 {
  %val = load i16, i16 addrspace(1)* undef
  ret i16 %val
}

; zeroext i16 return: the unsigned short load is expected to be sufficient.
; GCN-LABEL: {{^}}i16_zeroext_func_void:
; GCN: buffer_load_ushort v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define zeroext i16 @i16_zeroext_func_void() #0 {
  %val = load i16, i16 addrspace(1)* undef
  ret i16 %val
}

; signext i16 return: the extension is expected to be folded into the load
; (buffer_load_sshort).
; GCN-LABEL: {{^}}i16_signext_func_void:
; GCN: buffer_load_sshort v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define signext i16 @i16_signext_func_void() #0 {
  %val = load i16, i16 addrspace(1)* undef
  ret i16 %val
}

; 32-bit scalar: returned in v0.
; GCN-LABEL: {{^}}i32_func_void:
; GCN: buffer_load_dword v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define i32 @i32_func_void() #0 {
  %val = load i32, i32 addrspace(1)* undef
  ret i32 %val
}

; 64-bit scalar: returned in the pair v[0:1].
; GCN-LABEL: {{^}}i64_func_void:
; GCN: buffer_load_dwordx2 v[0:1], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define i64 @i64_func_void() #0 {
  %val = load i64, i64 addrspace(1)* undef
  ret i64 %val
}

; float: returned in v0. The resource descriptor SGPRs are matched with a
; pattern instead of fixed register numbers; which SGPRs get allocated is not
; what this test is about, only the destination VGPR and the zero soffset.
; GCN-LABEL: {{^}}f32_func_void:
; GCN: buffer_load_dword v0, off, s{{\[[0-9]+:[0-9]+\]}}, 0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define float @f32_func_void() #0 {
  %val = load float, float addrspace(1)* undef
  ret float %val
}

; double: returned in the pair v[0:1].
; GCN-LABEL: {{^}}f64_func_void:
; GCN: buffer_load_dwordx2 v[0:1], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define double @f64_func_void() #0 {
  %val = load double, double addrspace(1)* undef
  ret double %val
}

; <2 x double>: returned in v[0:3].
; GCN-LABEL: {{^}}v2f64_func_void:
; GCN: buffer_load_dwordx4 v[0:3], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <2 x double> @v2f64_func_void() #0 {
  %val = load <2 x double>, <2 x double> addrspace(1)* undef
  ret <2 x double> %val
}

; <2 x i32>: returned in v[0:1].
; GCN-LABEL: {{^}}v2i32_func_void:
; GCN: buffer_load_dwordx2 v[0:1], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <2 x i32> @v2i32_func_void() #0 {
  %val = load <2 x i32>, <2 x i32> addrspace(1)* undef
  ret <2 x i32> %val
}

; <3 x i32>: the checks expect a full 4-dword load into v[0:3].
; GCN-LABEL: {{^}}v3i32_func_void:
; GCN: buffer_load_dwordx4 v[0:3], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <3 x i32> @v3i32_func_void() #0 {
  %val = load <3 x i32>, <3 x i32> addrspace(1)* undef
  ret <3 x i32> %val
}

; <4 x i32>: returned in v[0:3].
; GCN-LABEL: {{^}}v4i32_func_void:
; GCN: buffer_load_dwordx4 v[0:3], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <4 x i32> @v4i32_func_void() #0 {
  %val = load <4 x i32>, <4 x i32> addrspace(1)* undef
  ret <4 x i32> %val
}

; <5 x i32>: expected as one 4-dword load into v[0:3] plus one dword load into
; v4. The two loads are matched in either order.
; GCN-LABEL: {{^}}v5i32_func_void:
; GCN-DAG: buffer_load_dword v4, off
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <5 x i32> @v5i32_func_void() #0 {
  %val = load volatile <5 x i32>, <5 x i32> addrspace(1)* undef
  ret <5 x i32> %val
}

; From here on the wider cases first load the global pointer (volatile) from
; addrspace(4) and then load the returned value through that pointer.

; <8 x i32>: returned in v[0:7], loaded as two 4-dword pieces in any order.
; GCN-LABEL: {{^}}v8i32_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <8 x i32> @v8i32_func_void() #0 {
  %ptr = load volatile <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef
  %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr
  ret <8 x i32> %val
}

; <16 x i32>: returned in v[0:15].
; GCN-LABEL: {{^}}v16i32_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN-DAG: buffer_load_dwordx4 v[12:15], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <16 x i32> @v16i32_func_void() #0 {
  %ptr = load volatile <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef
  %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr
  ret <16 x i32> %val
}

; <32 x i32>: returned in v[0:31].
; GCN-LABEL: {{^}}v32i32_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN-DAG: buffer_load_dwordx4 v[12:15], off
; GCN-DAG: buffer_load_dwordx4 v[16:19], off
; GCN-DAG: buffer_load_dwordx4 v[20:23], off
; GCN-DAG: buffer_load_dwordx4 v[24:27], off
; GCN-DAG: buffer_load_dwordx4 v[28:31], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <32 x i32> @v32i32_func_void() #0 {
  %ptr = load volatile <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
  %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr
  ret <32 x i32> %val
}

; <2 x i64>: returned in v[0:3].
; GCN-LABEL: {{^}}v2i64_func_void:
; GCN: buffer_load_dwordx4 v[0:3], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <2 x i64> @v2i64_func_void() #0 {
  %val = load <2 x i64>, <2 x i64> addrspace(1)* undef
  ret <2 x i64> %val
}

; <3 x i64>: the checks expect two full 4-dword loads covering v[0:7].
; GCN-LABEL: {{^}}v3i64_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <3 x i64> @v3i64_func_void() #0 {
  %ptr = load volatile <3 x i64> addrspace(1)*, <3 x i64> addrspace(1)* addrspace(4)* undef
  %val = load <3 x i64>, <3 x i64> addrspace(1)* %ptr
  ret <3 x i64> %val
}

; <4 x i64>: returned in v[0:7]. The two loads are independent, so they are
; matched order-insensitively like the other multi-load cases in this file.
; GCN-LABEL: {{^}}v4i64_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <4 x i64> @v4i64_func_void() #0 {
  %ptr = load volatile <4 x i64> addrspace(1)*, <4 x i64> addrspace(1)* addrspace(4)* undef
  %val = load <4 x i64>, <4 x i64> addrspace(1)* %ptr
  ret <4 x i64> %val
}

; <5 x i64>: only the first three 4-dword pieces (v[0:11]) are pinned here.
; GCN-LABEL: {{^}}v5i64_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <5 x i64> @v5i64_func_void() #0 {
  %ptr = load volatile <5 x i64> addrspace(1)*, <5 x i64> addrspace(1)* addrspace(4)* undef
  %val = load <5 x i64>, <5 x i64> addrspace(1)* %ptr
  ret <5 x i64> %val
}

; <8 x i64>: returned in v[0:15].
; GCN-LABEL: {{^}}v8i64_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN-DAG: buffer_load_dwordx4 v[12:15], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <8 x i64> @v8i64_func_void() #0 {
  %ptr = load volatile <8 x i64> addrspace(1)*, <8 x i64> addrspace(1)* addrspace(4)* undef
  %val = load <8 x i64>, <8 x i64> addrspace(1)* %ptr
  ret <8 x i64> %val
}

; <16 x i64>: returned in v[0:31].
; GCN-LABEL: {{^}}v16i64_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN-DAG: buffer_load_dwordx4 v[12:15], off
; GCN-DAG: buffer_load_dwordx4 v[16:19], off
; GCN-DAG: buffer_load_dwordx4 v[20:23], off
; GCN-DAG: buffer_load_dwordx4 v[24:27], off
; GCN-DAG: buffer_load_dwordx4 v[28:31], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <16 x i64> @v16i64_func_void() #0 {
  %ptr = load volatile <16 x i64> addrspace(1)*, <16 x i64> addrspace(1)* addrspace(4)* undef
  %val = load <16 x i64>, <16 x i64> addrspace(1)* %ptr
  ret <16 x i64> %val
}

; <2 x i16>: instruction expectations exist for the gfx900 prefix only, where
; the pair is expected packed in a single dword in v0.
; GCN-LABEL: {{^}}v2i16_func_void:
; GFX9: buffer_load_dword v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <2 x i16> @v2i16_func_void() #0 {
  %val = load <2 x i16>, <2 x i16> addrspace(1)* undef
  ret <2 x i16> %val
}

; GCN-LABEL: {{^}}v3i16_func_void:
; GFX9: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off
; GFX9: s_waitcnt vmcnt(0)
; GFX9: v_lshrrev_b32
; GFX9: s_setpc_b64
define <3 x i16> @v3i16_func_void() #0 {
  %val = load <3 x i16>, <3 x i16> addrspace(1)* undef
  ret <3 x i16> %val
}

; <4 x i16>: expected packed two-per-dword in v[0:1] (gfx900 checks only).
; GCN-LABEL: {{^}}v4i16_func_void:
; GFX9: buffer_load_dwordx2 v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <4 x i16> @v4i16_func_void() #0 {
  %val = load <4 x i16>, <4 x i16> addrspace(1)* undef
  ret <4 x i16> %val
}

; <4 x half>: same expectation as <4 x i16>.
; GCN-LABEL: {{^}}v4f16_func_void:
; GFX9: buffer_load_dwordx2 v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <4 x half> @v4f16_func_void() #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* undef
  ret <4 x half> %val
}

; <5 x i16>: the checks record the current (scalarized) lowering, with the
; high halves shifted out into separate registers.
; FIXME: Should not scalarize
; GCN-LABEL: {{^}}v5i16_func_void:
; GFX9: buffer_load_dwordx2 v[0:1]
; GFX9: buffer_load_ushort v4
; GFX9: v_lshrrev_b32_e32 v5, 16, v0
; GFX9: v_lshrrev_b32_e32 v3, 16, v1
; GCN: s_setpc_b64
define <5 x i16> @v5i16_func_void() #0 {
  %ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(4)* undef
  %val = load <5 x i16>, <5 x i16> addrspace(1)* %ptr
  ret <5 x i16> %val
}

; <8 x i16>: expected packed in v[0:3] (gfx900 checks only).
; GCN-LABEL: {{^}}v8i16_func_void:
; GFX9-DAG: buffer_load_dwordx4 v[0:3], off
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <8 x i16> @v8i16_func_void() #0 {
  %ptr = load volatile <8 x i16> addrspace(1)*, <8 x i16> addrspace(1)* addrspace(4)* undef
  %val = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
  ret <8 x i16> %val
}

; <16 x i16>: expected packed in v[0:7] (gfx900 checks only).
; GCN-LABEL: {{^}}v16i16_func_void:
; GFX9: buffer_load_dwordx4 v[0:3], off
; GFX9: buffer_load_dwordx4 v[4:7], off
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <16 x i16> @v16i16_func_void() #0 {
  %ptr = load volatile <16 x i16> addrspace(1)*, <16 x i16> addrspace(1)* addrspace(4)* undef
  %val = load <16 x i16>, <16 x i16> addrspace(1)* %ptr
  ret <16 x i16> %val
}

; <16 x i8>: only the use of the last four result registers is pinned, then
; the return itself, mirroring the v4i8 case below.
; FIXME: Should pack
; GCN-LABEL: {{^}}v16i8_func_void:
; GCN-DAG: v12
; GCN-DAG: v13
; GCN-DAG: v14
; GCN-DAG: v15
; GCN: s_setpc_b64
define <16 x i8> @v16i8_func_void() #0 {
  %ptr = load volatile <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef
  %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
  ret <16 x i8> %val
}

; <4 x i8>: one dword load, then each byte is shifted into its own register
; (v1, v2, v3); the shifts are matched in any order.
; FIXME: Should pack
; GCN-LABEL: {{^}}v4i8_func_void:
; GCN: buffer_load_dword v0
; GCN-DAG: v_lshrrev_b32_e32 v1, 8, v0
; GCN-DAG: v_lshrrev_b32_e32 v2, 16, v0
; GCN-DAG: v_lshrrev_b32_e32 v3, 24, v0
; GCN: s_setpc_b64
define <4 x i8> @v4i8_func_void() #0 {
  %ptr = load volatile <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(4)* undef
  %val = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
  ret <4 x i8> %val
}

; Small struct returned directly: the i8 member is expected in v0 and the i32
; member in v1.
; GCN-LABEL: {{^}}struct_i8_i32_func_void:
; GCN-DAG: buffer_load_dword v1
; GCN-DAG: buffer_load_ubyte v0
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define {i8, i32} @struct_i8_i32_func_void() #0 {
  %val = load { i8, i32 }, { i8, i32 } addrspace(1)* undef
  ret { i8, i32 } %val
}

; Explicit sret argument in addrspace(5): both members are stored through the
; incoming pointer in v0, the i32 member at byte offset 4.
; GCN-LABEL: {{^}}void_func_sret_struct_i8_i32:
; GCN: buffer_load_ubyte [[VAL0:v[0-9]+]]
; GCN: buffer_load_dword [[VAL1:v[0-9]+]]
; GCN: buffer_store_byte [[VAL0]], v0, s[0:3], s4 offen{{$}}
; GCN: buffer_store_dword [[VAL1]], v0, s[0:3], s4 offen offset:4{{$}}
define void @void_func_sret_struct_i8_i32({ i8, i32 } addrspace(5)* sret %arg0) #0 {
  %val0 = load volatile i8, i8 addrspace(1)* undef
  %val1 = load volatile i32, i32 addrspace(1)* undef
  %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0
  %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 1
  store i8 %val0, i8 addrspace(5)* %gep0
  store i32 %val1, i32 addrspace(5)* %gep1
  ret void
}

; FIXME: Should be able to fold offsets in all of these pre-gfx9. Call
; lowering introduces an extra CopyToReg/CopyFromReg obscuring the
; AssertZext inserted. Not using it introduces the spills.

; <33 x i32>: the checks expect the value to be written to memory through the
; address in v0, one dword store per element at byte offsets 0 through 128
; (gfx900 checks only).
; GCN-LABEL: {{^}}v33i32_func_void:
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:4{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:8{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:12{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:16{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:20{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:24{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:28{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:32{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:36{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:40{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:44{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:48{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:52{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:56{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:60{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:64{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:68{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:72{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:76{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:80{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:84{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:88{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:92{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:96{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:100{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:104{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:108{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:112{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:116{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:120{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:124{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:128{{$}}
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <33 x i32> @v33i32_func_void() #0 {
  %ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(4)* undef
  %val = load <33 x i32>, <33 x i32> addrspace(1)* %ptr
  ret <33 x i32> %val
}

; { <32 x i32>, i32 }: same store layout as <33 x i32>; the trailing i32
; member lands at byte offset 128.
; GCN-LABEL: {{^}}struct_v32i32_i32_func_void:
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:4{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:8{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:12{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:16{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:20{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:24{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:28{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:32{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:36{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:40{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:44{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:48{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:52{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:56{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:60{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:64{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:68{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:72{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:76{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:80{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:84{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:88{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:92{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:96{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:100{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:104{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:108{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:112{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:116{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:120{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:124{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:128{{$}}
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 {
  %ptr = load volatile { <32 x i32>, i32 } addrspace(1)*, { <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef
  %val = load { <32 x i32>, i32 }, { <32 x i32>, i32 } addrspace(1)* %ptr
  ret { <32 x i32>, i32 } %val
}

; { i32, <32 x i32> }: the leading i32 member is stored at offset 0 and the
; vector member occupies byte offsets 128 through 252; no stores are expected
; in the gap between them.
; GCN-LABEL: {{^}}struct_i32_v32i32_func_void:
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:128{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:132{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:136{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:140{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:144{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:148{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:152{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:156{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:160{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:164{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:168{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:172{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:176{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:180{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:184{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:188{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:192{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:196{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:200{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:204{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:208{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:212{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:216{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:220{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:224{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:228{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:232{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:236{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:240{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:244{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:248{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:252{{$}}
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 {
  %ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(4)* undef
  %val = load { i32, <32 x i32> }, { i32, <32 x i32> } addrspace(1)* %ptr
  ret { i32, <32 x i32> } %val
}

attributes #0 = { nounwind }