; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,GFX900-MUBUF %s
; RUN: llc -march=amdgcn -mcpu=gfx906 -amdgpu-sroa=0 -mattr=-promote-alloca,+sram-ecc -verify-machineinstrs < %s | FileCheck --check-prefix=GFX906 %s
; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck --check-prefix=GFX803 %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs --mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GFX900,GFX900-FLATSCR %s
; Each function below loads a 16-bit value (or an i8 extended to 16 bits) and
; inserts it into element 0 of a two-element 16-bit vector. The expectations
; cover gfx900 (with and without +enable-flat-scratch), gfx906 with +sram-ecc,
; and fiji. Do not hand-edit the assertion lines; regenerate them with the
; script named on the first line.

; LDS i16 load placed in element 0 of an undef <2 x i16>; element 1 stays
; undef and the vector is returned.
define <2 x i16> @load_local_lo_v2i16_undeflo(i16 addrspace(3)* %in) #0 {
; GFX900-LABEL: load_local_lo_v2i16_undeflo:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ds_read_u16_d16 v0, v0
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: load_local_lo_v2i16_undeflo:
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: ds_read_u16 v0, v0
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX803-LABEL: load_local_lo_v2i16_undeflo:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX803-NEXT: s_mov_b32 m0, -1
; GFX803-NEXT: ds_read_u16 v0, v0
; GFX803-NEXT: s_waitcnt lgkmcnt(0)
; GFX803-NEXT: s_setpc_b64 s[30:31]
entry:
  %load = load i16, i16 addrspace(3)* %in
  %build = insertelement <2 x i16> undef, i16 %load, i32 0
  ret <2 x i16> %build
}

; LDS i16 load placed in element 0; element 1 is the scalar i16 argument %reg.
; The vector is returned.
define <2 x i16> @load_local_lo_v2i16_reglo(i16 addrspace(3)* %in, i16 %reg) #0 {
; GFX900-LABEL: load_local_lo_v2i16_reglo:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ds_read_u16 v0, v0
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: load_local_lo_v2i16_reglo:
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: ds_read_u16 v0, v0
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX803-LABEL: load_local_lo_v2i16_reglo:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX803-NEXT: s_mov_b32 m0, -1
; GFX803-NEXT: ds_read_u16 v0, v0
; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX803-NEXT: s_waitcnt lgkmcnt(0)
; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
; GFX803-NEXT: s_setpc_b64 s[30:31]
entry:
  %load = load i16, i16 addrspace(3)* %in
  %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
  %build1 = insertelement <2 x i16> %build0, i16 %load, i32 0
  ret <2 x i16> %build1
}

; Show that we get reasonable regalloc without physreg constraints.
; Same vector construction as load_local_lo_v2i16_reglo, but the result is
; stored to an undef global pointer instead of being returned.
define void @load_local_lo_v2i16_reglo_vreg(i16 addrspace(3)* %in, i16 %reg) #0 {
; GFX900-LABEL: load_local_lo_v2i16_reglo_vreg:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ds_read_u16 v0, v0
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX900-NEXT: global_store_dword v[0:1], v0, off
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: load_local_lo_v2i16_reglo_vreg:
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: ds_read_u16 v0, v0
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX906-NEXT: global_store_dword v[0:1], v0, off
; GFX906-NEXT: s_waitcnt vmcnt(0)
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX803-LABEL: load_local_lo_v2i16_reglo_vreg:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX803-NEXT: s_mov_b32 m0, -1
; GFX803-NEXT: ds_read_u16 v0, v0
; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX803-NEXT: s_waitcnt lgkmcnt(0)
; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
; GFX803-NEXT: flat_store_dword v[0:1], v0
; GFX803-NEXT: s_waitcnt vmcnt(0)
; GFX803-NEXT: s_setpc_b64 s[30:31]
entry:
  %load = load i16, i16 addrspace(3)* %in
  %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
  %build1 = insertelement <2 x i16> %build0, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; LDS i16 load placed in element 0 of a zeroinitializer vector, so element 1
; is the constant 0. The vector is returned.
define <2 x i16> @load_local_lo_v2i16_zerolo(i16 addrspace(3)* %in) #0 {
; GFX900-LABEL: load_local_lo_v2i16_zerolo:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v1, 0
; GFX900-NEXT: ds_read_u16_d16 v1, v0
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v0, v1
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: load_local_lo_v2i16_zerolo:
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: ds_read_u16 v0, v0
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX803-LABEL: load_local_lo_v2i16_zerolo:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX803-NEXT: s_mov_b32 m0, -1
; GFX803-NEXT: ds_read_u16 v0, v0
; GFX803-NEXT: s_waitcnt lgkmcnt(0)
; GFX803-NEXT: s_setpc_b64 s[30:31]
entry:
  %load = load i16, i16 addrspace(3)* %in
  %build = insertelement <2 x i16> zeroinitializer, i16 %load, i32 0
  ret <2 x i16> %build
}

; LDS half load placed in element 0 of the constant vector <0.0, 2.0>, so
; element 1 is the floating-point immediate 2.0. The vector is returned.
define <2 x half> @load_local_lo_v2f16_fpimm(half addrspace(3)* %in) #0 {
; GFX900-LABEL: load_local_lo_v2f16_fpimm:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v1, 2.0
; GFX900-NEXT: ds_read_u16_d16 v1, v0
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v0, v1
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: load_local_lo_v2f16_fpimm:
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: ds_read_u16 v0, v0
; GFX906-NEXT: s_movk_i32 s4, 0x4000
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX906-NEXT: v_lshl_or_b32 v0, s4, 16, v0
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX803-LABEL: load_local_lo_v2f16_fpimm:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX803-NEXT: s_mov_b32 m0, -1
; GFX803-NEXT: ds_read_u16 v0, v0
; GFX803-NEXT: s_waitcnt lgkmcnt(0)
; GFX803-NEXT: v_or_b32_e32 v0, 2.0, v0
; GFX803-NEXT: s_setpc_b64 s[30:31]
entry:
  %load = load half, half addrspace(3)* %in
  %build = insertelement <2 x half> <half 0.0, half 2.0>, half %load, i32 0
  ret <2 x half> %build
}

; %reg arrives as an i32 and is reinterpreted as <2 x half>; the LDS half load
; replaces element 0 and element 1 of %reg is kept. Result is stored to an
; undef global pointer.
define void @load_local_lo_v2f16_reghi_vreg(half addrspace(3)* %in, i32 %reg) #0 {
; GFX900-LABEL: load_local_lo_v2f16_reghi_vreg:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ds_read_u16_d16 v1, v0
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-NEXT: global_store_dword v[0:1], v1, off
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: load_local_lo_v2f16_reghi_vreg:
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: ds_read_u16 v0, v0
; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX906-NEXT: global_store_dword v[0:1], v0, off
; GFX906-NEXT: s_waitcnt vmcnt(0)
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX803-LABEL: load_local_lo_v2f16_reghi_vreg:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX803-NEXT: s_mov_b32 m0, -1
; GFX803-NEXT: ds_read_u16 v0, v0
; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GFX803-NEXT: s_waitcnt lgkmcnt(0)
; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
; GFX803-NEXT: flat_store_dword v[0:1], v0
; GFX803-NEXT: s_waitcnt vmcnt(0)
; GFX803-NEXT: s_setpc_b64 s[30:31]
entry:
  %reg.bc = bitcast i32 %reg to <2 x half>
  %load = load half, half addrspace(3)* %in
  %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; Half variant of load_local_lo_v2i16_reglo_vreg: the scalar half %reg goes in
; element 1, the LDS half load in element 0, and the result is stored.
define void @load_local_lo_v2f16_reglo_vreg(half addrspace(3)* %in, half %reg) #0 {
; GFX900-LABEL: load_local_lo_v2f16_reglo_vreg:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ds_read_u16 v0, v0
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX900-NEXT: global_store_dword v[0:1], v0, off
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: load_local_lo_v2f16_reglo_vreg:
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: ds_read_u16 v0, v0
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX906-NEXT: global_store_dword v[0:1], v0, off
; GFX906-NEXT: s_waitcnt vmcnt(0)
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX803-LABEL: load_local_lo_v2f16_reglo_vreg:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX803-NEXT: s_mov_b32 m0, -1
; GFX803-NEXT: ds_read_u16 v0, v0
; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX803-NEXT: s_waitcnt lgkmcnt(0)
; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
; GFX803-NEXT: flat_store_dword v[0:1], v0
; GFX803-NEXT: s_waitcnt vmcnt(0)
; GFX803-NEXT: s_setpc_b64 s[30:31]
entry:
  %load = load half, half addrspace(3)* %in
  %build0 = insertelement <2 x half> undef, half %reg, i32 1
  %build1 = insertelement <2 x half> %build0, half %load, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; LDS i8 load zero-extended to i16 and written into element 0 of %reg (an i32
; reinterpreted as <2 x i16>); element 1 of %reg is kept. Result is stored.
define void @load_local_lo_v2i16_reghi_vreg_zexti8(i8 addrspace(3)* %in, i32 %reg) #0 {
; GFX900-LABEL: load_local_lo_v2i16_reghi_vreg_zexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ds_read_u8_d16 v1, v0
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-NEXT: global_store_dword v[0:1], v1, off
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: load_local_lo_v2i16_reghi_vreg_zexti8:
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: ds_read_u8 v0, v0
; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
; GFX906-NEXT: v_bfi_b32 v0, v2, v0, v1
; GFX906-NEXT: global_store_dword v[0:1], v0, off
; GFX906-NEXT: s_waitcnt vmcnt(0)
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX803-LABEL: load_local_lo_v2i16_reghi_vreg_zexti8:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX803-NEXT: s_mov_b32 m0, -1
; GFX803-NEXT: ds_read_u8 v0, v0
; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GFX803-NEXT: s_waitcnt lgkmcnt(0)
; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
; GFX803-NEXT: flat_store_dword v[0:1], v0
; GFX803-NEXT: s_waitcnt vmcnt(0)
; GFX803-NEXT: s_setpc_b64 s[30:31]
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load i8, i8 addrspace(3)* %in
  %ext = zext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; LDS i8 load zero-extended to i16 in element 0; the scalar i16 %reg goes in
; element 1 of an otherwise undef vector. Result is stored.
define void @load_local_lo_v2i16_reglo_vreg_zexti8(i8 addrspace(3)* %in, i16 %reg) #0 {
; GFX900-LABEL: load_local_lo_v2i16_reglo_vreg_zexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ds_read_u8 v0, v0
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX900-NEXT: global_store_dword v[0:1], v0, off
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: load_local_lo_v2i16_reglo_vreg_zexti8:
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: ds_read_u8 v0, v0
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX906-NEXT: global_store_dword v[0:1], v0, off
; GFX906-NEXT: s_waitcnt vmcnt(0)
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX803-LABEL: load_local_lo_v2i16_reglo_vreg_zexti8:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX803-NEXT: s_mov_b32 m0, -1
; GFX803-NEXT: ds_read_u8 v0, v0
; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX803-NEXT: s_waitcnt lgkmcnt(0)
; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
; GFX803-NEXT: flat_store_dword v[0:1], v0
; GFX803-NEXT: s_waitcnt vmcnt(0)
; GFX803-NEXT: s_setpc_b64 s[30:31]
entry:
  %load = load i8, i8 addrspace(3)* %in
  %ext = zext i8 %load to i16
  %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
  %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Sign-extending counterpart of load_local_lo_v2i16_reghi_vreg_zexti8: the i8
; load is sext'ed to i16 before replacing element 0 of the bitcast %reg.
define void @load_local_lo_v2i16_reghi_vreg_sexti8(i8 addrspace(3)* %in, i32 %reg) #0 {
; GFX900-LABEL: load_local_lo_v2i16_reghi_vreg_sexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ds_read_i8_d16 v1, v0
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-NEXT: global_store_dword v[0:1], v1, off
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: load_local_lo_v2i16_reghi_vreg_sexti8:
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: ds_read_i8 v0, v0
; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
; GFX906-NEXT: v_bfi_b32 v0, v2, v0, v1
; GFX906-NEXT: global_store_dword v[0:1], v0, off
; GFX906-NEXT: s_waitcnt vmcnt(0)
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX803-LABEL: load_local_lo_v2i16_reghi_vreg_sexti8:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX803-NEXT: s_mov_b32 m0, -1
; GFX803-NEXT: ds_read_i8 v0, v0
; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GFX803-NEXT: s_waitcnt lgkmcnt(0)
; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX803-NEXT: flat_store_dword v[0:1], v0
; GFX803-NEXT: s_waitcnt vmcnt(0)
; GFX803-NEXT: s_setpc_b64 s[30:31]
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load i8, i8 addrspace(3)* %in
  %ext = sext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Sign-extending counterpart of load_local_lo_v2i16_reglo_vreg_zexti8: the i8
; load is sext'ed to i16 for element 0; the scalar i16 %reg goes in element 1.
define void @load_local_lo_v2i16_reglo_vreg_sexti8(i8 addrspace(3)* %in, i16 %reg) #0 {
; GFX900-LABEL: load_local_lo_v2i16_reglo_vreg_sexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ds_read_i8 v0, v0
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX900-NEXT: global_store_dword v[0:1], v0, off
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: load_local_lo_v2i16_reglo_vreg_sexti8:
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: ds_read_i8 v0, v0
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX906-NEXT: global_store_dword v[0:1], v0, off
; GFX906-NEXT: s_waitcnt vmcnt(0)
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX803-LABEL: load_local_lo_v2i16_reglo_vreg_sexti8:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX803-NEXT: s_mov_b32 m0, -1
; GFX803-NEXT: ds_read_i8 v0, v0
; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX803-NEXT: s_waitcnt lgkmcnt(0)
; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX803-NEXT: flat_store_dword v[0:1], v0
; GFX803-NEXT: s_waitcnt vmcnt(0)
; GFX803-NEXT: s_setpc_b64 s[30:31]
entry:
  %load = load i8, i8 addrspace(3)* %in
  %ext = sext i8 %load to i16
  %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
  %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; LDS i8 load zero-extended to i16 and then bitcast to half for element 0 of a
; <2 x half>; the scalar half %reg goes in element 1. Result is stored.
define void @load_local_lo_v2f16_reglo_vreg_zexti8(i8 addrspace(3)* %in, half %reg) #0 {
; GFX900-LABEL: load_local_lo_v2f16_reglo_vreg_zexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ds_read_u8 v0, v0
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX900-NEXT: global_store_dword v[0:1], v0, off
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: load_local_lo_v2f16_reglo_vreg_zexti8:
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: ds_read_u8 v0, v0
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX906-NEXT: global_store_dword v[0:1], v0, off
; GFX906-NEXT: s_waitcnt vmcnt(0)
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX803-LABEL: load_local_lo_v2f16_reglo_vreg_zexti8:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX803-NEXT: s_mov_b32 m0, -1
; GFX803-NEXT: ds_read_u8 v0, v0
; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX803-NEXT: s_waitcnt lgkmcnt(0)
; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
; GFX803-NEXT: flat_store_dword v[0:1], v0
; GFX803-NEXT: s_waitcnt vmcnt(0)
; GFX803-NEXT: s_setpc_b64 s[30:31]
entry:
  %load = load i8, i8 addrspace(3)* %in
  %ext = zext i8 %load to i16
  %bitcast = bitcast i16 %ext to half
  %build0 = insertelement <2 x half> undef, half %reg, i32 1
  %build1 = insertelement <2 x half> %build0, half %bitcast, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; Sign-extending counterpart of load_local_lo_v2f16_reglo_vreg_zexti8: the i8
; load is sext'ed to i16, bitcast to half, and placed in element 0.
define void @load_local_lo_v2f16_reglo_vreg_sexti8(i8 addrspace(3)* %in, half %reg) #0 {
; GFX900-LABEL: load_local_lo_v2f16_reglo_vreg_sexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ds_read_i8 v0, v0
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX900-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX900-NEXT: global_store_dword v[0:1], v0, off
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: load_local_lo_v2f16_reglo_vreg_sexti8:
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: ds_read_i8 v0, v0
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX906-NEXT: global_store_dword v[0:1], v0, off
; GFX906-NEXT: s_waitcnt vmcnt(0)
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX803-LABEL: load_local_lo_v2f16_reglo_vreg_sexti8:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX803-NEXT: s_mov_b32 m0, -1
; GFX803-NEXT: ds_read_i8 v0, v0
; GFX803-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX803-NEXT: s_waitcnt lgkmcnt(0)
; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX803-NEXT: flat_store_dword v[0:1], v0
; GFX803-NEXT: s_waitcnt vmcnt(0)
; GFX803-NEXT: s_setpc_b64 s[30:31]
entry:
  %load = load i8, i8 addrspace(3)* %in
  %ext = sext i8 %load to i16
  %bitcast = bitcast i16 %ext to half
  %build0 = insertelement <2 x half> undef, half %reg, i32 1
  %build1 = insertelement <2 x half> %build0, half %bitcast, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; The loaded i16 has two users: it is stored to the null LDS address and also
; inserted into element 0 of %reg. %elt1 is extracted but has no users in this
; function.
define void @load_local_lo_v2i16_reghi_vreg_multi_use_lo(i16 addrspace(3)* %in, <2 x i16> %reg) #0 {
; GFX900-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lo:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ds_read_u16 v0, v0
; GFX900-NEXT: v_mov_b32_e32 v2, 0
; GFX900-NEXT: v_mov_b32_e32 v3, 0xffff
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-NEXT: ds_write_b16 v2, v0
; GFX900-NEXT: v_bfi_b32 v0, v3, v0, v1
; GFX900-NEXT: global_store_dword v[0:1], v0, off
; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lo:
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: ds_read_u16 v0, v0
; GFX906-NEXT: v_mov_b32_e32 v2, 0
; GFX906-NEXT: v_mov_b32_e32 v3, 0xffff
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
; GFX906-NEXT: ds_write_b16 v2, v0
; GFX906-NEXT: v_bfi_b32 v0, v3, v0, v1
; GFX906-NEXT: global_store_dword v[0:1], v0, off
; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX803-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lo:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX803-NEXT: s_mov_b32 m0, -1
; GFX803-NEXT: ds_read_u16 v0, v0
; GFX803-NEXT: v_mov_b32_e32 v2, 0
; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GFX803-NEXT: s_waitcnt lgkmcnt(0)
; GFX803-NEXT: ds_write_b16 v2, v0
; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX803-NEXT: flat_store_dword v[0:1], v0
; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX803-NEXT: s_setpc_b64 s[30:31]
entry:
  %load = load i16, i16 addrspace(3)* %in
  %elt1 = extractelement <2 x i16> %reg, i32 1
  store i16 %load, i16 addrspace(3)* null
  %build1 = insertelement <2 x i16> %reg, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Element 1 of %reg has two users: it is stored to the null LDS address and
; it survives in the rebuilt vector whose element 0 is the loaded i16.
define void @load_local_lo_v2i16_reghi_vreg_multi_use_hi(i16 addrspace(3)* %in, <2 x i16> %reg) #0 {
; GFX900-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_hi:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX900-NEXT: ds_read_u16_d16 v1, v0
; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ds_write_b16 v0, v2
; GFX900-NEXT: s_waitcnt lgkmcnt(1)
; GFX900-NEXT: global_store_dword v[0:1], v1, off
; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_hi:
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: ds_read_u16 v0, v0
; GFX906-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX906-NEXT: v_mov_b32_e32 v3, 0
; GFX906-NEXT: ds_write_b16 v3, v2
; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX906-NEXT: s_waitcnt lgkmcnt(1)
; GFX906-NEXT: v_bfi_b32 v0, v2, v0, v1
; GFX906-NEXT: global_store_dword v[0:1], v0, off
; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX803-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_hi:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX803-NEXT: s_mov_b32 m0, -1
; GFX803-NEXT: ds_read_u16 v0, v0
; GFX803-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GFX803-NEXT: v_mov_b32_e32 v3, 0
; GFX803-NEXT: ds_write_b16 v3, v2
; GFX803-NEXT: s_waitcnt lgkmcnt(1)
; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
; GFX803-NEXT: flat_store_dword v[0:1], v0
; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX803-NEXT: s_setpc_b64 s[30:31]
entry:
  %load = load i16, i16 addrspace(3)* %in
  %elt1 = extractelement <2 x i16> %reg, i32 1
  store i16 %elt1, i16 addrspace(3)* null
  %build1 = insertelement <2 x i16> %reg, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Both halves have extra users: the loaded i16 is stored to %out0 and element 1
; of %reg is stored to %out1, in addition to the rebuilt vector being stored.
; All three LDS pointer arguments are marked noalias.
define void @load_local_lo_v2i16_reghi_vreg_multi_use_lohi(i16 addrspace(3)* noalias %in, <2 x i16> %reg, i16 addrspace(3)* noalias %out0, i16 addrspace(3)* noalias %out1) #0 {
; GFX900-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lohi:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ds_read_u16 v0, v0
; GFX900-NEXT: v_lshrrev_b32_e32 v4, 16, v1
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-NEXT: ds_write_b16 v2, v0
; GFX900-NEXT: ds_write_b16 v3, v4
; GFX900-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX900-NEXT: v_bfi_b32 v0, v2, v0, v1
; GFX900-NEXT: global_store_dword v[0:1], v0, off
; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lohi:
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: ds_read_u16 v0, v0
; GFX906-NEXT: v_lshrrev_b32_e32 v4, 16, v1
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
; GFX906-NEXT: ds_write_b16 v2, v0
; GFX906-NEXT: ds_write_b16 v3, v4
; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX906-NEXT: v_bfi_b32 v0, v2, v0, v1
; GFX906-NEXT: global_store_dword v[0:1], v0, off
; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX803-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lohi:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX803-NEXT: s_mov_b32 m0, -1
; GFX803-NEXT: ds_read_u16 v0, v0
; GFX803-NEXT: v_lshrrev_b32_e32 v4, 16, v1
; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GFX803-NEXT: s_waitcnt lgkmcnt(0)
; GFX803-NEXT: ds_write_b16 v2, v0
; GFX803-NEXT: ds_write_b16 v3, v4
; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX803-NEXT: flat_store_dword v[0:1], v0
; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX803-NEXT: s_setpc_b64 s[30:31]
entry:
  %load = load i16, i16 addrspace(3)* %in
  %elt1 = extractelement <2 x i16> %reg, i32 1
  store i16 %load, i16 addrspace(3)* %out0
  store i16 %elt1, i16 addrspace(3)* %out1
  %build1 = insertelement <2 x i16> %reg, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Global-memory i16 load at element index -2047 (an immediate offset of -4094
; in the gfx900/gfx906 expectations), inserted into element 0 of %reg, which is
; an i32 reinterpreted as <2 x i16>. Despite the "reglo" in the name, it is
; element 1 of %reg that is preserved.
define void @load_global_lo_v2i16_reglo_vreg(i16 addrspace(1)* %in, i32 %reg) #0 {
; GFX900-LABEL: load_global_lo_v2i16_reglo_vreg:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: global_store_dword v[0:1], v2, off
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: load_global_lo_v2i16_reglo_vreg:
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: global_load_ushort v0, v[0:1], off offset:-4094
; GFX906-NEXT: v_mov_b32_e32 v1, 0xffff
; GFX906-NEXT: s_waitcnt vmcnt(0)
; GFX906-NEXT: v_bfi_b32 v0, v1, v0, v2
; GFX906-NEXT: global_store_dword v[0:1], v0, off
; GFX906-NEXT: s_waitcnt vmcnt(0)
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX803-LABEL: load_global_lo_v2i16_reglo_vreg:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff002, v0
; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
; GFX803-NEXT: flat_load_ushort v0, v[0:1]
; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
; GFX803-NEXT: s_waitcnt vmcnt(0)
; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
; GFX803-NEXT: flat_store_dword v[0:1], v0
; GFX803-NEXT: s_waitcnt vmcnt(0)
; GFX803-NEXT: s_setpc_b64 s[30:31]
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 -2047
  %load = load i16, i16 addrspace(1)* %gep
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Half variant of load_global_lo_v2i16_reglo_vreg: global half load at element
; index -2047 replaces element 0 of %reg reinterpreted as <2 x half>.
define void @load_global_lo_v2f16_reglo_vreg(half addrspace(1)* %in, i32 %reg) #0 {
; GFX900-LABEL: load_global_lo_v2f16_reglo_vreg:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: global_store_dword v[0:1], v2, off
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: load_global_lo_v2f16_reglo_vreg:
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: global_load_ushort v0, v[0:1], off offset:-4094
; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2
; GFX906-NEXT: s_waitcnt vmcnt(0)
; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX906-NEXT: global_store_dword v[0:1], v0, off
; GFX906-NEXT: s_waitcnt vmcnt(0)
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX803-LABEL: load_global_lo_v2f16_reglo_vreg:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff002, v0
; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
; GFX803-NEXT: flat_load_ushort v0, v[0:1]
; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
; GFX803-NEXT: s_waitcnt vmcnt(0)
; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
; GFX803-NEXT: flat_store_dword v[0:1], v0
; GFX803-NEXT: s_waitcnt vmcnt(0)
; GFX803-NEXT: s_setpc_b64 s[30:31]
entry:
  %reg.bc = bitcast i32 %reg to <2 x half>
  %gep = getelementptr inbounds half, half addrspace(1)* %in, i64 -2047
  %load = load half, half addrspace(1)* %gep
  %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; Global-memory i8 load at byte index -4095, zero-extended to i16 and inserted
; into element 0 of %reg reinterpreted as <2 x i16>. Result is stored.
define void @load_global_lo_v2i16_reglo_vreg_zexti8(i8 addrspace(1)* %in, i32 %reg) #0 {
; GFX900-LABEL: load_global_lo_v2i16_reglo_vreg_zexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: global_load_ubyte_d16 v2, v[0:1], off offset:-4095
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: global_store_dword v[0:1], v2, off
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: load_global_lo_v2i16_reglo_vreg_zexti8:
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: global_load_ubyte v0, v[0:1], off offset:-4095
; GFX906-NEXT: v_mov_b32_e32 v1, 0xffff
; GFX906-NEXT: s_waitcnt vmcnt(0)
; GFX906-NEXT: v_bfi_b32 v0, v1, v0, v2
; GFX906-NEXT: global_store_dword v[0:1], v0, off
; GFX906-NEXT: s_waitcnt vmcnt(0)
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX803-LABEL: load_global_lo_v2i16_reglo_vreg_zexti8:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0
; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
; GFX803-NEXT: flat_load_ubyte v0, v[0:1]
; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
; GFX803-NEXT: s_waitcnt vmcnt(0)
; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
; GFX803-NEXT: flat_store_dword v[0:1], v0
; GFX803-NEXT: s_waitcnt vmcnt(0)
; GFX803-NEXT: s_setpc_b64 s[30:31]
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %gep = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 -4095
  %load = load i8, i8 addrspace(1)* %gep
  %ext = zext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

define void @load_global_lo_v2i16_reglo_vreg_sexti8(i8 addrspace(1)* %in, i32 %reg) #0 {
; GFX900-LABEL: load_global_lo_v2i16_reglo_vreg_sexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: global_load_sbyte_d16 v2, v[0:1], off offset:-4095
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: global_store_dword v[0:1], v2, off
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: load_global_lo_v2i16_reglo_vreg_sexti8:
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: global_load_sbyte v0, v[0:1], off offset:-4095
; GFX906-NEXT: v_mov_b32_e32 v1, 0xffff
; GFX906-NEXT: s_waitcnt vmcnt(0)
; GFX906-NEXT: v_bfi_b32 v0, v1, v0, v2
; GFX906-NEXT: global_store_dword v[0:1], v0, off
; GFX906-NEXT: s_waitcnt vmcnt(0)
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX803-LABEL: load_global_lo_v2i16_reglo_vreg_sexti8:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0
; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc
; GFX803-NEXT: flat_load_sbyte v0, v[0:1]
; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
; GFX803-NEXT: s_waitcnt vmcnt(0)
; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX803-NEXT: flat_store_dword v[0:1], v0
; GFX803-NEXT: s_waitcnt vmcnt(0)
; GFX803-NEXT: s_setpc_b64 s[30:31]
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %gep = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 -4095
  %load = load i8, i8 addrspace(1)* %gep
  %ext = sext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 837 ret void 838} 839 840define void @load_global_lo_v2f16_reglo_vreg_zexti8(i8 addrspace(1)* %in, i32 %reg) #0 { 841; GFX900-LABEL: load_global_lo_v2f16_reglo_vreg_zexti8: 842; GFX900: ; %bb.0: ; %entry 843; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 844; GFX900-NEXT: global_load_ubyte_d16 v2, v[0:1], off offset:-4095 845; GFX900-NEXT: s_waitcnt vmcnt(0) 846; GFX900-NEXT: global_store_dword v[0:1], v2, off 847; GFX900-NEXT: s_waitcnt vmcnt(0) 848; GFX900-NEXT: s_setpc_b64 s[30:31] 849; 850; GFX906-LABEL: load_global_lo_v2f16_reglo_vreg_zexti8: 851; GFX906: ; %bb.0: ; %entry 852; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 853; GFX906-NEXT: global_load_ubyte v0, v[0:1], off offset:-4095 854; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2 855; GFX906-NEXT: s_waitcnt vmcnt(0) 856; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0 857; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0 858; GFX906-NEXT: global_store_dword v[0:1], v0, off 859; GFX906-NEXT: s_waitcnt vmcnt(0) 860; GFX906-NEXT: s_setpc_b64 s[30:31] 861; 862; GFX803-LABEL: load_global_lo_v2f16_reglo_vreg_zexti8: 863; GFX803: ; %bb.0: ; %entry 864; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 865; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0 866; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc 867; GFX803-NEXT: flat_load_ubyte v0, v[0:1] 868; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2 869; GFX803-NEXT: s_waitcnt vmcnt(0) 870; GFX803-NEXT: v_or_b32_e32 v0, v0, v1 871; GFX803-NEXT: flat_store_dword v[0:1], v0 872; GFX803-NEXT: s_waitcnt vmcnt(0) 873; GFX803-NEXT: s_setpc_b64 s[30:31] 874entry: 875 %reg.bc = bitcast i32 %reg to <2 x half> 876 %gep = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 -4095 877 %load = load i8, i8 addrspace(1)* %gep 878 %ext = zext i8 %load to i16 879 %bitcast = bitcast i16 %ext to half 880 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0 881 store <2 x half> %build1, <2 x half> 
addrspace(1)* undef 882 ret void 883} 884 885define void @load_global_lo_v2f16_reglo_vreg_sexti8(i8 addrspace(1)* %in, i32 %reg) #0 { 886; GFX900-LABEL: load_global_lo_v2f16_reglo_vreg_sexti8: 887; GFX900: ; %bb.0: ; %entry 888; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 889; GFX900-NEXT: global_load_sbyte_d16 v2, v[0:1], off offset:-4095 890; GFX900-NEXT: s_waitcnt vmcnt(0) 891; GFX900-NEXT: global_store_dword v[0:1], v2, off 892; GFX900-NEXT: s_waitcnt vmcnt(0) 893; GFX900-NEXT: s_setpc_b64 s[30:31] 894; 895; GFX906-LABEL: load_global_lo_v2f16_reglo_vreg_sexti8: 896; GFX906: ; %bb.0: ; %entry 897; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 898; GFX906-NEXT: global_load_sbyte v0, v[0:1], off offset:-4095 899; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2 900; GFX906-NEXT: s_waitcnt vmcnt(0) 901; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0 902; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0 903; GFX906-NEXT: global_store_dword v[0:1], v0, off 904; GFX906-NEXT: s_waitcnt vmcnt(0) 905; GFX906-NEXT: s_setpc_b64 s[30:31] 906; 907; GFX803-LABEL: load_global_lo_v2f16_reglo_vreg_sexti8: 908; GFX803: ; %bb.0: ; %entry 909; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 910; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0 911; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc 912; GFX803-NEXT: flat_load_sbyte v0, v[0:1] 913; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2 914; GFX803-NEXT: s_waitcnt vmcnt(0) 915; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 916; GFX803-NEXT: flat_store_dword v[0:1], v0 917; GFX803-NEXT: s_waitcnt vmcnt(0) 918; GFX803-NEXT: s_setpc_b64 s[30:31] 919entry: 920 %reg.bc = bitcast i32 %reg to <2 x half> 921 %gep = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 -4095 922 %load = load i8, i8 addrspace(1)* %gep 923 %ext = sext i8 %load to i16 924 %bitcast = bitcast i16 %ext to half 925 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0 926 store <2 x half> 
%build1, <2 x half> addrspace(1)* undef 927 ret void 928} 929 930define void @load_flat_lo_v2i16_reghi_vreg(i16* %in, i32 %reg) #0 { 931; GFX900-LABEL: load_flat_lo_v2i16_reghi_vreg: 932; GFX900: ; %bb.0: ; %entry 933; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 934; GFX900-NEXT: flat_load_short_d16 v2, v[0:1] 935; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 936; GFX900-NEXT: global_store_dword v[0:1], v2, off 937; GFX900-NEXT: s_waitcnt vmcnt(0) 938; GFX900-NEXT: s_setpc_b64 s[30:31] 939; 940; GFX906-LABEL: load_flat_lo_v2i16_reghi_vreg: 941; GFX906: ; %bb.0: ; %entry 942; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 943; GFX906-NEXT: flat_load_ushort v0, v[0:1] 944; GFX906-NEXT: v_mov_b32_e32 v1, 0xffff 945; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 946; GFX906-NEXT: v_bfi_b32 v0, v1, v0, v2 947; GFX906-NEXT: global_store_dword v[0:1], v0, off 948; GFX906-NEXT: s_waitcnt vmcnt(0) 949; GFX906-NEXT: s_setpc_b64 s[30:31] 950; 951; GFX803-LABEL: load_flat_lo_v2i16_reghi_vreg: 952; GFX803: ; %bb.0: ; %entry 953; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 954; GFX803-NEXT: flat_load_ushort v0, v[0:1] 955; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2 956; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 957; GFX803-NEXT: v_or_b32_e32 v0, v0, v1 958; GFX803-NEXT: flat_store_dword v[0:1], v0 959; GFX803-NEXT: s_waitcnt vmcnt(0) 960; GFX803-NEXT: s_setpc_b64 s[30:31] 961entry: 962 %reg.bc = bitcast i32 %reg to <2 x i16> 963 %load = load i16, i16* %in 964 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0 965 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 966 ret void 967} 968 969define void @load_flat_lo_v2f16_reghi_vreg(half* %in, i32 %reg) #0 { 970; GFX900-LABEL: load_flat_lo_v2f16_reghi_vreg: 971; GFX900: ; %bb.0: ; %entry 972; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 973; GFX900-NEXT: flat_load_short_d16 v2, v[0:1] 974; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 975; GFX900-NEXT: global_store_dword v[0:1], v2, off 976; 
GFX900-NEXT: s_waitcnt vmcnt(0) 977; GFX900-NEXT: s_setpc_b64 s[30:31] 978; 979; GFX906-LABEL: load_flat_lo_v2f16_reghi_vreg: 980; GFX906: ; %bb.0: ; %entry 981; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 982; GFX906-NEXT: flat_load_ushort v0, v[0:1] 983; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2 984; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 985; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0 986; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0 987; GFX906-NEXT: global_store_dword v[0:1], v0, off 988; GFX906-NEXT: s_waitcnt vmcnt(0) 989; GFX906-NEXT: s_setpc_b64 s[30:31] 990; 991; GFX803-LABEL: load_flat_lo_v2f16_reghi_vreg: 992; GFX803: ; %bb.0: ; %entry 993; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 994; GFX803-NEXT: flat_load_ushort v0, v[0:1] 995; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2 996; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 997; GFX803-NEXT: v_or_b32_e32 v0, v0, v1 998; GFX803-NEXT: flat_store_dword v[0:1], v0 999; GFX803-NEXT: s_waitcnt vmcnt(0) 1000; GFX803-NEXT: s_setpc_b64 s[30:31] 1001 1002; FIXME: the v_and_b32 with 0xffff in the gfx906 output above should be removable 1003entry: 1004 %reg.bc = bitcast i32 %reg to <2 x half> 1005 %load = load half, half* %in 1006 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0 1007 store <2 x half> %build1, <2 x half> addrspace(1)* undef 1008 ret void 1009} 1010 1011define void @load_flat_lo_v2i16_reglo_vreg_zexti8(i8* %in, i32 %reg) #0 { 1012; GFX900-LABEL: load_flat_lo_v2i16_reglo_vreg_zexti8: 1013; GFX900: ; %bb.0: ; %entry 1014; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1015; GFX900-NEXT: flat_load_ubyte_d16 v2, v[0:1] 1016; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1017; GFX900-NEXT: global_store_dword v[0:1], v2, off 1018; GFX900-NEXT: s_waitcnt vmcnt(0) 1019; GFX900-NEXT: s_setpc_b64 s[30:31] 1020; 1021; GFX906-LABEL: load_flat_lo_v2i16_reglo_vreg_zexti8: 1022; GFX906: ; %bb.0: ; %entry 1023; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1024; GFX906-NEXT: flat_load_ubyte v0, v[0:1] 1025; 
GFX906-NEXT: v_mov_b32_e32 v1, 0xffff 1026; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1027; GFX906-NEXT: v_bfi_b32 v0, v1, v0, v2 1028; GFX906-NEXT: global_store_dword v[0:1], v0, off 1029; GFX906-NEXT: s_waitcnt vmcnt(0) 1030; GFX906-NEXT: s_setpc_b64 s[30:31] 1031; 1032; GFX803-LABEL: load_flat_lo_v2i16_reglo_vreg_zexti8: 1033; GFX803: ; %bb.0: ; %entry 1034; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1035; GFX803-NEXT: flat_load_ubyte v0, v[0:1] 1036; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2 1037; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1038; GFX803-NEXT: v_or_b32_e32 v0, v0, v1 1039; GFX803-NEXT: flat_store_dword v[0:1], v0 1040; GFX803-NEXT: s_waitcnt vmcnt(0) 1041; GFX803-NEXT: s_setpc_b64 s[30:31] 1042entry: 1043 %reg.bc = bitcast i32 %reg to <2 x i16> 1044 %load = load i8, i8* %in 1045 %ext = zext i8 %load to i16 1046 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0 1047 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 1048 ret void 1049} 1050 1051define void @load_flat_lo_v2i16_reglo_vreg_sexti8(i8* %in, i32 %reg) #0 { 1052; GFX900-LABEL: load_flat_lo_v2i16_reglo_vreg_sexti8: 1053; GFX900: ; %bb.0: ; %entry 1054; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1055; GFX900-NEXT: flat_load_sbyte_d16 v2, v[0:1] 1056; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1057; GFX900-NEXT: global_store_dword v[0:1], v2, off 1058; GFX900-NEXT: s_waitcnt vmcnt(0) 1059; GFX900-NEXT: s_setpc_b64 s[30:31] 1060; 1061; GFX906-LABEL: load_flat_lo_v2i16_reglo_vreg_sexti8: 1062; GFX906: ; %bb.0: ; %entry 1063; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1064; GFX906-NEXT: flat_load_sbyte v0, v[0:1] 1065; GFX906-NEXT: v_mov_b32_e32 v1, 0xffff 1066; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1067; GFX906-NEXT: v_bfi_b32 v0, v1, v0, v2 1068; GFX906-NEXT: global_store_dword v[0:1], v0, off 1069; GFX906-NEXT: s_waitcnt vmcnt(0) 1070; GFX906-NEXT: s_setpc_b64 s[30:31] 1071; 1072; GFX803-LABEL: load_flat_lo_v2i16_reglo_vreg_sexti8: 1073; 
GFX803: ; %bb.0: ; %entry 1074; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1075; GFX803-NEXT: flat_load_sbyte v0, v[0:1] 1076; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2 1077; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1078; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1079; GFX803-NEXT: flat_store_dword v[0:1], v0 1080; GFX803-NEXT: s_waitcnt vmcnt(0) 1081; GFX803-NEXT: s_setpc_b64 s[30:31] 1082entry: 1083 %reg.bc = bitcast i32 %reg to <2 x i16> 1084 %load = load i8, i8* %in 1085 %ext = sext i8 %load to i16 1086 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0 1087 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 1088 ret void 1089} 1090 1091define void @load_flat_lo_v2f16_reglo_vreg_zexti8(i8* %in, i32 %reg) #0 { 1092; GFX900-LABEL: load_flat_lo_v2f16_reglo_vreg_zexti8: 1093; GFX900: ; %bb.0: ; %entry 1094; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1095; GFX900-NEXT: flat_load_ubyte_d16 v2, v[0:1] 1096; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1097; GFX900-NEXT: global_store_dword v[0:1], v2, off 1098; GFX900-NEXT: s_waitcnt vmcnt(0) 1099; GFX900-NEXT: s_setpc_b64 s[30:31] 1100; 1101; GFX906-LABEL: load_flat_lo_v2f16_reglo_vreg_zexti8: 1102; GFX906: ; %bb.0: ; %entry 1103; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1104; GFX906-NEXT: flat_load_ubyte v0, v[0:1] 1105; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2 1106; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1107; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0 1108; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0 1109; GFX906-NEXT: global_store_dword v[0:1], v0, off 1110; GFX906-NEXT: s_waitcnt vmcnt(0) 1111; GFX906-NEXT: s_setpc_b64 s[30:31] 1112; 1113; GFX803-LABEL: load_flat_lo_v2f16_reglo_vreg_zexti8: 1114; GFX803: ; %bb.0: ; %entry 1115; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1116; GFX803-NEXT: flat_load_ubyte v0, v[0:1] 1117; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2 1118; GFX803-NEXT: 
s_waitcnt vmcnt(0) lgkmcnt(0) 1119; GFX803-NEXT: v_or_b32_e32 v0, v0, v1 1120; GFX803-NEXT: flat_store_dword v[0:1], v0 1121; GFX803-NEXT: s_waitcnt vmcnt(0) 1122; GFX803-NEXT: s_setpc_b64 s[30:31] 1123entry: 1124 %reg.bc = bitcast i32 %reg to <2 x half> 1125 %load = load i8, i8* %in 1126 %ext = zext i8 %load to i16 1127 %bitcast = bitcast i16 %ext to half 1128 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0 1129 store <2 x half> %build1, <2 x half> addrspace(1)* undef 1130 ret void 1131} 1132 1133define void @load_flat_lo_v2f16_reglo_vreg_sexti8(i8* %in, i32 %reg) #0 { 1134; GFX900-LABEL: load_flat_lo_v2f16_reglo_vreg_sexti8: 1135; GFX900: ; %bb.0: ; %entry 1136; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1137; GFX900-NEXT: flat_load_sbyte_d16 v2, v[0:1] 1138; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1139; GFX900-NEXT: global_store_dword v[0:1], v2, off 1140; GFX900-NEXT: s_waitcnt vmcnt(0) 1141; GFX900-NEXT: s_setpc_b64 s[30:31] 1142; 1143; GFX906-LABEL: load_flat_lo_v2f16_reglo_vreg_sexti8: 1144; GFX906: ; %bb.0: ; %entry 1145; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1146; GFX906-NEXT: flat_load_sbyte v0, v[0:1] 1147; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2 1148; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1149; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0 1150; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0 1151; GFX906-NEXT: global_store_dword v[0:1], v0, off 1152; GFX906-NEXT: s_waitcnt vmcnt(0) 1153; GFX906-NEXT: s_setpc_b64 s[30:31] 1154; 1155; GFX803-LABEL: load_flat_lo_v2f16_reglo_vreg_sexti8: 1156; GFX803: ; %bb.0: ; %entry 1157; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1158; GFX803-NEXT: flat_load_sbyte v0, v[0:1] 1159; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2 1160; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1161; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1162; GFX803-NEXT: flat_store_dword v[0:1], v0 1163; GFX803-NEXT: s_waitcnt vmcnt(0) 
1164; GFX803-NEXT: s_setpc_b64 s[30:31] 1165entry: 1166 %reg.bc = bitcast i32 %reg to <2 x half> 1167 %load = load i8, i8* %in 1168 %ext = sext i8 %load to i16 1169 %bitcast = bitcast i16 %ext to half 1170 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0 1171 store <2 x half> %build1, <2 x half> addrspace(1)* undef 1172 ret void 1173} 1174 1175define void @load_private_lo_v2i16_reglo_vreg(i16 addrspace(5)* byval(i16) %in, i32 %reg) #0 { 1176; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg: 1177; GFX900-MUBUF: ; %bb.0: ; %entry 1178; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1179; GFX900-MUBUF-NEXT: buffer_load_short_d16 v0, off, s[0:3], s32 offset:4094 1180; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1181; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off 1182; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1183; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31] 1184; 1185; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg: 1186; GFX906: ; %bb.0: ; %entry 1187; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1188; GFX906-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094 1189; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff 1190; GFX906-NEXT: s_waitcnt vmcnt(0) 1191; GFX906-NEXT: v_bfi_b32 v0, v2, v1, v0 1192; GFX906-NEXT: global_store_dword v[0:1], v0, off 1193; GFX906-NEXT: s_waitcnt vmcnt(0) 1194; GFX906-NEXT: s_setpc_b64 s[30:31] 1195; 1196; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg: 1197; GFX803: ; %bb.0: ; %entry 1198; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1199; GFX803-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094 1200; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 1201; GFX803-NEXT: s_waitcnt vmcnt(0) 1202; GFX803-NEXT: v_or_b32_e32 v0, v1, v0 1203; GFX803-NEXT: flat_store_dword v[0:1], v0 1204; GFX803-NEXT: s_waitcnt vmcnt(0) 1205; GFX803-NEXT: s_setpc_b64 s[30:31] 1206; 1207; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg: 1208; GFX900-FLATSCR: ; %bb.0: ; %entry 1209; GFX900-FLATSCR-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 1210; GFX900-FLATSCR-NEXT: scratch_load_short_d16 v0, off, s32 offset:4094 1211; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1212; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off 1213; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1214; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31] 1215entry: 1216 %reg.bc = bitcast i32 %reg to <2 x i16> 1217 %gep = getelementptr inbounds i16, i16 addrspace(5)* %in, i64 2047 1218 %load = load i16, i16 addrspace(5)* %gep 1219 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0 1220 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 1221 ret void 1222} 1223 1224define void @load_private_lo_v2i16_reghi_vreg(i16 addrspace(5)* byval(i16) %in, i16 %reg) #0 { 1225; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reghi_vreg: 1226; GFX900-MUBUF: ; %bb.0: ; %entry 1227; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1228; GFX900-MUBUF-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094 1229; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1230; GFX900-MUBUF-NEXT: v_and_b32_e32 v1, 0xffff, v1 1231; GFX900-MUBUF-NEXT: v_lshl_or_b32 v0, v0, 16, v1 1232; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off 1233; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1234; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31] 1235; 1236; GFX906-LABEL: load_private_lo_v2i16_reghi_vreg: 1237; GFX906: ; %bb.0: ; %entry 1238; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1239; GFX906-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094 1240; GFX906-NEXT: s_waitcnt vmcnt(0) 1241; GFX906-NEXT: v_and_b32_e32 v1, 0xffff, v1 1242; GFX906-NEXT: v_lshl_or_b32 v0, v0, 16, v1 1243; GFX906-NEXT: global_store_dword v[0:1], v0, off 1244; GFX906-NEXT: s_waitcnt vmcnt(0) 1245; GFX906-NEXT: s_setpc_b64 s[30:31] 1246; 1247; GFX803-LABEL: load_private_lo_v2i16_reghi_vreg: 1248; GFX803: ; %bb.0: ; %entry 1249; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1250; GFX803-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094 1251; 
GFX803-NEXT: v_lshlrev_b32_e32 v0, 16, v0 1252; GFX803-NEXT: s_waitcnt vmcnt(0) 1253; GFX803-NEXT: v_or_b32_e32 v0, v1, v0 1254; GFX803-NEXT: flat_store_dword v[0:1], v0 1255; GFX803-NEXT: s_waitcnt vmcnt(0) 1256; GFX803-NEXT: s_setpc_b64 s[30:31] 1257; 1258; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reghi_vreg: 1259; GFX900-FLATSCR: ; %bb.0: ; %entry 1260; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1261; GFX900-FLATSCR-NEXT: scratch_load_ushort v1, off, s32 offset:4094 1262; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1263; GFX900-FLATSCR-NEXT: v_and_b32_e32 v1, 0xffff, v1 1264; GFX900-FLATSCR-NEXT: v_lshl_or_b32 v0, v0, 16, v1 1265; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off 1266; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1267; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31] 1268entry: 1269 %gep = getelementptr inbounds i16, i16 addrspace(5)* %in, i64 2047 1270 %load = load i16, i16 addrspace(5)* %gep 1271 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1 1272 %build1 = insertelement <2 x i16> %build0, i16 %load, i32 0 1273 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 1274 ret void 1275} 1276 1277define void @load_private_lo_v2f16_reglo_vreg(half addrspace(5)* byval(half) %in, i32 %reg) #0 { 1278; GFX900-MUBUF-LABEL: load_private_lo_v2f16_reglo_vreg: 1279; GFX900-MUBUF: ; %bb.0: ; %entry 1280; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1281; GFX900-MUBUF-NEXT: buffer_load_short_d16 v0, off, s[0:3], s32 offset:4094 1282; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1283; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off 1284; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1285; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31] 1286; 1287; GFX906-LABEL: load_private_lo_v2f16_reglo_vreg: 1288; GFX906: ; %bb.0: ; %entry 1289; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1290; GFX906-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094 1291; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v0 1292; GFX906-NEXT: s_waitcnt vmcnt(0) 
1293; GFX906-NEXT: v_and_b32_e32 v1, 0xffff, v1 1294; GFX906-NEXT: v_lshl_or_b32 v0, v0, 16, v1 1295; GFX906-NEXT: global_store_dword v[0:1], v0, off 1296; GFX906-NEXT: s_waitcnt vmcnt(0) 1297; GFX906-NEXT: s_setpc_b64 s[30:31] 1298; 1299; GFX803-LABEL: load_private_lo_v2f16_reglo_vreg: 1300; GFX803: ; %bb.0: ; %entry 1301; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1302; GFX803-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094 1303; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 1304; GFX803-NEXT: s_waitcnt vmcnt(0) 1305; GFX803-NEXT: v_or_b32_e32 v0, v1, v0 1306; GFX803-NEXT: flat_store_dword v[0:1], v0 1307; GFX803-NEXT: s_waitcnt vmcnt(0) 1308; GFX803-NEXT: s_setpc_b64 s[30:31] 1309; 1310; GFX900-FLATSCR-LABEL: load_private_lo_v2f16_reglo_vreg: 1311; GFX900-FLATSCR: ; %bb.0: ; %entry 1312; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1313; GFX900-FLATSCR-NEXT: scratch_load_short_d16 v0, off, s32 offset:4094 1314; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1315; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off 1316; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1317; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31] 1318entry: 1319 %reg.bc = bitcast i32 %reg to <2 x half> 1320 %gep = getelementptr inbounds half, half addrspace(5)* %in, i64 2047 1321 %load = load half, half addrspace(5)* %gep 1322 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0 1323 store <2 x half> %build1, <2 x half> addrspace(1)* undef 1324 ret void 1325} 1326 1327define void @load_private_lo_v2i16_reglo_vreg_nooff(i16 addrspace(5)* %in, i32 %reg) #0 { 1328; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_nooff: 1329; GFX900-MUBUF: ; %bb.0: ; %entry 1330; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1331; GFX900-MUBUF-NEXT: buffer_load_short_d16 v1, off, s[0:3], 0 offset:4094 glc 1332; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1333; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v1, off 1334; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1335; 
GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31] 1336; 1337; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_nooff: 1338; GFX906: ; %bb.0: ; %entry 1339; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1340; GFX906-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:4094 glc 1341; GFX906-NEXT: s_waitcnt vmcnt(0) 1342; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff 1343; GFX906-NEXT: v_bfi_b32 v0, v2, v0, v1 1344; GFX906-NEXT: global_store_dword v[0:1], v0, off 1345; GFX906-NEXT: s_waitcnt vmcnt(0) 1346; GFX906-NEXT: s_setpc_b64 s[30:31] 1347; 1348; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_nooff: 1349; GFX803: ; %bb.0: ; %entry 1350; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1351; GFX803-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:4094 glc 1352; GFX803-NEXT: s_waitcnt vmcnt(0) 1353; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 1354; GFX803-NEXT: v_or_b32_e32 v0, v0, v1 1355; GFX803-NEXT: flat_store_dword v[0:1], v0 1356; GFX803-NEXT: s_waitcnt vmcnt(0) 1357; GFX803-NEXT: s_setpc_b64 s[30:31] 1358; 1359; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg_nooff: 1360; GFX900-FLATSCR: ; %bb.0: ; %entry 1361; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1362; GFX900-FLATSCR-NEXT: s_movk_i32 s0, 0xffe 1363; GFX900-FLATSCR-NEXT: scratch_load_short_d16 v1, off, s0 glc 1364; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1365; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v1, off 1366; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1367; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31] 1368entry: 1369 %reg.bc = bitcast i32 %reg to <2 x i16> 1370 %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 4094 to i16 addrspace(5)*) 1371 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0 1372 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 1373 ret void 1374} 1375 1376define void @load_private_lo_v2i16_reghi_vreg_nooff(i16 addrspace(5)* %in, i32 %reg) #0 { 1377; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reghi_vreg_nooff: 1378; GFX900-MUBUF: ; %bb.0: 
; %entry 1379; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1380; GFX900-MUBUF-NEXT: buffer_load_short_d16 v1, off, s[0:3], 0 offset:4094 glc 1381; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1382; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v1, off 1383; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1384; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31] 1385; 1386; GFX906-LABEL: load_private_lo_v2i16_reghi_vreg_nooff: 1387; GFX906: ; %bb.0: ; %entry 1388; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1389; GFX906-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:4094 glc 1390; GFX906-NEXT: s_waitcnt vmcnt(0) 1391; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff 1392; GFX906-NEXT: v_bfi_b32 v0, v2, v0, v1 1393; GFX906-NEXT: global_store_dword v[0:1], v0, off 1394; GFX906-NEXT: s_waitcnt vmcnt(0) 1395; GFX906-NEXT: s_setpc_b64 s[30:31] 1396; 1397; GFX803-LABEL: load_private_lo_v2i16_reghi_vreg_nooff: 1398; GFX803: ; %bb.0: ; %entry 1399; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1400; GFX803-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:4094 glc 1401; GFX803-NEXT: s_waitcnt vmcnt(0) 1402; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 1403; GFX803-NEXT: v_or_b32_e32 v0, v0, v1 1404; GFX803-NEXT: flat_store_dword v[0:1], v0 1405; GFX803-NEXT: s_waitcnt vmcnt(0) 1406; GFX803-NEXT: s_setpc_b64 s[30:31] 1407; 1408; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reghi_vreg_nooff: 1409; GFX900-FLATSCR: ; %bb.0: ; %entry 1410; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1411; GFX900-FLATSCR-NEXT: s_movk_i32 s0, 0xffe 1412; GFX900-FLATSCR-NEXT: scratch_load_short_d16 v1, off, s0 glc 1413; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1414; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v1, off 1415; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1416; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31] 1417entry: 1418 %reg.bc = bitcast i32 %reg to <2 x i16> 1419 %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 4094 to i16 addrspace(5)*) 1420 %build1 = insertelement 
<2 x i16> %reg.bc, i16 %load, i32 0 1421 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 1422 ret void 1423} 1424 1425define void @load_private_lo_v2f16_reglo_vreg_nooff(half addrspace(5)* %in, i32 %reg) #0 { 1426; GFX900-MUBUF-LABEL: load_private_lo_v2f16_reglo_vreg_nooff: 1427; GFX900-MUBUF: ; %bb.0: ; %entry 1428; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1429; GFX900-MUBUF-NEXT: buffer_load_short_d16 v1, off, s[0:3], 0 offset:4094 glc 1430; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1431; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v1, off 1432; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1433; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31] 1434; 1435; GFX906-LABEL: load_private_lo_v2f16_reglo_vreg_nooff: 1436; GFX906: ; %bb.0: ; %entry 1437; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1438; GFX906-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:4094 glc 1439; GFX906-NEXT: s_waitcnt vmcnt(0) 1440; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v1 1441; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0 1442; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0 1443; GFX906-NEXT: global_store_dword v[0:1], v0, off 1444; GFX906-NEXT: s_waitcnt vmcnt(0) 1445; GFX906-NEXT: s_setpc_b64 s[30:31] 1446; 1447; GFX803-LABEL: load_private_lo_v2f16_reglo_vreg_nooff: 1448; GFX803: ; %bb.0: ; %entry 1449; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1450; GFX803-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:4094 glc 1451; GFX803-NEXT: s_waitcnt vmcnt(0) 1452; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 1453; GFX803-NEXT: v_or_b32_e32 v0, v0, v1 1454; GFX803-NEXT: flat_store_dword v[0:1], v0 1455; GFX803-NEXT: s_waitcnt vmcnt(0) 1456; GFX803-NEXT: s_setpc_b64 s[30:31] 1457; 1458; GFX900-FLATSCR-LABEL: load_private_lo_v2f16_reglo_vreg_nooff: 1459; GFX900-FLATSCR: ; %bb.0: ; %entry 1460; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1461; GFX900-FLATSCR-NEXT: s_movk_i32 s0, 0xffe 1462; GFX900-FLATSCR-NEXT: scratch_load_short_d16 v1, off, s0 glc 1463; 
GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1464; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v1, off 1465; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1466; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31] 1467entry: 1468 %reg.bc = bitcast i32 %reg to <2 x half> 1469 %load = load volatile half, half addrspace(5)* inttoptr (i32 4094 to half addrspace(5)*) 1470 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0 1471 store <2 x half> %build1, <2 x half> addrspace(1)* undef 1472 ret void 1473} 1474 1475define void @load_private_lo_v2i16_reglo_vreg_zexti8(i8 addrspace(5)* byval(i8) %in, i32 %reg) #0 { 1476; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8: 1477; GFX900-MUBUF: ; %bb.0: ; %entry 1478; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1479; GFX900-MUBUF-NEXT: buffer_load_ubyte_d16 v0, off, s[0:3], s32 offset:4095 1480; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1481; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off 1482; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1483; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31] 1484; 1485; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8: 1486; GFX906: ; %bb.0: ; %entry 1487; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1488; GFX906-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 offset:4095 1489; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff 1490; GFX906-NEXT: s_waitcnt vmcnt(0) 1491; GFX906-NEXT: v_bfi_b32 v0, v2, v1, v0 1492; GFX906-NEXT: global_store_dword v[0:1], v0, off 1493; GFX906-NEXT: s_waitcnt vmcnt(0) 1494; GFX906-NEXT: s_setpc_b64 s[30:31] 1495; 1496; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8: 1497; GFX803: ; %bb.0: ; %entry 1498; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1499; GFX803-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 offset:4095 1500; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 1501; GFX803-NEXT: s_waitcnt vmcnt(0) 1502; GFX803-NEXT: v_or_b32_e32 v0, v1, v0 1503; GFX803-NEXT: flat_store_dword v[0:1], v0 1504; GFX803-NEXT: s_waitcnt vmcnt(0) 1505; GFX803-NEXT: 
s_setpc_b64 s[30:31] 1506; 1507; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8: 1508; GFX900-FLATSCR: ; %bb.0: ; %entry 1509; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1510; GFX900-FLATSCR-NEXT: scratch_load_ubyte_d16 v0, off, s32 offset:4095 1511; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1512; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off 1513; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1514; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31] 1515entry: 1516 %reg.bc = bitcast i32 %reg to <2 x i16> 1517 %gep = getelementptr inbounds i8, i8 addrspace(5)* %in, i64 4095 1518 %load = load i8, i8 addrspace(5)* %gep 1519 %ext = zext i8 %load to i16 1520 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0 1521 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 1522 ret void 1523} 1524 1525define void @load_private_lo_v2i16_reglo_vreg_sexti8(i8 addrspace(5)* byval(i8) %in, i32 %reg) #0 { 1526; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8: 1527; GFX900-MUBUF: ; %bb.0: ; %entry 1528; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1529; GFX900-MUBUF-NEXT: buffer_load_sbyte_d16 v0, off, s[0:3], s32 offset:4095 1530; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1531; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off 1532; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1533; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31] 1534; 1535; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8: 1536; GFX906: ; %bb.0: ; %entry 1537; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1538; GFX906-NEXT: buffer_load_sbyte v1, off, s[0:3], s32 offset:4095 1539; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff 1540; GFX906-NEXT: s_waitcnt vmcnt(0) 1541; GFX906-NEXT: v_bfi_b32 v0, v2, v1, v0 1542; GFX906-NEXT: global_store_dword v[0:1], v0, off 1543; GFX906-NEXT: s_waitcnt vmcnt(0) 1544; GFX906-NEXT: s_setpc_b64 s[30:31] 1545; 1546; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8: 1547; GFX803: ; %bb.0: ; %entry 1548; GFX803-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 1549; GFX803-NEXT: buffer_load_sbyte v1, off, s[0:3], s32 offset:4095 1550; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 1551; GFX803-NEXT: s_waitcnt vmcnt(0) 1552; GFX803-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1553; GFX803-NEXT: flat_store_dword v[0:1], v0 1554; GFX803-NEXT: s_waitcnt vmcnt(0) 1555; GFX803-NEXT: s_setpc_b64 s[30:31] 1556; 1557; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8: 1558; GFX900-FLATSCR: ; %bb.0: ; %entry 1559; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1560; GFX900-FLATSCR-NEXT: scratch_load_sbyte_d16 v0, off, s32 offset:4095 1561; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1562; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off 1563; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1564; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31] 1565entry: 1566 %reg.bc = bitcast i32 %reg to <2 x i16> 1567 %gep = getelementptr inbounds i8, i8 addrspace(5)* %in, i64 4095 1568 %load = load i8, i8 addrspace(5)* %gep 1569 %ext = sext i8 %load to i16 1570 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0 1571 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 1572 ret void 1573} 1574 1575define void @load_private_lo_v2i16_reglo_vreg_nooff_zexti8(i8 addrspace(5)* %in, i32 %reg) #0 { 1576; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_zexti8: 1577; GFX900-MUBUF: ; %bb.0: ; %entry 1578; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1579; GFX900-MUBUF-NEXT: buffer_load_ubyte_d16 v1, off, s[0:3], 0 offset:4094 glc 1580; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1581; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v1, off 1582; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1583; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31] 1584; 1585; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_zexti8: 1586; GFX906: ; %bb.0: ; %entry 1587; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1588; GFX906-NEXT: buffer_load_ubyte v0, off, 
s[0:3], 0 offset:4094 glc 1589; GFX906-NEXT: s_waitcnt vmcnt(0) 1590; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff 1591; GFX906-NEXT: v_bfi_b32 v0, v2, v0, v1 1592; GFX906-NEXT: global_store_dword v[0:1], v0, off 1593; GFX906-NEXT: s_waitcnt vmcnt(0) 1594; GFX906-NEXT: s_setpc_b64 s[30:31] 1595; 1596; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_zexti8: 1597; GFX803: ; %bb.0: ; %entry 1598; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1599; GFX803-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:4094 glc 1600; GFX803-NEXT: s_waitcnt vmcnt(0) 1601; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 1602; GFX803-NEXT: v_or_b32_e32 v0, v0, v1 1603; GFX803-NEXT: flat_store_dword v[0:1], v0 1604; GFX803-NEXT: s_waitcnt vmcnt(0) 1605; GFX803-NEXT: s_setpc_b64 s[30:31] 1606; 1607; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_zexti8: 1608; GFX900-FLATSCR: ; %bb.0: ; %entry 1609; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1610; GFX900-FLATSCR-NEXT: s_movk_i32 s0, 0xffe 1611; GFX900-FLATSCR-NEXT: scratch_load_ubyte_d16 v1, off, s0 glc 1612; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1613; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v1, off 1614; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1615; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31] 1616entry: 1617 %reg.bc = bitcast i32 %reg to <2 x i16> 1618 %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*) 1619 %ext = zext i8 %load to i16 1620 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0 1621 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 1622 ret void 1623} 1624 1625define void @load_private_lo_v2i16_reglo_vreg_nooff_sexti8(i8 addrspace(5)* %in, i32 %reg) #0 { 1626; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_sexti8: 1627; GFX900-MUBUF: ; %bb.0: ; %entry 1628; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1629; GFX900-MUBUF-NEXT: buffer_load_sbyte_d16 v1, off, s[0:3], 0 offset:4094 glc 1630; GFX900-MUBUF-NEXT: 
s_waitcnt vmcnt(0) 1631; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v1, off 1632; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1633; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31] 1634; 1635; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_sexti8: 1636; GFX906: ; %bb.0: ; %entry 1637; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1638; GFX906-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 offset:4094 glc 1639; GFX906-NEXT: s_waitcnt vmcnt(0) 1640; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff 1641; GFX906-NEXT: v_bfi_b32 v0, v2, v0, v1 1642; GFX906-NEXT: global_store_dword v[0:1], v0, off 1643; GFX906-NEXT: s_waitcnt vmcnt(0) 1644; GFX906-NEXT: s_setpc_b64 s[30:31] 1645; 1646; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_sexti8: 1647; GFX803: ; %bb.0: ; %entry 1648; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1649; GFX803-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 offset:4094 glc 1650; GFX803-NEXT: s_waitcnt vmcnt(0) 1651; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 1652; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1653; GFX803-NEXT: flat_store_dword v[0:1], v0 1654; GFX803-NEXT: s_waitcnt vmcnt(0) 1655; GFX803-NEXT: s_setpc_b64 s[30:31] 1656; 1657; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_sexti8: 1658; GFX900-FLATSCR: ; %bb.0: ; %entry 1659; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1660; GFX900-FLATSCR-NEXT: s_movk_i32 s0, 0xffe 1661; GFX900-FLATSCR-NEXT: scratch_load_sbyte_d16 v1, off, s0 glc 1662; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1663; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v1, off 1664; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1665; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31] 1666entry: 1667 %reg.bc = bitcast i32 %reg to <2 x i16> 1668 %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*) 1669 %ext = sext i8 %load to i16 1670 %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0 1671 store <2 x i16> %build1, 
<2 x i16> addrspace(1)* undef 1672 ret void 1673} 1674 1675define void @load_private_lo_v2f16_reglo_vreg_nooff_zexti8(i8 addrspace(5)* %in, i32 %reg) #0 { 1676; GFX900-MUBUF-LABEL: load_private_lo_v2f16_reglo_vreg_nooff_zexti8: 1677; GFX900-MUBUF: ; %bb.0: ; %entry 1678; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1679; GFX900-MUBUF-NEXT: buffer_load_ubyte_d16 v1, off, s[0:3], 0 offset:4094 glc 1680; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1681; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v1, off 1682; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1683; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31] 1684; 1685; GFX906-LABEL: load_private_lo_v2f16_reglo_vreg_nooff_zexti8: 1686; GFX906: ; %bb.0: ; %entry 1687; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1688; GFX906-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:4094 glc 1689; GFX906-NEXT: s_waitcnt vmcnt(0) 1690; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v1 1691; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0 1692; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0 1693; GFX906-NEXT: global_store_dword v[0:1], v0, off 1694; GFX906-NEXT: s_waitcnt vmcnt(0) 1695; GFX906-NEXT: s_setpc_b64 s[30:31] 1696; 1697; GFX803-LABEL: load_private_lo_v2f16_reglo_vreg_nooff_zexti8: 1698; GFX803: ; %bb.0: ; %entry 1699; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1700; GFX803-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:4094 glc 1701; GFX803-NEXT: s_waitcnt vmcnt(0) 1702; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 1703; GFX803-NEXT: v_or_b32_e32 v0, v0, v1 1704; GFX803-NEXT: flat_store_dword v[0:1], v0 1705; GFX803-NEXT: s_waitcnt vmcnt(0) 1706; GFX803-NEXT: s_setpc_b64 s[30:31] 1707; 1708; GFX900-FLATSCR-LABEL: load_private_lo_v2f16_reglo_vreg_nooff_zexti8: 1709; GFX900-FLATSCR: ; %bb.0: ; %entry 1710; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1711; GFX900-FLATSCR-NEXT: s_movk_i32 s0, 0xffe 1712; GFX900-FLATSCR-NEXT: scratch_load_ubyte_d16 v1, off, s0 glc 1713; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 
1714; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v1, off 1715; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1716; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31] 1717entry: 1718 %reg.bc = bitcast i32 %reg to <2 x half> 1719 %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*) 1720 %ext = zext i8 %load to i16 1721 %bc.ext = bitcast i16 %ext to half 1722 %build1 = insertelement <2 x half> %reg.bc, half %bc.ext, i32 0 1723 store <2 x half> %build1, <2 x half> addrspace(1)* undef 1724 ret void 1725} 1726 1727define void @load_constant_lo_v2i16_reglo_vreg(i16 addrspace(4)* %in, i32 %reg) #0 { 1728; GFX900-LABEL: load_constant_lo_v2i16_reglo_vreg: 1729; GFX900: ; %bb.0: ; %entry 1730; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1731; GFX900-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094 1732; GFX900-NEXT: s_waitcnt vmcnt(0) 1733; GFX900-NEXT: global_store_dword v[0:1], v2, off 1734; GFX900-NEXT: s_waitcnt vmcnt(0) 1735; GFX900-NEXT: s_setpc_b64 s[30:31] 1736; 1737; GFX906-LABEL: load_constant_lo_v2i16_reglo_vreg: 1738; GFX906: ; %bb.0: ; %entry 1739; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1740; GFX906-NEXT: global_load_ushort v0, v[0:1], off offset:-4094 1741; GFX906-NEXT: v_mov_b32_e32 v1, 0xffff 1742; GFX906-NEXT: s_waitcnt vmcnt(0) 1743; GFX906-NEXT: v_bfi_b32 v0, v1, v0, v2 1744; GFX906-NEXT: global_store_dword v[0:1], v0, off 1745; GFX906-NEXT: s_waitcnt vmcnt(0) 1746; GFX906-NEXT: s_setpc_b64 s[30:31] 1747; 1748; GFX803-LABEL: load_constant_lo_v2i16_reglo_vreg: 1749; GFX803: ; %bb.0: ; %entry 1750; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1751; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff002, v0 1752; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc 1753; GFX803-NEXT: flat_load_ushort v0, v[0:1] 1754; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2 1755; GFX803-NEXT: s_waitcnt vmcnt(0) 1756; GFX803-NEXT: v_or_b32_e32 v0, v0, v1 1757; GFX803-NEXT: flat_store_dword v[0:1], v0 1758; GFX803-NEXT: 
s_waitcnt vmcnt(0) 1759; GFX803-NEXT: s_setpc_b64 s[30:31] 1760entry: 1761 %reg.bc = bitcast i32 %reg to <2 x i16> 1762 %gep = getelementptr inbounds i16, i16 addrspace(4)* %in, i64 -2047 1763 %load = load i16, i16 addrspace(4)* %gep 1764 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0 1765 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 1766 ret void 1767} 1768 1769define void @load_constant_lo_v2f16_reglo_vreg(half addrspace(4)* %in, i32 %reg) #0 { 1770; GFX900-LABEL: load_constant_lo_v2f16_reglo_vreg: 1771; GFX900: ; %bb.0: ; %entry 1772; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1773; GFX900-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094 1774; GFX900-NEXT: s_waitcnt vmcnt(0) 1775; GFX900-NEXT: global_store_dword v[0:1], v2, off 1776; GFX900-NEXT: s_waitcnt vmcnt(0) 1777; GFX900-NEXT: s_setpc_b64 s[30:31] 1778; 1779; GFX906-LABEL: load_constant_lo_v2f16_reglo_vreg: 1780; GFX906: ; %bb.0: ; %entry 1781; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1782; GFX906-NEXT: global_load_ushort v0, v[0:1], off offset:-4094 1783; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2 1784; GFX906-NEXT: s_waitcnt vmcnt(0) 1785; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0 1786; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0 1787; GFX906-NEXT: global_store_dword v[0:1], v0, off 1788; GFX906-NEXT: s_waitcnt vmcnt(0) 1789; GFX906-NEXT: s_setpc_b64 s[30:31] 1790; 1791; GFX803-LABEL: load_constant_lo_v2f16_reglo_vreg: 1792; GFX803: ; %bb.0: ; %entry 1793; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1794; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff002, v0 1795; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc 1796; GFX803-NEXT: flat_load_ushort v0, v[0:1] 1797; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2 1798; GFX803-NEXT: s_waitcnt vmcnt(0) 1799; GFX803-NEXT: v_or_b32_e32 v0, v0, v1 1800; GFX803-NEXT: flat_store_dword v[0:1], v0 1801; GFX803-NEXT: s_waitcnt vmcnt(0) 1802; GFX803-NEXT: s_setpc_b64 s[30:31] 1803entry: 1804 %reg.bc = bitcast i32 
%reg to <2 x half> 1805 %gep = getelementptr inbounds half, half addrspace(4)* %in, i64 -2047 1806 %load = load half, half addrspace(4)* %gep 1807 %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0 1808 store <2 x half> %build1, <2 x half> addrspace(1)* undef 1809 ret void 1810} 1811 1812define void @load_constant_lo_v2f16_reglo_vreg_zexti8(i8 addrspace(4)* %in, i32 %reg) #0 { 1813; GFX900-LABEL: load_constant_lo_v2f16_reglo_vreg_zexti8: 1814; GFX900: ; %bb.0: ; %entry 1815; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1816; GFX900-NEXT: global_load_ubyte_d16 v2, v[0:1], off offset:-4095 1817; GFX900-NEXT: s_waitcnt vmcnt(0) 1818; GFX900-NEXT: global_store_dword v[0:1], v2, off 1819; GFX900-NEXT: s_waitcnt vmcnt(0) 1820; GFX900-NEXT: s_setpc_b64 s[30:31] 1821; 1822; GFX906-LABEL: load_constant_lo_v2f16_reglo_vreg_zexti8: 1823; GFX906: ; %bb.0: ; %entry 1824; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1825; GFX906-NEXT: global_load_ubyte v0, v[0:1], off offset:-4095 1826; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2 1827; GFX906-NEXT: s_waitcnt vmcnt(0) 1828; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0 1829; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0 1830; GFX906-NEXT: global_store_dword v[0:1], v0, off 1831; GFX906-NEXT: s_waitcnt vmcnt(0) 1832; GFX906-NEXT: s_setpc_b64 s[30:31] 1833; 1834; GFX803-LABEL: load_constant_lo_v2f16_reglo_vreg_zexti8: 1835; GFX803: ; %bb.0: ; %entry 1836; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1837; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0 1838; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc 1839; GFX803-NEXT: flat_load_ubyte v0, v[0:1] 1840; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2 1841; GFX803-NEXT: s_waitcnt vmcnt(0) 1842; GFX803-NEXT: v_or_b32_e32 v0, v0, v1 1843; GFX803-NEXT: flat_store_dword v[0:1], v0 1844; GFX803-NEXT: s_waitcnt vmcnt(0) 1845; GFX803-NEXT: s_setpc_b64 s[30:31] 1846entry: 1847 %reg.bc = bitcast i32 %reg to <2 x half> 1848 %gep = getelementptr inbounds i8, i8 
addrspace(4)* %in, i64 -4095 1849 %load = load i8, i8 addrspace(4)* %gep 1850 %ext = zext i8 %load to i16 1851 %bitcast = bitcast i16 %ext to half 1852 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0 1853 store <2 x half> %build1, <2 x half> addrspace(1)* undef 1854 ret void 1855} 1856 1857define void @load_constant_lo_v2f16_reglo_vreg_sexti8(i8 addrspace(4)* %in, i32 %reg) #0 { 1858; GFX900-LABEL: load_constant_lo_v2f16_reglo_vreg_sexti8: 1859; GFX900: ; %bb.0: ; %entry 1860; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1861; GFX900-NEXT: global_load_sbyte_d16 v2, v[0:1], off offset:-4095 1862; GFX900-NEXT: s_waitcnt vmcnt(0) 1863; GFX900-NEXT: global_store_dword v[0:1], v2, off 1864; GFX900-NEXT: s_waitcnt vmcnt(0) 1865; GFX900-NEXT: s_setpc_b64 s[30:31] 1866; 1867; GFX906-LABEL: load_constant_lo_v2f16_reglo_vreg_sexti8: 1868; GFX906: ; %bb.0: ; %entry 1869; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1870; GFX906-NEXT: global_load_sbyte v0, v[0:1], off offset:-4095 1871; GFX906-NEXT: v_lshrrev_b32_e32 v1, 16, v2 1872; GFX906-NEXT: s_waitcnt vmcnt(0) 1873; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0 1874; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0 1875; GFX906-NEXT: global_store_dword v[0:1], v0, off 1876; GFX906-NEXT: s_waitcnt vmcnt(0) 1877; GFX906-NEXT: s_setpc_b64 s[30:31] 1878; 1879; GFX803-LABEL: load_constant_lo_v2f16_reglo_vreg_sexti8: 1880; GFX803: ; %bb.0: ; %entry 1881; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1882; GFX803-NEXT: v_add_u32_e32 v0, vcc, 0xfffff001, v0 1883; GFX803-NEXT: v_addc_u32_e32 v1, vcc, -1, v1, vcc 1884; GFX803-NEXT: flat_load_sbyte v0, v[0:1] 1885; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v2 1886; GFX803-NEXT: s_waitcnt vmcnt(0) 1887; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1888; GFX803-NEXT: flat_store_dword v[0:1], v0 1889; GFX803-NEXT: s_waitcnt vmcnt(0) 1890; GFX803-NEXT: s_setpc_b64 s[30:31] 1891entry: 1892 %reg.bc = 
bitcast i32 %reg to <2 x half> 1893 %gep = getelementptr inbounds i8, i8 addrspace(4)* %in, i64 -4095 1894 %load = load i8, i8 addrspace(4)* %gep 1895 %ext = sext i8 %load to i16 1896 %bitcast = bitcast i16 %ext to half 1897 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0 1898 store <2 x half> %build1, <2 x half> addrspace(1)* undef 1899 ret void 1900} 1901 1902define void @load_private_lo_v2i16_reglo_vreg_to_offset(i32 %reg) #0 { 1903; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_to_offset: 1904; GFX900-MUBUF: ; %bb.0: ; %entry 1905; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1906; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 0x7b 1907; GFX900-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 1908; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1909; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 44 1910; GFX900-MUBUF-NEXT: buffer_load_short_d16 v0, v1, s[0:3], s32 offen offset:4054 glc 1911; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1912; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off 1913; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1914; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31] 1915; 1916; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_to_offset: 1917; GFX906: ; %bb.0: ; %entry 1918; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1919; GFX906-NEXT: v_mov_b32_e32 v1, 0x7b 1920; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 1921; GFX906-NEXT: s_waitcnt vmcnt(0) 1922; GFX906-NEXT: v_mov_b32_e32 v3, 44 1923; GFX906-NEXT: buffer_load_ushort v1, v3, s[0:3], s32 offen offset:4054 glc 1924; GFX906-NEXT: s_waitcnt vmcnt(0) 1925; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff 1926; GFX906-NEXT: v_bfi_b32 v0, v2, v1, v0 1927; GFX906-NEXT: global_store_dword v[0:1], v0, off 1928; GFX906-NEXT: s_waitcnt vmcnt(0) 1929; GFX906-NEXT: s_setpc_b64 s[30:31] 1930; 1931; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_to_offset: 1932; GFX803: ; %bb.0: ; %entry 1933; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1934; GFX803-NEXT: 
v_mov_b32_e32 v1, 0x7b 1935; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 1936; GFX803-NEXT: s_waitcnt vmcnt(0) 1937; GFX803-NEXT: v_mov_b32_e32 v2, 44 1938; GFX803-NEXT: buffer_load_ushort v1, v2, s[0:3], s32 offen offset:4054 glc 1939; GFX803-NEXT: s_waitcnt vmcnt(0) 1940; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 1941; GFX803-NEXT: v_or_b32_e32 v0, v1, v0 1942; GFX803-NEXT: flat_store_dword v[0:1], v0 1943; GFX803-NEXT: s_waitcnt vmcnt(0) 1944; GFX803-NEXT: s_setpc_b64 s[30:31] 1945; 1946; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg_to_offset: 1947; GFX900-FLATSCR: ; %bb.0: ; %entry 1948; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1949; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x7b 1950; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32 offset:4 1951; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1952; GFX900-FLATSCR-NEXT: s_add_i32 vcc_hi, s32, 44 1953; GFX900-FLATSCR-NEXT: scratch_load_short_d16 v0, off, vcc_hi offset:4054 glc 1954; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1955; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off 1956; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 1957; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31] 1958entry: 1959 %obj0 = alloca [10 x i32], align 4, addrspace(5) 1960 %obj1 = alloca [4096 x i16], align 2, addrspace(5) 1961 %reg.bc = bitcast i32 %reg to <2 x i16> 1962 %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)* 1963 store volatile i32 123, i32 addrspace(5)* %bc 1964 %gep = getelementptr inbounds [4096 x i16], [4096 x i16] addrspace(5)* %obj1, i32 0, i32 2027 1965 %load = load volatile i16, i16 addrspace(5)* %gep 1966 %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0 1967 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 1968 ret void 1969} 1970 1971define void @load_private_lo_v2i16_reglo_vreg_sexti8_to_offset(i32 %reg) #0 { 1972; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8_to_offset: 1973; GFX900-MUBUF: ; %bb.0: ; %entry 1974; 
GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1975; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 0x7b 1976; GFX900-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 1977; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1978; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 44 1979; GFX900-MUBUF-NEXT: buffer_load_sbyte_d16 v0, v1, s[0:3], s32 offen offset:4055 glc 1980; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1981; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off 1982; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 1983; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31] 1984; 1985; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8_to_offset: 1986; GFX906: ; %bb.0: ; %entry 1987; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1988; GFX906-NEXT: v_mov_b32_e32 v1, 0x7b 1989; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 1990; GFX906-NEXT: s_waitcnt vmcnt(0) 1991; GFX906-NEXT: v_mov_b32_e32 v3, 44 1992; GFX906-NEXT: buffer_load_sbyte v1, v3, s[0:3], s32 offen offset:4055 glc 1993; GFX906-NEXT: s_waitcnt vmcnt(0) 1994; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff 1995; GFX906-NEXT: v_bfi_b32 v0, v2, v1, v0 1996; GFX906-NEXT: global_store_dword v[0:1], v0, off 1997; GFX906-NEXT: s_waitcnt vmcnt(0) 1998; GFX906-NEXT: s_setpc_b64 s[30:31] 1999; 2000; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8_to_offset: 2001; GFX803: ; %bb.0: ; %entry 2002; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2003; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b 2004; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 2005; GFX803-NEXT: s_waitcnt vmcnt(0) 2006; GFX803-NEXT: v_mov_b32_e32 v2, 44 2007; GFX803-NEXT: buffer_load_sbyte v1, v2, s[0:3], s32 offen offset:4055 glc 2008; GFX803-NEXT: s_waitcnt vmcnt(0) 2009; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 2010; GFX803-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 2011; GFX803-NEXT: flat_store_dword v[0:1], v0 2012; GFX803-NEXT: s_waitcnt vmcnt(0) 2013; GFX803-NEXT: s_setpc_b64 
s[30:31] 2014; 2015; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8_to_offset: 2016; GFX900-FLATSCR: ; %bb.0: ; %entry 2017; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2018; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x7b 2019; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32 offset:4 2020; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 2021; GFX900-FLATSCR-NEXT: s_add_i32 vcc_hi, s32, 44 2022; GFX900-FLATSCR-NEXT: scratch_load_sbyte_d16 v0, off, vcc_hi offset:4055 glc 2023; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 2024; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off 2025; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 2026; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31] 2027entry: 2028 %obj0 = alloca [10 x i32], align 4, addrspace(5) 2029 %obj1 = alloca [4096 x i8], align 2, addrspace(5) 2030 %reg.bc = bitcast i32 %reg to <2 x i16> 2031 %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)* 2032 store volatile i32 123, i32 addrspace(5)* %bc 2033 %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4055 2034 %load = load volatile i8, i8 addrspace(5)* %gep 2035 %load.ext = sext i8 %load to i16 2036 %build1 = insertelement <2 x i16> %reg.bc, i16 %load.ext, i32 0 2037 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 2038 ret void 2039} 2040 2041define void @load_private_lo_v2i16_reglo_vreg_zexti8_to_offset(i32 %reg) #0 { 2042; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8_to_offset: 2043; GFX900-MUBUF: ; %bb.0: ; %entry 2044; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2045; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 0x7b 2046; GFX900-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 2047; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 2048; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 44 2049; GFX900-MUBUF-NEXT: buffer_load_ubyte_d16 v0, v1, s[0:3], s32 offen offset:4055 glc 2050; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 2051; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off 
2052; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 2053; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31] 2054; 2055; GFX906-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8_to_offset: 2056; GFX906: ; %bb.0: ; %entry 2057; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2058; GFX906-NEXT: v_mov_b32_e32 v1, 0x7b 2059; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 2060; GFX906-NEXT: s_waitcnt vmcnt(0) 2061; GFX906-NEXT: v_mov_b32_e32 v3, 44 2062; GFX906-NEXT: buffer_load_ubyte v1, v3, s[0:3], s32 offen offset:4055 glc 2063; GFX906-NEXT: s_waitcnt vmcnt(0) 2064; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff 2065; GFX906-NEXT: v_bfi_b32 v0, v2, v1, v0 2066; GFX906-NEXT: global_store_dword v[0:1], v0, off 2067; GFX906-NEXT: s_waitcnt vmcnt(0) 2068; GFX906-NEXT: s_setpc_b64 s[30:31] 2069; 2070; GFX803-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8_to_offset: 2071; GFX803: ; %bb.0: ; %entry 2072; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2073; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b 2074; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 2075; GFX803-NEXT: s_waitcnt vmcnt(0) 2076; GFX803-NEXT: v_mov_b32_e32 v2, 44 2077; GFX803-NEXT: buffer_load_ubyte v1, v2, s[0:3], s32 offen offset:4055 glc 2078; GFX803-NEXT: s_waitcnt vmcnt(0) 2079; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 2080; GFX803-NEXT: v_or_b32_e32 v0, v1, v0 2081; GFX803-NEXT: flat_store_dword v[0:1], v0 2082; GFX803-NEXT: s_waitcnt vmcnt(0) 2083; GFX803-NEXT: s_setpc_b64 s[30:31] 2084; 2085; GFX900-FLATSCR-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8_to_offset: 2086; GFX900-FLATSCR: ; %bb.0: ; %entry 2087; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2088; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x7b 2089; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32 offset:4 2090; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 2091; GFX900-FLATSCR-NEXT: s_add_i32 vcc_hi, s32, 44 2092; GFX900-FLATSCR-NEXT: scratch_load_ubyte_d16 v0, off, vcc_hi offset:4055 glc 2093; GFX900-FLATSCR-NEXT: 
s_waitcnt vmcnt(0) 2094; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off 2095; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 2096; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31] 2097entry: 2098 %obj0 = alloca [10 x i32], align 4, addrspace(5) 2099 %obj1 = alloca [4096 x i8], align 2, addrspace(5) 2100 %reg.bc = bitcast i32 %reg to <2 x i16> 2101 %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)* 2102 store volatile i32 123, i32 addrspace(5)* %bc 2103 %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4055 2104 %load = load volatile i8, i8 addrspace(5)* %gep 2105 %load.ext = zext i8 %load to i16 2106 %build1 = insertelement <2 x i16> %reg.bc, i16 %load.ext, i32 0 2107 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef 2108 ret void 2109} 2110 2111define void @load_private_lo_v2f16_reglo_vreg_sexti8_to_offset(i32 %reg) #0 { 2112; GFX900-MUBUF-LABEL: load_private_lo_v2f16_reglo_vreg_sexti8_to_offset: 2113; GFX900-MUBUF: ; %bb.0: ; %entry 2114; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2115; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 0x7b 2116; GFX900-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 2117; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 2118; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 44 2119; GFX900-MUBUF-NEXT: buffer_load_sbyte_d16 v0, v1, s[0:3], s32 offen offset:4055 glc 2120; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 2121; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off 2122; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 2123; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31] 2124; 2125; GFX906-LABEL: load_private_lo_v2f16_reglo_vreg_sexti8_to_offset: 2126; GFX906: ; %bb.0: ; %entry 2127; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2128; GFX906-NEXT: v_mov_b32_e32 v1, 0x7b 2129; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 2130; GFX906-NEXT: s_waitcnt vmcnt(0) 2131; GFX906-NEXT: v_mov_b32_e32 v2, 44 2132; GFX906-NEXT: buffer_load_sbyte v1, v2, s[0:3], s32 offen offset:4055 glc 
2133; GFX906-NEXT: s_waitcnt vmcnt(0) 2134; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2135; GFX906-NEXT: v_and_b32_e32 v1, 0xffff, v1 2136; GFX906-NEXT: v_lshl_or_b32 v0, v0, 16, v1 2137; GFX906-NEXT: global_store_dword v[0:1], v0, off 2138; GFX906-NEXT: s_waitcnt vmcnt(0) 2139; GFX906-NEXT: s_setpc_b64 s[30:31] 2140; 2141; GFX803-LABEL: load_private_lo_v2f16_reglo_vreg_sexti8_to_offset: 2142; GFX803: ; %bb.0: ; %entry 2143; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2144; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b 2145; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 2146; GFX803-NEXT: s_waitcnt vmcnt(0) 2147; GFX803-NEXT: v_mov_b32_e32 v2, 44 2148; GFX803-NEXT: buffer_load_sbyte v1, v2, s[0:3], s32 offen offset:4055 glc 2149; GFX803-NEXT: s_waitcnt vmcnt(0) 2150; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 2151; GFX803-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 2152; GFX803-NEXT: flat_store_dword v[0:1], v0 2153; GFX803-NEXT: s_waitcnt vmcnt(0) 2154; GFX803-NEXT: s_setpc_b64 s[30:31] 2155; 2156; GFX900-FLATSCR-LABEL: load_private_lo_v2f16_reglo_vreg_sexti8_to_offset: 2157; GFX900-FLATSCR: ; %bb.0: ; %entry 2158; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2159; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x7b 2160; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32 offset:4 2161; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 2162; GFX900-FLATSCR-NEXT: s_add_i32 vcc_hi, s32, 44 2163; GFX900-FLATSCR-NEXT: scratch_load_sbyte_d16 v0, off, vcc_hi offset:4055 glc 2164; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 2165; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off 2166; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 2167; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31] 2168entry: 2169 %obj0 = alloca [10 x i32], align 4, addrspace(5) 2170 %obj1 = alloca [4096 x i8], align 2, addrspace(5) 2171 %reg.bc = bitcast i32 %reg to <2 x half> 2172 %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 
addrspace(5)* 2173 store volatile i32 123, i32 addrspace(5)* %bc 2174 %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4055 2175 %load = load volatile i8, i8 addrspace(5)* %gep 2176 %load.ext = sext i8 %load to i16 2177 %bitcast = bitcast i16 %load.ext to half 2178 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0 2179 store <2 x half> %build1, <2 x half> addrspace(1)* undef 2180 ret void 2181} 2182 2183define void @load_private_lo_v2f16_reglo_vreg_zexti8_to_offset(i32 %reg) #0 { 2184; GFX900-MUBUF-LABEL: load_private_lo_v2f16_reglo_vreg_zexti8_to_offset: 2185; GFX900-MUBUF: ; %bb.0: ; %entry 2186; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2187; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 0x7b 2188; GFX900-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 2189; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 2190; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 44 2191; GFX900-MUBUF-NEXT: buffer_load_ubyte_d16 v0, v1, s[0:3], s32 offen offset:4055 glc 2192; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 2193; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off 2194; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) 2195; GFX900-MUBUF-NEXT: s_setpc_b64 s[30:31] 2196; 2197; GFX906-LABEL: load_private_lo_v2f16_reglo_vreg_zexti8_to_offset: 2198; GFX906: ; %bb.0: ; %entry 2199; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2200; GFX906-NEXT: v_mov_b32_e32 v1, 0x7b 2201; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 2202; GFX906-NEXT: s_waitcnt vmcnt(0) 2203; GFX906-NEXT: v_mov_b32_e32 v2, 44 2204; GFX906-NEXT: buffer_load_ubyte v1, v2, s[0:3], s32 offen offset:4055 glc 2205; GFX906-NEXT: s_waitcnt vmcnt(0) 2206; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2207; GFX906-NEXT: v_and_b32_e32 v1, 0xffff, v1 2208; GFX906-NEXT: v_lshl_or_b32 v0, v0, 16, v1 2209; GFX906-NEXT: global_store_dword v[0:1], v0, off 2210; GFX906-NEXT: s_waitcnt vmcnt(0) 2211; GFX906-NEXT: s_setpc_b64 s[30:31] 2212; 2213; GFX803-LABEL: 
load_private_lo_v2f16_reglo_vreg_zexti8_to_offset: 2214; GFX803: ; %bb.0: ; %entry 2215; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2216; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b 2217; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 2218; GFX803-NEXT: s_waitcnt vmcnt(0) 2219; GFX803-NEXT: v_mov_b32_e32 v2, 44 2220; GFX803-NEXT: buffer_load_ubyte v1, v2, s[0:3], s32 offen offset:4055 glc 2221; GFX803-NEXT: s_waitcnt vmcnt(0) 2222; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 2223; GFX803-NEXT: v_or_b32_e32 v0, v1, v0 2224; GFX803-NEXT: flat_store_dword v[0:1], v0 2225; GFX803-NEXT: s_waitcnt vmcnt(0) 2226; GFX803-NEXT: s_setpc_b64 s[30:31] 2227; 2228; GFX900-FLATSCR-LABEL: load_private_lo_v2f16_reglo_vreg_zexti8_to_offset: 2229; GFX900-FLATSCR: ; %bb.0: ; %entry 2230; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2231; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x7b 2232; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32 offset:4 2233; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 2234; GFX900-FLATSCR-NEXT: s_add_i32 vcc_hi, s32, 44 2235; GFX900-FLATSCR-NEXT: scratch_load_ubyte_d16 v0, off, vcc_hi offset:4055 glc 2236; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 2237; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off 2238; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) 2239; GFX900-FLATSCR-NEXT: s_setpc_b64 s[30:31] 2240entry: 2241 %obj0 = alloca [10 x i32], align 4, addrspace(5) 2242 %obj1 = alloca [4096 x i8], align 2, addrspace(5) 2243 %reg.bc = bitcast i32 %reg to <2 x half> 2244 %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)* 2245 store volatile i32 123, i32 addrspace(5)* %bc 2246 %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4055 2247 %load = load volatile i8, i8 addrspace(5)* %gep 2248 %load.ext = zext i8 %load to i16 2249 %bitcast = bitcast i16 %load.ext to half 2250 %build1 = insertelement <2 x half> %reg.bc, half %bitcast, i32 0 2251 store <2 x half> %build1, <2 x half> 
addrspace(1)* undef 2252 ret void 2253} 2254 2255attributes #0 = { nounwind } 2256